org.ccil.cowan.tagsoup.Parser Java Examples

The following examples show how to use org.ccil.cowan.tagsoup.Parser. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DomLoader.java    From html5index with Apache License 2.0 9 votes vote down vote up
/**
 * Reads the HTML at {@code url} through TagSoup and returns it as a DOM document.
 *
 * <p>Both the namespaces and namespace-prefixes features are switched off on the
 * parser before the identity transform copies the SAX events into a {@link DOMResult}.
 *
 * @param url location passed to {@code openReader}
 * @return the node produced by the transform, cast to {@link Document}
 * @throws RuntimeException wrapping any exception raised while configuring the parser,
 *         opening the reader, transforming, or closing the reader
 */
public static Document loadDom(String url) {
 Parser parser = new Parser();

 try {
   parser.setFeature(Parser.namespacesFeature, false);
   parser.setFeature(Parser.namespacePrefixesFeature, false);
   DOMResult result = new DOMResult();
   // try-with-resources: the reader is closed even when the transform throws,
   // which the previous explicit close() on the success path did not guarantee.
   try (Reader reader = openReader(url)) {
     Transformer transformer = TransformerFactory.newInstance().newTransformer();
     transformer.transform(new SAXSource(parser, new InputSource(reader)), result);
   }
   return (Document) result.getNode();
 } catch (Exception e) {
   throw new RuntimeException(e);
 }
}
 
Example #2
Source File: ConvertHTMLBuilder.java    From kite with Apache License 2.0 6 votes vote down vote up
public ConvertHTML(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) throws SAXNotRecognizedException, SAXNotSupportedException {
  super(builder, config, parent, child, context);
  this.charset = getConfigs().getCharset(config, "charset", null);
  this.omitXMLDeclaration = getConfigs().getBoolean(config, "omitXMLDeclaration", false);      
  this.xmlReader = new Parser(); // no reuse?
  xmlReader.setProperty(Parser.schemaProperty, htmlSchema);
  xmlReader.setFeature(Parser.CDATAElementsFeature, getConfigs().getBoolean(config, "noCDATA", false));
  xmlReader.setFeature(Parser.namespacesFeature, !getConfigs().getBoolean(config, "noNamespaces", true));
  xmlReader.setFeature(Parser.ignoreBogonsFeature, getConfigs().getBoolean(config, "noBogons", false)); // also see TIKA-599
  xmlReader.setFeature(Parser.bogonsEmptyFeature, getConfigs().getBoolean(config, "emptyBogons", false));
  xmlReader.setFeature(Parser.rootBogonsFeature, getConfigs().getBoolean(config, "noRootBogons", false));
  xmlReader.setFeature(Parser.defaultAttributesFeature, getConfigs().getBoolean(config, "noDefaultAttributes", false));
  xmlReader.setFeature(Parser.translateColonsFeature, getConfigs().getBoolean(config, "noColons", false));
  xmlReader.setFeature(Parser.restartElementsFeature, getConfigs().getBoolean(config, "noRestart", false));
  xmlReader.setFeature(Parser.ignorableWhitespaceFeature, !getConfigs().getBoolean(config, "suppressIgnorableWhitespace", true));
  validateArguments();
}
 
Example #3
Source File: Table.java    From JTAF-ExtWebDriver with Apache License 2.0 6 votes vote down vote up
/**
 * Parses the GUI driver's current HTML source with TagSoup and evaluates an
 * XPath expression against the resulting DOM.
 *
 * @param xpath
 *            expression to evaluate against the parsed page
 * @return the node set the expression selects
 * @throws Exception
 *             if parser configuration, the transform, or the XPath
 *             evaluation fails
 */
private NodeList getNodeListUsingJavaXPath(String xpath) throws Exception {
    XPath evaluator = XPathFactory.newInstance().newXPath();

    // Collapse whitespace-only gaps between adjacent tags before parsing.
    String markup = getGUIDriver().getHtmlSource().replaceAll(">\\s+<", "><");
    InputStream markupBytes = new ByteArrayInputStream(markup.getBytes(Charset.forName("UTF-8")));

    // TagSoup reader with namespace processing turned off.
    XMLReader tagSoup = new Parser();
    tagSoup.setFeature(Parser.namespacesFeature, false);

    // An identity transform copies the SAX events into a DOM tree.
    Transformer identity = TransformerFactory.newInstance().newTransformer();
    DOMResult dom = new DOMResult();
    SAXSource saxInput = new SAXSource(tagSoup, new InputSource(markupBytes));
    identity.transform(saxInput, dom);

    Node root = dom.getNode();
    return (NodeList) evaluator.evaluate(xpath, root, XPathConstants.NODESET);
}
 
Example #4
Source File: HtmlCompat.java    From HtmlCompat with Apache License 2.0 6 votes vote down vote up
/**
 * Converts an HTML string into displayable styled text.
 * <p>
 * &lt;img&gt; tags are resolved through {@code imageGetter} and unknown tags are passed to
 * {@code tagHandler}; either may be null to disable that behaviour.
 * <p>
 * Parsing is done with TagSoup so real-world, malformed HTML is tolerated.
 *
 * @return the converted text, or null when {@code source} is null
 */
public static Spanned fromHtml(@NonNull Context context, @NonNull String source, int flags,
                               @Nullable ImageGetter imageGetter, @Nullable TagHandler tagHandler,
                               @Nullable SpanCallback spanCallback) {
    if (source == null) {
        return null;
    }

    final Parser tagSoup = new Parser();
    try {
        tagSoup.setProperty(Parser.schemaProperty, HtmlParser.schema);
    } catch (org.xml.sax.SAXNotRecognizedException | org.xml.sax.SAXNotSupportedException e) {
        // The schema property is expected to be accepted; surface a failure as unchecked.
        throw new RuntimeException(e);
    }

    return new HtmlToSpannedConverter(
            context, source, imageGetter, tagHandler, spanCallback, tagSoup, flags)
            .convert();
}
 
Example #5
Source File: Element.java    From JTAF-ExtWebDriver with Apache License 2.0 6 votes vote down vote up
/**
 * Evaluate an xpath expression against the page source currently held by
 * the GUI driver and return the matching nodes.
 * 
 * @param xpath
 *            the xpath expression to evaluate
 * @return the nodeset selected by the expression
 * @throws Exception
 *             if configuring the parser, transforming the source, or
 *             evaluating the expression fails
 */
private NodeList getNodeListUsingJavaXPath(String xpath) throws Exception {
	XPath xpathEvaluator = XPathFactory.newInstance().newXPath();

	// Remove whitespace runs that sit between a closing '>' and the next '<'.
	String pageSource = getGUIDriver().getHtmlSource().replaceAll(">\\s+<", "><");
	byte[] utf8 = pageSource.getBytes(Charset.forName("UTF-8"));
	InputStream pageStream = new ByteArrayInputStream(utf8);

	XMLReader htmlReader = new Parser();
	htmlReader.setFeature(Parser.namespacesFeature, false);

	Transformer copier = TransformerFactory.newInstance().newTransformer();
	DOMResult domResult = new DOMResult();
	copier.transform(new SAXSource(htmlReader, new InputSource(pageStream)), domResult);

	// The transform result holds the parsed tree that xpath is evaluated on.
	Node parsedPage = domResult.getNode();
	return (NodeList) xpathEvaluator.evaluate(xpath, parsedPage, XPathConstants.NODESET);
}
 
Example #6
Source File: DefaultExtWebDriver.java    From JTAF-ExtWebDriver with Apache License 2.0 6 votes vote down vote up
/**
 * Evaluates an xpath expression against the current HTML source and returns
 * the result as a string.
 *
 * @param xpath
 *            the expression to evaluate
 * @return the string value produced by the expression
 * @throws Exception
 *             if parser configuration, the transform, or the evaluation
 *             fails
 */
@Override
public String evaluateXpath(String xpath) throws Exception {
	XPath xpathEvaluator = XPathFactory.newInstance().newXPath();

	// Strip whitespace-only gaps between tags before handing the source to TagSoup.
	String source = getHtmlSource().replaceAll(">\\s+<", "><");
	byte[] sourceBytes = source.getBytes(Charset.forName("UTF-8"));
	InputStream sourceStream = new ByteArrayInputStream(sourceBytes);

	XMLReader tagSoupReader = new Parser();
	tagSoupReader.setFeature(Parser.namespacesFeature, false);

	Transformer identity = TransformerFactory.newInstance().newTransformer();
	DOMResult parsed = new DOMResult();
	SAXSource saxSource = new SAXSource(tagSoupReader, new InputSource(sourceStream));
	identity.transform(saxSource, parsed);

	Node documentNode = parsed.getNode();
	Object value = xpathEvaluator.evaluate(xpath, documentNode, XPathConstants.STRING);
	return (String) value;
}
 
Example #7
Source File: Html.java    From Nimingban with Apache License 2.0 6 votes vote down vote up
/**
 * Builds displayable styled text from an HTML string.
 *
 * <p>&lt;img&gt; tags are resolved through the given ImageGetter and unknown
 * tags are forwarded to the given TagHandler; pass null for either to opt out.
 *
 * <p>TagSoup does the parsing, so broken real-world HTML is accepted.
 */
public static SpannableStringBuilder fromHtml(String source, ImageGetter imageGetter,
        TagHandler tagHandler) {
    final Parser tagSoup = new Parser();
    try {
        tagSoup.setProperty(Parser.schemaProperty, HtmlParser.schema);
    } catch (org.xml.sax.SAXNotRecognizedException | org.xml.sax.SAXNotSupportedException e) {
        // The schema property is expected to be accepted; surface a failure as unchecked.
        throw new RuntimeException(e);
    }

    return new HtmlToSpannedConverter(source, imageGetter, tagHandler, tagSoup).convert();
}
 
Example #8
Source File: HtmlParser.java    From Overchan-Android with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Stores the conversion inputs and, when a non-empty subject is given, writes it
 * as a styled heading line at the start of the output builder.
 *
 * <p>The heading gets a 1.25x relative size span, a bold span and, if {@code colors}
 * is non-null, a foreground colour span. {@code mStartLength} is then set to the
 * builder length after the trailing newline.
 */
public HtmlToSpannedConverter(String subject, String source, ThemeColors colors, HtmlParser.ImageGetter imageGetter, boolean openSpoilers,
        Parser parser) {
    mSource = source;
    mReader = parser;
    mImageGetter = imageGetter;
    mColors = colors;
    mOpenSpoilers = openSpoilers;
    mSpannableStringBuilder = new SpannableStringBuilder();

    if (TextUtils.isEmpty(subject)) {
        // No heading to emit; the builder stays empty.
        return;
    }

    mSpannableStringBuilder.append(subject);
    final int subjectEnd = mSpannableStringBuilder.length();
    final int spanFlags = Spannable.SPAN_EXCLUSIVE_EXCLUSIVE;
    mSpannableStringBuilder.setSpan(new RelativeSizeSpan(1.25f), 0, subjectEnd, spanFlags);
    mSpannableStringBuilder.setSpan(new StyleSpan(Typeface.BOLD), 0, subjectEnd, spanFlags);
    if (colors != null) {
        mSpannableStringBuilder.setSpan(
                new ForegroundColorSpan(colors.subjectForeground), 0, subjectEnd, spanFlags);
    }
    mSpannableStringBuilder.append('\n');
    mStartLength = mSpannableStringBuilder.length();
}
 
Example #9
Source File: CustomHtmlToSpannedConverter.java    From zulip-android with Apache License 2.0 5 votes vote down vote up
/**
 * Captures everything the converter needs: the HTML source, the image, emoji and tag
 * callbacks, the TagSoup parser, the base URI, and the two mention colours resolved
 * from {@code context}.
 */
public CustomHtmlToSpannedConverter(String source,
                                    Html.ImageGetter imageGetter, Html.TagHandler tagHandler,
                                    Parser parser, Html.ImageGetter emojiGetter, String baseUri, Context context) {
    // Input and parser.
    this.mSource = source;
    this.mReader = parser;
    this.mBaseUri = baseUri;

    // Callbacks.
    this.mImageGetter = imageGetter;
    this.mEmojiGetter = emojiGetter;
    this.mTagHandler = tagHandler;

    // Output buffer.
    this.mSpannableStringBuilder = new SpannableStringBuilder();

    // Mention colours looked up from resources.
    this.userMentionColor = ContextCompat.getColor(context, R.color.dark_red);
    this.userMentionSelfColor = ContextCompat.getColor(context, R.color.self_mention_color);
}
 
Example #10
Source File: AKHtml.java    From Mupdf with Apache License 2.0 5 votes vote down vote up
/**
 * Records the HTML source, callbacks, parser and flags, and allocates an empty
 * output builder. The constructor itself performs no parsing.
 */
public HtmlToSpannedConverter(String source, Html.ImageGetter imageGetter,
                              Html.TagHandler tagHandler, Parser parser, int flags) {
    this.mSource = source;
    this.mFlags = flags;
    this.mReader = parser;
    this.mImageGetter = imageGetter;
    this.mTagHandler = tagHandler;
    this.mSpannableStringBuilder = new SpannableStringBuilder();
}
 
Example #11
Source File: Html.java    From EhViewer with Apache License 2.0 5 votes vote down vote up
/**
 * Keeps references to the HTML source, the image and tag callbacks and the parser,
 * and creates the empty builder that will receive the output.
 */
public HtmlToSpannedConverter(
        String source, Html.ImageGetter imageGetter, Html.TagHandler tagHandler,
        Parser parser) {
    this.mSource = source;
    this.mReader = parser;
    this.mImageGetter = imageGetter;
    this.mTagHandler = tagHandler;
    this.mSpannableStringBuilder = new SpannableStringBuilder();
}
 
Example #12
Source File: Html.java    From android_9.0.0_r45 with Apache License 2.0 5 votes vote down vote up
/**
 * Initialises the converter state from its arguments and starts with an empty
 * output builder; nothing is parsed here.
 */
public HtmlToSpannedConverter( String source, Html.ImageGetter imageGetter,
        Html.TagHandler tagHandler, Parser parser, int flags) {
    // What to parse, and with what.
    this.mSource = source;
    this.mReader = parser;
    this.mFlags = flags;

    // Optional callbacks.
    this.mImageGetter = imageGetter;
    this.mTagHandler = tagHandler;

    // Where the result accumulates.
    this.mSpannableStringBuilder = new SpannableStringBuilder();
}
 
Example #13
Source File: Html.java    From Nimingban with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the source text, both handlers and the parser for a later conversion,
 * alongside a newly created, empty output builder.
 */
public HtmlToSpannedConverter(
        String source, Html.ImageGetter imageGetter, Html.TagHandler tagHandler,
        Parser parser) {
    this.mSpannableStringBuilder = new SpannableStringBuilder();
    this.mReader = parser;
    this.mSource = source;
    this.mTagHandler = tagHandler;
    this.mImageGetter = imageGetter;
}
 
Example #14
Source File: OFXHomeFIDataStore.java    From ofx4j with Apache License 2.0 5 votes vote down vote up
/**
 * Downloads the page at {@code href}, parses it with TagSoup and returns the data
 * collected by an {@code InstitutionContentHandler}.
 *
 * <p>The namespaces and validation features are switched off on the reader.
 *
 * @param href URL of the institution page
 * @return the {@code data} field of the content handler after parsing
 * @throws IOException if the URL is malformed or the stream cannot be opened, read or closed
 * @throws SAXException if the reader rejects a feature or parsing fails
 */
private BaseFinancialInstitutionData loadInstitutionData(String href) throws IOException, SAXException {
  if (LOG.isInfoEnabled()) {
    LOG.info("Loading institution data from: " + href);
  }
  
  URL url = new URL(href);
  XMLReader xmlReader = new Parser();
  xmlReader.setFeature("http://xml.org/sax/features/namespaces", false);
  xmlReader.setFeature("http://xml.org/sax/features/validation", false);
  InstitutionContentHandler institutionHandler = new InstitutionContentHandler();
  xmlReader.setContentHandler(institutionHandler);
  // Close the connection stream on every path; it was previously never closed.
  // Fully qualified so the block does not depend on an extra import.
  try (java.io.InputStream in = url.openStream()) {
    xmlReader.parse(new InputSource(in));
  }
  return institutionHandler.data;
}
 
Example #15
Source File: OFXHomeFIDataStore.java    From ofx4j with Apache License 2.0 5 votes vote down vote up
/**
 * Downloads the page at {@code getUrl()} and feeds it through TagSoup into a new
 * {@code DirectoryContentHandler}.
 *
 * <p>The namespaces and validation features are switched off on the reader.
 *
 * @throws IOException if the URL is malformed or the stream cannot be opened, read or closed
 * @throws SAXException if the reader rejects a feature or parsing fails
 */
private void initializeFIData() throws IOException, SAXException {
  URL url = new URL(getUrl());
  XMLReader xmlReader = new Parser();
  xmlReader.setFeature("http://xml.org/sax/features/namespaces", false);
  xmlReader.setFeature("http://xml.org/sax/features/validation", false);
  xmlReader.setContentHandler(new DirectoryContentHandler());
  // Close the connection stream on every path; it was previously never closed.
  // Fully qualified so the block does not depend on an extra import.
  try (java.io.InputStream in = url.openStream()) {
    xmlReader.parse(new InputSource(in));
  }
}
 
Example #16
Source File: Html.java    From ForPDA with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Sets up a converter for {@code source}: remembers the handlers, the parser and
 * the flags, and creates the empty builder the output is written to.
 */
public HtmlToSpannedConverter(String source, Html.ImageGetter imageGetter,
                              Html.TagHandler tagHandler, Parser parser, int flags) {
    this.mSpannableStringBuilder = new SpannableStringBuilder();
    this.mSource = source;
    this.mReader = parser;
    this.mFlags = flags;
    this.mTagHandler = tagHandler;
    this.mImageGetter = imageGetter;
}
 
Example #17
Source File: HtmlToSpannedConverter.java    From HtmlCompat with Apache License 2.0 5 votes vote down vote up
/**
 * Package-private constructor that stores the context, the HTML source, the three
 * callbacks, the parser and the flags, and allocates an empty output builder.
 */
HtmlToSpannedConverter(Context context, String source, HtmlCompat.ImageGetter imageGetter,
                       HtmlCompat.TagHandler tagHandler, HtmlCompat.SpanCallback spanCallback,
                       Parser parser, int flags) {
    // Environment and input.
    this.mContext = context;
    this.mSource = source;
    this.mReader = parser;
    this.mFlags = flags;

    // Callbacks supplied by the caller.
    this.mImageGetter = imageGetter;
    this.mTagHandler = tagHandler;
    this.mSpanCallback = spanCallback;

    // Output buffer.
    this.mSpannableStringBuilder = new SpannableStringBuilder();
}
 
Example #18
Source File: Html.java    From tysq-android with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Copies the arguments into the converter's fields and prepares a fresh, empty
 * builder for the converted text.
 */
public HtmlToSpannedConverter(String source, Html.ImageGetter imageGetter,
                              Html.TagHandler tagHandler, Parser parser, int flags) {
    this.mFlags = flags;
    this.mReader = parser;
    this.mSource = source;
    this.mImageGetter = imageGetter;
    this.mTagHandler = tagHandler;
    this.mSpannableStringBuilder = new SpannableStringBuilder();
}
 
Example #19
Source File: Html.java    From MHViewer with Apache License 2.0 5 votes vote down vote up
/**
 * Holds on to the source string, the image getter, the tag handler and the parser,
 * and begins with an empty output builder.
 */
public HtmlToSpannedConverter(
        String source, Html.ImageGetter imageGetter, Html.TagHandler tagHandler,
        Parser parser) {
    this.mSource = source;
    this.mImageGetter = imageGetter;
    this.mTagHandler = tagHandler;
    this.mReader = parser;
    this.mSpannableStringBuilder = new SpannableStringBuilder();
}
 
Example #20
Source File: TagSoupDocumentParser.java    From android-test with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the underlying TagSoup parser with namespace processing disabled.
 *
 * @throws SAXNotRecognizedException if the parser does not recognise the namespaces feature
 * @throws SAXNotSupportedException if the parser cannot set the namespaces feature to false
 */
private TagSoupDocumentParser() throws SAXNotRecognizedException, SAXNotSupportedException {
  this.parser = new Parser();
  // The xpath evaluations run on the parsed output are not namespace aware,
  // so the parser must not emit namespaces either.
  this.parser.setFeature(Parser.namespacesFeature, false);
}