org.ccil.cowan.tagsoup.Parser Java Examples

The following examples show how to use org.ccil.cowan.tagsoup.Parser. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: DomLoader.java From html5index with Apache License 2.0

9 votes

/**
 * Reads the document at the given location and parses it into a DOM tree with TagSoup.
 *
 * <p>Namespace processing and namespace-prefix reporting are switched off on the parser, and
 * its SAX events are piped through a default {@link Transformer} into a {@link DOMResult}.
 *
 * @param url location passed to {@code openReader} to obtain the character stream
 * @return the node produced by the transformation, cast to {@link Document}
 * @throws RuntimeException wrapping any exception raised while opening, parsing or closing
 */
public static Document loadDom(String url) {
  Parser parser = new Parser();

  try {
    parser.setFeature(Parser.namespacesFeature, false);
    parser.setFeature(Parser.namespacePrefixesFeature, false);
    // try-with-resources: the reader is released even when the transform fails.
    try (Reader reader = openReader(url)) {
      DOMResult result = new DOMResult();
      Transformer transformer = TransformerFactory.newInstance().newTransformer();
      transformer.transform(new SAXSource(parser, new InputSource(reader)), result);
      return (Document) result.getNode();
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}

Example #2

Source File: ConvertHTMLBuilder.java From kite with Apache License 2.0

6 votes

/**
 * Creates the command and configures a new TagSoup {@link Parser} from the given config.
 *
 * <p>Reads {@code charset} and {@code omitXMLDeclaration}, installs {@code htmlSchema} as the
 * parser schema, then maps each boolean config key onto one parser feature. The
 * {@code noNamespaces} and {@code suppressIgnorableWhitespace} values are negated before being
 * applied; every other value is passed through unchanged.
 *
 * <p>NOTE(review): keys such as {@code noCDATA}, {@code noRootBogons},
 * {@code noDefaultAttributes} and {@code noRestart} are applied without negation; confirm that
 * this is the intended meaning of those keys.
 *
 * @throws SAXNotRecognizedException if the parser does not recognise a property or feature
 * @throws SAXNotSupportedException if the parser cannot apply a property or feature value
 */
public ConvertHTML(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) throws SAXNotRecognizedException, SAXNotSupportedException {
  super(builder, config, parent, child, context);
  this.charset = getConfigs().getCharset(config, "charset", null);
  this.omitXMLDeclaration = getConfigs().getBoolean(config, "omitXMLDeclaration", false);
  this.xmlReader = new Parser(); // no reuse?
  this.xmlReader.setProperty(Parser.schemaProperty, htmlSchema);

  applyBooleanFeature(config, Parser.CDATAElementsFeature, "noCDATA", false, false);
  applyBooleanFeature(config, Parser.namespacesFeature, "noNamespaces", true, true);
  applyBooleanFeature(config, Parser.ignoreBogonsFeature, "noBogons", false, false); // also see TIKA-599
  applyBooleanFeature(config, Parser.bogonsEmptyFeature, "emptyBogons", false, false);
  applyBooleanFeature(config, Parser.rootBogonsFeature, "noRootBogons", false, false);
  applyBooleanFeature(config, Parser.defaultAttributesFeature, "noDefaultAttributes", false, false);
  applyBooleanFeature(config, Parser.translateColonsFeature, "noColons", false, false);
  applyBooleanFeature(config, Parser.restartElementsFeature, "noRestart", false, false);
  applyBooleanFeature(config, Parser.ignorableWhitespaceFeature, "suppressIgnorableWhitespace", true, true);

  validateArguments();
}

/**
 * Reads one boolean config entry and applies it to a single parser feature.
 *
 * @param config source of the setting
 * @param feature feature identifier passed to {@code setFeature}
 * @param key config key to read
 * @param defaultValue value used when the key is absent
 * @param negate whether the configured value is inverted before it is applied
 */
private void applyBooleanFeature(Config config, String feature, String key, boolean defaultValue, boolean negate)
    throws SAXNotRecognizedException, SAXNotSupportedException {
  boolean configured = getConfigs().getBoolean(config, key, defaultValue);
  xmlReader.setFeature(feature, negate ? !configured : configured);
}

Example #3

Source File: Table.java From JTAF-ExtWebDriver with Apache License 2.0

6 votes

/**
 * Evaluates an XPath expression against the current page source.
 *
 * <p>The HTML reported by the GUI driver has whitespace-only gaps between tags removed, is
 * parsed with TagSoup (namespaces disabled) into a DOM tree, and the expression is then
 * evaluated against that tree.
 *
 * @param xpath
 *            the XPath expression selecting the nodes
 * @return the nodes found at the xpath
 * @throws Exception
 *             if parsing, the transformation or the XPath evaluation fails
 */
private NodeList getNodeListUsingJavaXPath(String xpath) throws Exception {
    XPathFactory xpathFac = XPathFactory.newInstance();
    XPath theXpath = xpathFac.newXPath();

    String html = getGUIDriver().getHtmlSource();
    html = html.replaceAll(">\\s+<", "><");
    InputStream input = new ByteArrayInputStream(html.getBytes(Charset.forName("UTF-8")));

    // The bytes are UTF-8 by construction, so declare that on the InputSource.
    // Without a declared encoding the parser may fall back to the platform
    // default charset and corrupt non-ASCII text.
    InputSource source = new InputSource(input);
    source.setEncoding("UTF-8");

    XMLReader reader = new Parser();
    reader.setFeature(Parser.namespacesFeature, false);
    Transformer transformer = TransformerFactory.newInstance().newTransformer();

    DOMResult result = new DOMResult();
    transformer.transform(new SAXSource(reader, source), result);

    // The DOM root produced by the transformation is the XPath context node.
    Node htmlNode = result.getNode();
    return (NodeList) theXpath.evaluate(xpath, htmlNode, XPathConstants.NODESET);
}

Example #4

Source File: HtmlCompat.java From HtmlCompat with Apache License 2.0

6 votes

/**
 * Converts an HTML string into displayable styled text.
 *
 * <p>Parsing is delegated to TagSoup so that real-world HTML, including all of the brokenness
 * found in the wild, is handled.
 *
 * @param context      passed through to the converter
 * @param source       the HTML to convert; a {@code null} value yields {@code null}
 * @param flags        passed through to the converter
 * @param imageGetter  asked for a representation of each &lt;img&gt; tag; use {@code null} if
 *                     this is not wanted
 * @param tagHandler   asked to handle unknown tags; use {@code null} if this is not wanted
 * @param spanCallback passed through to the converter; may be {@code null}
 * @return the result of the conversion, or {@code null} when {@code source} is {@code null}
 */
public static Spanned fromHtml(@NonNull Context context, @NonNull String source, int flags,
                               @Nullable ImageGetter imageGetter, @Nullable TagHandler tagHandler,
                               @Nullable SpanCallback spanCallback) {
    if (source == null) {
        return null;
    }

    final Parser tagSoupParser = new Parser();
    try {
        tagSoupParser.setProperty(Parser.schemaProperty, HtmlParser.schema);
    } catch (org.xml.sax.SAXNotRecognizedException | org.xml.sax.SAXNotSupportedException unexpected) {
        // Should not happen.
        throw new RuntimeException(unexpected);
    }

    return new HtmlToSpannedConverter(
            context, source, imageGetter, tagHandler, spanCallback, tagSoupParser, flags)
            .convert();
}

Example #5

Source File: Element.java From JTAF-ExtWebDriver with Apache License 2.0

6 votes

/**
 * Get the list of nodes which satisfy the xpath expression passed in.
 *
 * <p>The page source from the GUI driver has whitespace-only gaps between tags removed, is
 * parsed with TagSoup (namespaces disabled) into a DOM tree, and the expression is evaluated
 * against that tree.
 *
 * @param xpath
 *            the input xpath expression
 * @return the nodeset of matching elements
 * @throws Exception
 *             if parsing, the transformation or the XPath evaluation fails
 */
private NodeList getNodeListUsingJavaXPath(String xpath) throws Exception {
	XPathFactory xpathFac = XPathFactory.newInstance();
	XPath theXpath = xpathFac.newXPath();

	String html = getGUIDriver().getHtmlSource();
	html = html.replaceAll(">\\s+<", "><");
	InputStream input = new ByteArrayInputStream(html.getBytes(Charset.forName("UTF-8")));

	// Tell the parser the stream is UTF-8; with no declared encoding it may
	// decode the bytes using the platform default charset instead.
	InputSource source = new InputSource(input);
	source.setEncoding("UTF-8");

	XMLReader reader = new Parser();
	reader.setFeature(Parser.namespacesFeature, false);
	Transformer transformer = TransformerFactory.newInstance().newTransformer();

	DOMResult result = new DOMResult();
	transformer.transform(new SAXSource(reader, source), result);

	// The DOM root produced by the transformation is the XPath context node.
	Node htmlNode = result.getNode();
	return (NodeList) theXpath.evaluate(xpath, htmlNode, XPathConstants.NODESET);
}

Example #6

Source File: DefaultExtWebDriver.java From JTAF-ExtWebDriver with Apache License 2.0

6 votes

/**
 * Evaluates an XPath expression against the current page source and returns the result as a
 * string.
 *
 * <p>The page source has whitespace-only gaps between tags removed, is parsed with TagSoup
 * (namespaces disabled) into a DOM tree, and the expression is evaluated against that tree
 * with {@link XPathConstants#STRING} as the requested result type.
 *
 * @param xpath
 *            the XPath expression to evaluate
 * @return the string value of the evaluation
 * @throws Exception
 *             if parsing, the transformation or the XPath evaluation fails
 */
@Override
public String evaluateXpath(String xpath) throws Exception {
	XPathFactory xpathFac = XPathFactory.newInstance();
	XPath theXpath = xpathFac.newXPath();

	String html = getHtmlSource();
	html = html.replaceAll(">\\s+<", "><");
	InputStream input = new ByteArrayInputStream(html.getBytes(Charset.forName("UTF-8")));

	// The stream holds UTF-8 bytes; declare that so the parser does not fall
	// back to the platform default charset when decoding them.
	InputSource source = new InputSource(input);
	source.setEncoding("UTF-8");

	XMLReader reader = new Parser();
	reader.setFeature(Parser.namespacesFeature, false);
	Transformer transformer = TransformerFactory.newInstance()
			.newTransformer();

	DOMResult result = new DOMResult();
	transformer.transform(new SAXSource(reader, source), result);

	// The DOM root produced by the transformation is the XPath context node.
	Node htmlNode = result.getNode();
	return (String) theXpath.evaluate(xpath, htmlNode,
			XPathConstants.STRING);
}

Example #7

Source File: Html.java From Nimingban with Apache License 2.0

6 votes

/**
 * Converts an HTML string into displayable styled text.
 *
 * <p>TagSoup does the parsing, so real HTML, including all of the brokenness found in the
 * wild, is handled.
 *
 * @param source      the HTML to convert
 * @param imageGetter asked for a representation of each &lt;img&gt; tag; use {@code null} if
 *                    this is not wanted
 * @param tagHandler  asked to handle unknown tags; use {@code null} if this is not wanted
 * @return the result of the conversion
 */
public static SpannableStringBuilder fromHtml(String source, ImageGetter imageGetter,
        TagHandler tagHandler) {
    final Parser tagSoupParser = new Parser();
    try {
        tagSoupParser.setProperty(Parser.schemaProperty, HtmlParser.schema);
    } catch (org.xml.sax.SAXNotRecognizedException | org.xml.sax.SAXNotSupportedException unexpected) {
        // Should not happen.
        throw new RuntimeException(unexpected);
    }

    return new HtmlToSpannedConverter(source, imageGetter, tagHandler, tagSoupParser).convert();
}

Example #8

Source File: HtmlParser.java From Overchan-Android with GNU General Public License v3.0

6 votes

/**
 * Stores the conversion inputs and, when a subject is given, writes it into the output first.
 *
 * <p>A non-empty subject is appended to the builder, styled at 1.25x relative size and bold,
 * coloured with {@code colors.subjectForeground} when {@code colors} is not {@code null}, and
 * followed by a newline. {@code mStartLength} is then set to the builder length after that
 * heading.
 *
 * @param subject      optional heading text; skipped when empty or {@code null}
 * @param source       the HTML to convert
 * @param colors       theme colours; may be {@code null}
 * @param imageGetter  stored for use during conversion
 * @param openSpoilers stored for use during conversion
 * @param parser       the TagSoup parser stored as the reader
 */
public HtmlToSpannedConverter(String subject, String source, ThemeColors colors, HtmlParser.ImageGetter imageGetter, boolean openSpoilers,
        Parser parser) {
    final SpannableStringBuilder text = new SpannableStringBuilder();

    this.mSource = source;
    this.mSpannableStringBuilder = text;
    this.mColors = colors;
    this.mOpenSpoilers = openSpoilers;
    this.mImageGetter = imageGetter;
    this.mReader = parser;

    if (TextUtils.isEmpty(subject)) {
        // No heading to write; the builder starts out empty.
        return;
    }

    text.append(subject);
    final int subjectEnd = text.length();
    text.setSpan(new RelativeSizeSpan(1.25f), 0, subjectEnd, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
    text.setSpan(new StyleSpan(Typeface.BOLD), 0, subjectEnd, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
    if (colors != null) {
        text.setSpan(new ForegroundColorSpan(colors.subjectForeground), 0, subjectEnd, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
    }
    text.append('\n');
    this.mStartLength = text.length();
}

Example #9

Source File: CustomHtmlToSpannedConverter.java From zulip-android with Apache License 2.0

5 votes

/**
 * Stores the conversion inputs and resolves the two mention colours from resources.
 *
 * @param source      the HTML to convert
 * @param imageGetter stored for use during conversion
 * @param tagHandler  stored for use during conversion
 * @param parser      the TagSoup parser stored as the reader
 * @param emojiGetter stored for use during conversion
 * @param baseUri     stored for use during conversion
 * @param context     used to look up {@code R.color.dark_red} and
 *                    {@code R.color.self_mention_color}
 */
public CustomHtmlToSpannedConverter(String source,
                                    Html.ImageGetter imageGetter, Html.TagHandler tagHandler,
                                    Parser parser, Html.ImageGetter emojiGetter, String baseUri, Context context) {
    // Parsing input and output buffer.
    this.mSource = source;
    this.mBaseUri = baseUri;
    this.mReader = parser;
    this.mSpannableStringBuilder = new SpannableStringBuilder();

    // Callbacks consulted during conversion.
    this.mImageGetter = imageGetter;
    this.mEmojiGetter = emojiGetter;
    this.mTagHandler = tagHandler;

    // Colours for user mentions.
    userMentionColor = ContextCompat.getColor(context, R.color.dark_red);
    userMentionSelfColor = ContextCompat.getColor(context, R.color.self_mention_color);
}

Example #10

Source File: AKHtml.java From Mupdf with Apache License 2.0

5 votes

/**
 * Stores the conversion inputs and creates the empty output builder.
 *
 * @param source      the HTML to convert
 * @param imageGetter stored for use during conversion
 * @param tagHandler  stored for use during conversion
 * @param parser      the TagSoup parser stored as the reader
 * @param flags       stored for use during conversion
 */
public HtmlToSpannedConverter(String source, Html.ImageGetter imageGetter,
                              Html.TagHandler tagHandler, Parser parser, int flags) {
    // Input side.
    this.mSource = source;
    this.mReader = parser;
    this.mFlags = flags;

    // Callbacks.
    this.mImageGetter = imageGetter;
    this.mTagHandler = tagHandler;

    // Output side.
    this.mSpannableStringBuilder = new SpannableStringBuilder();
}

Example #11

Source File: Html.java From EhViewer with Apache License 2.0

5 votes

/**
 * Stores the conversion inputs and creates the empty output builder.
 *
 * @param source      the HTML to convert
 * @param imageGetter stored for use during conversion
 * @param tagHandler  stored for use during conversion
 * @param parser      the TagSoup parser stored as the reader
 */
public HtmlToSpannedConverter(
        String source, Html.ImageGetter imageGetter, Html.TagHandler tagHandler,
        Parser parser) {
    this.mReader = parser;
    this.mSource = source;
    this.mTagHandler = tagHandler;
    this.mImageGetter = imageGetter;
    // Conversion output accumulates here.
    this.mSpannableStringBuilder = new SpannableStringBuilder();
}

Example #12

Source File: Html.java From android_9.0.0_r45 with Apache License 2.0

5 votes

/**
 * Captures everything the converter needs and allocates the builder for the result.
 *
 * @param source      the HTML to convert
 * @param imageGetter stored for use during conversion
 * @param tagHandler  stored for use during conversion
 * @param parser      the TagSoup parser stored as the reader
 * @param flags       stored for use during conversion
 */
public HtmlToSpannedConverter(String source, Html.ImageGetter imageGetter,
        Html.TagHandler tagHandler, Parser parser, int flags) {
    this.mSpannableStringBuilder = new SpannableStringBuilder();
    this.mSource = source;
    this.mFlags = flags;
    this.mReader = parser;
    this.mTagHandler = tagHandler;
    this.mImageGetter = imageGetter;
}

Example #13

Source File: Html.java From Nimingban with Apache License 2.0

5 votes

/**
 * Captures the HTML source, the callbacks and the parser, and allocates the result builder.
 *
 * @param source      the HTML to convert
 * @param imageGetter stored for use during conversion
 * @param tagHandler  stored for use during conversion
 * @param parser      the TagSoup parser stored as the reader
 */
public HtmlToSpannedConverter(
        String source, Html.ImageGetter imageGetter, Html.TagHandler tagHandler,
        Parser parser) {
    // What to parse, and with which parser.
    this.mSource = source;
    this.mReader = parser;

    // Callbacks.
    this.mImageGetter = imageGetter;
    this.mTagHandler = tagHandler;

    // Where the result is assembled.
    this.mSpannableStringBuilder = new SpannableStringBuilder();
}

Example #14

Source File: OFXHomeFIDataStore.java From ofx4j with Apache License 2.0

5 votes

/**
 * Fetches the page at the given address and parses it with TagSoup into institution data.
 *
 * <p>Namespace processing and validation are switched off on the parser, and an
 * {@code InstitutionContentHandler} collects the result during the parse.
 *
 * @param href URL string of the institution page
 * @return the {@code data} held by the content handler after parsing
 * @throws IOException if the URL is malformed or the stream cannot be opened, read or closed
 * @throws SAXException if a feature cannot be set or the parse fails
 */
private BaseFinancialInstitutionData loadInstitutionData(String href) throws IOException, SAXException {
  if (LOG.isInfoEnabled()) {
    LOG.info("Loading institution data from: " + href);
  }

  URL url = new URL(href);
  XMLReader xmlReader = new Parser();
  xmlReader.setFeature("http://xml.org/sax/features/namespaces", false);
  xmlReader.setFeature("http://xml.org/sax/features/validation", false);
  InstitutionContentHandler institutionHandler = new InstitutionContentHandler();
  xmlReader.setContentHandler(institutionHandler);

  // Hold on to the stream so it is closed even when parsing fails.
  java.io.InputStream in = url.openStream();
  try {
    xmlReader.parse(new InputSource(in));
  } finally {
    in.close();
  }
  return institutionHandler.data;
}

Example #15

Source File: OFXHomeFIDataStore.java From ofx4j with Apache License 2.0

5 votes

/**
 * Fetches the page at {@code getUrl()} and feeds it through TagSoup to a new
 * {@code DirectoryContentHandler}.
 *
 * <p>Namespace processing and validation are switched off on the parser.
 *
 * @throws IOException if the URL is malformed or the stream cannot be opened, read or closed
 * @throws SAXException if a feature cannot be set or the parse fails
 */
private void initializeFIData() throws IOException, SAXException {
  URL url = new URL(getUrl());
  XMLReader xmlReader = new Parser();
  xmlReader.setFeature("http://xml.org/sax/features/namespaces", false);
  xmlReader.setFeature("http://xml.org/sax/features/validation", false);
  xmlReader.setContentHandler(new DirectoryContentHandler());

  // Hold on to the stream so it is closed even when parsing fails.
  java.io.InputStream in = url.openStream();
  try {
    xmlReader.parse(new InputSource(in));
  } finally {
    in.close();
  }
}

Example #16

Source File: Html.java From ForPDA with GNU General Public License v3.0

5 votes

/**
 * Records the HTML source, callbacks, parser and flags, and prepares an empty result builder.
 *
 * @param source      the HTML to convert
 * @param imageGetter stored for use during conversion
 * @param tagHandler  stored for use during conversion
 * @param parser      the TagSoup parser stored as the reader
 * @param flags       stored for use during conversion
 */
public HtmlToSpannedConverter(String source, Html.ImageGetter imageGetter,
                              Html.TagHandler tagHandler, Parser parser, int flags) {
    this.mFlags = flags;
    this.mReader = parser;
    this.mSource = source;

    this.mTagHandler = tagHandler;
    this.mImageGetter = imageGetter;

    // Result text is built up here during conversion.
    this.mSpannableStringBuilder = new SpannableStringBuilder();
}

Example #17

Source File: HtmlToSpannedConverter.java From HtmlCompat with Apache License 2.0

5 votes

/**
 * Stores the conversion inputs and creates the empty output builder.
 *
 * @param context      stored for use during conversion
 * @param source       the HTML to convert
 * @param imageGetter  stored for use during conversion
 * @param tagHandler   stored for use during conversion
 * @param spanCallback stored for use during conversion
 * @param parser       the TagSoup parser stored as the reader
 * @param flags        stored for use during conversion
 */
HtmlToSpannedConverter(Context context, String source, HtmlCompat.ImageGetter imageGetter,
                       HtmlCompat.TagHandler tagHandler, HtmlCompat.SpanCallback spanCallback,
                       Parser parser, int flags) {
    // Environment and input.
    this.mContext = context;
    this.mSource = source;
    this.mReader = parser;
    this.mFlags = flags;

    // Callbacks.
    this.mImageGetter = imageGetter;
    this.mTagHandler = tagHandler;
    this.mSpanCallback = spanCallback;

    // Output.
    this.mSpannableStringBuilder = new SpannableStringBuilder();
}

Example #18

Source File: Html.java From tysq-android with GNU General Public License v3.0

5 votes

/**
 * Keeps references to the inputs of a conversion and allocates the builder for its output.
 *
 * @param source      the HTML to convert
 * @param imageGetter stored for use during conversion
 * @param tagHandler  stored for use during conversion
 * @param parser      the TagSoup parser stored as the reader
 * @param flags       stored for use during conversion
 */
public HtmlToSpannedConverter(String source, Html.ImageGetter imageGetter,
                              Html.TagHandler tagHandler, Parser parser, int flags) {
    // Output buffer first, then the inputs in signature order.
    this.mSpannableStringBuilder = new SpannableStringBuilder();
    this.mSource = source;
    this.mImageGetter = imageGetter;
    this.mTagHandler = tagHandler;
    this.mReader = parser;
    this.mFlags = flags;
}

Example #19

Source File: Html.java From MHViewer with Apache License 2.0

5 votes

/**
 * Keeps references to the HTML source, callbacks and parser, and allocates the output builder.
 *
 * @param source      the HTML to convert
 * @param imageGetter stored for use during conversion
 * @param tagHandler  stored for use during conversion
 * @param parser      the TagSoup parser stored as the reader
 */
public HtmlToSpannedConverter(
        String source, Html.ImageGetter imageGetter, Html.TagHandler tagHandler,
        Parser parser) {
    // Output buffer first, then the inputs.
    this.mSpannableStringBuilder = new SpannableStringBuilder();
    this.mReader = parser;
    this.mSource = source;
    this.mImageGetter = imageGetter;
    this.mTagHandler = tagHandler;
}

Example #20

Source File: TagSoupDocumentParser.java From android-test with Apache License 2.0

4 votes

/**
 * Creates the TagSoup parser used by this class, with namespace processing turned off.
 *
 * @throws SAXNotRecognizedException if the parser does not recognise the namespaces feature
 * @throws SAXNotSupportedException if the parser cannot apply the requested feature value
 */
private TagSoupDocumentParser() throws SAXNotRecognizedException, SAXNotSupportedException {
  this.parser = new Parser();
  // The xpath evaluations done on the parsed output are not namespace aware,
  // so the parser must not report any namespaces.
  this.parser.setFeature(Parser.namespacesFeature, false);
}