Java Code Examples for org.ccil.cowan.tagsoup.Parser

The following examples show how to use org.ccil.cowan.tagsoup.Parser. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may want to check out the right sidebar which shows the related API usage.
Example 1
Source Project: html5index   Source File: DomLoader.java    License: Apache License 2.0 7 votes vote down vote up
/**
 * Parses the content obtained from {@code openReader(url)} into a DOM tree using TagSoup.
 *
 * <p>Namespace and namespace-prefix reporting are switched off on the parser before the
 * identity transform copies the SAX events into a {@link DOMResult}.
 *
 * @param url location handed to {@code openReader}
 * @return the node produced by the transform, cast to {@link Document}
 * @throws RuntimeException wrapping any exception raised while opening, parsing or closing
 */
public static Document loadDom(String url) {
 Parser parser = new Parser();

 try {
   parser.setFeature(Parser.namespacesFeature, false);
   parser.setFeature(Parser.namespacePrefixesFeature, false);
   DOMResult result = new DOMResult();
   Transformer transformer = TransformerFactory.newInstance().newTransformer();
   // try-with-resources guarantees the reader is closed even when the transform fails;
   // previously close() was skipped on any exception.
   try (Reader reader = openReader(url)) {
     transformer.transform(new SAXSource(parser, new InputSource(reader)), result);
   }
   return (Document) result.getNode();
 } catch (Exception e) {
   throw new RuntimeException(e);
 }
}
 
Example 2
Source Project: HtmlCompat   Source File: HtmlCompat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts the given HTML string into displayable styled text.
 *
 * <p>Any {@code <img>} tags in the HTML use the supplied ImageGetter to request a
 * representation of the image, and unknown tags are passed to the supplied TagHandler.
 * Either may be {@code null} if that behaviour is not wanted.
 *
 * <p>Parsing is done with TagSoup so that real-world, malformed HTML is tolerated.
 *
 * @return the converted text, or {@code null} when {@code source} is {@code null}
 */
public static Spanned fromHtml(@NonNull Context context, @NonNull String source, int flags,
                               @Nullable ImageGetter imageGetter, @Nullable TagHandler tagHandler,
                               @Nullable SpanCallback spanCallback) {
    if (source == null) {
        return null;
    }

    final Parser tagSoupParser = new Parser();
    try {
        tagSoupParser.setProperty(Parser.schemaProperty, HtmlParser.schema);
    } catch (org.xml.sax.SAXNotRecognizedException | org.xml.sax.SAXNotSupportedException e) {
        // Should not happen.
        throw new RuntimeException(e);
    }

    return new HtmlToSpannedConverter(
            context, source, imageGetter, tagHandler, spanCallback, tagSoupParser, flags)
            .convert();
}
 
Example 3
Source Project: Nimingban   Source File: Html.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts the given HTML string into displayable styled text.
 *
 * <p>Any &lt;img&gt; tags in the HTML use the supplied ImageGetter to request a
 * representation of the image, and unknown tags are passed to the supplied TagHandler.
 * Either may be {@code null} if that behaviour is not wanted.
 *
 * <p>Parsing is done with TagSoup so that real-world, malformed HTML is tolerated.
 */
public static SpannableStringBuilder fromHtml(String source, ImageGetter imageGetter,
        TagHandler tagHandler) {
    final Parser tagSoupParser = new Parser();
    try {
        tagSoupParser.setProperty(Parser.schemaProperty, HtmlParser.schema);
    } catch (org.xml.sax.SAXNotRecognizedException | org.xml.sax.SAXNotSupportedException e) {
        // Should not happen.
        throw new RuntimeException(e);
    }

    return new HtmlToSpannedConverter(source, imageGetter, tagHandler, tagSoupParser).convert();
}
 
Example 4
Source Project: Overchan-Android   Source File: HtmlParser.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Creates a converter for {@code source}.
 *
 * <p>When {@code subject} is non-empty it is written at the start of the output builder,
 * enlarged (1.25x) and bold, tinted with {@code colors.subjectForeground} if colors are
 * given, and followed by a newline; the builder length after that header is recorded in
 * {@code mStartLength}.
 */
public HtmlToSpannedConverter(String subject, String source, ThemeColors colors, HtmlParser.ImageGetter imageGetter, boolean openSpoilers,
        Parser parser) {
    mSource = source;
    mColors = colors;
    mOpenSpoilers = openSpoilers;
    mImageGetter = imageGetter;
    mReader = parser;
    mSpannableStringBuilder = new SpannableStringBuilder();

    // Nothing to prepend when there is no subject line.
    if (TextUtils.isEmpty(subject)) {
        return;
    }

    mSpannableStringBuilder.append(subject);
    final int subjectEnd = mSpannableStringBuilder.length();
    final int spanFlags = Spannable.SPAN_EXCLUSIVE_EXCLUSIVE;
    mSpannableStringBuilder.setSpan(new RelativeSizeSpan(1.25f), 0, subjectEnd, spanFlags);
    mSpannableStringBuilder.setSpan(new StyleSpan(Typeface.BOLD), 0, subjectEnd, spanFlags);
    if (colors != null) {
        mSpannableStringBuilder.setSpan(
                new ForegroundColorSpan(colors.subjectForeground), 0, subjectEnd, spanFlags);
    }
    mSpannableStringBuilder.append('\n');
    mStartLength = mSpannableStringBuilder.length();
}
 
Example 5
Source Project: JTAF-ExtWebDriver   Source File: DefaultExtWebDriver.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Evaluates an XPath expression against the current HTML source and returns the result as
 * a string.
 *
 * <p>Whitespace between adjacent tags is stripped, the markup is parsed with TagSoup
 * (namespaces disabled) into a DOM node, and the expression is evaluated on that node.
 *
 * @param xpath the XPath expression to evaluate
 * @return the string value of the expression
 * @throws Exception if parsing, transforming or XPath evaluation fails
 */
@Override
public String evaluateXpath(String xpath) throws Exception {
	XPathFactory xpathFac = XPathFactory.newInstance();
	XPath theXpath = xpathFac.newXPath();

	String html = getHtmlSource();
	html = html.replaceAll(">\\s+<", "><");
	InputStream input = new ByteArrayInputStream(html.getBytes(Charset.forName("UTF-8")));

	// The bytes above are UTF-8, so say so explicitly; with no declared encoding the
	// parser may decode the stream using the platform default charset instead.
	InputSource inputSource = new InputSource(input);
	inputSource.setEncoding("UTF-8");

	XMLReader reader = new Parser();
	reader.setFeature(Parser.namespacesFeature, false);
	Transformer transformer = TransformerFactory.newInstance()
			.newTransformer();

	DOMResult result = new DOMResult();
	transformer.transform(new SAXSource(reader, inputSource), result);

	// Root node of the DOM built by the transform.
	Node htmlNode = result.getNode();
	return (String) theXpath.evaluate(xpath, htmlNode,
			XPathConstants.STRING);
}
 
Example 6
Source Project: JTAF-ExtWebDriver   Source File: Element.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Get the list of nodes which satisfy the xpath expression passed in.
 *
 * <p>Whitespace between adjacent tags is stripped from the driver's HTML source, the
 * markup is parsed with TagSoup (namespaces disabled) into a DOM node, and the expression
 * is evaluated on that node.
 *
 * @param xpath
 *            the input xpath expression
 * @return the nodeset of matching elements
 * @throws Exception
 *             if parsing, transforming or XPath evaluation fails
 */
private NodeList getNodeListUsingJavaXPath(String xpath) throws Exception {
	XPathFactory xpathFac = XPathFactory.newInstance();
	XPath theXpath = xpathFac.newXPath();

	String html = getGUIDriver().getHtmlSource();
	html = html.replaceAll(">\\s+<", "><");
	InputStream input = new ByteArrayInputStream(html.getBytes(Charset.forName("UTF-8")));

	// The bytes above are UTF-8, so say so explicitly; with no declared encoding the
	// parser may decode the stream using the platform default charset instead.
	InputSource inputSource = new InputSource(input);
	inputSource.setEncoding("UTF-8");

	XMLReader reader = new Parser();
	reader.setFeature(Parser.namespacesFeature, false);
	Transformer transformer = TransformerFactory.newInstance().newTransformer();

	DOMResult result = new DOMResult();
	transformer.transform(new SAXSource(reader, inputSource), result);

	// Root node of the DOM built by the transform.
	Node htmlNode = result.getNode();
	return (NodeList) theXpath.evaluate(xpath, htmlNode, XPathConstants.NODESET);
}
 
Example 7
Source Project: JTAF-ExtWebDriver   Source File: Table.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns the nodes in the driver's current HTML source that match an XPath expression.
 *
 * <p>Whitespace between adjacent tags is stripped, the markup is parsed with TagSoup
 * (namespaces disabled) into a DOM node, and the expression is evaluated on that node.
 *
 * @param xpath
 *            of the NodeList
 * @return a list of nodes found at the xpath
 * @throws Exception
 *             if parsing, transforming or XPath evaluation fails
 */
private NodeList getNodeListUsingJavaXPath(String xpath) throws Exception {
    XPathFactory xpathFac = XPathFactory.newInstance();
    XPath theXpath = xpathFac.newXPath();

    String html = getGUIDriver().getHtmlSource();
    html = html.replaceAll(">\\s+<", "><");
    InputStream input = new ByteArrayInputStream(html.getBytes(Charset.forName("UTF-8")));

    // The bytes above are UTF-8, so say so explicitly; with no declared encoding the
    // parser may decode the stream using the platform default charset instead.
    InputSource inputSource = new InputSource(input);
    inputSource.setEncoding("UTF-8");

    XMLReader reader = new Parser();
    reader.setFeature(Parser.namespacesFeature, false);
    Transformer transformer = TransformerFactory.newInstance().newTransformer();

    DOMResult result = new DOMResult();
    transformer.transform(new SAXSource(reader, inputSource), result);

    // Root node of the DOM built by the transform.
    Node htmlNode = result.getNode();
    return (NodeList) theXpath.evaluate(xpath, htmlNode, XPathConstants.NODESET);
}
 
Example 8
Source Project: kite   Source File: ConvertHTMLBuilder.java    License: Apache License 2.0 6 votes vote down vote up
public ConvertHTML(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) throws SAXNotRecognizedException, SAXNotSupportedException {
  super(builder, config, parent, child, context);
  this.charset = getConfigs().getCharset(config, "charset", null);
  this.omitXMLDeclaration = getConfigs().getBoolean(config, "omitXMLDeclaration", false);      
  this.xmlReader = new Parser(); // no reuse?
  xmlReader.setProperty(Parser.schemaProperty, htmlSchema);
  xmlReader.setFeature(Parser.CDATAElementsFeature, getConfigs().getBoolean(config, "noCDATA", false));
  xmlReader.setFeature(Parser.namespacesFeature, !getConfigs().getBoolean(config, "noNamespaces", true));
  xmlReader.setFeature(Parser.ignoreBogonsFeature, getConfigs().getBoolean(config, "noBogons", false)); // also see TIKA-599
  xmlReader.setFeature(Parser.bogonsEmptyFeature, getConfigs().getBoolean(config, "emptyBogons", false));
  xmlReader.setFeature(Parser.rootBogonsFeature, getConfigs().getBoolean(config, "noRootBogons", false));
  xmlReader.setFeature(Parser.defaultAttributesFeature, getConfigs().getBoolean(config, "noDefaultAttributes", false));
  xmlReader.setFeature(Parser.translateColonsFeature, getConfigs().getBoolean(config, "noColons", false));
  xmlReader.setFeature(Parser.restartElementsFeature, getConfigs().getBoolean(config, "noRestart", false));
  xmlReader.setFeature(Parser.ignorableWhitespaceFeature, !getConfigs().getBoolean(config, "suppressIgnorableWhitespace", true));
  validateArguments();
}
 
Example 9
Source Project: android_9.0.0_r45   Source File: Html.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a converter for {@code source}, keeping the supplied image getter, tag handler,
 * parser and flags, and starting from an empty output builder.
 */
public HtmlToSpannedConverter( String source, Html.ImageGetter imageGetter,
        Html.TagHandler tagHandler, Parser parser, int flags) {
    // Input and parsing configuration.
    this.mSource = source;
    this.mReader = parser;
    this.mFlags = flags;

    // Callbacks consulted during conversion.
    this.mImageGetter = imageGetter;
    this.mTagHandler = tagHandler;

    // Output accumulator.
    this.mSpannableStringBuilder = new SpannableStringBuilder();
}
 
Example 10
Source Project: MHViewer   Source File: Html.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a converter for {@code source}, keeping the supplied image getter, tag handler
 * and parser, and starting from an empty output builder.
 */
public HtmlToSpannedConverter(
        String source, Html.ImageGetter imageGetter, Html.TagHandler tagHandler,
        Parser parser) {
    // Input and the parser that will read it.
    this.mSource = source;
    this.mReader = parser;

    // Callbacks consulted during conversion.
    this.mImageGetter = imageGetter;
    this.mTagHandler = tagHandler;

    // Output accumulator.
    this.mSpannableStringBuilder = new SpannableStringBuilder();
}
 
Example 11
Source Project: tysq-android   Source File: Html.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a converter for {@code source}, keeping the supplied image getter, tag handler,
 * parser and flags, and starting from an empty output builder.
 */
public HtmlToSpannedConverter(String source, Html.ImageGetter imageGetter,
                              Html.TagHandler tagHandler, Parser parser, int flags) {
    // Input and parsing configuration.
    this.mSource = source;
    this.mReader = parser;
    this.mFlags = flags;

    // Callbacks consulted during conversion.
    this.mImageGetter = imageGetter;
    this.mTagHandler = tagHandler;

    // Output accumulator.
    this.mSpannableStringBuilder = new SpannableStringBuilder();
}
 
Example 12
Source Project: HtmlCompat   Source File: HtmlToSpannedConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a converter for {@code source}, keeping the supplied context, callbacks, parser
 * and flags, and starting from an empty output builder.
 */
HtmlToSpannedConverter(Context context, String source, HtmlCompat.ImageGetter imageGetter,
                       HtmlCompat.TagHandler tagHandler, HtmlCompat.SpanCallback spanCallback,
                       Parser parser, int flags) {
    // Environment, input and parsing configuration.
    this.mContext = context;
    this.mSource = source;
    this.mReader = parser;
    this.mFlags = flags;

    // Callbacks consulted during conversion.
    this.mImageGetter = imageGetter;
    this.mTagHandler = tagHandler;
    this.mSpanCallback = spanCallback;

    // Output accumulator.
    this.mSpannableStringBuilder = new SpannableStringBuilder();
}
 
Example 13
Source Project: ForPDA   Source File: Html.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a converter for {@code source}, keeping the supplied image getter, tag handler,
 * parser and flags, and starting from an empty output builder.
 */
public HtmlToSpannedConverter(String source, Html.ImageGetter imageGetter,
                              Html.TagHandler tagHandler, Parser parser, int flags) {
    // Input and parsing configuration.
    this.mSource = source;
    this.mReader = parser;
    this.mFlags = flags;

    // Callbacks consulted during conversion.
    this.mImageGetter = imageGetter;
    this.mTagHandler = tagHandler;

    // Output accumulator.
    this.mSpannableStringBuilder = new SpannableStringBuilder();
}
 
Example 14
Source Project: ofx4j   Source File: OFXHomeFIDataStore.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Downloads the document at {@code getUrl()} and feeds it through a TagSoup parser whose
 * SAX events are delivered to a new {@code DirectoryContentHandler}.
 *
 * @throws IOException if the URL cannot be opened, read or closed
 * @throws SAXException if the parser rejects a feature or parsing fails
 */
private void initializeFIData() throws IOException, SAXException {
  URL url = new URL(getUrl());
  XMLReader xmlReader = new Parser();
  xmlReader.setFeature("http://xml.org/sax/features/namespaces", false);
  xmlReader.setFeature("http://xml.org/sax/features/validation", false);
  xmlReader.setContentHandler(new DirectoryContentHandler());
  // Close the connection stream ourselves; it was previously left open, including when
  // parsing failed part-way through.
  try (java.io.InputStream in = url.openStream()) {
    xmlReader.parse(new InputSource(in));
  }
}
 
Example 15
Source Project: ofx4j   Source File: OFXHomeFIDataStore.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Downloads the document at {@code href}, parses it with TagSoup through an
 * {@code InstitutionContentHandler}, and returns the data that handler collected.
 *
 * @param href URL of the institution page to load
 * @return the handler's {@code data} field after parsing
 * @throws IOException if the URL is malformed or cannot be opened, read or closed
 * @throws SAXException if the parser rejects a feature or parsing fails
 */
private BaseFinancialInstitutionData loadInstitutionData(String href) throws IOException, SAXException {
  if (LOG.isInfoEnabled()) {
    LOG.info("Loading institution data from: " + href);
  }

  URL url = new URL(href);
  XMLReader xmlReader = new Parser();
  xmlReader.setFeature("http://xml.org/sax/features/namespaces", false);
  xmlReader.setFeature("http://xml.org/sax/features/validation", false);
  InstitutionContentHandler institutionHandler = new InstitutionContentHandler();
  xmlReader.setContentHandler(institutionHandler);
  // Close the connection stream ourselves; it was previously left open, including when
  // parsing failed part-way through.
  try (java.io.InputStream in = url.openStream()) {
    xmlReader.parse(new InputSource(in));
  }
  return institutionHandler.data;
}
 
Example 16
Source Project: Nimingban   Source File: Html.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a converter for {@code source}, keeping the supplied image getter, tag handler
 * and parser, and starting from an empty output builder.
 */
public HtmlToSpannedConverter(
        String source, Html.ImageGetter imageGetter, Html.TagHandler tagHandler,
        Parser parser) {
    // Input and the parser that will read it.
    this.mSource = source;
    this.mReader = parser;

    // Callbacks consulted during conversion.
    this.mImageGetter = imageGetter;
    this.mTagHandler = tagHandler;

    // Output accumulator.
    this.mSpannableStringBuilder = new SpannableStringBuilder();
}
 
Example 17
/**
 * Creates a converter for {@code source}, keeping the supplied getters, tag handler,
 * parser and base URI, and starting from an empty output builder.
 *
 * <p>The two user-mention colors are resolved once from {@code context} via
 * {@code R.color.dark_red} and {@code R.color.self_mention_color}.
 */
public CustomHtmlToSpannedConverter(String source,
                                    Html.ImageGetter imageGetter, Html.TagHandler tagHandler,
                                    Parser parser, Html.ImageGetter emojiGetter, String baseUri, Context context) {
    // Input and the parser that will read it.
    this.mSource = source;
    this.mReader = parser;
    this.mBaseUri = baseUri;

    // Callbacks consulted during conversion.
    this.mImageGetter = imageGetter;
    this.mEmojiGetter = emojiGetter;
    this.mTagHandler = tagHandler;

    // Output accumulator.
    this.mSpannableStringBuilder = new SpannableStringBuilder();

    // Mention colors looked up from resources.
    this.userMentionColor = ContextCompat.getColor(context, R.color.dark_red);
    this.userMentionSelfColor = ContextCompat.getColor(context, R.color.self_mention_color);
}
 
Example 18
Source Project: EhViewer   Source File: Html.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a converter for {@code source}, keeping the supplied image getter, tag handler
 * and parser, and starting from an empty output builder.
 */
public HtmlToSpannedConverter(
        String source, Html.ImageGetter imageGetter, Html.TagHandler tagHandler,
        Parser parser) {
    // Input and the parser that will read it.
    this.mSource = source;
    this.mReader = parser;

    // Callbacks consulted during conversion.
    this.mImageGetter = imageGetter;
    this.mTagHandler = tagHandler;

    // Output accumulator.
    this.mSpannableStringBuilder = new SpannableStringBuilder();
}
 
Example 19
Source Project: Mupdf   Source File: AKHtml.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a converter for {@code source}, keeping the supplied image getter, tag handler,
 * parser and flags, and starting from an empty output builder.
 */
public HtmlToSpannedConverter(String source, Html.ImageGetter imageGetter,
                              Html.TagHandler tagHandler, Parser parser, int flags) {
    // Input and parsing configuration.
    this.mSource = source;
    this.mReader = parser;
    this.mFlags = flags;

    // Callbacks consulted during conversion.
    this.mImageGetter = imageGetter;
    this.mTagHandler = tagHandler;

    // Output accumulator.
    this.mSpannableStringBuilder = new SpannableStringBuilder();
}
 
Example 20
Source Project: android-test   Source File: TagSoupDocumentParser.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates the wrapped TagSoup parser with namespace processing switched off.
 *
 * @throws SAXNotRecognizedException if the parser does not recognize the namespaces feature
 * @throws SAXNotSupportedException if the parser cannot change the namespaces feature
 */
private TagSoupDocumentParser() throws SAXNotRecognizedException, SAXNotSupportedException {
  this.parser = new Parser();
  // XPath evaluation done on the parsed output is not namespace aware, so the parser
  // must not emit any namespaces.
  this.parser.setFeature(Parser.namespacesFeature, false);
}