Java Code Examples for org.jsoup.nodes.Element#append()

The following examples show how to use org.jsoup.nodes.Element#append(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: JsoupTest.java    From crawler-jsoup-maven with Apache License 2.0 6 votes vote down vote up
public static void main(String[] args) {
    final String markup = "<span><div>test</div></span>";
    final Document doc = Jsoup.parse(markup);

    // Replace the inner HTML of the first <div>, then surround it with two extra paragraphs.
    final Element firstDiv = doc.select("div").first();
    firstDiv.html("<p>lorem ipsum</p>");
    firstDiv.prepend("<p>First</p>");
    firstDiv.append("<p>Last</p>");
    // Expected at this point: <div><p>First</p><p>lorem ipsum</p><p>Last</p></div>

    // NOTE(review): appendElement expects a tag name, yet the whole markup string is
    // passed here; append(markup) may have been intended -- confirm before relying on it.
    firstDiv.appendElement(markup);

    // Wrap the first <span> of the document in a list item that contains a link.
    final Element firstSpan = doc.select("span").first();
    firstSpan.wrap("<li><a href='http://example.com/'></a></li>");

    System.out.println(doc.html());

    // Sanitize the serialized document with the relaxed whitelist, pretty-printing disabled.
    final OutputSettings compact = new OutputSettings().prettyPrint(false);
    final String cleaned = Jsoup.clean(doc.html(), "", Whitelist.relaxed(), compact);

    System.out.println(cleaned);
}
 
Example 2
Source File: Html5.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Appends a {@code div} to the body and fills it with the document text, after splicing in
 * the strings returned by {@code getEntityInsertPositions} at their recorded positions.
 *
 * @param jCas the CAS that supplies the document text and the insert positions
 * @param body the element that receives the new {@code div}
 */
@Override
protected void writeBody(JCas jCas, Element body) {
  // Entities
  Map<Integer, String> insertPositions = getEntityInsertPositions(jCas);

  Element div = body.appendElement("div");
  div.attr("style", "white-space: pre-line");

  // Splice in place with a StringBuilder instead of rebuilding the whole string per insert.
  StringBuilder text = new StringBuilder(jCas.getDocumentText());
  // Running total of characters already inserted; shifts later positions to the right.
  // NOTE(review): this is only correct if the map iterates in ascending position order --
  // confirm against getEntityInsertPositions.
  int offset = 0;
  for (Entry<Integer, String> pos : insertPositions.entrySet()) {
    String insert = pos.getValue();
    text.insert(pos.getKey() + offset, insert);
    offset += insert.length();
  }

  // append() parses its argument as HTML, so the combined text is added as markup.
  div.append(text.toString());
}
 
Example 3
Source File: ResourceTransfer.java    From wx-crawl with Apache License 2.0 5 votes vote down vote up
/**
 * Handles an mpvoice audio node: looks up a replacement for the voice URL and, when the
 * lookup succeeds, appends an {@code <audio>} tag to the node; otherwise logs the failure.
 *
 * @param voiceElement the mpvoice element; its {@code voice_encode_fileid} attribute is
 *                     appended to the voice base URL
 */
public void parseVoiceElement(Element voiceElement) {
    final String sourceUrl = WxCrawlerConstant.VOICE_URL + voiceElement.attr("voice_encode_fileid");
    final ResultBase<String> lookup = getOssValue(sourceUrl);

    if (!lookup.isSuccess()) {
        log.info("Failed to voice resourceTranslation, voiceURL={}", sourceUrl);
        return;
    }

    // NOTE(review): the ResultBase wrapper itself (via toString()) is concatenated into the
    // src attribute, not an unwrapped URL string -- confirm this is what is intended.
    final String audioTag = "<audio src=\"" + lookup + "\">您的浏览器不支持audio标签</audio>";
    voiceElement.append(audioTag);
}
 
Example 4
Source File: Rgaa3Extractor.java    From Asqatasun with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Rewrites the HTML testcase files found one level below {@code RGAA3_TESTCASE_PATH}.
 *
 * <p>For each sub-directory, a rule key is derived from the directory name (after stripping
 * "Rgaa30Rule" and ".java"). In every contained file whose name contains "html", the
 * {@code .test-detail} element is turned into an empty {@code div}, given a {@code lang}
 * attribute if it lacks one, and filled with the raw HTML of the matching rule. The file is
 * then written back in place.
 *
 * @throws IOException if the testcase root cannot be listed, or reading/writing a file fails
 */
private static void createTestcaseFiles() throws IOException {
    File srcDir = new File(RGAA3_TESTCASE_PATH);
    File[] ruleDirs = srcDir.listFiles();
    if (ruleDirs == null) {
        // listFiles() returns null when the path is not a directory or cannot be read.
        throw new IOException("Cannot list testcase directory: " + srcDir);
    }
    for (File file : ruleDirs) {
        File[] testcases = file.listFiles();
        if (testcases == null) {
            // Not a directory (or unreadable): nothing to rewrite, and its name need not
            // follow the rule naming scheme parsed below.
            continue;
        }
        String fileName = file.getName().replace("Rgaa30Rule", "").replace(".java", "");
        String theme = fileName.substring(0, 2);
        String crit = fileName.substring(2, 4);
        String test = fileName.substring(4, 6);
        // Key without leading zeros, e.g. "01","02","03" -> "1-2-3".
        String testKey = Integer.parseInt(theme) + "-" + Integer.parseInt(crit) + "-" + Integer.parseInt(test);
        // Zero-padded dotted form that may appear in the generated HTML and must be replaced.
        String wrongKey = theme + "." + crit + "." + test;
        for (File testcase : testcases) {
            if (testcase.isFile() && testcase.getName().contains("html")) {
                Document doc = Jsoup.parse(FileUtils.readFileToString(testcase));
                Element detail = doc.select(".test-detail").first();
                if (detail == null) {
                    // No detail element: dump the document for inspection, leave the file as is.
                    System.out.println(doc.outerHtml());
                } else {
                    detail.tagName("div");
                    detail.text("");
                    for (Element el : detail.children()) {
                        el.remove();
                    }
                    if (!detail.hasAttr("lang")) {
                        detail.attr("lang", "fr");
                    }
                    detail.append("\n" + RGAA3.get(testKey).ruleRawHtml + "\n");
                    doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
                    doc.outputSettings().outline(false);
                    doc.outputSettings().indentAmount(4);
                    String outputHtml = doc.outerHtml();
                    if (outputHtml.contains(wrongKey)) {
                        // Literal replacement: replaceAll would treat the dots in wrongKey as
                        // regex wildcards and '$' or '\' in the replacement as group syntax.
                        outputHtml = outputHtml.replace(wrongKey, RGAA3.get(testKey).getRuleDot());
                    }
                    FileUtils.writeStringToFile(testcase, outputHtml);
                }
            }
        }
    }
}
 
Example 5
Source File: Elements.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Appends the given HTML to every element in this collection by calling
 * {@link Element#append(String)} on each one.
 * @param html markup to add at the end of each element's inner HTML
 * @return this collection, to allow call chaining
 * @see Element#append(String)
 */
public Elements append(String html) {
    for (Element matched : this) {
        matched.append(html);
    }
    return this;
}
 
Example 6
Source File: Elements.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Calls {@link Element#append(String)} with the supplied HTML on each element held by this
 * collection, in iteration order.
 * @param html markup placed after the existing inner HTML of each element
 * @return this same instance, so further calls can be chained
 * @see Element#append(String)
 */
public Elements append(String html) {
    for (Element target : this) {
        target.append(html);
    }
    return this;
}
 
Example 7
Source File: Elements.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Adds the supplied HTML after the current inner HTML of each element in this collection.
 * @param html markup handed to {@link Element#append(String)} for every element
 * @return this, for chaining
 * @see Element#append(String)
 */
public Elements append(String html) {
    for (Element each : this) {
        each.append(html);
    }
    return this;
}
 
Example 8
Source File: RandomGoogleTrendExtractor.java    From swcv with MIT License 5 votes vote down vote up
/**
 * Downloads the feed addressed by {@code constructQuery()}, reads it as JSON and extracts
 * trend strings from the "content" field of every entry under responseData/feed/entries.
 *
 * <p>Each entry's content is parsed as HTML. A "." is appended to every anchor so that the
 * flattened document text can be split on "." into separate pieces.
 *
 * @return the result of {@code getNonEmptyTrends} applied to all collected pieces
 * @throws Exception if the download, the JSON parsing or the HTML handling fails
 */
private static String[] downloadTrends() throws Exception
{
    String query = constructQuery();
    InputStream is = new URL(query).openStream();
    try
    {
        StringWriter writer = new StringWriter();
        // Decode explicitly as UTF-8; the two-argument copy uses the platform default charset.
        IOUtils.copy(is, writer, "UTF-8");
        JSONObject json = new JSONObject(writer.toString());
        json = json.getJSONObject("responseData");
        json = json.getJSONObject("feed");

        JSONArray arr = json.getJSONArray("entries");
        List<String> res = new ArrayList<String>();
        for (int i = 0; i < arr.length(); i++)
        {
            JSONObject tmp = arr.getJSONObject(i);
            String en = tmp.getString("content");

            Document doc = Jsoup.parse(en);
            // Mark the end of every link with a "." so the text splits after each anchor.
            for (Element t : doc.select("a"))
            {
                t.append(".");
            }

            en = doc.text();
            res.addAll(Arrays.asList(en.split("\\.")));
        }

        return getNonEmptyTrends(res);
    }
    finally
    {
        is.close();
    }
}
 
Example 9
Source File: SiteCreator.java    From java8-explorer with MIT License 5 votes vote down vote up
/**
 * Renders the explorer result into {@code _site/index.html}.
 *
 * <p>The classpath resource {@code template.html} is parsed, one list entry and one detail
 * view per type are appended to the {@code content-list} and {@code details} elements,
 * relative URLs are rewritten, and the document is written out as UTF-8.
 *
 * @param result the explorer result whose type infos are rendered
 * @throws IOException if the template resource is missing or cannot be read, or the output
 *                     file cannot be written
 */
public void createSite(ExplorerResult result) throws IOException {
    final Document document;
    // try-with-resources: the template stream is closed even if parsing throws.
    try (InputStream inputStream = getClass()
            .getClassLoader()
            .getResourceAsStream("template.html")) {
        if (inputStream == null) {
            // getResourceAsStream returns null when the resource is not on the classpath.
            throw new IOException("Classpath resource template.html not found");
        }
        document = Jsoup.parse(inputStream, "UTF-8", URI);
    }

    Element contentList = document.body().getElementById("content-list");
    Element details = document.body().getElementById("details");

    List<TypeInfo> typeInfos = result.getTypeInfos();
    for (TypeInfo typeInfo : typeInfos) {
        StringBuilder listEntry = createListEntry(typeInfo);
        contentList.append(listEntry.toString());

        String detailView = createDetailView(typeInfo);
        details.append(detailView);
    }

    rewriteRelativeUrls(document);

    File file = new File("_site/index.html");
    // Closing the writer flushes it; try-with-resources also closes it when write() fails.
    try (BufferedWriter htmlWriter =
                 new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8"))) {
        htmlWriter.write(document.toString());
    }
}
 
Example 10
Source File: Elements.java    From jsoup-learning with MIT License 5 votes vote down vote up
/**
 * Appends the given HTML to each element in {@code contents} via
 * {@link Element#append(String)}.
 * @param html markup to add inside each element, following its existing HTML
 * @return this collection, so calls can be chained
 * @see Element#append(String)
 */
public Elements append(String html) {
    for (Element member : contents) {
        member.append(html);
    }
    return this;
}
 
Example 11
Source File: PutHTMLElement.java    From localization_nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Parses the incoming flow file as HTML, selects elements with the configured CSS selector
 * and appends or prepends the configured value to each of them.
 *
 * <p>Routing: a failure while parsing or selecting sends the flow file to
 * {@code REL_INVALID_HTML}; an empty selection sends it to {@code REL_NOT_FOUND}; otherwise
 * the modified document is written to a new flow file created from the original and sent to
 * {@code REL_SUCCESS}, while the original goes to {@code REL_ORIGINAL}.
 *
 * @param context supplies the selector, the value to put and the put location
 * @param session used to fetch, create, write and transfer flow files
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final Document doc;
    final Elements eles;
    try {
        doc = parseHTMLDocumentFromFlowfile(flowFile, context, session);
        // Evaluate the selector against the flow file, as is done for PUT_VALUE below, so
        // expressions in it can reference flow file attributes.
        eles = doc.select(context.getProperty(CSS_SELECTOR).evaluateAttributeExpressions(flowFile).getValue());
    } catch (Exception ex) {
        getLogger().error("Failed to extract HTML from {} due to {}; routing to {}", new Object[] {flowFile, ex.toString(), REL_INVALID_HTML.getName()}, ex);
        session.transfer(flowFile, REL_INVALID_HTML);
        return;
    }


    if (eles == null || eles.isEmpty()) {
        // No element found
        session.transfer(flowFile, REL_NOT_FOUND);
    } else {
        final String putValue = context.getProperty(PUT_VALUE).evaluateAttributeExpressions(flowFile).getValue();
        // Read once: the location type is the same for every selected element.
        final String putLocation = context.getProperty(PUT_LOCATION_TYPE).getValue();

        for (final Element ele : eles) {
            switch (putLocation) {
                case APPEND_ELEMENT:
                    ele.append(putValue);
                    break;
                case PREPEND_ELEMENT:
                    ele.prepend(putValue);
                    break;
            }
        }

        // The callback ignores the input stream and emits the modified document as UTF-8.
        FlowFile ff = session.write(session.create(flowFile), new StreamCallback() {
            @Override
            public void process(final InputStream in, final OutputStream out) throws IOException {
                out.write(doc.html().getBytes(StandardCharsets.UTF_8));
            }
        });

        session.transfer(ff, REL_SUCCESS);

        // Transfer the original HTML
        session.transfer(flowFile, REL_ORIGINAL);
    }
}
 
Example 12
Source File: PutHTMLElement.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Applies the configured value to every element of the incoming HTML flow file that matches
 * the configured CSS selector, either appending or prepending it.
 *
 * <p>Outcomes: {@code REL_INVALID_HTML} when parsing or selecting throws,
 * {@code REL_NOT_FOUND} when nothing matches, otherwise a new flow file holding the modified
 * document goes to {@code REL_SUCCESS} and the original goes to {@code REL_ORIGINAL}.
 *
 * @param context supplies the selector, the value to put and the put location
 * @param session used to fetch, create, write and transfer flow files
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final Document doc;
    final Elements matches;
    try {
        doc = parseHTMLDocumentFromFlowfile(flowFile, context, session);
        final String selector = context.getProperty(CSS_SELECTOR).evaluateAttributeExpressions(flowFile).getValue();
        matches = doc.select(selector);
    } catch (Exception ex) {
        getLogger().error("Failed to extract HTML from {} due to {}; routing to {}", new Object[] {flowFile, ex.toString(), REL_INVALID_HTML.getName()}, ex);
        session.transfer(flowFile, REL_INVALID_HTML);
        return;
    }

    // Nothing selected: route the untouched flow file and stop.
    if (matches == null || matches.isEmpty()) {
        session.transfer(flowFile, REL_NOT_FOUND);
        return;
    }

    final String html = context.getProperty(PUT_VALUE).evaluateAttributeExpressions(flowFile).getValue();

    for (final Element match : matches) {
        switch (context.getProperty(PUT_LOCATION_TYPE).getValue()) {
            case APPEND_ELEMENT:
                match.append(html);
                break;
            case PREPEND_ELEMENT:
                match.prepend(html);
                break;
        }
    }

    // Serialize the modified document into a child flow file; the input stream is not read.
    final FlowFile child = session.create(flowFile);
    final FlowFile updated = session.write(child, new StreamCallback() {
        @Override
        public void process(final InputStream in, final OutputStream out) throws IOException {
            out.write(doc.html().getBytes(StandardCharsets.UTF_8));
        }
    });

    session.transfer(updated, REL_SUCCESS);

    // The original HTML is passed along as well.
    session.transfer(flowFile, REL_ORIGINAL);
}
 
Example 13
Source File: CustomFooterPostProcessor.java    From asciidoctorj with Apache License 2.0 4 votes vote down vote up
/**
 * Appends a copyright notice to the {@code footer-text} element of HTML output.
 *
 * @param document the document being converted; only its base backend is checked
 * @param output   the rendered output
 * @return the output with the notice appended when the base backend is "html" and a
 *         {@code footer-text} element exists; otherwise the output unchanged
 */
@Override
public String process(Document document, String output) {

    String copyright  = "Copyright Acme, Inc.";

    if(document.isBasebackend("html")) {
        // The second argument of Jsoup.parse(String, String) is a base URI, not a charset,
        // so the single-argument overload is used for an in-memory string.
        org.jsoup.nodes.Document doc = Jsoup.parse(output);

        Element contentElement = doc.getElementById("footer-text");
        // getElementById returns null when no such element exists; leave the output as is.
        if (contentElement != null) {
            contentElement.append(copyright);
            output = doc.html();
        }
    }

    return output;
}
 
Example 14
Source File: BasePageHandler.java    From cetty with Apache License 2.0 2 votes vote down vote up
/**
 * Builds a {@code figcaption} element for an image-with-text block.
 *
 * @param figcaptionText HTML to append inside the new element
 * @return a new, unattached {@code figcaption} element containing the given HTML
 */
protected Element buildFigcaption(String figcaptionText) {
    // append() returns the element it was called on, so creation and filling can be chained.
    return new Element(Tag.valueOf("figcaption"), "").append(figcaptionText);
}
 
Example 15
Source File: BasePageHandler.java    From cetty with Apache License 2.0 2 votes vote down vote up
/**
 * Builds a {@code blockquote} element for a quoted paragraph block and adds it to the
 * article body.
 *
 * @param blockquote  HTML to append inside the new {@code blockquote} element
 * @param articleBody element that receives the new element as its last child
 */
protected void buildBlockquote(String blockquote, Element articleBody) {
    // The element is filled before it is attached, exactly one child is added to the body.
    final Element quoteElement = new Element(Tag.valueOf("blockquote"), "").append(blockquote);
    articleBody.appendChild(quoteElement);
}