Java Code Examples for org.jsoup.nodes.Document#head()

The following examples show how to use org.jsoup.nodes.Document#head(). You can vote up the examples you like or vote down the ones you don't, and you can go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: URLDownloadTests.java    From java_in_examples with Apache License 2.0 8 votes vote down vote up
/**
 * Fetches the page at {@code url} with jsoup and dumps its document-level
 * properties, output settings, and the children of its head to standard
 * output, then hands the body's children to {@code printElements}.
 *
 * @param url address of the page to download and parse
 * @throws Exception if fetching or parsing the page fails
 */
private static void testHtmlParser(String url) throws Exception {
    final Document page = Jsoup.connect(url)
            .userAgent(USER_AGENT)
            .cookie("auth", "token")
            .timeout(30000)
            .get();

    // Document-level properties.
    final Charset pageCharset = page.charset();
    System.out.println("charset = " + pageCharset);
    System.out.println("location = " + page.location());
    System.out.println("nodeName = " + page.nodeName());

    // Serialization settings attached to the document.
    final Document.OutputSettings settings = page.outputSettings();
    System.out.println("charset = " + settings.charset());
    System.out.println("indentAmount = " + settings.indentAmount());
    System.out.println("syntax = " + settings.syntax());
    System.out.println("escapeMode = " + settings.escapeMode());
    System.out.println("prettyPrint = " + settings.prettyPrint());
    System.out.println("outline = " + settings.outline());

    System.out.println("title = " + page.title());
    System.out.println("baseUri = " + page.baseUri());

    // One line per direct child of <head>: "<tag name> : <element>".
    for (Element headChild : page.head().children()) {
        System.out.print(headChild.tag().getName() + " : ");
        System.out.println(headChild);
    }

    printElements(page.body().children());
}
 
Example 2
Source File: BootstrapHandlerTest.java    From flow with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the head of the bootstrap page contains exactly three meta
 * tags, in order: Content-Type, X-UA-Compatible and the viewport.
 */
@Test
public void headHasMetaTags() throws Exception {
    initUI(testUI, createVaadinRequest());

    BootstrapContext bootstrapContext = new BootstrapContext(request, null,
            session, testUI, this::contextRootRelativePath);
    Document bootstrapPage = pageBuilder.getBootstrapPage(bootstrapContext);

    Elements metaTags = bootstrapPage.head().getElementsByTag("meta");
    Assert.assertEquals(3, metaTags.size());

    Element contentTypeMeta = metaTags.get(0);
    assertEquals("Content-Type", contentTypeMeta.attr("http-equiv"));
    assertEquals("text/html; charset=utf-8",
            contentTypeMeta.attr("content"));

    Element compatibilityMeta = metaTags.get(1);
    assertEquals("X-UA-Compatible", compatibilityMeta.attr("http-equiv"));
    assertEquals("IE=edge", compatibilityMeta.attr("content"));

    Element viewportMeta = metaTags.get(2);
    assertEquals(BootstrapHandler.VIEWPORT, viewportMeta.attr("name"));
    assertEquals(Viewport.DEFAULT,
            viewportMeta.attr(BootstrapHandler.CONTENT_ATTRIBUTE));
}
 
Example 3
Source File: BootstrapHandlerTest.java    From flow with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a bootstrap page for a freshly initialised UI and asserts that the
 * head markup contains {@code "mode = <productionMode>"}.
 *
 * @param productionMode the production-mode flag to set on the mocks and to
 *                       look for in the generated head
 */
private void bootstrapPage_productionModeTest(boolean productionMode) {
    mocks.setProductionMode(productionMode);
    TestUI secondUi = new TestUI();
    initUI(testUI);

    secondUi.getInternals().setSession(session);
    VaadinRequest initRequest = createVaadinRequest();
    secondUi.doInit(initRequest, 0);
    secondUi.getRouter().initializeUI(secondUi, request);
    BootstrapContext ctx = new BootstrapContext(initRequest, null, session,
            secondUi, this::contextRootRelativePath);
    secondUi.getInternals()
            .setContextRoot(contextRootRelativePath(request));

    String headHtml = pageBuilder.getBootstrapPage(ctx).head().outerHtml();
    Assert.assertTrue(headHtml.contains("mode = " + productionMode));
}
 
Example 4
Source File: HtmlParserTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Parses a fragment that has no explicit html/head/body wrappers and checks
 * how the resulting document distributes the elements between head and body.
 */
@Test public void createsDocumentStructure() {
    Document parsed = Jsoup.parse(
            "<meta name=keywords /><link rel=stylesheet /><title>jsoup</title><p>Hello world</p>");
    Element headEl = parsed.head();
    Element bodyEl = parsed.body();

    // Shape of the tree: document -> html -> (head, body).
    assertEquals(1, parsed.children().size());
    assertEquals(2, parsed.child(0).children().size());
    assertEquals(3, headEl.children().size());
    assertEquals(1, bodyEl.children().size());

    // The meta tag is found under head and not under body.
    Element firstHeadMeta = headEl.getElementsByTag("meta").get(0);
    assertEquals("keywords", firstHeadMeta.attr("name"));
    assertEquals(0, bodyEl.getElementsByTag("meta").size());

    assertEquals("jsoup", parsed.title());
    assertEquals("Hello world", bodyEl.text());
    assertEquals("Hello world", bodyEl.children().get(0).text());
}
 
Example 5
Source File: HtmlParserTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Verifies the document tree jsoup builds from markup that contains only
 * head-level and body-level elements: one html child with a head and a body.
 */
@Test public void createsDocumentStructure() {
    final String markup = "<meta name=keywords /><link rel=stylesheet /><title>jsoup</title><p>Hello world</p>";
    final Document document = Jsoup.parse(markup);
    final Element documentHead = document.head();
    final Element documentBody = document.body();

    // Child counts at each level of the tree.
    assertEquals(1, document.children().size());            // document -> html
    assertEquals(2, document.child(0).children().size());   // html -> head, body
    assertEquals(3, documentHead.children().size());
    assertEquals(1, documentBody.children().size());

    // Element placement and text content.
    assertEquals("keywords",
            documentHead.getElementsByTag("meta").get(0).attr("name"));
    assertEquals(0, documentBody.getElementsByTag("meta").size());
    assertEquals("jsoup", document.title());
    assertEquals("Hello world", documentBody.text());
    assertEquals("Hello world", documentBody.children().get(0).text());
}
 
Example 6
Source File: WxCrawlServiceImpl.java    From wx-crawl with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the article title from the document, trying in order:
 * the {@code ARTICLE_TITLE} attribute on the head element (when non-empty),
 * the text of the first element matching the {@code TITLE} selector,
 * and finally the document's own title.
 *
 * @param sourceDoc the parsed article document
 * @return the first title found by the lookups above
 */
private String getArticleTitle(Document sourceDoc) {
    // 1. Attribute stored on <head>.
    if (sourceDoc.head() != null) {
        String headTitle = sourceDoc.head().attr(WxCrawlerConstant.BackupArticle.ARTICLE_TITLE);
        if (StringUtils.isNotEmpty(headTitle)) {
            return headTitle;
        }
    }

    // 2. First element matching the title selector.
    if (sourceDoc.select(WxCrawlerConstant.HTMLElementSelector.TITLE).first() != null) {
        return sourceDoc.select(WxCrawlerConstant.HTMLElementSelector.TITLE).first().text();
    }

    // 3. Fall back to the document title.
    return sourceDoc.title();
}
 
Example 7
Source File: ArticleConvert.java    From wx-crawl with Apache License 2.0 5 votes vote down vote up
/**
 * Copies the article metadata stored as attributes on the document's head
 * element into the given transfer object.
 *
 * @param articleTransferVO the object to populate; it is mutated in place
 * @param doc               the document whose head attributes are read
 * @return the same {@code articleTransferVO} instance that was passed in
 */
public static ArticleTransferVO convert2ArticleTransferVO(ArticleTransferVO articleTransferVO, Document doc){
    final Element head = doc.head();

    // Publication details.
    articleTransferVO.setPublishDate(
            head.attr(WxCrawlerConstant.BackupArticle.PUBLISH_DATE));
    articleTransferVO.setAuthor(
            head.attr(WxCrawlerConstant.BackupArticle.AUTHOR));

    // Owning account.
    articleTransferVO.setAccountId(
            head.attr(WxCrawlerConstant.BackupArticle.ACCOUNT_ID));
    articleTransferVO.setAccountName(
            head.attr(WxCrawlerConstant.BackupArticle.ACCOUNT_NAME));

    // Article content descriptors.
    articleTransferVO.setDigest(
            head.attr(WxCrawlerConstant.BackupArticle.DIGEST));
    articleTransferVO.setOssCover(
            head.attr(WxCrawlerConstant.BackupArticle.COVER));
    articleTransferVO.setArticleType(
            head.attr(WxCrawlerConstant.BackupArticle.ARTICLE_TYPE));
    articleTransferVO.setTitle(
            head.attr(WxCrawlerConstant.BackupArticle.ARTICLE_TITLE));

    return articleTransferVO;
}
 
Example 8
Source File: BaseSoup.java    From ShareBox with Apache License 2.0 5 votes vote down vote up
/**
 * Parses {@code mHtml} with jsoup, stores the resulting head and body in
 * {@code mHeader} / {@code mBody}, and delegates to {@code parse} to fill
 * the value map.
 *
 * @param arg arguments remembered in {@code mArguments} for this run
 * @return the value map, created on first use and reused afterwards
 */
public Map<String, Object> doParse(Object... arg) {
    mArguments = arg;

    // Lazily create the result map; an existing map is kept as-is.
    if (null == mValues) {
        mValues = new HashMap<>();
    }

    final Document parsedHtml = Jsoup.parse(mHtml);
    mHeader = parsedHtml.head();
    mBody = parsedHtml.body();

    parse(parsedHtml, mHeader, mBody, mValues);
    return mValues;
}
 
Example 9
Source File: BootstrapHandlerTest.java    From flow with Apache License 2.0 5 votes vote down vote up
/**
 * Uses a session-expired URL that contains the literal
 * <code>{{CONFIG_JSON}}</code> placeholder and asserts that the generated
 * head still contains that URL unchanged.
 */
@Test
public void bootstrapPage_configJsonPatternIsReplacedBeforeInitialUidl() {
    TestUI secondUi = new TestUI();
    initUI(testUI);

    // System messages mock with the session-expired notification enabled.
    SystemMessages systemMessages = Mockito.mock(SystemMessages.class);
    service.setSystemMessagesProvider(info -> systemMessages);
    Mockito.when(systemMessages.isSessionExpiredNotificationEnabled())
            .thenReturn(true);
    Mockito.when(session.getSession())
            .thenReturn(Mockito.mock(WrappedSession.class));

    String expiredUrl = "http://{{CONFIG_JSON}}/file";
    Mockito.when(systemMessages.getSessionExpiredURL())
            .thenReturn(expiredUrl);

    // Initialise the second UI and build a bootstrap context around it.
    secondUi.getInternals().setSession(session);
    VaadinRequest initRequest = createVaadinRequest();
    secondUi.doInit(initRequest, 0);
    secondUi.getRouter().initializeUI(secondUi, request);
    BootstrapContext ctx = new BootstrapContext(initRequest, null, session,
            secondUi, this::contextRootRelativePath);
    secondUi.getInternals()
            .setContextRoot(contextRootRelativePath(request));

    String headHtml = pageBuilder.getBootstrapPage(ctx).head().outerHtml();
    Assert.assertTrue(headHtml.contains(expiredUrl));
}
 
Example 10
Source File: BootstrapHandlerTest.java    From flow with Apache License 2.0 5 votes vote down vote up
/**
 * Initialises the UI with {@code MetaAnnotations} and checks that the head
 * contains five meta tags: the three also asserted by
 * {@code headHasMetaTags} followed by the two
 * {@code apple-mobile-web-app-*} entries.
 */
@Test
public void addMultiMetaTagViaMetaAnnotation_MetaSizeCorrect_ContentCorrect()
        throws InvalidRouteConfigurationException {
    initUI(testUI, createVaadinRequest(),
            Collections.singleton(MetaAnnotations.class));

    BootstrapContext bootstrapContext = new BootstrapContext(request, null,
            session, testUI, this::contextRootRelativePath);
    Document bootstrapPage = pageBuilder.getBootstrapPage(bootstrapContext);

    Elements metaTags = bootstrapPage.head().getElementsByTag("meta");
    Assert.assertEquals(5, metaTags.size());

    Element contentTypeMeta = metaTags.get(0);
    assertEquals("Content-Type", contentTypeMeta.attr("http-equiv"));
    assertEquals("text/html; charset=utf-8",
            contentTypeMeta.attr("content"));

    Element compatibilityMeta = metaTags.get(1);
    assertEquals("X-UA-Compatible", compatibilityMeta.attr("http-equiv"));
    assertEquals("IE=edge", compatibilityMeta.attr("content"));

    Element viewportMeta = metaTags.get(2);
    assertEquals(BootstrapHandler.VIEWPORT, viewportMeta.attr("name"));
    assertEquals(Viewport.DEFAULT,
            viewportMeta.attr(BootstrapHandler.CONTENT_ATTRIBUTE));

    Element statusBarMeta = metaTags.get(3);
    assertEquals("apple-mobile-web-app-status-bar-style",
            statusBarMeta.attr("name"));
    assertEquals("black",
            statusBarMeta.attr(BootstrapHandler.CONTENT_ATTRIBUTE));

    Element webAppCapableMeta = metaTags.get(4);
    assertEquals("apple-mobile-web-app-capable",
            webAppCapableMeta.attr("name"));
    assertEquals("yes",
            webAppCapableMeta.attr(BootstrapHandler.CONTENT_ATTRIBUTE));
}
 
Example 11
Source File: BootstrapHandlerTest.java    From flow with Apache License 2.0 5 votes vote down vote up
@Test
public void defaultViewport() {
    initUI(testUI);
    Document page = pageBuilder.getBootstrapPage(context);
    Element head = page.head();
    Elements viewports = head.getElementsByAttributeValue("name",
            BootstrapHandler.VIEWPORT);
    Assert.assertEquals(1, viewports.size());
    Element viewport = viewports.get(0);
    Assert.assertEquals(Viewport.DEFAULT,
            viewport.attr(BootstrapHandler.CONTENT_ATTRIBUTE));

}
 
Example 12
Source File: BootstrapHandlerTest.java    From flow with Apache License 2.0 5 votes vote down vote up
@Test
public void viewportAnnotationOverridesDefault() throws Exception {
    initUI(testUI, createVaadinRequest(),
            Collections.singleton(RouteWithViewport.class));
    Document page = pageBuilder.getBootstrapPage(context);
    Element head = page.head();
    Elements viewports = head.getElementsByAttributeValue("name",
            BootstrapHandler.VIEWPORT);
    Assert.assertEquals(1, viewports.size());
    Element viewport = viewports.get(0);
    Assert.assertEquals("viewport-annotation-value",
            viewport.attr(BootstrapHandler.CONTENT_ATTRIBUTE));

}
 
Example 13
Source File: AbstractHtmlConsumer.java    From baleen with Apache License 2.0 4 votes vote down vote up
/**
 * Builds an HTML document for the given CAS and writes it to the file
 * returned by {@code getFileName}.
 *
 * <p>The head receives an optional stylesheet link (when {@code css} is
 * set), a {@code charset} meta tag, the document annotation's properties
 * and every {@code Metadata} annotation via {@code appendMeta}. The body is
 * filled in by {@code writeBody}.
 *
 * @param jCas the CAS to export
 * @throws AnalysisEngineProcessException if the file cannot be written
 */
@Override
protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException {
  final File f = getFileName(jCas);
  final DocumentAnnotation da = getDocumentAnnotation(jCas);

  final Document doc =
      Jsoup.parse("<!DOCTYPE html>\n<html lang=\"" + da.getLanguage() + "\"></html>");
  doc.outputSettings(new Document.OutputSettings().prettyPrint(false));
  final Element head = doc.head();

  if (!Strings.isNullOrEmpty(css)) {
    final Element cssLink = head.appendElement("link");
    cssLink.attr("rel", "stylesheet");
    cssLink.attr("href", css);
  }

  final Element charset = head.appendElement("meta");
  charset.attr("charset", "utf-8");

  appendMeta(head, "document.type", da.getDocType());
  appendMeta(head, "document.sourceUri", da.getSourceUri());
  appendMeta(head, "externalId", da.getHash());

  appendMeta(head, "document.classification", da.getDocumentClassification());
  appendMeta(
      head,
      "document.caveats",
      String.join(",", UimaTypesUtils.toArray(da.getDocumentCaveats())));
  appendMeta(
      head,
      "document.releasability",
      String.join(",", UimaTypesUtils.toArray(da.getDocumentReleasability())));

  // Every Metadata annotation becomes a meta entry; the last one whose key is
  // "documentTitle" (case-insensitive) also supplies the document title.
  String title = null;
  for (final Metadata md : JCasUtil.select(jCas, Metadata.class)) {
    appendMeta(head, md.getKey(), md.getValue());
    if ("documentTitle".equalsIgnoreCase(md.getKey())) {
      title = md.getValue();
    }
  }

  if (!Strings.isNullOrEmpty(title)) {
    doc.title(title);
  }

  final Element body = doc.body();

  writeBody(jCas, body);

  try {
    // The document declares <meta charset="utf-8"> above, so the bytes on disk
    // must be UTF-8 regardless of the platform's default charset.
    FileUtils.writeStringToFile(f, doc.html(), Charset.forName("UTF-8"));
  } catch (final IOException e) {
    throw new AnalysisEngineProcessException(e);
  }
}
 
Example 14
Source File: BootstrapHandler.java    From flow with Apache License 2.0 4 votes vote down vote up
/**
 * Injects the PWA-related elements into the head of the document: iOS
 * capability and theme-color meta tags, the manifest link, the registry's
 * header icons and a script that registers the service worker.
 *
 * <p>Does nothing when {@code registry} is {@code null} or when its
 * configuration is not enabled.
 *
 * @param document the page whose head is extended
 * @param registry the source of the PWA configuration and icons; may be
 *                 {@code null}
 */
private static void setupPwa(Document document, PwaRegistry registry) {
    if (registry == null) {
        return;
    }

    PwaConfiguration pwaConfig = registry.getPwaConfiguration();
    if (!pwaConfig.isEnabled()) {
        return;
    }

    Element head = document.head();

    // PWA capability flag for iOS devices.
    Element iosCapable = head.appendElement(META_TAG);
    iosCapable.attr("name", "apple-mobile-web-app-capable");
    iosCapable.attr(CONTENT_ATTRIBUTE, "yes");

    // Theme colour, also used for the iOS status bar style.
    Element themeColor = head.appendElement(META_TAG);
    themeColor.attr("name", "theme-color");
    themeColor.attr(CONTENT_ATTRIBUTE, pwaConfig.getThemeColor());

    Element statusBarStyle = head.appendElement(META_TAG);
    statusBarStyle.attr("name", "apple-mobile-web-app-status-bar-style");
    statusBarStyle.attr(CONTENT_ATTRIBUTE, pwaConfig.getThemeColor());

    // Web app manifest.
    Element manifestLink = head.appendElement("link");
    manifestLink.attr("rel", "manifest");
    manifestLink.attr("href", pwaConfig.getManifestPath());

    // Header icons supplied by the registry.
    for (PwaIcon headerIcon : registry.getHeaderIcons()) {
        head.appendChild(headerIcon.asElement());
    }

    // Inline script that registers the service worker on window load.
    Element workerScript = head.appendElement(SCRIPT_TAG);
    workerScript.text("if ('serviceWorker' in navigator) {\n"
            + "  window.addEventListener('load', function() {\n"
            + "    navigator.serviceWorker.register('"
            + pwaConfig.getServiceWorkerPath() + "');\n"
            + "  });\n"
            + "}");
}