Java Code Examples for org.jsoup.nodes.Document#title()

The following examples show how to use org.jsoup.nodes.Document#title(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: DynamicIp.java From rank with Apache License 2.0

6 votes

/**
 * Checks connectivity by requesting a Baidu search page and inspecting its title.
 *
 * @return {@code true} when the fetched page title contains the search term;
 *         {@code false} otherwise, including when any exception is thrown
 */
public static boolean isConnected(){
    boolean connected = false;
    try {
        // The timestamp parameter makes every request URL unique.
        String searchUrl = "http://www.baidu.com/s?wd=杨尚川&t=" + System.currentTimeMillis();
        Document page = Jsoup.connect(searchUrl)
                .header("Accept", ACCEPT)
                .header("Accept-Encoding", ENCODING)
                .header("Accept-Language", LANGUAGE)
                .header("Connection", CONNECTION)
                .header("Referer", "https://www.baidu.com")
                .header("Host", "www.baidu.com")
                .header("User-Agent", USER_AGENT)
                .ignoreContentType(true)
                .timeout(30000)
                .get();
        String pageTitle = page.title();
        LOGGER.info("搜索结果页面标题："+pageTitle);
        connected = pageTitle != null && pageTitle.contains("杨尚川");
    } catch (Exception e) {
        // An unreachable network is the expected "offline" outcome, so only other failures are logged.
        if (!"Network is unreachable".equals(e.getMessage())) {
            LOGGER.error("状态检查失败:"+e.getMessage());
        }
    }
    return connected;
}

Example 2

Source File: UrlTitleAnnouncer.java From VileBot with MIT License

6 votes

/**
 * Fetches the page at the given URL and reads the title of the parsed document.
 *
 * @param url http URI String of the page to fetch
 * @return the document title, or an empty string when the fetch fails with an IOException
 */
private String scrapeURLHTMLTitle( String url )
{
    try
    {
        return Jsoup.connect( url ).get().title();
    }
    catch ( IOException x )
    {
        // Report the failure on stderr and fall back to the empty title.
        System.err.format( "scrapeURLHTMLTitle BufferedReader error: %s%n", x );
        return "";
    }
}

Example 3

Source File: DynamicIp.java From superword with Apache License 2.0

6 votes

/**
 * Checks connectivity by requesting a Baidu search page and inspecting its title.
 *
 * @return {@code true} when the fetched page title contains the search term;
 *         {@code false} otherwise, including when any exception is thrown
 */
public static boolean isConnected(){
    boolean connected = false;
    try {
        // The timestamp parameter makes every request URL unique.
        String searchUrl = "http://www.baidu.com/s?wd=杨尚川&t=" + System.currentTimeMillis();
        Document page = Jsoup.connect(searchUrl)
                .header("Accept", ACCEPT)
                .header("Accept-Encoding", ENCODING)
                .header("Accept-Language", LANGUAGE)
                .header("Connection", CONNECTION)
                .header("Referer", "https://www.baidu.com")
                .header("Host", "www.baidu.com")
                .header("User-Agent", USER_AGENT)
                .ignoreContentType(true)
                .timeout(30000)
                .get();
        String pageTitle = page.title();
        LOGGER.info("搜索结果页面标题："+pageTitle);
        connected = pageTitle != null && pageTitle.contains("杨尚川");
    } catch (Exception e) {
        // An unreachable network is the expected "offline" outcome, so only other failures are logged.
        if (!"Network is unreachable".equals(e.getMessage())) {
            LOGGER.error("状态检查失败:"+e.getMessage());
        }
    }
    return connected;
}

Example 4

Source File: ArticalRemoteDataSource.java From KotlinMVPRxJava2Dagger2GreenDaoRetrofitDemo with Apache License 2.0

6 votes

/**
 * Parses an HTML page and collects its title followed by the long link texts
 * found inside the matching {@code <ul>} lists.
 *
 * @param html the HTML source to parse
 * @return a list whose first entry is the document title, followed by the own-text
 *         of every {@code <a>} (longer than 20 characters) inside a matching {@code <ul>}
 */
private List<String> parseData(String html) {
    // Parse the data with jsoup
    Document document = Jsoup.parse(html);
    List<String> strings = new ArrayList<>();
    strings.add(document.title());

    for (Element list : document.getElementsByTag("ul")) {
        // Test the current <ul>, not the whole collection: Elements.hasClass is true
        // as soon as ANY element matches, which would let every list through.
        // NOTE(review): the class name contains a space; this appears to match only when
        // the class attribute is exactly "panel_body itemlist" - confirm that is intended.
        if (!list.hasClass("panel_body itemlist")) {
            continue;
        }
        for (Element anchor : list.getElementsByTag("a")) {
            String text = anchor.ownText();
            // Only link texts longer than 20 characters are kept.
            if (text.length() > 20) {
                strings.add(text);
            }
        }
    }
    return strings;
}

Example 5

Source File: TwitchVideoRipper.java From ripme with MIT License

6 votes

/**
 * Fetches the page at {@code url}, scans its script elements for a
 * {@code "source":"..."} entry and queues each URL found for download.
 *
 * @throws IOException if the page contains no script elements (or is propagated from the calls made here)
 */
@Override
public void rip() throws IOException {
    LOGGER.info("Retrieving " + this.url);
    Document page = Http.url(url).get();

    // The page title is used to build a user friendly file name.
    String pageTitle = page.title();

    Elements scripts = page.select("script");
    if (scripts.isEmpty()) {
        throw new IOException("Could not find script code at " + url);
    }

    // Regex assumes highest quality source is listed first
    Pattern sourcePattern = Pattern.compile("\"source\":\"(.*?)\"");
    for (Element scriptTag : scripts) {
        Matcher matcher = sourcePattern.matcher(scriptTag.data());
        if (!matcher.find()) {
            continue;
        }
        addURLToDownload(new URL(matcher.group(1)), HOST + "_" + pageTitle);
    }
    waitForThreads();
}

Example 6

Source File: JsoupTesting.java From Java-Data-Science-Cookbook with MIT License

6 votes

/**
 * Demonstrates reading data from a fetched page: the title, the body text and,
 * for each {@code a[href]} link, its href, text, outer HTML and inner HTML.
 * The extracted values are only held in locals; nothing is returned.
 *
 * @param href the address of the page to fetch
 */
public void extractDataWithJsoup(String href){
	Document page;
	try {
		page = Jsoup.connect(href)
				.timeout(10*1000)
				.userAgent("Mozilla")
				.ignoreHttpErrors(true)
				.get();
	} catch (IOException e) {
		//Your exception handling here
		return;
	}
	if (page == null) {
		return;
	}

	String pageTitle = page.title();
	String bodyText = page.body().text();
	for (Element anchor : page.select("a[href]")) {
		String anchorHref = anchor.attr("href");
		String anchorText = anchor.text();
		String anchorOuterHtml = anchor.outerHtml();
		String anchorInnerHtml = anchor.html();
	}
}

Example 7

Source File: ParserDemo.java From crawler4j with Apache License 2.0

5 votes

/**
 * Parses the fetched HTML and logs the thread name, page title and URL.
 * Any exception is caught and its stack trace printed.
 *
 * @param result     the fetch result supplying the HTML
 * @param url        the address that was fetched
 * @param threadName the name included in the log line
 * @param isUpdate   not used by this implementation
 */
@Override
public void parse(HttpFetchResult result, String url, String threadName, boolean isUpdate) {
	try {
		String pageTitle = Jsoup.parse(result.getHtml()).title();
		logger.info(threadName +" " + pageTitle + " " + url + " ");
	} catch (Exception e) {
		e.printStackTrace();
	}
}

Example 8

Source File: UtilsDemoActivity.java From UltimateAndroid with Apache License 2.0

5 votes

/**
 * Fetches the page at {@code url} and stores its HTML document title in {@code title}.
 * An IOException is caught and its stack trace printed, leaving {@code title} untouched.
 *
 * @param params not used
 * @return always {@code null}
 */
@Override
protected Void doInBackground(Void... params) {
    try {
        // Connect to the web site and read the title of the returned document
        title = Jsoup.connect(url).get().title();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}

Example 9

Source File: UtilsDemoActivity.java From UltimateAndroid with Apache License 2.0

5 votes

/**
 * Reads the "url" extra from the intent, fetches that page, and broadcasts the
 * current value of {@code title} locally under the {@code TITLE_FILTER} action.
 * If the fetch fails with an IOException the stack trace is printed and
 * {@code title} keeps whatever value it had before.
 *
 * @param intent the intent carrying the "url" string extra
 */
public void onHandleIntent(Intent intent) {
    this.url = intent.getStringExtra("url");
    try {
        // Connect to the web site and read the title of the returned document
        title = Jsoup.connect(url).get().title();
    } catch (IOException e) {
        e.printStackTrace();
    }

    Intent broadcast = new Intent(TITLE_FILTER);
    broadcast.putExtra("title", title);
    LocalBroadcastManager.getInstance(this).sendBroadcast(broadcast);
}

Example 10

Source File: DynamicIp.java From superword with Apache License 2.0

5 votes

/**
 * Posts the given action to {@code http://192.168.0.1/goform/SysStatusHandle} and
 * verifies the outcome through {@link #isConnected()}.
 *
 * @param cookies cookies to attach to the request
 * @param action  the value sent as the "action" form field
 * @return {@code true} only when the response page title is "LAN | LAN Settings" and
 *         either the action is "3" and {@code isConnected()} is true, or the action is
 *         "4" and {@code isConnected()} is false
 */
public static boolean execute(Map<String, String> cookies, String action){
    Map<String, String> formData = new HashMap<>();
    formData.put("action", action);
    formData.put("CMD", "WAN_CON");
    formData.put("GO", "system_status.asp");

    Connection conn = Jsoup.connect("http://192.168.0.1/goform/SysStatusHandle")
            .header("Accept", ACCEPT)
            .header("Accept-Encoding", ENCODING)
            .header("Accept-Language", LANGUAGE)
            .header("Connection", CONNECTION)
            .header("Host", HOST)
            .header("Referer", REFERER)
            .header("User-Agent", USER_AGENT)
            .ignoreContentType(true)
            .timeout(30000);
    for (Map.Entry<String, String> cookie : cookies.entrySet()) {
        conn.cookie(cookie.getKey(), cookie.getValue());
    }

    String title = null;
    try {
        Connection.Response response = conn.method(Connection.Method.POST).data(formData).execute();
        title = Jsoup.parse(response.body()).title();
        LOGGER.info("操作连接页面标题："+title);
        // Pause for 10 seconds before the connectivity check below.
        Thread.sleep(10000);
    } catch (Exception e) {
        LOGGER.error(e.getMessage());
    }

    if (!"LAN | LAN Settings".equals(title)) {
        return false;
    }
    if ("3".equals(action)) {
        return isConnected();
    }
    if ("4".equals(action)) {
        return !isConnected();
    }
    return false;
}

Example 11

Source File: WxCrawlServiceImpl.java From wx-crawl with Apache License 2.0

5 votes

/**
 * Resolves an article title from the document, trying three sources in order:
 * the {@code ARTICLE_TITLE} attribute on the head element, the text of the first
 * element matching the {@code TITLE} selector, and finally the document title.
 *
 * @param sourceDoc the document to inspect
 * @return the first title found by the lookups above
 */
private String getArticleTitle(Document sourceDoc) {
    if (sourceDoc.head() != null) {
        String headTitle = sourceDoc.head().attr(WxCrawlerConstant.BackupArticle.ARTICLE_TITLE);
        if (StringUtils.isNotEmpty(headTitle)) {
            return headTitle;
        }
    }
    if (sourceDoc.select(WxCrawlerConstant.HTMLElementSelector.TITLE).first() != null) {
        return sourceDoc.select(WxCrawlerConstant.HTMLElementSelector.TITLE).first().text();
    }
    return sourceDoc.title();
}

Example 12

Source File: JSoupExamples.java From Java-for-Data-Science with MIT License

5 votes

/**
 * Prints the document title followed by the text of its body element(s).
 *
 * @param document the document to display
 */
public void displayBodyText(Document document) {
    out.println("Title: " + document.title());

    // Displays the entire body of the document
    out.println("---Body---");
    out.println("Text: " + document.select("body").text());
}

Example 13

Source File: InternetBrowser.java From petscii-bbs with Mozilla Public License 2.0

5 votes

/**
 * Collects every {@code a[href]} link of the page as an {@code Entry}.
 *
 * @param webpage the parsed page to scan
 * @return one entry per link, holding the link's absolute URL and a label; the label
 *         is the link text, or the raw href attribute when the text is blank
 * @throws Exception declared for callers; nothing in this method throws it explicitly
 */
public static List<Entry> getAllLinks(Document webpage) throws Exception {
    List<Entry> urls = new ArrayList<>();
    for (Element link : webpage.select("a[href]")) {
        // Fall back to the href itself so every entry has a non-blank label.
        final String label = defaultIfBlank(link.text(), link.attr("href"));
        urls.add(new Entry(link.absUrl("href"), label));
    }
    return urls;
}

Example 14

Source File: JsoupHCalendarExtractor.java From wandora with GNU General Public License v3.0

5 votes

/**
 * Creates (or reuses) a topic named after the document title, types it as
 * "vcalendar", and hands the document body to the topic-level parser.
 *
 * @param document the document to process
 * @throws TopicMapException propagated from the topic operations called here
 */
private void parseCalendar(Document document) throws TopicMapException {
    String calendarTitle = document.title();
    Topic calendarType = getType("vcalendar");

    Topic calendarTopic = getOrCreateTopic(tm,null, calendarTitle);
    calendarTopic.addType(calendarType);

    parseCalendar(calendarTopic, document.body());
}

Example 15

Source File: ApiCatalogEndpointIntegrationTest.java From api-layer with Eclipse Public License 2.0

5 votes

/**
 * Requests the invalid container endpoint expecting a 404 status and checks the
 * title, first-child heading and link text of the returned HTML page.
 */
@Test
public void whenMisSpeltContainersEndpoint_thenNotFoundResponseWithAPIMessage() throws Exception {
    HttpResponse response = getResponse(INVALID_CONTAINER_ENDPOINT, HttpStatus.SC_NOT_FOUND);
    Document page = Jsoup.parse(EntityUtils.toString(response.getEntity()));

    String pageTitle = page.title();
    String headingText = page.select("h1:first-child").text();
    String linkText = page.select("a").text();

    assertNotNull(pageTitle);
    assertEquals("404 Not Found", pageTitle);
    assertEquals("404 Page Not Found", headingText);
    assertEquals("Go to Dashboard", linkText);
}

Example 16

Source File: ShadowSocksCrawlerService.java From ShadowSocks-Share with Apache License 2.0

5 votes

/**
 * Crawls ss accounts.
 *
 * @return an entity built from the target URL and the fetched document's title, flagged
 *         {@code true} and carrying the parsed set; if an IOException occurs, an entity
 *         with an empty title flagged {@code false}
 */
public ShadowSocksEntity getShadowSocks() {
	try {
		Document page = getDocument();
		ShadowSocksEntity result = new ShadowSocksEntity(getTargetURL(), page.title(), true, new Date());
		result.setShadowSocksSet(parse(page));
		return result;
	} catch (IOException e) {
		log.error(e.getMessage());
		return new ShadowSocksEntity(getTargetURL(), "", false, new Date());
	}
}

Example 17

Source File: AutoGetHtml.java From danyuan-application with Apache License 2.0

4 votes

/**
 * Method name: getBody.
 * Sends a POST request to http://www.oschina.net/ and returns the title of the
 * response document.
 *
 * NOTE(review): neither {@code url} nor {@code key} is read by the body; the request
 * always targets the hard-coded address - confirm whether that is intended.
 *
 * @param url not used by the current implementation
 * @param key not used by the current implementation
 * @return the title of the returned document
 * @throws IOException if the request fails
 * @author Tenghui.Wang
 */
public static String getBody(String url, String key) throws IOException {
	return Jsoup.connect("http://www.oschina.net/")
	        .data("query", "Java") // request parameter
	        .userAgent("Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2") // set the User-Agent
	        .cookie("auth", "token") // set a cookie
	        .timeout(3000) // set the timeout
	        .post() // access the URL with the POST method
	        .title();
}

Example 18

Source File: AbstractHtmlConsumer.java From baleen with Apache License 2.0

4 votes

/**
 * Builds an HTML document for the given CAS and writes it to the output file.
 *
 * <p>The head receives an optional stylesheet link, a {@code utf-8} charset declaration,
 * meta entries taken from the document annotation, and one meta entry per
 * {@code Metadata} annotation. A metadata entry keyed "documentTitle" (case-insensitive)
 * supplies the document title. The body is filled in by {@code writeBody}.
 *
 * @param jCas the CAS to render
 * @throws AnalysisEngineProcessException if writing the file fails with an IOException
 */
@Override
protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException {
  final File f = getFileName(jCas);
  final DocumentAnnotation da = getDocumentAnnotation(jCas);

  final Document doc =
      Jsoup.parse("<!DOCTYPE html>\n<html lang=\"" + da.getLanguage() + "\"></html>");
  doc.outputSettings(new Document.OutputSettings().prettyPrint(false));
  final Element head = doc.head();

  // The stylesheet link is only emitted when a css location is configured.
  if (!Strings.isNullOrEmpty(css)) {
    final Element cssLink = head.appendElement("link");
    cssLink.attr("rel", "stylesheet");
    cssLink.attr("href", css);
  }

  final Element charset = head.appendElement("meta");
  charset.attr("charset", "utf-8");

  appendMeta(head, "document.type", da.getDocType());
  appendMeta(head, "document.sourceUri", da.getSourceUri());
  appendMeta(head, "externalId", da.getHash());

  appendMeta(head, "document.classification", da.getDocumentClassification());
  appendMeta(
      head,
      "document.caveats",
      String.join(",", UimaTypesUtils.toArray(da.getDocumentCaveats())));
  appendMeta(
      head,
      "document.releasability",
      String.join(",", UimaTypesUtils.toArray(da.getDocumentReleasability())));

  // Every metadata entry becomes a meta tag; the last "documentTitle" entry seen
  // also becomes the document title.
  String title = null;
  for (final Metadata md : JCasUtil.select(jCas, Metadata.class)) {
    appendMeta(head, md.getKey(), md.getValue());
    if ("documentTitle".equalsIgnoreCase(md.getKey())) {
      title = md.getValue();
    }
  }

  if (!Strings.isNullOrEmpty(title)) {
    doc.title(title);
  }

  final Element body = doc.body();

  writeBody(jCas, body);

  try {
    // The head declares charset utf-8, so the bytes must be written as UTF-8 rather
    // than in the platform default encoding, which may differ.
    FileUtils.writeStringToFile(f, doc.html(), Charset.forName("UTF-8"));
  } catch (final IOException e) {
    throw new AnalysisEngineProcessException(e);
  }
}

Example 19

Source File: Utils.java From SteamGifts with MIT License

4 votes

/**
 * Returns the document title without a trailing page suffix.
 *
 * <p>On /giveaways/id/name/search?page=X the title has the form "Game Title - Page X";
 * the " - Page X" part at the end of the title is removed.
 *
 * @param document the document whose title is read
 * @return the title with any trailing " - Page N" suffix stripped
 */
public static String getPageTitle(Document document) {
    return document.title().replaceAll(" - Page ([\\d,]+)$", "");
}

Example 20

Source File: HtmlUtils.java From ogham with Apache License 2.0

2 votes

/**
 * Extracts the title of an HTML document.
 *
 * @param htmlContent
 *            the HTML content that may contain a title
 * @return the document title, or null when the parsed document has no
 *         <code>title</code> element directly under <code>head</code>
 */
public static String getTitle(String htmlContent) {
	Document parsed = Jsoup.parse(htmlContent);
	if (parsed.select("head > title").isEmpty()) {
		return null;
	}
	return parsed.title();
}