Java Code Examples for org.jsoup.nodes.Document#title()

The following examples show how to use org.jsoup.nodes.Document#title(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DynamicIp.java    From rank with Apache License 2.0 6 votes vote down vote up
/**
 * Checks connectivity by running a Baidu search and inspecting the title of the result page.
 *
 * @return {@code true} if the result page title contains the search keyword; {@code false}
 *         if it does not or if any exception occurs while fetching the page
 */
public static boolean isConnected(){
    try {
        // The current timestamp is appended as an extra query parameter on every call.
        String searchUrl = "http://www.baidu.com/s?wd=杨尚川&t=" + System.currentTimeMillis();
        Document page = Jsoup.connect(searchUrl)
                .header("Accept", ACCEPT)
                .header("Accept-Encoding", ENCODING)
                .header("Accept-Language", LANGUAGE)
                .header("Connection", CONNECTION)
                .header("Referer", "https://www.baidu.com")
                .header("Host", "www.baidu.com")
                .header("User-Agent", USER_AGENT)
                .ignoreContentType(true)
                .timeout(30000)
                .get();
        String pageTitle = page.title();
        LOGGER.info("搜索结果页面标题:"+pageTitle);
        return pageTitle != null && pageTitle.contains("杨尚川");
    }catch (Exception e){
        // "Network is unreachable" is reported as a plain false without an error log entry.
        if(!"Network is unreachable".equals(e.getMessage())){
            LOGGER.error("状态检查失败:"+e.getMessage());
        }
        return false;
    }
}
 
Example 2
Source File: UrlTitleAnnouncer.java    From VileBot with MIT License 6 votes vote down vote up
/**
 * Fetches the HTML page at the given URL and returns its document title.
 *
 * @param url http URI String
 * @return the title reported by the parsed document, or an empty string if fetching the page
 *         failed with an {@link IOException}
 */
private String scrapeURLHTMLTitle( String url )
{
    try
    {
        return Jsoup.connect( url ).get().title();
    }
    catch ( IOException x )
    {
        // Best effort: report the failure on stderr and fall back to an empty title.
        System.err.format( "scrapeURLHTMLTitle BufferedReader error: %s%n", x );
        return "";
    }
}
 
Example 3
Source File: DynamicIp.java    From superword with Apache License 2.0 6 votes vote down vote up
/**
 * Checks connectivity by running a Baidu search and inspecting the title of the result page.
 *
 * @return {@code true} if the result page title contains the search keyword; {@code false}
 *         if it does not or if any exception occurs while fetching the page
 */
public static boolean isConnected(){
    try {
        // The current timestamp is appended as an extra query parameter on every call.
        String searchUrl = "http://www.baidu.com/s?wd=杨尚川&t=" + System.currentTimeMillis();
        Document page = Jsoup.connect(searchUrl)
                .header("Accept", ACCEPT)
                .header("Accept-Encoding", ENCODING)
                .header("Accept-Language", LANGUAGE)
                .header("Connection", CONNECTION)
                .header("Referer", "https://www.baidu.com")
                .header("Host", "www.baidu.com")
                .header("User-Agent", USER_AGENT)
                .ignoreContentType(true)
                .timeout(30000)
                .get();
        String pageTitle = page.title();
        LOGGER.info("搜索结果页面标题:"+pageTitle);
        return pageTitle != null && pageTitle.contains("杨尚川");
    }catch (Exception e){
        // "Network is unreachable" is reported as a plain false without an error log entry.
        if(!"Network is unreachable".equals(e.getMessage())){
            LOGGER.error("状态检查失败:"+e.getMessage());
        }
        return false;
    }
}
 
Example 4
Source File: ArticalRemoteDataSource.java    From KotlinMVPRxJava2Dagger2GreenDaoRetrofitDemo with Apache License 2.0 6 votes vote down vote up
/**
 * Parses an HTML page with jsoup and collects its title plus the long link texts of the
 * item-list panels.
 *
 * The first entry of the result is always the document title. It is followed by the own text
 * of every {@code <a>} found inside a {@code <ul>} that carries both the {@code panel_body}
 * and {@code itemlist} classes, provided that text is longer than 20 characters.
 *
 * @param html the HTML source to parse
 * @return the title followed by the qualifying link texts, in document order
 */
private List<String> parseData(String html) {
    Document document = Jsoup.parse(html);
    List<String> strings = new ArrayList<>();
    strings.add(document.title());

    for (Element list : document.getElementsByTag("ul")) {
        // Test the current <ul>, not the whole collection: checking the collection let every
        // <ul> through as soon as any single one matched. The two class names are checked
        // separately so the match does not depend on their order or on extra classes.
        if (!list.hasClass("panel_body") || !list.hasClass("itemlist")) {
            continue;
        }
        for (Element anchor : list.getElementsByTag("a")) {
            String text = anchor.ownText();
            if (text.length() > 20) {
                strings.add(text);
            }
        }
    }
    return strings;
}
 
Example 5
Source File: TwitchVideoRipper.java    From ripme with MIT License 6 votes vote down vote up
/**
 * Fetches this ripper's page, extracts video source URLs from its script elements and queues
 * them for download, then waits for the download threads.
 *
 * @throws IOException if the page contains no script elements (it may also come from the
 *         fetch or from building the video URL)
 */
@Override
public void rip() throws IOException {
    LOGGER.info("Retrieving " + this.url);
    Document page = Http.url(url).get();

    // The page title is used as the user-friendly part of the file name.
    String pageTitle = page.title();

    Elements scripts = page.select("script");
    if (scripts.isEmpty()) {
        throw new IOException("Could not find script code at " + url);
    }

    // Only the first match in each script is used: the regex assumes the highest quality
    // source is listed first.
    Pattern sourcePattern = Pattern.compile("\"source\":\"(.*?)\"");
    for (Element scriptTag : scripts) {
        Matcher sourceMatcher = sourcePattern.matcher(scriptTag.data());
        if (!sourceMatcher.find()) {
            continue;
        }
        addURLToDownload(new URL(sourceMatcher.group(1)), HOST + "_" + pageTitle);
    }
    waitForThreads();
}
 
Example 6
Source File: JsoupTesting.java    From Java-Data-Science-Cookbook with MIT License 6 votes vote down vote up
/**
 * Demonstrates which pieces of data jsoup can extract from a fetched page: the title, the
 * body text, and for every link its href, text, outer HTML and inner HTML.
 *
 * The extracted values are only assigned to locals; nothing is returned or stored.
 *
 * @param href the address of the page to fetch
 */
public void extractDataWithJsoup(String href){
	Document page;
	try {
		page = Jsoup.connect(href).timeout(10*1000).userAgent("Mozilla").ignoreHttpErrors(true).get();
	} catch (IOException e) {
		//Your exception handling here
		return;
	}
	if(page == null){
		return;
	}

	String title = page.title();
	String text = page.body().text();
	for (Element anchor : page.select("a[href]")) {
		String linkHref = anchor.attr("href");
		String linkText = anchor.text();
		String linkOuterHtml = anchor.outerHtml();
		String linkInnerHtml = anchor.html();
	}
}
 
Example 7
Source File: ParserDemo.java    From crawler4j with Apache License 2.0 5 votes vote down vote up
/**
 * Parses the fetched HTML and logs the thread name, the page title and the URL.
 *
 * Any exception raised while reading or parsing the result is printed and otherwise ignored.
 */
@Override
public void parse(HttpFetchResult result, String url, String threadName, boolean isUpdate) {
	try {
		String pageTitle = Jsoup.parse(result.getHtml()).title();
		logger.info(threadName +" " + pageTitle + " " + url + " ");
	} catch (Exception e) {
		e.printStackTrace();
	}
}
 
Example 8
Source File: UtilsDemoActivity.java    From UltimateAndroid with Apache License 2.0 5 votes vote down vote up
/**
 * Fetches the page at {@code url} and stores its document title in {@code title}
 * (both declared outside this method).
 *
 * @return always {@code null}
 */
@Override
protected Void doInBackground(Void... params) {
    try {
        // Connect to the web site and read the title of the returned HTML document.
        title = Jsoup.connect(url).get().title();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}
 
Example 9
Source File: UtilsDemoActivity.java    From UltimateAndroid with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the "url" extra from the intent, fetches that page, and broadcasts its title locally
 * in a {@code TITLE_FILTER} intent under the "title" extra.
 *
 * If the fetch fails, the stack trace is printed and {@code title} is broadcast with
 * whatever value it already held.
 */
public void onHandleIntent(Intent intent) {
    this.url = intent.getStringExtra("url");
    try {
        // Connect to the web site and read the title of the returned HTML document.
        title = Jsoup.connect(url).get().title();
    } catch (IOException e) {
        e.printStackTrace();
    }

    Intent titleBroadcast = new Intent(TITLE_FILTER);
    titleBroadcast.putExtra("title", title);
    LocalBroadcastManager.getInstance(this).sendBroadcast(titleBroadcast);
}
 
Example 10
Source File: DynamicIp.java    From superword with Apache License 2.0 5 votes vote down vote up
/**
 * Posts a WAN connection command to the router's status handler and verifies the outcome.
 *
 * The form is submitted with the given cookies, the title of the response page is logged,
 * and the method then waits 10 seconds before checking connectivity with
 * {@link #isConnected()}.
 *
 * @param cookies cookies to send with the request
 * @param action  value of the "action" form field; "3" is expected to end with a working
 *                connection and "4" with no connection (presumably connect / disconnect —
 *                confirm against the router's form)
 * @return {@code true} only if the response page title is "LAN | LAN Settings" and the
 *         connectivity check matches the expectation for {@code action}; {@code false}
 *         otherwise, including when the request fails or the wait is interrupted
 */
public static boolean execute(Map<String, String> cookies, String action){
    String url = "http://192.168.0.1/goform/SysStatusHandle";
    Map<String, String> map = new HashMap<>();
    map.put("action", action);
    map.put("CMD", "WAN_CON");
    map.put("GO", "system_status.asp");
    Connection conn = Jsoup.connect(url)
            .header("Accept", ACCEPT)
            .header("Accept-Encoding", ENCODING)
            .header("Accept-Language", LANGUAGE)
            .header("Connection", CONNECTION)
            .header("Host", HOST)
            .header("Referer", REFERER)
            .header("User-Agent", USER_AGENT)
            .ignoreContentType(true)
            .timeout(30000);
    // Iterate entries directly instead of looking every key up again.
    for(Map.Entry<String, String> cookie : cookies.entrySet()){
        conn.cookie(cookie.getKey(), cookie.getValue());
    }

    String title = null;
    try {
        Connection.Response response = conn.method(Connection.Method.POST).data(map).execute();
        String html = response.body();
        Document doc = Jsoup.parse(html);
        title = doc.title();
        LOGGER.info("操作连接页面标题:"+title);
        Thread.sleep(10000);
    }catch (InterruptedException e){
        // Restore the interrupt flag so callers can see the interruption, and stop here:
        // the wait was cut short, so the connectivity check below would not be meaningful.
        Thread.currentThread().interrupt();
        LOGGER.error(e.getMessage());
        return false;
    }catch (Exception e){
        LOGGER.error(e.getMessage());
    }
    if("LAN | LAN Settings".equals(title)){
        if(("3".equals(action) && isConnected())
                || ("4".equals(action) && !isConnected())){
            return true;
        }
    }
    return false;
}
 
Example 11
Source File: WxCrawlServiceImpl.java    From wx-crawl with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the article title from a document, trying three sources in order:
 * the {@code ARTICLE_TITLE} attribute on the head element (when non-empty), the text of the
 * first element matching the {@code TITLE} selector, and finally the document's own title.
 *
 * @param sourceDoc the document to read the title from
 * @return the title from the first source that provides one
 */
private String getArticleTitle(Document sourceDoc) {
    if (sourceDoc.head() != null) {
        String headTitle = sourceDoc.head().attr(WxCrawlerConstant.BackupArticle.ARTICLE_TITLE);
        if (StringUtils.isNotEmpty(headTitle)) {
            return headTitle;
        }
    }
    if (sourceDoc.select(WxCrawlerConstant.HTMLElementSelector.TITLE).first() == null) {
        return sourceDoc.title();
    }
    return sourceDoc.select(WxCrawlerConstant.HTMLElementSelector.TITLE).first().text();
}
 
Example 12
Source File: JSoupExamples.java    From Java-for-Data-Science with MIT License 5 votes vote down vote up
/**
 * Prints the document title followed by the text of the document body.
 *
 * @param document the document to display
 */
public void displayBodyText(Document document) {
    out.println("Title: " + document.title());

    out.println("---Body---");
    out.println("Text: " + document.select("body").text());
}
 
Example 13
Source File: InternetBrowser.java    From petscii-bbs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Collects every link ({@code a[href]}) of a page as an {@code Entry} made of the link's
 * absolute URL and a label.
 *
 * The label is the link text, or the raw {@code href} attribute when the text is blank.
 *
 * @param webpage the parsed page to scan
 * @return one entry per link, in document order
 */
public static List<Entry> getAllLinks(Document webpage) throws Exception {
    List<Entry> entries = new ArrayList<>();
    String title = webpage.title(); // read here but not used below

    for (Element anchor : webpage.select("a[href]")) {
        final String label = defaultIfBlank(anchor.text(), anchor.attr("href"));
        entries.add(new Entry(anchor.absUrl("href"), label));
    }
    return entries;
}
 
Example 14
Source File: JsoupHCalendarExtractor.java    From wandora with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates (or looks up) a topic named after the document title, types it as "vcalendar",
 * and then parses the document body into that topic.
 *
 * @param document the document to extract the calendar from
 * @throws TopicMapException declared by this method; presumably raised by the topic map
 *         helpers it calls
 */
private void parseCalendar(Document document) throws TopicMapException {
    final String documentTitle = document.title();
    final Topic calendarType = getType("vcalendar");
    final Topic calendarTopic = getOrCreateTopic(tm, null, documentTitle);
    calendarTopic.addType(calendarType);

    parseCalendar(calendarTopic, document.body());
}
 
Example 15
Source File: ApiCatalogEndpointIntegrationTest.java    From api-layer with Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Requests an invalid containers endpoint and verifies that the 404 page carries the
 * expected title, heading and dashboard link text.
 */
@Test
public void whenMisSpeltContainersEndpoint_thenNotFoundResponseWithAPIMessage() throws Exception {
    HttpResponse response = getResponse(INVALID_CONTAINER_ENDPOINT, HttpStatus.SC_NOT_FOUND);
    Document page = Jsoup.parse(EntityUtils.toString(response.getEntity()));

    String pageTitle = page.title();
    assertNotNull(pageTitle);
    assertEquals("404 Not Found", pageTitle);
    assertEquals("404 Page Not Found", page.select("h1:first-child").text());
    assertEquals("Go to Dashboard", page.select("a").text());
}
 
Example 16
Source File: ShadowSocksCrawlerService.java    From ShadowSocks-Share with Apache License 2.0 5 votes vote down vote up
/**
 * Crawls ShadowSocks (ss) accounts from the target page.
 *
 * @return an entity built from the target URL, the page title and the parsed account set;
 *         if an {@link IOException} occurs, an entity with an empty title and a {@code false}
 *         flag is returned instead
 */
public ShadowSocksEntity getShadowSocks() {
	try {
		Document page = getDocument();
		ShadowSocksEntity result = new ShadowSocksEntity(getTargetURL(), page.title(), true, new Date());
		result.setShadowSocksSet(parse(page));
		return result;
	} catch (IOException e) {
		log.error(e.getMessage());
		return new ShadowSocksEntity(getTargetURL(), "", false, new Date());
	}
}
 
Example 17
Source File: AutoGetHtml.java    From danyuan-application with Apache License 2.0 4 votes vote down vote up
/**
 * Posts a fixed query to the OSChina home page and returns the title of the response page.
 *
 * NOTE(review): neither {@code url} nor {@code key} is read by this method; the target
 * address and the request parameter are hard-coded — confirm whether that is intended.
 *
 * @param url not used by the current implementation
 * @param key not used by the current implementation
 * @return the title of the returned document
 * @throws IOException declared by this method; presumably raised when the request fails
 * @author Tenghui.Wang
 */
public static String getBody(String url, String key) throws IOException {
	Document response = Jsoup.connect("http://www.oschina.net/")
	        .data("query", "Java") // request parameter
	        .userAgent("Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2") // set the User-Agent
	        .cookie("auth", "token") // set a cookie
	        .timeout(3000) // set the connection timeout
	        .post(); // access the URL with the POST method

	String pageTitle = response.title();
	return pageTitle;
}
 
Example 18
Source File: AbstractHtmlConsumer.java    From baleen with Apache License 2.0 4 votes vote down vote up
/**
 * Renders the CAS as a standalone HTML file.
 *
 * The generated document gets its {@code lang} attribute from the document annotation, an
 * optional stylesheet link (when {@code css} is set), a UTF-8 charset declaration, and one
 * meta entry per document property and per {@code Metadata} annotation. A metadata entry
 * whose key is "documentTitle" (case-insensitive) also supplies the HTML title; if several
 * exist, the last one wins. The body is filled in by {@code writeBody}.
 *
 * @param jCas the CAS to export
 * @throws AnalysisEngineProcessException if the file cannot be written
 */
@Override
protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException {
  final File f = getFileName(jCas);
  final DocumentAnnotation da = getDocumentAnnotation(jCas);

  final Document doc =
      Jsoup.parse("<!DOCTYPE html>\n<html lang=\"" + da.getLanguage() + "\"></html>");
  doc.outputSettings(new Document.OutputSettings().prettyPrint(false));
  final Element head = doc.head();

  if (!Strings.isNullOrEmpty(css)) {
    final Element cssLink = head.appendElement("link");
    cssLink.attr("rel", "stylesheet");
    cssLink.attr("href", css);
  }

  final Element charset = head.appendElement("meta");
  charset.attr("charset", "utf-8");

  appendMeta(head, "document.type", da.getDocType());
  appendMeta(head, "document.sourceUri", da.getSourceUri());
  appendMeta(head, "externalId", da.getHash());

  appendMeta(head, "document.classification", da.getDocumentClassification());
  appendMeta(
      head,
      "document.caveats",
      String.join(",", UimaTypesUtils.toArray(da.getDocumentCaveats())));
  appendMeta(
      head,
      "document.releasability",
      String.join(",", UimaTypesUtils.toArray(da.getDocumentReleasability())));

  String title = null;
  for (final Metadata md : JCasUtil.select(jCas, Metadata.class)) {
    appendMeta(head, md.getKey(), md.getValue());
    if ("documentTitle".equalsIgnoreCase(md.getKey())) {
      title = md.getValue();
    }
  }

  if (!Strings.isNullOrEmpty(title)) {
    doc.title(title);
  }

  final Element body = doc.body();

  writeBody(jCas, body);

  try {
    // Write in UTF-8 to match the <meta charset="utf-8"> declared above; the platform
    // default charset could produce a file whose bytes contradict its own declaration.
    FileUtils.writeStringToFile(f, doc.html(), Charset.forName("UTF-8"));
  } catch (final IOException e) {
    throw new AnalysisEngineProcessException(e);
  }
}
 
Example 19
Source File: Utils.java    From SteamGifts with MIT License 4 votes vote down vote up
/**
 * Returns the document title without a trailing page indicator.
 *
 * On paginated pages (e.g. /giveaways/id/name/search?page=X) the title has the form
 * "Game Title - Page X"; the " - Page X" suffix is removed. Titles without that suffix are
 * returned unchanged.
 *
 * @param document the page whose title is wanted
 * @return the title with any trailing " - Page X" removed
 */
public static String getPageTitle(Document document) {
    return document.title().replaceAll(" - Page ([\\d,]+)$", "");
}
 
Example 20
Source File: HtmlUtils.java    From ogham with Apache License 2.0 2 votes vote down vote up
/**
 * Extracts the title of an HTML document.
 *
 * @param htmlContent
 *            the HTML content that may contain a title
 * @return the document title, or null when the parsed document has no <code>title</code>
 *         element directly under its head
 */
public static String getTitle(String htmlContent) {
	Document parsed = Jsoup.parse(htmlContent);
	if (parsed.select("head > title").isEmpty()) {
		return null;
	}
	return parsed.title();
}