Java Code Examples for org.jsoup.nodes.Document

The following examples show how to use org.jsoup.nodes.Document. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source Project: HHComicViewer   Author: huxizhijian   File: ComicRecommendPresenter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link ComicTabList} from the comics listed inside the {@code div} whose id is
 * {@code divId}.
 *
 * <p>The i-th {@code a.image_link}, the i-th {@code img} and the i-th {@code li} inside that
 * div are treated as describing the same comic.
 *
 * @param doc   parsed page containing the section
 * @param divId id of the div that wraps the section
 * @param title title given to the resulting tab list
 * @return the tab list holding one {@link Comic} per image link found
 */
private ComicTabList getComicTabList(Document doc, String divId, String title) {
    Element section = doc.select("div[id=" + divId + "]").first();
    Elements anchors = section.select("a[class=image_link]");
    Elements images = section.select("img");
    Elements items = section.select("li");

    List<Comic> comics = new ArrayList<>();
    int position = 0;
    for (Element anchor : anchors) {
        Comic comic = new Comic();
        comic.setTitle(anchor.attr("title"));

        // The comic id is whatever follows the site prefix in the link, up to the first dot.
        String href = anchor.attr("href");
        int prefixLength = HHApplication.getInstance()
                .getHHWebVariable().getPre().length();
        String idWithExtension = href.substring(prefixLength);
        comic.setCid(Integer.parseInt(idWithExtension.split("\\.")[0]));

        Element image = images.get(position);
        comic.setThumbnailUrl(image.attr("src"));

        // Author is the part of the alt text after " - " and before the first "20"
        // (presumably the start of a year -- TODO confirm against the page markup).
        String altText = image.attr("alt");
        comic.setAuthor(altText.split(" - ")[1].split("20")[0]);

        // Status is the list item's text from its first '[' onwards.
        String itemText = items.get(position).text();
        comic.setComicStatus("[" + itemText.split("\\[")[1]);

        comics.add(comic);
        position++;
    }
    return new ComicTabList(comics, title);
}
 
Example #2
/**
 * Renders the search-form input template for the number property and checks that the
 * output contains a text input with id {@code score} bound to {@code search.score}.
 */
@Test
public void testGenerateBasicNumberProperty() throws Exception {
    Map<String, Object> model = TestHelpers.createInspectionResultWrapper(ENTITY_NAME, NUMBER_PROP);

    URL templateUrl = getClass().getResource(Deployments.BASE_PACKAGE_PATH + Deployments.SEARCH_FORM_INPUT);
    Resource<URL> templateResource = resourceFactory.create(templateUrl);
    Template template = processorFactory.create(templateResource, FreemarkerTemplate.class);

    String rendered = template.process(model);
    Document html = Jsoup.parseBodyFragment(rendered);
    // The template must have produced some output.
    assertThat(rendered.trim(), not(equalTo("")));

    Elements formGroup = html.select("div.form-group");
    assertThat(formGroup, notNullValue());

    Elements input = formGroup.select("div.col-sm-10 > input");
    assertThat(input.attr("id"), equalTo("score"));
    assertThat(input.attr("type"), equalTo("text"));
    assertThat(input.attr("ng-model"), equalTo("search" + "." + "score"));
}
 
Example #3
Source Project: PicKing   Author: lanyuanxiaoyao   File: Aitaotu.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Parses a detail page and collects one {@code PicInfo} per image under {@code #big-pic}.
 *
 * <p>Every picture gets the same title (text of the first {@code #photos h1}, or an empty
 * string when there is none) and the same tag list (texts of all {@code .fbl a} links).
 *
 * @param baseUrl    not used by this implementation
 * @param currentUrl stored in the result map under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as UTF-8
 * @param resultMap  map that receives the output; it is also the return value
 * @return {@code resultMap} with {@code CURRENT_URL} and {@code RESULT} filled in
 * @throws UnsupportedEncodingException declared by the charset-name based decoding
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    Document page = Jsoup.parse(new String(result, "utf-8"));
    Elements images = page.select("#big-pic img");
    Elements headings = page.select("#photos h1");
    Elements tagLinks = page.select(".fbl a");

    String heading = headings.isEmpty() ? "" : headings.get(0).text();

    List<String> tagNames = new ArrayList<>();
    for (Element tagLink : tagLinks) {
        tagNames.add(tagLink.text());
    }

    List<PicInfo> pictures = new ArrayList<>();
    for (Element image : images) {
        pictures.add(new PicInfo()
                .setTags(tagNames)
                .setTitle(heading)
                .setPicUrl(image.attr("src")));
    }

    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, pictures);
    return resultMap;
}
 
Example #4
Source Project: flow   Author: vaadin   File: BootstrapHandler.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates and initialises the UI for the request, marks the response as non-cacheable and
 * writes the bootstrap page's full HTML to the response.
 *
 * @return always {@code true}
 * @throws IOException declared by the handler contract
 */
@Override
public boolean synchronizedHandleRequest(VaadinSession session,
        VaadinRequest request, VaadinResponse response) throws IOException {
    // Resolve the UI class for this request and set up its bootstrap context.
    Class<? extends UI> uiClass = getUIClass(request);
    BootstrapContext context = createAndInitUI(uiClass, request, response, session);

    HandlerHelper.setResponseNoCacheHeaders(response::setHeader, response::setDateHeader);

    String pageHtml = pageBuilder.getBootstrapPage(context).outerHtml();
    writeBootstrapPage(response, pageHtml);
    return true;
}
 
Example #5
Source Project: GankGirl   Author: dalingge   File: ReadTadPagePresenter.java    License: GNU Lesser General Public License v2.1 6 votes vote down vote up
/**
 * Downloads the page at {@code Constants.API_URL_READ}, turns every link inside
 * {@code div#xiandu_cat} into a {@code ReadTypeBean} and emits the resulting list.
 *
 * <p>On success the emitter receives exactly one {@code onNext} followed by
 * {@code onComplete}. When the download fails with an {@link IOException} the emitter
 * receives only {@code onError}; no further events are sent after that terminal signal.
 *
 * @param subscriber emitter that receives the list or the error
 * @throws Exception declared by the functional interface this method implements
 */
@Override
public void subscribe(ObservableEmitter<List<ReadTypeBean>> subscriber) throws Exception {
    List<ReadTypeBean> datas = new ArrayList<>();
    try {
        Document doc = Jsoup.connect(Constants.API_URL_READ).get();
        Elements tads = doc.select("div#xiandu_cat").select("a");
        for (Element tad : tads) {
            ReadTypeBean bean = new ReadTypeBean();
            bean.setTitle(tad.text()); // link text is the title
            bean.setUrl(tad.absUrl("href")); // absUrl resolves the href to an absolute URL
            datas.add(bean);
            Log.v("Jsoup","title= "+bean.getTitle()+"   url= "+bean.getUrl());
        }
    } catch (IOException e) {
        subscriber.onError(e);
        // onError is terminal: do not fall through to onNext/onComplete.
        return;
    }

    subscriber.onNext(datas);
    subscriber.onComplete();
}
 
Example #6
Source Project: astor   Author: SpoonLabs   File: HtmlParserTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
// Checks that only the first <base href> in a document determines the base URI.
@Test public void handlesBaseTags() {
    String html = "<a href=1>#</a><base href='/2/'><a href='3'>#</a><base href='http://bar'><a href=/4>#</a>";
    Document doc = Jsoup.parse(html, "http://foo/");

    // The base URI is set once, so the document and all descendants see the first base only.
    String expectedBase = "http://foo/2/";
    assertEquals(expectedBase, doc.baseUri());

    Elements anchors = doc.getElementsByTag("a");
    assertEquals(3, anchors.size());

    for (int i = 0; i < 3; i++) {
        assertEquals(expectedBase, anchors.get(i).baseUri());
    }

    String[] expectedAbsoluteHrefs = {"http://foo/2/1", "http://foo/2/3", "http://foo/4"};
    for (int i = 0; i < expectedAbsoluteHrefs.length; i++) {
        assertEquals(expectedAbsoluteHrefs[i], anchors.get(i).absUrl("href"));
    }
}
 
Example #7
Source Project: Crawer   Author: garyudeng   File: Processer3.java    License: MIT License 6 votes vote down vote up
/**
 * Downloads a detail page and extracts its book information into a {@code BookDD}.
 *
 * <p>The CSS selector for each field is read from the properties returned by
 * {@code PropertiesUtils.getProperties()}. The populated book is only printed to standard
 * output; persisting it is still a TODO.
 *
 * @param visitUrl URL of the detail page to download and parse
 */
public static void process(String visitUrl){
	// Download the page.
	String pageHtml = HttpConnnectionManager.getHtml(visitUrl);
	Document doc = Jsoup.parse(pageHtml);
	BookDD book = new BookDD();

	// Parse the individual fields.
	String authorSelector = PropertiesUtils.getProperties().getProperty("author");
	book.setAuthor(doc.select(authorSelector).text());

	String isbnSelector = PropertiesUtils.getProperties().getProperty("isbn");
	book.setIsbn(doc.select(isbnSelector).text());

	// The first character of the price text is skipped before parsing
	// (presumably a currency symbol -- TODO confirm).
	String priceSelector = PropertiesUtils.getProperties().getProperty("price");
	String priceText = doc.select(priceSelector).text();
	book.setPrice(Double.parseDouble(priceText.substring(1)));

	String outlineSelector = PropertiesUtils.getProperties().getProperty("outline");
	book.setOutLine(doc.select(outlineSelector).text());

	String nameSelector = PropertiesUtils.getProperties().getProperty("bookName");
	book.setBookName(doc.select(nameSelector).text());

	String coverSelector = PropertiesUtils.getProperties().getProperty("pic");
	book.setCover_pic(doc.select(coverSelector).attr("wsrc").trim());

	// Re-read the outline; when it is itself markup starting with <p>, keep only the
	// paragraph text. The stored outline is capped at 2000 characters.
	String outline = doc.select(PropertiesUtils.getProperties().getProperty("outline")).text().trim();
	if(outline.startsWith("<p>")){
		outline = Jsoup.parse(outline).select("p").text();
	}
	if(outline.length() < 2000){
		book.setOutLine(outline);
	}else{
		book.setOutLine(outline.substring(0, 2000));
	}

	// Entity is now populated; print a summary.
	System.out.println("bookName---->"+book.getBookName()+"<-->"+book.getAuthor()+"<-->"+book.getOutLine()+"<-->"+book.getIsbn());

	//TODO
}
 
Example #8
Source Project: rank   Author: ysc   File: DynamicIp.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks connectivity by running a Baidu search and looking for the search term in the
 * title of the result page.
 *
 * @return {@code true} only when the page was fetched and its title contains the search
 *         term; {@code false} otherwise, including when any exception occurs
 */
public static boolean isConnected(){
    boolean connected = false;
    try {
        // The timestamp parameter makes every request URL unique.
        String searchUrl = "http://www.baidu.com/s?wd=杨尚川&t=" + System.currentTimeMillis();
        Document doc = Jsoup.connect(searchUrl)
                .header("Accept", ACCEPT)
                .header("Accept-Encoding", ENCODING)
                .header("Accept-Language", LANGUAGE)
                .header("Connection", CONNECTION)
                .header("Referer", "https://www.baidu.com")
                .header("Host", "www.baidu.com")
                .header("User-Agent", USER_AGENT)
                .ignoreContentType(true)
                .timeout(30000)
                .get();
        String title = doc.title();
        LOGGER.info("搜索结果页面标题:"+title);
        connected = title != null && title.contains("杨尚川");
    }catch (Exception e){
        // An unreachable network is the expected failure and is not logged.
        if(!"Network is unreachable".equals(e.getMessage())){
            LOGGER.error("状态检查失败:"+e.getMessage());
        }
    }
    return connected;
}
 
Example #9
Source Project: AppApis   Author: zuiwuyuan   File: TieTuActivity.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Starts a new thread that downloads the page at {@code TieTuApi.TIETU_DETAIL2} and hands
 * the parsed document to {@code TietuUtil.getTietuDetail}.
 *
 * @param view not used by this method
 */
public void getTietuDetail(View view) {
    final String url = TieTuApi.TIETU_DETAIL2;

    Runnable fetchTask = new Runnable() {
        @Override
        public void run() {
            System.out.println(url);

            try {
                // 10 second timeout for the request.
                Document doc = Jsoup.connect(url).timeout(10000).get();
                if (doc == null) {
                    return;
                }
                new TietuUtil().getTietuDetail(TieTuActivity.this, doc);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    };

    new Thread(fetchTask).start();
}
 
Example #10
Source Project: coolreader   Author: dotcool   File: CommonParser.java    License: MIT License 6 votes vote down vote up
/**
 * Builds the image model for a page of the form /project/index.php?title=File:xxx.
 *
 * <p>The image location is the {@code href} of the first link inside the first
 * {@code .fullMedia} element under {@code #mw-content-text}, appended to the base URL.
 *
 * @param doc parsed image page
 * @return the image model; its URL is left unset when the combined URL is malformed
 */
public static ImageModel parseImagePage(Document doc) {
	ImageModel image = new ImageModel();

	String imageUrl = doc.select("#mw-content-text").first()
			.select(".fullMedia").first()
			.select("a").first()
			.attr("href");

	String fullUrl = UIHelper.getBaseUrl(LNReaderApplication.getInstance().getApplicationContext()) + imageUrl;
	try {
		image.setUrl(new URL(fullUrl));
	} catch (MalformedURLException e) {
		// Not expected to happen; log and return the model without a URL.
		Log.e(TAG, "Invalid URL: " + fullUrl, e);
	}
	return image;
}
 
Example #11
/**
 * Renders the search-form input template for the date property and checks that the output
 * contains a text input with id {@code dateOfBirth} bound to {@code search.dateOfBirth}.
 */
@Test
public void testGenerateBasicDateProperty() throws Exception {
    Map<String, Object> model = TestHelpers.createInspectionResultWrapper(ENTITY_NAME, DATE_PROP);

    URL templateUrl = getClass().getResource(Deployments.BASE_PACKAGE_PATH + Deployments.SEARCH_FORM_INPUT);
    Resource<URL> templateResource = resourceFactory.create(templateUrl);
    Template template = processorFactory.create(templateResource, FreemarkerTemplate.class);

    String rendered = template.process(model);
    Document html = Jsoup.parseBodyFragment(rendered);
    // The template must have produced some output.
    assertThat(rendered.trim(), not(equalTo("")));

    Elements formGroup = html.select("div.form-group");
    assertThat(formGroup, notNullValue());

    Elements input = formGroup.select("div.col-sm-10 > input");
    assertThat(input.attr("id"), equalTo("dateOfBirth"));
    assertThat(input.attr("type"), equalTo("text"));
    assertThat(input.attr("ng-model"), equalTo("search" + "." + "dateOfBirth"));
}
 
Example #12
Source Project: asciidoctorj   Author: asciidoctor   File: WhenRubyExtensionGroupIsRegistered.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Registers the Ruby {@code ResponseIncludeProcessor} through an extension group and checks
 * that {@code include::response[]} is rendered as a second paragraph containing {@code 42}.
 */
@Test
public void ruby_includeprocessor_should_be_registered() {
    asciidoctor.createGroup()
        .loadRubyClass(getClass().getResourceAsStream("/ruby-extensions/response-include-processor.rb"))
        .rubyIncludeProcessor("ResponseIncludeProcessor")
        .register();

    String source = "The response to everything is\n" +
        "\n" +
        "include::response[]" +
        "";
    String content = asciidoctor.convert(
        source,
        options().toFile(false).safe(SafeMode.SAFE).get());

    final Document document = Jsoup.parse(content);
    // The included content is expected in the second "paragraph" block.
    String includedParagraph = document
        .getElementsByClass("paragraph").get(1)
        .getElementsByTag("p").get(0)
        .toString();
    assertThat(includedParagraph, is("<p>42</p>"));
}
 
Example #13
Source Project: ProxyPool   Author: fengzhizi715   File: CoderBusyProxyListPageParser.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Extracts proxies from the rows of the {@code table.table} inside
 * {@code div.table-responsive}.
 *
 * <p>Columns 0, 2, 5 and 7 of each row supply the IP, port, type and anonymity label. When
 * {@code anonymousFlag} is set, only rows whose anonymity label contains "匿" are kept.
 *
 * @param content HTML of the proxy list page
 * @return the proxies found, in row order
 */
@Override
public List<Proxy> parse(String content) {
    Elements rows = Jsoup.parse(content)
            .select("div[class='table-responsive'] table[class='table'] tbody tr");
    List<Proxy> proxies = new ArrayList<>(rows.size());

    for (Element row : rows) {
        String ip = row.select("td:eq(0)").first().text();
        String port = row.select("td:eq(2)").first().text();
        String type = row.select("td:eq(5)").first().text();
        String anonymity = row.select("td:eq(7)").first().text();

        System.out.println("ip:"+ip);
        log.debug("parse result = "+type+"://"+ip+":"+port+"  "+anonymity);

        // Skip non-anonymous proxies when only anonymous ones are wanted.
        if (anonymousFlag && !anonymity.contains("匿")) {
            continue;
        }
        proxies.add(new Proxy(ip, Integer.valueOf(port), type, Constant.TIME_INTERVAL));
    }
    return proxies;
}
 
Example #14
Source Project: commerce-cif-connector   Author: adobe   File: GraphqlProductConsoleIT.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Requests the products console at the catalog root and checks that the root categories
 * are listed while a child category is not.
 */
@Test
public void testCategoryRoot() throws Exception {

    // Prepare
    mockServerRule.add(CATEGORY_ROOT_RULE.build());

    // Perform
    String consolePath = "/libs/commerce/gui/content/products.html" + JCR_BASE_PATH;
    SlingHttpResponse response = cAuthorAuthor.doGet(consolePath, null, NO_CACHE_HEADERS,
        SC_OK);

    // Verify
    mockServerRule.verify();
    Document doc = Jsoup.parse(response.getContent());

    // Every root category must appear as a column item.
    String[] rootCategories = {"/equipment", "/men", "/women"};
    for (String category : rootCategories) {
        String selector = String.format(CORAL_COLUMN_FORMAT_EQUALS, JCR_BASE_PATH + category);
        Assert.assertTrue(doc.select(selector).size() > 0);
    }

    // Child categories must not be displayed at root level.
    String childSelector = String.format(CORAL_COLUMN_FORMAT_EQUALS, JCR_BASE_PATH + "/men/pants");
    Assert.assertEquals(0, doc.select(childSelector).size());
}
 
Example #15
Source Project: astor   Author: SpoonLabs   File: CleanerTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
// Checks that caller-supplied output settings override the defaults used by Jsoup.clean.
@Test public void supplyOutputSettings() {
    Document.OutputSettings settings = new Document.OutputSettings()
        .prettyPrint(false)
        .escapeMode(Entities.EscapeMode.extended)
        .charset("ascii");

    String input = "<div><p>&bernou;</p></div>";
    String withCustomSettings = Jsoup.clean(input, "http://foo.com/", Whitelist.relaxed(), settings);
    String withDefaults = Jsoup.clean(input, "http://foo.com/", Whitelist.relaxed());
    assertNotSame(withDefaults, withCustomSettings);

    // Extended escaping prefers the shorter entity name when several aliases exist.
    assertEquals("<div><p>&Bscr;</p></div>", withCustomSettings);
    // Defaults pretty-print and keep the character itself.
    assertEquals("<div>\n" +
        " <p>ℬ</p>\n" +
        "</div>", withDefaults);

    // With base escaping and an ASCII charset the character becomes a numeric reference.
    settings.charset("ASCII");
    settings.escapeMode(Entities.EscapeMode.base);
    String withBaseEscaping = Jsoup.clean(input, "http://foo.com/", Whitelist.relaxed(), settings);
    assertEquals("<div><p>&#x212c;</p></div>", withBaseEscaping);
}
 
Example #16
Source Project: commerce-cif-connector   Author: adobe   File: GraphqlProductConsoleIT.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Requests the folder properties page of the {@code /men/coats} category and checks that
 * the title field shows "Coats" and is disabled.
 */
@Test
public void testCategoryFolderProperties() throws Exception {

    // Prepare
    mockServerRule.add(CATEGORY_COATS_RULE.build());

    // Perform
    String propertiesPath = FOLDER_PROPERTIES + JCR_BASE_PATH + "/men/coats";
    SlingHttpResponse response = cAuthorAuthor.doGet(propertiesPath, null, NO_CACHE_HEADERS,
        SC_OK);

    // Verify
    mockServerRule.verify();
    Document doc = Jsoup.parse(response.getContent());

    // Verify the title property field
    String titleField = "input[name=jcr:title]";
    Assert.assertEquals("Coats", doc.select(titleField).val());
    Assert.assertTrue(doc.select(titleField).hasAttr("disabled"));
}
 
Example #17
Source Project: flow   Author: vaadin   File: WebComponentBootstrapHandler.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates and initialises the UI for the request, marks the response as non-cacheable and
 * writes the head of the bootstrap page together with the service URL to the response.
 *
 * @return always {@code true}
 * @throws IOException declared by the handler contract
 */
@Override
public boolean synchronizedHandleRequest(VaadinSession session,
        VaadinRequest request, VaadinResponse response) throws IOException {
    // Resolve the UI class for this request and set up its bootstrap context.
    Class<? extends UI> uiClass = getUIClass(request);
    BootstrapContext context = createAndInitUI(uiClass, request, response, session);

    HandlerHelper.setResponseNoCacheHeaders(response::setHeader, response::setDateHeader);

    String serviceUrl = getServiceUrl(request, response);

    Document bootstrapPage = getPageBuilder().getBootstrapPage(context);
    writeBootstrapPage(response, bootstrapPage.head(), serviceUrl);

    return true;
}
 
Example #18
Source Project: jinjava   Author: HubSpot   File: SliceFilterTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Renders {@code filter/slice-filter.jinja} with seven items and checks that they are
 * distributed over three columns holding 3, 3 and 1 items.
 */
@Test
public void itSlicesLists() throws Exception {
  String template = Resources.toString(
    Resources.getResource("filter/slice-filter.jinja"),
    StandardCharsets.UTF_8
  );
  ImmutableMap<String, Object> bindings = ImmutableMap.of(
    "items",
    (Object) Lists.newArrayList("a", "b", "c", "d", "e", "f", "g")
  );

  Document dom = Jsoup.parseBodyFragment(jinjava.render(template, bindings));

  String wrapper = ".columwrapper";
  assertThat(dom.select(wrapper + " ul")).hasSize(3);
  assertThat(dom.select(wrapper + " .column-1 li")).hasSize(3);
  assertThat(dom.select(wrapper + " .column-2 li")).hasSize(3);
  assertThat(dom.select(wrapper + " .column-3 li")).hasSize(1);
}
 
Example #19
Source Project: asciidoctorj   Author: asciidoctor   File: WhenAttributesAreUsedInAsciidoctor.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts {@code renderwithfrontmatter.adoc} with the skip-front-matter attribute enabled
 * and checks that the output contains no {@code <hr>} element.
 *
 * @throws IOException declared for the conversion and resource lookup
 */
@Test
public void should_skip_front_matter_if_specified_by_skip_front_matter_attribute()
        throws IOException {

    Attributes attributes = attributes().skipFrontMatter(true).get();
    Options options = options().toFile(false).inPlace(false).attributes(attributes).get();

    String content = asciidoctor.convertFile(classpath.getResource("renderwithfrontmatter.adoc"), options);
    // The second String argument of Jsoup.parse(String, String) is a base URI, not a
    // charset name, so the single-argument overload is the correct call for an
    // already-decoded string.
    Document doc = Jsoup.parse(content);
    Elements hrElements = doc.getElementsByTag("hr");

    assertThat(hrElements.size(), is(0));

}
 
Example #20
Source Project: flow   Author: vaadin   File: IndexHtmlRequestHandler.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Inserts a {@code <script initial>} element as the first child of the document head. The
 * script assigns the serialised {@code initialJson} to {@code window.Vaadin.TypeScript}.
 *
 * <p>When the session has a CSRF token, it is stored in {@code initialJson} under
 * {@code CSRF_TOKEN} before serialisation.
 *
 * @param initialJson   initial state to embed; may be modified by this method
 * @param indexDocument document whose head receives the script
 * @param session       session supplying the CSRF token
 */
private void addInitialFlow(JsonObject initialJson, Document indexDocument,
                            VaadinSession session) {
    String csrfToken = session.getCsrfToken();
    if (csrfToken != null) {
        initialJson.put(CSRF_TOKEN, csrfToken);
    }

    String bootstrapScript =
            "window.Vaadin = {TypeScript: " + JsonUtil.stringify(initialJson) + "};";

    Element script = new Element("script")
            .attr("initial", "")
            .appendChild(new DataNode(bootstrapScript));
    indexDocument.head().insertChildren(0, script);
}
 
Example #21
Source Project: substitution-schedule-parser   Author: vertretungsplanme   File: SVPlanDemoTest.java    License: Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Parses the third demo page and checks the schedule's last-change time, its single day and
 * the basic completeness of each of that day's 32 substitutions.
 */
@Test
public void demoTest3() throws IOException, JSONException {
    List<Document> pages = new ArrayList<>();
    pages.add(Jsoup.parse(html3));
    SubstitutionSchedule schedule = parser.parseSVPlanSchedule(pages);

    assertEquals(new LocalDateTime(2017, 5, 2, 7, 19), schedule.getLastChange());
    assertEquals(1, schedule.getDays().size());

    SubstitutionScheduleDay firstDay = schedule.getDays().get(0);

    assertEquals(new LocalDate(2017, 5, 2), firstDay.getDate());
    assertEquals(32, firstDay.getSubstitutions().size());
    assertEquals(0, firstDay.getMessages().size());

    for (Substitution substitution : firstDay.getSubstitutions()) {
        // Substitutions for the subject "Profi" are allowed to have no classes.
        boolean isProfi = substitution.getSubject().equals("Profi");
        if (!isProfi) {
            assertTrue(substitution.getClasses().size() >= 1);
        }
        assertNotEmpty(substitution.getLesson());
        assertNullOrNotEmpty(substitution.getPreviousSubject());
        assertNotEmpty(substitution.getSubject());
        assertNullOrNotEmpty(substitution.getRoom());
        assertNullOrNotEmpty(substitution.getTeacher());
        assertNullOrNotEmpty(substitution.getPreviousTeacher());
        assertNullOrNotEmpty(substitution.getDesc());
        assertNotEmpty(substitution.getType());
    }
}
 
Example #22
Source Project: newsleak   Author: uhh-lt   File: ElasticsearchDocumentWriter.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Converts HTML to plain text, turning line-break and paragraph elements into newlines and
 * restoring the &gt; and &lt; entities to their characters.
 *
 * @param html
 *            the HTML to convert; may be {@code null}
 * @return the plain text, or {@code null} when {@code html} is {@code null}
 */
public static String replaceHtmlLineBreaks(String html) {
	if (html == null) {
		return null;
	}

	Document document = Jsoup.parse(html);
	// Without pretty printing, html() preserves line breaks and spacing.
	document.outputSettings(new Document.OutputSettings().prettyPrint(false));

	// Mark the break positions with a literal backslash-n marker, then turn the markers
	// into real newlines once the markup has been serialised.
	document.select("br").append("\\n");
	document.select("p").prepend("\\n\\n");
	String withNewlines = document.html().replace("\\n", "\n");

	// Strip every tag, again without pretty printing.
	Document.OutputSettings plainOutput = new Document.OutputSettings().prettyPrint(false);
	String text = Jsoup.clean(withNewlines, "", Whitelist.none(), plainOutput);

	return text.replace("&gt;", ">").replace("&lt;", "<");
}
 
Example #23
Source Project: jsoup-learning   Author: code4craft   File: ParseTest.java    License: MIT License 5 votes vote down vote up
/**
 * Parses a saved Google result page for "ipod" and checks its title, the number of result
 * links and the targets of the first two results.
 */
@Test
public void testGoogleSearchIpod() throws IOException {
    File page = getFile("/htmltests/google-ipod.html");
    String baseUri = "http://www.google.com/search?hl=en&q=ipod&aq=f&oq=&aqi=g10";
    Document doc = Jsoup.parse(page, "UTF-8", baseUri);
    assertEquals("ipod - Google Search", doc.title());

    Elements resultLinks = doc.select("h3.r > a");
    assertEquals(12, resultLinks.size());

    String firstHref = resultLinks.get(0).attr("href");
    assertEquals(
        "http://news.google.com/news?hl=en&q=ipod&um=1&ie=UTF-8&ei=uYlKS4SbBoGg6gPf-5XXCw&sa=X&oi=news_group&ct=title&resnum=1&ved=0CCIQsQQwAA",
        firstHref);

    String secondHref = resultLinks.get(1).attr("href");
    assertEquals("http://www.apple.com/itunes/", secondHref);
}
 
Example #24
Source Project: PicKing   Author: lanyuanxiaoyao   File: Yande.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Finds the URL of the next page from the paginator's {@code a.next_page} link.
 *
 * @param baseUrl    prefix prepended to the link's {@code href}
 * @param currentUrl not used by this implementation
 * @param result     raw page bytes, decoded as UTF-8
 * @return {@code baseUrl} plus the first next-page link's {@code href}, or an empty string
 *         when the page has no such link
 * @throws UnsupportedEncodingException declared by the charset-name based decoding
 */
@Override
public String getContentNext(String baseUrl, String currentUrl, byte[] result) throws UnsupportedEncodingException {
    String html = new String(result, "utf-8");
    Document document = Jsoup.parse(html);
    Log.e("Yande", "getSinglePicContent: " + html);

    Elements nextLinks = document.select("div#paginator a.next_page");
    if (nextLinks.isEmpty()) {
        return "";
    }

    String nextUrl = baseUrl + nextLinks.get(0).attr("href");
    Log.e("Yande", "getContentNext: " + nextUrl);
    return nextUrl;
}
 
Example #25
Source Project: gocd   Author: gocd   File: BuildDetailPageVelocityTemplateTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Renders the build detail view with the iframe sandbox disabled and checks that the tests
 * tab content contains an iframe whose first attribute is {@code src}.
 */
@Test
public void shouldRenderANormalIframeForTestsTabIfUserHasDisabledSandbox() throws IOException {
    JobDetailPresentationModel presenter = mock(JobDetailPresentationModel.class);
    when(presenter.hasTests()).thenReturn(true);

    HashMap<String, Object> model = new HashMap<>();
    model.put("presenter", presenter);
    model.put("useIframeSandbox", false);

    Document rendered = Jsoup.parse(getBuildDetailVelocityView(model).render());
    String testsTabHtml = rendered.select("#tab-content-of-tests").last().html();

    assertThat(testsTabHtml, containsString("<iframe src="));
}
 
Example #26
Source Project: ripme   Author: RipMeApp   File: ThechiveRipper.java    License: MIT License 5 votes vote down vote up
/**
 * Decides whether there is another page to rip.
 *
 * @param doc not used by this implementation
 * @return {@code null} for single-post URLs and when the next JSON page has no uploads;
 *         otherwise an empty placeholder document
 * @throws IOException when there is no next seed or the next page cannot be fetched
 */
@Override
public Document getNextPage(Document doc) throws IOException {
    // URLs of the form thechive.com/YEAR/MONTH/DAY/POSTTITLE/ have a single page.
    if (p1.matcher(url.toExternalForm()).matches()) {
        return null;
    }
    if (nextSeed == null) {
        throw new IOException("No more pages.");
    }

    // Check up front whether the next JSON object has images. This avoids the
    // IOException that rip() raises when getURLsFromPage() returns an empty list.
    JSONArray uploads;
    try {
        Response response = Http.url(jsonUrl)
                .data("seed", nextSeed)
                .data("queryType", "by-username")
                .data("username", username)
                .ignoreContentType()
                .cookies(cookies)
                .response();
        cookies = response.cookies();
        uploads = new JSONObject(response.body()).getJSONArray("uploads");
    } catch (Exception e) {
        throw new IOException("Error fetching next page.", e);
    }

    boolean hasUploads = uploads != null && uploads.length() > 0;
    // The document itself is of no use for thechive.com/userName URLs, so an empty one
    // is returned; null signals the last page.
    return hasUploads ? new Document(url.toString()) : null;
}
 
Example #27
Source Project: ripme   Author: RipMeApp   File: ImgboxRipper.java    License: MIT License 5 votes vote down vote up
/**
 * Derives image URLs from the thumbnails in {@code div.boxed-content > a > img}.
 *
 * <p>Each thumbnail {@code src} is rewritten by replacing {@code thumbs} with
 * {@code images}, {@code _b} with {@code _o}, and any digit followed by {@code -s} with
 * {@code i}.
 *
 * @param doc parsed gallery page
 * @return the rewritten URLs in page order
 */
@Override
public List<String> getURLsFromPage(Document doc) {
    List<String> urls = new ArrayList<>();
    for (Element thumbnail : doc.select("div.boxed-content > a > img")) {
        String rewritten = thumbnail.attr("src")
                .replace("thumbs", "images")
                .replace("_b", "_o")
                .replaceAll("\\d-s", "i");
        urls.add(rewritten);
    }
    return urls;
}
 
Example #28
Source Project: PicKing   Author: lanyuanxiaoyao   File: AKabe.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Finds the URL of the next page from the {@code .pageButeNav a.link_next} link.
 *
 * @param baseUrl    not used by this implementation
 * @param currentUrl not used by this implementation
 * @param result     raw page bytes, decoded as UTF-8
 * @return the first next-page link's {@code href}, or an empty string when there is none
 * @throws UnsupportedEncodingException declared by the charset-name based decoding
 */
@Override
public String getContentNext(String baseUrl, String currentUrl, byte[] result) throws UnsupportedEncodingException {
    String html = new String(result, "utf-8");
    Elements nextLinks = Jsoup.parse(html).select(".pageButeNav a.link_next");
    return nextLinks.isEmpty() ? "" : nextLinks.get(0).attr("href");
}
 
Example #29
Source Project: ripme   Author: RipMeApp   File: NfsfwRipper.java    License: MIT License 5 votes vote down vote up
/**
 * Collects the sub-album URLs linked from the given page.
 *
 * @param page parsed album page
 * @return one URL per {@code td.IMG > a} link, each {@code href} prefixed with the site
 *         root; empty when the page has no sub-albums
 */
private List<String> getSubalbumURLs(Document page){
    List<String> urls = new ArrayList<>();
    for (Element link : page.select("td.IMG > a")) {
        urls.add("http://nfsfw.com" + link.attr("href"));
    }
    return urls;
}
 
Example #30
Source Project: rank   Author: ysc   File: ITEYEBlogSimilarChecker.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Downloads a blog page and extracts its title and content.
 *
 * <p>Within each element matching {@code BLOG_CSS_PATH}, a title or content is taken only
 * when its selector matches exactly one element; later matches overwrite earlier ones.
 *
 * @param url address of the blog post
 * @return the blog, or {@code null} when the title or content is missing or any exception
 *         occurs (the exception is logged)
 */
private Blog getBlog(String url) {
    try {
        Document doc = Jsoup.parse(getHtml(url));

        String title = null;
        String content = null;
        for (Element container : doc.select(BLOG_CSS_PATH)) {
            Elements titleMatches = container.select(BLOG_TITLE_CSS_PATH);
            if (titleMatches.size() == 1) {
                title = titleMatches.get(0).text();
            }
            Elements contentMatches = container.select(BLOG_CONTENT_CSS_PATH);
            if (contentMatches.size() == 1) {
                content = contentMatches.get(0).text();
            }
        }

        if (title == null || content == null) {
            return null;
        }

        Blog blog = new Blog();
        blog.setUrl(url);
        blog.setTitle(title);
        blog.setContent(content);
        return blog;
    } catch (Exception e) {
        LOGGER.error("获取博文失败", e);
        return null;
    }
}