org.jsoup.nodes.Document Java Examples

The following examples show how to use org.jsoup.nodes.Document. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TieTuActivity.java    From AppApis with Apache License 2.0 6 votes vote down vote up
/**
 * Fetches the page at {@code TieTuApi.TIETU_DETAIL2} on a newly started thread and
 * passes the parsed document to {@code TietuUtil.getTietuDetail}.
 *
 * @param view the view that triggered the call (not used by this method)
 */
public void getTietuDetail(View view) {
    final String url = TieTuApi.TIETU_DETAIL2;

    Runnable fetchTask = new Runnable() {
        @Override
        public void run() {
            System.out.println(url);
            try {
                Document page = Jsoup.connect(url).timeout(10000).get();
                if (page == null) {
                    return;
                }
                new TietuUtil().getTietuDetail(TieTuActivity.this, page);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    };

    new Thread(fetchTask).start();
}
 
Example #2
Source File: WebComponentBootstrapHandler.java    From flow with Apache License 2.0 6 votes vote down vote up
/**
 * Creates and initialises the UI for the request, applies the no-cache response
 * headers and writes the head of the generated bootstrap page to the response.
 *
 * @return always {@code true}
 * @throws IOException declared by the signature; propagated from the calls below
 */
@Override
public boolean synchronizedHandleRequest(VaadinSession session,
        VaadinRequest request, VaadinResponse response) throws IOException {
    // Look up the UI class and initialise it for this request.
    BootstrapContext context = createAndInitUI(getUIClass(request), request,
            response, session);

    HandlerHelper.setResponseNoCacheHeaders(response::setHeader,
            response::setDateHeader);

    // Resolved before the page is built, keeping the original call order.
    String serviceUrl = getServiceUrl(request, response);

    writeBootstrapPage(response,
            getPageBuilder().getBootstrapPage(context).head(), serviceUrl);
    return true;
}
 
Example #3
Source File: FreemarkerClientPartialsSearchInputTest.java    From angularjs-addon with Eclipse Public License 1.0 6 votes vote down vote up
@Test
public void testGenerateBasicNumberProperty() throws Exception {
    // Render the search-form input template for the number property.
    Map<String, Object> model = TestHelpers.createInspectionResultWrapper(ENTITY_NAME, NUMBER_PROP);
    URL templateUrl = getClass().getResource(Deployments.BASE_PACKAGE_PATH + Deployments.SEARCH_FORM_INPUT);
    Resource<URL> templateResource = resourceFactory.create(templateUrl);
    String rendered = processorFactory.create(templateResource, FreemarkerTemplate.class).process(model);

    Document fragment = Jsoup.parseBodyFragment(rendered);
    assertThat(rendered.trim(), not(equalTo("")));

    Elements formGroup = fragment.select("div.form-group");
    assertThat(formGroup, notNullValue());

    // The generated input must be bound to the "score" search field.
    Elements input = formGroup.select("div.col-sm-10 > input");
    assertThat(input.attr("id"), equalTo("score"));
    assertThat(input.attr("type"), equalTo("text"));
    assertThat(input.attr("ng-model"), equalTo("search.score"));
}
 
Example #4
Source File: ComicRecommendPresenter.java    From HHComicViewer with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link ComicTabList} from the anchors, images and list items found inside
 * the {@code div} whose id equals {@code divId}.
 *
 * <p>The i-th {@code a.image_link}, {@code img} and {@code li} are read as describing
 * the same comic. NOTE(review): this assumes the three selections have matching
 * lengths and that the href, alt and li texts have the expected shape; a mismatch
 * still raises the same runtime exception as before.
 *
 * @param doc   parsed page to read from
 * @param divId id of the container div
 * @param title title passed through to the resulting tab list
 * @return the tab list; it holds no comics when the container div is not in the page
 */
private ComicTabList getComicTabList(Document doc, String divId, String title) {
    List<Comic> comics = new ArrayList<>();

    Element section = doc.select("div[id=" + divId + "]").first();
    if (section == null) {
        // first() yields null when nothing matched; previously this led to a
        // NullPointerException on the next select call.
        return new ComicTabList(comics, title);
    }

    Elements links = section.select("a[class=image_link]");
    Elements thumbs = section.select("img");
    Elements infos = section.select("li");

    for (int i = 0; i < links.size(); i++) {
        Element link = links.get(i);
        Comic comic = new Comic();
        comic.setTitle(link.attr("title"));

        // The comic id is the part of the href after the configured prefix, up to the first dot.
        String href = link.attr("href");
        String afterPrefix = href.substring(HHApplication.getInstance()
                .getHHWebVariable().getPre().length());
        comic.setCid(Integer.parseInt(afterPrefix.split("\\.")[0]));

        Element thumb = thumbs.get(i);
        comic.setThumbnailUrl(thumb.attr("src"));
        // The author is the text after " - " in the alt attribute, cut at the first "20".
        String altText = thumb.attr("alt");
        comic.setAuthor(altText.split(" - ")[1].split("20")[0]);

        // The status is the bracketed part of the list item text, with the "[" restored.
        comic.setComicStatus("[" + infos.get(i).text().split("\\[")[1]);
        comics.add(comic);
    }
    return new ComicTabList(comics, title);
}
 
Example #5
Source File: SliceFilterTest.java    From jinjava with Apache License 2.0 6 votes vote down vote up
@Test
public void itSlicesLists() throws Exception {
  // Render the slice-filter template with seven items.
  String template = Resources.toString(
    Resources.getResource("filter/slice-filter.jinja"),
    StandardCharsets.UTF_8
  );
  ImmutableMap<String, Object> bindings = ImmutableMap.of(
    "items",
    (Object) Lists.newArrayList("a", "b", "c", "d", "e", "f", "g")
  );
  Document dom = Jsoup.parseBodyFragment(jinjava.render(template, bindings));

  assertThat(dom.select(".columwrapper ul")).hasSize(3);

  // Expected number of list items in column-1, column-2 and column-3.
  int[] expectedItems = {3, 3, 1};
  for (int column = 1; column <= expectedItems.length; column++) {
    assertThat(dom.select(".columwrapper .column-" + column + " li"))
      .hasSize(expectedItems[column - 1]);
  }
}
 
Example #6
Source File: BootstrapHandler.java    From flow with Apache License 2.0 6 votes vote down vote up
/**
 * Creates and initialises the UI for the request, applies the no-cache response
 * headers and writes the generated bootstrap page's outer HTML to the response.
 *
 * @return always {@code true}
 * @throws IOException declared by the signature; propagated from the calls below
 */
@Override
public boolean synchronizedHandleRequest(VaadinSession session,
        VaadinRequest request, VaadinResponse response) throws IOException {
    // Look up the UI class and initialise it for this request.
    BootstrapContext context = createAndInitUI(getUIClass(request), request,
            response, session);

    HandlerHelper.setResponseNoCacheHeaders(response::setHeader,
            response::setDateHeader);

    String pageHtml = pageBuilder.getBootstrapPage(context).outerHtml();
    writeBootstrapPage(response, pageHtml);

    return true;
}
 
Example #7
Source File: Aitaotu.java    From PicKing with Apache License 2.0 6 votes vote down vote up
/**
 * Parses a detail page and stores one {@code PicInfo} per image found under
 * {@code #big-pic} into {@code resultMap}, together with {@code currentUrl}.
 * Every picture carries the same page title and tag list.
 *
 * @param result raw page bytes, decoded as UTF-8
 * @return the same {@code resultMap} instance that was passed in
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    Document page = Jsoup.parse(new String(result, "utf-8"));
    Elements images = page.select("#big-pic img");

    // Page title: text of the first "#photos h1", or empty when there is none.
    Elements headings = page.select("#photos h1");
    String pageTitle = headings.isEmpty() ? "" : headings.get(0).text();

    List<String> tagNames = new ArrayList<>();
    for (Element tagLink : page.select(".fbl a")) {
        tagNames.add(tagLink.text());
    }

    List<PicInfo> pictures = new ArrayList<>();
    for (Element image : images) {
        pictures.add(new PicInfo()
                .setTags(tagNames)
                .setTitle(pageTitle)
                .setPicUrl(image.attr("src")));
    }

    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, pictures);
    return resultMap;
}
 
Example #8
Source File: GraphqlProductConsoleIT.java    From commerce-cif-connector with Apache License 2.0 6 votes vote down vote up
@Test
public void testCategoryFolderProperties() throws Exception {
    // Prepare: register the mock rule for the coats category.
    mockServerRule.add(CATEGORY_COATS_RULE.build());

    // Perform: request the folder properties page of /men/coats.
    String propertiesPath = FOLDER_PROPERTIES + JCR_BASE_PATH + "/men/coats";
    SlingHttpResponse response = cAuthorAuthor.doGet(propertiesPath, null, NO_CACHE_HEADERS, SC_OK);

    // Verify: the mock was hit and the title field is populated and disabled.
    mockServerRule.verify();
    Elements titleField = Jsoup.parse(response.getContent()).select("input[name=jcr:title]");

    Assert.assertEquals("Coats", titleField.val());
    Assert.assertTrue(titleField.hasAttr("disabled"));
}
 
Example #9
Source File: ReadTadPagePresenter.java    From GankGirl with GNU Lesser General Public License v2.1 6 votes vote down vote up
/**
 * Downloads {@code Constants.API_URL_READ}, turns every link inside
 * {@code div#xiandu_cat} into a {@code ReadTypeBean} and emits the resulting list.
 *
 * <p>On an {@link IOException} only {@code onError} is signalled. The earlier version
 * went on to call {@code onNext} and {@code onComplete} after {@code onError}.
 *
 * @param subscriber emitter that receives the list followed by completion, or the error
 */
@Override
public void subscribe(ObservableEmitter<List<ReadTypeBean>> subscriber) throws Exception {
    List<ReadTypeBean> datas = new ArrayList<>();
    try {
        Document doc = Jsoup.connect(Constants.API_URL_READ).get();
        Elements tads = doc.select("div#xiandu_cat").select("a");
        for (Element tad : tads) {
            ReadTypeBean bean = new ReadTypeBean();
            bean.setTitle(tad.text()); // the link text is the title
            bean.setUrl(tad.absUrl("href")); // absUrl resolves the href to an absolute URL
            datas.add(bean);
            Log.v("Jsoup","title= "+bean.getTitle()+"   url= "+bean.getUrl());
        }
    } catch (IOException e) {
        subscriber.onError(e);
        // onError is terminal: do not emit a partial list or a completion afterwards.
        return;
    }

    subscriber.onNext(datas);
    subscriber.onComplete();
}
 
Example #10
Source File: HtmlParserTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
@Test public void handlesBaseTags() {
    // Only the first <base href> is honoured; later ones are ignored.
    String h = "<a href=1>#</a><base href='/2/'><a href='3'>#</a><base href='http://bar'><a href=/4>#</a>";
    String expectedBase = "http://foo/2/";

    Document doc = Jsoup.parse(h, "http://foo/");
    assertEquals(expectedBase, doc.baseUri()); // set once, so doc and descendants see the first base only

    Elements anchors = doc.getElementsByTag("a");
    assertEquals(3, anchors.size());

    // Every anchor shares the document's base URI.
    for (int i = 0; i < 3; i++) {
        assertEquals(expectedBase, anchors.get(i).baseUri());
    }

    // Each href is resolved against that single base.
    String[] expectedAbsUrls = {"http://foo/2/1", "http://foo/2/3", "http://foo/4"};
    for (int i = 0; i < expectedAbsUrls.length; i++) {
        assertEquals(expectedAbsUrls[i], anchors.get(i).absUrl("href"));
    }
}
 
Example #11
Source File: Processer3.java    From Crawer with MIT License 6 votes vote down vote up
/**
 * Processes a book detail page: downloads it, extracts the book fields using the
 * selectors configured in {@code PropertiesUtils} and prints a summary line.
 *
 * <p>The outline is now selected and stored once. The earlier version set it twice,
 * and the first value was always overwritten.
 *
 * @param visitUrl URL of the detail page to download and parse
 */
public static void process(String visitUrl){
	// Download the page.
	Document doc = Jsoup.parse(HttpConnnectionManager.getHtml(visitUrl));//Jsoup.connect(visitUrl).get();
	BookDD book = new BookDD();
	// Parse the data.
	book.setAuthor(selectText(doc, "author"));
	book.setIsbn(selectText(doc, "isbn"));
	// The first character of the price text is skipped before parsing
	// (presumably a currency symbol; verify against the page markup).
	book.setPrice(Double.parseDouble(selectText(doc, "price").substring(1)));
	book.setBookName(selectText(doc, "bookName"));
	book.setCover_pic(doc.select(PropertiesUtils.getProperties().getProperty("pic")).attr("wsrc").trim());

	String outline = selectText(doc, "outline").trim();
	if(outline.startsWith("<p>")){
		// The outline text itself holds markup: keep only the paragraph text.
		outline = Jsoup.parse(outline).select("p").text();
	}
	// The stored outline is capped at 2000 characters.
	book.setOutLine(outline.length() < 2000 ? outline : outline.substring(0, 2000));
	// The entity is populated; print a summary.
	System.out.println("bookName---->"+book.getBookName()+"<-->"+book.getAuthor()+"<-->"+book.getOutLine()+"<-->"+book.getIsbn());

	//TODO
}

/** Returns the combined text of the elements matched by the selector stored under {@code selectorKey}. */
private static String selectText(Document doc, String selectorKey){
	return doc.select(PropertiesUtils.getProperties().getProperty(selectorKey)).text();
}
 
Example #12
Source File: CleanerTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
@Test public void supplyOutputSettings() {
    // Custom output settings passed to clean() must override the document defaults.
    Document.OutputSettings settings = new Document.OutputSettings()
        .prettyPrint(false)
        .escapeMode(Entities.EscapeMode.extended)
        .charset("ascii");

    String html = "<div><p>&bernou;</p></div>";
    String baseUri = "http://foo.com/";
    String customOut = Jsoup.clean(html, baseUri, Whitelist.relaxed(), settings);
    String defaultOut = Jsoup.clean(html, baseUri, Whitelist.relaxed());
    assertNotSame(defaultOut, customOut);

    assertEquals("<div><p>&Bscr;</p></div>", customOut); // entities now prefer the shorter name if aliased
    assertEquals("<div>\n <p>ℬ</p>\n</div>", defaultOut);

    // With the base escape mode the character is written as a numeric reference.
    settings.charset("ASCII").escapeMode(Entities.EscapeMode.base);
    String customOut2 = Jsoup.clean(html, baseUri, Whitelist.relaxed(), settings);
    assertEquals("<div><p>&#x212c;</p></div>", customOut2);
}
 
Example #13
Source File: GraphqlProductConsoleIT.java    From commerce-cif-connector with Apache License 2.0 6 votes vote down vote up
@Test
public void testCategoryRoot() throws Exception {
    // Prepare: register the mock rule for the root category.
    mockServerRule.add(CATEGORY_ROOT_RULE.build());

    // Perform: load the products console at the base path.
    String consolePath = "/libs/commerce/gui/content/products.html" + JCR_BASE_PATH;
    SlingHttpResponse response = cAuthorAuthor.doGet(consolePath, null, NO_CACHE_HEADERS, SC_OK);

    // Verify
    mockServerRule.verify();
    Document doc = Jsoup.parse(response.getContent());

    // Each root category must have at least one matching column.
    String[] rootCategories = {"/equipment", "/men", "/women"};
    for (String category : rootCategories) {
        String selector = String.format(CORAL_COLUMN_FORMAT_EQUALS, JCR_BASE_PATH + category);
        Assert.assertTrue(doc.select(selector).size() > 0);
    }

    // Child categories must not be displayed.
    String childSelector = String.format(CORAL_COLUMN_FORMAT_EQUALS, JCR_BASE_PATH + "/men/pants");
    Assert.assertEquals(0, doc.select(childSelector).size());
}
 
Example #14
Source File: CoderBusyProxyListPageParser.java    From ProxyPool with Apache License 2.0 6 votes vote down vote up
/**
 * Extracts proxies from the rows of the {@code table-responsive} table in {@code content}.
 *
 * <p>Columns 0, 2, 5 and 7 of each row are read as ip, port, type and anonymity text.
 * A row is kept when {@code anonymousFlag} is false or its anonymity text contains "匿".
 * Rows that lack one of these cells, or whose port is not a number, are skipped;
 * previously such a row aborted the whole parse with an exception.
 *
 * @param content HTML of the proxy list page
 * @return the proxies found, in page order; never {@code null}
 */
@Override
public List<Proxy> parse(String content) {
    Document document = Jsoup.parse(content);
    Elements rows = document.select("div[class='table-responsive'] table[class='table'] tbody tr");
    List<Proxy> proxyList = new ArrayList<>(rows.size());
    for (Element row : rows){
        String ip = cellText(row, "td:eq(0)");
        String port = cellText(row, "td:eq(2)");
        String type = cellText(row, "td:eq(5)");
        String isAnonymous = cellText(row, "td:eq(7)");
        if (ip == null || port == null || type == null || isAnonymous == null) {
            log.debug("skipping row with missing cells");
            continue;
        }
        log.debug("parse result = "+type+"://"+ip+":"+port+"  "+isAnonymous);
        if(!anonymousFlag || isAnonymous.contains("匿")){
            Integer portNumber;
            try {
                portNumber = Integer.valueOf(port);
            } catch (NumberFormatException e) {
                log.debug("skipping row with non-numeric port = "+port);
                continue;
            }
            proxyList.add(new Proxy(ip, portNumber, type, Constant.TIME_INTERVAL));
        }
    }
    return proxyList;
}

/** Returns the text of the first element matching {@code cellSelector} in {@code row}, or {@code null} if none matches. */
private String cellText(Element row, String cellSelector) {
    Element cell = row.select(cellSelector).first();
    return cell == null ? null : cell.text();
}
 
Example #15
Source File: WhenRubyExtensionGroupIsRegistered.java    From asciidoctorj with Apache License 2.0 6 votes vote down vote up
@Test
public void ruby_includeprocessor_should_be_registered() {
    // Register the Ruby include processor through an extension group.
    asciidoctor.createGroup()
        .loadRubyClass(getClass().getResourceAsStream("/ruby-extensions/response-include-processor.rb"))
        .rubyIncludeProcessor("ResponseIncludeProcessor")
        .register();

    String source = "The response to everything is\n\ninclude::response[]";
    String content = asciidoctor.convert(
        source,
        options().toFile(false).safe(SafeMode.SAFE).get());

    // The second paragraph block must hold the content supplied by the include processor.
    String secondParagraph = Jsoup.parse(content)
        .getElementsByClass("paragraph").get(1)
        .getElementsByTag("p").get(0)
        .toString();
    assertThat(secondParagraph, is("<p>42</p>"));
}
 
Example #16
Source File: DynamicIp.java    From rank with Apache License 2.0 6 votes vote down vote up
/**
 * Checks connectivity by running a Baidu search and testing whether the result
 * page title contains the search term.
 *
 * @return {@code true} when the page was fetched and its title contains the term;
 *         {@code false} otherwise, including on any exception
 */
public static boolean isConnected(){
    try {
        Document resultPage = Jsoup.connect("http://www.baidu.com/s?wd=杨尚川&t=" + System.currentTimeMillis())
                .header("Accept", ACCEPT)
                .header("Accept-Encoding", ENCODING)
                .header("Accept-Language", LANGUAGE)
                .header("Connection", CONNECTION)
                .header("Referer", "https://www.baidu.com")
                .header("Host", "www.baidu.com")
                .header("User-Agent", USER_AGENT)
                .ignoreContentType(true)
                .timeout(30000)
                .get();
        String pageTitle = resultPage.title();
        LOGGER.info("搜索结果页面标题:"+pageTitle);
        return pageTitle != null && pageTitle.contains("杨尚川");
    }catch (Exception e){
        // An unreachable network is an expected outcome and is not logged as an error.
        if(!"Network is unreachable".equals(e.getMessage())){
            LOGGER.error("状态检查失败:"+e.getMessage());
        }
    }
    return false;
}
 
Example #17
Source File: FreemarkerClientPartialsSearchInputTest.java    From angularjs-addon with Eclipse Public License 1.0 6 votes vote down vote up
@Test
public void testGenerateBasicDateProperty() throws Exception {
    // Render the search-form input template for the date property.
    Map<String, Object> model = TestHelpers.createInspectionResultWrapper(ENTITY_NAME, DATE_PROP);
    URL templateUrl = getClass().getResource(Deployments.BASE_PACKAGE_PATH + Deployments.SEARCH_FORM_INPUT);
    Resource<URL> templateResource = resourceFactory.create(templateUrl);
    String rendered = processorFactory.create(templateResource, FreemarkerTemplate.class).process(model);

    Document fragment = Jsoup.parseBodyFragment(rendered);
    assertThat(rendered.trim(), not(equalTo("")));

    Elements formGroup = fragment.select("div.form-group");
    assertThat(formGroup, notNullValue());

    // The generated input must be bound to the "dateOfBirth" search field.
    Elements input = formGroup.select("div.col-sm-10 > input");
    assertThat(input.attr("id"), equalTo("dateOfBirth"));
    assertThat(input.attr("type"), equalTo("text"));
    assertThat(input.attr("ng-model"), equalTo("search.dateOfBirth"));
}
 
Example #18
Source File: CommonParser.java    From coolreader with MIT License 6 votes vote down vote up
/**
 * Builds an {@link ImageModel} from a parsed file page
 * (/project/index.php?title=File:xxx).
 *
 * <p>The image URL is the application base URL followed by the href of the first
 * link inside {@code .fullMedia} under {@code #mw-content-text}. If that
 * combination is not a valid URL the error is logged and the model is returned
 * without a URL set.
 *
 * @param doc parsed file page
 * @return the image model
 */
public static ImageModel parseImagePage(Document doc) {
	ImageModel image = new ImageModel();

	// NOTE(review): each first() returns null when nothing matches, so a page
	// without the expected structure fails here with a NullPointerException.
	String imageUrl = doc.select("#mw-content-text").first()
			.select(".fullMedia").first()
			.select("a").first()
			.attr("href");

	try {
		String fullUrl = UIHelper.getBaseUrl(LNReaderApplication.getInstance().getApplicationContext()) + imageUrl;
		image.setUrl(new URL(fullUrl));
	} catch (MalformedURLException e) {
		// Not expected to happen.
		Log.e(TAG, "Invalid URL: " + UIHelper.getBaseUrl(LNReaderApplication.getInstance().getApplicationContext()) + imageUrl, e);
	}
	return image;
}
 
Example #19
Source File: Location.java    From youtube-comment-suite with MIT License 5 votes vote down vote up
/**
 * Requests the location provider's URL for {@code ipv4} and deserializes the
 * text of the response with Gson into {@code dataType}.
 *
 * @param ipv4      address passed to the location provider to build the request URL
 * @param userAgent User-Agent header value sent with the request
 * @return the deserialized response
 * @throws JsonSyntaxException declared by the signature; raised by Gson on malformed JSON
 * @throws IOException         if the request fails
 */
public K getLocation(String ipv4, String userAgent) throws JsonSyntaxException, IOException {
    String requestUrl = locationProvider.getRequestUrl(ipv4);
    String responseText = Jsoup.connect(requestUrl)
            .userAgent(userAgent)
            .ignoreContentType(true)
            .get()
            .text();
    return gson.fromJson(responseText, dataType);
}
 
Example #20
Source File: HtmlParserTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test public void handlesProtocolRelativeUrl() {
    // A protocol-relative src ("//host/...") takes its scheme from the base URI.
    String base = "https://example.com/";
    String html = "<img src='//example.net/img.jpg'>";
    String resolved = Jsoup.parse(html, base).select("img").first().absUrl("src");
    assertEquals("https://example.net/img.jpg", resolved);
}
 
Example #21
Source File: Mzitu.java    From PicKing with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the href of the first link inside {@code div.pagenavi} whose text
 * contains "下一页", or an empty string when the page has no such link.
 *
 * @param result raw page bytes, decoded as UTF-8
 */
@Override
public String getDetailNext(String baseUrl, String currentUrl, byte[] result) throws UnsupportedEncodingException {
    String html = new String(result, "utf-8");
    Elements nextLinks = Jsoup.parse(html).select("div.pagenavi a:contains(下一页)");
    return nextLinks.isEmpty() ? "" : nextLinks.get(0).attr("href");
}
 
Example #22
Source File: SelectorTest.java    From jsoup-learning with MIT License 5 votes vote down vote up
@Test public void testPseudoEquals() {
    Document doc = Jsoup.parse("<div><p>One</p><p>Two</p><p>Three</>p></div><div><p>Four</p>");

    // p:eq(0) matches the first paragraph of every div.
    Elements firstOfEachDiv = doc.select("div p:eq(0)");
    assertEquals(2, firstOfEachDiv.size());
    assertEquals("One", firstOfEachDiv.get(0).text());
    assertEquals("Four", firstOfEachDiv.get(1).text());

    // Adding div:eq(0) narrows the match to the first div only.
    Elements firstOfFirstDiv = doc.select("div:eq(0) p:eq(0)");
    assertEquals(1, firstOfFirstDiv.size());
    assertEquals("One", firstOfFirstDiv.get(0).text());
    assertEquals("p", firstOfFirstDiv.get(0).tagName());
}
 
Example #23
Source File: CityParser.java    From zuihou-admin-boot with Apache License 2.0 5 votes vote down vote up
/**
 * Parses village-level data from the page at {@code url}.
 *
 * <p>Every element with class {@code villagetr} that has exactly three {@code td}
 * cells yields one {@code Area}: the first cell is the code and the third the name.
 * Other rows are ignored.
 *
 * @param fullName prefix prepended to each village name to form the area's full name
 * @param url      page to download; also recorded as the source of each area
 * @return the parsed areas in page order, numbered from 1 in their sort value
 */
public List<Area> parseVillagetr(String fullName, String url) {
    Document page = Jsoup.parse(HttpUtil.get(url, CHARSET));
    Elements rows = page.getElementsByClass("villagetr");

    List<Area> villages = new LinkedList<>();
    int nextSort = 1;
    for (Element row : rows) {
        Elements cells = row.getElementsByTag("td");
        if (cells != null && cells.size() == 3) {
            String code = cells.get(0).text();
            String name = cells.get(2).text();

            Area village = Area.builder().code(code)
                    .label(name)
                    .fullName(fullName + name)
                    .sortValue(nextSort++)
                    .source(url).build();
            StaticLog.info("				村级数据:  {}  ", village);

            villages.add(village);
        }
    }
    return villages;
}
 
Example #24
Source File: SelectorTest.java    From jsoup-learning with MIT License 5 votes vote down vote up
@Test public void handlesCommasInSelector() {
    Document doc = Jsoup.parse("<p name='1,2'>One</p><div>Two</div><ol><li>123</li><li>Text</li></ol>");

    // A comma inside an attribute value is not a selector separator.
    assertEquals(1, doc.select("[name=1,2]").size());

    // A comma inside :matches(...) belongs to the regex; the outer comma separates two selectors.
    Elements matched = doc.select("div, li:matches([0-9,]+)");
    assertEquals(2, matched.size());
    assertEquals("div", matched.get(0).tagName());
    assertEquals("li", matched.get(1).tagName());
    assertEquals("123", matched.get(1).text());
}
 
Example #25
Source File: SpdxLicenseTemplateHelper.java    From tools with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an HTML string to plain text, keeping a line break for every
 * {@code <br>} and {@code <p>} opening tag.
 *
 * <p>The tags are first swapped for a marker string, the remaining markup is
 * stripped by Jsoup, and the marker is then replaced by {@code "\n"}. The tag
 * patterns end the tag name at a word boundary, so only {@code br} and {@code p}
 * match. Without it, {@code <p[^>]*>} also matched tags that merely start with
 * "p", such as {@code <pre>} or {@code <param>}.
 *
 * @param html HTML to convert; must not be {@code null}
 * @return the text content with line breaks preserved
 */
public static String htmlToText(String html) {
	// NOTE(review): input text that already contains this marker will also turn into newlines.
	String newlineString = "NewLineGoesHere";
	String replaceBrs = html.replaceAll("(?i)<br\\b[^>]*>", newlineString);
	String replaceBrsAndPs = replaceBrs.replaceAll("(?i)<p\\b[^>]*>", newlineString);
	Document doc = Jsoup.parse(replaceBrsAndPs);
	String retval  = doc.text();
	retval = retval.replace(newlineString, "\n");
	return retval;
}
 
Example #26
Source File: XSSUtil.java    From youran with Apache License 2.0 5 votes vote down vote up
/**
 * Sanitizes {@code value} with Jsoup against the class whitelist.
 *
 * @param value HTML to sanitize; may be {@code null}
 * @return the sanitized HTML, or {@code null} when {@code value} is {@code null}
 */
public static String clean(String value) {
    // Per the original note, the whitelist allows base64-format images.
    // Pretty printing is switched off so the string is not reformatted.
    return value == null
        ? null
        : Jsoup.clean(value, "", whitelist, new Document.OutputSettings().prettyPrint(false));
}
 
Example #27
Source File: START.java    From NLIWOD with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Sends the question text for {@code language} to the START service and prints
 * the text of the {@code span[type=reply]} elements of the response.
 * Does nothing when the question has no text for that language.
 *
 * @param question question whose per-language text is looked up
 * @param language language key; also sent as the {@code lang} parameter when {@code setLangPar} is set
 * @throws Exception if the server answers with a status code of 400 or above, or the request fails
 */
public void search(IQuestion question, String language) throws Exception {
	if (!question.getLanguageToQuestion().containsKey(language)) {
		return;
	}
	String questionString = question.getLanguageToQuestion().get(language);
	log.debug(this.getClass().getSimpleName() + ": " + questionString);

	// HTTP client with the configured socket timeout.
	RequestConfig timeoutConfig = RequestConfig.custom().setSocketTimeout(this.timeout).build();
	HttpClient client = HttpClientBuilder.create().setDefaultRequestConfig(timeoutConfig).build();

	// Build the request URI.
	URIBuilder uriBuilder = new URIBuilder()
			.setScheme("http")
			.setHost("start.csail.mit.edu")
			.setPath("/justanswer.php")
			.setParameter("query", questionString);
	if(this.setLangPar){
		uriBuilder = uriBuilder.setParameter("lang", language);
	}

	HttpResponse response = client.execute(new HttpGet(uriBuilder.build()));
	// Fail on any client or server error status.
	int statusCode = response.getStatusLine().getStatusCode();
	if(statusCode>=400){
		throw new Exception("START Server could not answer due to: "+response.getStatusLine());
	}

	String responseHtml = responseparser.responseToString(response);
	System.out.println(Jsoup.parse(responseHtml).select("span[type=reply]").text());

	// TODO return senseful answer from start
	// return resultSet;
}
 
Example #28
Source File: YoutubeSearchProvider.java    From lavaplayer with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts tracks from a search results page.
 *
 * <p>If the page has a {@code #results} container, its {@code .yt-lockup-video}
 * entries are read, skipping those with a {@code data-ad-impressions} attribute or
 * a {@code .standalone-ypc-badge-renderer-label} element. Otherwise the page is
 * passed to {@code polymerExtractTracks}.
 *
 * @return a playlist named after {@code query}, or {@code AudioReference.NO_TRACK} when no track was found
 */
private AudioItem extractSearchResults(Document document, String query,
                                       Function<AudioTrackInfo, AudioTrack> trackFactory) {

  List<AudioTrack> tracks = new ArrayList<>();
  Elements resultContainers = document.select("#page > #content #results");

  if (resultContainers.isEmpty()) {
    log.debug("Attempting to parse results page as polymer");
    try {
      tracks = polymerExtractTracks(document, trackFactory);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  } else {
    for (Element container : resultContainers) {
      for (Element entry : container.select(".yt-lockup-video")) {
        if (entry.hasAttr("data-ad-impressions")
            || !entry.select(".standalone-ypc-badge-renderer-label").isEmpty()) {
          continue;
        }
        extractTrackFromResultEntry(tracks, entry, trackFactory);
      }
    }
  }

  if (tracks.isEmpty()) {
    return AudioReference.NO_TRACK;
  }
  return new BasicAudioPlaylist("Search results for: " + query, tracks, null, true);
}
 
Example #29
Source File: ParseTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test
public void testGoogleSearchIpod() throws IOException {
    // Parse the saved Google results page with its original URL as the base URI.
    Document doc = Jsoup.parse(
            getFile("/htmltests/google-ipod.html"),
            "UTF-8",
            "http://www.google.com/search?hl=en&q=ipod&aq=f&oq=&aqi=g10");
    assertEquals("ipod - Google Search", doc.title());

    Elements resultLinks = doc.select("h3.r > a");
    assertEquals(12, resultLinks.size());

    String firstHref = resultLinks.get(0).attr("href");
    String secondHref = resultLinks.get(1).attr("href");
    assertEquals(
            "http://news.google.com/news?hl=en&q=ipod&um=1&ie=UTF-8&ei=uYlKS4SbBoGg6gPf-5XXCw&sa=X&oi=news_group&ct=title&resnum=1&ved=0CCIQsQQwAA",
            firstHref);
    assertEquals("http://www.apple.com/itunes/", secondHref);
}
 
Example #30
Source File: XmlTreeBuilderTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test
public void testDoesNotForceSelfClosingKnownTags() {
    String markup = "<br>one</br>";

    // The HTML parser treats <br> as a known void tag, so "</br>" becomes a second <br>.
    String htmlOutput = Jsoup.parse(markup).body().html();
    assertEquals("<br>one\n<br>", htmlOutput);

    // The XML parser does not recognise the tag and keeps the markup as written.
    String xmlOutput = Jsoup.parse(markup, "", Parser.xmlParser()).html();
    assertEquals("<br>one</br>", xmlOutput);
}