Java Code Examples for org.jsoup.select.Elements#select()

The following examples show how to use org.jsoup.select.Elements#select(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CifnewsPageHandler.java    From cetty with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a fresh {@code div} holding the article content taken from {@code tempBody}.
 *
 * <p>The summary text is handed to {@code buildBlockquote}; then every direct child of
 * {@code div.article-inner} is either skipped (it contains {@code div.fetch-present}),
 * turned into a figure (it contains a {@code p>img}), or appended unchanged.
 *
 * @param tempBody the elements the article content is selected from
 * @return the newly created container element
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element container = new Element(Tag.valueOf("div"), "");
    buildBlockquote(tempBody.select("div.fetch-read>div.summary").text(), container);

    for (Element child : tempBody.select("div.article-inner>*")) {
        if (!child.select("div.fetch-present").isEmpty()) {
            continue;
        }

        Element image = child.select("p>img").first();
        if (image == null) {
            // No image inside: keep the child as it is.
            container.appendChild(child);
            continue;
        }

        Element figure = buildFigure(image);
        // A <p> directly following the image is used as the figure caption.
        Element following = image.nextElementSibling();
        if (following != null && "p".equals(following.tagName())) {
            figure.appendChild(buildFigcaption(following.text()));
        }
        container.appendChild(figure);
    }
    return container;
}
 
Example 2
Source File: XiuMM.java    From PicKing with Apache License 2.0 6 votes vote down vote up
/**
 * Parses a UTF-8 listing page and collects one {@code AlbumInfo} per {@code div.album}.
 *
 * @param baseUrl    not used by this implementation
 * @param currentUrl stored in {@code resultMap} under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as "utf-8"
 * @param resultMap  map that receives the current URL and the album list
 * @return the same {@code resultMap} instance
 * @throws UnsupportedEncodingException if the "utf-8" charset is unavailable
 */
@Override
public Map<ContentsActivity.parameter, Object> getContent(String baseUrl, String currentUrl, byte[] result, Map<ContentsActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    List<AlbumInfo> albums = new ArrayList<>();
    String html = new String(result, "utf-8");

    for (Element albumNode : Jsoup.parse(html).select("div.album")) {
        AlbumInfo info = new AlbumInfo();

        Element titleNode = albumNode.select("span.name").first();
        if (titleNode != null) {
            info.setTitle(titleNode.text());
        }

        Elements links = albumNode.select(".pic_box a");
        info.setAlbumUrl(links.attr("href"));

        Element thumbnail = links.select("img").first();
        if (thumbnail != null) {
            info.setPicUrl(thumbnail.attr("src"));
        }

        albums.add(info);
    }

    resultMap.put(ContentsActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(ContentsActivity.parameter.RESULT, albums);
    return resultMap;
}
 
Example 3
Source File: RosiMM.java    From PicKing with Apache License 2.0 6 votes vote down vote up
/**
 * Parses a GBK-encoded listing page and collects one {@code AlbumInfo} per
 * {@code #sliding li} entry.
 *
 * @param baseUrl    prefix prepended to each album's {@code href}
 * @param currentUrl stored in {@code resultMap} under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as "gbk"
 * @param resultMap  map that receives the current URL and the album list
 * @return the same {@code resultMap} instance
 * @throws UnsupportedEncodingException if the "gbk" charset is unavailable
 */
@Override
public Map<ContentsActivity.parameter, Object> getContent(String baseUrl, String currentUrl, byte[] result, Map<ContentsActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    List<AlbumInfo> albums = new ArrayList<>();
    String html = new String(result, "gbk");

    for (Element entry : Jsoup.parse(html).select("#sliding li")) {
        AlbumInfo info = new AlbumInfo();

        Element titleNode = entry.select(".p-title").first();
        if (titleNode != null) {
            info.setTitle(titleNode.text());
        }

        // Only anchors that wrap an image are album links.
        Elements links = entry.select("a:has(img)");
        info.setAlbumUrl(baseUrl + links.attr("href"));

        Element thumbnail = links.select("img").first();
        if (thumbnail != null) {
            info.setPicUrl(thumbnail.attr("src"));
        }

        albums.add(info);
    }

    resultMap.put(ContentsActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(ContentsActivity.parameter.RESULT, albums);
    return resultMap;
}
 
Example 4
Source File: GetCommentsTask.java    From guanggoo-android with Apache License 2.0 6 votes vote down vote up
/**
 * Converts every {@code div.reply-item} under {@code elements} into a {@code Comment},
 * keyed by the floor number taken from the comment's meta data.
 *
 * @param elements elements to search for reply items
 * @return the comments sorted by floor; descending when the
 *         {@code KEY_COMMENTS_ORDER_DESC} preference is true, ascending otherwise
 */
public static Map<Integer, Comment> getCommentsFromElements(Elements elements) {
    TreeMap<Integer, Comment> byFloor = new TreeMap<>();

    for (Element item : elements.select("div.reply-item")) {
        Comment parsed = getCommentFromElement(item);
        byFloor.put(parsed.getMeta().getFloor(), parsed);
    }

    boolean descending = PrefsUtil.getBoolean(App.getInstance(), ConstantUtil.KEY_COMMENTS_ORDER_DESC, false);
    if (!descending) {
        return byFloor;
    }
    return byFloor.descendingMap();
}
 
Example 5
Source File: FreemarkerClientPartialsSearchInputTest.java    From angularjs-addon with Eclipse Public License 1.0 6 votes vote down vote up
/**
 * Renders the search-form-input template for the date property and checks the
 * id, type and ng-model attributes of the generated input element.
 */
@Test
public void testGenerateBasicDateProperty() throws Exception {
    Map<String, Object> model = TestHelpers.createInspectionResultWrapper(ENTITY_NAME, DATE_PROP);

    Resource<URL> templateResource = resourceFactory.create(
            getClass().getResource(Deployments.BASE_PACKAGE_PATH + Deployments.SEARCH_FORM_INPUT));
    Template template = processorFactory.create(templateResource, FreemarkerTemplate.class);
    String rendered = template.process(model);
    Document html = Jsoup.parseBodyFragment(rendered);
    assertThat(rendered.trim(), not(equalTo("")));

    Elements formGroup = html.select("div.form-group");
    assertThat(formGroup, notNullValue());

    Elements input = formGroup.select("div.col-sm-10 > input");
    assertThat(input.attr("id"), equalTo("dateOfBirth"));
    assertThat(input.attr("type"), equalTo("text"));
    assertThat(input.attr("ng-model"), equalTo("search.dateOfBirth"));
}
 
Example 6
Source File: FreemarkerClientPartialsSearchResultsTest.java    From angularjs-addon with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Renders the search-results template for an entity with a single one-to-one
 * "voucher" property and checks the generated table header, row and cell markup.
 */
@Test
public void testGenerateOneToOneProperty() throws Exception
{
   final String propertyName = "voucher";

   Map<String, String> propertyModel = new HashMap<String, String>();
   propertyModel.put("name", propertyName);
   propertyModel.put("label", StringUtils.uncamelCase(propertyName));
   propertyModel.put("type", "com.example.scaffoldtester.model.DiscountVoucher");
   propertyModel.put("one-to-one", "true");
   propertyModel.put("simpleType", "DiscountVoucher");
   propertyModel.put("optionLabel", "id");

   List<Map<String, ? extends Object>> allProperties = new ArrayList<Map<String, ? extends Object>>();
   allProperties.add(propertyModel);

   Map<String, Object> model = new HashMap<String, Object>();
   model.put("entityName", "SampleEntity");
   model.put("pluralizedEntityName", "SampleEntities");
   model.put("entityId", propertyName);
   model.put("properties", allProperties);

   Resource<URL> templateResource = resourceFactory.create(getClass().getResource(
            Deployments.BASE_PACKAGE_PATH + Deployments.SEARCH_RESULTS));
   Template template = processorFactory.create(templateResource, FreemarkerTemplate.class);
   String rendered = template.process(model);
   Document html = Jsoup.parseBodyFragment(rendered);
   assertThat(rendered.trim(), not(equalTo("")));

   // Exactly one column header, labelled after the property.
   Elements headerCells = html.select("table > thead > tr > th");
   assertThat(headerCells.size(), equalTo(1));
   assertThat(headerCells.text(), equalTo("Voucher"));

   Elements bodyRows = html.select("table > tbody > tr");
   assertThat(bodyRows.attr("ng-repeat"), containsString("result in filteredResults"));

   // Exactly one cell, whose link points at the edit route.
   Elements cells = bodyRows.select(" > td");
   assertThat(cells.size(), equalTo(1));
   Elements link = cells.select("a");
   assertThat(link.attr("href"), equalTo("#/SampleEntities/edit/{{result.voucher}}"));
   assertThat(link.text(), equalTo("{{result.voucher.id}}"));
}
 
Example 7
Source File: JDlingyu.java    From PicKing with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a UTF-8 listing page and collects one {@code AlbumInfo} per
 * {@code #postlist .pin} entry, including its link, picture, title and time.
 *
 * @param baseUrl    not used by this implementation
 * @param currentUrl stored in {@code resultMap} under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as "utf-8"
 * @param resultMap  map that receives the current URL and the album list
 * @return the same {@code resultMap} instance
 * @throws UnsupportedEncodingException if the "utf-8" charset is unavailable
 */
@Override
public Map<ContentsActivity.parameter, Object> getContent(String baseUrl, String currentUrl, byte[] result, Map<ContentsActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    List<AlbumInfo> albums = new ArrayList<>();
    String html = new String(result, "utf-8");

    for (Element pin : Jsoup.parse(html).select("#postlist .pin")) {
        AlbumInfo info = new AlbumInfo();

        Elements links = pin.select(".pin-coat>a");
        info.setAlbumUrl(links.attr("href"));

        // The picture address is read from the "original" attribute, not "src".
        Element thumbnail = links.select("img").first();
        if (thumbnail != null) {
            info.setPicUrl(thumbnail.attr("original"));
        }

        Element titleNode = pin.select("span.bg").first();
        if (titleNode != null) {
            info.setTitle(titleNode.text());
        }

        Element timeNode = pin.select("span.timer").first();
        if (timeNode != null) {
            info.setTime(timeNode.text());
        }

        albums.add(info);
    }

    resultMap.put(ContentsActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(ContentsActivity.parameter.RESULT, albums);
    return resultMap;
}
 
Example 8
Source File: PostsActivity.java    From Ruisi with Apache License 2.0 5 votes vote down vote up
/**
 * Parses the HTML passed as {@code params[0]} into a list of articles and updates
 * {@code maxPage} from the pager at the bottom of the page.
 *
 * @param params a single-element array holding the page HTML
 * @return one {@code ArticleListData} per {@code li} of {@code ul[id=waterfall]}
 */
@Override
protected List<ArticleListData> doInBackground(String... params) {
    Document document = Jsoup.parse(params[0]);
    List<ArticleListData> articles = new ArrayList<>();

    for (Element item : document.select("ul[id=waterfall]").select("li")) {
        // Links come without a prefix such as
        // http://rs.xidian.edu.cn/
        String img = item.select("img").attr("src");

        Elements titleLink = item.select("h3.xw0").select("a[href^=forum.php]");
        String url = titleLink.attr("href");
        String title = titleLink.text();

        String author = item.select("a[href^=home.php]").text();

        Elements replyLink = item.select(".xg1.y").select("a[href^=forum.php]");
        String replyCount = replyLink.text();
        replyLink.remove();

        articles.add(new ArticleListData(title, url, img, author, replyCount));
    }

    Element pager = document.select("#fd_page_bottom .pg").first();
    if (pager != null) {
        maxPage = GetId.getNumber(pager.select("label span").text());
    } else {
        // No pager on the page: the current page is the last one.
        maxPage = currentPage;
    }

    return articles;
}
 
Example 9
Source File: FreemarkerClientPartialsNToOnePropertyTest.java    From angularjs-addon with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Renders the n-to-one property detail template for a many-to-one "customer"
 * property and checks the generated select element's id, ng-options and ng-model.
 */
@Test
public void testGenerateManyToOneProperty() throws Exception
{
   final String propertyName = "customer";

   Map<String, String> propertyModel = new HashMap<String, String>();
   propertyModel.put("name", propertyName);
   propertyModel.put("identifier", propertyName);
   propertyModel.put("label", "Customer");
   propertyModel.put("type", "com.example.scaffoldtester.model.Customer");
   propertyModel.put("many-to-one", "true");
   propertyModel.put("simpleType", "Customer");
   propertyModel.put("optionLabel", "id");

   Map<String, Object> model = new HashMap<String, Object>();
   model.put("entityName", "SampleEntity");
   model.put("property", propertyModel);

   Resource<URL> templateResource = resourceFactory.create(getClass().getResource(
            Deployments.BASE_PACKAGE_PATH + Deployments.N_TO_ONE_PROPERTY_DETAIL_INCLUDE));
   Template template = processorFactory.create(templateResource, FreemarkerTemplate.class);
   String rendered = template.process(model);
   Document html = Jsoup.parseBodyFragment(rendered);
   assertThat(rendered.trim(), not(equalTo("")));

   Elements formGroup = html.select("div.form-group");
   assertThat(formGroup, notNullValue());
   assertThat(formGroup.attr("ng-class"), not(equalTo("")));

   Elements widget = html.select("div.form-group > div.col-sm-10");
   assertThat(widget, notNullValue());

   Elements select = widget.select(" > select");
   assertThat(select, notNullValue());
   assertThat(select.attr("id"), equalTo(propertyName));

   // The ng-options alias is the first letter of the property name.
   String alias = propertyName.substring(0, 1);
   String expectedOptions = alias + ".text for " + alias + " in " + propertyName + "SelectionList";
   assertThat(select.attr("ng-options"), equalTo(expectedOptions));
   assertThat(select.attr("ng-model"), equalTo(propertyName + "Selection"));
}
 
Example 10
Source File: FreemarkerClientPartialsNToManyPropertyTest.java    From angularjs-addon with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Renders the n-to-many property detail template for the many-to-many "users"
 * property and checks the generated select element's id, multiple, ng-model and
 * ng-options attributes.
 */
@Test
public void testGenerateManyToManyProperty() throws Exception
{
   final String propertyName = "users";
   Map<String, Object> model = createInspectionResultWrapper(ENTITY_NAME, MANY_TO_MANY_PROP);

   Resource<URL> templateResource = resourceFactory.create(getClass().getResource(
            Deployments.BASE_PACKAGE_PATH + Deployments.N_TO_MANY_PROPERTY_DETAIL_INCLUDE));
   Template template = processorFactory.create(templateResource, FreemarkerTemplate.class);
   String rendered = template.process(model);
   Document html = Jsoup.parseBodyFragment(rendered);
   assertThat(rendered.trim(), not(equalTo("")));

   Elements formGroup = html.select("div.form-group");
   assertThat(formGroup, notNullValue());
   assertThat(formGroup.attr("ng-class"), not(equalTo("")));

   Elements widget = html.select("div.form-group > div.col-sm-10");
   assertThat(widget, notNullValue());

   Elements select = widget.select(" > select");
   assertThat(select.attr("id"), equalTo(propertyName));
   assertThat(select.attr("multiple"), notNullValue());
   assertThat(select.attr("ng-model"), equalTo(propertyName + "Selection"));

   // The ng-options alias is the first letter of the property name.
   String alias = propertyName.substring(0, 1);
   String expectedOptions = alias + ".text for " + alias + " in " + propertyName + "SelectionList";
   assertThat(select.attr("ng-options"), equalTo(expectedOptions));
}
 
Example 11
Source File: ElementSelector.java    From spring-boot with Apache License 2.0 5 votes vote down vote up
/**
 * Narrows the current selection by applying each selector to the result of the
 * previous one, i.e. builds {@code element.select(name).select(name1)}.
 *
 * @param tagName selectors to apply, in order
 * @return this selector, whose current elements are now the narrowed selection
 */
public ElementSelector byTagsNested(String... tagName) {
    Elements narrowed = this.currentElements;
    for (int i = 0; i < tagName.length; i++) {
        narrowed = narrowed.select(tagName[i]);
    }
    this.currentElements = narrowed;
    return this;
}
 
Example 12
Source File: GoogleSearch.java    From ache with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a Google search and returns the result links found on one result page.
 *
 * <p>Waits on {@code timer} first, fetches the search URL, then collects every
 * {@code a[href]} below {@code .r} inside {@code div#search}.
 *
 * @param query search terms, appended to the URL as given
 *              (NOTE(review): not URL-encoded here — confirm callers pass an encoded value)
 * @param page  zero-based result page; the start offset is {@code page * docsPerPage}
 * @return one {@code BackLinkNeighborhood} (href, link text) per result link
 * @throws IOException if fetching or parsing fails; a {@code BaseFetchException} is wrapped
 */
public List<BackLinkNeighborhood> submitQuery(String query, int page) throws IOException {

    timer.waitMinimumDelayIfNecesary();

    // 21 -> max number allowed by google... decreases after
    String queryUrl = "https://www.google.com/search?q=" + query + "&num="+docsPerPage + "&start="+page*docsPerPage;
    System.out.println("URL:"+queryUrl);
    try {
        FetchedResult result = fetcher.get(queryUrl);

        Document doc;
        // try-with-resources closes the stream even when parsing throws.
        try (InputStream is = new ByteArrayInputStream(result.getContent())) {
            // The third argument is the base URI used to resolve relative links,
            // so it must be the fetched URL rather than the bare search terms.
            doc = Jsoup.parse(is, "UTF-8", queryUrl);
        }

        Elements searchItems = doc.select("div#search");
        Elements linkHeaders = searchItems.select(".r");
        Elements linksUrl = linkHeaders.select("a[href]");

        List<BackLinkNeighborhood> links = new ArrayList<>();
        for (Element link : linksUrl) {
            String title = link.text();
            String url = link.attr("href");
            links.add(new BackLinkNeighborhood(url, title));
        }

        System.out.println(getClass().getSimpleName()+" hits: "+links.size());
        return links;
    } catch (IOException | BaseFetchException e) {
        throw new IOException("Failed to download backlinks from Google.", e);
    }

}
 
Example 13
Source File: SteamGiftsUserData.java    From SteamGifts with MIT License 5 votes vote down vote up
/**
 * Updates the current user data from the navigation bar of a parsed page.
 *
 * <p>Does nothing when there is no current user data, no document, or the document
 * has no {@code .nav__button-container} with a link in it. When the last navbar
 * link points at {@code /user/...}, the name, avatar, points, level and
 * notification counts are refreshed; when it points at {@code /?login} while the
 * stored user is still logged in, the stored data is replaced by a fresh instance.
 *
 * @param context  used to look up the current user data and to save it after a reset; may be null
 * @param document the parsed page; may be null
 */
public static void extract(@Nullable Context context, @Nullable Document document) {
    if (getCurrent(context) == null)
        return;

    if (document == null)
        return;

    Elements navbar = document.select(".nav__button-container");

    // last()/first() return null on an empty selection; a page without the usual
    // navbar carries no user information to extract.
    Element lastContainer = navbar.last();
    if (lastContainer == null)
        return;

    Element userContainer = lastContainer.select("a").first();
    if (userContainer == null)
        return;

    String link = userContainer.attr("href");

    if (link.startsWith("/user/")) {
        current.setName(link.substring(6));

        // fetch the image
        String style = userContainer.select("div").first().attr("style");
        style = Utils.extractAvatar(style);
        current.setImageUrl(style);

        Element accountContainer = navbar.select("a[href=/account]").first();
        if (accountContainer != null) {
            // points
            current.setPoints(Utils.parseInt(accountContainer.select(".nav__points").text()));

            // Level (the title holds a fractional value, which is truncated)
            float level = Float.parseFloat(accountContainer.select("span").last().attr("title"));
            current.setLevel((int) level);
        }

        // Notifications
        Elements notifications = navbar.select(".nav__button-container--notification");
        current.setCreatedNotification(getInt(notifications.select("a[href=/giveaways/created]").first().text()));
        current.setWonNotification(getInt(notifications.select("a[href=/giveaways/won]").first().text()));
        current.setMessageNotification(getInt(notifications.select("a[href=/messages]").first().text()));
    } else if (link.startsWith("/?login") && current.isLoggedIn()) {
        current = new SteamGiftsUserData();
        if (context != null)
            current.save(context);
    }
}
 
Example 14
Source File: Acg12.java    From PicKing with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a UTF-8 listing page and collects one {@code AlbumInfo} per
 * {@code section} element, including its link, picture, title and time.
 *
 * @param baseUrl    not used by this implementation
 * @param currentUrl stored in {@code resultMap} under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as "utf-8"
 * @param resultMap  map that receives the current URL and the album list
 * @return the same {@code resultMap} instance
 * @throws UnsupportedEncodingException if the "utf-8" charset is unavailable
 */
@Override
public Map<ContentsActivity.parameter, Object> getContent(String baseUrl, String currentUrl, byte[] result, Map<ContentsActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    List<AlbumInfo> albums = new ArrayList<>();
    String html = new String(result, "utf-8");

    for (Element section : Jsoup.parse(html).select("section")) {
        AlbumInfo info = new AlbumInfo();

        Elements links = section.select(".card-bg > a:has(img)");
        if (!links.isEmpty()) {
            info.setAlbumUrl(links.attr("href"));
            Element cover = links.select("img").first();
            if (cover != null) {
                // The picture address is read from "data-src".
                String pictureUrl = cover.attr("data-src");
                Log.e("Acg12", "getContent: " + pictureUrl);
                info.setPicUrl(pictureUrl);
            }
        }

        Element titleNode = section.select("h3.title").first();
        if (titleNode != null) {
            info.setTitle(titleNode.text());
        }

        Elements timeNodes = section.select("time");
        if (!timeNodes.isEmpty()) {
            info.setTime(timeNodes.attr("title") + " " + timeNodes.text());
        }

        albums.add(info);
    }

    resultMap.put(ContentsActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(ContentsActivity.parameter.RESULT, albums);
    return resultMap;
}
 
Example 15
Source File: TEDScraper.java    From Babler with Apache License 2.0 4 votes vote down vote up
/**
 * Parses all the pages containing links to talks in a specific languageCode,
 * saves them to urls and then calls getAndSaveData().
 *
 * <p>Returns early, without calling getAndSaveData(), when the first page reports
 * that there are no talks for the language.
 *
 * @param language destination languageCode
 */
public TEDScraper(String language) {
    this.language = language;
    String iso1Lang = LanguageCode.convertIso2toIso1(language);
    this.logDb = new LogDB(this.language); //saving text files

    urls = new ArrayList<URL>(2);

    log.info("Scraping TED.COM for subtitles in:  "+ language);

    //get the first page and parse
    HTTPClient client = new HTTPClient(VIDEOS_URL + iso1Lang);
    String html = client.getHTMLData();

    if (html.contains("We couldn't find a talk quite like that")) {
        log.info("TED.COM Does not have any talks in "+language + " langauge code");
        log.info("Stopping process");
        return;
    }

    Document doc = Jsoup.parse(html);

    //get the number of pages from the page's pagination
    Element lastPagination = doc.select(".pagination__item").last();

    int numOfPages = 1;
    if (lastPagination != null) {
        numOfPages = Integer.parseInt(lastPagination.text());
    }

    //for every page of that languageCode
    for (int i = 1; i <= numOfPages; i++) {
        log.info("Getting links from page: "+i +" out of: "+numOfPages);

        //we already fetched the first page
        if (i != 1) {
            //get the page and parse
            client = new HTTPClient(VIDEOS_URL + iso1Lang + "&page=" + i);
            html = client.getHTMLData();
            doc = Jsoup.parse(html);
        }

        Elements videoContainers = doc.select(".media__message");
        Elements links = videoContainers.select("a");

        //add href value only to urls
        for (Element link : links) {
            //get the href value
            String talkPath = link.attr("href");

            //remove everything after the ? -> /talks/ze_frank_are_you_human?languageCode=lt
            //(an href without a query string is kept as it is instead of throwing)
            int queryStart = talkPath.indexOf('?');
            if (queryStart >= 0) {
                talkPath = talkPath.substring(0, queryStart);
            }

            //the talk id is whatever follows "/talks/"; skip links that are not talk links
            int talksStart = talkPath.indexOf("/talks/");
            if (talksStart < 0) {
                continue;
            }
            String talkId = talkPath.substring(talksStart + "/talks/".length());

            //add to array of all links
            urls.add(new URL("https://www.ted.com" + talkPath + "/transcript.json?language=" + iso1Lang, language, talkId));
        }
    }

    getAndSaveData();
}
 
Example 16
Source File: GetReplyListTask.java    From guanggoo-android with Apache License 2.0 4 votes vote down vote up
/**
 * Loads the page at {@code mUrl}, converts each {@code div.reply-item} that has both
 * a topic link and a content block into a {@code Reply}, and reports the list
 * together with a "has more pages" flag via {@code successOnUI}. An
 * {@code IOException} while loading results in {@code failedOnUI}.
 */
@Override
public void run() {
    List<Reply> collected = new ArrayList<>();

    boolean loaded = false;
    boolean morePages = false;

    try {
        Document doc = get(mUrl);

        for (Element item : doc.select("div.reply-item")) {
            Reply reply = new Reply();

            Element topicLink = item.select("span.title a").first();
            if (topicLink == null) {
                continue;
            }

            Topic topic = new Topic();
            topic.setTitle(topicLink.text());
            topic.setUrl(topicLink.absUrl("href"));
            reply.setTopic(topic);

            Elements content = item.select("div.content");
            if (content.isEmpty()) {
                continue;
            }
            reply.setContent(content.outerHtml());

            collected.add(reply);
        }

        loaded = true;

        Elements pager = doc.select("ul.pagination");
        if (!pager.isEmpty()) {
            Elements disabled = pager.select("li.disabled");
            Element lastDisabled = disabled.last();
            // More pages exist unless the last disabled pager item is the "next page" link.
            morePages = disabled.isEmpty()
                    || (lastDisabled != null
                        && !ConstantUtil.NEXT_PAGE.equals(lastDisabled.select("a").text()));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    if (!loaded) {
        failedOnUI("获取回复列表失败");
        return;
    }

    ListResult<Reply> page = new ListResult<>();
    page.setData(collected);
    page.setHasMore(morePages);
    successOnUI(page);
}
 
Example 17
Source File: GetNotificationListTask.java    From guanggoo-android with Apache License 2.0 4 votes vote down vote up
/**
 * Loads the page at {@code mUrl}, converts each {@code div.notification-item} into a
 * {@code Notification}, and reports the list together with a "has more pages" flag
 * via {@code successOnUI}. An {@code IOException} while loading results in
 * {@code failedOnUI}.
 */
@Override
public void run() {
    List<Notification> collected = new ArrayList<>();

    boolean loaded = false;
    boolean morePages = false;
    try {
        Document doc = get(mUrl);

        for (Element item : doc.select("div.notification-item")) {
            collected.add(createNotificationFromElement(item));
        }

        loaded = true;

        Elements pager = doc.select("ul.pagination");
        if (!pager.isEmpty()) {
            Elements disabled = pager.select("li.disabled");
            Element lastDisabled = disabled.last();
            // More pages exist unless the last disabled pager item is the "next page" link.
            morePages = disabled.isEmpty()
                    || (lastDisabled != null
                        && !ConstantUtil.NEXT_PAGE.equals(lastDisabled.select("a").text()));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    if (!loaded) {
        failedOnUI("获取消息提醒列表失败");
        return;
    }

    ListResult<Notification> page = new ListResult<>();
    page.setData(collected);
    page.setHasMore(morePages);
    successOnUI(page);
}
 
Example 18
Source File: GetTopicListTask.java    From guanggoo-android with Apache License 2.0 4 votes vote down vote up
/**
 * Loads the page at {@code mUrl}, passes it to {@code tryFixAuthStatus}, converts
 * each {@code div.topic-item} into a {@code Topic}, and reports the list together
 * with a "has more pages" flag via {@code successOnUI}. An {@code IOException}
 * while loading results in {@code failedOnUI}.
 */
@Override
public void run() {
    List<Topic> collected = new ArrayList<>();

    boolean loaded = false;
    boolean morePages = false;
    try {
        Document doc = get(mUrl);

        tryFixAuthStatus(doc);

        for (Element item : doc.select("div.topic-item")) {
            collected.add(createTopicFromElement(item));
        }

        loaded = true;

        Elements pager = doc.select("ul.pagination");
        if (!pager.isEmpty()) {
            Elements disabled = pager.select("li.disabled");
            Element lastDisabled = disabled.last();
            // More pages exist unless the last disabled pager item is the "next page" link.
            morePages = disabled.isEmpty()
                    || (lastDisabled != null
                        && !ConstantUtil.NEXT_PAGE.equals(lastDisabled.select("a").text()));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    if (!loaded) {
        failedOnUI("获取主题列表失败");
        return;
    }

    ListResult<Topic> page = new ListResult<>();
    page.setData(collected);
    page.setHasMore(morePages);
    successOnUI(page);
}
 
Example 19
Source File: BookClass.java    From nju-lib-downloader with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds a {@code Book} for every list node that yields both a name and an id.
 *
 * <p>For each node the id is cut out of the {@code onclick} attribute of the name
 * link, the category chain is built from the links inside {@code p[class=info]}
 * (the last link is treated as the terminal category), and the remaining fields
 * are extracted from the node's text with regular expressions. Nodes without a
 * name or id are reported on standard output and skipped.
 *
 * @param booksliNode the list nodes, one per book
 * @return the books that could be parsed
 */
private Set<Book> queryBooks(Elements booksliNode) {
    // Compiled once here instead of once per element; both patterns are loop-invariant.
    final Pattern detailPattern = Pattern.compile("\\d+\\. (.*) 作者[::](.*) 出版日期[::](\\d+).*?(?:主题词[::](.+))? 分类[::](.*)");
    final Pattern titlePattern = Pattern.compile(".*(《.*》).*");

    Set<Book> books = new HashSet<>();
    for (Element element : booksliNode) {
        // Extract the book name and id
        String name = null, id = null, author = null, publishDate = null, theme = null, detailBookClass = null;
        BookClass bookBookClass;
        // select() always returns a (possibly empty) Elements, so no null checks are needed
        Elements nameIdNode = element.select("p[class=name]");
        name = nameIdNode.text();
        Elements idNode = nameIdNode.select("a[onclick]");
        if (idNode.size() > 0) {
            String idOnClick = idNode.get(0).attr("onclick");
            // the id is the text between the first "(" and the last ","
            int start = idOnClick.indexOf("(") + 1, end = idOnClick.lastIndexOf(",");
            if (start != 0 && end != -1) {
                id = idOnClick.substring(start, end);
            }
        }

        // Extract the category chain
        BookClass[] bookClasses = new BookClass[0];
        Elements bookInfos = element.select("p[class=info]").select("a");
        if (bookInfos.size() > 0) {
            // the last link is handled separately as the terminal category
            Element terminalCataNode = bookInfos.last();
            bookInfos.remove(terminalCataNode);
            List<BookClass> tmplist = bookInfos.stream()
                    .map(bookInfo -> getBookCata(bookInfo, false))
                    .filter(Objects::nonNull)
                    .collect(Collectors.toList());
            BookClass terminalBookClass = getBookCata(terminalCataNode, true);
            if (terminalBookClass != null) {
                tmplist.add(terminalBookClass);
            }
            bookClasses = tmplist.toArray(bookClasses);
        }
        bookBookClass = new RootBookClass().link(bookClasses);

        // Extract author, publication date, subject terms and category
        String info = element.text();
        Matcher matcher = detailPattern.matcher(info);
        while (matcher.find()) {
            name = matcher.group(1);
            author = matcher.group(2);
            publishDate = matcher.group(3);
            theme = matcher.group(4);
            detailBookClass = matcher.group(5);
        }
        // A title in 《》 brackets, when present, overrides the name found so far
        Matcher minMatcher = titlePattern.matcher(info);
        while (minMatcher.find()) {
            name = minMatcher.group(1);
        }

        // Assemble the book
        if (name != null && id != null) {
            Book book = new Book(id, name, author, publishDate, theme, bookBookClass, detailBookClass);
            book.setCookie(cookie);
            books.add(book);
            if (bookBookClass.isTerminal()) {
                ((TerminalBookClass) bookBookClass).addBook(book);
            } else {
                System.out.println("未获取到分类信息,将不被归档 " + book);
            }
        } else {
            System.out.println("error: " + info);
        }
    }
    return books;
}
 
Example 20
Source File: MTGTop8DeckSniffer.java    From MtgDesktopCompanion with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Downloads the deck page referenced by {@code info} and builds a {@code MagicDeck}
 * from its card table.
 *
 * <p>Cells with class {@code O13} are section headers; once one reads "SIDEBOARD",
 * the following cards go to the sideboard. Every other cell with a card name in
 * {@code span.L14} is expected to start with the quantity followed by a space.
 * Cells without a card name, or without a quantity separator, are skipped.
 *
 * @param info the deck to retrieve; its URL and name are copied to the result
 * @return the populated deck
 * @throws IOException declared by the interface; presumably raised by the page download
 */
@Override
public MagicDeck getDeck(RetrievableDeck info) throws IOException {
	Document root = URLTools.extractHtml(info.getUrl().toString());
	MagicDeck d = new MagicDeck();
	d.setDescription(info.getUrl().toString());
	d.setName(info.getName());

	Elements doc = root.select("table.Stable").get(1).select("td table").select(MTGConstants.HTML_TAG_TD);

	boolean side = false;
	for (Element e : doc.select("td table td")) {

		if (e.hasClass("O13")) {
			if (e.text().equalsIgnoreCase("SIDEBOARD"))
				side = true;
			continue;
		}

		// Check the name first: cells without a card name need no quantity parsing
		// and must not be able to fail on it.
		String name = e.select("span.L14").text();
		if (name.equals(""))
			continue;

		String cellText = e.text();
		int separator = cellText.indexOf(' ');
		if (separator < 0) {
			// no "<quantity> <name>" layout: nothing to parse
			logger.error("No quantity found for " + name);
			continue;
		}
		int qte = Integer.parseInt(cellText.substring(0, separator));

		try {
			MagicCard mc = MTGControler.getInstance().getEnabled(MTGCardsProvider.class).searchCardByName( name, null, true).get(0);
			if (!side)
				d.getMain().put(mc, qte);
			else
				d.getSideBoard().put(mc, qte);

			notify(mc);
		}
		catch(IndexOutOfBoundsException err)
		{
			// the provider returned no card for this name
			logger.error("Error getting " + name,err);
		}
	}

	return d;
}