Java Code Examples for org.jsoup.select.Elements#attr()

The following examples show how to use org.jsoup.select.Elements#attr(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: WhenAttributesAreUsedInAsciidoctor.java    From asciidoctorj with Apache License 2.0 6 votes vote down vote up
/**
 * Converts {@code rendersample.asciidoc} with a custom styles directory, linked CSS and a
 * custom stylesheet name, then checks the {@code href} of the {@code <link>} element found
 * in the generated {@code rendersample.html}.
 */
@Test
public void styles_dir_is_set_css_routes_should_use_it() throws IOException {

    Attributes attributes = attributes()
            .stylesDir("./styles")
            .linkCss(true)
            .styleSheetName("mycustom.css")
            .get();
    Options options = options()
            .inPlace(false)
            .safe(SafeMode.UNSAFE)
            .toDir(testFolder.getRoot())
            .mkDirs(true)
            .attributes(attributes)
            .get();

    asciidoctor.convertFile(classpath.getResource("rendersample.asciidoc"), options);

    File renderedFile = new File(testFolder.getRoot(), "rendersample.html");
    Document doc = Jsoup.parse(renderedFile, "UTF-8");
    // Elements#attr yields the value from the first matched element carrying the attribute.
    String href = doc.select("link[href]").attr("href");
    assertThat(href, is("./styles/mycustom.css"));
}
 
Example 2
Source File: WhenAttributesAreUsedInAsciidoctor.java    From asciidoctorj with Apache License 2.0 6 votes vote down vote up
/**
 * Converts {@code rendersample.asciidoc} with the stylesheet unset and verifies that
 * querying the generated HTML for {@code link[href]} yields an empty {@code href} value.
 */
@Test
public void unsetting_styleSheetName_should_leave_document_without_style()
        throws IOException {

    Attributes attributes = attributes().unsetStyleSheet().get();
    Options options = options()
            .inPlace(false)
            .safe(SafeMode.UNSAFE)
            .toDir(testFolder.getRoot())
            .attributes(attributes)
            .get();

    asciidoctor.convertFile(classpath.getResource("rendersample.asciidoc"), options);

    File renderedFile = new File(testFolder.getRoot(), "rendersample.html");
    Document doc = Jsoup.parse(renderedFile, "UTF-8");
    // Elements#attr returns "" when no matched element has the attribute.
    String href = doc.select("link[href]").attr("href");
    assertThat(href, is(""));
}
 
Example 3
Source File: WhenAttributesAreUsedInAsciidoctor.java    From asciidoctorj with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the content of {@code documentwithnote.asciidoc} as a string with image icons
 * enabled and an icons directory of {@code icons}, then checks the {@code src} of the
 * rendered {@code <img>} element.
 */
@Test
public void string_content_with_icons_enabled_and_iconsdir_set_should_be_rendered_with_iconsdir()
        throws IOException, SAXException, ParserConfigurationException {

    Map<String, Object> attributes = attributes()
            .icons(Attributes.IMAGE_ICONS).iconsDir("icons").asMap();
    Map<String, Object> options = options().attributes(attributes).asMap();

    String renderContent;
    InputStream content = new FileInputStream(
            classpath.getResource("documentwithnote.asciidoc"));
    try {
        renderContent = asciidoctor.convert(toString(content), options);
    } finally {
        // Release the file handle even when the conversion throws.
        content.close();
    }

    // Jsoup.parse(String, String) interprets its second argument as a base URI, not a
    // charset, so the single-argument overload is the correct one for an in-memory string.
    Document doc = Jsoup.parse(renderContent);
    Elements image = doc.select("img[src]");
    String srcValue = image.attr("src");
    assertThat(srcValue, is("icons/note.png"));

}
 
Example 4
Source File: WhenAttributesAreUsedInAsciidoctor.java    From asciidoctorj with Apache License 2.0 6 votes vote down vote up
/**
 * Converts {@code toc2sample.asciidoc} with {@code tableOfContents2(Placement.RIGHT)} and
 * checks that the {@code <body>} class list of the output contains {@code toc2} and
 * {@code toc-right}.
 */
@Test
public void table_of_content_2_should_be_placeable() throws IOException {

    Attributes attributes = attributes().tableOfContents2(Placement.RIGHT).get();
    Options options = options()
            .inPlace(false)
            .toFile(new File(testFolder.getRoot(), "toc2sample.html"))
            .safe(SafeMode.UNSAFE)
            .attributes(attributes)
            .get();

    asciidoctor.convertFile(classpath.getResource("toc2sample.asciidoc"), options);

    File renderedFile = new File(testFolder.getRoot(), "toc2sample.html");
    String[] bodyClasses = Jsoup.parse(renderedFile, "UTF-8")
            .select("body")
            .attr("class")
            .split(" ");
    assertThat(bodyClasses, hasItemInArray("toc2"));
    assertThat(bodyClasses, hasItemInArray("toc-right"));

    renderedFile.delete();
}
 
Example 5
Source File: WhenAttributesAreUsedInAsciidoctor.java    From asciidoctorj with Apache License 2.0 6 votes vote down vote up
/**
 * Converts {@code toc2sample.asciidoc} with {@code tableOfContents(Placement.RIGHT)} and
 * checks that the {@code <body>} class list of the output contains {@code toc2} and
 * {@code toc-right}.
 */
@Test
public void table_of_content_should_be_placeable() throws IOException {

    Attributes attributes = attributes().tableOfContents(Placement.RIGHT).get();
    Options options = options()
            .inPlace(false)
            .toFile(new File(testFolder.getRoot(), "toc2sample.html"))
            .safe(SafeMode.UNSAFE)
            .attributes(attributes)
            .get();

    asciidoctor.convertFile(classpath.getResource("toc2sample.asciidoc"), options);

    File renderedFile = new File(testFolder.getRoot(), "toc2sample.html");
    String[] bodyClasses = Jsoup.parse(renderedFile, "UTF-8")
            .select("body")
            .attr("class")
            .split(" ");
    assertThat(bodyClasses, hasItemInArray("toc2"));
    assertThat(bodyClasses, hasItemInArray("toc-right"));

    renderedFile.delete();
}
 
Example 6
Source File: WhenAsciidoctorIsCalledUsingCli.java    From asciidoctorj with Apache License 2.0 6 votes vote down vote up
/**
 * Invokes the CLI with {@code -a stylesheet=mystyles.css -a linkcss} on
 * {@code rendersample.asciidoc} and checks the {@code href} of the {@code <link>} element
 * in the HTML file produced next to the input.
 */
@Test
public void composed_attributes_should_be_built_as_attributes_map() throws IOException {

	File inputFile = classpath.getResource("rendersample.asciidoc");
	String inputPath = inputFile.getPath().substring(pwd.length() + 1);
	File expectedFile = new File(inputPath.replaceFirst("\\.asciidoc$", ".html"));

	try {
		new AsciidoctorInvoker().invoke("-a", "stylesheet=mystyles.css", "-a", "linkcss", inputPath);

		Document doc = Jsoup.parse(expectedFile, "UTF-8");
		Elements link = doc.select("link[href]");
		String attr = link.attr("href");
		assertThat(attr, is("./mystyles.css"));
	} finally {
		// The output is written beside the input file, so remove it even when an
		// assertion fails; otherwise it would be left behind for later runs.
		expectedFile.delete();
	}

}
 
Example 7
Source File: MovieServiceImpl.java    From albert with MIT License 5 votes vote down vote up
/**
 * Fetches the listing page for {@code page.getCurPageNo()}, and for every link under
 * {@code .thumbnail a} inside {@code #post_container > li} whose title is not already
 * known (per {@code checkHave}), loads the record, stores it through the mapper, sends a
 * save message for the stored row and collects the new movie.
 *
 * @param page paging information; only the current page number is read here
 * @return the movies added during this call
 * @throws RuntimeException wrapping the {@link IOException} raised while fetching the page
 */
@Override
public List<Movie> getPage(PageInfo page) {
	Document doc = null;
	try {
		doc = getConnect(getPageUrl(page.getCurPageNo())).get();
	} catch (IOException e) {
		throw new RuntimeException(e);
	}

	Elements listItems = doc.select("#post_container > li");
	if (listItems == null) {
		return null;
	}

	List<Movie> added = new ArrayList<>();
	for (Element listItem : listItems) {
		for (Element anchor : listItem.select(".thumbnail a")) {
			String href = anchor.attr("href");
			String imgUrl = anchor.select(" > img").attr("src");
			String title = anchor.attr("title");
			if (checkHave(title)) {
				// Already stored; nothing to do for this link.
				continue;
			}

			Movie fetched = getRecord(href);
			fetched.setName(title);
			fetched.setImg(imgUrl);
			movieMapper.addMovie(fetched);
			logger.info("插入:" + fetched.getName());

			// Re-read the stored row by name and publish it with fresh timestamps.
			Movie stored = movieMapper.getMovieDetailbyName(title);
			stored.setReviewNum(0);
			stored.setAddTime(new Date());
			stored.setUpdateTime(new Date());
			super.sendMessage(stored, Constants.Cache.Type.save);

			added.add(fetched);
		}
	}
	return added;
}
 
Example 8
Source File: hindu.java    From Gazetti_Newspaper_Reader with MIT License 5 votes vote down vote up
/**
 * Resolves the article image URL from {@code bodyElement} and stores it in
 * {@code mImageURL}.
 *
 * <p>The first configured selector is tried first; if it matches, the {@code src} of its
 * first element is used. Otherwise the second selector is tried and the {@code src} of an
 * {@code img} inside its first {@code div#pic} is used. When neither yields an element,
 * {@code mImageURL} is left untouched.
 *
 * @param bodyElement element to search in
 * @return the current value of {@code mImageURL}
 */
private String getImageURL(Element bodyElement) {
    Elements mainImageElement = bodyElement.select(ConfigService.getInstance().getTheHinduImageFirst());
    Elements carouselElements = bodyElement.select(ConfigService.getInstance().getTheHinduImageSecond());

    if (!mainImageElement.isEmpty()) {
        mImageURL = mainImageElement.first().attr("src");
    } else if (!carouselElements.isEmpty()) {
        // first() returns null when no div#pic exists inside the carousel; guard it so a
        // carousel without that container does not raise a NullPointerException.
        Element carouselPic = carouselElements.select("div#pic").first();
        if (carouselPic != null) {
            mImageURL = carouselPic.select("img").attr("src");
        }
    }

    return mImageURL;

}
 
Example 9
Source File: WhenAsciidoctorIsCalledUsingCli.java    From asciidoctorj with Apache License 2.0 5 votes vote down vote up
/**
 * Invokes the CLI on {@code sample-book.adoc} without extra arguments and checks that the
 * generated HTML exists and that its {@code <body>} class is {@code book}.
 */
@Test
public void should_honor_doctype_defined_in_document_by_default() throws IOException {
    File inputFile = classpath.getResource("sample-book.adoc");
    String inputPath = inputFile.getPath().substring(pwd.length() + 1);
    File expectedFile = new File(inputPath.replaceFirst("\\.adoc$", ".html"));

    try {
        new AsciidoctorInvoker().invoke(inputPath);

        assertThat(expectedFile.exists(), is(true));
        Document doc = Jsoup.parse(expectedFile, "UTF-8");
        Elements body = doc.select("body");
        String attr = body.attr("class");
        assertThat(attr, is("book"));
    } finally {
        // The output is written beside the input file, so remove it even when an
        // assertion fails; otherwise it would be left behind for later runs.
        expectedFile.delete();
    }
}
 
Example 10
Source File: Class.java    From nju-lib-downloader with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Extracts the integer held in the {@code value} attribute of the
 * {@code input[id=totalSize]} element of the given HTML.
 *
 * @param html HTML text to parse
 * @return the parsed size, or {@code -1} when the element is absent or its value is
 *         missing or not a valid integer
 */
public static int getBookSizeFromHtml(String html) {
    Document doc = Jsoup.parse(html);
    Elements sizeNode = doc.select("input[id=totalSize]");
    if (sizeNode.isEmpty()) {
        return -1;
    }
    // attr() returns "" (never null) when the attribute is missing, so an absent or
    // malformed value has to be caught at parse time to honour the -1 sentinel.
    String sizeString = sizeNode.attr("value").trim();
    try {
        return Integer.parseInt(sizeString);
    } catch (NumberFormatException e) {
        return -1;
    }
}
 
Example 11
Source File: Downloader.java    From nju-lib-downloader with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Initializes the download metadata of the book, including the numeric book id, the book
 * name, the page total and the target directory.
 *
 * <p>The first book page is requested up to {@code retryTime} times. The number that
 * directly follows the marker text in the response is stored in {@code total}, the
 * {@code value} of the {@code bookId} element is stored on the book, and the text of the
 * detail link is used as the book name.
 *
 * @return {@code true} when the page was fetched and the book name link was found;
 *         {@code false} when every attempt failed or the name link is missing
 */
public boolean initBookPara() {
    String result = null;
    // Stop at the first successful fetch. A failed attempt must not mark the whole
    // operation as failed when a later retry succeeds.
    for (int i = 0; i < retryTime && result == null; i++) {
        try {
            result = viewBookPageWeb(1);
        } catch (IOException ignored) {
            // Deliberately ignored: the loop retries until retryTime is exhausted.
        }
    }
    if (result == null) {
        return false;
    }

    int marker = result.indexOf("共有");
    if (marker != -1) {
        // The count starts right after the two-character marker.
        int start = marker + 2;
        int end = start;
        // Bounds-checked so a digit run that reaches the end of the text cannot overrun it.
        while (end < result.length() && result.charAt(end) >= '0' && result.charAt(end) <= '9') {
            end++;
        }
        if (end > start) {
            total = Integer.parseInt(result.substring(start, end));
        }
    }

    Document doc = Jsoup.parse(result);
    Elements idIntNode = doc.select("[id=bookId]");
    String idInt = idIntNode.attr("value");
    book.setIdInt(idInt);

    Elements nameNode = doc.select("[href=/v3/book/detail/" + book.getId() + "]");
    if (nameNode.isEmpty()) {
        return false;
    }
    book.setName(nameNode.get(0).text());
    setDirectory(book.getId());
    return true;
}
 
Example 12
Source File: StickyXXXRipper.java    From ripme with MIT License 5 votes vote down vote up
/**
 * Loads the page at {@code url}, takes the {@code src} of the
 * {@code .wp-video > video > source} elements and queues that URL for download, then
 * waits for the download threads.
 *
 * @throws IOException if the page cannot be retrieved, no matching source element exists,
 *                     or the extracted {@code src} is not a valid URL
 */
@Override
public void rip() throws IOException {
    LOGGER.info("Retrieving " + this.url);

    Elements videoSources = Http.url(url).get().select(".wp-video > video > source");
    if (videoSources.isEmpty()) {
        throw new IOException("Could not find Embed code at " + url);
    }

    URL videoUrl = new URL(videoSources.attr("src"));
    String filePrefix = HOST + "_" + getGID(this.url);
    addURLToDownload(videoUrl, filePrefix);
    waitForThreads();
}
 
Example 13
Source File: Book.java    From nju-lib-downloader with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds a list of books from a listing page.
 *
 * <p>Each {@code div[class=boxListLi5]} that contains an {@code a[href][title]} link
 * produces one book: the link supplies the name ({@code title}) and the id (the part of
 * {@code href} after {@code /book/detail/}, or the whole {@code href} when that prefix is
 * absent); the first {@code span} is split on {@code /} into author, press and publish
 * date when it has exactly three parts; the {@code p} elements supply the introduction;
 * and {@code img[src]} supplies the cover URL.
 *
 * @param html HTML text of the listing page
 * @return the books found, possibly empty
 */
public static List<Book> getBookFromHTML(String html) {
    final String detailPrefix = "/book/detail/";

    Elements entries = Jsoup.parse(html).select("div[class=boxListLi5]");
    List<Book> books = new ArrayList<>(30);
    if (entries == null) {
        return books;
    }

    for (Element entry : entries) {
        String id = null;
        String name = null;
        String author = null;
        String publishDate = null;
        String press = null;
        String introduction = null;
        String coverUrl = null;

        Elements titleLinks = entry.select("a[href][title]");
        if (titleLinks != null && titleLinks.size() > 0) {
            Elements coverImages = entry.select("img[src]");
            if (coverImages != null && coverImages.size() > 0) {
                coverUrl = coverImages.attr("src");
            }

            Element titleLink = titleLinks.get(0);
            name = titleLink.attr("title");
            id = titleLink.attr("href");
            int prefixAt = id.indexOf(detailPrefix);
            if (prefixAt != -1) {
                id = id.substring(prefixAt + detailPrefix.length());
            }

            Elements spans = entry.select("span");
            if (spans != null && spans.size() > 0) {
                String pressInfo = spans.get(0).text();
                if (pressInfo != null) {
                    String[] parts = pressInfo.split("/");
                    if (parts != null && parts.length == 3) {
                        author = parts[0].trim();
                        press = parts[1].trim();
                        publishDate = parts[2].trim();
                    }
                }
            }

            Elements paragraphs = entry.select("p");
            if (paragraphs != null && paragraphs.size() > 0) {
                introduction = paragraphs.text();
            }
        }

        if (id != null) {
            books.add(new Book(id, name, press, author, publishDate, introduction, coverUrl));
        }
    }
    return books;
}
 
Example 14
Source File: MyJsoupUtils.java    From spring-boot with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the absolute {@code href} of the single {@code a[href]} element found in the
 * given element.
 *
 * <p>Example markup:
 * {@code <a href="home/store/catalogue_tc/catalogue_tc_browse.htm?commid=625645">WMO</a>}.
 * The {@code abs:href} lookup resolves the link against the document's base URI, whereas a
 * plain {@code href} lookup would return the value as written.
 *
 * @param element element to search in
 * @return the absolute link, or an empty string when no {@code a[href]} is present
 * @throws IllegalArgumentException when more than one {@code a[href]} is present
 */
public static String getHrefValue(Element element) {
    Elements anchors = element.select("a[href]");
    int anchorCount = anchors.size();

    if (anchorCount > 1) {
        throw new IllegalArgumentException("多个 a 标签");
    }

    return anchorCount == 0 ? "" : anchors.attr("abs:href");
}
 
Example 15
Source File: AsyncVersionCheck.java    From android with Apache License 2.0 4 votes vote down vote up
/**
 * Checks the version published on the Opera mobile store page against {@code mVersion}
 * and, when the published one is higher, downloads the APK to the external storage
 * {@code /download/} directory.
 *
 * <p>The published version is read from the second {@code .pd-block} of the store page,
 * between {@code "number:"} and the following {@code "]"}. Both version strings are
 * reduced to their digits and compared as integers ({@code slideme} for the store,
 * {@code tvp} for the installed app). The downloaded file is stored in
 * {@code mOutputFile}.
 *
 * @param params unused
 * @return always {@code null}
 */
@Override
protected String doInBackground(String... params) {
    final String userAgent = "Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30";
    try {
        Connection.Response response = Jsoup.connect("http://m.apps.opera.com/en_us/tv_portal_stream_tv_and_movies.html")
                .userAgent(userAgent)
                .execute();
        if (response.statusCode() != 200) {
            return null;
        }

        document = Jsoup.connect("http://m.android-4-0-3-plus.apps.opera.com/en_us/tv_portal_stream_tv_and_movies.html")
                .userAgent(userAgent)
                .get();
        Document deviceredirect = Jsoup.connect(document.baseUri()).userAgent(userAgent).get();

        // The version lives in the second .pd-block; bail out instead of throwing when the
        // page layout does not provide it.
        Elements blocks = deviceredirect.select(".pd-block");
        if (blocks.size() < 2) {
            return null;
        }
        String versionText = blocks.get(1).select("p").toString();
        if (versionText.contains("Version number:")) {
            int start = versionText.indexOf("number:") + 8;
            // Look for the closing bracket after the version, not anywhere in the text.
            int end = versionText.indexOf("]", start);
            if (end != -1) {
                mOperaVersion = versionText.substring(start, end);
            }
        }

        // TODO: compare against a versionCode served by our own server instead of scraping
        // and stripping version strings.

        // Null must be ruled out before the strings are dereferenced.
        if (mVersion == null || mOperaVersion == null) {
            return null;
        }

        // Just remove everything that is not a digit.
        mVersion_striped = mVersion.replaceAll("[^0-9]", "");
        mOperaVersion_striped = mOperaVersion.replaceAll("[^0-9]", "");
        if (mVersion_striped.length() == 0 || mOperaVersion_striped.length() == 0) {
            return null;
        }
        try {
            slideme = Integer.parseInt(mOperaVersion_striped);
            tvp = Integer.parseInt(mVersion_striped);
        } catch (NumberFormatException e) {
            // Digit run too long for an int; there is nothing meaningful to compare.
            return null;
        }

        if ((tvp != 0 || slideme != 0) && slideme > tvp) {  //  Update TV Portal from the store
            // Get apk url
            Document downloadPage = Jsoup.connect(document.baseUri())
                    .userAgent(userAgent)
                    .get();
            String apkHref = downloadPage.select(".download_buttons a").attr("href");

            // Download apk to sd card
            HttpURLConnection httpURLConnection = (HttpURLConnection) new URL(apkHref).openConnection();
            try {
                httpURLConnection.connect();

                File downloadDir = new File(Environment.getExternalStorageDirectory() + "/download/");
                downloadDir.mkdirs();
                mOutputFile = new File(downloadDir, "Tv-Portal.v" + mOperaVersion + ".apk");
                if (mOutputFile.exists()) {
                    mOutputFile.delete();
                }

                // Both streams are closed in finally blocks so a failed transfer does not
                // leak file or socket handles.
                InputStream is = httpURLConnection.getInputStream();
                try {
                    FileOutputStream fos = new FileOutputStream(mOutputFile);
                    try {
                        byte[] buffer = new byte[1024];
                        int length;
                        while ((length = is.read(buffer)) != -1) {
                            fos.write(buffer, 0, length);
                        }
                    } finally {
                        fos.close();
                    }
                } finally {
                    is.close();
                }
            } finally {
                httpURLConnection.disconnect();
            }
        }
    } catch (IOException ignored) {
        // Best effort: a failed check or download leaves the installed version in place.
    }
    return null;
}
 
Example 16
Source File: WhenAttributesAreUsedInAsciidoctor.java    From asciidoctorj with Apache License 2.0 4 votes vote down vote up
/**
 * Converts a link macro carrying a {@code window="_blank"} attribute with
 * {@code linkAttrs} enabled and checks the {@code target} of the rendered anchor.
 */
@Test
public void linkattrs_should_make_asciidoctor_render_link_macro_attributes() {

    Attributes attributes = attributes().linkAttrs(true).get();
    Options options = options().attributes(attributes).get();

    String html = asciidoctor.convert(
            "http://google.com[Google, window=\"_blank\"]", options);

    Elements anchorsWithTarget = Jsoup.parse(html).select("a[target]");
    assertThat(anchorsWithTarget.attr("target"), is("_blank"));
}
 
Example 17
Source File: GalleryRequest.java    From meizhi with Apache License 2.0 4 votes vote down vote up
/**
 * Parses the response body as HTML and builds one {@code Album} per
 * {@code .container.main .box} element that has a link starting with {@code /m/} and an
 * image with a non-empty {@code src}.
 *
 * <p>The album id is the link with its first three characters removed; the cover is the
 * image source cut at {@code ?imageView2} when that marker is present.
 *
 * @param response raw network response; its headers determine the charset
 * @return a success response with the albums, or an error response wrapping the
 *         {@link UnsupportedEncodingException} when the charset is not supported
 */
@Override
protected Response<List<Album>> parseNetworkResponse(NetworkResponse response) {
    final String albumPathPrefix = "/m/";
    final String coverQueryMarker = "?imageView2";

    try {
        List<Album> albums = new ArrayList<>();

        String html = new String(response.data,
                HttpHeaderParser.parseCharset(response.headers));
        Document document = Jsoup.parse(html);

        for (Element box : document.select(".container.main .box")) {
            Elements anchors = box.select("a[href]");
            if (anchors.isEmpty()) {
                continue;
            }

            String href = anchors.attr("href");
            if (!href.startsWith(albumPathPrefix)) {
                continue;
            }

            Elements images = anchors.select("img");
            if (images.isEmpty()) {
                continue;
            }

            String coverUrl = images.attr("src");
            if (TextUtils.isEmpty(coverUrl)) {
                continue;
            }

            // Drop the image-processing suffix, if any.
            int markerAt = coverUrl.indexOf(coverQueryMarker);
            if (markerAt >= 0) {
                coverUrl = coverUrl.substring(0, markerAt);
            }

            Album album = new Album();
            album.id = href.substring(3);
            album.cover = coverUrl;
            albums.add(album);
        }

        return Response.success(albums, HttpHeaderParser.parseCacheHeaders(response));
    } catch (UnsupportedEncodingException e) {
        return Response.error(new ParseError(e));
    }
}
 
Example 18
Source File: WhenAsciidoctorIsCalledUsingCli.java    From asciidoctorj with Apache License 2.0 4 votes vote down vote up
/**
 * Invokes the CLI with {@code -o -} on {@code rendersample.asciidoc} while stdout is
 * redirected, then parses the captured output and checks the {@code class} of the
 * {@code div[class]} elements.
 */
@Test
public void output_file_hyphen_symbol_should_render_output_to_stdout() {

	ByteArrayOutputStream output = redirectStdout();

	File inputFile = classpath.getResource("rendersample.asciidoc");
	String inputPath = inputFile.getPath().substring(pwd.length() + 1);
	new AsciidoctorInvoker().invoke("-o", "-", inputPath);

	// Jsoup.parse(String, String) interprets its second argument as a base URI, not a
	// charset, so the single-argument overload is the correct one for an in-memory string.
	Document doc = Jsoup.parse(output.toString());

	Elements link = doc.select("div[class]");

	String attr = link.attr("class");
	assertThat(attr, is("sect1"));

}
 
Example 19
Source File: GetTopicDetailTask.java    From guanggoo-android with Apache License 2.0 4 votes vote down vote up
/**
 * Loads the topic page at {@code mUrl} and assembles a {@code TopicDetail} from it: topic
 * meta information, favorite state, content HTML, comment count and comments. Afterwards
 * the author's profile is requested to fill in the followed flag, and the detail is
 * delivered through {@code successOnUI} whether or not that profile request succeeds.
 *
 * <p>Any missing mandatory section (detail, header, content) or a failed page load is
 * reported through {@code failedOnUI} and ends the task.
 */
@Override
public void run() {
    Document doc;

    try {
        doc = get(mUrl);
    } catch (IOException e) {
        e.printStackTrace();
        failedOnUI(e.getMessage());
        return;
    }

    tryFixAuthStatus(doc);

    Elements topicDetailElements = doc.select("div.topic-detail");

    if (topicDetailElements.isEmpty()) {
        failedOnUI("找不到主题详情");
        return;
    }

    Elements elements = topicDetailElements.select("div.ui-header");

    if (elements.isEmpty()) {
        failedOnUI("找不到主题元信息");
        return;
    }

    final TopicDetail topicDetail = new TopicDetail();

    final Topic topic = GetTopicListTask.createTopicFromElement(elements.first());

    topicDetail.setTopic(topic);

    // Parse the favorite state.
    Favorite favorite = new Favorite();
    Elements favouriteElement = doc.select(".J_topicFavorite");
    // NOTE(review): select() returns an empty Elements rather than null, so this guard
    // always passes and attr() yields "" when nothing matched - confirm that treating a
    // missing marker as "favorite" is intended.
    if (favouriteElement != null) {
        String dataType = favouriteElement.attr("data-type");
        favorite.setFavorite(!Favorite.TYPE_NOT_FAVORITE.equals(dataType));
    }
    topicDetail.setFavorite(favorite);

    elements = topicDetailElements.select("div.ui-content");

    if (elements.isEmpty()) {
        failedOnUI("找不到主题内容");
        return;
    }

    topicDetail.setContent(elements.first().outerHtml());

    Elements commentsElements = doc.select("div.topic-reply");

    Elements commentsHeaderElements = commentsElements.select("div.ui-header");

    if (!commentsHeaderElements.isEmpty()) {
        // Strip non-digits first and test for emptiness afterwards: a header without any
        // digit would otherwise reach Integer.valueOf("") and throw.
        String commentsCountText = commentsHeaderElements.first().text().replaceAll("[^\\d]", "");
        if (!TextUtils.isEmpty(commentsCountText)) {
            topicDetail.setCommentsCount(Integer.valueOf(commentsCountText));
        }
    }

    Map<Integer, Comment> comments = GetCommentsTask.getCommentsFromElements(commentsElements);

    topicDetail.setComments(comments);

    checkTelephoneVerified(doc);

    // The profile lookup only enriches the result; the detail is delivered either way.
    new GetUserProfileTask(topicDetail.getTopic().getMeta().getAuthor().getUrl(), new OnResponseListener<UserProfile>() {
        @Override
        public void onSucceed(UserProfile data) {
            if (data.isFollowed()) {
                topicDetail.getTopic().getMeta().getAuthor().setFollowed(true);
            }
            successOnUI(topicDetail);
        }

        @Override
        public void onFailed(String msg) {
            successOnUI(topicDetail);
        }
    }).run();
}
 
Example 20
Source File: ImgurRipper.java    From ripme with MIT License 4 votes vote down vote up
/**
 * Builds the album title used for this rip.
 *
 * <p>For albums of type {@code ALBUM_TYPE.ALBUM} the result is
 * {@code "imgur_" + gid + "_" + title}, where the title comes from the page's
 * {@code og:title} meta tag, or from the {@code <title>} tag when the meta tag holds one
 * of Imgur's default titles; it is left empty when both hold a default title. For any
 * other album type, or when the album page cannot be loaded, the result is
 * {@code getHost() + "_" + gid}.
 *
 * @param url album URL
 * @return the title to use for the album
 * @throws MalformedURLException declared by the signature; presumably raised by
 *                               {@code getGID} - confirm against its definition
 */
public String getAlbumTitle(URL url) throws MalformedURLException {
    String gid = getGID(url);
    if (this.albumType != ALBUM_TYPE.ALBUM) {
        return getHost() + "_" + gid;
    }

    try {
        // Attempt to use album title as GID
        if (albumDoc == null) {
            albumDoc = Http.url(url).get();
        }

        // TODO: Add config option for including username in album title
        // (".post-account" holds it). It's possible a lot of users would not be
        // interested in that info.

        final String defaultTitle1 = "Imgur: The most awesome images on the Internet";
        final String defaultTitle2 = "Imgur: The magic of the Internet";

        String title = null;
        LOGGER.info("Trying to get album title");
        Elements ogTitle = albumDoc.select("meta[property=og:title]");
        if (ogTitle != null) {
            title = ogTitle.attr("content");
            LOGGER.debug("Title is " + title);
        }

        // Unnamed albums report one of Imgur's default titles; keep those out of the
        // album name.
        boolean metaHoldsDefault = title.contains(defaultTitle1) || title.contains(defaultTitle2);
        if (metaHoldsDefault) {
            LOGGER.debug("Album is untitled or imgur is returning the default title");
            // Start from "" so the title stays empty unless the <title> tag provides a
            // real one below.
            title = "";
            LOGGER.debug("Trying to use title tag to get title");
            Elements titleTag = albumDoc.select("title");
            if (titleTag != null) {
                String tagText = titleTag.text();
                if (tagText.contains(defaultTitle1) || tagText.contains(defaultTitle2)) {
                    LOGGER.debug("Was unable to get album title or album was untitled");
                } else {
                    title = tagText;
                }
            }
        }

        StringBuilder albumTitle = new StringBuilder("imgur_").append(gid);
        if (title != null) {
            albumTitle.append("_").append(title);
        }
        return albumTitle.toString();
    } catch (IOException e) {
        // Fall back to default album naming convention
    }
    return getHost() + "_" + gid;
}