Java Code Examples for org.jsoup.Jsoup#parse()

The following examples show how to use org.jsoup.Jsoup#parse(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: job   File: Job51ResumeParser.java    License: MIT License 6 votes vote down vote up
/**
 * Reads {@code file} as a MIME mail message and parses the HTML found in its body parts.
 *
 * <p>The body parts are tried in order; the first one for which {@code parseHtml}
 * returns a non-null string is handed to jsoup.
 *
 * @param file the mail file to read
 * @return the parsed document, or {@code null} when no body part yields HTML
 * @throws Exception if the file cannot be read or the message cannot be decoded
 */
protected Document parse2HtmlAsMail(File file) throws Exception {
  // try-with-resources closes the stream even when message parsing throws.
  try (InputStream in = new FileInputStream(file)) {
    Session mailSession = Session.getDefaultInstance(System.getProperties(), null);
    MimeMessage msg = new MimeMessage(mailSession, in);

    // NOTE(review): assumes the message content is multipart; confirm that
    // single-part messages never reach this method.
    Multipart part = (Multipart) msg.getContent();
    String html = null;
    for (int i = 0; i < part.getCount(); i++) {
      html = parseHtml(part.getBodyPart(i));
      if (html != null) {
        break;
      }
    }
    return html == null ? null : Jsoup.parse(html);
  }
}
 
Example 2
Source Project: astor   File: ElementTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Verifies that {@code appendTo} moves an element into the target parent, returns the
 * moved element itself, and removes it from the document it came from.
 */
@Test
public void testAppendTo() {
    Document parentDoc = Jsoup.parse("<div class='a'></div>");
    Document childDoc = Jsoup.parse("<div class='b'></div><p>Two</p>");
    Element target = parentDoc.body();

    Element childDiv = childDoc.select("div").first();
    Element childParagraph = childDoc.select("p").first();

    // Move the div under the parent body; the call must hand back the same element.
    Element returnedForDiv = childDiv.appendTo(target);
    assertEquals(childDiv, returnedForDiv);

    // Then nest the paragraph inside the div that was just moved.
    Element returnedForParagraph = childParagraph.appendTo(childDiv);
    assertEquals(childParagraph, returnedForParagraph);

    assertEquals("<div class=\"a\"></div>\n<div class=\"b\">\n <p>Two</p>\n</div>", parentDoc.body().html());
    // Both nodes were moved out, so the source body is now empty.
    assertEquals("", childDoc.body().html());
}
 
Example 3
Source Project: java-Crawler   File: Demo.java    License: MIT License 6 votes vote down vote up
/**
 * Downloads the page at {@code url}, decodes it as gb2312 and queues the target of every
 * anchor whose {@code href} does not contain {@code "webPlay"}.
 *
 * <p>Each queued entry is the raw {@code href} prefixed with {@code http://www.dy2018.com}.
 *
 * @param url the page to fetch
 * @return a queue holding the collected absolute URLs
 * @throws Exception if the request or the decoding of the response fails
 */
public static Queue getUrlQueue(String url) throws Exception{
    Queue queue = new Queue();

    String index;
    // Both the client and the response hold a connection; close them even when the request fails.
    try (CloseableHttpClient closeableHttpClient = HttpClients.createDefault();
         CloseableHttpResponse closeableHttpResponse = closeableHttpClient.execute(new HttpGet(url))) {
        HttpEntity httpEntity = closeableHttpResponse.getEntity();
        index = EntityUtils.toString(httpEntity, "gb2312");
    }

    Document doc = Jsoup.parse(index);
    for (Element element : doc.select("a")) {
        String aurl = element.attr("href");
        // Links containing "webPlay" are skipped.
        if (!aurl.contains("webPlay")) {
            queue.enQueue("http://www.dy2018.com" + aurl);
        }
    }
    return queue;
}
 
Example 4
Source Project: astor   File: ElementTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Checks that {@code getElementsByClass} matches elements carrying the class among several
 * class names, returns them in document order, and yields an empty list for an unknown class.
 */
@Test public void testGetElementsWithClass() {
    String html = "<div class='mellow yellow'><span class=mellow>Hello <b class='yellow'>Yellow!</b></span><p>Empty</p></div>";
    Document doc = Jsoup.parse(html);

    // "mellow" appears on the outer div and on the span.
    List<Element> mellowMatches = doc.getElementsByClass("mellow");
    assertEquals(2, mellowMatches.size());
    assertEquals("div", mellowMatches.get(0).tagName());
    assertEquals("span", mellowMatches.get(1).tagName());

    // "yellow" appears on the outer div and on the nested b.
    List<Element> yellowMatches = doc.getElementsByClass("yellow");
    assertEquals(2, yellowMatches.size());
    assertEquals("div", yellowMatches.get(0).tagName());
    assertEquals("b", yellowMatches.get(1).tagName());

    // A class that is not present matches nothing.
    List<Element> soloMatches = doc.getElementsByClass("solo");
    assertEquals(0, soloMatches.size());
}
 
Example 5
Source Project: PicKing   File: Nanrentu.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public String getDetailNext(String baseUrl, String currentUrl, byte[] result) throws UnsupportedEncodingException {
    Document document = Jsoup.parse(new String(result, "gb2312"));
    Elements elements = document.select("div.pagelist a:contains(下一页)");
    if (elements.size() > 0) {
        String tempUrl = elements.get(0).attr("href");
        if (tempUrl == null || "".equals(tempUrl))
            return "";
        Pattern pattern = Pattern.compile("http.*/");
        Matcher matcher = pattern.matcher(currentUrl);
        if (matcher.find()) {
            return matcher.group() + tempUrl;
        }
    }
    return "";
}
 
Example 6
Source Project: PicKing   File: Yande.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Finds the URL of the next content page in a UTF-8 encoded HTML response.
 *
 * <p>The whole response and the resulting URL are written to the log at error level.
 *
 * @param baseUrl    prefix placed in front of the link's {@code href}
 * @param currentUrl not used by this implementation
 * @param result     raw response bytes
 * @return {@code baseUrl} plus the href of the first {@code div#paginator a.next_page}
 *         link, or an empty string when there is no such link
 * @throws UnsupportedEncodingException if the utf-8 charset is unavailable
 */
@Override
public String getContentNext(String baseUrl, String currentUrl, byte[] result) throws UnsupportedEncodingException {
    String body = new String(result, "utf-8");
    Document document = Jsoup.parse(body);
    Log.e("Yande", "getSinglePicContent: " + body);

    Elements nextLinks = document.select("div#paginator a.next_page");
    if (nextLinks.isEmpty()) {
        return "";
    }

    String nextUrl = baseUrl + nextLinks.get(0).attr("href");
    Log.e("Yande", "getContentNext: " + nextUrl);
    return nextUrl;
}
 
Example 7
/**
 * Loads the multiupload.biz front page and extracts the upload form data from it:
 * the form action, the upload id and the value of every pre-checked service input.
 *
 * @throws Exception if fetching the page fails
 */
private void initialize() throws Exception {
    responseString = NUHttpClientUtils.getData("http://multiupload.biz/", httpContext);
    doc = Jsoup.parse(responseString);

    uploadURL = doc.select("form#F1").attr("action");
    uploadId = doc.select("input[name=upload_id]").val();

    // Collect the value of every service checkbox that is checked by default.
    for (Element checkedService : doc.select(".srvtbl input[checked]")) {
        services.add(checkedService.val());
    }
}
 
Example 8
Source Project: Toutiao   File: PhotoContentPresenter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Scans the {@code <script>} elements of a page for the gallery data and, when found,
 * deserializes it into the {@code bean} field.
 *
 * @param HTML the page markup
 * @return {@code true} if at least one script yielded a gallery bean, {@code false} otherwise
 */
private Boolean parseHTML(String HTML) {
    boolean flag = false;
    Document doc = Jsoup.parse(HTML);
    // Get all script tags
    Elements scripts = doc.getElementsByTag("script");
    for (Element e : scripts) {
        // Only scripts that mention the gallery info are of interest
        String script = e.toString();
        if (script.contains("BASE_DATA.galleryInfo")) {
            // Keep only the content of the script (its first child node)
            script = e.childNode(0).toString();

            // The regex is used purely as a gate: the match itself is never read.
            Matcher matcher = Pattern.compile("(JSON.parse\\(\\\".+\\))").matcher(script);
            while (matcher.find()) {
                int count = matcher.groupCount();
                if (count >= 1) {
                    // NOTE(review): these offsets come from the first "(" and the first "),"
                    // in the whole script, not from the regex match; confirm this is intended.
                    // If ")," is absent, end is -1 and substring throws.
                    int start = script.indexOf("(");
                    int end = script.indexOf("),");
                    // start + 2 / end - 1 presumably drop the surrounding quote characters - TODO confirm
                    String json = script.substring(start + 2, end - 1);

                    // Handle special characters: decode unicode escapes, then drop all backslashes
                    json = ChineseUtil.UnicodeToChs(json);
                    json = json.replace("\\", "");
                    JsonReader reader = new JsonReader(new StringReader(json));
                    reader.setLenient(true);
                    bean = new Gson().fromJson(reader, PhotoGalleryBean.class);
                    Log.d(TAG, "parseHTML: " + bean.toString());
                    flag = true;
                    // Leaves only the while loop; the outer for keeps scanning the remaining scripts.
                    break;
                }
            }
        }
    }
    return flag;
}
 
Example 9
Source Project: astor   File: ElementTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that {@code parents()} lists every ancestor of an element, nearest first,
 * up to and including the {@code html} root.
 */
@Test public void testGetParents() {
    Document doc = Jsoup.parse("<div><p>Hello <span>there</span></div>");
    Elements ancestors = doc.select("span").first().parents();

    // Expected chain from the closest ancestor outwards.
    String[] expectedTags = {"p", "div", "body", "html"};
    assertEquals(expectedTags.length, ancestors.size());
    for (int i = 0; i < expectedTags.length; i++) {
        assertEquals(expectedTags[i], ancestors.get(i).tagName());
    }
}
 
Example 10
/**
 * Checks that upper-case Javadoc navigation markup ({@code A > FONT > B}) is parsed with
 * the nesting intact and the tag names reported in lower case.
 */
@Test public void handlesJavadocFont() {
    String h = "<TD BGCOLOR=\"#EEEEFF\" CLASS=\"NavBarCell1\">    <A HREF=\"deprecated-list.html\"><FONT CLASS=\"NavBarFont1\"><B>Deprecated</B></FONT></A>&nbsp;</TD>";
    Element anchor = Jsoup.parse(h).select("a").first();

    assertEquals("Deprecated", anchor.text());

    // The anchor wraps a font element, which in turn wraps the bold element.
    Element font = anchor.child(0);
    assertEquals("font", font.tagName());
    assertEquals("b", font.child(0).tagName());
}
 
Example 11
/**
 * Builds a {@link Lyrics} object from a {@code GetLyricResult} XML response.
 *
 * <p>When the response carries no artist or title, the caller-supplied original values
 * are used instead; otherwise the originals are stored alongside the response values.
 *
 * @param xmlString        the XML response body
 * @param originalMetadata optional original artist (index 0) and original title (index 1);
 *                         either may be omitted
 * @return the populated lyrics, or a {@code Lyrics.ERROR} instance when the input is empty
 *         or contains no {@code GetLyricResult} element
 */
public static Lyrics fromXml(String xmlString, String... originalMetadata) {
    if (TextUtils.isEmpty(xmlString))
        return new Lyrics(Lyrics.ERROR);
    Document doc = Jsoup.parse(xmlString);
    Element element = doc.getElementsByTag("GetLyricResult").first();
    // A response without the result element cannot be read; report it like empty input
    // instead of failing with a NullPointerException.
    if (element == null)
        return new Lyrics(Lyrics.ERROR);
    String id = element.getElementsByTag("TrackId").get(0).text();
    String checksum = element.getElementsByTag("LyricChecksum").get(0).text();
    Lyrics lyrics = new Lyrics(Lyrics.POSITIVE_RESULT);
    lyrics.setArtist(element.getElementsByTag("LyricArtist").get(0).text());
    lyrics.setTitle(element.getElementsByTag("LyricSong").get(0).text());
    lyrics.setURL("http://api.chartlyrics.com/apiv1.asmx/GetLyric?lyricId=" + id + "&lyricCheckSum=" + checksum);

    // Each index is checked separately: a caller may pass only the artist, and reading
    // index 1 in that case used to throw ArrayIndexOutOfBoundsException.
    int metadataCount = originalMetadata == null ? 0 : originalMetadata.length;
    String originalArtist = metadataCount > 0 ? originalMetadata[0] : null;
    String originalTitle = metadataCount > 1 ? originalMetadata[1] : null;

    if (TextUtils.isEmpty(lyrics.getArtist()))
        lyrics.setArtist(originalArtist);
    else
        lyrics.setOriginalArtist(originalArtist);
    if (TextUtils.isEmpty(lyrics.getTitle()))
        lyrics.setTitle(originalTitle);
    else
        lyrics.setOriginalTitle(originalTitle);
    lyrics.setText(element.getElementsByTag("Lyric").get(0).html());
    lyrics.setSource(domain);
    return lyrics;
}
 
Example 12
Source Project: astor   File: NodeTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that {@code unwrap()} replaces an element with its children, returns the first
 * of those children, and leaves that child attached to the former grandparent.
 */
@Test public void unwrap() {
    Document doc = Jsoup.parse("<div>One <span>Two <b>Three</b></span> Four</div>");
    Element span = doc.select("span").first();
    Node firstChildOfSpan = span.childNode(0);
    Node unwrapped = span.unwrap();

    String bodyHtml = TextUtil.stripNewlines(doc.body().html());
    assertEquals("<div>One Two <b>Three</b> Four</div>", bodyHtml);

    // The returned node is the span's former first child, a text node.
    assertTrue(unwrapped instanceof TextNode);
    TextNode unwrappedText = (TextNode) unwrapped;
    assertEquals("Two ", unwrappedText.text());
    assertEquals(unwrapped, firstChildOfSpan);
    assertEquals(unwrapped.parent(), doc.select("div").first());
}
 
Example 13
/**
 * Creates the instance with a parser callback that ignores its three arguments and always
 * supplies the same template data: an empty first argument and an empty
 * {@code dom-module} element with id {@code polymer}.
 */
CorrectAnnotationUsage() {
    super((ignoredClass, ignoredTag, ignoredService) ->
            new TemplateData("", Jsoup.parse("<dom-module id='polymer'></dom-module>")));
}
 
Example 14
/**
 * Checks that the parser inserts the implied {@code tbody}, {@code tr} and closing tags
 * for a sloppy table that itself contains another sloppy table.
 */
@Test public void handlesNestedImplicitTable() {
    String sloppyHtml = "<table><td>1</td></tr> <td>2</td></tr> <td> <table><td>3</td> <td>4</td></table> <tr><td>5</table>";
    String expectedHtml = "<table><tbody><tr><td>1</td></tr> <tr><td>2</td></tr> <tr><td> <table><tbody><tr><td>3</td> <td>4</td></tr></tbody></table> </td></tr><tr><td>5</td></tr></tbody></table>";

    Document doc = Jsoup.parse(sloppyHtml);
    String actualHtml = TextUtil.stripNewlines(doc.body().html());

    assertEquals(expectedHtml, actualHtml);
}
 
Example 15
/**
 * Logs in to UpLea.com by requesting the account's API key.
 *
 * <p>On success the key is stored in {@code api_key} and the credentials are remembered.
 * On failure the login state is reset, the error is reported and the account UI is shown
 * again.
 */
@Override
public void login() {
    loginsuccessful = false;
    try {
        initialize();

        NULogger.getLogger().info("Trying to log in to UpLea.com");
        httpPost = new NUHttpPost("http://api.uplea.com/api/get-my-api-key");

        List<NameValuePair> formparams = new ArrayList<NameValuePair>();
        formparams.add(new BasicNameValuePair("username", getUsername()));
        formparams.add(new BasicNameValuePair("password", getPassword()));

        UrlEncodedFormEntity entity = new UrlEncodedFormEntity(formparams, "UTF-8");
        httpPost.setEntity(entity);
        httpResponse = httpclient.execute(httpPost, httpContext);
        NULogger.getLogger().info(httpResponse.getStatusLine().toString());
        responseString = EntityUtils.toString(httpResponse.getEntity());
        // {"error":[],"result":{"api_key":"alphanumericstring"},"status":true}
        api_key = StringUtils.stringBetweenTwoStrings(responseString, "\"api_key\":\"", "\"");

        if (responseString.contains("true") && !api_key.isEmpty()) {
            EntityUtils.consume(httpResponse.getEntity());
            loginsuccessful = true;
            username = getUsername();
            password = getPassword();
            NULogger.getLogger().info("UpLea.com login successful!");

        } else {
            // Get error message.
            // The body was already read into responseString above. A response entity can
            // generally be read only once, so it is reused here instead of read again.
            Document doc = Jsoup.parse(responseString);
            // The response may contain no ".err" element (the documented success format is
            // JSON); fall back to a generic text instead of failing on a null element.
            String error = doc.select(".err").isEmpty()
                    ? "unknown error"
                    : doc.select(".err").first().text();

            if("Incorrect Login or Password".equals(error)){
                throw new NUInvalidLoginException(getUsername(), HOSTNAME);
            }

            //Generic exception
            throw new Exception("Login error: " + error);
        }
    } catch(NUException ex){
        resetLogin();
        ex.printError();
        accountUIShow().setVisible(true);
    } catch (Exception e) {
        resetLogin();
        NULogger.getLogger().log(Level.SEVERE, "{0}: {1}", new Object[]{getClass().getName(), e});
        showWarningMessage( Translation.T().loginerror(), HOSTNAME);
        accountUIShow().setVisible(true);
    }
}
 
Example 16
/**
 * Uploads {@code file} to SendSpace.com and stores the resulting download and delete links.
 *
 * <p>The sequence is: pick the HTTP context for a registered or anonymous user, enforce the
 * size limit, call {@code initialize()}, post the multipart form to {@code uploadURL}, then
 * read both links out of the response page. Failures are reported through
 * {@code uploadInvalid()} or {@code uploadFailed()}.
 */
@Override
public void run() {
    try {
        if (sendSpaceAccount.loginsuccessful) {
            // Logged-in account: reuse the account's HTTP context.
            userType = "reg";
            httpContext = sendSpaceAccount.getHttpContext();
            maxFileSizeLimit = 314572800; // 300 MB
        } else {
            // Anonymous upload: start with a fresh cookie store on the existing context.
            userType = "anon";
            cookieStore = new BasicCookieStore();
            httpContext.setAttribute(ClientContext.COOKIE_STORE, cookieStore);
            maxFileSizeLimit = 314572800; // 300 MB
        }

        // Reject oversized files before any network work is done.
        if (file.length() > maxFileSizeLimit) {
            throw new NUMaxFileSizeException(maxFileSizeLimit, file.getName(), host);
        }
        uploadInitialising();
        // NOTE(review): presumably fills uploadURL, progressURL, signature and userID, which
        // are read below but not assigned in this method - confirm in initialize().
        initialize();
        
        // https://fs08u.sendspace.com/upload?SPEED_LIMIT=0&MAX_FILE_SIZE=314572800&UPLOAD_IDENTIFIER=x.x.x.x.0&DESTINATION_DIR=xx
        // fs08u.sendspace.com/upload?SPEED_LIMIT=0&MAX_FILE_SIZE=314572800&UPLOAD_IDENTIFIER=x.x.x.x.0&DESTINATION_DIR=xx
        hostName = StringUtils.stringStartingFromString(uploadURL, "https://");
        // fs08u.sendspace.com
        hostName = StringUtils.stringUntilString(hostName, "sendspace.com") + "sendspace.com";
        
        // https://fs08u.sendspace.com/upload?SPEED_LIMIT=0&MAX_FILE_SIZE=314572800&UPLOAD_IDENTIFIER=910609187.1440099567.3BB289C9.22.0&DESTINATION_DIR=22
        httpPost = new NUHttpPost(uploadURL);
        httpPost.setHeader("Host", hostName);
        httpPost.setHeader("Referer", "https://www.sendspace.com/");
        
        // Build the multipart form for the upload request.
        MultipartEntity mpEntity = new MultipartEntity(HttpMultipartMode.BROWSER_COMPATIBLE);
        mpEntity.addPart("PROGRESS_URL", new StringBody(progressURL));
        mpEntity.addPart("js_enabled", new StringBody("1"));
        mpEntity.addPart("signature", new StringBody(signature));
        mpEntity.addPart("upload_files", new StringBody(""));
        // Registered users additionally send their user id and a target folder.
        if (userType.equals("reg")) {
            mpEntity.addPart("userid", new StringBody(userID));
            mpEntity.addPart("folder_id", new StringBody("0"));
        }
        mpEntity.addPart("terms", new StringBody("1"));
        mpEntity.addPart("file[]", new StringBody(""));
        mpEntity.addPart("description[]", new StringBody(""));
        mpEntity.addPart("upload_file[]", createMonitoredFileBody());
        httpPost.setEntity(mpEntity);
        
        NULogger.getLogger().log(Level.INFO, "executing request {0}", httpPost.getRequestLine());
        NULogger.getLogger().info("Now uploading your file into SendSpace.com");
        uploading();
        httpResponse = httpclient.execute(httpPost, httpContext);
        responseString = EntityUtils.toString(httpResponse.getEntity());
        
        doc = Jsoup.parse(responseString);
        
        //Read the links
        gettingLink();
        // first() is null when the page has no matching link; the resulting exception is
        // handled by the generic catch below, which marks the upload as failed.
        downloadlink = doc.select("div[class=file_description]").select("a").first().attr("href");
        deletelink = doc.select("a[class=link]").attr("href");

        NULogger.getLogger().log(Level.INFO, "Delete link : {0}", deletelink);
        NULogger.getLogger().log(Level.INFO, "Download link : {0}", downloadlink);
        downURL = downloadlink;
        delURL = deletelink;

        uploadFinished();
    } catch(NUException ex){
        // Application-level errors, such as the size limit above.
        ex.printError();
        uploadInvalid();
    } catch (Exception e) {
        // Anything else is logged and reported as a failed upload.
        Logger.getLogger(getClass().getName()).log(Level.SEVERE, null, e);
        uploadFailed();
    }
}
 
Example 17
/**
 * Parses {@code html} with {@code url} as the base URI and returns the text of the content
 * element that {@code getContentElementByDoc} picks from the document.
 *
 * @param html the page markup
 * @param url  base URI passed to the parser
 * @return the text of the selected content element
 * @throws Exception if the content element cannot be determined
 */
public static String getContentByHtml(String html, String url) throws Exception {
    Document parsed = Jsoup.parse(html, url);
    String contentText = getContentElementByDoc(parsed).text();
    return contentText;
}
 
Example 18
/**
 * Logs in to TheVideo.me by posting the login form and checking the session cookies.
 *
 * <p>The login counts as successful when both the {@code xfsts} and {@code login} cookies
 * are non-empty. Otherwise the error text is read from the response page and raised.
 * Any failure resets the login state and shows the account UI again.
 */
@Override
public void login() {
    loginsuccessful = false;
    try {
        initialize();

        NULogger.getLogger().info("Trying to log in to TheVideo.me");
        httpPost = new NUHttpPost("http://www.thevideo.me/");

        List<NameValuePair> loginForm = new ArrayList<NameValuePair>();
        loginForm.add(new BasicNameValuePair("op", "login"));
        loginForm.add(new BasicNameValuePair("login", getUsername()));
        loginForm.add(new BasicNameValuePair("password", getPassword()));
        httpPost.setEntity(new UrlEncodedFormEntity(loginForm, "UTF-8"));

        httpResponse = httpclient.execute(httpPost, httpContext);
        NULogger.getLogger().info(httpResponse.getStatusLine().toString());

        // Failure path first: at least one of the two session cookies is missing.
        boolean sessionCookieMissing = CookieUtils.getCookieValue(httpContext, "xfsts").isEmpty()
                || CookieUtils.getCookieValue(httpContext, "login").isEmpty();
        if (sessionCookieMissing) {
            // Read the error message from the returned page.
            responseString = EntityUtils.toString(httpResponse.getEntity());
            Document errorPage = Jsoup.parse(responseString);
            String error = errorPage.select(".err").first().text();

            if ("Incorrect Login or Password".equals(error)) {
                throw new NUInvalidLoginException(getUsername(), HOSTNAME);
            }

            // Any other message becomes a generic exception.
            throw new Exception("Login error: " + error);
        }

        // Both cookies are present: the login succeeded.
        EntityUtils.consume(httpResponse.getEntity());
        loginsuccessful = true;
        username = getUsername();
        password = getPassword();
        hostsAccountUI().hostUI(HOSTNAME).setEnabled(true);
        NULogger.getLogger().info("TheVideo.me login successful!");
    } catch (NUException ex) {
        resetLogin();
        ex.printError();
        accountUIShow().setVisible(true);
    } catch (Exception e) {
        resetLogin();
        NULogger.getLogger().log(Level.SEVERE, "{0}: {1}", new Object[]{getClass().getName(), e});
        showWarningMessage( Translation.T().loginerror(), HOSTNAME);
        accountUIShow().setVisible(true);
    }

}
 
Example 19
Source Project: coolreader   File: RelinkImagesTask.java    License: MIT License 4 votes vote down vote up
/**
 * Re-links thumbnail images in stored novel content to the current {@code rootPath}.
 *
 * <p>For every page, each {@code img} whose {@code src} is a {@code file:///} URL under
 * {@code /project/images/thumb/} is checked. If the referenced file no longer exists but the
 * same file exists under {@code rootPath}, the {@code src} is rewritten and {@code updated}
 * is incremented. Progress and per-page errors are published through
 * {@code publishProgress}.
 */
private void processImageInContents() {
	// get all contents
	ArrayList<PageModel> pages = NovelsDao.getInstance().getAllContentPageModel();
	updated = 0;
	int count = 1;
	for (PageModel page : pages) {
		String message = LNReaderApplication.getInstance().getApplicationContext().getResources().getString(R.string.relink_task_progress, page.getPage(), count, pages.size());
		publishProgress(new CallbackEventData(message));

		try {
			// get the contents
			// NOTE(review): the content is requested with a new BookModel rather than the
			// current page - confirm this loads the content belonging to "page".
			NovelContentModel content = NovelsDao.getInstance().getNovelContent(new BookModel(), false, callback);

			if (content != null) {

				// replace the rootpath based on /project/
				// for now just replace the thumbs
				// file:///mnt/sdcard/test/project/images/thumb/c/c7/Accel_World_v01_262.jpg/84px-Accel_World_v01_262.jpg
				// file:///sdcard-ext/.bakareaderex/project/images/thumb/c/c7/Accel_World_v01_262.jpg/84px-Accel_World_v01_262.jpg

				Document doc = Jsoup.parse(content.getContent());
				Elements imageElements = doc.select("img");
				for (Element image : imageElements) {
					String imgUrl = image.attr("src");
					if (imgUrl.startsWith("file:///") && imgUrl.contains("/project/images/thumb/")) {
						String mntImgUrl = imgUrl.replace("file:///", "");
						Log.d(TAG, "Found image : " + imgUrl);

						if (!new File(mntImgUrl).exists()) {
							Log.d(TAG, "Old image doesn't exists/moved: " + mntImgUrl);
							// Quote the replacement so that a '$' or '\' inside rootPath is
							// inserted literally instead of being read as a group reference.
							String replacement = java.util.regex.Matcher.quoteReplacement("file:///" + rootPath + "/project/images/thumb/");
							String newUrl = imgUrl.replaceAll("file:///[\\w/\\./!$%^&*()_+|~\\={}\\[\\]:\";'<>?,-]+/project/images/thumb/", replacement);
							String mntNewUrl = newUrl.replace("file:///", "");
							Log.d(TAG, "Trying to replace with " + mntNewUrl);

							if (new File(mntNewUrl).exists()) {
								Log.d(TAG, "Replace image: " + imgUrl + " ==> " + newUrl);
								image.attr("src", newUrl);
								++updated;
							}
						}
					}
				}
				// The content is written back for every page, whether or not an image changed.
				content.setContent(doc.html());
				NovelsDao.getInstance().updateNovelContent(content);

			}
		} catch (Exception e) {
			// A failure on one page is reported and the remaining pages are still processed.
			message = LNReaderApplication.getInstance().getApplicationContext().getResources().getString(R.string.relink_task_error, page.getPage());
			Log.e(TAG, message, e);
			publishProgress(new CallbackEventData(message));
		}
		++count;
	}
}
 
Example 20
/**
 * Checks how control characters in attribute names are handled: an attribute whose name is
 * only a control code is dropped, and a control code inside a name is stripped from it.
 */
@Test public void handlesControlCodeInAttributeName() {
    String html = "<p><a \06=foo>One</a><a/\06=bar><a foo\06=bar>Two</a></p>";
    String expected = "<p><a>One</a><a></a><a foo=\"bar\">Two</a></p>";

    String actual = Jsoup.parse(html).body().html();

    assertEquals(expected, actual);
}