Java Code Examples for org.jsoup.nodes.Document#text()
The following examples show how to use
org.jsoup.nodes.Document#text().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: HtmlHelper.java From FairEmail with GNU General Public License v3.0 | 6 votes |
/**
 * Renders a document as plain text, surrounding the content of every blockquote with
 * square brackets.
 *
 * @param d    the document to render; it is passed to {@code truncate} and receives extra
 *             bracket text nodes, so it is modified in place
 * @param full {@code true} to return the complete text; {@code false} to return at most
 *             {@code PREVIEW_SIZE} characters, followed by an ellipsis when text was cut off
 * @return the complete text or its preview
 */
private static String _getText(Document d, boolean full) {
    truncate(d, !full);

    // Mark quoted sections with brackets before the markup is flattened to text.
    for (Element quote : d.select("blockquote")) {
        quote.prependChild(new TextNode("["));
        quote.appendChild(new TextNode("]"));
    }

    String text = d.text();
    if (full) {
        return text;
    }

    int limit = Math.min(text.length(), PREVIEW_SIZE);
    String preview = text.substring(0, limit);
    return preview.length() < text.length() ? preview + "…" : preview;
}
Example 2
Source File: JerryExtractor.java From web-data-extractor with Apache License 2.0 | 6 votes |
/**
 * Parses the input with jsoup's XML parser and renders it according to {@code outType}.
 *
 * @param str the markup to parse
 * @return the document's HTML when {@code outType} is {@code TYPE_HTML}; its text for
 *         {@code TYPE_TEXT} and for every other value
 */
private String parse(String str) {
    Document document = Jsoup.parse(str, "", Parser.xmlParser());
    switch (outType) {
        case TYPE_HTML:
            return document.html();
        case TYPE_TEXT:
        default:
            // Text output doubles as the fallback for unrecognised output types.
            return document.text();
    }
}
Example 3
Source File: duckdns.java From DuckDNSClient with GNU General Public License v2.0 | 6 votes |
private static String getip() { Document doc = null; String url = "http://checkip.amazonaws.com"; String ip = ""; try { doc = Jsoup.connect(url).header("Cache-Control", "no-cache").ignoreHttpErrors(true).ignoreContentType(true).timeout(10 * 1000).get(); ip = doc.text(); } catch (IOException e) { e.printStackTrace(); } if (doc.text().length() < 7) { // can't get ip address, let DuckDNS to resolve it ip = ""; } return ip; }
Example 4
Source File: PromPHPCrawlerServiceImpl.java From ShadowSocks-Share with Apache License 2.0 | 5 votes |
@Override protected Set<ShadowSocksDetailsEntity> parse(Document document) { // SSR 订阅地址内容 String base64ssrLinks = document.text(); String ssrLinks = StringUtils.toEncodedString(Base64.decodeBase64(base64ssrLinks), StandardCharsets.UTF_8); String[] ssrLinkList = ssrLinks.split("\n"); // log.debug("---------------->{}={}", ssrLinkList.length + "", ssrLinkList); Set<ShadowSocksDetailsEntity> set = Collections.synchronizedSet(new HashSet<>(ssrLinkList.length)); Arrays.asList(ssrLinkList).parallelStream().forEach((str) -> { try { if (StringUtils.isNotBlank(str)) { ShadowSocksDetailsEntity ss = parseLink(str.trim()); ss.setValid(false); ss.setValidTime(new Date()); ss.setTitle("免费账号 | 云端框架"); ss.setRemarks("https://cloudfra.com/"); ss.setGroup("ShadowSocks-Share"); // 测试网络 if (isReachable(ss)) ss.setValid(true); // 无论是否可用都入库 set.add(ss); log.debug("*************** 第 {} 条 ***************{}{}", set.size(), System.lineSeparator(), ss); // log.debug("{}", ss.getLink()); } } catch (Exception e) { log.error(e.getMessage(), e); } }); return set; }
Example 5
Source File: CharRepresentation.java From semanticvectors with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Extracts the text of an HTML file and passes each space-separated token to
 * {@code addStringToCharRep}.
 *
 * @param inputFile the HTML file to read, decoded with {@code ENCODING}
 * @throws IOException if the file cannot be read
 */
private void addHtmlFileToCharRep(File inputFile) throws IOException {
    String innerText = Jsoup.parse(inputFile, ENCODING).text();
    java.util.StringTokenizer tokens = new java.util.StringTokenizer(innerText, " ");
    while (tokens.hasMoreTokens()) {
        addStringToCharRep(tokens.nextToken());
    }
}
Example 6
Source File: RandomGoogleTrendExtractor.java From swcv with MIT License | 5 votes |
/**
 * Downloads the feed addressed by {@code constructQuery()} and splits the text of its
 * entries into candidate trends.
 *
 * <p>The response is read as JSON; the entries are taken from
 * {@code responseData.feed.entries}. Each entry's {@code content} is parsed as HTML, a "."
 * is appended to every anchor, and the resulting text is split on ".".
 *
 * @return the result of {@code getNonEmptyTrends} applied to the collected fragments
 * @throws Exception if the download or the JSON processing fails
 */
private static String[] downloadTrends() throws Exception {
    String query = constructQuery();
    InputStream is = new URL(query).openStream();
    try {
        StringWriter writer = new StringWriter();
        // NOTE(review): this copy overload takes no charset argument; confirm how the
        // bytes are decoded and whether that matches the feed's encoding.
        IOUtils.copy(is, writer);

        JSONObject json = new JSONObject(writer.toString());
        json = json.getJSONObject("responseData");
        json = json.getJSONObject("feed");
        JSONArray arr = json.getJSONArray("entries");

        // Typed list instead of a raw ArrayList, so the assignment is checked.
        List<String> res = new ArrayList<String>();
        for (int i = 0; i < arr.length(); i++) {
            JSONObject tmp = arr.getJSONObject(i);
            String en = tmp.getString("content");
            Document doc = Jsoup.parse(en);
            // Terminate each link with "." so that splitting on "." separates the links.
            for (Element t : doc.select("a")) {
                t.append(".");
            }
            en = doc.text();
            res.addAll(Arrays.asList(en.split("\\.")));
        }
        return getNonEmptyTrends(res);
    } finally {
        is.close();
    }
}
Example 7
Source File: duckdns.java From DuckDNSClient with GNU General Public License v2.0 | 5 votes |
private static String updateDuckDNS(String domain, String token, String ipaddress) { String url = "http://www.duckdns.org/update?domains=" + domain + "&token=" + token + "&ip=" + ipaddress; Document doc = null; String ua = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2"; try { // doc = Jsoup.connect(url).ignoreHttpErrors(true).timeout(10 * 1000).get(); doc = Jsoup.connect(url).userAgent(ua).ignoreHttpErrors(true).ignoreContentType(true).timeout(10 * 1000).get(); } catch (IOException e) { e.printStackTrace(); } return doc.text(); }
Example 8
Source File: MainFragment.java From android-app with The Unlicense | 5 votes |
/**
 * Requests an insult for {@code language} from evilinsult.com and stores the response text
 * in {@code title}. A failure is printed and leaves {@code title} untouched.
 *
 * @param params unused
 * @return always {@code null}
 */
@Override
protected Void doInBackground(Void... params) {
    try {
        title = Jsoup.connect("https://evilinsult.com/generate_insult.php?lang=" + language)
                .get()
                .text();
    } catch (Exception ex) {
        ex.printStackTrace();
    }
    return null;
}
Example 9
Source File: MainFragment.java From android-app with The Unlicense | 5 votes |
/**
 * Background step: downloads the insult page for {@code language} and keeps its text in
 * {@code title}. When anything goes wrong the stack trace is printed and {@code title}
 * is not changed.
 *
 * @param params unused
 * @return always {@code null}
 */
@Override
protected Void doInBackground(Void... params) {
    try {
        Document insultPage =
                Jsoup.connect("https://evilinsult.com/generate_insult.php?lang=" + language).get();
        title = insultPage.text();
    } catch (Exception ex) {
        ex.printStackTrace();
    }
    return null;
}
Example 10
Source File: MainActivity.java From android-app with The Unlicense | 5 votes |
/**
 * Requests an English insult from evilinsult.com over HTTPS and stores the response text
 * in {@code title}. On failure the stack trace is printed and {@code title} receives the
 * exception message instead.
 *
 * @param params unused
 * @return always {@code null}
 */
@Override
protected Void doInBackground(Void... params) {
    try {
        title = Jsoup.connect("https://evilinsult.com/generate_insult.php?lang=en")
                .get()
                .text();
    } catch (Exception ex) {
        ex.printStackTrace();
        title = ex.getMessage();
    }
    return null;
}
Example 11
Source File: MainActivity.java From android-app with The Unlicense | 5 votes |
/**
 * Background step: downloads the English insult page over plain HTTP and keeps its text in
 * {@code title}. If the download fails, the stack trace is printed and {@code title} is
 * set to the exception message.
 *
 * @param params unused
 * @return always {@code null}
 */
@Override
protected Void doInBackground(Void... params) {
    try {
        Document insultPage =
                Jsoup.connect("http://evilinsult.com/generate_insult.php?lang=en").get();
        title = insultPage.text();
    } catch (Exception ex) {
        ex.printStackTrace();
        title = ex.getMessage();
    }
    return null;
}
Example 12
Source File: TextPageProcessor.java From ScriptSpider with Apache License 2.0 | 5 votes |
/**
 * Parses a page.
 *
 * <p>Collects the page's title, text and seed URL into a map and sets that map as the
 * page's items. Per the original note, a later save step stores these items.
 *
 * @param page page
 */
public void process(Page page) {
    Document doc = page.getDocument();

    Map<String, String> items = new HashMap<String, String>();
    items.put("title", doc.title());
    items.put("text", doc.text());
    items.put("url", page.getUrlSeed().getUrl());

    page.setItems(items);
}
Example 13
Source File: PlumbleService.java From Plumble with GNU General Public License v3.0 | 4 votes |
@Override public void onMessageLogged(IMessage message) { // Split on / strip all HTML tags. Document parsedMessage = Jsoup.parseBodyFragment(message.getMessage()); String strippedMessage = parsedMessage.text(); String ttsMessage; if(mShortTtsMessagesEnabled) { for (Element anchor : parsedMessage.getElementsByTag("A")) { // Get just the domain portion of links String href = anchor.attr("href"); // Only shorten anchors without custom text if (href != null && href.equals(anchor.text())) { String urlHostname = HtmlUtils.getHostnameFromLink(href); if (urlHostname != null) { anchor.text(getString(R.string.chat_message_tts_short_link, urlHostname)); } } } ttsMessage = parsedMessage.text(); } else { ttsMessage = strippedMessage; } String formattedTtsMessage = getString(R.string.notification_message, message.getActorName(), ttsMessage); // Read if TTS is enabled, the message is less than threshold, is a text message, and not deafened if(mSettings.isTextToSpeechEnabled() && mTTS != null && formattedTtsMessage.length() <= TTS_THRESHOLD && getSessionUser() != null && !getSessionUser().isSelfDeafened()) { mTTS.speak(formattedTtsMessage, TextToSpeech.QUEUE_ADD, null); } // TODO: create a customizable notification sieve if (mSettings.isChatNotifyEnabled()) { mMessageNotification.show(message); } mMessageLog.add(new IChatMessage.TextMessage(message)); }
Example 14
Source File: CheckUtils.java From Natty with GNU General Public License v3.0 | 4 votes |
/**
 * Parses the given HTML and returns the text of the resulting document.
 *
 * @param html the markup to parse
 * @return the document text, without tags
 */
public static String stripTags(String html) {
    return Jsoup.parse(html).text();
}
Example 15
Source File: ActivityMgmtService.java From symphonyx with Apache License 2.0 | 4 votes |
/**
 * Collects 1A0001.
 *
 * <p>The user's choice is the second "-"-separated segment of the data id of the latest
 * 1A0001 point transfer. It is compared with a result derived from the price read from the
 * remote quote: "0" when the price has no decimal point, when its fractional part has a
 * single digit, or when its last digit is 0-4; "1" when its last digit is 5-9. When both
 * match, twice the transfer's sum is transferred to the user.
 *
 * @param userId the specified user id
 * @return result; its status code is set to {@code true} once the comparison was carried
 *         out, and its message describes the outcome
 */
public synchronized JSONObject collect1A0001(final String userId) {
    final JSONObject response = Results.falseResult();

    if (!activityQueryService.is1A0001Today(userId)) {
        response.put(Keys.MSG, langPropsService.get("activityNotParticipatedLabel"));
        return response;
    }

    if (activityQueryService.isCollected1A0001Today(userId)) {
        response.put(Keys.MSG, langPropsService.get("activityParticipatedLabel"));
        return response;
    }

    final List<JSONObject> latestTransfers = pointtransferQueryService.getLatestPointtransfers(
            userId, Pointtransfer.TRANSFER_TYPE_C_ACTIVITY_1A0001, 1);
    final JSONObject latestTransfer = latestTransfers.get(0);
    final String dataId = latestTransfer.optString(Pointtransfer.DATA_ID);
    final String choice = dataId.split("-")[1];
    final int stake = latestTransfer.optInt(Pointtransfer.SUM);

    String outcome = null;
    try {
        final Document quotePage =
                Jsoup.parse(new URL("http://stockpage.10jqka.com.cn/1A0001/quote/header/"), 5000);
        final JSONObject quote = new JSONObject(quotePage.text());
        final String price = quote.optJSONObject("data").optJSONObject("1A0001").optString("10");

        if (price.contains(".")) {
            // Only a fractional part longer than one digit contributes its last digit;
            // otherwise the digit counts as 0.
            int lastDigit = 0;
            if (price.split("\\.")[1].length() > 1) {
                final String lastChar = price.substring(price.length() - 1);
                lastDigit = Integer.valueOf(lastChar);
            }

            if (0 <= lastDigit && lastDigit <= 4) {
                outcome = "0";
            } else if (5 <= lastDigit && lastDigit <= 9) {
                outcome = "1";
            } else {
                LOGGER.error("Activity 1A0001 collect result [" + lastDigit + "]");
            }
        } else {
            outcome = "0";
        }
    } catch (final Exception e) {
        LOGGER.log(Level.ERROR, "Collect 1A0001 failed", e);
        response.put(Keys.MSG, langPropsService.get("activity1A0001CollectFailLabel"));
        return response;
    }

    if (Strings.isEmptyOrNull(choice)) {
        response.put(Keys.MSG, langPropsService.get("activity1A0001CollectFailLabel"));
        return response;
    }

    response.put(Keys.STATUS_CODE, true);

    if (!StringUtils.equals(choice, outcome)) {
        response.put(Keys.MSG, langPropsService.get("activity1A0001CollectSucc0Label"));
        return response;
    }

    final int amount = stake * 2;
    final boolean transferred = null != pointtransferMgmtService.transfer(
            Pointtransfer.ID_C_SYS, userId, Pointtransfer.TRANSFER_TYPE_C_ACTIVITY_1A0001_COLLECT,
            amount, DateFormatUtils.format(new Date(), "yyyyMMdd") + "-" + outcome);

    if (transferred) {
        String msg = langPropsService.get("activity1A0001CollectSucc1Label");
        msg = msg.replace("{point}", String.valueOf(amount));
        response.put(Keys.MSG, msg);
    } else {
        response.put(Keys.MSG, langPropsService.get("activity1A0001CollectFailLabel"));
    }

    return response;
}
Example 16
Source File: Jsoup403ForbiddenExample.java From crawler-jsoup-maven with Apache License 2.0 | 4 votes |
public static void main(String[] args) { try{ // connect to the website '1 Connection connection = Jsoup.connect("http://www.bluetata.com"); // get the HTML document '2 Document doc = connection.get(); // parse text from HTML '3 String strHTML = doc.text(); // out put dom '4 System.out.println(strHTML); }catch(IOException ioex){ ioex.printStackTrace(); } }
Example 17
Source File: HtmlDocumentInterpreter.java From camunda-bpm-swagger with Apache License 2.0 | 4 votes |
/**
 * Parses the characters of an HTML block as a body fragment and returns the resulting text.
 *
 * @param node the HTML block to convert
 * @return the text of the parsed fragment
 */
String getText(final HtmlBlock node) {
    final String html = node.getChars().toString();
    return Jsoup.parseBodyFragment(html).text();
}
Example 18
Source File: RSSScraper.java From Babler with Apache License 2.0 | 4 votes |
public AbstractMap.SimpleEntry<Integer, Integer> fetchAndSave() throws Exception { URL url = new URL(this.url); SyndFeedInput input = new SyndFeedInput(); SyndFeed feed = input.build(new XmlReader(url)); int items = feed.getEntries().size(); if(items > 0){ log.info("Attempting to parse rss feed: "+ this.url ); log.info("This Feed has "+items +" items"); } List <SyndEntry> entries = feed.getEntries(); for (SyndEntry item : entries){ log.info("Title: " + item.getTitle()); log.info("Link: " + item.getLink()); SyndContentImpl contentHolder = (SyndContentImpl) item.getContents().get(0); String content = contentHolder.getValue(); //content might contain html data, let's clean it up Document doc = Jsoup.parse(content); content = doc.text(); try { Result result = ld.detectLanguage(content, language); if (result.languageCode.equals(language) && result.isReliable) { FileSaver file = new FileSaver(content, this.language, "bs", item.getLink(), item.getUri(), String.valueOf(content.hashCode())); String fileName = file.getFileName(); BlogPost post = new BlogPost(content,this.language,null,"bs",item.getLink(),item.getUri(),fileName); if(DAO.saveEntry(post)) { file.save(this.logDb); numOfFiles++; wrongCount = 0; } } else{ log.info("Item " + item.getTitle() + "is in a diff languageCode, skipping this post "+ result.languageCode); wrongCount ++; if(wrongCount > 3){ log.info("Already found 3 posts in the wrong languageCode, skipping this blog"); } break; } } catch(Exception e){ log.error(e); break; } } return new AbstractMap.SimpleEntry<>(numOfFiles,wrongCount); }
Example 19
Source File: HTMLSanitizer.java From document-management-software with GNU Lesser General Public License v3.0 | 4 votes |
/**
 * Removes every {@code iframe} element from the given HTML and returns the text of what
 * remains.
 *
 * @param htmlContent the markup to process
 * @return the document text after the iframes were removed
 */
public static String stripIframes(String htmlContent) {
    // The second argument of Jsoup.parse(String, String) is a base URI, not a charset, so
    // "UTF-8" does not belong there. The input is already a decoded String.
    Document doc = Jsoup.parse(htmlContent);
    doc.select("iframe").remove();
    return doc.text();
}
Example 20
Source File: CharRepresentation.java From semanticvectors with BSD 3-Clause "New" or "Revised" License | 4 votes |
/**
 * Extracts the text of an HTML file and passes it, as one string, to
 * {@code addStringToTermRep}.
 *
 * @param inputFile the HTML file to read, decoded with {@code ENCODING}
 * @throws IOException if the file cannot be read
 */
private void addHtmlFileToTermRep(File inputFile) throws IOException {
    addStringToTermRep(Jsoup.parse(inputFile, ENCODING).text());
}