Java Code Examples for org.jsoup.nodes.Document#text()

The following examples show how to use org.jsoup.nodes.Document#text(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Renders a document as plain text, either in full or shortened to a preview.
 *
 * <p>The document is modified in place: it is first handed to {@code truncate} (with the
 * negated {@code full} flag), and every {@code blockquote} element is wrapped in a leading
 * "[" and a trailing "]" text node before the text is extracted.
 *
 * @param d    the document to render; mutated by this call
 * @param full {@code true} to return the complete text, {@code false} to return at most
 *             {@code PREVIEW_SIZE} characters followed by an ellipsis when text was cut
 * @return the complete text, or the preview
 */
private static String _getText(Document d, boolean full) {
    truncate(d, !full);

    // Bracket quoted passages so they remain recognizable once the markup is flattened.
    for (Element quote : d.select("blockquote")) {
        quote.prependChild(new TextNode("["));
        quote.appendChild(new TextNode("]"));
    }

    final String text = d.text();
    if (full || text.length() <= PREVIEW_SIZE) {
        return text;
    }

    // Something is being cut off, so mark the preview with an ellipsis.
    return text.substring(0, PREVIEW_SIZE) + "…";
}
 
Example 2
/**
 * Looks up the caller's public IP address by fetching {@code http://checkip.amazonaws.com}.
 *
 * <p>Previously a failed request left the document reference {@code null} and the length
 * check that followed threw a {@link NullPointerException}; a failed request now yields
 * an empty string, the same value used for an implausibly short response.
 *
 * @return the response text, or an empty string when the request fails or the response is
 *         shorter than 7 characters
 */
private static String getip() {
    String url = "http://checkip.amazonaws.com";

    try {
        Document doc = Jsoup.connect(url)
                .header("Cache-Control", "no-cache")
                .ignoreHttpErrors(true)
                .ignoreContentType(true)
                .timeout(10 * 1000)
                .get();
        String ip = doc.text();
        // A response this short is not treated as an address: return an empty
        // string and let DuckDNS resolve the address itself.
        return ip.length() < 7 ? "" : ip;
    } catch (IOException e) {
        e.printStackTrace();
        // Can't get the IP address; let DuckDNS resolve it.
        return "";
    }
}
 
Example 3
/**
 * Parses {@code str} with jsoup's XML parser (empty base URI) and renders the result
 * according to {@code outType}.
 *
 * @param str the markup to parse
 * @return the serialized markup when {@code outType} is {@code TYPE_HTML}; the plain text
 *         for {@code TYPE_TEXT} and for every other value
 */
private String parse(String str) {
    final Document parsed = Jsoup.parse(str, "", Parser.xmlParser());
    switch (outType) {
        case TYPE_HTML:
            return parsed.html();
        case TYPE_TEXT:
        default:
            // Plain text is both the explicit TYPE_TEXT result and the fallback.
            return parsed.text();
    }
}
 
Example 4
/**
 * Turns a Base64-encoded subscription document into a set of account entities.
 *
 * <p>The document text is Base64-decoded as UTF-8 and split on line feeds. Every non-blank
 * line is parsed with {@code parseLink}, stamped with fixed title/remarks/group values and
 * the current time, probed with {@code isReachable}, and added to the result whether or
 * not the probe succeeded. Lines are processed on a parallel stream; a failure on one line
 * is logged and does not affect the others.
 *
 * @param document the fetched subscription document
 * @return a synchronized set with one entity per successfully parsed line
 */
@Override
protected Set<ShadowSocksDetailsEntity> parse(Document document) {

	// The document text is the Base64-encoded SSR subscription content.
	String encodedLinks = document.text();
	byte[] decodedBytes = Base64.decodeBase64(encodedLinks);
	String[] links = StringUtils.toEncodedString(decodedBytes, StandardCharsets.UTF_8).split("\n");

	// Written to from several worker threads, hence the synchronized wrapper.
	Set<ShadowSocksDetailsEntity> result = Collections.synchronizedSet(new HashSet<>(links.length));

	Arrays.stream(links).parallel().forEach(link -> {
		try {
			if (!StringUtils.isNotBlank(link)) {
				return;
			}

			ShadowSocksDetailsEntity entity = parseLink(link.trim());
			entity.setValid(false);
			entity.setValidTime(new Date());
			entity.setTitle("免费账号 | 云端框架");
			entity.setRemarks("https://cloudfra.com/");
			entity.setGroup("ShadowSocks-Share");

			// Probe the network; only a reachable entry is flagged as valid.
			if (isReachable(entity)) {
				entity.setValid(true);
			}

			// Stored regardless of whether the entry is usable.
			result.add(entity);

			log.debug("*************** 第 {} 条 ***************{}{}", result.size(), System.lineSeparator(), entity);
		} catch (Exception e) {
			log.error(e.getMessage(), e);
		}
	});

	return result;
}
 
Example 5
Source Project: ScriptSpider   File: TextPageProcessor.java    License: Apache License 2.0 (5 votes)
/**
 * Parses a page.
 *
 * <p>Extracts the useful information from the page's document — its title, its full text,
 * and the URL of the page's seed — and hands it to the page as its items under the keys
 * {@code "title"}, {@code "text"} and {@code "url"}. Per the original author's note, a
 * later save step is expected to persist these items.
 *
 * @param page the page to process
 */
public void process(Page page) {
    final Document document = page.getDocument();

    final Map<String, String> fields = new HashMap<String, String>();
    fields.put("title", document.title());
    fields.put("text", document.text());
    fields.put("url", page.getUrlSeed().getUrl());

    page.setItems(fields);
}
 
Example 6
Source Project: android-app   File: MainActivity.java    License: The Unlicense (5 votes)
/**
 * Fetches the English insult endpoint and stores the response text in {@code title}.
 * If anything goes wrong, the stack trace is printed and {@code title} receives the
 * exception's message instead.
 *
 * @param params unused
 * @return always {@code null}
 */
@Override
protected Void doInBackground(Void... params) {
    try {
        title = Jsoup.connect("http://evilinsult.com/generate_insult.php?lang=en")
                .get()
                .text();
    } catch (Exception failure) {
        failure.printStackTrace();
        // Surface the failure to the user in place of the insult.
        title = failure.getMessage();
    }
    return null;
}
 
Example 7
Source Project: android-app   File: MainActivity.java    License: The Unlicense (5 votes)
/**
 * Downloads an English insult over HTTPS and keeps its text in {@code title}; on any
 * exception the stack trace is printed and the exception message is kept instead.
 *
 * @param params unused
 * @return always {@code null}
 */
@Override
protected Void doInBackground(Void... params) {
    String fetched;
    try {
        final Document response =
                Jsoup.connect("https://evilinsult.com/generate_insult.php?lang=en").get();
        fetched = response.text();
    } catch (Exception error) {
        error.printStackTrace();
        fetched = error.getMessage();
    }
    title = fetched;
    return null;
}
 
Example 8
Source Project: android-app   File: MainFragment.java    License: The Unlicense (5 votes)
/**
 * Fetches an insult in the configured {@code language} and stores its text in
 * {@code title}. On failure the stack trace is printed and {@code title} is left
 * untouched.
 *
 * @param params unused
 * @return always {@code null}
 */
@Override
protected Void doInBackground(Void... params) {
    try {
        title = Jsoup.connect("https://evilinsult.com/generate_insult.php?lang=" + language)
                .get()
                .text();
    } catch (Exception failure) {
        failure.printStackTrace();
    }
    return null;
}
 
Example 9
Source Project: android-app   File: MainFragment.java    License: The Unlicense (5 votes)
/**
 * Background step: requests an insult for the current {@code language} and assigns the
 * response text to {@code title}. Any exception is only printed; {@code title} then
 * keeps its previous value.
 *
 * @param params unused
 * @return always {@code null}
 */
@Override
protected Void doInBackground(Void... params) {
    final String endpoint = "https://evilinsult.com/generate_insult.php?lang=" + language;
    try {
        final Document response = Jsoup.connect(endpoint).get();
        title = response.text();
    } catch (Exception error) {
        error.printStackTrace();
    }
    return null;
}
 
Example 10
/**
 * Sends an update request to DuckDNS for the given domain and returns the response text.
 *
 * <p>Previously a failed request left the document reference {@code null} and the final
 * {@code doc.text()} threw a {@link NullPointerException}; a failed request now returns
 * an empty string after printing the stack trace.
 *
 * @param domain    value of the {@code domains} query parameter
 * @param token     value of the {@code token} query parameter
 * @param ipaddress value of the {@code ip} query parameter
 * @return the text of the response, or an empty string if the request failed
 */
private static String updateDuckDNS(String domain, String token, String ipaddress) {
    // NOTE(review): the parameters are concatenated without URL encoding — confirm that
    // callers only pass values that are safe to embed in a query string.
    String url = "http://www.duckdns.org/update?domains=" + domain + "&token=" + token + "&ip=" + ipaddress;
    String ua = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2";

    try {
        Document doc = Jsoup.connect(url)
                .userAgent(ua)
                .ignoreHttpErrors(true)
                .ignoreContentType(true)
                .timeout(10 * 1000)
                .get();
        return doc.text();
    } catch (IOException e) {
        e.printStackTrace();
        // No response to report.
        return "";
    }
}
 
Example 11
Source Project: swcv   File: RandomGoogleTrendExtractor.java    License: MIT License (5 votes)
/**
 * Downloads the trends feed and splits its entries into individual trend strings.
 *
 * <p>The URL built by {@code constructQuery()} is read as JSON; the array at
 * {@code responseData.feed.entries} is walked, and each entry's {@code content} is parsed
 * as HTML. A "." is appended to every anchor so that, once the markup is flattened to
 * text, the anchors can be separated by splitting on ".". The collected pieces are passed
 * to {@code getNonEmptyTrends}.
 *
 * @return the result of {@code getNonEmptyTrends} for the collected pieces
 * @throws Exception if the feed cannot be downloaded or does not have the expected shape
 */
private static String[] downloadTrends() throws Exception
{
    String query = constructQuery();
    // try-with-resources closes the stream on every path.
    try (InputStream is = new URL(query).openStream())
    {
        StringWriter writer = new StringWriter();
        // NOTE(review): no charset is passed here, so the bytes are presumably decoded with
        // the platform default — confirm against the IOUtils in use and the feed's encoding.
        IOUtils.copy(is, writer);
        JSONObject json = new JSONObject(writer.toString());
        json = json.getJSONObject("responseData");
        json = json.getJSONObject("feed");

        JSONArray arr = json.getJSONArray("entries");
        List<String> res = new ArrayList<>();
        for (int i = 0; i < arr.length(); i++)
        {
            JSONObject tmp = arr.getJSONObject(i);
            String en = tmp.getString("content");

            // Mark the end of every anchor so the flattened text can be split per anchor.
            Document doc = Jsoup.parse(en);
            for (Element t : doc.select("a"))
            {
                t.append(".");
            }

            en = doc.text();
            res.addAll(Arrays.asList(en.split("\\.")));
        }

        return getNonEmptyTrends(res);
    }
}
 
Example 12
/**
 * Parses an HTML file, extracts its text, and feeds every space-separated token of that
 * text to {@code addStringToCharRep}.
 *
 * @param inputFile the HTML file to read, decoded using {@code ENCODING}
 * @throws IOException if the file cannot be read
 */
private void addHtmlFileToCharRep(File inputFile) throws IOException {
  final String innerText = Jsoup.parse(inputFile, ENCODING).text();
  // StringTokenizer skips runs of the delimiter, so no empty tokens are produced.
  for (java.util.StringTokenizer words = new java.util.StringTokenizer(innerText, " ");
      words.hasMoreTokens(); ) {
    addStringToCharRep(words.nextToken());
  }
}
 
Example 13
/**
 * Removes all {@code iframe} elements from the given HTML and returns the text of what
 * remains.
 *
 * <p>Note that the result is the document's plain text, not HTML: all other markup is
 * dropped as well.
 *
 * <p>The second argument of {@code Jsoup.parse(String, String)} is a base URI used to
 * resolve relative links, not a character set, so the former {@code "UTF-8"} argument was
 * a misuse. The input is already a decoded {@code String}; the single-argument overload
 * is used instead. The returned text is unaffected.
 *
 * @param htmlContent the HTML to process
 * @return the text content of the HTML with iframes removed
 */
public static String stripIframes(String htmlContent) {
	Document doc = Jsoup.parse(htmlContent);
	doc.select("iframe").remove();
	return doc.text();
}
 
Example 14
Source Project: Babler   File: RSSScraper.java    License: Apache License 2.0 (4 votes)
/**
 * Fetches the RSS feed at {@code this.url} and saves the entries whose text is detected
 * to be in {@code language}.
 *
 * <p>For each entry, the value of its first content element is stripped of HTML and run
 * through language detection. A reliably detected match is saved via {@code DAO.saveEntry}
 * and, on success, written with {@code FileSaver.save}; {@code numOfFiles} is incremented
 * and {@code wrongCount} reset. Any other detection result increments {@code wrongCount}
 * and stops processing of the feed, as does any exception raised while handling an entry.
 *
 * <p>Changes from the previous version: the feed reader is now closed after parsing (it
 * was leaked), and entries without any content are skipped instead of aborting the whole
 * call with an {@link IndexOutOfBoundsException}.
 *
 * @return a pair of ({@code numOfFiles}, {@code wrongCount}) as they stand after this call
 * @throws Exception if the feed cannot be fetched or parsed
 */
public AbstractMap.SimpleEntry<Integer, Integer> fetchAndSave() throws Exception {

    URL url = new URL(this.url);

    SyndFeedInput input = new SyndFeedInput();
    SyndFeed feed;
    // Close the reader (and the connection behind it) as soon as the feed is built.
    try (XmlReader reader = new XmlReader(url)) {
        feed = input.build(reader);
    }

    List<SyndEntry> entries = feed.getEntries();
    int items = entries.size();

    if (items > 0) {
        log.info("Attempting to parse rss feed: "+ this.url );
        log.info("This Feed has "+items +" items");
    }

    for (SyndEntry item : entries) {
        log.info("Title: " + item.getTitle());
        log.info("Link: " + item.getLink());

        // An entry may carry no content element at all; there is nothing to save then.
        List<?> contents = item.getContents();
        String content = (contents == null || contents.isEmpty())
                ? null
                : ((SyndContentImpl) contents.get(0)).getValue();
        if (content == null) {
            log.info("Item " + item.getTitle() + " has no content, skipping this post");
            continue;
        }

        // content might contain html data, let's clean it up
        Document doc = Jsoup.parse(content);
        content = doc.text();
        try {
            Result result = ld.detectLanguage(content, language);
            if (result.languageCode.equals(language) && result.isReliable) {

                FileSaver file = new FileSaver(content, this.language, "bs", item.getLink(), item.getUri(), String.valueOf(content.hashCode()));
                String fileName = file.getFileName();
                BlogPost post = new BlogPost(content,this.language,null,"bs",item.getLink(),item.getUri(),fileName);
                if (DAO.saveEntry(post)) {
                    file.save(this.logDb);
                    numOfFiles++;
                    wrongCount = 0;
                }

            } else {
                log.info("Item " + item.getTitle() + "is in a diff languageCode, skipping this post  "+ result.languageCode);
                wrongCount ++;
                if (wrongCount > 3) {
                    log.info("Already found 3 posts in the wrong languageCode, skipping this blog");
                }
                // NOTE(review): this break runs after the first mismatching post, not only
                // once the threshold above is exceeded — the log messages suggest it may
                // have been meant to sit inside the if. Behavior kept as-is; confirm intent.
                break;
            }

        } catch (Exception e) {
            log.error(e);
            break;
        }
    }
    return new AbstractMap.SimpleEntry<>(numOfFiles,wrongCount);
}
 
Example 15
/**
 * Returns the plain text of an HTML block by parsing its characters as a body fragment.
 *
 * @param node the HTML block whose text is wanted
 * @return the text of the parsed fragment
 */
String getText(final HtmlBlock node) {
  final String markup = node.getChars().toString();
  return Jsoup.parseBodyFragment(markup).text();
}
 
Example 16
/**
 * Fetches {@code http://www.bluetata.com} and prints the text of the page to standard
 * output. An I/O failure is reported by printing its stack trace.
 *
 * @param args unused
 */
public static void main(String[] args) {
    try {
        // Steps 1 and 2: open the connection and download the parsed HTML document.
        final Document page = Jsoup.connect("http://www.bluetata.com").get();

        // Steps 3 and 4: extract the text from the document and print it.
        System.out.println(page.text());
    } catch (IOException ioex) {
        ioex.printStackTrace();
    }
}
 
Example 17
/**
 * Handles a newly logged chat message: optionally speaks it, optionally shows a
 * notification, and appends it to the message log.
 *
 * <p>The spoken text is the message with its HTML removed. When short TTS messages are
 * enabled, anchors whose visible text equals their {@code href} have that text replaced
 * by a short-link string built from the link's hostname before the text is extracted.
 *
 * @param message the message that was logged
 */
@Override
public void onMessageLogged(IMessage message) {
    // Parse as a body fragment so text() yields the message without any markup.
    final Document parsed = Jsoup.parseBodyFragment(message.getMessage());
    String ttsMessage = parsed.text();

    if (mShortTtsMessagesEnabled) {
        for (Element link : parsed.getElementsByTag("A")) {
            final String href = link.attr("href");
            // Links with custom text are left alone; only bare URLs are shortened.
            if (href == null || !href.equals(link.text())) {
                continue;
            }
            // Reduce the URL to just its hostname.
            final String host = HtmlUtils.getHostnameFromLink(href);
            if (host == null) {
                continue;
            }
            link.text(getString(R.string.chat_message_tts_short_link, host));
        }
        // Re-extract the text now that the anchors have been rewritten.
        ttsMessage = parsed.text();
    }

    final String spoken = getString(R.string.notification_message,
            message.getActorName(), ttsMessage);

    // Speak only if TTS is enabled and available, the text fits within the threshold,
    // and there is a session user who is not self-deafened.
    final boolean shouldSpeak = mSettings.isTextToSpeechEnabled()
            && mTTS != null
            && spoken.length() <= TTS_THRESHOLD
            && getSessionUser() != null
            && !getSessionUser().isSelfDeafened();
    if (shouldSpeak) {
        mTTS.speak(spoken, TextToSpeech.QUEUE_ADD, null);
    }

    // TODO: create a customizable notification sieve
    if (mSettings.isChatNotifyEnabled()) {
        mMessageNotification.show(message);
    }

    mMessageLog.add(new IChatMessage.TextMessage(message));
}
 
Example 18
Source Project: Natty   File: CheckUtils.java    License: GNU General Public License v3.0 (4 votes)
/**
 * Strips all markup from an HTML string.
 *
 * @param html the HTML to parse
 * @return the text content of the parsed document
 */
public static String stripTags(String html) {
    return Jsoup.parse(html).text();
}
 
Example 19
Source Project: symphonyx   File: ActivityMgmtService.java    License: Apache License 2.0 (4 votes)
/**
 * Collects 1A0001.
 *
 * <p>Requires that the user has participated today and has not collected yet. The user's
 * latest 1A0001 point transfer supplies the guess (the part of its data id after the
 * first "-") and the staked sum. The actual outcome is derived from the price fetched
 * from the quote page: "0" when the price has no decimal point or its decisive digit is
 * 0–4, "1" when that digit is 5–9. The decisive digit is the price's last character when
 * the fractional part has more than one digit, and 0 otherwise. When the guess matches,
 * twice the sum is transferred to the user.
 *
 * @param userId the specified user id
 * @return result; its status code is set to {@code true} once the outcome was determined,
 *         and its message describes what happened
 */
public synchronized JSONObject collect1A0001(final String userId) {
    final JSONObject ret = Results.falseResult();

    // Preconditions: participated today, and not collected yet.
    if (!activityQueryService.is1A0001Today(userId)) {
        ret.put(Keys.MSG, langPropsService.get("activityNotParticipatedLabel"));

        return ret;
    }

    if (activityQueryService.isCollected1A0001Today(userId)) {
        ret.put(Keys.MSG, langPropsService.get("activityParticipatedLabel"));

        return ret;
    }

    // The most recent participation record carries the guess and the stake.
    final List<JSONObject> latest = pointtransferQueryService.getLatestPointtransfers(userId,
            Pointtransfer.TRANSFER_TYPE_C_ACTIVITY_1A0001, 1);
    final JSONObject participation = latest.get(0);
    final String dataId = participation.optString(Pointtransfer.DATA_ID);
    final String guess = dataId.split("-")[1];
    final int stake = participation.optInt(Pointtransfer.SUM);

    String outcome = null;
    try {
        final Document page = Jsoup.parse(new URL("http://stockpage.10jqka.com.cn/1A0001/quote/header/"), 5000);
        final JSONObject quote = new JSONObject(page.text());
        final String price = quote.optJSONObject("data").optJSONObject("1A0001").optString("10");

        if (price.contains(".")) {
            // Only a fractional part of more than one digit contributes its last digit.
            int lastDigit = 0;
            if (price.split("\\.")[1].length() > 1) {
                lastDigit = Integer.parseInt(price.substring(price.length() - 1));
            }

            if (lastDigit >= 0 && lastDigit <= 4) {
                outcome = "0";
            } else if (lastDigit >= 5 && lastDigit <= 9) {
                outcome = "1";
            } else {
                LOGGER.error("Activity 1A0001 collect result [" + lastDigit + "]");
            }
        } else {
            outcome = "0";
        }
    } catch (final Exception e) {
        LOGGER.log(Level.ERROR, "Collect 1A0001 failed", e);

        ret.put(Keys.MSG, langPropsService.get("activity1A0001CollectFailLabel"));

        return ret;
    }

    if (Strings.isEmptyOrNull(guess)) {
        ret.put(Keys.MSG, langPropsService.get("activity1A0001CollectFailLabel"));

        return ret;
    }

    ret.put(Keys.STATUS_CODE, true);

    if (!StringUtils.equals(guess, outcome)) {
        ret.put(Keys.MSG, langPropsService.get("activity1A0001CollectSucc0Label"));

        return ret;
    }

    // Correct guess: pay out double the stake.
    final int payout = stake * 2;
    final boolean transferred = null != pointtransferMgmtService.transfer(Pointtransfer.ID_C_SYS, userId,
            Pointtransfer.TRANSFER_TYPE_C_ACTIVITY_1A0001_COLLECT, payout,
            DateFormatUtils.format(new Date(), "yyyyMMdd") + "-" + outcome);

    if (transferred) {
        final String template = langPropsService.get("activity1A0001CollectSucc1Label");

        ret.put(Keys.MSG, template.replace("{point}", String.valueOf(payout)));
    } else {
        ret.put(Keys.MSG, langPropsService.get("activity1A0001CollectFailLabel"));
    }

    return ret;
}
 
Example 20
/**
 * Parses an HTML file and passes its complete text to {@code addStringToTermRep}.
 *
 * @param inputFile the HTML file to read, decoded using {@code ENCODING}
 * @throws IOException if the file cannot be read
 */
private void addHtmlFileToTermRep(File inputFile) throws IOException {
  addStringToTermRep(Jsoup.parse(inputFile, ENCODING).text());
}