Java Code Examples for org.jsoup.nodes.Element#data()

The following examples show how to use org.jsoup.nodes.Element#data(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GetHTMLElement.java    From localization_nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the output string for a parsed HTML element by placing the selected
 * part of the element between an optional prefix and suffix.
 *
 * @param prependValue text placed before the extracted value; replaced with ""
 *                     when {@code StringUtils.isEmpty} reports it as empty
 * @param outputType   selects what is read from the element; any type other than
 *                     text, data or attribute yields the element's inner HTML
 * @param appendValue  text placed after the extracted value; replaced with ""
 *                     when {@code StringUtils.isEmpty} reports it as empty
 * @param ele          the element the value is read from
 * @param attrKey      attribute name, used only for the attribute output type
 * @return prefix, extracted value and suffix concatenated in that order
 */
private String extractElementValue(String prependValue, final String outputType, String appendValue, final Element ele, final String attrKey) {
    final String prefix = StringUtils.isEmpty(prependValue) ? "" : prependValue;
    final String suffix = StringUtils.isEmpty(appendValue) ? "" : appendValue;

    final String extracted;
    switch (outputType) {
        case ELEMENT_TEXT:
            extracted = ele.text();
            break;
        case ELEMENT_DATA:
            extracted = ele.data();
            break;
        case ELEMENT_ATTRIBUTE:
            extracted = ele.attr(attrKey);
            break;
        case ELEMENT_HTML:
        default:
            // The HTML type and every unrecognised type both produce the inner HTML.
            extracted = ele.html();
            break;
    }
    return prefix + extracted + suffix;
}
 
Example 2
Source File: BatoRipper.java    From ripme with MIT License 6 votes vote down vote up
/**
 * Collects image URLs from every inline script that contains
 * {@code var images = }.
 *
 * The other variable declarations are stripped from the script text, the
 * remainder is parsed as a JSON object, and the values stored under the keys
 * "1" through "n" (n being the object's size) are returned in that order.
 *
 * @param doc the page to scan
 * @return the image URLs found, possibly empty
 */
@Override
public List<String> getURLsFromPage(Document doc) {
    List<String> urls = new ArrayList<>();
    for (Element script : doc.select("script")) {
        String source = script.data();
        if (!source.contains("var images = ")) {
            continue;
        }

        // Remove the sibling declarations so only the images assignment is left.
        // NOTE(review): the nextCha pattern only matches a run of literal dots
        // before the semicolon - confirm that is what was intended.
        String remaining = source
                .replaceAll("var seriesId = \\d+;", "")
                .replaceAll("var chapterId = \\d+;", "")
                .replaceAll("var pages = \\d+;", "")
                .replaceAll("var page = \\d+;", "")
                .replaceAll("var prevCha = null;", "")
                .replaceAll("var nextCha = \\.*;", "");

        String json = remaining.replaceAll("var images = ", "").replaceAll(";", "");
        JSONObject images = new JSONObject(json);
        for (int page = 1; page <= images.length(); page++) {
            urls.add(images.getString(Integer.toString(page)));
        }
    }
    return urls;
}
 
Example 3
Source File: CSSJsoupPhlocContentAdapterImpl.java    From Asqatasun with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Adapts the inline CSS of every locale CSS element whose data is not blank,
 * saves each resulting stylesheet content, and finally registers the saved
 * ids through {@code saveContentRelationShip} together with {@code getSSP()}.
 */
private void adaptLocaleCSS() {
    Set<Long> savedCssIds = new HashSet<>();

    for (Element cssElement : localeCssElements) {
        String inlineCss = cssElement.data();
        if (StringUtils.isBlank(inlineCss)) {
            // Nothing to adapt for this element.
            continue;
        }

        Resource cssResource = new CSSResourceImpl(inlineCss, 0, new LocalRsrc());
        StylesheetContent stylesheet =
                getStylesheetFromLocaleResource(cssResource.getResource());
        adaptContent(
                stylesheet,
                cssResource,
                getCurrentResourcePath(cssElement.baseUri()),
                getListOfMediaFromAttributeValue(cssElement));
        savedCssIds.add(getContentDataService().saveOrUpdate(stylesheet).getId());
    }

    getContentDataService().saveContentRelationShip(getSSP(), savedCssIds);
}
 
Example 4
Source File: GetHTMLElement.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Extracts one part of a parsed HTML element and wraps it between an optional
 * prefix and suffix.
 *
 * @param prependValue leading text; replaced with "" when
 *                     {@code StringUtils.isEmpty} reports it as empty
 * @param outputType   which part of the element to read; anything other than
 *                     the text, data or attribute type yields the inner HTML
 * @param appendValue  trailing text; replaced with "" when
 *                     {@code StringUtils.isEmpty} reports it as empty
 * @param ele          the element to read from
 * @param attrKey      attribute name, used only for the attribute output type
 * @return the leading text, the extracted value and the trailing text, concatenated
 */
private String extractElementValue(String prependValue, final String outputType, String appendValue, final Element ele, final String attrKey) {
    final String leading = StringUtils.isEmpty(prependValue) ? "" : prependValue;
    final String trailing = StringUtils.isEmpty(appendValue) ? "" : appendValue;

    final String value;
    switch (outputType) {
        case ELEMENT_TEXT:
            value = ele.text();
            break;
        case ELEMENT_DATA:
            value = ele.data();
            break;
        case ELEMENT_ATTRIBUTE:
            value = ele.attr(attrKey);
            break;
        case ELEMENT_HTML:
        default:
            // Inner HTML serves both the explicit HTML type and the fallback.
            value = ele.html();
            break;
    }
    return leading + value + trailing;
}
 
Example 5
Source File: HtmlDecoder.java    From metafacture-core with Apache License 2.0 6 votes vote down vote up
/**
 * Walks the children of {@code parent} depth-first and reports each one to the
 * receiver as an entity named after the node, with one literal per attribute.
 * A child without child elements additionally gets a "value" literal holding
 * its trimmed text, or its data when the text is empty; nothing is emitted
 * when both are empty.
 *
 * @param parent   element whose children are reported
 * @param receiver target of the entity and literal events
 */
private void process(Element parent, StreamReceiver receiver) {
    for (Element child : parent.children()) {
        receiver.startEntity(child.nodeName());

        for (Attribute attr : child.attributes()) {
            receiver.literal(attr.getKey(), attr.getValue());
        }

        if (child.children().isEmpty()) {
            String value = child.text().trim();
            if (value.isEmpty()) {
                // Fall back to the element's data when it has no text.
                value = child.data();
            }
            if (!value.isEmpty()) {
                receiver.literal("value", value);
            }
        }

        process(child, receiver);
        receiver.endEntity();
    }
}
 
Example 6
Source File: ParseKeDouWo.java    From v9porn with MIT License 5 votes vote down vote up
/**
 * Parses a video detail page into a {@link KeDouRelated}.
 *
 * The out-of-watch flag is set when the page contains the watch-limit notice.
 * The video URL is taken from the data of the first {@code div.player-holder}
 * element, and the related list from the {@code .item} elements under the
 * first {@code #list_videos_related_videos_items} element.
 *
 * A page missing either element no longer causes a NullPointerException: the
 * video URL is simply left unset and the related list is parsed from an empty
 * selection.
 *
 * @param html raw HTML of the detail page
 * @return the parsed result, never null
 */
public static KeDouRelated parseVideoDetail(String html) {
    KeDouRelated keDouRelated = new KeDouRelated();
    if (html.contains("您已超过观看限制")) {
        Logger.d("已经超出观看上限了");
        // Flag that the watch limit has been exceeded.
        keDouRelated.setOutOfWatch(true);
    }
    Document document = Jsoup.parse(html);

    // first() returns null when the selector matches nothing, so guard it.
    Element playerHolder = document.select("div.player-holder").first();
    if (playerHolder != null) {
        final String reg = "(video_url+):\\s?(.+)(\\?br=\\d+)";
        Matcher m = Pattern.compile(reg).matcher(playerHolder.data());
        if (m.find()) {
            String group = m.group();
            // The URL is everything after the first single quote in the match.
            String videoUrl = group.substring(group.indexOf("'") + 1);
            Logger.d("videoUrl: " + videoUrl);
            keDouRelated.setVideoUrl(videoUrl);
        }
    }

    Element relatedContainer = document.select("#list_videos_related_videos_items").first();
    Elements relatedList = relatedContainer != null
            ? relatedContainer.select(".item")
            : new Elements();
    List<KeDouModel> keDouModels = parseList(relatedList);
    keDouRelated.setRelatedList(keDouModels);

    return keDouRelated;
}
 
Example 7
Source File: PhotobucketRipper.java    From ripme with MIT License 5 votes vote down vote up
/**
 * Looks through the page's {@code text/javascript} script tags for one that
 * mentions {@code libraryAlbumsPageCollectionData} and whose whole content
 * matches {@code collDataPattern}; the first capture group of that match is
 * parsed as JSON.
 *
 * @param page document to search
 * @return the parsed JSON object, or null when no script qualifies
 */
private JSONObject getCollectionData(Document page){
    for (Element scriptTag : page.select("script[type=text/javascript]")) {
        String js = scriptTag.data();
        if (!js.contains("libraryAlbumsPageCollectionData")) {
            // Not the script holding the album info.
            continue;
        }
        Matcher collData = collDataPattern.matcher(js);
        if (collData.matches()) {
            return new JSONObject(collData.group(1));
        }
    }
    return null;
}
 
Example 8
Source File: InstagramRipper.java    From ripme with MIT License 5 votes vote down vote up
/**
 * Finds the first {@code text/javascript} script whose content starts with
 * {@code window._sharedData} or {@code window.__additionalDataLoaded} and
 * whose embedded JSON text mentions "graphql" or "StoriesPage", and parses
 * that JSON.
 *
 * @param document page to search
 * @return the parsed JSON object, or null when no script qualifies
 */
private JSONObject getJsonObjectFromDoc(Document document) {
    for (Element scriptTag : document.select("script[type=text/javascript]")) {
        String js = scriptTag.data();
        boolean carriesPageData = js.startsWith("window._sharedData")
                || js.startsWith("window.__additionalDataLoaded");
        if (!carriesPageData) {
            continue;
        }

        // Keep only the span from the first '{' to the last '}'.
        String jsonText = js.replaceAll("[^{]*([{].*})[^}]*", "$1");
        boolean relevant = jsonText.contains("graphql") || jsonText.contains("StoriesPage");
        if (relevant) {
            return new JSONObject(jsonText);
        }
    }
    return null;
}
 
Example 9
Source File: ThechiveRipper.java    From ripme with MIT License 4 votes vote down vote up
/**
 * Extracts image and gif URLs from the scripts that mention
 * {@code CHIVE_GALLERY_ITEMS}.
 *
 * Every match of {@code imagePattern} in such a script is collected with its
 * backslashes removed, the collected markup is parsed as HTML, and for each
 * {@code <img>} the {@code data-gifsrc} attribute is preferred over
 * {@code src}. Any query string is then removed from the URLs; a URL without
 * a '?' is kept unchanged instead of raising StringIndexOutOfBoundsException.
 *
 * @param doc page to scan
 * @return the URLs found, without query strings; possibly empty
 */
private List<String> getUrlsFromThechive(Document doc) {
    List<String> result = new ArrayList<>();

    for (Element script : doc.getElementsByTag("script")) {
        String data = script.data();
        if (!data.contains("CHIVE_GALLERY_ITEMS")) {
            continue;
        }

        // Gather all matched <img> tags into one string so they can be parsed
        // as a single HTML fragment.
        StringBuilder allImgTags = new StringBuilder();
        Matcher matcher = imagePattern.matcher(data);
        while (matcher.find()) {
            // Removing the backslashes unescapes both the tag and its URL.
            allImgTags.append(matcher.group(0).replaceAll("\\\\", ""));
        }

        Document imgDoc = Jsoup.parse(allImgTags.toString());
        for (Element img : imgDoc.getElementsByTag("img")) {
            // Gifs carry their URL in data-gifsrc; other images use src.
            String attribute = img.hasAttr("data-gifsrc") ? "data-gifsrc" : "src";
            result.add(img.attr(attribute));
        }
    }

    // Strip GET parameters (quality, width, height, ...) to get the original
    // image. indexOf returns -1 when there is no query string, which must not
    // be passed to substring.
    result.replaceAll(url -> {
        int queryStart = url.indexOf('?');
        return queryStart >= 0 ? url.substring(0, queryStart) : url;
    });

    return result;
}
 
Example 10
Source File: DigiflipScraper.java    From Project-Tauro with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Reads the BSSID from the router's {@code /status.htm} page and the SSID and
 * key from {@code mainPage}, then stores them through {@code updaterDao}.
 *
 * The BSSID is the text between {@code bssid_drv[0] ='} and the next single
 * quote in the first script that contains that marker. Dashes are replaced by
 * colons; a value without any separator gets a colon inserted after every two
 * characters.
 *
 * @param mainPage page holding the {@code ssid} and {@code pskValue} inputs
 * @return true when the station data was saved, false when no BSSID was found
 *         or both SSID and key are null
 * @throws IOException          if fetching the status page fails
 * @throws InterruptedException if the fixed wait is interrupted
 */
private boolean logOldDigiflip(HtmlPage mainPage) throws IOException, InterruptedException {

    String macAddr = null;

    Document doc = Jsoup.connect(hostUrl + "/status.htm").userAgent(USER_AGENT).header("Authorization", "Basic " + base64Login).timeout(60000).get();

    final String bssidMarker = "bssid_drv[0] ='";
    for (Element script : doc.select("script")) {
        String theScript = script.data();
        int start = theScript.indexOf(bssidMarker);
        if (start < 0) {
            continue;
        }
        start += bssidMarker.length(); // Skip past the opening quote.
        int end = theScript.indexOf('\'', start);
        // An empty or unterminated value is treated as "no BSSID" rather than
        // reading past the end of the script.
        if (end > start) {
            macAddr = theScript.substring(start, end);
        }
        break;
    }

    doc = Jsoup.parse(mainPage.asXml());

    Elements elements = doc.select("html");

    System.out.println(mainPage.asXml());

    Thread.sleep(5000);

    if (macAddr == null) {
        Logger.error(host + ": Not an old digiflip.", true);
        return false;
    }

    macAddr = macAddr.replace("-", ":");
    if (!macAddr.contains(":")) {
        // No separators at all: insert a colon after every two characters.
        // The loop must run over the original value, not the result being built.
        StringBuilder withColons = new StringBuilder();
        for (int i = 0; i < macAddr.length(); i++) {
            if ((i > 0) && (i % 2 == 0)) {
                withColons.append(':');
            }
            withColons.append(macAddr.charAt(i));
        }
        macAddr = withColons.toString();
    }

    Logger.info(host + ": Found MAC: " + macAddr);

    Map<String, String> wifiData = new HashMap<>();

    wifiData.put("BSSID", macAddr.toLowerCase());
    wifiData.put("SSID", elements.get(0).select("input#ssid[name=ssid]").val());
    wifiData.put("key", elements.get(0).select("input#pskValue[name=pskValue]").val());
    wifiData.put("AuthType", "WPA/WPA2 PSK");
    wifiData.put("Encryption", "TKIP/AES");

    // NOTE(review): jsoup's Elements.val() appears to return an empty string
    // rather than null when nothing matches, so this check may never fire -
    // confirm whether an emptiness test was intended.
    if ((wifiData.get("SSID") == null) && (wifiData.get("key") == null)) {
        Logger.debug(host + ": Not an old Digiflip or not WPA. FIX THIS!");
        return false;
    }

    Logger.info(host + ": Found SSID: " + wifiData.get("SSID"));
    Logger.info(host + ": Found key: " + wifiData.get("key"));
    Logger.info(host + ": Found AuthType: " + wifiData.get("AuthType"));
    Logger.info(host + ": Found Encryption: " + wifiData.get("Encryption"));

    updaterDao.saveStation(wifiData, host);

    return true;
}