Java Code Examples for org.jsoup.nodes.Document#selectFirst()

The following examples show how to use org.jsoup.nodes.Document#selectFirst(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: schedge   File: GetRatings.java    License: MIT License 6 votes vote down vote up
/**
 * Extracts the numeric rating from a raw HTML rating page.
 *
 * <p>The page is parsed with jsoup and the rating text is read from the
 * numerator element nested under {@code div#root}.
 *
 * @param rawData raw HTML of the rating page; may be {@code null}
 * @return the parsed rating, or {@code null} when the input is null/blank,
 *     when any expected element is missing, or when the rating text is not
 *     a number
 */
private static Float parseRating(String rawData) {
  // Null must be checked before trimming; trimming first made the null
  // check unreachable and threw on null input.
  String trimmed = rawData == null ? "" : rawData.trim();
  if (trimmed.isEmpty()) {
    logger.warn("Got bad data: empty string");
    return null;
  }

  Document doc = Jsoup.parse(trimmed);
  Element body = doc.selectFirst("div#root");
  if (body == null) {
    return null;
  }

  // selectFirst returns null when nothing matches, so every hop is guarded.
  Element ratingBody =
      body.selectFirst("div.TeacherInfo__StyledTeacher-ti1fio-1.fIlNyU");
  if (ratingBody == null) {
    return null;
  }
  Element firstDiv = ratingBody.selectFirst("div");
  if (firstDiv == null) {
    return null;
  }
  Element ratingInnerBody =
      firstDiv.selectFirst("div.RatingValue__AvgRating-qw8sqy-1.gIgExh");
  if (ratingInnerBody == null) {
    return null;
  }
  Element numerator =
      ratingInnerBody.selectFirst("div.RatingValue__Numerator-qw8sqy-2.gxuTRq");
  if (numerator == null) {
    return null;
  }

  String ratingValue = numerator.html().trim();
  try {
    return Float.parseFloat(ratingValue);
  } catch (NumberFormatException exception) {
    logger.warn("The instructor exist but having N/A rating");
    return null;
  }
}
 
Example 2
Source Project: HttpProxy   File: IPUtils.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Fetches the page at {@code MY_IP_API} and returns the text of the first
 * anchor inside the {@code div.tableNormal} element.
 *
 * @return the anchor text, or {@code null} if anything in the fetch or the
 *     parsing throws (the stack trace is printed)
 */
public static String getMyIp() {
    String ip = null;
    try {
        Document page = Jsoup.parse(HttpUtils.getResponseContent(MY_IP_API));
        Element container = page.selectFirst("div.tableNormal");

        // NOTE(review): the second table cell is looked up but its value is
        // never used. The lookup is retained because it throws (and thus
        // yields null) when the table has fewer than two cells — confirm
        // whether the anchor was meant to be read from this cell instead.
        container.selectFirst("table").select("td").get(1);

        ip = container.selectFirst("a").text();
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ip;
}
 
Example 3
/**
 * Builds an {@code ApkVersion} from an HTML page.
 *
 * <p>The version name is the first {@code digit(.digit)+} match in the page
 * title, defaulting to {@code "-1"}. The version info is the rendered text of
 * the {@code .apk_left_title_info} element found under the title block that
 * contains the text "新版特性", defaulting to {@code null}.
 *
 * @param html page HTML
 * @return a new {@code ApkVersion} holding the extracted name and info
 */
static ApkVersion parseFromCoolApk(String html) {
    Document page = Jsoup.parse(html);
    if (page == null) {
        return new ApkVersion("-1", null);
    }

    // Version name: taken from the <title> text when it contains a match.
    String versionName = "-1";
    Element titleEle = page.selectFirst("title");
    if (titleEle != null) {
        Matcher versionMatcher = Pattern.compile("\\d(\\.\\d)+").matcher(titleEle.text());
        if (versionMatcher.find()) {
            versionName = versionMatcher.group();
        }
    }

    // Version info: nested lookup, each step may come back empty.
    String versionInfo = null;
    Element featureBlock = page.selectFirst(".apk_left_title:contains(新版特性)");
    Element infoEle = featureBlock == null
            ? null
            : featureBlock.selectFirst(".apk_left_title_info");
    if (infoEle != null) {
        versionInfo = HtmlCompat.fromHtml(infoEle.toString(), HtmlCompat.FROM_HTML_MODE_COMPACT)
                .toString().trim();
    }

    return new ApkVersion(versionName, versionInfo);
}
 
Example 4
Source Project: V2EX   File: HtmlAmpUtil.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Parses an AMP topic page into a {@code Topic}.
 *
 * <p>Title, author, avatar, content, click count, creation text and node are
 * read from elements selected by their {@code .topic_*} CSS classes. No null
 * checks are performed, so a missing element results in a
 * {@code NullPointerException}.
 *
 * @param ampHtml HTML of the AMP topic page
 * @return the populated topic
 */
public static Topic getTopicWithReply(String ampHtml){

        Topic topic = new Topic();
        Document page = Jsoup.parse(ampHtml);
        Element nodeEle = page.selectFirst(".topic_node");

        String authorName = page.selectFirst(".topic_author").text();
        String avatarUrl = page.selectFirst(".topic_author_avatar").attr("src");
        Member author = new Member(authorName, avatarUrl);

        String title = page.selectFirst(".topic_title").text();
        topic.setTitle(title);
        topic.setMember(author);

        String contentHtml = page.selectFirst(".topic_content").html();
        topic.setContent(contentHtml);

        String hitsText = page.selectFirst(".topic_hits").text();
        topic.setClicks(HtmlUtil.matcherGroup1Int(Pattern.compile("(\\d+)"), hitsText));

        String createdText = page.selectFirst(".topic_created").text();
        topic.setAgo(createdText);

        String nodeHtml = nodeEle.html();
        topic.setNode(new Node(
                HtmlUtil.matcherGroup1(Pattern.compile("/go/(\\w+)"), nodeHtml),
                nodeEle.selectFirst("a").text()));

        // NOTE(review): this second setClicks overwrites the value taken from
        // .topic_hits above with the number found in .topic_stats — confirm
        // whether a different setter was intended here.
        String statsText = page.selectFirst(".topic_stats").text();
        topic.setClicks(HtmlUtil.matcherGroup1Int(Pattern.compile("(\\d+)"), statsText));

        return topic;
    }
 
Example 5
/**
 * Renders the status-report template for a mocked cluster with one node and
 * one pod, and checks that the first link in the table body points at the
 * agent status report for that pod's job id.
 */
@Test
public void shouldBuildStatusReportHtmlWithAgentStatusReportLink() throws IOException, TemplateException {
  // Arrange: one pod -> one node -> one cluster.
  KubernetesPod mockedPod = mock(KubernetesPod.class);
  when(mockedPod.getJobIdentifier()).thenReturn(new JobIdentifier(3243546575676657L));
  when(mockedPod.getCreationTimestamp()).thenReturn(new Date());

  KubernetesNode mockedNode = mock(KubernetesNode.class);
  when(mockedNode.getPods()).thenReturn(singletonList(mockedPod));

  KubernetesCluster mockedCluster = mock(KubernetesCluster.class);
  when(mockedCluster.getNodes()).thenReturn(singletonList(mockedNode));
  when(mockedCluster.getPluginId()).thenReturn("cd.go.contrib.elastic.agent.kubernetes");

  // Act: render the template and parse the resulting HTML.
  PluginStatusReportViewBuilder viewBuilder = PluginStatusReportViewBuilder.instance();
  String renderedHtml =
      viewBuilder.build(viewBuilder.getTemplate("status-report.template.ftlh"), mockedCluster);
  Element agentLink = Jsoup.parse(renderedHtml).selectFirst("tbody tr td a");
  System.out.println(agentLink);

  // Assert: the link targets the agent status report of the pod's job.
  String href = agentLink.attr("href");
  assertThat(href, is("/go/admin/status_reports/cd.go.contrib.elastic.agent.kubernetes/agent/?job_id=3243546575676657"));
}
 
Example 6
Source Project: kyoko   File: NicoAudioSourceManager.java    License: MIT License 6 votes vote down vote up
/**
 * Builds an audio track from the JSON stored in the {@code data-api-data}
 * attribute of the {@code #js-initial-watch-data} element.
 *
 * @param videoId  identifier of the video, also used to build the watch URL
 * @param document parsed watch page
 * @return the track, or {@code null} when the element or its attribute value
 *     is absent
 */
private AudioTrack extractTrackFromHtml(String videoId, Document document) {
    Element watchData = document.selectFirst("#js-initial-watch-data");
    if (watchData == null) {
        return null;
    }

    String apiData = watchData.attributes().get("data-api-data");
    if (apiData == null) {
        return null;
    }

    JsonObject root = new JsonObject(apiData);
    JsonObject videoJson = root.getJsonObject("video");

    String uploader = root.getJsonObject("owner").getString("nickname");
    String title = videoJson.getString("title");
    // The stored duration is multiplied by 1000 before being handed on.
    long duration = videoJson.getLong("duration") * 1000;

    AudioTrackInfo info =
            new AudioTrackInfo(title, uploader, duration, videoId, false, getWatchUrl(videoId));
    return new NicoAudioTrack(info, this);
}
 
Example 7
Source Project: schedge   File: GetRatings.java    License: MIT License 5 votes vote down vote up
/**
 * Finds the link id of the first professor listing that belongs to NYU in a
 * raw search-results page.
 *
 * <p>The listings are located by walking a fixed chain of containers from
 * {@code body.search_results} down to {@code ul.listings}. A listing matches
 * when its {@code span.sub} markup contains "New York University" or "NYU";
 * the value returned is the part of the listing anchor's {@code href} after
 * the first {@code =}.
 *
 * @param rawData raw HTML of the search-results page; may be {@code null}
 * @return the extracted id, or {@code null} when the input is null/blank, any
 *     container in the chain is missing, or no usable NYU listing is found
 */
private static String parseLink(String rawData) {
  logger.debug("parsing raw RMP data to link...");
  // Null must be checked before trimming; trimming first made the null
  // check unreachable and threw on null input.
  String trimmed = rawData == null ? "" : rawData.trim();
  if (trimmed.isEmpty()) {
    logger.warn("Got bad data: empty string");
    return null;
  }

  Document doc = Jsoup.parse(trimmed);

  // selectFirst returns null when nothing matches, so stop at the first
  // missing container instead of dereferencing it.
  String[] containerPath = {
      "div#container",
      "div#body",
      "div#mainContent",
      "div#searchResultsBox",
      "div.listings-wrap",
      "ul.listings"};
  Element current = doc.selectFirst("body.search_results");
  for (String selector : containerPath) {
    if (current == null) {
      return null;
    }
    current = current.selectFirst(selector);
  }
  if (current == null) {
    return null;
  }

  Elements professors = current.select("li.listing.PROFESSOR");
  for (Element element : professors) {
    // Some listings have no span.sub; skip them rather than crash.
    Element schoolSpan = element.selectFirst("span.sub");
    if (schoolSpan == null) {
      continue;
    }
    String school = schoolSpan.toString();
    if (school.contains("New York University") || school.contains("NYU")) {
      Element anchor = element.selectFirst("a");
      if (anchor == null) {
        continue;
      }
      // Only accept hrefs that actually carry a value after '='.
      String[] hrefParts = anchor.attr("href").split("=");
      if (hrefParts.length > 1) {
        return hrefParts[1];
      }
    }
  }

  return null;
}
 
Example 8
Source Project: schedge   File: ParseSection.java    License: MIT License 5 votes vote down vote up
/**
 * Parses a raw catalog section page into a {@code SectionAttribute}.
 *
 * <p>Returns {@code null} (after logging a warning) when the input is blank
 * or when the page contains a {@code div.alert.alert-info} element. Otherwise
 * the last anchor href containing "mapBuilding" is remembered, {@code a},
 * {@code i} and {@code b} tags are unwrapped, and the course name plus the
 * section attribute divs are handed to {@code parsingElements}.
 *
 * @param rawData raw HTML of the section page
 * @return the parsed section, or {@code null} as described above
 */
public static SectionAttribute parse(@NotNull String rawData) {
  logger.debug("parsing raw catalog section data into SectionAttribute...");

  String trimmed = rawData.trim();
  if (trimmed.isEmpty()) {
    logger.warn("Got bad data: empty string");
    return null; // the course doesn't exist
  }

  Document page = Jsoup.parse(trimmed);
  Element alert = page.selectFirst("div.alert.alert-info");
  if (alert != null) {
    logger.warn("Got bad data: " + alert.text());
    return null; // the course doesn't exist
  }

  // Remember the building link before anchors are unwrapped below; when
  // several anchors qualify the last one wins.
  String link = null;
  for (Element anchor : page.select("a")) {
    String href = anchor.attr("href");
    if (href.contains("mapBuilding")) {
      link = href;
    }
  }

  // Strip inline markup so only the text of these tags remains in place.
  for (String tag : new String[] {"a", "i", "b"}) {
    page.select(tag).unwrap();
  }

  Element mainSection = page.selectFirst("body > section.main");
  Element contentSection = mainSection.selectFirst("> section");
  String courseName = contentSection.selectFirst("> div.primary-head").text();
  Elements attributeDivs =
      contentSection.select("> div.section-content.clearfix");
  Map<String, String> secData = parseSectionAttributes(attributeDivs);

  return parsingElements(secData, courseName, link);
}
 
Example 9
Source Project: schedge   File: ParseEnroll.java    License: MIT License 5 votes vote down vote up
/**
 * Prints the text of every {@code div} under {@code section.main > section}
 * to standard output, skipping divs whose text is exactly "Results" or
 * "Okay".
 *
 * @param data raw HTML to parse
 */
public static void parseRegistrationNumber(String data) {
  Element section =
      Jsoup.parse(data).selectFirst("body").selectFirst("section.main > section");
  for (Element div : section.select("div")) {
    String text = div.text();
    boolean isBoilerplate = text.equals("Results") || text.equals("Okay");
    if (!isBoilerplate) {
      System.out.println(text);
    }
  }
}
 
Example 10
/**
 * Downloads the page at the given URL and reads the {@code value} attribute
 * of its {@code csrfToken} input.
 *
 * @param controllerUrl URL of the page to fetch
 * @return the attribute value, or {@code null} when the page has no such input
 * @throws IOException if fetching the page fails
 */
private String retrieveCsrfToken(String controllerUrl) throws IOException {
    Element tokenInput = Jsoup.connect(controllerUrl)
            .get()
            .selectFirst("input[name='csrfToken']");

    if (tokenInput == null) {
        return null;
    }
    return tokenInput.attributes().get("value");
}
 
Example 11
Source Project: torrssen2   File: RssMakeService.java    License: MIT License 5 votes vote down vote up
/**
 * Builds a magnet URI from the {@code onclick} handler of the page's
 * {@code .btn.btn-success.btn-xs} element.
 *
 * <p>The handler must match {@code magnet_link('<hash>');} in full
 * (case-insensitively); the captured hash is appended to
 * {@code magnet:?xt=urn:btih:}.
 *
 * @param urlString URL of the page to load via {@code getDoc}
 * @return the magnet URI, or {@code null} when the button is missing or its
 *     handler does not match
 * @throws Exception propagated from {@code getDoc}
 */
private String getMagnetString1(String urlString) throws Exception {
    Document doc = getDoc(urlString);

    Element el = doc.selectFirst(".btn.btn-success.btn-xs");
    // selectFirst returns null when nothing matches; treat a missing button
    // like a non-matching handler instead of failing with an NPE.
    if (el == null) {
        return null;
    }

    Pattern pattern = Pattern.compile("magnet_link\\(\\'(.{1,})\\'\\);", Pattern.CASE_INSENSITIVE);
    Matcher matcher = pattern.matcher(el.attr("onclick"));

    if (matcher.matches()) {
        return "magnet:?xt=urn:btih:" + matcher.group(1);
    }
    return null;
}
 
Example 12
Source Project: runscore   File: Abcyzf.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Sends a payment request built from the {@code abcyzf.*} configuration
 * values and returns the payment URL.
 *
 * <p>The raw response is echoed to standard error. If the response contains a
 * {@code script} element, the URL is taken from its data by removing the
 * {@code window.location.href='} prefix text and the last two characters;
 * otherwise the raw response itself is returned.
 *
 * @param orderNo     order number sent as {@code out_trade_no}
 * @param amount      amount sent as {@code money}
 * @param channelCode channel sent as {@code type}
 * @return the payment URL
 * @throws BizException when the response is blank or extracting the URL fails
 */
@Override
public String startPay(String orderNo, Double amount, String channelCode) {
	String money = String.valueOf(amount);

	Map<String, Object> requestParams = new HashMap<>();
	requestParams.put("pid", ConfigHolder.getConfigValue("abcyzf.pid"));
	requestParams.put("type", channelCode);
	requestParams.put("out_trade_no", orderNo);
	requestParams.put("notify_url", ConfigHolder.getConfigValue("abcyzf.notifyUrl"));
	requestParams.put("return_url", ConfigHolder.getConfigValue("abcyzf.returnUrl"));
	requestParams.put("name", ConfigHolder.getConfigValue("abcyzf.name"));
	requestParams.put("money", money);
	requestParams.put("sign_type", "MD5");
	requestParams.put("sign", generateRequestSign(orderNo, money, channelCode));

	String response = HttpUtil.get(ConfigHolder.getConfigValue("abcyzf.payUrl"), requestParams);
	System.err.println(response);
	if (StrUtil.isBlank(response)) {
		throw new BizException(BizError.发起支付异常);
	}

	// Fall back to the raw response when it carries no script element.
	String payUrl = response;
	try {
		Element script = Jsoup.parse(response).selectFirst("script");
		if (script != null) {
			String target = script.data().replace("window.location.href='", "");
			// Drop the two trailing characters left after the URL.
			payUrl = target.substring(0, target.length() - 2);
		}
	} catch (Exception e) {
		throw new BizException(BizError.发起支付异常);
	}
	return payUrl;
}
 
Example 13
/**
 * Requests {@code /test}, then verifies the HTTP response, the model object
 * stored under {@code XsltConfiguration.XML_SOURCE_TAG}, and the {@code h1}
 * heading of the rendered HTML.
 */
@Test
public void basicXSLTTestIsProcessedCorrectly() throws Exception {

	// Obtaining response and basic tests
	MvcResult response = this.mvc
			.perform(get("/test"))
			.andExpect(status().isOk())
			.andExpect(content().contentTypeCompatibleWith(MediaType.TEXT_HTML))
			.andExpect(content().string(containsString("Test label")))
			.andReturn();

	// Check the model
	final Object model = response.getModelAndView().getModel().get(XsltConfiguration.XML_SOURCE_TAG);
	assertNotNull("Model object returned is not null", model);
	assertThat("Model object is of the appropriate class", model, instanceOf(App.class));

	// Check the response
	Document html = Jsoup.parse(response.getResponse().getContentAsString());

	Element headerElement = html.selectFirst("h1");
	assertNotNull("We have a title", headerElement);
	// Actual value first, expected value in the matcher, so that a failure
	// reports "Expected: TEST but: was <page text>" rather than the reverse.
	assertThat("We have a title", headerElement.text(), equalTo("TEST"));
}
 
Example 14
Source Project: V2EX   File: HtmlUtil.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Parses a topic page into a {@code Topic} including its reply list.
 *
 * <p>Most fields are extracted from the first {@code #Main > .box} element
 * (either via selectors or by regex over its markup) and from the page's
 * {@code meta[property=...]} tags. Reply count and last-touched time are only
 * set when the {@code #Main > .box > .cell > span} element exists. Required
 * elements are not null-checked, so a page missing them results in a
 * {@code NullPointerException}.
 *
 * @param html HTML of the topic page
 * @return the populated topic
 */
public static Topic getTopicAndReplies(String html){

        Topic topic = new Topic();
        Document page = Jsoup.parse(html);
        Element headerBox = page.selectFirst("#Main > .box");
        String headerMarkup = headerBox.toString();
        Element repliesSummary = page.selectFirst("#Main > .box > .cell > span");
        Element contentEle = headerBox.selectFirst(".topic_content");
        Element subtleEle = headerBox.selectFirst(".subtle");

        // The 'T' and 'Z' markers of the timestamp are replaced by spaces
        // before it is converted.
        String publishedTime = page.selectFirst("meta[property=article:published_time]")
                .attr("content")
                .replaceAll("[TZ]", " ");
        topic.setCreated(TimeUtil.strToTimestamp(publishedTime,null));

        topic.setId(matcherGroup1Int(Pattern.compile("(\\d{2,})"),
                page.selectFirst("meta[property=og:url]").attr("content")));

        String title = headerBox.selectFirst(".header > h1").text();
        topic.setTitle(title);
        topic.setClicks(matcherGroup1Int(PATTERN_TOPIC_CLICK, headerMarkup));
        topic.setAgo(matcherGroup1(Pattern.compile("· ([^·]+) ·"),
                headerBox.selectFirst(".header > small").toString()));
        topic.setFavors(matcherGroup1Int(PATTERN_TOPIC_FAVORS, headerMarkup));

        // Rendered content: body (or a line break), then the optional
        // "subtle" appendix, framed by a leading newline and a trailer.
        String bodyPart;
        if (contentEle == null) {
            bodyPart = "<br>";
        } else {
            bodyPart = contentEle.toString();
        }
        String subtlePart;
        if (subtleEle == null) {
            subtlePart = " ";
        } else {
            subtlePart = subtleEle.toString();
        }
        topic.setContent_rendered("\n" + bodyPart + subtlePart + "\n\t---");

        topic.setMember(new Member(
                matcherGroup1(PATTERN_TOPIC_USERNAME, headerMarkup),
                matcherGroup1(PATTERN_TOPIC_USER_AVATAR, headerMarkup)));
        topic.setNode(new Node(
                page.selectFirst("meta[property=article:tag]").attr("content"),
                page.selectFirst("meta[property=article:section]").attr("content")));

        if (repliesSummary != null){
            String summaryMarkup = repliesSummary.toString();
            String lastTouched = matcherGroup1(Pattern.compile("直到 ([^+]+)"), summaryMarkup);
            topic.setLast_touched(lastTouched.isEmpty() ? 0 : TimeUtil.strToTimestamp(lastTouched,null));
            topic.setReplies(matcherGroup1Int(PATTERN_TOPIC_REPLY_COUNT, summaryMarkup));
        }

        topic.setReplyList(getReplies(page, topic.getMember().getUsername()));
        return topic;
    }
 
Example 15
/**
 * Follows the activation link found in a mail message.
 *
 * <p>The message text is parsed as HTML, the first element matching
 * {@code firstCssQuery} is taken as the link, and its {@code href} is
 * requested through {@code httpService}. When the request fails the error is
 * logged and a {@code Result.Failure} is constructed.
 *
 * @param subjectMail   mail subject, used in the failure message
 * @param firstCssQuery CSS query locating the activation link
 * @param message       mail message to inspect
 */
private void validateActivationLink(String subjectMail, String firstCssQuery, Message message) throws MessagingException, IOException, TechnicalException, FailureException {
    final Document mailBody = Jsoup.parse(getTextFromMessage(message));
    final Element activationAnchor = mailBody.selectFirst(firstCssQuery);
    try {
        final String activationUrl = activationAnchor.attr("href");
        final String response = httpService.get(activationUrl);
        log.debug("response is {}.", response);
    } catch (final HttpServiceException e) {
        log.error(Messages.format(Messages.getMessage(Messages.FAIL_MESSAGE_MAIL_ACTIVATION), subjectMail), e);
        // NOTE(review): the Failure instance is created and not kept —
        // presumably its constructor reports the failure itself (the method
        // declares FailureException); confirm.
        new Result.Failure<>(
                "",
                Messages.format(Messages.getMessage(Messages.FAIL_MESSAGE_MAIL_ACTIVATION), subjectMail),
                false,
                Context.getCallBack(Callbacks.RESTART_WEB_DRIVER));
    }
}
 
Example 16
Source Project: Hentoid   File: PorncomixParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Loads the gallery page of the given content and delegates image extraction
 * to the four-argument {@code parseImages} overload, passing the candidate
 * containers found on the page.
 *
 * @param content content whose gallery URL is fetched
 * @return the list returned by the overload
 * @throws Exception a {@code ParseException} when the page cannot be loaded,
 *     plus whatever the fetch or the overload throws
 */
@Override
protected List<String> parseImages(@NonNull Content content) throws Exception {
    String galleryUrl = content.getGalleryUrl();

    // Fetch the book gallery page
    Document galleryDoc = getOnlineDocument(galleryUrl);
    if (galleryDoc == null) {
        throw new ParseException("Document unreachable : " + content.getGalleryUrl());
    }

    // Candidate locations for page images; the overload decides which to use.
    Element readerScript = galleryDoc.selectFirst(".reading-content script");
    List<Element> justifiedGalleryLinks = galleryDoc.select("#dgwt-jg-2 a"); // same for zone
    List<Element> uniteGalleryImages = galleryDoc.select(".unite-gallery img"); // same for zone
    List<Element> bestGalleryLinks = galleryDoc.select("#gallery-2 a");

    return parseImages(readerScript, justifiedGalleryLinks, uniteGalleryImages, bestGalleryLinks);
}
 
Example 17
/**
 * Converts {@code sample.adoc} with the {@code revealjs} backend and the
 * diagram extension, then checks that the output file exists, that the
 * expected stylesheets are linked, and that the {@code #diagram} slide
 * embeds its image as a base64 SVG data URI.
 */
@Test
public void should_create_simple_slides() throws IOException {
    String filename = "sample";
    File inputFile = new File("build/resources/test/" + filename + ".adoc");
    File outputFile1 = new File(inputFile.getParentFile(), filename + ".html");
    removeFileIfItExists(outputFile1);

    // NOTE(review): the "[email protected]" fragments in the URLs below look
    // like e-mail-obfuscation residue of a versioned package name — confirm
    // against the original project before relying on these literals.
    AsciidoctorInvoker.main(new String[]{
        "-b", "revealjs",
        "-r", "asciidoctor-diagram",
        "-a", "revealjsdir=https://cdn.jsdelivr.net/npm/[email protected]",
        inputFile.getAbsolutePath()
    });

    // Assert existence before parsing so a missing output fails with a clear
    // assertion instead of an IOException from the parser.
    assertThat(outputFile1.exists(), is(true));

    Document doc = Jsoup.parse(outputFile1, "UTF-8");

    List<String> stylesheets = doc.head().getElementsByTag("link").stream()
        .filter(element -> "stylesheet".equals(element.attr("rel")))
        .map(element -> element.attr("href"))
        .collect(toList());
    assertThat(stylesheets,
        hasItems(
            "https://cdn.jsdelivr.net/npm/[email protected]/css/reveal.css",
            "https://cdn.jsdelivr.net/npm/[email protected]/css/theme/black.css"));

    Element diagramSlide = doc.selectFirst("#diagram");
    assertThat(diagramSlide, notNullValue());

    Element diagram = diagramSlide.selectFirst("div.imageblock img");
    assertThat(diagram, notNullValue());

    assertThat(diagram.attr("src"), startsWith("data:image/svg+xml;base64,"));
}