org.jsoup.select.Elements Java Examples

The following examples show how to use org.jsoup.select.Elements. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: JsoupHTMLLinkStructureExtractor.java    From wandora with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Creates (or reuses) a topic for the document identified by {@code u}, types it as a
 * "Document", and hands every anchor element of the document to {@code parseLink}.
 *
 * @param d the parsed HTML document whose {@code a} elements are processed
 * @param u the identifier passed to {@code getOrCreateTopic} for the document topic
 * @param t the topic map that is stored on this extractor and receives the topics
 * @return always {@code true}
 * @throws Exception if any step outside the per-link handling fails
 */
@Override
public boolean extractTopicsFrom(Document d, String u, TopicMap t) throws Exception {
    tm = t;
    wandoraClass = getWandoraClassTopic(tm);

    Topic docType = getOrCreateTopic(tm, DOC_TYPE, "Document");
    makeSubclassOf(tm, docType, wandoraClass);

    Topic documentTopic = getOrCreateTopic(tm, u);
    documentTopic.addType(docType);

    // A topic map failure on one link is logged and does not stop the remaining links.
    for (Element anchor : d.select("a")) {
        try {
            parseLink(anchor, documentTopic);
        } catch (TopicMapException linkFailure) {
            log(linkFailure.getMessage());
        }
    }

    return true;
}
 
Example #2
Source File: SourcePrinterTest.java    From warnings-ng-plugin with MIT License 6 votes vote down vote up
/**
 * Renders {@code format.xml} and checks that both the whole document text and the text of
 * its {@code pre} elements equal the file content when whitespace is ignored.
 */
@Test
@org.jvnet.hudson.test.Issue("JENKINS-55679")
void shouldRenderXmlFiles() {
    SourcePrinter sourcePrinter = new SourcePrinter();
    Issue builtIssue = new IssueBuilder().build();

    Document rendered = Jsoup.parse(
            sourcePrinter.render(asStream("format.xml"), builtIssue, NO_DESCRIPTION, ICON_URL));
    String fileContent = toString("format.xml");

    assertThat(rendered.text()).isEqualToIgnoringWhitespace(fileContent);

    Elements preformatted = rendered.getElementsByTag("pre");
    assertThat(preformatted.text()).isEqualToIgnoringWhitespace(fileContent);
}
 
Example #3
Source File: Rgaa30Rule050801.java    From Asqatasun with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Splits the elements of {@code elementHandler} by whether they contain a match for
 * {@code DATA_TABLE_MARKUP_CSS_LIKE_QUERY}.
 *
 * <p>Matching elements are collected and, after the loop, {@code elementHandler} is cleaned
 * and refilled with only those elements. Non-matching elements are added to
 * {@code elementHandlerWithoutDataTableMarkup} when that handler is not {@code null}.
 *
 * @param elementHandler handler providing the elements to inspect; ends up holding only
 *            the elements with data table markup
 * @param elementHandlerWithoutDataTableMarkup receiver for the elements without data table
 *            markup; may be {@code null}
 */
private void extractTableWithDataTableMarkup(
            ElementHandler<Element> elementHandler, 
            ElementHandler<Element> elementHandlerWithoutDataTableMarkup) {

    Elements withMarkup = new Elements();

    for (Element candidate : elementHandler.get()) {
        boolean hasMarkup = !candidate.select(DATA_TABLE_MARKUP_CSS_LIKE_QUERY).isEmpty();
        if (hasMarkup) {
            withMarkup.add(candidate);
            continue;
        }
        if (elementHandlerWithoutDataTableMarkup != null) {
            elementHandlerWithoutDataTableMarkup.add(candidate);
        }
    }

    elementHandler.clean().addAll(withMarkup);
}
 
Example #4
Source File: LeaveOneOutCV.java    From NLIWOD with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Loads one column of the QALD6 multilingual log of the given system.
 *
 * <p>Reads {@code multilingual_<system>.html}, takes the table at index 5 of the document and
 * collects, for every row, the own text of the cell at index 1. The first collected entry
 * (the table head) is dropped.
 *
 * @param system name of the system, used to build the log file name
 * @return the collected cell texts; the list collected so far if the file cannot be read
 */
public static ArrayList<String> loadSystemR(String system){
	Path datapath = Paths.get("./src/main/resources/QALD6MultilingualLogs/multilingual_" + system + ".html");
	ArrayList<String> result = Lists.newArrayList();

	// Files.lines keeps the file open until the stream is closed, so it is closed here
	// deterministically instead of being left to the garbage collector.
	try (java.util.stream.Stream<String> lines = Files.lines(datapath)) {
		String loadedData = lines.collect(Collectors.joining());
		Document doc = Jsoup.parse(loadedData);
		Element table = doc.select("table").get(5);
		Elements tableRows = table.select("tr");
		for(Element row: tableRows){
			Elements tableEntry = row.select("td");
			result.add(tableEntry.get(1).ownText());
		}
		result.remove(0); //remove the head of the table
		return result;
	}catch(IOException e){
		e.printStackTrace();
		log.debug("loading failed.");
		return result;
	}
}
 
Example #5
Source File: WhenJavaExtensionIsRegistered.java    From asciidoctorj with Apache License 2.0 6 votes vote down vote up
/**
 * Registers one {@code YellBlock} instance and converts the sample document twice, checking
 * after each conversion that exactly one paragraph with the expected text was produced.
 */
@Test
public void a_block_processor_instance_should_be_executed_twice() {

    JavaExtensionRegistry registry = this.asciidoctor.javaExtensionRegistry();

    Map<String, Object> blockConfig = new HashMap<>();
    blockConfig.put(Contexts.KEY, Arrays.asList(Contexts.PARAGRAPH));
    blockConfig.put(ContentModel.KEY, ContentModel.SIMPLE);
    YellBlock yell = new YellBlock("yell", blockConfig);
    registry.block(yell);

    int pass = 0;
    while (pass < 2) {
        String html = asciidoctor.convertFile(
                classpath.getResource("sample-with-yell-block.ad"),
                options().toFile(false).get());
        // NOTE(review): in Jsoup.parse(String, String) the second argument is the base URI,
        // not a charset -- confirm that "UTF-8" is intended here.
        org.jsoup.nodes.Document parsed = Jsoup.parse(html, "UTF-8");
        Elements paragraphs = parsed.getElementsByClass("paragraph");
        assertThat(paragraphs.size(), is(1));
        assertThat(paragraphs.get(0).text(), is("THE TIME IS NOW. GET A MOVE ON."));
        pass++;
    }
}
 
Example #6
Source File: Nanrentu.java    From PicKing with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the URL of the next detail page from the pagination link whose text contains
 * "下一页" (next page).
 *
 * @param baseUrl    not used by this implementation
 * @param currentUrl URL of the current page; the part matched by {@code http.*&#47;} is
 *                   prepended to the link's {@code href}
 * @param result     raw page bytes, decoded as gb2312
 * @return the next page URL, or an empty string when there is no such link, its
 *         {@code href} is empty, or {@code currentUrl} does not match the pattern
 * @throws UnsupportedEncodingException if the gb2312 charset is not supported
 */
@Override
public String getDetailNext(String baseUrl, String currentUrl, byte[] result) throws UnsupportedEncodingException {
    String html = new String(result, "gb2312");
    Elements nextLinks = Jsoup.parse(html).select("div.pagelist a:contains(下一页)");
    if (nextLinks.isEmpty()) {
        return "";
    }

    String href = nextLinks.first().attr("href");
    if (href == null || href.isEmpty()) {
        return "";
    }

    // The quantifier is greedy, so the match runs up to the last '/' it can reach.
    Matcher prefix = Pattern.compile("http.*/").matcher(currentUrl);
    return prefix.find() ? prefix.group() + href : "";
}
 
Example #7
Source File: MypCardPricer.java    From MtgDesktopCompanion with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Parses the offer rows ({@code table.table tr[data-key]}) of the page at {@code urlC} and
 * appends one {@link MagicPrice} per row to {@code list}.
 *
 * @param urlC page to load; also stored as the URL of every created price
 * @param list receiver of the parsed prices
 * @throws IOException declared for the page retrieval
 */
private void parsingOffers(String urlC, List<MagicPrice> list) throws IOException {
	Elements offerRows = URLTools.extractHtml(urlC).select("table.table tr[data-key]");
	for (Element offerRow : offerRows) {
		Elements cells = offerRow.select("td");
		if (cells.isEmpty()) {
			// A row without cells ends the whole method, not just this iteration.
			logger.debug(getName() + " found no offer");
			return;
		}

		MagicPrice offer = new MagicPrice();
		offer.setCountry("Brazil");
		offer.setCurrency(Currency.getInstance("BRL"));
		offer.setSite(getName());
		offer.setSeller(cells.get(1).text());
		offer.setFoil(cells.get(2).html().equalsIgnoreCase("foil"));
		offer.setQuality(cells.get(3).html());

		// Drop the "R$ " prefix and switch the decimal comma to a dot before parsing.
		String amount = cells.get(5).text().replaceAll("R\\$ ", "").replace(",", ".");
		offer.setValue(Double.parseDouble(amount));
		offer.setUrl(urlC);
		list.add(offer);
	}
	logger.debug(getName() + " found " + list.size() + " offers");
}
 
Example #8
Source File: FreemarkerClientPartialsBasicPropertyTest.java    From angularjs-addon with Eclipse Public License 1.0 6 votes vote down vote up
/**
 * Processes the basic-property detail template for a number property with a minimum value
 * and checks the generated form group and its input attributes.
 */
@Test
public void testGenerateBasicNumberPropertyWithMinConstraint() throws Exception
{
   Map<String, Object> model = createInspectionResultWrapper(ENTITY_NAME, NUMBER_PROP_WITH_MIN_VAL);

   String templatePath = Deployments.BASE_PACKAGE_PATH + Deployments.BASIC_PROPERTY_DETAIL_INCLUDE;
   Resource<URL> template = resourceFactory.create(getClass().getResource(templatePath));
   Template templateProcessor = processorFactory.create(template, FreemarkerTemplate.class);
   String generated = templateProcessor.process(model);
   Document fragment = Jsoup.parseBodyFragment(generated);
   assertThat(generated.trim(), not(equalTo("")));

   Elements formGroup = fragment.select("div.form-group");
   assertThat(formGroup, notNullValue());
   assertThat(formGroup.attr("ng-class"), not(equalTo("")));

   Elements input = fragment.select("div.form-group input");
   assertThat(input.attr("id"), equalTo("score"));
   assertThat(input.attr("type"), equalTo("number"));
   assertThat(input.attr("min"), equalTo("0"));
   assertThat(input.attr("ng-model"), equalTo(StringUtils.camelCase(ENTITY_NAME) + "." + "score"));
}
 
Example #9
Source File: DataUtil.java    From Focus with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Returns the {@code src} of the first {@code img} element found in the item's HTML.
 *
 * <p>The item's content is used when it is neither null nor empty; otherwise its summary is
 * used.
 *
 * @param item the collection item to inspect
 * @return the {@code src} attribute of the first image, or {@code null} when there is no
 *         text to parse or it contains no image
 */
public static String getCollectionItemImageUrl(Collection item){
    String html = !Strings.isNullOrEmpty(item.getContent())
            ? item.getContent()
            : item.getSummary();
    if (html == null || html.isEmpty()) {
        return null;
    }

    Document parsed = Jsoup.parse(html);
    if (parsed == null) {
        return null;
    }

    Elements imgs = parsed.select("img");
    return imgs.isEmpty() ? null : imgs.first().attr("src");
}
 
Example #10
Source File: WhenJavaExtensionIsRegistered.java    From asciidoctorj with Apache License 2.0 6 votes vote down vote up
/**
 * Registers {@code TestBlock} by class for the "quiet" block and checks that converting a
 * document using that block yields exactly one paragraph with the expected text.
 *
 * <p>The static {@code TestBlock.asciidoctor} field is set for the duration of the test and
 * reset in a {@code finally} block, so a failing assertion cannot leave the instance behind
 * for other tests.
 */
@Test
public void a_extension_registered_as_class_can_get_its_asciidoctor_instance() {

    JavaExtensionRegistry javaExtensionRegistry = this.asciidoctor.javaExtensionRegistry();
    TestBlock.asciidoctor = asciidoctor;
    try {
        // The block is registered by class, so no configuration map is passed here.
        javaExtensionRegistry.block("quiet", TestBlock.class);
        String content = asciidoctor.convert(
            "[quiet]\nHello World",
            options().toFile(false).get());
        // NOTE(review): in Jsoup.parse(String, String) the second argument is the base URI,
        // not a charset -- confirm that "UTF-8" is intended here.
        org.jsoup.nodes.Document doc = Jsoup.parse(content, "UTF-8");
        Elements elements = doc.getElementsByClass("paragraph");
        assertThat(elements.size(), is(1));
        assertThat(elements.get(0).text(), is("hello world"));
    } finally {
        TestBlock.asciidoctor = null;
    }
}
 
Example #11
Source File: FeilongipProxyListPageParser.java    From ProxyPool with Apache License 2.0 6 votes vote down vote up
/**
 * Parses the proxy table ({@code div[id=j-tab-newprd] table tbody tr}) of the given page.
 *
 * <p>For every row, the cell selected by {@code td:eq(1)} holds {@code ip:port}, the cell
 * selected by {@code td:eq(3)} the anonymity text and the cell selected by {@code td:eq(4)}
 * the proxy type. When {@code anonymousFlag} is set, only rows whose anonymity text contains
 * "匿" or "anonymous" are kept.
 *
 * @param html the page source
 * @return the proxies built from the accepted rows
 */
@Override
public List<Proxy> parse(String html) {
    Elements rows = Jsoup.parse(html).select("div[id=j-tab-newprd] table tbody tr");
    List<Proxy> proxies = new ArrayList<>();
    for (Element row : rows) {
        String[] hostAndPort = row.select("td:eq(1)").first().text().split(":");
        String ip = hostAndPort[0];
        String port = hostAndPort[1];

        String anonymity = row.select("td:eq(3)").first().text();
        String type = row.select("td:eq(4)").first().text();

        boolean accepted = !anonymousFlag
                || anonymity.contains("匿")
                || anonymity.contains("anonymous");
        if (accepted) {
            proxies.add(new Proxy(ip, Integer.valueOf(port), type, Constant.TIME_INTERVAL));
        }
    }
    return proxies;
}
 
Example #12
Source File: DemoAnnotatedBingCrawler.java    From WebCollector with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Handles pages of type {@code searchEngine}: prints a progress line and schedules every
 * search result link ({@code li.b_algo>h2>a}) as a follow-up task.
 *
 * @param page the fetched search result page; its meta carries {@code keyword} and
 *             {@code pageIndex}
 * @param next receiver of the follow-up crawl tasks
 */
@MatchType(types = "searchEngine")
public void visitSearchEngine(Page page, CrawlDatums next) {
    String keyword = page.meta("keyword");
    int pageIndex = page.metaAsInt("pageIndex");
    System.out.println("成功抓取关键词" + keyword + "的第" + pageIndex + "页搜索结果");

    Elements hits = page.select("li.b_algo>h2>a");
    int rank = 0;
    while (rank < hits.size()) {
        Element hit = hits.get(rank);
        /*
        We want to keep crawling the pages that the search results point to; these are
        referred to as outlinks here. When an outlink is visited we still want to know on
        which result page and at which position it was found, so the page index and the
        rank are stored in the follow-up CrawlDatum. To tell outlinks apart from search
        engine result pages the type is set to "outlink"; this value is entirely
        user-defined and may be any value.
        */
        String target = hit.attr("abs:href");
        next.addAndReturn(target)
                .type("outlink")
                .meta("keyword", keyword)
                .meta("pageIndex", pageIndex)
                .meta("rank", rank);
        rank++;
    }
}
 
Example #13
Source File: WebDavFile.java    From a with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds the list of files described by the {@code d:response} elements of a listing.
 *
 * <p>Entries whose {@code d:href} ends with {@code /} are skipped. For every other entry a
 * {@code WebDavFile} is created from this object's URL plus the last path segment of the
 * href.
 *
 * @param s the listing document as text
 * @return the files that could be created; entries with a malformed URL are left out
 */
private List<WebDavFile> parseDir(String s) {
    List<WebDavFile> files = new ArrayList<>();
    Elements responses = Jsoup.parse(s).getElementsByTag("d:response");
    String baseUrl = getUrl().endsWith("/") ? getUrl() : getUrl() + "/";
    for (Element response : responses) {
        String href = response.getElementsByTag("d:href").get(0).text();
        if (href.endsWith("/")) {
            continue;
        }
        String fileName = href.substring(href.lastIndexOf("/") + 1);
        try {
            WebDavFile file = new WebDavFile(baseUrl + fileName);
            file.setDisplayName(fileName);
            file.setUrlName(href);
            files.add(file);
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
    }
    return files;
}
 
Example #14
Source File: Text.java    From JsoupXpath with Apache License 2.0 6 votes vote down vote up
/**
 * Evaluates the function against the scope's context.
 *
 * <p>With an empty or missing context the result is an empty list. In recursive mode the
 * call is delegated to the node test registered as {@code allText}. Otherwise one string is
 * collected per context element: its data for {@code script} elements, its own text for all
 * others.
 *
 * @param scope the evaluation context
 * @return the computed value
 */
@Override
public XValue call(Scope scope) {
    Elements context = scope.context();
    List<String> texts = new LinkedList<>();
    if (context == null || context.isEmpty()) {
        return XValue.create(texts);
    }
    if (scope.isRecursion()) {
        return Scanner.findNodeTestByName("allText").call(scope);
    }
    for (Element node : context) {
        texts.add("script".equals(node.nodeName()) ? node.data() : node.ownText());
    }
    return XValue.create(texts);
}
 
Example #15
Source File: CDTClassifierMultilable.java    From NLIWOD with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Loads one column of the QALD6 multilingual log of the given system.
 *
 * <p>Reads {@code multilingual_<system>.html}, takes the table at index 5 of the document and
 * collects, for every row, the own text of the cell at index 2. The first collected entry
 * (the table head) is dropped.
 *
 * @param system name of the system, used to build the log file name
 * @return the collected cell texts; the list collected so far if the file cannot be read
 */
public static ArrayList<String> loadSystemP(String system){

	Path datapath = Paths.get("./src/main/resources/QALD6MultilingualLogs/multilingual_" + system + ".html");
	ArrayList<String> result = Lists.newArrayList();

	// Files.lines keeps the file open until the stream is closed, so it is closed here
	// deterministically instead of being left to the garbage collector.
	try (java.util.stream.Stream<String> lines = Files.lines(datapath)) {
		String loadedData = lines.collect(Collectors.joining());
		Document doc = Jsoup.parse(loadedData);
		Element table = doc.select("table").get(5);
		Elements tableRows = table.select("tr");
		for(Element row: tableRows){
			Elements tableEntry = row.select("td");
			result.add(tableEntry.get(2).ownText());
		}
		result.remove(0); //remove the head of the table
		return result;
	}catch(IOException e){
		e.printStackTrace();
		log.debug("loading failed.");
		return result;
	}
}
 
Example #16
Source File: RssLoader.java    From android-opensource-library-56 with Apache License 2.0 6 votes vote down vote up
/**
 * Adds one {@code Item} to {@code mList} for every {@code item} element of the document,
 * taking the title and URL from the first {@code title} and {@code link} child matches.
 * {@code mList} is created on demand when the first item is added.
 *
 * @param document the parsed feed document
 */
private void parseCssSelector(Document document) {
    for (Element entry : document.select("item")) {
        Item parsed = new Item();

        Elements titles = entry.select("title");
        Elements links = entry.select("link");
        if (!titles.isEmpty()) {
            parsed.title = titles.first().text();
        }
        if (!links.isEmpty()) {
            parsed.url = links.first().text();
        }

        // Created lazily: mList stays untouched when the document has no items.
        if (mList == null) {
            mList = new RssList();
        }
        mList.addItem(parsed);
    }
}
 
Example #17
Source File: CoderBusyProxyListPageParser.java    From ProxyPool with Apache License 2.0 6 votes vote down vote up
/**
 * Parses the proxy table
 * ({@code div[class='table-responsive'] table[class='table'] tbody tr}) of the given page.
 *
 * <p>For every row, the cells selected by {@code td:eq(0)}, {@code td:eq(2)},
 * {@code td:eq(5)} and {@code td:eq(7)} provide the IP, port, type and anonymity text. When
 * {@code anonymousFlag} is set, only rows whose anonymity text contains "匿" are kept.
 *
 * @param content the page source
 * @return the proxies built from the accepted rows
 */
@Override
public List<Proxy> parse(String content) {
    Document document = Jsoup.parse(content);
    Elements elements = document.select("div[class='table-responsive'] table[class='table'] tbody tr");
    List<Proxy> proxyList = new ArrayList<>(elements.size());
    for (Element element : elements){
        String ip = element.select("td:eq(0)").first().text();
        String port  = element.select("td:eq(2)").first().text();
        String type = element.select("td:eq(5)").first().text();
        String isAnonymous = element.select("td:eq(7)").first().text();
        // Diagnostics go through the logger only; the IP is already part of this message.
        log.debug("parse result = "+type+"://"+ip+":"+port+"  "+isAnonymous);
        if(!anonymousFlag || isAnonymous.contains("匿")){
            proxyList.add(new Proxy(ip, Integer.valueOf(port), type, Constant.TIME_INTERVAL));
        }
    }
    return proxyList;
}
 
Example #18
Source File: BatchFilterTest.java    From jinjava with Apache License 2.0 6 votes vote down vote up
/**
 * Renders the {@code batch-filter} template with six items and checks that the output has
 * two rows of three cells each, holding "1" to "6" in order.
 */
@Test
public void batchFilterNoBackfill() {
  Map<String, Object> bindings = ImmutableMap.of(
    "items",
    (Object) Lists.newArrayList("1", "2", "3", "4", "5", "6")
  );

  Document dom = Jsoup.parseBodyFragment(render("batch-filter", bindings));
  Elements rows = dom.select("tr");
  assertThat(rows).hasSize(2);

  // Cells are expected to be numbered consecutively across both rows.
  int expected = 1;
  for (int rowIndex = 0; rowIndex < 2; rowIndex++) {
    Elements cells = rows.get(rowIndex).select("td");
    assertThat(cells).hasSize(3);
    for (int cellIndex = 0; cellIndex < 3; cellIndex++) {
      assertThat(cells.get(cellIndex).text()).isEqualTo(String.valueOf(expected));
      expected++;
    }
  }
}
 
Example #19
Source File: HtmlUtils.java    From TrackRay with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Extracts the form data contained in a web page.
 *
 * <p>Every {@code input} element contributes one entry: the key is its {@code name}
 * attribute, or its id when the name is blank; inputs with neither are skipped. The value is
 * the element's value.
 *
 * @param source the page source
 * @return the form data; empty when {@code source} is blank
 */
public static Map<String, String> extractForm(String source) {
    Map<String, String> fields = new HashMap<>();
    if (!StringUtils.isNotBlank(source)) {
        return fields;
    }

    for (Element input : Jsoup.parse(source).select("input")) {
        String name = input.attr("name");
        String key = org.apache.commons.lang3.StringUtils.isNotBlank(name) ? name : input.id();
        if (org.apache.commons.lang3.StringUtils.isNotBlank(key)) {
            fields.put(key, input.val());
        }
    }
    return fields;
}
 
Example #20
Source File: CDTClassifierEvaluation.java    From NLIWOD with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Loads one column of the QALD6 multilingual log of the given system.
 *
 * <p>Reads {@code multilingual_<system>.html}, takes the table at index 5 of the document and
 * collects, for every row, the own text of the cell at index 1. The first collected entry
 * (the table head) is dropped.
 *
 * @param system name of the system, used to build the log file name
 * @return the collected cell texts; the list collected so far if the file cannot be read
 */
public static ArrayList<String> loadSystemR(String system){
	Path datapath = Paths.get("./src/main/resources/QALD6MultilingualLogs/multilingual_" + system + ".html");
	ArrayList<String> result = Lists.newArrayList();

	// Files.lines keeps the file open until the stream is closed, so it is closed here
	// deterministically instead of being left to the garbage collector.
	try (java.util.stream.Stream<String> lines = Files.lines(datapath)) {
		String loadedData = lines.collect(Collectors.joining());
		Document doc = Jsoup.parse(loadedData);
		Element table = doc.select("table").get(5);
		Elements tableRows = table.select("tr");
		for(Element row: tableRows){
			Elements tableEntry = row.select("td");
			result.add(tableEntry.get(1).ownText());
		}
		result.remove(0); //remove the head of the table
		return result;
	}catch(IOException e){
		e.printStackTrace();
		log.debug("loading failed.");
		return result;
	}
}
 
Example #21
Source File: WebPage.java    From zap-extensions with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the {@code script} elements of {@code HTMLDoc} that have a {@code src} attribute
 * into {@code this.scripts}.
 *
 * @param url not used by this implementation
 * @throws IOException declared by the signature; the body shown here does not throw it
 */
private void getScriptNodes(URL url) throws IOException {
    // Works on the already loaded HTMLDoc: fetching the page again through the URL would
    // require another connection.
    for (Element candidate : HTMLDoc.select("script")) {
        if (candidate.hasAttr("src")) {
            this.scripts.add(candidate);
        }
    }
}
 
Example #22
Source File: WhenJavaExtensionIsRegistered.java    From asciidoctorj with Apache License 2.0 6 votes vote down vote up
/**
 * Registers a {@code YellBlock} instance, converts the sample document once and checks that
 * exactly one paragraph with the expected text was produced.
 */
@Test
public void a_block_processor_instance_should_be_executed_when_registered_block_is_found_in_document() {

    JavaExtensionRegistry registry = this.asciidoctor.javaExtensionRegistry();

    Map<String, Object> blockConfig = new HashMap<>();
    blockConfig.put(Contexts.KEY, Arrays.asList(Contexts.PARAGRAPH));
    blockConfig.put(ContentModel.KEY, ContentModel.SIMPLE);
    YellBlock yell = new YellBlock("yell", blockConfig);
    registry.block(yell);

    String html = asciidoctor.convertFile(
            classpath.getResource("sample-with-yell-block.ad"),
            options().toFile(false).get());
    // NOTE(review): in Jsoup.parse(String, String) the second argument is the base URI,
    // not a charset -- confirm that "UTF-8" is intended here.
    org.jsoup.nodes.Document parsed = Jsoup.parse(html, "UTF-8");
    Elements paragraphs = parsed.getElementsByClass("paragraph");
    assertThat(paragraphs.size(), is(1));
    assertThat(paragraphs.get(0).text(), is("THE TIME IS NOW. GET A MOVE ON."));

}
 
Example #23
Source File: UtilsStaticAnalyzer.java    From apogen with Apache License 2.0 6 votes vote down vote up
/**
 * Statically analyzes the DOM of the given state and creates one {@code Form} object per
 * {@code form} element found in it.
 *
 * <p>Each form object receives the element's attributes, a name computed by
 * {@code getFormName}, the return value {@code "void"} and the field list computed by
 * {@code analyzeFormBody}. The number of forms found is printed to standard output.
 *
 * @param s the state whose DOM is analyzed
 * @return the set of created form objects; empty when the DOM has no form
 */
public static Set<Form> createFormObjects(State s) {

	Set<Form> result = new HashSet<Form>();

	// NOTE(review): in Jsoup.parse(String, String) the second argument is the base URI,
	// not a charset -- confirm that "UTF-8" is intended here.
	Document parsed = Jsoup.parse(s.getDom(), "UTF-8");
	Elements formElements = parsed.getElementsByTag("form");
	System.out.println("[LOG]\t" + formElements.size() + " form(s) found in " + s.getName());

	for (org.jsoup.nodes.Element formElement : formElements) {
		Form form = new Form();

		// The attributes are set first because getFormName receives the form object.
		form.setAttributes(formElement.attributes().asList());
		form.setFormName(getFormName(form, formElement));
		form.setReturnValue("void");
		form.setFormFieldList(analyzeFormBody(s, formElement));

		result.add(form);
	}

	return result;
}
 
Example #24
Source File: JsoupCssInliner.java    From ogham with Apache License 2.0 5 votes vote down vote up
/**
 * Replaces link tags with style tags, in place, so that the inclusion order of the styles is
 * kept.
 *
 * <p>A link is only replaced when {@code isInlineModeAllowed} accepts it for
 * {@code InlineModes.STYLE_ATTR} and an external css entry is found for its href.
 *
 * @param doc
 *            the html document
 * @param cssContents
 *            the list of external css files with their content
 */
private static void internStyles(Document doc, List<ExternalCss> cssContents) {
	for (Element link : doc.select(CSS_LINKS_SELECTOR)) {
		if (!isInlineModeAllowed(link, InlineModes.STYLE_ATTR)) {
			continue;
		}
		ExternalCss css = getCss(cssContents, link.attr(HREF_ATTR));
		if (css == null) {
			continue;
		}
		Element inlined = new Element(Tag.valueOf(STYLE_TAG), "");
		inlined.appendChild(new DataNode(getCssContent(css)));
		link.replaceWith(inlined);
	}
}
 
Example #25
Source File: MoverParser.java    From Mover with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the number taken from the text of the last pagination link
 * ({@code div.pagination .digits .ut a}).
 *
 * @param document the parsed page
 * @return the value computed by {@code internalGetIntegers} for the last link's text, or
 *         {@code -1} when the page has no pagination links
 */
public int getLastNavigationPage(Document document){
    Elements pageLinks = document.select("div.pagination .digits .ut a");
    return pageLinks.isEmpty() ? -1 : internalGetIntegers(pageLinks.last().text());
}
 
Example #26
Source File: LoginHelper.java    From hipda with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Fetches the login page and reads the value of its {@code formhash} input.
 *
 * <p>When the input is missing, {@code mErrorMsg} is set to the text of the first
 * {@code div.alert_info} element, or to a fixed message when there is none, and an empty
 * string is returned. When the response is empty or an exception occurs, the raw response
 * (possibly {@code null}) is returned; in the exception case {@code mErrorMsg} is set from
 * the exception.
 *
 * @return the form hash, an empty string, or the raw response as described above
 */
private String getFormhash() {
    String response = null;
    try {
        response = OkHttpHelper.getInstance().get(HiUtils.LoginGetFormHash);
        if (TextUtils.isEmpty(response)) {
            return response;
        }

        Document page = Jsoup.parse(response);
        Element hashInput = page.select("input[name=formhash]").first();
        if (hashInput != null) {
            return hashInput.attr("value");
        }

        Elements alerts = page.select("div.alert_info");
        mErrorMsg = alerts.size() > 0 ? alerts.first().text() : "Can NOT get formhash";
        return "";
    } catch (Exception e) {
        mErrorMsg = OkHttpHelper.getErrorMessage(e).getMessage();
    }
    return response;
}
 
Example #27
Source File: ImagebamRipper.java    From ripme with MIT License 5 votes vote down vote up
/**
 * Rips the useful image from an "image page": the image URL is read from the page's
 * {@code og:image} meta tag and queued for download.
 */
private void fetchImage() {
    try {
        Document page = Http.url(url).get();

        // The direct link to the image is taken from the <meta> part of the html; only the
        // first og:image entry is used. Starting from "" avoids any null handling below.
        String imageUrl = "";
        for (Element meta : page.getElementsByTag("meta")) {
            if ("og:image".equals(meta.attr("property"))) {
                imageUrl = meta.attr("content");
                LOGGER.info("Found URL " + imageUrl);
                break;
            }
        }

        // Nothing to download when no (non-empty) image URL was found.
        if (imageUrl.isEmpty()) {
            LOGGER.warn("Image not found at " + this.url);
            return;
        }

        // Provide prefix and let the AbstractRipper "guess" the filename
        String prefix = Utils.getConfigBoolean("download.save_order", true)
                ? String.format("%03d_", index)
                : "";

        addURLToDownload(new URL(imageUrl), prefix);
    } catch (IOException e) {
        LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
    }
}
 
Example #28
Source File: PCAGrader.java    From MtgDesktopCompanion with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Loads the grading registered under the given identifier from the grader's web site.
 *
 * <p>The grade note is read from the {@code li.mb-1} item at index 3 and the grade date
 * (parsed with the pattern {@code yyyy}) from the item at index 5, after their
 * {@code strong} children have been removed.
 *
 * @param identifier the grading identifier, used to build the page URL
 * @return the grading, or {@code null} when the page has no {@code li.mb-1} item
 * @throws IOException declared for the page retrieval
 */
@Override
public Grading loadGrading(String identifier) throws IOException {

	String infoUrl = getWebSite() + "/resumeBdd/" + identifier + "/1";
	Document page = RequestBuilder.build()
			.method(METHOD.GET)
			.setClient(URLTools.newClient())
			.url(infoUrl)
			.toHtml();

	Elements items = page.select("li.mb-1");
	if (items.isEmpty()) {
		logger.debug(identifier +" is not found for " + getName());
		return null;
	}

	// Remove the <strong> children (presumably the labels) of the two items read below.
	for (int position : new int[] {3, 5}) {
		items.get(position).select("strong").remove();
	}

	Grading grading = new Grading();
	grading.setGraderName(getName());
	grading.setNumberID(identifier);
	grading.setGradeNote(Double.parseDouble(items.get(3).text()));
	try {
		grading.setGradeDate(new SimpleDateFormat("yyyy").parse(items.get(5).text()));
	} catch (ParseException e) {
		// An unparsable date is logged; the grading is still returned without a date.
		logger.error(e);
	}
	grading.setUrlInfo(infoUrl);
	return grading;
}
 
Example #29
Source File: FUN_CSSPath.java    From sparql-generate with Apache License 2.0 5 votes vote down vote up
/**
 * Returns, as a string node value, the given attribute of the first element matched by
 * {@code selectPath} below {@code element}.
 *
 * @param element       the element to search in
 * @param selectPath    the CSS selector to evaluate
 * @param attributeName the attribute to read from the first match
 * @return the attribute value wrapped in a {@code NodeValueString}
 * @throws ExprEvalException if nothing matches or the match lacks the attribute
 */
private NodeValue selectAttribute(Element element, String selectPath, String attributeName) {
    Element match = element.select(selectPath).first();
    if (match == null) {
        throw new ExprEvalException("No evaluation of " + element + ", " + selectPath);
    }
    if (match.hasAttr(attributeName)) {
        return new NodeValueString(match.attr(attributeName));
    }
    throw new ExprEvalException("The evaluation of " + element + ", " + selectPath + " is an element that does not have attribute " + attributeName);
}
 
Example #30
Source File: HtmlParserTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that unknown tags are kept as elements: both {@code foo} tags are found, with
 * their own attributes, and the last one holds the text "there".
 */
@Test public void handlesUnknownTags() {
    Document parsed = Jsoup.parse("<div><foo title=bar>Hello<foo title=qux>there</foo></div>");
    Elements unknownTags = parsed.select("foo");

    assertEquals(2, unknownTags.size());
    assertEquals("bar", unknownTags.first().attr("title"));
    assertEquals("qux", unknownTags.last().attr("title"));
    assertEquals("there", unknownTags.last().text());
}