Java Code Examples for org.jsoup.nodes.Element#tagName()

The following examples show how to use org.jsoup.nodes.Element#tagName(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: HtmlHelper.java    From FairEmail with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Converts the given spanned text to HTML and rewrites relative font-size spans.
 *
 * <p>Every {@code <span>} whose {@code style} attribute starts with
 * {@code font-size:} and carries a parsable {@code em} value is renamed to
 * {@code <small>} (value below 1) or {@code <big>} (otherwise), and its
 * {@code style} attribute is removed. Unparsable sizes are logged and the
 * span is left untouched.
 *
 * @param spanned the text to convert
 * @param context passed to the {@code HtmlEx} converter
 * @return the HTML of the rewritten document
 */
static String toHtml(Spanned spanned, Context context) {
    String html = new HtmlEx(context).toHtml(spanned, TO_HTML_PARAGRAPH_LINES_CONSECUTIVE);

    // @Google: why convert size to and from in a different way?
    Document doc = JsoupEx.parse(html);
    for (Element span : doc.select("span")) {
        String style = span.attr("style");
        if (!style.startsWith("font-size:")) {
            continue;
        }

        int valueStart = style.indexOf(':');
        int valueEnd = style.indexOf("em;", valueStart);
        if (valueEnd <= valueStart) {
            continue;
        }

        try {
            // Some locales emit a decimal comma; normalise before parsing.
            String rawSize = style.substring(valueStart + 1, valueEnd).replace(',', '.');
            float size = Float.parseFloat(rawSize);
            span.tagName(size < 1.0f ? "small" : "big");
            span.attributes().remove("style");
        } catch (NumberFormatException ex) {
            Log.e(ex);
        }
    }

    return doc.html();
}
 
Example 2
Source File: LoginWebView.java    From NClientV2 with Apache License 2.0 6 votes vote down vote up
/**
 * Rewrites the fetched login page and loads the result into this view.
 *
 * <p>The first {@code <form>} is renamed to {@code <div>}, a script defining
 * {@code intercept()} is inserted before it, and the first submit control
 * gets {@code onclick="intercept()"}. The fetcher is then removed and the
 * document is loaded as Base64-encoded HTML.
 *
 * @param url  not used by this method
 * @param html the page markup to rewrite
 */
@Override
public void fetchUrl(String url, String html) {
    Document page = Jsoup.parse(html);
    Element pageBody = page.body();
    Element loginForm = pageBody.getElementsByTag("form").first();
    pageBody.getElementsByClass("lead").first().text("Tested");
    loginForm.tagName("div");

    // The script reads the form fields and hands them to Interceptor.intercept
    // (presumably a JavaScript interface registered elsewhere; verify).
    String interceptScript = "<script>\n" +
            "document.getElementsByClassName('lead')[0].innerHTML='test';\n"+
            "alert('test');\n"+
            "function intercept(){\n" +
            "    password=document.getElementById('id_password').value;\n" +
            "    email=document.getElementById('id_username_or_email').value;\n" +
            "    token=document.getElementsByName('csrfmiddlewaretoken')[0].value;\n" +
            "    captcha=document.getElementById('g-recaptcha-response').value;\n" +
            "     Interceptor.intercept(email,password,token,captcha);\n" +
            "}\n" +
            "</script>";
    loginForm.before(interceptScript);

    Element submitControl = loginForm.getElementsByAttributeValue("type","submit").first();
    submitControl.attr("onclick","intercept()");

    removeFetcher(fetcher);

    byte[] pageBytes = page.outerHtml().getBytes();
    String encodedHtml = Base64.encodeToString(pageBytes, Base64.NO_PADDING);
    loadDataWithBaseURL(Utility.getBaseUrl(), encodedHtml,"text/html","base64",null);
}
 
Example 3
Source File: StringUtil.java    From templatespider with Apache License 2.0 6 votes vote down vote up
/**
	 * Compares two elements for similarity; the more alike they are, the closer the result is to 1.
	 * The full comparison is only performed when tag name, {@code id} and {@code class} all match.
	 * @param ele1 first element to compare
	 * @param ele2 second element to compare
	 * @return the result of {@code similarity_} on the elements' string forms,
	 *         or 0.1 when the tag name, id or class differ
	 */
	public static double similarity(Element ele1, Element ele2){
		// Compare tags first: only blocks with the same tag can be the same block.
		String firstTag = ele1.tagName();
		String secondTag = ele2.tagName();
		boolean sameTag = firstTag != null && secondTag != null && firstTag.equals(secondTag);
		if(!sameTag){
			return 0.1;
		}

		// Same tag: the id must match as well.
		if(!ele1.attr("id").equals(ele2.attr("id"))){
			return 0.1;
		}

		// ...and so must the class.
		if(!ele1.attr("class").equals(ele2.attr("class"))){
			return 0.1;
		}

		return similarity_(ele1.toString(), ele2.toString());
	}
 
Example 4
Source File: PathElementBuilder.java    From Asqatasun with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Builds a selector-like path for the element by recursing through its parents.
 *
 * <p>The html element yields an empty string and the body element yields
 * {@code HtmlElementStore.BODY_ELEMENT}. Any other element yields its
 * parent's path followed by {@code " > tag:eq(n)"}, where {@code n} is the
 * number of element siblings preceding it.
 *
 * @param element the element to describe
 * @return the path string
 */
private String getAbsolutePathFromElement(Element element) {
    String tagName = element.tagName();
    if (tagName.equalsIgnoreCase(HtmlElementStore.HTML_ELEMENT)) {
        return "";
    }
    if (tagName.equalsIgnoreCase(HtmlElementStore.BODY_ELEMENT)) {
        return HtmlElementStore.BODY_ELEMENT;
    }

    // Count the element siblings that precede this element.
    int position = 0;
    for (Element sibling = element.previousElementSibling();
            sibling != null;
            sibling = sibling.previousElementSibling()) {
        position++;
    }

    String parentPath = getAbsolutePathFromElement(element.parent());
    return parentPath + " > " + tagName + ":eq(" + position + ")";
}
 
Example 5
Source File: Cleaner.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Creates a copy of the source element's tag that carries only the
 * attributes accepted by the whitelist, plus the whitelist's enforced
 * attributes for that tag.
 *
 * @param sourceEl the element to copy from
 * @return the new element together with the number of discarded attributes
 */
private ElementMeta createSafeElement(Element sourceEl) {
    String tagName = sourceEl.tagName();
    Attributes safeAttrs = new Attributes();
    Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), safeAttrs);

    int discarded = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            discarded++;
            continue;
        }
        safeAttrs.put(candidate);
    }

    // Enforced attributes are added last, after the filtered source attributes.
    safeAttrs.addAll(whitelist.getEnforcedAttributes(tagName));

    return new ElementMeta(safeEl, discarded);
}
 
Example 6
Source File: Cleaner.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Builds a sanitized counterpart of {@code sourceEl}: same tag and base URI,
 * only whitelist-approved attributes, then the attributes the whitelist
 * enforces for the tag.
 *
 * @param sourceEl the element whose tag and attributes are inspected
 * @return an {@code ElementMeta} holding the new element and the count of
 *         attributes that were rejected
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tag = sourceEl.tagName();
    final Attributes kept = new Attributes();
    final Element result = new Element(Tag.valueOf(tag), sourceEl.baseUri(), kept);

    int rejected = 0;
    for (Attribute attr : sourceEl.attributes()) {
        boolean safe = whitelist.isSafeAttribute(tag, sourceEl, attr);
        if (safe) {
            kept.put(attr);
        } else {
            rejected++;
        }
    }

    Attributes enforced = whitelist.getEnforcedAttributes(tag);
    kept.addAll(enforced);

    return new ElementMeta(result, rejected);
}
 
Example 7
Source File: BakaTsukiParserAlternative.java    From coolreader with MIT License 6 votes vote down vote up
/**
 * Walks the element siblings that follow the given {@code <h2>} and builds a
 * book for every {@code <h3>} found, until the next {@code <h2>} or the end
 * of the siblings.
 *
 * <p>A sibling that is itself an {@code <h3>} is handed to {@code processH3}
 * directly; any other sibling is searched for nested {@code <h3>} elements,
 * each of which is handed to {@code processH3}.
 *
 * @param novel    the novel the books belong to
 * @param h2       the heading element to start from (must not be null)
 * @param language passed through to {@code processH3}
 * @return the books collected by {@code processH3}, possibly empty
 */
private static ArrayList<BookModel> parseBooksMethod1(NovelCollectionModel novel, Element h2, String language) {
	ArrayList<BookModel> books = new ArrayList<BookModel>();
	int bookOrder = 0;

	Element bookElement = h2.nextElementSibling();
	while (bookElement != null) {
		// Tag names must be compared by content: '==' on String only tests
		// reference identity and is not guaranteed to match equal names.
		String tag = bookElement.tagName();
		if ("h2".equals(tag)) {
			break;
		}

		if ("h3".equals(tag)) {
			bookOrder = processH3(novel, books, bookElement, bookOrder, language);
		} else {
			// select() yields an empty collection when nothing matches.
			for (Element h3 : bookElement.select("h3")) {
				bookOrder = processH3(novel, books, h3, bookOrder, language);
			}
		}

		bookElement = bookElement.nextElementSibling();
	}
	return books;
}
 
Example 8
Source File: SelectorFetcher.java    From stevia with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Builds a slash-separated path of tag names leading to the element by
 * walking up its ancestors.
 *
 * <p>The element's own tag name comes last; each ancestor's lower-cased tag
 * name is prepended. The walk stops before a {@code body} ancestor (which is
 * not included), after the second {@code div} ancestor, or after a
 * {@code form} ancestor (both included).
 *
 * @param e the element to describe
 * @return the path string
 */
private static String reversePath(Element e) {
	String path = e.tagName();
	int divCount = 0;
	for (Element ancestor = e.parent(); ancestor != null; ancestor = ancestor.parent()) {
		String name = ancestor.tagName().toLowerCase();
		if (name.equals("body")) {
			break;
		}
		path = name + "/" + path;

		if (name.equals("div")) {
			divCount++;
			if (divCount == 2) {
				break;
			}
		} else if (name.equals("form")) {
			break;
		}
	}
	return path;
}
 
Example 9
Source File: TextFilterManage.java    From bbs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Processes video player tags: every {@code <video>} element becomes a
 * {@code <player>} element whose {@code src} attribute is replaced by a
 * {@code url} attribute.
 * @param html rich-text content
 * @param tagId topic tag; -1 means the link is opened from the admin backend and permissions are not checked
 * @param secret secret key; when null or blank the original {@code src} is used as the url
 * @return the rewritten body HTML, or {@code html} unchanged when it is blank
 */
public String processVideoPlayer(String html,Long tagId,String secret){
	if(StringUtils.isBlank(html)){
		return html;
	}

	boolean hasSecret = secret != null && !"".equals(secret.trim());

	Document doc = Jsoup.parseBodyFragment(html);
	for (Element video : doc.select("video")) {
		// Value of the tag's src attribute
		String src = video.attr("src");
		video.removeAttr("src");

		// Replace the current tag with a <player> tag
		video.tagName("player");

		String url = hasSecret ? SecureLink.createVideoRedirectLink(src,tagId,secret) : src;
		video.attr("url",url);
	}

	// prettyPrint controls whether the output is re-formatted; keep the markup as is.
	doc.outputSettings().prettyPrint(false);
	return doc.body().html();
}
 
Example 10
Source File: CommentParser.java    From Ouroboros with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Converts the child nodes of a body line into one styled character sequence.
 *
 * <p>Text nodes are rendered as normal text. Elements are dispatched on
 * their tag name ({@code span}, {@code em}, {@code strong}, {@code u},
 * {@code s}, {@code a}); any other element is rendered as normal text.
 * Nodes that are neither text nor element are skipped.
 *
 * @return the concatenation of all rendered pieces, in document order
 */
private CharSequence parseFormatting(Element bodyLine, String currentBoard, String resto, FragmentManager fragmentManager, InfiniteDbHelper infiniteDbHelper){
    CharSequence parsedText = "";
    for (Node child : bodyLine.childNodes()){
        CharSequence piece;
        if (child instanceof TextNode){
            piece = parseNormalText(new SpannableString(((TextNode) child).text()));
        } else if (child instanceof Element){
            Element el = (Element) child;
            switch(el.tagName()){
                case "span":
                    piece = parseSpanText(el);
                    break;
                case "em":
                    piece = parseItalicText(new SpannableString(el.text()));
                    break;
                case "strong":
                    piece = parseBoldText(new SpannableString(el.text()));
                    break;
                case "u":
                    piece = parseUnderlineText(new SpannableString(el.text()));
                    break;
                case "s":
                    piece = parseStrikethroughText(new SpannableString(el.text()));
                    break;
                case "a":
                    piece = parseAnchorText(el, currentBoard, resto, fragmentManager, infiniteDbHelper);
                    break;
                default:
                    piece = parseNormalText(new SpannableString(el.text()));
                    break;
            }
        } else {
            continue;
        }
        parsedText = TextUtils.concat(parsedText, piece);
    }
    return parsedText;
}
 
Example 11
Source File: Rgaa3Extractor.java    From Asqatasun with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Rewrites the {@code .test-detail} element of every HTML test case found
 * in the sub-directories of {@code RGAA3_TESTCASE_PATH}.
 *
 * <p>The theme, criterion and test numbers are taken from the first six
 * characters of each entry name (after removing {@code "Rgaa30Rule"} and
 * {@code ".java"}). For each test-case file whose name contains
 * {@code "html"}, the detail element is turned into an empty {@code div},
 * given {@code lang="fr"} if it has no {@code lang}, and filled with the raw
 * rule HTML looked up in {@code RGAA3}. Occurrences of the zero-padded
 * dotted key are replaced by the rule's dotted form before the file is
 * written back. Documents without a detail element are printed to stdout.
 *
 * @throws IOException if the test-case directory cannot be listed, or if
 *         one of the file read/write calls fails
 */
private static void createTestcaseFiles() throws IOException {
    File srcDir = new File(RGAA3_TESTCASE_PATH);
    File[] ruleEntries = srcDir.listFiles();
    if (ruleEntries == null) {
        // listFiles() returns null when the path is not a directory or cannot be read.
        throw new IOException("Unable to list test-case directory: " + srcDir);
    }

    for (File file : ruleEntries) {
        File[] testcases = file.listFiles();
        if (testcases == null) {
            // Not a directory (or unreadable): it holds no test cases.
            continue;
        }

        String fileName = file.getName().replace("Rgaa30Rule", "").replace(".java", "");
        String theme = fileName.substring(0, 2);
        String crit = fileName.substring(2, 4);
        String test = fileName.substring(4, 6);
        // Parsing drops the leading zeros, e.g. "01" -> "1".
        String testKey = Integer.parseInt(theme) + "-" + Integer.parseInt(crit) + "-" + Integer.parseInt(test);
        String wrongKey = theme + "." + crit + "." + test;

        for (File testcase : testcases) {
            if (!testcase.isFile() || !testcase.getName().contains("html")) {
                continue;
            }

            Document doc = Jsoup.parse(FileUtils.readFileToString(testcase));
            Element detail = doc.select(".test-detail").first();
            if (detail == null) {
                System.out.println(doc.outerHtml());
                continue;
            }

            detail.tagName("div");
            detail.text("");
            for (Element el : detail.children()) {
                el.remove();
            }
            if (!detail.hasAttr("lang")) {
                detail.attr("lang", "fr");
            }
            detail.append("\n" + RGAA3.get(testKey).ruleRawHtml + "\n");

            doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
            doc.outputSettings().outline(false);
            doc.outputSettings().indentAmount(4);

            String outputHtml = doc.outerHtml();
            if (outputHtml.contains(wrongKey)) {
                // Literal replacement: the key contains '.', which replaceAll
                // would treat as a regex wildcard, and the replacement text
                // must not be interpreted for '$' or '\'.
                outputHtml = outputHtml.replace(wrongKey, RGAA3.get(testKey).getRuleDot());
            }
            FileUtils.writeStringToFile(testcase, outputHtml);
        }
    }
}
 
Example 12
Source File: HeadingsHierarchyChecker.java    From Asqatasun with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Builds the evidence string describing a heading element.
 *
 * @param previousIndex the number placed after {@code h}, or used as the
 *                      {@code aria-level} value
 * @param element the element whose tag name is inspected
 * @return {@code "h" + previousIndex} when the tag name is h1..h6 (case
 *         insensitive); otherwise the tag name followed by
 *         {@code role="heading"} and an {@code aria-level} attribute
 */
private String getEvidenceElementMsg(int previousIndex, Element element) {
    boolean nativeHeading = Pattern.matches("(?i)h[1-6]", element.tagName());
    return nativeHeading
            ? "h" + previousIndex
            : element.tagName() + " role=\"heading\" aria-level=\"" + previousIndex + "\"";
}
 
Example 13
Source File: TagServlet.java    From firing-range with Apache License 2.0 5 votes vote down vote up
/**
 * Handles the request, filtering out tags that are not allowed. An empty
 * {@code allowedTag} allows all tags; an empty {@code allowedAttr} allows
 * all attributes.
 *
 * <p>When {@code allowedTag} is {@code script}, the elements are emptied
 * first. Elements with a different tag are skipped. If an accepted element
 * carries an attribute other than {@code allowedAttr}, a 400 error is sent
 * and nothing else is written. Otherwise the accepted elements are sent
 * back concatenated.
 */
private void handleRequest(
    Elements elements, HttpServletResponse response, String allowedTag, String allowedAttr)
        throws IOException {
  if (allowedTag.equalsIgnoreCase("script")) {
    elements.empty();
  }

  boolean anyTagAllowed = allowedTag.isEmpty();
  boolean anyAttrAllowed = allowedAttr.isEmpty();

  StringBuilder output = new StringBuilder();
  for (Element element : elements) {
    boolean tagAccepted = anyTagAllowed || allowedTag.equalsIgnoreCase(element.tagName());
    if (!tagAccepted) {
      continue;
    }

    if (!anyAttrAllowed) {
      for (Attribute attribute : element.attributes()) {
        boolean attrAccepted = attribute.getKey().equalsIgnoreCase(allowedAttr);
        if (!attrAccepted) {
          Responses.sendError(response, "Invalid input attribute", 400);
          return;
        }
      }
    }

    output.append(element.toString());
  }
  Responses.sendXssed(response, output.toString());
}
 
Example 14
Source File: BakaTsukiParserAlternative.java    From coolreader with MIT License 5 votes vote down vote up
/**
 * Builds one book from a heading element and appends it to {@code books}.
 *
 * <p>The book title is the sanitized text of {@code bookElement}. Chapters
 * are collected from the {@code <li>} elements of every following element
 * sibling, up to the next {@code <h2>} or {@code <h3>} or the end of the
 * siblings; entries for which {@code processLI} returns null are skipped.
 *
 * @param novel       the novel the book belongs to
 * @param books       the list the new book is added to
 * @param bookElement the heading element that names the book
 * @param bookOrder   the order assigned to the new book
 * @param language    passed through to {@code processLI}
 * @return {@code bookOrder + 1}
 */
public static int processH3(NovelCollectionModel novel, ArrayList<BookModel> books, Element bookElement, int bookOrder, String language) {
	BookModel book = new BookModel();
	book.setTitle(CommonParser.sanitize(bookElement.text(), true));
	book.setOrder(bookOrder);
	ArrayList<PageModel> chapterCollection = new ArrayList<PageModel>();
	String parent = novel.getPage() + Constants.NOVEL_BOOK_DIVIDER + book.getTitle();

	// Parse the chapters.
	int chapterOrder = 0;
	Element chapterElement = bookElement.nextElementSibling();
	while (chapterElement != null) {
		// Tag names must be compared by content: '==' on String only tests
		// reference identity and is not guaranteed to match equal names.
		String tag = chapterElement.tagName();
		if ("h2".equals(tag) || "h3".equals(tag)) {
			break;
		}

		for (Element chapter : chapterElement.select("li")) {
			PageModel p = processLI(chapter, parent, chapterOrder, language);
			if (p != null) {
				chapterCollection.add(p);
				++chapterOrder;
			}
		}
		chapterElement = chapterElement.nextElementSibling();
	}

	book.setChapterCollection(chapterCollection);
	books.add(book);
	return bookOrder + 1;
}
 
Example 15
Source File: BakaTsukiParserAlternative.java    From coolreader with MIT License 5 votes vote down vote up
/**
 * Handles pages where the chapter list is nested directly in a
 * {@code <ul>} or {@code <dl>} after the heading (e.g. Fate/Apocrypha,
 * Gekkou) and parses each {@code <li>} as a chapter.
 *
 * <p>Walks the element siblings following {@code h2} until the next
 * {@code <h2>} or the end of the siblings. Every {@code <ul>} or
 * {@code <dl>} sibling yields one book titled with the sanitized text of
 * {@code h2}; other siblings are ignored.
 *
 * @param novel    the novel the books belong to
 * @param h2       the heading element to start from (must not be null)
 * @param language passed through to {@code processLI}
 * @return the books found, possibly empty
 */
private static ArrayList<BookModel> parseBooksMethod3(NovelCollectionModel novel, Element h2, String language) {
	ArrayList<BookModel> books = new ArrayList<BookModel>();
	int bookOrder = 0;

	Element bookElement = h2.nextElementSibling();
	while (bookElement != null) {
		// Tag names must be compared by content: '==' on String only tests
		// reference identity and is not guaranteed to match equal names.
		String tag = bookElement.tagName();
		if ("h2".equals(tag)) {
			break;
		}

		if ("ul".equals(tag) || "dl".equals(tag)) {
			BookModel book = new BookModel();
			book.setTitle(CommonParser.sanitize(h2.text(), true));
			book.setOrder(bookOrder);
			ArrayList<PageModel> chapterCollection = new ArrayList<PageModel>();
			String parent = novel.getPage() + Constants.NOVEL_BOOK_DIVIDER + book.getTitle();

			// Parse the chapters.
			int chapterOrder = 0;
			for (Element chapter : bookElement.select("li")) {
				PageModel p = processLI(chapter, parent, chapterOrder, language);
				if (p != null) {
					chapterCollection.add(p);
					++chapterOrder;
				}
			}
			book.setChapterCollection(chapterCollection);
			books.add(book);
			++bookOrder;
		}

		bookElement = bookElement.nextElementSibling();
	}
	return books;
}