Java Code Examples for org.jsoup.nodes.Element#equals()

The following examples show how to use org.jsoup.nodes.Element#equals(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: CommonUtil.java    From CrawlerForReader with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the 1-based position of an element among the siblings that share its tag name.
 *
 * <p>The children of the element's parent are scanned in the order returned by
 * {@code children()}; every same-tag sibling met before {@code e} advances the position.
 *
 * @param e the element to locate
 * @return the 1-based position of {@code e} among its same-tag siblings, or 1 when
 *         {@code e} has no parent and therefore no siblings to compare against
 */
public static int getElIndexInSameTags(Element e) {
    Element parent = e.parent();
    if (parent == null) {
        // A detached element has no siblings, so it is trivially the first of its kind.
        return 1;
    }
    int index = 1;
    for (Element cur : parent.children()) {
        if (!e.tagName().equals(cur.tagName())) {
            continue;
        }
        if (e.equals(cur)) {
            break;
        }
        index++;
    }
    return index;
}
 
Example 2
Source File: OutputFormatter.java    From Xndroid with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Appends the text of the elements selected by {@code tagName} under {@code node}
 * to {@code sb}, each followed by a blank line.
 *
 * <p>A selected element is skipped when it, or any ancestor visited before reaching
 * {@code node}, is flagged by {@code unlikely}, or when its text is empty, shorter than
 * {@code getMinParagraph} allows, or longer than twice its letter count.
 *
 * @param node    root element the selection runs on
 * @param sb      buffer receiving the accepted text
 * @param tagName selector passed to {@code node.select}
 * @return the number of accepted elements whose tag name is {@code p}
 */
private int append(Element node, StringBuilder sb, String tagName) {
    int paragraphTags = 0;
    int acceptedCount = 0;
    CANDIDATES:
    for (Element candidate : node.select(tagName)) {
        // Walk from the candidate upwards, stopping at 'node' or at the top of the tree.
        for (Element ancestor = candidate;
             ancestor != null && !ancestor.equals(node);
             ancestor = ancestor.parent()) {
            if (unlikely(ancestor)) {
                continue CANDIDATES;
            }
        }

        String text = node2Text(candidate);
        boolean rejected = text.isEmpty()
                || text.length() < getMinParagraph(acceptedCount)
                || text.length() > SHelper.countLetters(text) * 2;
        if (rejected) {
            continue;
        }

        if (candidate.tagName().equals("p")) {
            paragraphTags++;
        }

        sb.append(text).append("\n\n");
        acceptedCount++;
    }

    return paragraphTags;
}
 
Example 3
Source File: ProcessRemarkServiceImpl.java    From Asqatasun with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Finds the position of an element among the elements of {@code jsoupDocument}
 * that share its tag name.
 *
 * @param element the element to look up
 * @return the zero-based index of the first entry of
 *         {@code jsoupDocument.getElementsByTag(element.tagName())} that equals
 *         {@code element}, or -1 when no entry matches
 */
private int getElementIndex(Element element) {
    int position = 0;
    for (Element candidate : jsoupDocument.getElementsByTag(element.tagName())) {
        if (candidate.equals(element)) {
            return position;
        }
        position++;
    }
    return -1;
}
 
Example 4
Source File: ImageElementSelector.java    From Asqatasun with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Checks whether {@code image} is reached by descending from {@code imageParent}
 * through a chain of single children that all carry the same text as the image.
 *
 * @param imageParent the element to start from; may be {@code null}
 * @param image       the image element being tested
 * @return {@code true} when the descent ends on a childless element equal to
 *         {@code image}; {@code false} when {@code imageParent} is {@code null}, when
 *         the texts differ, or when an element on the way has more than one child
 */
private boolean isImageLink(Element imageParent, Element image) {
    if (imageParent == null) {
        return false;
    }
    if (!StringUtils.equals(imageParent.text(), image.text())) {
        return false;
    }
    int childCount = imageParent.children().size();
    if (childCount == 1) {
        // Exactly one child: keep descending.
        return isImageLink(imageParent.child(0), image);
    }
    return childCount == 0 && imageParent.equals(image);
}
 
Example 5
Source File: OutputFormatter.java    From JumpGo with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Collects the text of the elements matched by {@code tagName} below {@code node}
 * into {@code sb}, separating entries with a blank line.
 *
 * <p>An element is ignored when {@code unlikely} flags it or one of the ancestors
 * visited before {@code node} is reached, or when its text is empty, below the
 * {@code getMinParagraph} threshold, or more than twice as long as its letter count.
 *
 * @param node    element whose descendants are selected
 * @param sb      destination buffer
 * @param tagName selector handed to {@code node.select}
 * @return how many of the accepted elements have the tag name {@code p}
 */
private int append(Element node, StringBuilder sb, String tagName) {
    int pCount = 0;
    int textIndex = 0;
    for (Element selected : node.select(tagName)) {
        // Climb towards 'node'; stop early as soon as an unlikely element is met.
        boolean flagged = false;
        Element walker = selected;
        while (walker != null && !walker.equals(node)) {
            if (unlikely(walker)) {
                flagged = true;
                break;
            }
            walker = walker.parent();
        }
        if (flagged) {
            continue;
        }

        String content = node2Text(selected);
        if (content.isEmpty()
                || content.length() < getMinParagraph(textIndex)
                || content.length() > SHelper.countLetters(content) * 2) {
            continue;
        }

        if (selected.tagName().equals("p")) {
            pCount += 1;
        }

        sb.append(content);
        sb.append("\n\n");
        textIndex++;
    }

    return pCount;
}
 
Example 6
Source File: Selector.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Builds a new {@code Elements} holding every entry of {@code elements} that equals
 * none of the entries in {@code outs}; the iteration order of {@code elements} is kept.
 *
 * @param elements the candidates
 * @param outs     the elements to exclude
 * @return the candidates that survived the exclusion
 */
static Elements filterOut(Collection<Element> elements, Collection<Element> outs) {
    Elements kept = new Elements();
    NEXT_CANDIDATE:
    for (Element candidate : elements) {
        for (Element excluded : outs) {
            if (candidate.equals(excluded)) {
                continue NEXT_CANDIDATE;
            }
        }
        kept.add(candidate);
    }
    return kept;
}
 
Example 7
Source File: Selector.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Returns the entries of {@code elements} that are not equal to any entry of
 * {@code outs}, in the order they are iterated.
 *
 * @param elements the elements to filter
 * @param outs     the elements that must be left out
 * @return a new {@code Elements} with the remaining entries
 */
static Elements filterOut(Collection<Element> elements, Collection<Element> outs) {
    Elements remaining = new Elements();
    for (Element candidate : elements) {
        boolean keep = true;
        for (Element rejected : outs) {
            if (candidate.equals(rejected)) {
                keep = false;
                break;
            }
        }
        if (keep) {
            remaining.add(candidate);
        }
    }
    return remaining;
}
 
Example 8
Source File: Selector.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Copies into a fresh {@code Elements} each entry of {@code elements} for which no
 * entry of {@code outs} compares equal.
 *
 * @param elements source collection
 * @param outs     entries to drop from the result
 * @return the filtered elements, in source iteration order
 */
static Elements filterOut(Collection<Element> elements, Collection<Element> outs) {
    Elements survivors = new Elements();
    SCAN:
    for (Element current : elements) {
        for (Element unwanted : outs) {
            if (current.equals(unwanted)) {
                // Matched an excluded element: move on without adding it.
                continue SCAN;
            }
        }
        survivors.add(current);
    }
    return survivors;
}
 
Example 9
Source File: CommonUtil.java    From JsoupXpath with Apache License 2.0 5 votes vote down vote up
/**
 * Gets the 1-based position of an element among its same-named siblings, counting
 * only siblings that are contained in {@code scope.context()}.
 *
 * @param e     the element to locate
 * @param scope the scope whose context restricts which siblings are counted
 * @return the position of {@code e}; when {@code e} is never matched, one more than
 *         the number of counted siblings
 */
public static int getElIndexInSameTags(Element e, Scope scope) {
    int position = 1;
    for (Element sibling : e.parent().children()) {
        boolean sameTag = e.tagName().equals(sibling.tagName());
        if (!sameTag || !scope.context().contains(sibling)) {
            continue;
        }
        if (e.equals(sibling)) {
            return position;
        }
        position++;
    }
    return position;
}
 
Example 10
Source File: HTMLEasyPDFConverterReader.java    From TableDisentangler with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Reads the caption of a table.
 *
 * <p>Three sources are consulted in turn, each overriding the previous one: the first
 * {@code caption} child of the table, the text of the sibling that immediately precedes
 * the table, and the first {@code title} child of the table.
 *
 * @param tablexmlNode the table element
 * @return the caption, or an empty string when none of the sources yields one
 */
public String readTableCaption(Element tablexmlNode)
{
	//TODO: Obtain label as the first p tag before table tag
	String caption = "";
	List<Element> captionNodes = getChildrenByTagName(tablexmlNode, "caption");
	if (!captionNodes.isEmpty()) {
		caption = Utilities.getString(captionNodes.get(0));
	}

	Elements siblings = tablexmlNode.parent().getAllElements().first().children();
	// Start at 1 so that every candidate has a predecessor to take the text from.
	for (int pos = 1; pos < siblings.size(); pos++) {
		Element candidate = siblings.get(pos);
		if (candidate.tagName().equals("table") && candidate.equals(tablexmlNode)) {
			caption = siblings.get(pos - 1).text();
			break;
		}
	}

	List<Element> titleNodes = getChildrenByTagName(tablexmlNode, "title");
	if (!titleNodes.isEmpty()) {
		caption = Utilities.getString(titleNodes.get(0));
	}
	return caption;
}
 
Example 11
Source File: Selector.java    From jsoup-learning with MIT License 5 votes vote down vote up
/**
 * Produces the subset of {@code elements} whose members equal no member of
 * {@code outs}.
 *
 * @param elements the elements to examine
 * @param outs     the elements to filter out
 * @return a new {@code Elements} with the members that were not filtered out
 */
static Elements filterOut(Collection<Element> elements, Collection<Element> outs) {
    Elements filtered = new Elements();
    for (Element element : elements) {
        boolean retained = true;
        for (Element toRemove : outs) {
            if (element.equals(toRemove)) {
                retained = false;
                break;
            }
        }
        if (retained) {
            filtered.add(element);
        }
    }
    return filtered;
}
 
Example 12
Source File: HTMLEasyPDFConverterReader.java    From TableDisentangler with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Reads the footer text of a table.
 *
 * <p>The result starts with the first {@code caption} child of the table, followed by
 * the text of each {@code p} child and then the text of every non-table sibling located
 * after the table and before the next table, each terminated by a newline. When the
 * table has a {@code title} child, its text replaces everything gathered so far.
 *
 * @param tablesxmlNode the table element
 * @return the footer text
 */
public String ReadTableFooter(Element tablesxmlNode)
{
	//TODO: Obtain label as the first p tag before table tag
	String footer = "";

	List<Element> matches = getChildrenByTagName(tablesxmlNode, "caption");
	if (!matches.isEmpty()) {
		footer = Utilities.getString(matches.get(0));
	}
	matches = getChildrenByTagName(tablesxmlNode, "p");
	for (Element paragraph : matches) {
		footer += Utilities.getString(paragraph) + '\n';
	}

	Elements siblings = tablesxmlNode.parent().getAllElements().first().children();
	boolean afterTable = false;
	for (Element sibling : siblings) {
		boolean isTable = sibling.tagName().equals("table");
		if (isTable && sibling.equals(tablesxmlNode)) {
			afterTable = true;
			continue;
		}
		if (isTable) {
			// Another table ends the run of footer siblings.
			afterTable = false;
		} else if (afterTable) {
			footer += sibling.text() + '\n';
		}
	}

	matches = getChildrenByTagName(tablesxmlNode, "title");
	if (!matches.isEmpty()) {
		footer = Utilities.getString(matches.get(0));
	}
	return footer;
}