org.sweble.wikitext.engine.PageTitle Java Examples

The following examples show how to use org.sweble.wikitext.engine.PageTitle. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TextConverter.java    From JuniperBot with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Visits an internal wiki link and emits its visible text.
 *
 * <p>Links whose resolved target lies in the namespace registered as
 * {@code "Category"} produce no output. For every other link the link title
 * is iterated when present, otherwise the link target.
 *
 * @param link the internal link node being visited
 */
public void visit(WtInternalLink link) {
    try {
        if (link.getTarget().isResolved()) {
            PageTitle linkedPage = PageTitle.make(config, link.getTarget().getAsString());
            boolean isCategory =
                    linkedPage.getNamespace().equals(config.getNamespace("Category"));
            if (isCategory) {
                return;
            }
        }
    } catch (LinkTargetException ignored) {
        // A target that cannot be turned into a PageTitle is rendered like any other link.
    }

    if (link.hasTitle()) {
        iterate(link.getTitle());
    } else {
        iterate(link.getTarget());
    }
}
 
Example #2
Source File: MediaWikiParser.java    From entity-fishing with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a wikitext fragment to text with all markup removed.
 *
 * @param wikitext the wikitext fragment to convert
 * @param lang key used to look up the engine in {@code engines} and the
 *             configuration in {@code configs}
 * @return the converted text after {@code trim}; if processing throws, the
 *         failure is logged as a warning and the result of trimming the empty
 *         string is returned
 */
public String toTextOnly(String wikitext, String lang) {
    WtEngineImpl engine = engines.get(lang);
    String text = "";

    try {
        // The engine requires a page identity; a placeholder title and revision -1 are used.
        // NOTE(review): presumably a dummy because the fragment is not a real page - confirm.
        PageId placeholderId = new PageId(PageTitle.make(configs.get(lang), "crap"), -1);

        EngProcessedPage processed = engine.postprocess(placeholderId, wikitext, null);

        WikiTextConverter converter = new WikiTextConverter(configs.get(lang));
        text = (String) converter.go(processed.getPage());
    } catch (Exception e) {
        LOGGER.warn("Fail to parse MediaWiki text, lang is " + lang, e);
    }

    return trim(text);
}
 
Example #3
Source File: MediaWikiParser.java    From entity-fishing with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a wikitext fragment to text, removing all markup except links to
 * internal wikipedia pages (external links to the internet are removed).
 *
 * <p>The converter is configured with
 * {@code WikiTextConverter.INTERNAL_LINKS} via {@code addToKeep}.
 *
 * @param wikitext the wikitext fragment to convert
 * @param lang key used to look up the engine in {@code engines} and the
 *             configuration in {@code configs}
 * @return the converted text after {@code trim}; if processing throws, the
 *         failure is logged as a warning and the result of trimming the empty
 *         string is returned
 */
public String toTextWithInternalLinksOnly(String wikitext, String lang) {
    WtEngineImpl engine = engines.get(lang);
    String text = "";

    try {
        // The engine requires a page identity; a placeholder title and revision -1 are used.
        // NOTE(review): presumably a dummy because the fragment is not a real page - confirm.
        PageId placeholderId = new PageId(PageTitle.make(configs.get(lang), "crap"), -1);

        EngProcessedPage processed = engine.postprocess(placeholderId, wikitext, null);

        WikiTextConverter converter = new WikiTextConverter(configs.get(lang));
        converter.addToKeep(WikiTextConverter.INTERNAL_LINKS);
        text = (String) converter.go(processed.getPage());
    } catch (Exception e) {
        LOGGER.warn("Fail to parse MediaWiki text, lang is " + lang, e);
    }

    return trim(text);
}
 
Example #4
Source File: MediaWikiParser.java    From entity-fishing with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a wikitext fragment to text, removing all markup except what the
 * converter is told to keep: {@code WikiTextConverter.INTERNAL_LINKS} and
 * {@code WikiTextConverter.CATEGORY_LINKS}.
 *
 * @param wikitext the wikitext fragment to convert
 * @param lang key used to look up the engine in {@code engines} and the
 *             configuration in {@code configs}
 * @return the converted text after {@code trim}; if processing throws, the
 *         failure is logged as a warning and the result of trimming the empty
 *         string is returned
 */
public String toTextWithInternalLinksAndCategoriesOnly(String wikitext, String lang) {
    WtEngineImpl engine = engines.get(lang);
    String text = "";

    try {
        // The engine requires a page identity; a placeholder title and revision -1 are used.
        // NOTE(review): presumably a dummy because the fragment is not a real page - confirm.
        PageId placeholderId = new PageId(PageTitle.make(configs.get(lang), "crap"), -1);

        EngProcessedPage processed = engine.postprocess(placeholderId, wikitext, null);

        WikiTextConverter converter = new WikiTextConverter(configs.get(lang));
        converter.addToKeep(WikiTextConverter.INTERNAL_LINKS);
        converter.addToKeep(WikiTextConverter.CATEGORY_LINKS);
        text = (String) converter.go(processed.getPage());
    } catch (Exception e) {
        LOGGER.warn("Fail to parse MediaWiki text, lang is " + lang, e);
    }

    return trim(text);
}
 
Example #5
Source File: MediaWikiParser.java    From entity-fishing with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a wikitext fragment to text, removing all markup except links to
 * internal wikipedia articles: external links to the internet are removed, as
 * are internal links that do not point to an article (e.g. redirection,
 * disambiguation page, category, ...).
 *
 * <p>The converter is configured with
 * {@code WikiTextConverter.INTERNAL_LINKS_ARTICLES} via {@code addToKeep}.
 *
 * @param wikitext the wikitext fragment to convert
 * @param lang key used to look up the engine in {@code engines} and the
 *             configuration in {@code configs}
 * @return the converted text after {@code trim}; if processing throws, the
 *         failure is logged as a warning and the result of trimming the empty
 *         string is returned
 */
public String toTextWithInternalLinksArticlesOnly(String wikitext, String lang) {
    WtEngineImpl engine = engines.get(lang);
    String text = "";

    try {
        // The engine requires a page identity; a placeholder title and revision -1 are used.
        // NOTE(review): presumably a dummy because the fragment is not a real page - confirm.
        PageId placeholderId = new PageId(PageTitle.make(configs.get(lang), "crap"), -1);

        EngProcessedPage processed = engine.postprocess(placeholderId, wikitext, null);

        WikiTextConverter converter = new WikiTextConverter(configs.get(lang));
        converter.addToKeep(WikiTextConverter.INTERNAL_LINKS_ARTICLES);
        text = (String) converter.go(processed.getPage());
    } catch (Exception e) {
        LOGGER.warn("Fail to parse MediaWiki text, lang is " + lang, e);
    }

    return trim(text);
}
 
Example #6
Source File: MediaWikiParser.java    From entity-fishing with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a wikitext fragment to text, removing all markup except links to
 * internal wikipedia pages (external links to the internet are removed) and
 * emphasis (bold and italics).
 *
 * <p>The converter is configured with
 * {@code WikiTextConverter.INTERNAL_LINKS}, {@code WikiTextConverter.BOLD}
 * and {@code WikiTextConverter.ITALICS} via {@code addToKeep}.
 *
 * @param wikitext the wikitext fragment to convert
 * @param lang key used to look up the engine in {@code engines} and the
 *             configuration in {@code configs}
 * @return the converted text after {@code trim}; if processing throws, the
 *         failure is logged as a warning and the result of trimming the empty
 *         string is returned
 */
public String toTextWithInternalLinksEmphasisOnly(String wikitext, String lang) {
    WtEngineImpl engine = engines.get(lang);
    String text = "";

    try {
        // The engine requires a page identity; a placeholder title and revision -1 are used.
        // NOTE(review): presumably a dummy because the fragment is not a real page - confirm.
        PageId placeholderId = new PageId(PageTitle.make(configs.get(lang), "crap"), -1);

        EngProcessedPage processed = engine.postprocess(placeholderId, wikitext, null);

        WikiTextConverter converter = new WikiTextConverter(configs.get(lang));
        converter.addToKeep(WikiTextConverter.INTERNAL_LINKS);
        converter.addToKeep(WikiTextConverter.BOLD);
        converter.addToKeep(WikiTextConverter.ITALICS);
        text = (String) converter.go(processed.getPage());
    } catch (Exception e) {
        LOGGER.warn("Fail to parse MediaWiki text, lang is " + lang, e);
    }

    return trim(text);
}
 
Example #7
Source File: Page.java    From dkpro-jwpl with Apache License 2.0 6 votes vote down vote up
/**
 * Runs this page's text through the Sweble engine and returns the processed page.
 *
 * <p>The engine and the page title are both built from the configuration
 * returned by {@code this.wiki.getWikConfig()}; the page id uses revision
 * {@code -1}.
 *
 * @return the processed page produced by {@code WtEngineImpl.postprocess}
 * @throws WikiApiException wrapping any exception raised while building the
 *         engine, the page id, or while processing the text
 */
private EngProcessedPage getCompiledPage() throws WikiApiException
{
	try {
		WtEngineImpl swebleEngine = new WtEngineImpl(this.wiki.getWikConfig());

		PageTitle title = PageTitle.make(this.wiki.getWikConfig(), this.getTitle().toString());
		PageId id = new PageId(title, -1);

		return swebleEngine.postprocess(id, this.getText(), null);
	}
	catch (Exception e) {
		throw new WikiApiException(e);
	}
}
 
Example #8
Source File: WikiFurService.java    From JuniperBot with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Post-processes the text of an article with the shared {@code engine}.
 *
 * <p>The page id is built from the article's title and its revision id parsed
 * as an {@code int}.
 *
 * @param article the article whose title, revision id and text are used
 * @return the processed page returned by {@code engine.postprocess}
 * @throws RuntimeException wrapping a {@code LinkTargetException} or
 *         {@code EngineException} raised during processing
 */
private EngProcessedPage processedPage(Article article) {
    try {
        PageTitle title = PageTitle.make(config, article.getTitle());
        int revision = Integer.parseInt(article.getRevisionId());
        PageId id = new PageId(title, revision);
        return engine.postprocess(id, article.getText(), null);
    } catch (LinkTargetException | EngineException cause) {
        throw new RuntimeException(cause);
    }
}
 
Example #9
Source File: PlainTextConverter.java    From dkpro-jwpl with Apache License 2.0 5 votes vote down vote up
/**
 * Visits an internal wiki link and writes its plain-text representation.
 *
 * <p>{@code currentLinkTitleInCell} is reset first. Links into the namespace
 * registered as {@code "Category"} produce no output. If the target cannot be
 * turned into a {@code PageTitle}, the exception message is logged as a
 * warning and the link is still rendered.
 *
 * <p>Rendering writes the link prefix, then either the link title (when it is
 * non-null and non-empty) or the link target, then the link postfix.
 *
 * @param link the internal link node being visited
 */
public void visit(WtInternalLink link)
{
	currentLinkTitleInCell = null;
	try
	{
		PageTitle linkedPage = PageTitle.make(config, link.getTarget().getAsString());
		boolean isCategory = linkedPage.getNamespace().equals(config.getNamespace("Category"));
		if (isCategory) {
			return;
		}
	}
	catch (LinkTargetException e)
	{
		logger.warn(e.getLocalizedMessage());
	}

	write(link.getPrefix());

	WtLinkTitle linkTitle = link.getTitle();
	boolean hasLinkTitle = linkTitle != null && !linkTitle.isEmpty();
	if (hasLinkTitle)
	{
		iterate(linkTitle);
	}
	else
	{
		String target = link.getTarget().getAsString();

		// Remember the target without any '#anchor' part; it could be needed to
		// process table rows correctly.
		int anchorStart = target.indexOf('#');
		currentLinkTitleInCell = anchorStart >= 0 ? target.substring(0, anchorStart) : target;

		// Regular case (not inside a cell): write the original, unshortened target.
		if (currentCell == null) {
			write(target);
		}
	}

	write(link.getPostfix());
}
 
Example #10
Source File: ParseUtils.java    From dkpro-jwpl with Apache License 2.0 5 votes vote down vote up
/**
 * Processes wikitext with the Sweble engine, using the configuration produced
 * by {@code DefaultConfigEnWp.generate()}.
 *
 * @param text the wikitext to process
 * @param title the page title used to build the page id
 * @param revision the revision number used to build the page id
 * @return the processed page returned by {@code WtEngineImpl.postprocess}
 * @throws LinkTargetException declared by this method; see {@code PageTitle.make}
 * @throws EngineException if the wiki page could not be compiled by the parser
 * @throws JAXBException declared by this method
 * @throws FileNotFoundException declared by this method
 */
private static EngProcessedPage getCompiledPage(String text, String title, long revision) throws LinkTargetException, EngineException, FileNotFoundException, JAXBException
{
	WikiConfig config = DefaultConfigEnWp.generate();

	// Identity of the page being processed.
	PageId pageId = new PageId(PageTitle.make(config, title), revision);

	// Compile the page with a fresh engine.
	return new WtEngineImpl(config).postprocess(pageId, text, null);
}
 
Example #11
Source File: SectionExtractor.java    From dkpro-jwpl with Apache License 2.0 5 votes vote down vote up
/**
 * Visits an internal wiki link and appends its visible text to {@code bodyBuilder}.
 *
 * <p>Links into the namespace registered as {@code "Category"} append nothing.
 * For other links, the trimmed content of the last {@code AstText} child of
 * the link title is appended; when that is empty, the link target is appended
 * instead. A target that cannot be turned into a {@code PageTitle} appends
 * nothing.
 *
 * @param link the internal link node being visited
 */
public void visit(WtInternalLink link)
{
	try
	{
		PageTitle linkedPage = PageTitle.make(config, link.getTarget().getAsString());
		if (linkedPage.getNamespace().equals(config.getNamespace("Category"))) {
			return;
		}

		// The last text node of the title wins; earlier ones are overwritten.
		String linkText = "";
		for (AstNode child : link.getTitle()) {
			if (child instanceof AstText) {
				linkText = ((AstText) child).getContent().trim();
			}
		}

		if (linkText.isEmpty()) {
			// NOTE(review): this appends the target node itself, not getAsString() - confirm intended.
			bodyBuilder.append(link.getTarget());
		} else {
			bodyBuilder.append(linkText);
		}
	}
	catch (LinkTargetException ignored)
	{
		// Links with an invalid target are skipped without any output or logging.
	}
}
 
Example #12
Source File: SwebleParserUtil.java    From wikiforia with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Builds an engine and a page id from the given arguments and delegates to
 * {@code parseWikipage}.
 *
 * @param config the wiki configuration used for both the engine and the page title
 * @param title the page title
 * @param revision the page revision
 * @param markup the wiki markup to parse
 * @return the processed page returned by {@code parseWikipage}
 * @throws EngineException declared by this method
 * @throws LinkTargetException declared by this method; see {@code PageTitle.make}
 */
public static EngProcessedPage parsePage(WikiConfig config, String title, long revision, String markup) throws EngineException, LinkTargetException {
    WtEngineImpl swebleEngine = new WtEngineImpl(config);
    PageId id = new PageId(PageTitle.make(config, title), revision);
    return parseWikipage(swebleEngine, id, markup);
}
 
Example #13
Source File: SwebleTextAstWalker.java    From wikiforia with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Visits an internal wiki link and appends its visible text to {@code sb}.
 *
 * <p>Resolved links into the namespace registered as {@code "Category"}, or
 * into a file or media namespace, produce no output. Otherwise the link title
 * (or, without a title, the link target) is iterated; the link prefix and
 * postfix are appended around it unless {@code isInsideFilteredSection()}
 * reports true at that moment.
 *
 * @param link the internal link node being visited
 */
public void visit(WtInternalLink link)
{
    try
    {
        if (link.getTarget().isResolved())
        {
            PageTitle linkedPage = PageTitle.make(config, link.getTarget().getAsString());
            boolean skipped = linkedPage.getNamespace().equals(config.getNamespace("Category"))
                    || linkedPage.getNamespace().isFileNs()
                    || linkedPage.getNamespace().isMediaNs();
            if (skipped) {
                return;
            }
        }
    }
    catch (LinkTargetException ignored)
    {
        // A target that cannot be turned into a PageTitle is rendered like any other link.
    }

    if (!isInsideFilteredSection()) {
        sb.append(link.getPrefix());
    }

    if (link.hasTitle())
    {
        iterate(link.getTitle());
    }
    else
    {
        iterate(link.getTarget());
    }

    // Checked again on purpose: the call is repeated after iterating, as in the prefix case.
    if (!isInsideFilteredSection()) {
        sb.append(link.getPostfix());
    }
}
 
Example #14
Source File: TextParser.java    From wikiforia with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Visits an internal wiki link, appends its visible text to {@code sb} and
 * reports the link to {@code parser}.
 *
 * <p>Resolved links into the namespace registered as {@code "Category"} are
 * reported through {@code parser.category} and produce no text. Resolved links
 * into a file or media namespace are dropped. Every other link is rendered
 * (prefix, title or target, postfix) and, when the trimmed text range is
 * non-empty, reported through {@code parser.anchor} with the range
 * {@code [start, end)} measured on {@code sb}.
 *
 * @param link the internal link node being visited
 */
public void visit(WtInternalLink link)
{
    try
    {
        if (link.getTarget().isResolved())
        {
            PageTitle linkedPage = PageTitle.make(config, link.getTarget().getAsString());
            if (linkedPage.getNamespace().equals(config.getNamespace("Category"))) {
                sb.flush();
                parser.category(context, link.getTarget().getAsString(), linkedPage.getTitle().replace('_', ' '), sb.length());
                return;
            }
            if (linkedPage.getNamespace().isFileNs() || linkedPage.getNamespace().isMediaNs()) {
                return;
            }
        }
    }
    catch (LinkTargetException ignored)
    {
        // A target that cannot be turned into a PageTitle is rendered like any other link.
    }

    // Position in the output before the link text is written.
    sb.flush();
    int rawStart = sb.length();

    if (!isInsideFilteredSection()) {
        sb.append(link.getPrefix());
    }

    if (link.hasTitle()) {
        iterate(link.getTitle());
    } else {
        iterate(link.getTarget());
    }

    if (!isInsideFilteredSection()) {
        sb.append(link.getPostfix());
    }

    // Position in the output after the link text is written.
    sb.flush();
    int rawEnd = sb.length();

    int start = trimStart(rawStart);
    int end = trimEnd(rawEnd);
    if (start >= end) {
        // Nothing visible was written for this link, so there is no anchor to report.
        return;
    }

    String target = link.getTarget().getAsString();
    int hashIndex = target.lastIndexOf('#');
    if (target.startsWith("#")) {
        // NOTE(review): `page` is not the link's PageTitle from the try block (out of scope here);
        // presumably an enclosing-scope reference to the page being parsed - confirm.
        parser.anchor(context, page.getTitle(), target.substring(1), true, start, end);
    } else if (hashIndex == -1) {
        parser.anchor(context, target, null, true, start, end);
    } else {
        parser.anchor(context, target.substring(0, hashIndex), target.substring(hashIndex + 1), true, start, end);
    }
}