org.sweble.wikitext.engine.PageTitle Java Examples
The following examples show how to use
org.sweble.wikitext.engine.PageTitle.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TextConverter.java From JuniperBot with GNU General Public License v3.0 | 6 votes |
public void visit(WtInternalLink link) { try { if (link.getTarget().isResolved()) { PageTitle page = PageTitle.make(config, link.getTarget().getAsString()); if (page.getNamespace().equals(config.getNamespace("Category"))) return; } } catch (LinkTargetException e) { // fall down } /*if (!link.hasTitle()) { iterate(link.getTarget()); } else { String title = getContent(link.getTitle()); String target = UriUtils.encode(getContent(link.getTarget()), "UTF-8"); String url = config.getWikiUrl() + "/wiki/" + target; write(CommonUtils.mdLink(title, url)); }*/ iterate(link.hasTitle() ? link.getTitle() : link.getTarget()); }
Example #2
Source File: MediaWikiParser.java From entity-fishing with Apache License 2.0 | 6 votes |
/** * @return the content of the wiki text fragment with all markup removed */ public String toTextOnly(String wikitext, String lang) { String result = ""; // get a compiler for wiki pages //WtEngineImpl engine = new WtEngineImpl(config); WtEngineImpl engine = engines.get(lang); try { // Retrieve a page // PL: no clue what is this page title thing ?? not even documented PageTitle pageTitle = PageTitle.make(configs.get(lang), "crap"); PageId pageId = new PageId(pageTitle, -1); // Compile the retrieved page EngProcessedPage cp = engine.postprocess(pageId, wikitext, null); WikiTextConverter converter = new WikiTextConverter(configs.get(lang)); result = (String)converter.go(cp.getPage()); } catch(Exception e) { LOGGER.warn("Fail to parse MediaWiki text, lang is " + lang, e); } return trim(result); }
Example #3
Source File: MediaWikiParser.java From entity-fishing with Apache License 2.0 | 6 votes |
/** * @return the content of the wiki text fragment with all markup removed except links * to internal wikipedia pages: external links to the internet are removed */ public String toTextWithInternalLinksOnly(String wikitext, String lang) { String result = ""; // Instantiate a compiler for wiki pages //WtEngineImpl engine = new WtEngineImpl(config); WtEngineImpl engine = engines.get(lang); try { // Retrieve a page // PL: no clue what is this?? PageTitle pageTitle = PageTitle.make(configs.get(lang), "crap"); PageId pageId = new PageId(pageTitle, -1); // Compile the retrieved page EngProcessedPage cp = engine.postprocess(pageId, wikitext, null); WikiTextConverter converter = new WikiTextConverter(configs.get(lang)); converter.addToKeep(WikiTextConverter.INTERNAL_LINKS); result = (String)converter.go(cp.getPage()); } catch(Exception e) { LOGGER.warn("Fail to parse MediaWiki text, lang is " + lang, e); } return trim(result); }
Example #4
Source File: MediaWikiParser.java From entity-fishing with Apache License 2.0 | 6 votes |
/** * @return the content of the wiki text fragment with all markup removed except links * to internal wikipedia pages: external links to the internet are removed */ public String toTextWithInternalLinksAndCategoriesOnly(String wikitext, String lang) { String result = ""; // Instantiate a compiler for wiki pages //WtEngineImpl engine = new WtEngineImpl(config); WtEngineImpl engine = engines.get(lang); try { // Retrieve a page // PL: no clue what is this?? PageTitle pageTitle = PageTitle.make(configs.get(lang), "crap"); PageId pageId = new PageId(pageTitle, -1); // Compile the retrieved page EngProcessedPage cp = engine.postprocess(pageId, wikitext, null); WikiTextConverter converter = new WikiTextConverter(configs.get(lang)); converter.addToKeep(WikiTextConverter.INTERNAL_LINKS); converter.addToKeep(WikiTextConverter.CATEGORY_LINKS); result = (String)converter.go(cp.getPage()); } catch(Exception e) { LOGGER.warn("Fail to parse MediaWiki text, lang is " + lang, e); } return trim(result); }
Example #5
Source File: MediaWikiParser.java From entity-fishing with Apache License 2.0 | 6 votes |
/** * @return the content of the wiki text fragment with all markup removed except links * to internal wikipedia articles : external links to the internet are removed, as well as * internal link not to an article (e.g. redirection, disambiguation page, category, ...) */ public String toTextWithInternalLinksArticlesOnly(String wikitext, String lang) { String result = ""; // Instantiate a compiler for wiki pages //WtEngineImpl engine = new WtEngineImpl(config); WtEngineImpl engine = engines.get(lang); try { // Retrieve a page // PL: no clue what is this?? PageTitle pageTitle = PageTitle.make(configs.get(lang), "crap"); PageId pageId = new PageId(pageTitle, -1); // Compile the retrieved page EngProcessedPage cp = engine.postprocess(pageId, wikitext, null); WikiTextConverter converter = new WikiTextConverter(configs.get(lang)); converter.addToKeep(WikiTextConverter.INTERNAL_LINKS_ARTICLES); result = (String)converter.go(cp.getPage()); } catch(Exception e) { LOGGER.warn("Fail to parse MediaWiki text, lang is " + lang, e); } return trim(result); }
Example #6
Source File: MediaWikiParser.java From entity-fishing with Apache License 2.0 | 6 votes |
/** * @return the content of the wiki text fragment with all markup removed except links * to internal wikipedia (external links to the internet are removed) and except emphasis * (bold and italics) */ public String toTextWithInternalLinksEmphasisOnly(String wikitext, String lang) { String result = ""; // Instantiate a compiler for wiki pages //WtEngineImpl engine = new WtEngineImpl(config); WtEngineImpl engine = engines.get(lang); try { // Retrieve a page // PL: no clue what is this?? PageTitle pageTitle = PageTitle.make(configs.get(lang), "crap"); PageId pageId = new PageId(pageTitle, -1); // Compile the retrieved page EngProcessedPage cp = engine.postprocess(pageId, wikitext, null); WikiTextConverter converter = new WikiTextConverter(configs.get(lang)); converter.addToKeep(WikiTextConverter.INTERNAL_LINKS); converter.addToKeep(WikiTextConverter.BOLD); converter.addToKeep(WikiTextConverter.ITALICS); result = (String)converter.go(cp.getPage()); } catch(Exception e) { LOGGER.warn("Fail to parse MediaWiki text, lang is " + lang, e); } return trim(result); }
Example #7
Source File: Page.java From dkpro-jwpl with Apache License 2.0 | 6 votes |
/** * Returns CompiledPage produced by the SWEBLE parser using the SimpleWikiConfiguration. * * @return the parsed page * @throws WikiApiException Thrown if errors occurred. */ private EngProcessedPage getCompiledPage() throws WikiApiException { EngProcessedPage cp; try{ WtEngineImpl engine = new WtEngineImpl(this.wiki.getWikConfig()); PageTitle pageTitle = PageTitle.make(this.wiki.getWikConfig(), this.getTitle().toString()); PageId pageId = new PageId(pageTitle, -1); // Compile the retrieved page cp = engine.postprocess(pageId, this.getText(), null); } catch(Exception e){ throw new WikiApiException(e); } return cp; }
Example #8
Source File: WikiFurService.java From JuniperBot with GNU General Public License v3.0 | 5 votes |
/**
 * Runs the engine over an article's text.
 *
 * <p>The page id is built from the article title and its revision id parsed as an
 * integer.
 *
 * @param article the article whose title, revision id and text are read
 * @return the processed page
 * @throws RuntimeException wrapping a {@code LinkTargetException} or
 *         {@code EngineException} raised while processing
 */
private EngProcessedPage processedPage(Article article) {
    try {
        PageId articleId = new PageId(
                PageTitle.make(config, article.getTitle()),
                Integer.parseInt(article.getRevisionId()));
        return engine.postprocess(articleId, article.getText(), null);
    } catch (LinkTargetException | EngineException e) {
        throw new RuntimeException(e);
    }
}
Example #9
Source File: PlainTextConverter.java From dkpro-jwpl with Apache License 2.0 | 5 votes |
public void visit(WtInternalLink link) { currentLinkTitleInCell = null; try { PageTitle page = PageTitle.make(config, link.getTarget().getAsString()); if (page.getNamespace().equals(config.getNamespace("Category"))) { return; } } catch (LinkTargetException e) { logger.warn(e.getLocalizedMessage()); } write(link.getPrefix()); WtLinkTitle pageTitle = link.getTitle(); if (pageTitle == null || pageTitle.isEmpty()) { // remember this as it could be needed to process table rows correctly currentLinkTitleInCell = link.getTarget().getAsString(); if(currentLinkTitleInCell.contains("#")) { // only take the first part of the string, no anchors on pages (divided by '#' symbols) currentLinkTitleInCell = currentLinkTitleInCell.split(Pattern.quote("#"), 2)[0]; } // for regular cases: just write the original value here if(currentCell==null) { write(link.getTarget().getAsString()); } } else { iterate(link.getTitle()); } write(link.getPostfix()); }
Example #10
Source File: ParseUtils.java From dkpro-jwpl with Apache License 2.0 | 5 votes |
/** * Returns CompiledPage produced by the SWEBLE parser using the * SimpleWikiConfiguration. * * @return the parsed page * @throws LinkTargetException * @throws EngineException if the wiki page could not be compiled by the parser * @throws JAXBException * @throws FileNotFoundException */ private static EngProcessedPage getCompiledPage(String text, String title, long revision) throws LinkTargetException, EngineException, FileNotFoundException, JAXBException { WikiConfig config = DefaultConfigEnWp.generate(); PageTitle pageTitle = PageTitle.make(config, title); PageId pageId = new PageId(pageTitle, revision); // Compile the retrieved page WtEngineImpl engine = new WtEngineImpl(config); // Compile the retrieved page return engine.postprocess(pageId, text, null); }
Example #11
Source File: SectionExtractor.java From dkpro-jwpl with Apache License 2.0 | 5 votes |
/**
 * Visits an internal wiki link and appends its visible text to {@code bodyBuilder}.
 *
 * <p>Links whose target is in the "Category" namespace are skipped. Otherwise the
 * trimmed content of the last text node in the link title is appended; when the title
 * yields no text, the link target is appended as a string instead.
 *
 * @param link the internal link node being visited
 */
public void visit(WtInternalLink link) {
    try {
        PageTitle page = PageTitle.make(config, link.getTarget().getAsString());
        if (page.getNamespace().equals(config.getNamespace("Category"))) {
            return;
        }

        // Each text node overwrites the previous one, so the last one wins.
        String curLinkTitle = "";
        for (AstNode n : link.getTitle()) {
            if (n instanceof AstText) {
                curLinkTitle = ((AstText) n).getContent().trim();
            }
        }

        if (curLinkTitle.isEmpty()) {
            // Append the target text, not the AST node itself: appending the node
            // would insert its toString() representation into the body.
            bodyBuilder.append(link.getTarget().getAsString());
        } else {
            bodyBuilder.append(curLinkTitle);
        }
    } catch (LinkTargetException ignored) {
        // Deliberately best-effort: a link whose target cannot be parsed as a page
        // title contributes nothing to the body.
    }
}
Example #12
Source File: SwebleParserUtil.java From wikiforia with GNU General Public License v2.0 | 5 votes |
/**
 * Parses wiki markup into a processed page.
 *
 * <p>A new engine is created from the given configuration, a page id is built from the
 * title and revision, and the work is delegated to {@code parseWikipage}.
 *
 * @param config the wiki configuration used for the engine and the page title
 * @param title the page title
 * @param revision the page revision
 * @param markup the wiki markup to parse
 * @return the processed page returned by {@code parseWikipage}
 * @throws EngineException declared by this method
 * @throws LinkTargetException declared by this method
 */
public static EngProcessedPage parsePage(WikiConfig config, String title, long revision, String markup)
        throws EngineException, LinkTargetException {
    WtEngineImpl swebleEngine = new WtEngineImpl(config);
    PageId id = new PageId(PageTitle.make(config, title), revision);
    return parseWikipage(swebleEngine, id, markup);
}
Example #13
Source File: SwebleTextAstWalker.java From wikiforia with GNU General Public License v2.0 | 5 votes |
public void visit(WtInternalLink link) { try { if (link.getTarget().isResolved()) { PageTitle page = PageTitle.make(config, link.getTarget().getAsString()); if (page.getNamespace().equals(config.getNamespace("Category"))) { return; } else if(page.getNamespace().isFileNs() || page.getNamespace().isMediaNs()) { return; } } } catch (LinkTargetException e) { } //int start = sb.length(); if(!isInsideFilteredSection()) { sb.append(link.getPrefix()); } if (!link.hasTitle()) { iterate(link.getTarget()); } else { iterate(link.getTitle()); } if(!isInsideFilteredSection()) { sb.append(link.getPostfix()); } //int end = sb.length(); }
Example #14
Source File: TextParser.java From wikiforia with GNU General Public License v2.0 | 4 votes |
/**
 * Visits an internal wiki link, appends its visible text to {@code sb} and reports the
 * link to {@code parser}.
 *
 * <p>A resolved link into the "Category" namespace is reported through
 * {@code parser.category} and produces no text; a resolved link into a file or media
 * namespace is dropped. Any other link has its text appended, and the resulting
 * (trimmed) offset range is reported through {@code parser.anchor}.
 *
 * @param link the internal link node being visited
 */
public void visit(WtInternalLink link) {
    try {
        if (link.getTarget().isResolved()) {
            PageTitle page = PageTitle.make(config, link.getTarget().getAsString());
            if (page.getNamespace().equals(config.getNamespace("Category"))) {
                // Flush before reading sb.length() so the reported position is current.
                sb.flush();
                // Report the category with underscores in its title replaced by spaces.
                parser.category(context, link.getTarget().getAsString(), page.getTitle().replace('_', ' '), sb.length());
                return;
            } else if(page.getNamespace().isFileNs() || page.getNamespace().isMediaNs()) {
                // File and media links contribute neither text nor an anchor.
                return;
            }
        }
    } catch (LinkTargetException e) {
        // Ignored: the link is then handled as a regular link below.
    }

    // Record where the link text starts in the output.
    sb.flush();
    int start = sb.length();

    if(!isInsideFilteredSection()) {
        sb.append(link.getPrefix());
    }

    // Emit the title when the link has one, otherwise the target.
    if (!link.hasTitle())
        iterate(link.getTarget());
    else
        iterate(link.getTitle());

    if(!isInsideFilteredSection()) {
        sb.append(link.getPostfix());
    }

    // Record where the link text ends in the output.
    sb.flush();
    int end = sb.length();

    // Adjust both offsets via the trim helpers; presumably these skip surrounding
    // whitespace -- confirm against trimStart/trimEnd.
    start = trimStart(start);
    end = trimEnd(end);

    // Only report an anchor when the adjusted range is non-empty.
    if(start < end) {
        String target = link.getTarget().getAsString();
        if(target.startsWith("#")) {
            // Target is only a fragment ("#section"): report it against page.getTitle().
            // NOTE(review): the PageTitle local named `page` declared in the try block is
            // out of scope here, so this `page` must be a member declared elsewhere in
            // the class -- confirm which page it refers to.
            parser.anchor(context, page.getTitle(), target.substring(1), true, start, end);
        }
        else {
            // Split "Page#section" at the last '#'.
            int hashIndex = target.lastIndexOf('#');
            if(hashIndex == -1) {
                // No fragment: report the whole target with a null fragment.
                parser.anchor(context, link.getTarget().getAsString(), null, true, start, end);
            } else {
                parser.anchor(context, target.substring(0,hashIndex), target.substring(hashIndex+1), true, start, end);
            }
        }
    }
}