Python wikipedia.page() Examples

The following are 28 code examples of wikipedia.page(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module wikipedia, or try the search function.
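Before the project-specific examples, here is a minimal sketch of the call pattern most of them follow (the page title below is only an illustration; it assumes the wikipedia package from PyPI, installed with pip install wikipedia):

import wikipedia

try:
    page = wikipedia.page("Python (programming language)")
    print(page.title)          # canonical article title
    print(page.url)            # full article URL
    print(page.summary[:200])  # lead section, truncated
except wikipedia.exceptions.DisambiguationError as e:
    # The title matched a disambiguation page; e.options lists the candidates.
    print(e.options[:5])
except wikipedia.exceptions.PageError:
    # No article matched the query.
    print("no page found")

Most examples below are variations on this pattern: find or guess a title, fetch the page, and handle DisambiguationError and PageError.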
Example #1
Source File: fetch_wiki.py    From adam_qas with GNU General Public License v3.0 8 votes
def search_wiki(keywords, number_of_search, wiki_pages):
    suggestion = False

    # Search Wikipedia for each keyword and store the content of every hit.
    for keyword in keywords:
        result_set = wikipedia.search(keyword, number_of_search, suggestion)
        for term in result_set:
            try:
                page = wikipedia.page(term, preload=False)
                wiki_pages[page.title] = page.content
            except wikipedia.exceptions.DisambiguationError:
                pass  # ambiguous title, skip it
            except wikipedia.exceptions.PageError:
                pass  # no matching page, skip it

    return wiki_pages
Example #2
Source File: wikipedia.py    From Mash-Cogs with GNU General Public License v3.0 6 votes
def wikipedia(self, ctx, *text):
        """Wikipedia search."""

        if not text:
            await send_cmd_help(ctx)
            return

        search = "_".join(text)
        user = ctx.message.author
        wikiLang = 'en'  # Wikipedia language code; most languages are supported
        wikipedia.set_lang(wikiLang)  # Set the Wikipedia language.
        try:
            ws = wikipedia.page(search)
            wikiUrl = ws.url.encode('ascii', 'xmlcharrefreplace')
            await self.bot.say(wikiUrl.decode("utf8"))
        except Exception:
            await self.bot.say('Sorry {}, no wiki hit, try to rephrase'.format(user))
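The encode('ascii', 'xmlcharrefreplace') / decode('utf8') round-trip rewrites any non-ASCII characters in the article URL as XML character references, presumably so the link posts cleanly through the chat client.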
Example #3
Source File: gen_corpus.py    From Living-Audio-Dataset with Apache License 2.0 6 votes
def get_articles(language, no_words, max_no_articles, search, **kwargs):
    """ Retrieve articles from Wikipedia """
    wikipedia.set_rate_limiting(True) # be polite
    wikipedia.set_lang(language)

    if search is not None:
        titles = wikipedia.search(search, results = max_no_articles)
    else:
        titles = wikipedia.random(pages = max_no_articles)

    articles = []
    current_no_words = 0
    for title in titles:
        print("INFO: loading {}".format(title))
        page = wikipedia.page(title=title)
        content = page.content
        article_no_words = len(content.split())
        current_no_words += article_no_words
        print("INFO: article contains {} words".format(article_no_words))
        articles.append((title, content))
        if current_no_words >= no_words:
            break

    return articles 
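A note on wikipedia.set_rate_limiting(True): the wikipedia package does not throttle requests by default, so enabling rate limiting as above is the polite option when pulling many random pages in a loop.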
Example #4
Source File: simplewikipedia.py    From hangoutsbot with GNU Affero General Public License v3.0 6 votes
def wiki(bot, event, *args):
    """lookup a term on Wikipedia"""

    term = " ".join(args)
    if not term:
        return

    try:
        page = wikipedia.page(term, auto_suggest=False)

        summary = page.summary.strip()
        summary = summary.replace('\r\n', '\n').replace('\r', '\n')
        summary = re.sub('\n+', "\n", summary).replace('\n', '<br /><br />')
        source = _('<i>source: <a href="{}">{}</a></i>').format(page.url, page.url)

        html_text = '<b>"{}"</b><br /><br />{}<br /><br />{}'.format(term, summary, source)
    except wikipedia.exceptions.PageError:
        html_text = _("<i>no entry found for {}</i>").format(term)
    except wikipedia.exceptions.DisambiguationError as e:
        exception_text = str(e).strip().replace("\n", "<br />")
        html_text = "<i>{}</i>".format(exception_text)

    yield from bot.coro_send_message(event.conv, html_text) 
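Note the auto_suggest=False above: with the default auto_suggest=True, the library may quietly swap the query for Wikipedia's search suggestion and fetch an unrelated article, so disabling it keeps the lookup exact at the cost of a PageError on near-miss titles.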
Example #5
Source File: searching.py    From Utlyz-CLI with Apache License 2.0 6 votes
def cli(google, wiki):
	browser = mechanize.Browser()
	browser.set_handle_robots(False)	#Allows everything to be written
	cookies = mechanize.CookieJar()
	browser.set_cookiejar(cookies)
	browser.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.517.41 Safari/534.7')]
	browser.set_handle_refresh(False)	#Sometimes hangs without this
	if google:
		query = raw_input("Enter the topic you want to search about: ")
		for link in search(query, tld="co.in", num=10, stop=1, pause=2):
			print(link)
	if wiki:
		wiki_topic = raw_input('Enter the topic you want to read about: ')
		result = wikipedia.page(title=wiki_topic, auto_suggest=True, redirect=True, preload=False)
		wiki_content = result.content
		print(wiki_content)
Example #6
Source File: app.py    From Utlyz-CLI with Apache License 2.0 6 votes
def wiki(bot, update, args):
	try:
		topic = ""
		for arg in args:
			topic += arg + " "
		summary = wikipedia.summary(topic, sentences=30)
		page = wikipedia.page(topic)
		extra = "\nFor more details visit " + page.url
		summary += extra
		bot.sendChatAction(chat_id = update.message.chat_id, action = ChatAction.TYPING)
		bot.sendMessage(chat_id = update.message.chat_id, parse_mode=ParseMode.HTML, text = summary)

	except wikipedia.exceptions.DisambiguationError as e:
		error = "Please be more specific with your search query; possible matches are:\n"
		for option in e.options:
			error += option + '\n'
		bot.sendChatAction(chat_id = update.message.chat_id, action = ChatAction.TYPING)
		bot.sendMessage(chat_id = update.message.chat_id, text = error)

	except wikipedia.exceptions.PageError:
		error = "No messages could be found with the topic you entered!"
		bot.sendChatAction(chat_id = update.message.chat_id, action = ChatAction.TYPING)
		bot.sendMessage(chat_id = update.message.chat_id, text = error) 
Example #7
Source File: wordcount.py    From ray with Apache License 2.0 5 votes
def get_next(self):
        if self.done:
            return None  # Source exhausted
        while True:
            if self.article_done:
                try:  # Try next title
                    next_title = next(self.title_reader)
                except StopIteration:
                    self.done = True  # Source exhausted
                    return None
                # Get next article
                logger.debug("Next article: {}".format(next_title))
                article = wikipedia.page(next_title).content
                # Split article in sentences
                self.sentences = iter(article.split("."))
                self.article_done = False
            try:  # Try next sentence
                sentence = next(self.sentences)
                logger.debug("Next sentence: {}".format(sentence))
                return sentence
            except StopIteration:
                self.article_done = True


# Splits input line into words and
# outputs records of the form (word,1) 
Example #8
Source File: fetch_tax_info.py    From idseq-dag with MIT License 5 votes
def fetch_ncbi_wiki_map(num_threads, batch_size, taxid_list, taxid2wikidict):
        ''' Use Entrez API to fetch taxonid -> wikipedia page mapping '''
        threads = []
        semaphore = threading.Semaphore(num_threads)
        mutex = TraceLock("fetch_ncbi_wiki_map", threading.RLock())
        batch = []
        with open(taxid_list, 'r') as taxf:
            for line in taxf:
                taxid = line.rstrip()
                if taxid == 'taxid':
                    continue  # header
                batch.append(taxid)
                if len(batch) >= batch_size:
                    semaphore.acquire()
                    t = threading.Thread(
                        target=PipelineStepFetchTaxInfo.
                        get_taxid_mapping_for_batch,
                        args=[batch, taxid2wikidict, mutex, semaphore]
                    )
                    t.start()
                    threads.append(t)
                    batch = []
        if len(batch) > 0:
            semaphore.acquire()
            t = threading.Thread(
                target=PipelineStepFetchTaxInfo.
                get_taxid_mapping_for_batch,
                args=[batch, taxid2wikidict, mutex, semaphore]
            )
            t.start()
            threads.append(t)
        for t in threads:
            t.join() 
Example #9
Source File: fetch_tax_info.py    From idseq-dag with MIT License 5 votes
def get_wiki_content_for_page(taxid, pageid, taxname, taxid2wikicontent, mutex, semaphore, max_attempt=3):
        ''' Fetch wiki content for pageid '''
        for attempt in range(max_attempt):
            try:
                page = None
                if pageid:
                    log.write(f"fetching wiki {pageid} for {taxid}")
                    page = wikipedia.page(pageid=pageid)
                elif taxname:
                    search_results = wikipedia.search(taxname)
                    if len(search_results) > 0:
                        wikiname = str(search_results[0])
                        if taxname.lower() == wikiname.lower():
                            page = wikipedia.page(wikiname)
                    if not page:
                        # query the page directly
                        try:
                            page = wikipedia.page(taxname.replace(" ", "_"))
                        except Exception:
                            page = None

                if page:
                    output = {
                        "pageid": page.pageid,
                        "description": page.content[:1000],
                        "title": page.title,
                        "summary": page.summary
                    }
                    with mutex:
                        taxid2wikicontent[taxid] = output
                break
            except Exception:
                log.write(f"having trouble fetching {taxid} wiki {pageid} attempt {attempt}")
        semaphore.release() 
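A pattern worth noting in both idseq-dag examples: a Semaphore caps the number of worker threads in flight while a mutex guards the shared dict, and semaphore.release() runs once per worker whether or not a page was found, so the pool never starves.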
Example #10
Source File: rhodiola.py    From rhodiola with BSD 3-Clause "New" or "Revised" License 5 votes
def get_cities(proper_noun):
    page = wikipedia.page(proper_noun)
    geo = GeoText(page.summary)
    return list(set(geo.cities)) 
Example #11
Source File: rhodiola.py    From rhodiola with BSD 3-Clause "New" or "Revised" License 5 votes
def get_year(proper_noun):
    try:
        page = wikipedia.page(proper_noun)
        year = re.findall('[1-3][0-9]{3}',page.summary)
        return year[0]
    except Exception:
        return None 
Example #12
Source File: wiki_search.py    From NeuralTripleTranslation with Apache License 2.0 5 votes
def get(query):
    return find_candidates(find_json(query))

# with open("testoutput.txt", "w") as text_file:

# for i in get('video-assisted thoracoscopic'):
#     try:
#             # print get('video-assisted thoracoscopic')
#             # print wikipedia.page(i)
#             # allwiki.append(wikipedia.page(i).content)
#         text_file.write('%s\n\n' % wikipedia.page(i).content.encode('utf8'))
#         # print type(wikipedia.page(i).content.encode('utf8'))
#         # print wikipedia.page(i).content
#     except:
#         print "Unexpected error:", sys.exc_info()[0]


# print get('video-assisted thoracoscopic')

# for keys in world_dict.keys():
#     for term in world_dict.get(keys, keys):
#         if wikipedia.search(term):
#             term = wikipedia.search(term)[0]
#             wikipage = wikipedia.page(term)
#             content = wikipage.content
#             allwiki.append(content)
#
# print (wikipedia.page("Georgia_(country)").content)
#
# print wikipedia.search('fraction of inspired o2')[0]


# text_file.close() 
Example #13
Source File: article.py    From wikipedia-question-generator with MIT License 5 votes
def __init__(self, title):
        self.page = wikipedia.page(title)
        self.summary = TextBlob(self.page.summary) 
Example #14
Source File: classifier.py    From quantulum with MIT License 5 votes
def download_wiki():
    """Download WikiPedia pages of ambiguous units."""
    ambiguous = [i for i in l.UNITS.items() if len(i[1]) > 1]
    ambiguous += [i for i in l.DERIVED_ENT.items() if len(i[1]) > 1]
    pages = set([(j.name, j.uri) for i in ambiguous for j in i[1]])

    print
    objs = []
    for num, page in enumerate(pages):

        obj = {'url': page[1]}
        obj['_id'] = obj['url'].replace('https://en.wikipedia.org/wiki/', '')
        obj['clean'] = obj['_id'].replace('_', ' ')

        print '---> Downloading %s (%d of %d)' % \
              (obj['clean'], num + 1, len(pages))

        obj['text'] = wikipedia.page(obj['clean']).content
        obj['unit'] = page[0]
        objs.append(obj)

    path = os.path.join(l.TOPDIR, 'wiki.json')
    os.remove(path)
    json.dump(objs, open(path, 'w'), indent=4, sort_keys=True)

    print '\n---> All done.\n'


############################################################################### 
Example #15
Source File: tests.py    From quantulum with MIT License 5 votes
def wiki_test(page='CERN'):
    """Download a wikipedia page and test the parser on its content.

    Pages full of units:
        CERN
        Hubble_Space_Telescope,
        Herschel_Space_Observatory
    """
    content = wikipedia.page(page).content
    parsed = p.parse(content)
    parts = int(round(len(content) * 1.0 / 1000))

    print
    end_char = 0
    for num, chunk in enumerate(range(parts)):
        _ = os.system('clear')
        print
        quants = [j for j in parsed if chunk * 1000 < j.span[0] < (chunk + 1) *
                  1000]
        beg_char = max(chunk * 1000, end_char)
        text, end_char = embed_text(quants, beg_char, chunk, content)
        print COLOR2 % text
        print
        try:
            _ = raw_input('--------- End part %d of %d\n' % (num + 1, parts))
        except (KeyboardInterrupt, EOFError):
            return


############################################################################### 
Example #16
Source File: streaming.py    From ray with Apache License 2.0 5 votes
def get_new_article(self):
        # Get the next wikipedia article.
        article = wikipedia.page(self.title_stream.next()).content
        # Count the words and store the result.
        self.word_counts.append(Counter(article.split(" ")))
        self.num_articles_processed += 1 
Example #17
Source File: Self.py    From CyberTK-Self with GNU General Public License v2.0 5 votes
def translate(to_translate, to_language="auto", language="auto"):
    bahasa_awal = "auto"
    bahasa_tujuan = to_language
    kata = to_translate
    url = 'https://translate.google.com/m?sl=%s&tl=%s&ie=UTF-8&prev=_m&q=%s' % (bahasa_awal, bahasa_tujuan, kata.replace(" ", "+"))
    agent = {'User-Agent':'Mozilla/5.0'}
    cari_hasil = 'class="t0">'
    request = urllib2.Request(url, headers=agent)
    page = urllib2.urlopen(request).read()
    result = page[page.find(cari_hasil)+len(cari_hasil):]
    result = result.split("<")[0]
    return result 
Example #18
Source File: BuscadorPersonas.py    From osint-suite-tools with GNU General Public License v3.0 5 votes
def searchWikipedia(target):

    try:
        wikipedia.set_lang("es")
        d0 = wikipedia.search(target)

        if d0:
            print()
            print("|----[INFO][WIKIPEDIA][>] ")
            print("     |----[INFO][SEARCH][>] ")
            print("     - Resultados encontrados: ")
            for r in d0:
                print("     - " + r)
        else:
            print("|----[INFO][WIKIPEDIA][>] No aparecen resultados en WIKIPEDIA.")

    except Exception:
        print("[!][WARNING][WIKIPEDIA][>] Error en la API...")

    try:
        d1 = wikipedia.page(target)

        linksWIKI = d1.links
        urlWIKI = d1.url

        if d1:
            print("     |----[INFO][TAGS][>] ")
            for l in linksWIKI:
                print("     - " + l)
            print("|----[FUENTES][WIKIPEDIA][>] ")
            print("     - " + urlWIKI)
            config.wikipediaData_list.append(urlWIKI)
        else:
            print("|----[INFO][WIKIPEDIA][>] No aparecen resultados en WIKIPEDIA.")
    
    except Exception:
        print("[!][WARNING][WIKIPEDIA][>] Error en la API o no aparecen resultados...")

#Funciones para buscar en Youtube 
Example #19
Source File: wiki.py    From Jarvis with MIT License 5 votes
def content(
            self,
            title=None,
            pageid=None,
            auto_suggest=True,
            redirect=True,
            preload=False):
        """Returns plain text content of query's page, excluding images, tables and other data."""
        try:
            page = wikipedia.page(title, pageid=pageid, auto_suggest=auto_suggest,
                                  redirect=redirect, preload=preload)
            return page.content
        except wikipedia.exceptions.PageError:
            return "No page matches, try another item."
        except wikipedia.exceptions.DisambiguationError as error:
            return error.options[:5] 
Example #20
Source File: wiki.py    From Jarvis with MIT License 5 votes
def summary(self, query, sentences=0, chars=0):
        """Returns a plain text summary from the query's page."""
        try:
            return wikipedia.summary(query, sentences, chars)
        except wikipedia.exceptions.PageError:
            return "No page matches, try another item."
        except wikipedia.exceptions.DisambiguationError as error:
            return error.options[:5] 
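Both Jarvis helpers lean on the wikipedia package's summary API; for reference, wikipedia.summary(query, sentences=0, chars=0) truncates by sentence count first, then by character count, and returns the full lead section when both are left at 0.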
Example #21
Source File: wikipedia.py    From pyconjpbot with MIT License 5 votes
def wikipedia_page(message, option, query):
    """
    Return the result of a Wikipedia search
    """
    if query == 'help':
        return

    # set language
    lang = 'ja'
    if option:
        _, lang = option.split('-')
    wikipedia.set_lang(lang)

    try:
        # search with query
        results = wikipedia.search(query)
    except Exception:
        botsend(message, '指定された言語 `{}` は存在しません'.format(lang))
        return

    # get first result
    if results:
        page = wikipedia.page(results[0])

        attachments = [{
            'fallback': 'Wikipedia: {}'.format(page.title),
            'pretext': 'Wikipedia: <{}|{}>'.format(page.url, page.title),
            'text': page.summary,
        }]
        botwebapi(message, attachments)
    else:
        botsend(message, '`{}` に該当するページはありません'.format(query)) 
Example #22
Source File: util.py    From qb with MIT License 5 votes
def download_pages():
    bonus_questions = BonusQuestionDatabase().all_questions()
    train_answers = set()
    for q in bonus_questions.values():
        train_answers.update(q.pages)
    
    if os.path.isfile(BONUS_ANSWER_PAGES):
        with open(BONUS_ANSWER_PAGES, 'rb') as f:
            try:
                pages = pickle.load(f)
                print('loaded {} pages'.format(len(pages)))
            except EOFError:
                pages = dict()
    else:
        pages = dict()

    train_answers = train_answers - set(pages.keys())

    for answer in tqdm(train_answers):
        if answer in pages:
            continue
        try:
            page = wikipedia.page(answer)
        except (DisambiguationError, PageError, ConnectionError) as e:
            if isinstance(e, DisambiguationError):
                pages[answer] = None
                continue
            if isinstance(e, PageError):
                pages[answer] = None
                continue
            if isinstance(e, ConnectionError):
                break
        try:
            pages[answer] = [page.title, page.content, page.links,
                             page.summary, page.categories, page.url, page.pageid]
        except ConnectionError:
            break

    with open(BONUS_ANSWER_PAGES, 'wb') as f:
        pickle.dump(pages, f) 
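The pickle file doubles as a resume point here: answers fetched on a previous run are subtracted from the work list up front, and a ConnectionError breaks out of the loop so everything collected so far is still written back at the end.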
Example #23
Source File: Self.py    From CyberTK-Self with GNU General Public License v2.0 5 votes
def _images_get_all_items(page):
    items = []
    while True:
        item, end_content = _images_get_next_item(page)
        if item == "no_links":
            break
        else:
            items.append(item)      #Append all the links in the list named 'Links'
            time.sleep(0.1)        #Timer could be used to slow down the request for image downloads
            page = page[end_content:]
    return items 
Example #24
Source File: Self.py    From CyberTK-Self with GNU General Public License v2.0 5 votes
def download_page(url):
    version = (3,0)
    cur_version = sys.version_info
    if cur_version >= version:     #If the Current Version of Python is 3.0 or above
        import urllib.request    # urllib library for extracting web pages
        try:
            headers = {}
            headers['User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
            req = urllib.request.Request(url, headers=headers)
            resp = urllib.request.urlopen(req)
            respData = str(resp.read())
            return respData
        except Exception as e:
            print(str(e))
    else:                        #If the Current Version of Python is 2.x
        import urllib2
        try:
            headers = {}
            headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"
            req = urllib2.Request(url, headers = headers)
            response = urllib2.urlopen(req)
            page = response.read()
            return page
        except Exception:
            return "Page Not found"

#Finding 'Next Image' from the given raw page 
Example #25
Source File: search.py    From W.I.L.L with MIT License 4 votes
def search_google(query):
    '''Search google and determine if wikipedia is in it'''
    search_object = google.search(query)
    # Determine if a wikipedia url is in the first four results
    urls = []
    for i in range(0, 4):
        url = search_object.__next__()
        urls.append(url)
        if "wikipedia.org/wiki" in url:
            wikipedia_search = wikipedia.search(query)[0]
            url = wikipedia.page(wikipedia_search).url
            response = wikipedia.summary(wikipedia_search) + " ({0})".format(url)
            return response
    #If there were no wikipedia pages
    first_url = urls[0]
    try:
        article = Article(first_url)
        article.download()
        article.parse()
        article.nlp()
        article_summary = article.summary
        article_title = article.title
        return "{0}\n{1} - ({2})".format(
            article_summary, article_title, first_url
        )

    except Exception as article_exception:
        try:
            log.debug("Got error {0}, {1} while using newspaper, switching to bs4".format(
            article_exception.message,article_exception.args
            ))
            html = requests.get(first_url).text
            #Parse the html using bs4
            soup = BeautifulSoup(html, "html.parser")
            [s.extract() for s in soup(['style', 'script', '[document]', 'head', 'title'])]
            text = soup.getText()
            # break into lines and remove leading and trailing space on each
            lines = (line.strip() for line in text.splitlines())
            # break multi-headlines into a line each
            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
            # drop blank lines
            soup_text = '\n'.join(chunk for chunk in chunks if " " in chunk)
            response = format(soup_text) + " ({0})".format(first_url)
            return response
        except Exception as search_exception:
            log.info("Error {0},{1} occurred while searching query {2}".format(
                search_exception.message, search_exception.args, query
            ))
            return "Error encountered on query {0}".format(query) 
Example #26
Source File: wiki.py    From JARVIS-on-Messenger with MIT License 4 votes
def process(input, entities):
    output = {}
    try:
        query = entities['wiki'][0]['value']
        data = wikipedia.page(query)
        output['input'] = input
        template = TextTemplate('Wikipedia summary of ' + data.title + ':\n' + data.summary)
        text = template.get_text()
        template = ButtonTemplate(text)
        template.add_web_url('Wikipedia Link', data.url)
        output['output'] = template.get_message()
        output['success'] = True
    except wikipedia.exceptions.DisambiguationError as e:
        template = GenericTemplate()
        template.set_image_aspect_ratio_to_square()
        image_url = 'https://en.wikipedia.org/static/images/project-logos/enwiki-2x.png'
        pageids = set()
        for option in e.options:
            try:
                data = wikipedia.page(option)
                if data.pageid in pageids:
                    continue
                pageids.add(data.pageid)
                buttons = ButtonTemplate()
                buttons.add_web_url('Wikipedia Link', data.url)
                payload = {
                    'intent': 'wiki',
                    'entities': {
                        'wiki': [
                            {
                                'value': option
                            }
                        ]
                    }
                }
                buttons.add_postback('Wikipedia Summary', payload)
                template.add_element(title=data.title, item_url=data.url, image_url=image_url,
                                     buttons=buttons.get_buttons())
            except (wikipedia.exceptions.PageError, wikipedia.exceptions.DisambiguationError):
                pass  # Some suggestions don't map to a page; skipping them..
        output['input'] = input
        output['output'] = template.get_message()
        output['success'] = True
    except Exception:
        error_message = 'I couldn\'t find any wikipedia results matching your query.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n  - wikipedia barack'
        error_message += '\n  - html wiki'
        error_message += '\n  - who is sachin tendulkar'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output 
Example #27
Source File: special.py    From EmiliaHikari with GNU General Public License v3.0 4 votes
def wiki(update, context):
	msg = update.effective_message
	chat_id = update.effective_chat.id
	args = update.effective_message.text.split(None, 1)
	message = update.effective_message
	getlang = langsql.get_lang(chat_id)
	if str(getlang) == "id":
		wikipedia.set_lang("id")
	else:
		wikipedia.set_lang("en")
	try:
		teks = args[1]
		pagewiki = wikipedia.page(teks)
	except wikipedia.exceptions.PageError:
		send_message(update.effective_message, tl(update.effective_message, "Hasil tidak ditemukan"))
		return
	except wikipedia.exceptions.DisambiguationError as refer:
		rujuk = str(refer).split("\n")
		if len(rujuk) >= 6:
			batas = 6
		else:
			batas = len(rujuk)
		teks = ""
		for x in range(batas):
			if x == 0:
				if getlang == "id":
					teks += rujuk[x].replace('may refer to', 'dapat merujuk ke')+"\n"
				else:
					teks += rujuk[x]+"\n"
			else:
				teks += "- `"+rujuk[x]+"`\n"
		send_message(update.effective_message, teks, parse_mode="markdown")
		return
	except IndexError:
		send_message(update.effective_message, tl(update.effective_message, "Tulis pesan untuk mencari dari sumber wikipedia"))
		return
	judul = pagewiki.title
	summary = pagewiki.summary
	if update.effective_message.chat.type == "private":
		send_message(update.effective_message, tl(update.effective_message, "Hasil dari {} adalah:\n\n<b>{}</b>\n{}").format(teks, judul, summary), parse_mode=ParseMode.HTML)
	else:
		if len(summary) >= 200:
			judul = pagewiki.title
			summary = summary[:200]+"..."
			button = InlineKeyboardMarkup([[InlineKeyboardButton(text=tl(update.effective_message, "Baca Lebih Lengkap"), url="t.me/{}?start=wiki-{}".format(context.bot.username, teks.replace(' ', '_')))]])
		else:
			button = None
		send_message(update.effective_message, tl(update.effective_message, "Hasil dari {} adalah:\n\n<b>{}</b>\n{}").format(teks, judul, summary), parse_mode=ParseMode.HTML, reply_markup=button) 
Example #28
Source File: article.py    From wikipedia-question-generator with MIT License 4 votes
def evaluate_sentence(self, sentence):
        if sentence.tags[0][1] == 'RB' or len(sentence.words) < 6:
            # This sentence starts with an adverb or is shorter than six words
            # and probably won't be a good fit
            return None

        tag_map = {word.lower(): tag for word, tag in sentence.tags}

        replace_nouns = []
        for word, tag in sentence.tags:
            # For now, only blank out non-proper nouns that don't appear in the article title
            if tag == 'NN' and word not in self.page.title:
                # Is it in a noun phrase? If so, blank out the last two words in that phrase
                for phrase in sentence.noun_phrases:
                    if phrase[0] == '\'':
                        # If it starts with an apostrophe, ignore it
                        # (this is a weird error that should probably
                        # be handled elsewhere)
                        break

                    if word in phrase:
                        # Blank out the last two words in this phrase
                        for phrase_word in phrase.split()[-2:]:
                            replace_nouns.append(phrase_word)
                        break

                # If we couldn't find the word in any phrases,
                # replace it on its own
                if len(replace_nouns) == 0:
                    replace_nouns.append(word)
                break
        
        if len(replace_nouns) == 0:
            # Return none if we found no words to replace
            return None

        trivia = {
            'title': self.page.title,
            'url': self.page.url,
            'answer': ' '.join(replace_nouns)
        }

        if len(replace_nouns) == 1:
            # If we're only replacing one word, use WordNet to find similar words
            trivia['similar_words'] = self.get_similar_words(replace_nouns[0])
        else:
            # If we're replacing a phrase, don't bother - it's too unlikely to make sense
            trivia['similar_words'] = []

        # Blank out our replace words (only the first occurrence of the word in the sentence)
        replace_phrase = ' '.join(replace_nouns)
        blanks_phrase = ('__________ ' * len(replace_nouns)).strip()

        expression = re.compile(re.escape(replace_phrase), re.IGNORECASE)
        sentence = expression.sub(blanks_phrase, str(sentence), count=1)

        trivia['question'] = sentence
        return trivia