Python wikipedia.page() Examples
The following are 28 code examples of wikipedia.page(), drawn from open-source projects.
You can go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the wikipedia module, or try the search function.
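Before diving into the project examples, here is a minimal, self-contained sketch of the typical wikipedia.page() workflow. It assumes the wikipedia package from PyPI (pip install wikipedia); the query string is only an illustration, and the exception types are the same ones handled throughout the examples below.

import wikipedia

# Minimal sketch: search for candidate titles, fetch the first match,
# and handle the two failure modes seen throughout the examples below.
try:
    results = wikipedia.search("Python (programming language)")
    if results:
        page = wikipedia.page(results[0], auto_suggest=False)
        print(page.title)          # canonical page title
        print(page.url)            # full URL of the article
        print(page.summary[:200])  # first 200 characters of the plain-text summary
except wikipedia.exceptions.DisambiguationError as e:
    print("Ambiguous query; options include:", e.options[:5])
except wikipedia.exceptions.PageError:
    print("No page matched the query.")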
Example #1
Source File: fetch_wiki.py From adam_qas with GNU General Public License v3.0 | 8 votes |
def search_wiki(keywords, number_of_search, wiki_pages):
    suggestion = False
    for word in range(0, len(keywords) - 1):
        # print(keywords[word], ">>")
        result_set = wikipedia.search(keywords[word], number_of_search, suggestion)
        for term in result_set:
            try:
                page = wikipedia.page(term, preload=False)
                page_title = page.title
                # page_summary = page.summary
                page_content = page.content
                wiki_pages[page_title] = page_content
            except wikipedia.exceptions.DisambiguationError as error:
                pass
            except wikipedia.exceptions.PageError as error:
                pass
            # print(error.options)
            # print(page_title, len(page_content), type(page_content))
    return wiki_pages
Example #2
Source File: wikipedia.py From Mash-Cogs with GNU General Public License v3.0 | 6 votes |
async def wikipedia(self, ctx, *text):
    """Wikipedia search."""
    if text == ():
        await send_cmd_help(ctx)
        return
    else:
        s = "_"
        search = s.join(text)
        user = ctx.message.author
        wikiLang = 'en'  # Define the Wikipedia language / Most of these are supported » https://nl.wikipedia.org/wiki/ISO_3166-1
        ws = None
        wikipedia.set_lang(wikiLang)  # Set the Wikipedia language.
        try:
            ws = wikipedia.page(search)
            wikiUrl = (ws.url.encode('ascii', 'xmlcharrefreplace'))
            await self.bot.say(wikiUrl.decode("utf8"))
        except:
            await self.bot.say(
                'Sorry {}, no wiki hit, try to rephrase'.format(user))
Example #3
Source File: gen_corpus.py From Living-Audio-Dataset with Apache License 2.0 | 6 votes |
def get_articles(language, no_words, max_no_articles, search, **kwargs):
    """ Retrieve articles from Wikipedia """
    wikipedia.set_rate_limiting(True)  # be polite
    wikipedia.set_lang(language)

    if search is not None:
        titles = wikipedia.search(search, results = max_no_articles)
    else:
        titles = wikipedia.random(pages = max_no_articles)

    articles = []
    current_no_words = 0
    for title in titles:
        print("INFO: loading {}".format(title))
        page = wikipedia.page(title=title)
        content = page.content
        article_no_words = len(content.split())
        current_no_words += article_no_words
        print("INFO: article contains {} words".format(article_no_words))
        articles.append((title, content))
        if current_no_words >= no_words:
            break
    return articles
Example #4
Source File: simplewikipedia.py From hangoutsbot with GNU Affero General Public License v3.0 | 6 votes |
def wiki(bot, event, *args):
    """lookup a term on Wikipedia"""
    term = " ".join(args)
    if not term:
        return

    try:
        page = wikipedia.page(term, auto_suggest=False)

        summary = page.summary.strip()
        summary = summary.replace('\r\n', '\n').replace('\r', '\n')
        summary = re.sub('\n+', "\n", summary).replace('\n', '<br /><br />')

        source = _('<i>source: <a href="{}">{}</a></i>').format(page.url, page.url)

        html_text = '<b>"{}"</b><br /><br />{}<br /><br />{}'.format(term, summary, source)
    except wikipedia.exceptions.PageError:
        html_text = _("<i>no entry found for {}</i>").format(term)
    except wikipedia.exceptions.DisambiguationError as e:
        exception_text = str(e).strip().replace("\n", "<br />")
        html_text = "<i>{}</i>".format(exception_text)

    yield from bot.coro_send_message(event.conv, html_text)
Example #5
Source File: searching.py From Utlyz-CLI with Apache License 2.0 | 6 votes |
def cli(google, wiki):
    browser = mechanize.Browser()
    browser.set_handle_robots(False)   # Allows everything to be written
    cookies = mechanize.CookieJar()
    browser.set_cookiejar(cookies)
    browser.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.517.41 Safari/534.7')]
    browser.set_handle_refresh(False)  # Sometimes hangs without this
    if google:
        query = raw_input("Enter the topic you want to search about: ")
        for link in search(query, tld="co.in", num=10, stop=1, pause=2):
            print link
    if wiki:
        wiki_topic = raw_input('Enter the topic you want to read about: ')
        result = wikipedia.page(title=wiki_topic, auto_suggest=True, redirect=True, preload=False)
        wiki_content = result.content
        print wiki_content
Example #6
Source File: app.py From Utlyz-CLI with Apache License 2.0 | 6 votes |
def wiki(bot, update, args):
    try:
        topic = ""
        for arg in args:
            topic += arg + " "
        summary = wikipedia.summary(topic, sentences = 30)
        page = wikipedia.page(topic)
        extra = "\nFor more details visit " + page.url
        summary += extra
        bot.sendChatAction(chat_id = update.message.chat_id, action = ChatAction.TYPING)
        bot.sendMessage(chat_id = update.message.chat_id, parse_mode=ParseMode.HTML, text = summary)
    except wikipedia.exceptions.DisambiguationError as e:
        error = "Please be more specific with your search query as there are a couple of other options meaning the same."
        for options in e.options:
            error += options.decode("utf-8","ignore") + '\n'
        bot.sendChatAction(chat_id = update.message.chat_id, action = ChatAction.TYPING)
        bot.sendMessage(chat_id = update.message.chat_id, text = error)
    except wikipedia.exceptions.PageError:
        error = "No messages could be found with the topic you entered!"
        bot.sendChatAction(chat_id = update.message.chat_id, action = ChatAction.TYPING)
        bot.sendMessage(chat_id = update.message.chat_id, text = error)
Example #7
Source File: wordcount.py From ray with Apache License 2.0 | 5 votes |
def get_next(self):
    if self.done:
        return None  # Source exhausted
    while True:
        if self.article_done:
            try:
                # Try next title
                next_title = next(self.title_reader)
            except StopIteration:
                self.done = True  # Source exhausted
                return None
            # Get next article
            logger.debug("Next article: {}".format(next_title))
            article = wikipedia.page(next_title).content
            # Split article in sentences
            self.sentences = iter(article.split("."))
            self.article_done = False
        try:
            # Try next sentence
            sentence = next(self.sentences)
            logger.debug("Next sentence: {}".format(sentence))
            return sentence
        except StopIteration:
            self.article_done = True

# Splits input line into words and
# outputs records of the form (word,1)
Example #8
Source File: fetch_tax_info.py From idseq-dag with MIT License | 5 votes |
def fetch_ncbi_wiki_map(num_threads, batch_size, taxid_list, taxid2wikidict):
    ''' Use Entrez API to fetch taxonid -> wikipedia page mapping '''
    threads = []
    semaphore = threading.Semaphore(num_threads)
    mutex = TraceLock("fetch_ncbi_wiki_map", threading.RLock())
    batch = []
    with open(taxid_list, 'r') as taxf:
        for line in taxf:
            taxid = line.rstrip()
            if taxid == 'taxid':
                continue  # header
            batch.append(taxid)
            if len(batch) >= batch_size:
                semaphore.acquire()
                t = threading.Thread(
                    target=PipelineStepFetchTaxInfo.get_taxid_mapping_for_batch,
                    args=[batch, taxid2wikidict, mutex, semaphore])
                t.start()
                threads.append(t)
                batch = []
    if len(batch) > 0:
        semaphore.acquire()
        t = threading.Thread(
            target=PipelineStepFetchTaxInfo.get_taxid_mapping_for_batch,
            args=[batch, taxid2wikidict, mutex, semaphore])
        t.start()
        threads.append(t)
    for t in threads:
        t.join()
Example #9
Source File: fetch_tax_info.py From idseq-dag with MIT License | 5 votes |
def get_wiki_content_for_page(taxid, pageid, taxname, taxid2wikicontent, mutex, semaphore, max_attempt=3):
    ''' Fetch wiki content for pageid '''
    for attempt in range(max_attempt):
        try:
            page = None
            if pageid:
                log.write(f"fetching wiki {pageid} for {taxid}")
                page = wikipedia.page(pageid=pageid)
            elif taxname:
                search_results = wikipedia.search(taxname)
                if len(search_results) > 0:
                    wikiname = str(search_results[0])
                    if taxname.lower() == wikiname.lower():
                        page = wikipedia.page(wikiname)
                if not page:
                    # query the page directly
                    try:
                        page = wikipedia.page(taxname.replace(" ", "_"))
                    except:
                        page = None
            if page:
                output = {
                    "pageid": page.pageid,
                    "description": page.content[:1000],
                    "title": page.title,
                    "summary": page.summary
                }
                with mutex:
                    taxid2wikicontent[taxid] = output
                break
        except:
            log.write(f"having trouble fetching {taxid} wiki {pageid} attempt {attempt}")
    semaphore.release()
Example #10
Source File: rhodiola.py From rhodiola with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_cities(proper_noun):
    page = wikipedia.page(proper_noun)
    geo = GeoText(page.summary)
    return list(set(geo.cities))
Example #11
Source File: rhodiola.py From rhodiola with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_year(proper_noun):
    try:
        page = wikipedia.page(proper_noun)
        year = re.findall('[1-3][0-9]{3}', page.summary)
        return year[0]
    except:
        return None
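A possible way to call the two rhodiola helpers above — purely illustrative, since the output depends on the live Wikipedia summary and on geotext's GeoText parser:

# Hypothetical invocation of get_cities() and get_year(); the proper noun
# is an arbitrary example and results vary with live Wikipedia content.
print(get_cities("Napoleon"))  # cities GeoText finds in the page summary
print(get_year("Napoleon"))    # first year matching [1-3][0-9]{3}, or None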
Example #12
Source File: wiki_search.py From NeuralTripleTranslation with Apache License 2.0 | 5 votes |
def get(query):
    return find_candidates(find_json(query))

# with open("testoutput.txt", "w") as text_file:
#     for i in get('video-assisted thoracoscopic'):
#         try:
#             # print get('video-assisted thoracoscopic')
#             # print wikipedia.page(i)
#             # allwiki.append(wikipedia.page(i).content)
#             text_file.write('%s\n\n' % wikipedia.page(i).content.encode('utf8'))
#             # print type(wikipedia.page(i).content.encode('utf8'))
#             # print wikipedia.page(i).content
#         except:
#             print "Unexpected error:", sys.exc_info()[0]
# print get('video-assisted thoracoscopic')
# for keys in world_dict.keys():
#     for term in world_dict.get(keys, keys):
#         if wikipedia.search(term):
#             term = wikipedia.search(term)[0]
#             wikipage = wikipedia.page(term)
#             content = wikipage.content
#             allwiki.append(content)
# # print (wikipedia.page("Georgia_(country)").content)
# # print wikipedia.search('fraction of inspired o2')[0]
# text_file.close()
Example #13
Source File: article.py From wikipedia-question-generator with MIT License | 5 votes |
def __init__(self, title):
    self.page = wikipedia.page(title)
    self.summary = TextBlob(self.page.summary)
Example #14
Source File: classifier.py From quantulum with MIT License | 5 votes |
def download_wiki():
    """Download WikiPedia pages of ambiguous units."""
    ambiguous = [i for i in l.UNITS.items() if len(i[1]) > 1]
    ambiguous += [i for i in l.DERIVED_ENT.items() if len(i[1]) > 1]
    pages = set([(j.name, j.uri) for i in ambiguous for j in i[1]])

    print
    objs = []
    for num, page in enumerate(pages):
        obj = {'url': page[1]}
        obj['_id'] = obj['url'].replace('https://en.wikipedia.org/wiki/', '')
        obj['clean'] = obj['_id'].replace('_', ' ')
        print '---> Downloading %s (%d of %d)' % \
              (obj['clean'], num + 1, len(pages))
        obj['text'] = wikipedia.page(obj['clean']).content
        obj['unit'] = page[0]
        objs.append(obj)

    path = os.path.join(l.TOPDIR, 'wiki.json')
    os.remove(path)
    json.dump(objs, open(path, 'w'), indent=4, sort_keys=True)

    print '\n---> All done.\n'

###############################################################################
Example #15
Source File: tests.py From quantulum with MIT License | 5 votes |
def wiki_test(page='CERN'):
    """Download a wikipedia page and test the parser on its content.

    Pages full of units:
        CERN
        Hubble_Space_Telescope,
        Herschel_Space_Observatory
    """
    content = wikipedia.page(page).content
    parsed = p.parse(content)
    parts = int(round(len(content) * 1.0 / 1000))

    print
    end_char = 0
    for num, chunk in enumerate(range(parts)):
        _ = os.system('clear')
        print
        quants = [j for j in parsed if chunk * 1000 < j.span[0] < (chunk + 1) * 1000]
        beg_char = max(chunk * 1000, end_char)
        text, end_char = embed_text(quants, beg_char, chunk, content)
        print COLOR2 % text
        print
        try:
            _ = raw_input('--------- End part %d of %d\n' % (num + 1, parts))
        except (KeyboardInterrupt, EOFError):
            return

###############################################################################
Example #16
Source File: streaming.py From ray with Apache License 2.0 | 5 votes |
def get_new_article(self):
    # Get the next wikipedia article.
    article = wikipedia.page(self.title_stream.next()).content
    # Count the words and store the result.
    self.word_counts.append(Counter(article.split(" ")))
    self.num_articles_processed += 1
Example #17
Source File: Self.py From CyberTK-Self with GNU General Public License v2.0 | 5 votes |
def translate(to_translate, to_language="auto", language="auto"):
    bahasa_awal = "auto"         # source language
    bahasa_tujuan = to_language  # target language
    kata = to_translate          # text to translate
    url = 'https://translate.google.com/m?sl=%s&tl=%s&ie=UTF-8&prev=_m&q=%s' % (bahasa_awal, bahasa_tujuan, kata.replace(" ", "+"))
    agent = {'User-Agent': 'Mozilla/5.0'}
    cari_hasil = 'class="t0">'   # marker for the result element
    request = urllib2.Request(url, headers=agent)
    page = urllib2.urlopen(request).read()
    result = page[page.find(cari_hasil) + len(cari_hasil):]
    result = result.split("<")[0]
    return result
Example #18
Source File: BuscadorPersonas.py From osint-suite-tools with GNU General Public License v3.0 | 5 votes |
def searchWikipedia(target):
    try:
        wikipedia.set_lang("es")
        d0 = wikipedia.search(target)
        if d0:
            print()
            print("|----[INFO][WIKIPEDIA][>] ")
            print(" |----[INFO][SEARCH][>] ")
            print(" - Resultados encontrados: ")  # "Results found:"
            for r in d0:
                print(" - " + r)
        else:
            print("|----[INFO][WIKIPEDIA][>] No aparecen resultados en WIKIPEDIA.")  # "No results found on WIKIPEDIA."
    except:
        print("[!][WARNING][WIKIPEDIA][>] Error en la API...")  # "Error in the API..."

    try:
        d1 = wikipedia.page(target)
        linksWIKI = d1.links
        urlWIKI = d1.url
        if d1:
            print(" |----[INFO][TAGS][>] ")
            for l in linksWIKI:
                print(" - " + l)
            print("|----[FUENTES][WIKIPEDIA][>] ")
            print(" - " + urlWIKI)
            config.wikipediaData_list.append(urlWIKI)
        else:
            print("|----[INFO][WIKIPEDIA][>] No aparecen resultados en WIKIPEDIA.")
    except:
        print("[!][WARNING][WIKIPEDIA][>] Error en la API o no aparecen resultados...")  # "Error in the API or no results..."

# Functions for searching YouTube
Example #19
Source File: wiki.py From Jarvis with MIT License | 5 votes |
def content(self, title=None, pageid=None, auto_suggest=True, redirect=True, preload=False):
    """Returns plain text content of query's page, excluding images, tables and other data."""
    try:
        page = wikipedia.page(title)
        return page.content
    except wikipedia.exceptions.PageError:
        return "No page matches, try another item."
    except wikipedia.exceptions.DisambiguationError as error:
        return error.options[:5]
Example #20
Source File: wiki.py From Jarvis with MIT License | 5 votes |
def summary(self, query, sentences=0, chars=0):
    """Returns a plain text summary from the query's page."""
    try:
        return wikipedia.summary(query, sentences, chars)
    except wikipedia.exceptions.PageError:
        return "No page matches, try another item."
    except wikipedia.exceptions.DisambiguationError as error:
        return error.options[:5]
Example #21
Source File: wikipedia.py From pyconjpbot with MIT License | 5 votes |
def wikipedia_page(message, option, query):
    """ Return the result of a Wikipedia search """
    if query == 'help':
        return

    # set language
    lang = 'ja'
    if option:
        _, lang = option.split('-')
    wikipedia.set_lang(lang)

    try:
        # search with query
        results = wikipedia.search(query)
    except:
        # "The specified language `{}` does not exist"
        botsend(message, '指定された言語 `{}` は存在しません'.format(lang))
        return

    # get first result
    if results:
        page = wikipedia.page(results[0])
        attachments = [{
            'fallback': 'Wikipedia: {}'.format(page.title),
            'pretext': 'Wikipedia: <{}|{}>'.format(page.url, page.title),
            'text': page.summary,
        }]
        botwebapi(message, attachments)
    else:
        # "There is no page matching `{}`"
        botsend(message, '`{}` に該当するページはありません'.format(query))
Example #22
Source File: util.py From qb with MIT License | 5 votes |
def download_pages():
    bonus_questions = BonusQuestionDatabase().all_questions()
    train_answers = set()
    for q in bonus_questions.values():
        train_answers.update(q.pages)

    if os.path.isfile(BONUS_ANSWER_PAGES):
        with open(BONUS_ANSWER_PAGES, 'rb') as f:
            try:
                pages = pickle.load(f)
                print('loaded {} pages'.format(len(pages)))
            except EOFError:
                pages = dict()
    else:
        pages = dict()

    train_answers = train_answers - set(pages.keys())
    for answer in tqdm(train_answers):
        if answer in pages:
            continue
        try:
            page = wikipedia.page(answer)
        except (DisambiguationError, PageError, ConnectionError) as e:
            if isinstance(e, DisambiguationError):
                pages[answer] = None
                continue
            if isinstance(e, PageError):
                pages[answer] = None
                continue
            if isinstance(e, ConnectionError):
                break
        try:
            pages[answer] = [page.title, page.content, page.links,
                             page.summary, page.categories, page.url, page.pageid]
        except ConnectionError:
            break

    with open(BONUS_ANSWER_PAGES, 'wb') as f:
        pickle.dump(pages, f)
Example #23
Source File: Self.py From CyberTK-Self with GNU General Public License v2.0 | 5 votes |
def _images_get_all_items(page):
    items = []
    while True:
        item, end_content = _images_get_next_item(page)
        if item == "no_links":
            break
        else:
            items.append(item)  # Append all the links in the list named 'Links'
            time.sleep(0.1)     # Timer could be used to slow down the request for image downloads
            page = page[end_content:]
    return items
Example #24
Source File: Self.py From CyberTK-Self with GNU General Public License v2.0 | 5 votes |
def download_page(url):
    version = (3, 0)
    cur_version = sys.version_info
    if cur_version >= version:  # If the Current Version of Python is 3.0 or above
        import urllib.request   # urllib library for Extracting web pages
        try:
            headers = {}
            headers['User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
            req = urllib.request.Request(url, headers=headers)
            resp = urllib.request.urlopen(req)
            respData = str(resp.read())
            return respData
        except Exception as e:
            print(str(e))
    else:  # If the Current Version of Python is 2.x
        import urllib2
        try:
            headers = {}
            headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"
            req = urllib2.Request(url, headers=headers)
            response = urllib2.urlopen(req)
            page = response.read()
            return page
        except:
            return "Page Not found"

# Finding 'Next Image' from the given raw page
Example #25
Source File: search.py From W.I.L.L with MIT License | 4 votes |
def search_google(query):
    '''Search google and determine if wikipedia is in it'''
    search_object = google.search(query)
    # Determine if a wikipedia url is in the first 5 searches
    urls = []
    for i in range(0, 4):
        url = search_object.__next__()
        urls.append(url)
        if "wikipedia.org/wiki" in url:
            wikipedia_search = wikipedia.search(query)[0]
            url = wikipedia.page(wikipedia_search).url
            response = wikipedia.summary(wikipedia_search) + " ({0})".format(url)
            return response
    # If there were no wikipedia pages
    first_url = urls[0]
    try:
        article = Article(first_url)
        article.download()
        article.parse()
        article.nlp()
        article_summary = article.summary
        article_title = article.title
        return "{0}\n{1} - ({2})".format(article_summary, article_title, first_url)
    except Exception as article_exception:
        try:
            log.debug("Got error {0}, {1} while using newspaper, switching to bs4".format(
                article_exception.message, article_exception.args))
            html = requests.get(first_url).text
            # Parse the html using bs4
            soup = BeautifulSoup(html, "html.parser")
            [s.extract() for s in soup(['style', 'script', '[document]', 'head', 'title'])]
            text = soup.getText()
            # break into lines and remove leading and trailing space on each
            lines = (line.strip() for line in text.splitlines())
            # break multi-headlines into a line each
            chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
            # drop blank lines
            soup_text = '\n'.join(chunk for chunk in chunks if " " in chunk)
            response = format(soup_text) + " ({0})".format(first_url)
            return response
        except Exception as search_exception:
            log.info("Error {0},{1} occurred while searching query {2}".format(
                search_exception.message, search_exception.args, query))
            return "Error encountered on query {0}".format(query)
Example #26
Source File: wiki.py From JARVIS-on-Messenger with MIT License | 4 votes |
def process(input, entities):
    output = {}
    try:
        query = entities['wiki'][0]['value']
        data = wikipedia.page(query)
        output['input'] = input
        template = TextTemplate('Wikipedia summary of ' + data.title + ':\n' + data.summary)
        text = template.get_text()
        template = ButtonTemplate(text)
        template.add_web_url('Wikipedia Link', data.url)
        output['output'] = template.get_message()
        output['success'] = True
    except wikipedia.exceptions.DisambiguationError as e:
        template = GenericTemplate()
        template.set_image_aspect_ratio_to_square()
        image_url = 'https://en.wikipedia.org/static/images/project-logos/enwiki-2x.png'
        pageids = set()
        for option in e.options:
            try:
                data = wikipedia.page(option)
                if data.pageid in pageids:
                    continue
                pageids.add(data.pageid)
                buttons = ButtonTemplate()
                buttons.add_web_url('Wikipedia Link', data.url)
                payload = {
                    'intent': 'wiki',
                    'entities': {
                        'wiki': [
                            {
                                'value': option
                            }
                        ]
                    }
                }
                buttons.add_postback('Wikipedia Summary', payload)
                template.add_element(title=data.title, item_url=data.url,
                                     image_url=image_url, buttons=buttons.get_buttons())
            except (wikipedia.exceptions.PageError, wikipedia.exceptions.DisambiguationError):
                pass  # Some suggestions don't map to a page; skipping them..
        output['input'] = input
        output['output'] = template.get_message()
        output['success'] = True
    except:
        error_message = 'I couldn\'t find any wikipedia results matching your query.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n - wikipedia barack'
        error_message += '\n - html wiki'
        error_message += '\n - who is sachin tendulkar'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output
Example #27
Source File: special.py From EmiliaHikari with GNU General Public License v3.0 | 4 votes |
def wiki(update, context):
    msg = update.effective_message
    chat_id = update.effective_chat.id
    args = update.effective_message.text.split(None, 1)
    teks = args[1]
    message = update.effective_message
    getlang = langsql.get_lang(chat_id)
    if str(getlang) == "id":
        wikipedia.set_lang("id")
    else:
        wikipedia.set_lang("en")
    try:
        pagewiki = wikipedia.page(teks)
    except wikipedia.exceptions.PageError:
        # "Hasil tidak ditemukan" = "No results found"
        send_message(update.effective_message, tl(update.effective_message, "Hasil tidak ditemukan"))
        return
    except wikipedia.exceptions.DisambiguationError as refer:
        rujuk = str(refer).split("\n")
        if len(rujuk) >= 6:
            batas = 6
        else:
            batas = len(rujuk)
        teks = ""
        for x in range(batas):
            if x == 0:
                if getlang == "id":
                    teks += rujuk[x].replace('may refer to', 'dapat merujuk ke') + "\n"
                else:
                    teks += rujuk[x] + "\n"
            else:
                teks += "- `" + rujuk[x] + "`\n"
        send_message(update.effective_message, teks, parse_mode="markdown")
        return
    except IndexError:
        # "Tulis pesan untuk mencari dari sumber wikipedia" = "Write a message to search the wikipedia source"
        send_message(update.effective_message, tl(update.effective_message, "Tulis pesan untuk mencari dari sumber wikipedia"))
        return
    judul = pagewiki.title
    summary = pagewiki.summary
    # "Hasil dari {} adalah:" = "The result of {} is:"
    if update.effective_message.chat.type == "private":
        send_message(update.effective_message, tl(update.effective_message, "Hasil dari {} adalah:\n\n<b>{}</b>\n{}").format(teks, judul, summary), parse_mode=ParseMode.HTML)
    else:
        if len(summary) >= 200:
            judul = pagewiki.title
            summary = summary[:200] + "..."
            # "Baca Lebih Lengkap" = "Read More"
            button = InlineKeyboardMarkup([[InlineKeyboardButton(text=tl(update.effective_message, "Baca Lebih Lengkap"), url="t.me/{}?start=wiki-{}".format(context.bot.username, teks.replace(' ', '_')))]])
        else:
            button = None
        send_message(update.effective_message, tl(update.effective_message, "Hasil dari {} adalah:\n\n<b>{}</b>\n{}").format(teks, judul, summary), parse_mode=ParseMode.HTML, reply_markup=button)
Example #28
Source File: article.py From wikipedia-question-generator with MIT License | 4 votes |
def evaluate_sentence(self, sentence):
    if sentence.tags[0][1] == 'RB' or len(sentence.words) < 6:
        # This sentence starts with an adverb or is less than six words long
        # and probably won't be a good fit
        return None

    tag_map = {word.lower(): tag for word, tag in sentence.tags}

    replace_nouns = []
    for word, tag in sentence.tags:
        # For now, only blank out non-proper nouns that don't appear in the article title
        if tag == 'NN' and word not in self.page.title:
            # Is it in a noun phrase? If so, blank out the last two words in that phrase
            for phrase in sentence.noun_phrases:
                if phrase[0] == '\'':
                    # If it starts with an apostrophe, ignore it
                    # (this is a weird error that should probably
                    # be handled elsewhere)
                    break
                if word in phrase:
                    # Blank out the last two words in this phrase
                    [replace_nouns.append(phrase_word) for phrase_word in phrase.split()[-2:]]
                    break
            # If we couldn't find the word in any phrases,
            # replace it on its own
            if len(replace_nouns) == 0:
                replace_nouns.append(word)
            break

    if len(replace_nouns) == 0:
        # Return none if we found no words to replace
        return None

    trivia = {
        'title': self.page.title,
        'url': self.page.url,
        'answer': ' '.join(replace_nouns)
    }

    if len(replace_nouns) == 1:
        # If we're only replacing one word, use WordNet to find similar words
        trivia['similar_words'] = self.get_similar_words(replace_nouns[0])
    else:
        # If we're replacing a phrase, don't bother - it's too unlikely to make sense
        trivia['similar_words'] = []

    # Blank out our replace words (only the first occurrence of the word in the sentence)
    replace_phrase = ' '.join(replace_nouns)
    blanks_phrase = ('__________ ' * len(replace_nouns)).strip()

    expression = re.compile(re.escape(replace_phrase), re.IGNORECASE)
    sentence = expression.sub(blanks_phrase, str(sentence), count=1)

    trivia['question'] = sentence
    return trivia