Python whoosh.index.open_dir() Examples

The following are 12 code examples of whoosh.index.open_dir(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module whoosh.index , or try the search function .
Example #1
Source File: engine.py    From txtorg with MIT License 6 votes vote down vote up
def _init_index(self):

        if not os.path.exists(self.corpus.path):
            os.mkdir(self.corpus.path)

        analyzer = self.corpus.analyzer
        self.analyzer = self.corpus.analyzer
        
        if exists_in(self.corpus.path):
            ix = open_dir(self.corpus.path)
        else:
            # may need to remove this?  how can we have a schema if we don't know the...uh...schema?
            schema = Schema(title=TEXT(stored=True,analyzer=analyzer), content=TEXT(analyzer=analyzer),
                            path=ID(stored=True))
            ix = create_in(self.corpus.path,schema)
            writer = ix.writer()            
            writer.commit()

        self.index = ix
        self.searcher = ix.searcher();
        #self.reader = IndexReader.open(self.lucene_index, True)
        self.reader = ix.reader();
        #self.analyzer = self.corpus.analyzer 
Example #2
Source File: whoosh_write.py    From Penny-Dreadful-Tools with GNU General Public License v3.0 5 votes vote down vote up
def update_card(self, card: Card) -> None:
        ix = open_dir(WhooshConstants.index_dir)
        update_index(ix, [card]) 
Example #3
Source File: whoosh_search.py    From Penny-Dreadful-Tools with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self) -> None:
        self.ix = open_dir(WhooshConstants.index_dir)
        self.initialize_trie() 
Example #4
Source File: search.py    From markdown-search with GNU General Public License v2.0 5 votes vote down vote up
def open_index(self, index_folder, create_new=False):
        self.index_folder = index_folder
        if create_new:
            if os.path.exists(index_folder):
                shutil.rmtree(index_folder)
                print "deleted index folder: " + index_folder

        if not os.path.exists(index_folder):
            os.mkdir(index_folder)

        exists = index.exists_in(index_folder)
        stemming_analyzer = StemmingAnalyzer()

        schema = Schema(
            path=ID(stored=True, unique=True)
            , filename=TEXT(stored=True, field_boost=100.0)
            , tags=KEYWORD(stored=True, scorable=True, field_boost=80.0)
            , headlines=KEYWORD(stored=True, scorable=True, field_boost=60.0)
            , doubleemphasiswords=KEYWORD(stored=True, scorable=True, field_boost=40.0)
            , emphasiswords=KEYWORD(stored=True, scorable=True, field_boost=20.0)
            , content=TEXT(stored=True, analyzer=stemming_analyzer)
            , time=STORED
        )
        if not exists:
            self.ix = index.create_in(index_folder, schema)
        else:
            self.ix = index.open_dir(index_folder) 
Example #5
Source File: whoosh_backend.py    From flask-msearch with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def init(self):
        ix_path = os.path.join(self.path, self.name)
        if whoosh_index.exists_in(ix_path):
            return whoosh_index.open_dir(ix_path)
        if not os.path.exists(ix_path):
            os.makedirs(ix_path)
        return whoosh_index.create_in(ix_path, self.schema) 
Example #6
Source File: search.py    From databrewer with MIT License 5 votes vote down vote up
def __init__(self, index_dir, schema=DEFAULT_SCHEMA, force_create=False):
        self.schema = schema
        if exists_in(index_dir) and not force_create:
            self.index = open_dir(index_dir, schema=schema)
        else:
            self.index = create_in(index_dir, schema=schema) 
Example #7
Source File: whoosh_tool.py    From TorCMS with MIT License 5 votes vote down vote up
def __init__(self):
        self.whbase = open_dir("database/whoosh")
        self.parser = QueryParser("content", schema=self.whbase.schema) 
Example #8
Source File: whooshsearch.py    From pySINDy with MIT License 5 votes vote down vote up
def __init__(self, db_path):
        ensuredir(db_path)
        if index.exists_in(db_path):
            self.index = index.open_dir(db_path)
        else:
            self.index = index.create_in(db_path, schema=self.schema)
        self.qparser = QueryParser('text', self.schema) 
Example #9
Source File: Flask_search.py    From AIL-framework with GNU Affero General Public License v3.0 5 votes vote down vote up
def get_item_count(dirs):
    ix = index.open_dir(os.path.join(baseindexpath, dirs))
    return ix.doc_count_all() 
Example #10
Source File: models.py    From realms-wiki with GNU General Public License v2.0 5 votes vote down vote up
def __init__(self, index_path, language):
        from whoosh import index as whoosh_index
        from whoosh.fields import Schema, TEXT, ID
        from whoosh import qparser
        from whoosh.highlight import UppercaseFormatter
        from whoosh.analysis import SimpleAnalyzer, LanguageAnalyzer
        from whoosh.lang import has_stemmer, has_stopwords
        import os

        if not has_stemmer(language) or not has_stopwords(language):
            # TODO Display a warning?
            analyzer = SimpleAnalyzer()
        else:
            analyzer = LanguageAnalyzer(language)

        self.schema = Schema(path=ID(unique=True, stored=True), body=TEXT(analyzer=analyzer))
        self.formatter = UppercaseFormatter()

        self.index_path = index_path

        if not os.path.exists(index_path):
            try:
                os.mkdir(index_path)
            except OSError as e:
                sys.exit("Error creating Whoosh index: %s" % e)

        if whoosh_index.exists_in(index_path):
            try:
                self.search_index = whoosh_index.open_dir(index_path)
            except whoosh_index.IndexError as e:
                sys.exit("Error opening whoosh index: {0}".format(e))
        else:
            self.search_index = whoosh_index.create_in(index_path, self.schema)

        self.query_parser = qparser.MultifieldParser(["body", "path"], schema=self.schema)
        self.query_parser.add_plugin(qparser.FuzzyTermPlugin()) 
Example #11
Source File: index_whoosh.py    From BREDS with GNU Lesser General Public License v3.0 5 votes vote down vote up
def create_index():
    regex_tokenize = re.compile('\w+(?:-\w+)+|<[A-Z]+>[^<]+</[A-Z]+>|\w+', re.U)
    tokenizer = RegexTokenizer(regex_tokenize)
    schema = Schema(sentence=TEXT(stored=True, analyzer=tokenizer))
    if not os.path.exists("index_full"):
        os.mkdir("index_full")
        idx = create_in("index_full", schema)
    else:
        idx = open_dir("index_full")
    return idx 
Example #12
Source File: boolean.py    From cltk with MIT License 4 votes vote down vote up
def corpus_query(self, query, save_file=None, window_size=300, surround_size=50):
        """Send query to a corpus's index. `save_file` is a filename.
        :type save_file: str

        >>> # cltk_index = CLTKIndex('latin', 'latin_text_latin_library')
        >>> # results = cltk_index.corpus_query('amicitia')

        """
        _index = open_dir(self.index_path)

        output_str = ''

        with _index.searcher() as searcher:
            _query = QueryParser("content", _index.schema).parse(query)
            results = searcher.search(_query, limit=None)
            results.fragmenter.charlimit = None

            # Allow larger fragments
            results.fragmenter.maxchars = window_size
            # Show more context before and after
            results.fragmenter.surround = surround_size

            docs_number = searcher.doc_count_all()

            output_str += 'Docs containing hits: {}.'.format(docs_number) + '</br></br>'

            for hit in results:
                author = hit['author']
                filepath = hit['path']
                output_str += author + '</br>'
                output_str += filepath + '</br>'

                with open(filepath) as file_open:
                    file_contents = file_open.read()

                highlights = hit.highlights("content", text=file_contents, top=10000000)
                lines = highlights.split('\n')
                #lines_numbers = [l for l in lines]
                lines_br = '</br>'.join(lines)
                lines_number_approx = len(lines)
                output_str += 'Approximate hits: {}.'.format(lines_number_approx) + '</br>'

                output_str += lines_br + '</br></br>'

        if save_file:
            user_dir = os.path.normpath(get_cltk_data_dir() + '/user_data/search')
            output_path = os.path.join(user_dir, save_file + '.html')

            try:
                with open(output_path, 'w') as file_open:
                    file_open.write(output_str)
            except FileNotFoundError:
                os.mkdir(user_dir)
                with open(output_path, 'w') as file_open:
                    file_open.write(output_str)
        else:
            return output_str