Java Code Examples for org.apache.lucene.index.IndexWriterConfig#setOpenMode()

The following examples show how to use org.apache.lucene.index.IndexWriterConfig#setOpenMode() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Txt2PubmedIdIndexer.java    From bluima with Apache License 2.0 6 votes vote down vote up
@Override
public void initialize(UimaContext context)
        throws ResourceInitializationException {
    super.initialize(context);
    try {
        // create writer
        Directory dir;
        dir = FSDirectory.open(new File(INDEX_PATH));
        Analyzer analyzer = getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(
                Version.LUCENE_41, analyzer);
        iwc.setOpenMode(OpenMode.CREATE);
        indexWriter = new IndexWriter(dir, iwc);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
 
Example 2
Source File: LucenePerUserWaveViewHandlerImpl.java    From incubator-retired-wave with Apache License 2.0 6 votes vote down vote up
@Inject
public LucenePerUserWaveViewHandlerImpl(IndexDirectory directory,
                                        ReadableWaveletDataProvider waveletProvider,
                                        @Named(CoreSettingsNames.WAVE_SERVER_DOMAIN) String domain,
                                        @IndexExecutor Executor executor) {
  this.waveletProvider = waveletProvider;
  this.executor = executor;
  analyzer = new StandardAnalyzer(LUCENE_VERSION);
  try {
    IndexWriterConfig indexConfig = new IndexWriterConfig(LUCENE_VERSION, analyzer);
    indexConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    indexWriter = new IndexWriter(directory.getDirectory(), indexConfig);
    nrtManager = new NRTManager(indexWriter, new WaveSearchWarmer(domain));
  } catch (IOException ex) {
    throw new IndexException(ex);
  }

  nrtManagerReopenThread = new NRTManagerReopenThread(nrtManager, MAX_STALE_SEC, MIN_STALE_SEC);
  nrtManagerReopenThread.start();
}
 
Example 3
Source File: LucenePerUserWaveViewHandlerImpl.java    From swellrt with Apache License 2.0 6 votes vote down vote up
@Inject
public LucenePerUserWaveViewHandlerImpl(IndexDirectory directory,
                                        ReadableWaveletDataProvider waveletProvider,
                                        @Named(CoreSettingsNames.WAVE_SERVER_DOMAIN) String domain,
                                        @IndexExecutor Executor executor) {
  this.waveletProvider = waveletProvider;
  this.executor = executor;
  analyzer = new StandardAnalyzer(LUCENE_VERSION);
  try {
    IndexWriterConfig indexConfig = new IndexWriterConfig(LUCENE_VERSION, analyzer);
    indexConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    indexWriter = new IndexWriter(directory.getDirectory(), indexConfig);
    nrtManager = new NRTManager(indexWriter, new WaveSearchWarmer(domain));
  } catch (IOException ex) {
    throw new IndexException(ex);
  }

  nrtManagerReopenThread = new NRTManagerReopenThread(nrtManager, MAX_STALE_SEC, MIN_STALE_SEC);
  nrtManagerReopenThread.start();
}
 
Example 4
Source File: Index.java    From dacapobench with Apache License 2.0 6 votes vote down vote up
/**
 * Index all text files under a directory.
 */
public void main(final File INDEX_DIR, final String[] args) throws IOException {
  IndexWriterConfig IWConfig = new IndexWriterConfig();
  IWConfig.setOpenMode (IndexWriterConfig.OpenMode.CREATE);
  IWConfig.setMergePolicy (new LogByteSizeMergePolicy());
  IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get(INDEX_DIR.getCanonicalPath())), IWConfig);
  for (int arg = 0; arg < args.length; arg++) {
    final File docDir = new File(args[arg]);
    if (!docDir.exists() || !docDir.canRead()) {
      System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
      throw new IOException("Cannot read from document directory");
    }

    indexDocs(writer, docDir);
    System.out.println("Optimizing...");
    writer.forceMerge(1);
  }
  writer.close();
}
 
Example 5
Source File: LuceneVsLuceneTest.java    From orientdb-lucene with Apache License 2.0 6 votes vote down vote up
@BeforeClass
public void init() {
  initDB();
  OSchema schema = databaseDocumentTx.getMetadata().getSchema();
  OClass v = schema.getClass("V");
  OClass song = schema.createClass("Song");
  song.setSuperClass(v);
  song.createProperty("title", OType.STRING);
  song.createProperty("author", OType.STRING);

  try {
    Directory dir = getDirectory();
    Analyzer analyzer = new StandardAnalyzer(OLuceneIndexManagerAbstract.LUCENE_VERSION);
    IndexWriterConfig iwc = new IndexWriterConfig(OLuceneIndexManagerAbstract.LUCENE_VERSION, analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    indexWriter = new IndexWriter(dir, iwc);

  } catch (IOException e) {
    e.printStackTrace();
  }
  databaseDocumentTx.command(new OCommandSQL("create index Song.title on Song (title) FULLTEXT ENGINE LUCENE")).execute();

}
 
Example 6
Source File: InMemoryIndex.java    From SnowGraph with Apache License 2.0 6 votes vote down vote up
public InMemoryIndex(Map<String,String> id2Text){
    Analyzer analyzer = new EnglishAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    try {
        IndexWriter writer = new IndexWriter(directory, iwc);
        for (String id:id2Text.keySet()) {
            Document doc=new Document();
            doc.add(new StringField("id", id, Field.Store.YES));
            doc.add(new TextField("content", id2Text.get(id), Field.Store.YES));
            writer.addDocument(doc);
        }
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
 
Example 7
Source File: Indexer.java    From gerbil with GNU Affero General Public License v3.0 5 votes vote down vote up
public Indexer(String path)
		throws GerbilException {
	try {
		dir = FSDirectory.open(new File(path).toPath());
		Analyzer analyzer = new StandardAnalyzer();
		IndexWriterConfig config = new IndexWriterConfig(analyzer);
		config.setOpenMode(OpenMode.CREATE);
		writer = new IndexWriter(dir, config);
	} catch (IOException e) {
		LOGGER.error("Error occured during accesing file " + path, e);
		throw new GerbilException(ErrorTypes.UNEXPECTED_EXCEPTION);
	}
}
 
Example 8
Source File: Collection.java    From openbd-core with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates an empty collection to get it up and running
 */
public synchronized void create( boolean _errorOnExists ) throws IOException {
	setDirectory();
	
	if ( directory.listAll().length > 2 ) {
		if ( _errorOnExists ) {
			throw new IOException( "directory not empty; possible collection already present" );
		}else {
			if ( DirectoryReader.indexExists( directory ) ) {
				return;
			}// otherwise an index doesn't exist so allow the creation code to execute
		}
	}

	IndexWriterConfig iwc = new IndexWriterConfig( AnalyzerFactory.get(language) );
	iwc.setOpenMode( OpenMode.CREATE );
	
	indexwriter = new IndexWriter(directory, iwc);
	indexwriter.commit();
	indexwriter.close();
	indexwriter = null;
	
	// throw an openbd.create file in there so we know when it was created
	created	= System.currentTimeMillis();
	File touchFile	= new File( collectionpath, "openbd.created" );
	Writer	fw	= new FileWriter( touchFile );
	fw.close();
}
 
Example 9
Source File: SearchEngineConfiguration.java    From gravitee-management-rest-api with Apache License 2.0 5 votes vote down vote up
@Bean
public IndexWriter indexWriter(Directory directory, Analyzer analyzer) throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    return new IndexWriter(directory, iwc);
}
 
Example 10
Source File: AnalyzingInfixSuggester.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Override this to customize index settings, e.g. which
 *  codec to use. */
protected IndexWriterConfig getIndexWriterConfig(Analyzer indexAnalyzer, IndexWriterConfig.OpenMode openMode) {
  IndexWriterConfig iwc = new IndexWriterConfig(indexAnalyzer);
  iwc.setOpenMode(openMode);

  // This way all merged segments will be sorted at
  // merge time, allow for per-segment early termination
  // when those segments are searched:
  iwc.setIndexSort(SORT);

  return iwc;
}
 
Example 11
Source File: Indexer.java    From gerbil with GNU Affero General Public License v3.0 5 votes vote down vote up
public static Indexer create(String indexDirPath) {
    Directory indexDirectory = null;
    try {
        indexDirectory = FSDirectory.open(new File(indexDirPath).toPath());
        IndexWriterConfig config = new IndexWriterConfig();
        config.setOpenMode(OpenMode.CREATE);
        IndexWriter indexWriter = new IndexWriter(indexDirectory, config);
        return new Indexer(indexDirectory, indexWriter);
    } catch (IOException e) {
        LOGGER.error("Exception while trying to create index writer for entity checking. Returning null.", e);
        IOUtils.closeQuietly(indexDirectory);
        return null;
    }
}
 
Example 12
Source File: LuceneIndex.java    From rdf4j with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private void postInit() throws IOException {
	this.queryAnalyzer = new StandardAnalyzer();

	// do some initialization for new indices
	if (!DirectoryReader.indexExists(directory)) {
		logger.debug("creating new Lucene index in directory {}", directory);
		IndexWriterConfig indexWriterConfig = getIndexWriterConfig();
		indexWriterConfig.setOpenMode(OpenMode.CREATE);
		IndexWriter writer = new IndexWriter(directory, indexWriterConfig);
		writer.close();
	}
}
 
Example 13
Source File: IndexFiles.java    From elasticsearch-full with Apache License 2.0 5 votes vote down vote up
public static void main(String[] args) {

        String indexPath = "index";
        String docsPath = "/Users/admin/github/elasticsearch-full/JAVA-QUERY-API/";
        boolean create = true;

        final Path docDir = Paths.get(docsPath);
        if (!Files.isReadable(docDir)) {
            System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path");
            System.exit(1);
        }

        Date start = new Date();
        try {
            System.out.println("Indexing to directory '" + indexPath + "'...");
            Directory dir = FSDirectory.open(Paths.get(indexPath));
            Analyzer analyzer = new StandardAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
            if (create) {
                iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            } else {
                iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            }

            IndexWriter writer = new IndexWriter(dir, iwc);
            indexDocs(writer, docDir);
            writer.close();
            Date end = new Date();
            System.out.println(end.getTime() - start.getTime() + " total milliseconds");
        } catch (Exception e) {
            System.out.println(" caught a " + e.getClass() +
                    "\n with message: " + e.getMessage());
        }

    }
 
Example 14
Source File: NLPIRTokenizerTest.java    From nlpir-analysis-cn-ictclas with Apache License 2.0 5 votes vote down vote up
public static void main(String[] args) throws Exception {
	// NLPIR
	NLPIRTokenizerAnalyzer nta = new NLPIRTokenizerAnalyzer("", 1, "", "", false);
	// Index
	IndexWriterConfig inconf = new IndexWriterConfig(nta);
	inconf.setOpenMode(OpenMode.CREATE_OR_APPEND);
	IndexWriter index = new IndexWriter(FSDirectory.open(Paths.get("index/")), inconf);
	Document doc = new Document();
	doc.add(new TextField("contents",
			"特朗普表示,很高兴汉堡会晤后再次同习近平主席通话。我同习主席就重大问题保持沟通和协调、两国加强各层级和各领域交往十分重要。当前,美中关系发展态势良好,我相信可以发展得更好。我期待着对中国进行国事访问。",
			Field.Store.YES));
	index.addDocument(doc);
	index.flush();
	index.close();
	// Search
	String field = "contents";
	IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("index/")));
	IndexSearcher searcher = new IndexSearcher(reader);
	QueryParser parser = new QueryParser(field, nta);
	Query query = parser.parse("特朗普习近平");
	TopDocs top = searcher.search(query, 100);
	System.out.println("总条数:" + top.totalHits);
	ScoreDoc[] hits = top.scoreDocs;
	for (int i = 0; i < hits.length; i++) {
		System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
		Document d = searcher.doc(hits[i].doc);
		System.out.println(d.get("contents"));
	}

}
 
Example 15
Source File: IndexUtil.java    From everywhere with Apache License 2.0 5 votes vote down vote up
public static IndexWriter getIndexWriter(String indexPath, boolean create) throws IOException {
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    Analyzer analyzer = new SmartChineseAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.setMergeFactor(50);
    mergePolicy.setMaxMergeDocs(5000);
    if (create){
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    } else {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    return new IndexWriter(dir, iwc);
}
 
Example 16
Source File: LuceneMessageSearchIndex.java    From james-project with Apache License 2.0 5 votes vote down vote up
protected IndexWriterConfig createConfig(Analyzer analyzer, boolean dropIndexOnStart) {
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31, analyzer);
    if (dropIndexOnStart) {
        config.setOpenMode(OpenMode.CREATE);
    } else {
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
    }
    return config;
}
 
Example 17
Source File: DocumentIndexer.java    From act with GNU General Public License v3.0 4 votes vote down vote up
public static void main(String[] args) throws Exception {
  System.out.println("Starting up...");
  System.out.flush();
  Options opts = new Options();
  opts.addOption(Option.builder("i").
      longOpt("input").hasArg().required().desc("Input file or directory to index").build());
  opts.addOption(Option.builder("x").
      longOpt("index").hasArg().required().desc("Path to index file to generate").build());
  opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build());
  opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build());

  HelpFormatter helpFormatter = new HelpFormatter();
  CommandLineParser cmdLineParser = new DefaultParser();
  CommandLine cmdLine = null;
  try {
    cmdLine = cmdLineParser.parse(opts, args);
  } catch (ParseException e) {
    System.out.println("Caught exception when parsing command line: " + e.getMessage());
    helpFormatter.printHelp("DocumentIndexer", opts);
    System.exit(1);
  }

  if (cmdLine.hasOption("help")) {
    helpFormatter.printHelp("DocumentIndexer", opts);
    System.exit(0);
  }

  if (cmdLine.hasOption("verbose")) {
    // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2
    LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
    Configuration ctxConfig = ctx.getConfiguration();
    LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME);
    logConfig.setLevel(Level.DEBUG);

    ctx.updateLoggers();
    LOGGER.debug("Verbose logging enabled");
  }

  LOGGER.info("Opening index at " + cmdLine.getOptionValue("index"));
  Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath());

  /* The standard analyzer is too aggressive with chemical entities (it strips structural annotations, for one
   * thing), and the whitespace analyzer doesn't do any case normalization or stop word elimination.  This custom
   * analyzer appears to treat chemical entities better than the standard analyzer without admitting too much
   * cruft to the index. */
  Analyzer analyzer = CustomAnalyzer.builder().
      withTokenizer("whitespace").
      addTokenFilter("lowercase").
      addTokenFilter("stop").
      build();

  IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
  writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
  writerConfig.setRAMBufferSizeMB(1 << 10);
  IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig);

  String inputFileOrDir = cmdLine.getOptionValue("input");
  File splitFileOrDir = new File(inputFileOrDir);
  if (!(splitFileOrDir.exists())) {
    LOGGER.error("Unable to find directory at " + inputFileOrDir);
    System.exit(1);
  }

  DocumentIndexer indexer = new DocumentIndexer(indexWriter);
  PatentCorpusReader corpusReader = new PatentCorpusReader(indexer, splitFileOrDir);
  corpusReader.readPatentCorpus();
  indexer.commitAndClose();
}
 
Example 18
Source File: IndexFiles.java    From Java-Data-Science-Cookbook with MIT License 4 votes vote down vote up
public static void main(String[] args) {
	String indexPath = "index";
	String docsPath = null;
	boolean create = true;
	for(int i=0;i<args.length;i++) {
		if ("-index".equals(args[i])) {
			indexPath = args[i+1];
			i++;
		} else if ("-docs".equals(args[i])) {
			docsPath = args[i+1];
			i++;
		} else if ("-update".equals(args[i])) {
			create = false;
		}
	}

	final Path docDir = Paths.get(docsPath);

	Date start = new Date();
	try {
		System.out.println("Indexing to directory '" + indexPath + "'...");

		Directory dir = FSDirectory.open(Paths.get(indexPath));
		Analyzer analyzer = new StandardAnalyzer();
		IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

		if (create) {
			iwc.setOpenMode(OpenMode.CREATE);
		} else {
			iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
		}
		IndexWriter writer = new IndexWriter(dir, iwc);
		indexDocs(writer, docDir);

		writer.close();

		Date end = new Date();
		System.out.println(end.getTime() - start.getTime() + " total milliseconds");

	} catch (IOException e) {
	}
}
 
Example 19
Source File: IndexFiles.java    From word2vec-query-expansion with Apache License 2.0 4 votes vote down vote up
/** Index all text files under a directory. */
  @SuppressWarnings("deprecation")
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
                 + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
                 + "This indexes the documents in DOCS_PATH, creating a Lucene index"
                 + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for(int i=0;i<args.length;i++) {
      if ("-index".equals(args[i])) {
        indexPath = args[i+1];
        i++;
      } else if ("-docs".equals(args[i])) {
        docsPath = args[i+1];
        i++;
      } else if ("-update".equals(args[i])) {
        create = false;
      }
    }

    if (docsPath == null) {
      System.err.println("Usage: " + usage);
      System.exit(1);
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
      System.out.println("Document directory '" +docDir.getAbsolutePath()+ "' does not exist or is not readable, please check the path");
      System.exit(1);
    }
    
    Date start = new Date();
    try {
      System.out.println("Indexing to directory '" + indexPath + "'...");

      Directory dir = FSDirectory.open(new File(indexPath));
      Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
      IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);

      if (create) {
        // Create a new index in the directory, removing any
        // previously indexed documents:
        iwc.setOpenMode(OpenMode.CREATE);
      } else {
        // Add new documents to an existing index:
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
      }

      // Optional: for better indexing performance, if you
      // are indexing many documents, increase the RAM
      // buffer.  But if you do this, increase the max heap
      // size to the JVM (eg add -Xmx512m or -Xmx1g):
      //
      // iwc.setRAMBufferSizeMB(256.0);

      IndexWriter writer = new IndexWriter(dir, iwc);
      indexDocs(writer, docDir);

      // NOTE: if you want to maximize search performance,
      // you can optionally call forceMerge here.  This can be
      // a terribly costly operation, so generally it's only
      // worth it when your index is relatively static (ie
      // you're done adding documents to it):
      //
      // writer.forceMerge(1);

      writer.close();

      Date end = new Date();
      System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
      System.out.println(" caught a " + e.getClass() +
       "\n with message: " + e.getMessage());
    }
  }
 
Example 20
Source File: BuildEnsembleSearchIndex.java    From marathonv5 with Apache License 2.0 4 votes vote down vote up
public static void main(String[] args) throws Exception{
    File samplesFilesDir = new File("build/classes/ensemble/");
    File indexDir = new File("build/classes/ensemble/search/index");
    File docDir = new File("../../../artifacts/sdk/docs/api");
    File samplesDir = new File("src/ensemble/samples");
    // create index
    ///System.out.println("Indexing to directory '" + indexDir + "'...");
    long start = System.currentTimeMillis();
    Directory dir = FSDirectory.open(indexDir);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    // generate and write index of all java doc and samples
    IndexWriter writer = new IndexWriter(dir, iwc);

    List<String> samplesFileList = new ArrayList<String>();

    indexSamples(writer, samplesDir, samplesFileList);
    try {
        indexJavaDocAllClasses(writer, docDir);
    } catch (Exception e) {
        System.out.println("\nWarning: We were not able to locate the JavaFX API documentation for your build environment.\n"
                + "Ensemble search will not include the API documentation.\n"); 
    }
    writer.close();
    // create a listAll.txt file that is used
    FileWriter listAllOut = new FileWriter(new File(indexDir,"listAll.txt"));
    for (String fileName: dir.listAll()) {
        if (!"listAll.txt".equals(fileName)) { // don't include the "listAll.txt" file
            Long length = dir.fileLength(fileName);
            listAllOut.write(fileName);
            listAllOut.write(':');
            listAllOut.write(length.toString());
            listAllOut.write('\n');
        }
    }
    listAllOut.flush();
    listAllOut.close();

    FileWriter sampleFilesCache = new FileWriter(new File(samplesFilesDir,"samplesAll.txt"));
    for (String oneSample: samplesFileList) {
            sampleFilesCache.write(oneSample);
            sampleFilesCache.write('\n');
    }
    sampleFilesCache.flush();
    sampleFilesCache.close();

    // print time taken
    ///System.out.println(System.currentTimeMillis() - start + " total milliseconds");
}