Java Code Examples for org.apache.lucene.index.IndexWriterConfig#setCodec()

The following examples show how to use org.apache.lucene.index.IndexWriterConfig#setCodec(). Each example is taken from an open-source project; the source file, project, and license are listed above the code.
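Before the project examples, here is a minimal, self-contained sketch of the basic call pattern. It is illustrative only: the index path and class name are hypothetical, and Codec.forName("Lucene86") assumes a Lucene 8.6.x classpath, matching several of the lucene-solr examples below.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SetCodecSketch {
  public static void main(String[] args) throws Exception {
    // "example-index" is a hypothetical path, used only for illustration.
    Directory dir = FSDirectory.open(Paths.get("example-index"));
    IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
    // The codec must be set before the IndexWriter is created; it determines
    // how segments written by this writer are encoded on disk.
    config.setCodec(Codec.forName("Lucene86"));
    try (IndexWriter writer = new IndexWriter(dir, config)) {
      writer.commit();
    }
    dir.close();
  }
}

The examples below follow this pattern but substitute custom codecs (e.g. a per-field postings format in Example 4, or the Blur and Mtas codecs) or test-framework helpers such as TestUtil.alwaysPostingsFormat.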
Example 1
Source File: TestPerFieldPostingsFormat2.java    From lucene-solr with Apache License 2.0
private void doTestMixedPostings(Codec codec) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setCodec(codec);
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  // turn on vectors for the checkindex cross-check
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorOffsets(true);
  ft.setStoreTermVectorPositions(true);
  Field idField = new Field("id", "", ft);
  Field dateField = new Field("date", "", ft);
  doc.add(idField);
  doc.add(dateField);
  for (int i = 0; i < 100; i++) {
    idField.setStringValue(Integer.toString(random().nextInt(50)));
    dateField.setStringValue(Integer.toString(random().nextInt(100)));
    iw.addDocument(doc);
  }
  iw.close();
  dir.close(); // the test directory wrapper runs CheckIndex on close
}
 
Example 2
Source File: TestPointQueries.java    From lucene-solr with Apache License 2.0
public void testWrongNumBytes() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setCodec(getCodec());
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  Document doc = new Document();
  doc.add(new LongPoint("value", Long.MIN_VALUE));
  w.addDocument(doc);

  IndexReader r = w.getReader();

  // no wrapping, else the exception might happen in an executor thread:
  IndexSearcher s = new IndexSearcher(r);
  byte[][] point = new byte[1][];
  point[0] = new byte[10];
  IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
    s.count(BinaryPoint.newRangeQuery("value", point, point));
  });
  assertEquals("field=\"value\" was indexed with bytesPerDim=8 but this query has bytesPerDim=10", expected.getMessage());

  IOUtils.close(r, w, dir);
}
 
Example 3
Source File: TestPointQueries.java    From lucene-solr with Apache License 2.0
public void testEmptyPointInSetQuery() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setCodec(getCodec());
  IndexWriter w = new IndexWriter(dir, iwc);

  Document doc = new Document();
  doc.add(new IntPoint("int", 17));
  doc.add(new LongPoint("long", 17L));
  doc.add(new FloatPoint("float", 17.0f));
  doc.add(new DoublePoint("double", 17.0));
  doc.add(new BinaryPoint("bytes", new byte[] {0, 17}));
  w.addDocument(doc);

  IndexReader r = DirectoryReader.open(w);
  IndexSearcher s = newSearcher(r, false);
  assertEquals(0, s.count(IntPoint.newSetQuery("int")));
  assertEquals(0, s.count(LongPoint.newSetQuery("long")));
  assertEquals(0, s.count(FloatPoint.newSetQuery("float")));
  assertEquals(0, s.count(DoublePoint.newSetQuery("double")));
  assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes")));

  w.close();
  r.close();
  dir.close();
}
 
Example 4
Source File: TestSuggestField.java    From lucene-solr with Apache License 2.0
static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) {
  IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  Codec filterCodec = new Lucene86Codec() {
    CompletionPostingsFormat.FSTLoadMode fstLoadMode =
        RandomPicks.randomFrom(random(), CompletionPostingsFormat.FSTLoadMode.values());
    PostingsFormat postingsFormat = new Completion84PostingsFormat(fstLoadMode);

    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      if (suggestFields.contains(field)) {
        return postingsFormat;
      }
      return super.getPostingsFormatForField(field);
    }
  };
  iwc.setCodec(filterCodec);
  return iwc;
}
 
Example 5
Source File: Blur024CodecTest.java    From incubator-retired-blur with Apache License 2.0
@Test
public void testDocValuesFormat() throws IOException {
  RAMDirectory directory = new RAMDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf.setCodec(new Blur024Codec());
  IndexWriter writer = new IndexWriter(directory, conf);

  Document doc = new Document();
  doc.add(new StringField("f", "v", Store.YES));
  doc.add(new SortedDocValuesField("f", new BytesRef("v")));
  writer.addDocument(doc);

  writer.close();

  DirectoryReader reader = DirectoryReader.open(directory);
  AtomicReaderContext context = reader.leaves().get(0);
  AtomicReader atomicReader = context.reader();
  SortedDocValues sortedDocValues = atomicReader.getSortedDocValues("f");
  assertTrue(sortedDocValues.getClass().getName().startsWith(DiskDocValuesProducer.class.getName()));

  reader.close();
}
 
Example 6
Source File: TestPointQueries.java    From lucene-solr with Apache License 2.0
public void testBasicMultiValueMultiDimPointInSetQuery() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setCodec(getCodec());
  IndexWriter w = new IndexWriter(dir, iwc);

  Document doc = new Document();
  doc.add(new IntPoint("int", 17, 42));
  doc.add(new IntPoint("int", 34, 79));
  w.addDocument(doc);
  IndexReader r = DirectoryReader.open(w);
  IndexSearcher s = newSearcher(r, false);

  assertEquals(0, s.count(newMultiDimIntSetQuery("int", 2, 17, 41)));
  assertEquals(1, s.count(newMultiDimIntSetQuery("int", 2, 17, 42)));
  assertEquals(1, s.count(newMultiDimIntSetQuery("int", 2, 17, 42, 34, 79)));
  assertEquals(1, s.count(newMultiDimIntSetQuery("int", 2, -7, -7, 17, 42)));
  assertEquals(1, s.count(newMultiDimIntSetQuery("int", 2, -7, -7, 34, 79)));
  assertEquals(1, s.count(newMultiDimIntSetQuery("int", 2, 17, 42, -14, -14)));

  assertEquals("int:{-14,-14 17,42}", newMultiDimIntSetQuery("int", 2, 17, 42, -14, -14).toString());

  w.close();
  r.close();
  dir.close();
}
 
Example 7
Source File: TestIDVersionPostingsFormat.java    From lucene-solr with Apache License 2.0
public void testInvalidVersions2() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc, false);
  Document doc = new Document();
  // Long.MAX_VALUE:
  doc.add(new StringAndPayloadField("id", "id", new BytesRef(new byte[] {(byte)0x7f, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff})));
  expectThrows(IllegalArgumentException.class, () -> {
    w.addDocument(doc);
    w.commit(false);
  });
  expectThrows(AlreadyClosedException.class, () -> {
    w.addDocument(doc);
  });

  dir.close();
}
 
Example 8
Source File: TestIDVersionPostingsFormat.java    From lucene-solr with Apache License 2.0
public void testMoreThanOnceInSingleDoc() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc, false);
  Document doc = new Document();
  doc.add(makeIDField("id", 17));
  doc.add(makeIDField("id", 17));
  expectThrows(IllegalArgumentException.class, () -> {
    w.addDocument(doc);
    w.commit(false);
  });

  w.close();
  dir.close();
}
 
Example 9
Source File: TestBlockPostingsFormat2.java    From lucene-solr with Apache License 2.0
@Override
public void setUp() throws Exception {
  super.setUp();
  dir = newFSDirectory(createTempDir("testDFBlockSize"));
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setCodec(TestUtil.alwaysPostingsFormat(new Lucene50RWPostingsFormat()));
  iw = new RandomIndexWriter(random(), dir, iwc);
  iw.setDoRandomForceMerge(false); // we force-merge ourselves
}
 
Example 10
Source File: FilterCacheTest.java    From incubator-retired-blur with Apache License 2.0
private void writeDocs(FilterCache filterCache, RAMDirectory directory) throws IOException {
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
  conf.setCodec(new Blur024Codec());
  IndexWriter indexWriter = new IndexWriter(directory, conf);
  int count = 10000;
  addDocs(indexWriter, count);
  indexWriter.close();
}
 
Example 11
Source File: TestDocTermOrds.java    From lucene-solr with Apache License 2.0
public void testRandom() throws Exception {
  Directory dir = newDirectory();

  final int NUM_TERMS = atLeast(20);
  final Set<BytesRef> terms = new HashSet<>();
  while(terms.size() < NUM_TERMS) {
    final String s = TestUtil.randomRealisticUnicodeString(random());
    //final String s = _TestUtil.randomSimpleString(random);
    if (s.length() > 0) {
      terms.add(new BytesRef(s));
    }
  }
  final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
  Arrays.sort(termsArray);
  
  final int NUM_DOCS = atLeast(100);

  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));

  // Sometimes swap in a codec that implements ord():
  if (random().nextInt(10) == 7) {
    // Make sure terms index has ords:
    Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
    conf.setCodec(codec);
  }
  
  final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);

  final int[][] idToOrds = new int[NUM_DOCS][];
  final Set<Integer> ordsForDocSet = new HashSet<>();

  for(int id=0;id<NUM_DOCS;id++) {
    Document doc = new Document();

    doc.add(new LegacyIntField("id", id, Field.Store.YES));
    
    final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
    while(ordsForDocSet.size() < termCount) {
      ordsForDocSet.add(random().nextInt(termsArray.length));
    }
    final int[] ordsForDoc = new int[termCount];
    int upto = 0;
    if (VERBOSE) {
      System.out.println("TEST: doc id=" + id);
    }
    for(int ord : ordsForDocSet) {
      ordsForDoc[upto++] = ord;
      Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
      if (VERBOSE) {
        System.out.println("  f=" + termsArray[ord].utf8ToString());
      }
      doc.add(field);
    }
    ordsForDocSet.clear();
    Arrays.sort(ordsForDoc);
    idToOrds[id] = ordsForDoc;
    w.addDocument(doc);
  }
  
  final DirectoryReader r = w.getReader();
  w.close();

  if (VERBOSE) {
    System.out.println("TEST: reader=" + r);
  }

  for(LeafReaderContext ctx : r.leaves()) {
    if (VERBOSE) {
      System.out.println("\nTEST: sub=" + ctx.reader());
    }
    verify(ctx.reader(), idToOrds, termsArray, null);
  }

  // Also test top-level reader: its enum does not support
  // ord, so this forces the OrdWrapper to run:
  if (VERBOSE) {
    System.out.println("TEST: top reader");
  }
  LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
  TestUtil.checkReader(slowR);
  verify(slowR, idToOrds, termsArray, null);

  FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheHelper().getKey());

  r.close();
  dir.close();
}
 
Example 12
Source File: MtasSearchTestConsistency.java    From mtas with Apache License 2.0
/**
 * Creates the index.
 *
 * @param configFile the config file
 * @param files the files
 * @throws IOException Signals that an I/O exception has occurred.
 */
private static void createIndex(String configFile,
    HashMap<String, String> files) throws IOException {
  // analyzer
  Map<String, String> paramsCharFilterMtas = new HashMap<>();
  paramsCharFilterMtas.put("type", "file");
  Map<String, String> paramsTokenizer = new HashMap<>();
  paramsTokenizer.put("configFile", configFile);
  Analyzer mtasAnalyzer = CustomAnalyzer
      .builder(Paths.get("docker").toAbsolutePath())
      .addCharFilter("mtas", paramsCharFilterMtas)
      .withTokenizer("mtas", paramsTokenizer).build();
  Map<String, Analyzer> analyzerPerField = new HashMap<>();
  analyzerPerField.put(FIELD_CONTENT, mtasAnalyzer);
  PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
      new StandardAnalyzer(), analyzerPerField);
  // indexwriter
  IndexWriterConfig config = new IndexWriterConfig(analyzer);
  config.setUseCompoundFile(false);
  config.setCodec(Codec.forName("MtasCodec"));
  IndexWriter w = new IndexWriter(directory, config);
  // delete
  w.deleteAll();
  // add
  int counter = 0;
  for (Entry<String, String> entry : files.entrySet()) {
    addDoc(w, counter, entry.getKey(), entry.getValue());
    if (counter == 0) {
      w.commit();
    } else {
      addDoc(w, counter, entry.getKey(), entry.getValue());
      addDoc(w, counter, "deletable", entry.getValue());
      w.commit();
      w.deleteDocuments(new Term(FIELD_ID, Integer.toString(counter)));
      w.deleteDocuments(new Term(FIELD_TITLE, "deletable"));
      addDoc(w, counter, entry.getKey(), entry.getValue());
    }
    counter++;
  }
  w.commit();
  // finish
  w.close();
}
 
Example 13
Source File: Blur022CodecTest.java    From incubator-retired-blur with Apache License 2.0
@Test
public void testLargeDocs() throws IOException {
  Random random = new Random();
  Iterable<? extends IndexableField> doc = getLargeDoc(random);
  RAMDirectory directory = new RAMDirectory();
  IndexWriterConfig conf1 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf1.setCodec(new Blur022Codec());
  IndexWriter writer1 = new IndexWriter(directory, conf1);
  writer1.addDocument(doc);
  writer1.close();

  DirectoryReader reader1 = DirectoryReader.open(directory);
  int numDocs1 = reader1.numDocs();
  assertEquals(1, numDocs1);

  // for (int i = 0; i < numDocs1; i++) {
  // System.out.println(reader1.document(i));
  // }

  IndexWriterConfig conf2 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf2.setCodec(new Blur022Codec(1 << 16, CompressionMode.HIGH_COMPRESSION));
  IndexWriter writer2 = new IndexWriter(directory, conf2);
  writer2.addDocument(doc);
  writer2.close();

  DirectoryReader reader2 = DirectoryReader.open(directory);
  int numDocs2 = reader2.numDocs();
  assertEquals(2, numDocs2);

  for (int i = 0; i < 2; i++) {

    long t1 = System.nanoTime();
    Document document1 = reader1.document(0);
    long t2 = System.nanoTime();
    Document document2 = reader2.document(1);
    long t3 = System.nanoTime();

    System.out.println((t3 - t2) / 1000000.0);
    System.out.println((t2 - t1) / 1000000.0);

    System.out.println("doc1 " + document1.hashCode());
    System.out.println("doc2 " + document2.hashCode());
  }

  // for (int i = 0; i < numDocs2; i++) {
  // System.out.println(reader2.document(i));
  // }

  // long fileLength = directory.fileLength("_0.fdt");

  for (String name : directory.listAll()) {
    if (name.endsWith(".fdt")) {
      System.out.println(name);
      System.out.println(directory.fileLength(name));
    }
  }

}
 
Example 14
Source File: TestNearest.java    From lucene-solr with Apache License 2.0
private IndexWriterConfig getIndexWriterConfig() {
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setCodec(Codec.forName("Lucene86"));
  return iwc;
}
 
Example 15
Source File: Blur024CodecTest.java    From incubator-retired-blur with Apache License 2.0
@Test
public void testLargeDocs() throws IOException {
  Random random = new Random();
  Iterable<? extends IndexableField> doc = getLargeDoc(random);
  RAMDirectory directory = new RAMDirectory();
  IndexWriterConfig conf1 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf1.setCodec(new Blur024Codec());
  IndexWriter writer1 = new IndexWriter(directory, conf1);
  writer1.addDocument(doc);
  writer1.close();

  DirectoryReader reader1 = DirectoryReader.open(directory);
  int numDocs1 = reader1.numDocs();
  assertEquals(1, numDocs1);

  // for (int i = 0; i < numDocs1; i++) {
  // System.out.println(reader1.document(i));
  // }

  IndexWriterConfig conf2 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf2.setCodec(new Blur024Codec(1 << 16, CompressionMode.HIGH_COMPRESSION));
  IndexWriter writer2 = new IndexWriter(directory, conf2);
  writer2.addDocument(doc);
  writer2.close();

  DirectoryReader reader2 = DirectoryReader.open(directory);
  int numDocs2 = reader2.numDocs();
  assertEquals(2, numDocs2);

  for (int i = 0; i < 2; i++) {

    long t1 = System.nanoTime();
    Document document1 = reader1.document(0);
    long t2 = System.nanoTime();
    Document document2 = reader2.document(1);
    long t3 = System.nanoTime();

    System.out.println((t3 - t2) / 1000000.0);
    System.out.println((t2 - t1) / 1000000.0);

    System.out.println("doc1 " + document1.hashCode());
    System.out.println("doc2 " + document2.hashCode());
  }

  // for (int i = 0; i < numDocs2; i++) {
  // System.out.println(reader2.document(i));
  // }

  // long fileLength = directory.fileLength("_0.fdt");

  for (String name : directory.listAll()) {
    if (name.endsWith(".fdt")) {
      System.out.println(name);
      System.out.println(directory.fileLength(name));
    }
  }

}
 
Example 16
Source File: TestCompressingStoredFieldsFormat.java    From lucene-solr with Apache License 2.0
public void testDeletePartiallyWrittenFilesIfAbort() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
  iwConf.setCodec(getCodec());
  // disable CFS because this test checks file names
  iwConf.setMergePolicy(newLogMergePolicy(false));
  iwConf.setUseCompoundFile(false);

  // Cannot use RandomIndexWriter (RIW) because this test wants CFS to stay off:
  IndexWriter iw = new IndexWriter(dir, iwConf);

  final Document validDoc = new Document();
  validDoc.add(new IntPoint("id", 0));
  validDoc.add(new StoredField("id", 0));
  iw.addDocument(validDoc);
  iw.commit();
  
  // make sure that #writeField fails, to trigger an abort
  final Document invalidDoc = new Document();
  FieldType fieldType = new FieldType();
  fieldType.setStored(true);
  invalidDoc.add(new Field("invalid", fieldType) {
    
    @Override
    public String stringValue() {
      // TODO: really bad & scary that this causes IW to
      // abort the segment!!  We should fix this.
      return null;
    }
    
  });
  
  try {
    iw.addDocument(invalidDoc);
    iw.commit();
  } catch(IllegalArgumentException iae) {
    // expected
    assertEquals(iae, iw.getTragicException());
  }
  // Writer should be closed by tragedy
  assertFalse(iw.isOpen());
  dir.close();
}
 
Example 17
Source File: TestPerFieldPostingsFormat2.java    From lucene-solr with Apache License 2.0
@Test
public void testChangeCodecAndMerge() throws IOException {
  Directory dir = newDirectory();
  if (VERBOSE) {
    System.out.println("TEST: make new index");
  }
  IndexWriterConfig iwconf = newIndexWriterConfig(new MockAnalyzer(random()))
                               .setOpenMode(OpenMode.CREATE).setCodec(new MockCodec());
  iwconf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
  //((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10);
  IndexWriter writer = newWriter(dir, iwconf);

  addDocs(writer, 10);
  writer.commit();
  assertQuery(new Term("content", "aaa"), dir, 10);
  if (VERBOSE) {
    System.out.println("TEST: addDocs3");
  }
  addDocs3(writer, 10);
  writer.commit();
  writer.close();

  assertQuery(new Term("content", "ccc"), dir, 10);
  assertQuery(new Term("content", "aaa"), dir, 10);
  Codec codec = iwconf.getCodec();

  iwconf = newIndexWriterConfig(new MockAnalyzer(random()))
      .setOpenMode(OpenMode.APPEND).setCodec(codec);
  //((LogMergePolicy) iwconf.getMergePolicy()).setNoCFSRatio(0.0);
  //((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10);
  iwconf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);

  iwconf.setCodec(new MockCodec()); // uses standard for field content
  writer = newWriter(dir, iwconf);
  // swap in new codec for currently written segments
  if (VERBOSE) {
    System.out.println("TEST: add docs w/ Standard codec for content field");
  }
  addDocs2(writer, 10);
  writer.commit();
  codec = iwconf.getCodec();
  assertEquals(30, writer.getDocStats().maxDoc);
  assertQuery(new Term("content", "bbb"), dir, 10);
  assertQuery(new Term("content", "ccc"), dir, 10);
  assertQuery(new Term("content", "aaa"), dir, 10);

  if (VERBOSE) {
    System.out.println("TEST: add more docs w/ new codec");
  }
  addDocs2(writer, 10);
  writer.commit();
  assertQuery(new Term("content", "ccc"), dir, 10);
  assertQuery(new Term("content", "bbb"), dir, 20);
  assertQuery(new Term("content", "aaa"), dir, 10);
  assertEquals(40, writer.getDocStats().maxDoc);

  if (VERBOSE) {
    System.out.println("TEST: now optimize");
  }
  writer.forceMerge(1);
  assertEquals(40, writer.getDocStats().maxDoc);
  writer.close();
  assertQuery(new Term("content", "ccc"), dir, 10);
  assertQuery(new Term("content", "bbb"), dir, 20);
  assertQuery(new Term("content", "aaa"), dir, 10);

  dir.close();
}
 
Example 18
Source File: MtasDocumentIndex.java    From inception with Apache License 2.0
private synchronized IndexWriter getIndexWriter() throws IOException
{
    if (_indexWriter == null) {
        log.debug("Opening index for project [{}]({})", project.getName(), project.getId());

        OPEN_INDEXES.put(project.getId(), this);
        
        // Initialize and populate the hash maps for the layers and features
        features = schemaService.listAnnotationFeature(project).stream()
                .filter(feat -> feat.getLayer().isEnabled())
                .filter(feat -> feat.isEnabled())
                .collect(Collectors.toList());
        
        // Add the project id to the configuration
        JSONObject jsonParserConfiguration = new JSONObject();
        jsonParserConfiguration.put(PARAM_PROJECT_ID, project.getId());
        
        // Tokenizer parameters
        Map<String, String> tokenizerArguments = new HashMap<>();
        tokenizerArguments.put(ARGUMENT_PARSER, MtasUimaParser.class.getName());
        tokenizerArguments.put(ARGUMENT_PARSER_ARGS, jsonParserConfiguration.toString());
        
        // Build analyzer
        Analyzer mtasAnalyzer = CustomAnalyzer.builder()
                .withTokenizer(MtasTokenizerFactory.class, tokenizerArguments)
                .build();
        
        Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
        analyzerPerField.put(FIELD_CONTENT, mtasAnalyzer);
        
        PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(),
                analyzerPerField);
        
        // Build IndexWriter
        FileUtils.forceMkdir(getIndexDir());
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        config.setCodec(Codec.forName(MTAS_CODEC_NAME));
        IndexWriter indexWriter = new IndexWriter(FSDirectory.open(getIndexDir().toPath()),
                config);
        
        // Initialize the index
        indexWriter.commit();
        
        // After the index has been initialized, assign the _indexWriter - this is also used
        // by isOpen() to check if the index writer is available.
        _indexWriter = indexWriter;
    }
    
    return _indexWriter;
}
 
Example 19
Source File: Blur022CodecTest.java    From incubator-retired-blur with Apache License 2.0
@Test
public void testSmallDocs() throws IOException {

  RAMDirectory directory = new RAMDirectory();
  IndexWriterConfig conf1 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf1.setCodec(new Blur022Codec());
  Random random1 = new Random(1);
  IndexWriter writer1 = new IndexWriter(directory, conf1);
  for (int i = 0; i < 1000; i++) {
    writer1.addDocument(getSmallDoc(random1));
  }
  writer1.close();

  DirectoryReader reader1 = DirectoryReader.open(directory);
  int numDocs1 = reader1.numDocs();
  assertEquals(1000, numDocs1);

  // for (int i = 0; i < numDocs1; i++) {
  // System.out.println(reader1.document(i));
  // }

  IndexWriterConfig conf2 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf2.setCodec(new Blur022Codec(1 << 16, CompressionMode.HIGH_COMPRESSION));
  Random random2 = new Random(1);
  IndexWriter writer2 = new IndexWriter(directory, conf2);
  for (int i = 0; i < 1000; i++) {
    writer2.addDocument(getSmallDoc(random2));
  }
  writer2.close();

  DirectoryReader reader2 = DirectoryReader.open(directory);
  int numDocs2 = reader2.numDocs();
  assertEquals(2000, numDocs2);

  for (int i = 0; i < 2; i++) {

    long t1 = System.nanoTime();
    long hash1 = 0;
    long hash2 = 0;
    for (int d = 0; d < 1000; d++) {
      Document document1 = reader1.document(d);
      hash1 += document1.hashCode();
    }
    long t2 = System.nanoTime();
    for (int d = 0; d < 1000; d++) {
      Document document2 = reader2.document(d + 1000);
      hash2 += document2.hashCode();
    }
    long t3 = System.nanoTime();

    System.out.println((t3 - t2) / 1000000.0);
    System.out.println((t2 - t1) / 1000000.0);

    System.out.println("doc1 " + hash1);
    System.out.println("doc2 " + hash2);
  }

  // for (int i = 0; i < numDocs2; i++) {
  // System.out.println(reader2.document(i));
  // }

  // long fileLength = directory.fileLength("_0.fdt");

  for (String name : directory.listAll()) {
    if (name.endsWith(".fdt")) {
      System.out.println(name);
      System.out.println(directory.fileLength(name));
    }
  }
}
 
Example 20
Source File: Blur024CodecTest.java    From incubator-retired-blur with Apache License 2.0
@Test
public void testSmallDocs() throws IOException {

  RAMDirectory directory = new RAMDirectory();
  IndexWriterConfig conf1 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf1.setCodec(new Blur024Codec());
  Random random1 = new Random(1);
  IndexWriter writer1 = new IndexWriter(directory, conf1);
  for (int i = 0; i < 1000; i++) {
    writer1.addDocument(getSmallDoc(random1));
  }
  writer1.close();

  DirectoryReader reader1 = DirectoryReader.open(directory);
  int numDocs1 = reader1.numDocs();
  assertEquals(1000, numDocs1);

  // for (int i = 0; i < numDocs1; i++) {
  // System.out.println(reader1.document(i));
  // }

  IndexWriterConfig conf2 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf2.setCodec(new Blur024Codec(1 << 16, CompressionMode.HIGH_COMPRESSION));
  Random random2 = new Random(1);
  IndexWriter writer2 = new IndexWriter(directory, conf2);
  for (int i = 0; i < 1000; i++) {
    writer2.addDocument(getSmallDoc(random2));
  }
  writer2.close();

  DirectoryReader reader2 = DirectoryReader.open(directory);
  int numDocs2 = reader2.numDocs();
  assertEquals(2000, numDocs2);

  for (int i = 0; i < 2; i++) {

    long t1 = System.nanoTime();
    long hash1 = 0;
    long hash2 = 0;
    for (int d = 0; d < 1000; d++) {
      Document document1 = reader1.document(d);
      hash1 += document1.hashCode();
    }
    long t2 = System.nanoTime();
    for (int d = 0; d < 1000; d++) {
      Document document2 = reader2.document(d + 1000);
      hash2 += document2.hashCode();
    }
    long t3 = System.nanoTime();

    System.out.println((t3 - t2) / 1000000.0);
    System.out.println((t2 - t1) / 1000000.0);

    System.out.println("doc1 " + hash1);
    System.out.println("doc2 " + hash2);
  }

  // for (int i = 0; i < numDocs2; i++) {
  // System.out.println(reader2.document(i));
  // }

  // long fileLength = directory.fileLength("_0.fdt");

  for (String name : directory.listAll()) {
    if (name.endsWith(".fdt")) {
      System.out.println(name);
      System.out.println(directory.fileLength(name));
    }
  }
}