Java Code Examples for org.apache.lucene.document.FieldType#setOmitNorms()

The following examples show how to use org.apache.lucene.document.FieldType#setOmitNorms(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: TestSimilarity2.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** make sure all sims work if TF and norms are omitted */
public void testOmitTFAndNorms() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

  // Index a single doc whose field stores neither term frequencies nor norms.
  FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
  fieldType.setIndexOptions(IndexOptions.DOCS);
  fieldType.setOmitNorms(true);
  fieldType.freeze();
  Document document = new Document();
  document.add(newField("foo", "bar", fieldType));
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();
  IndexSearcher searcher = newSearcher(reader);

  // Every similarity must still score the single matching doc without error.
  for (Similarity sim : sims) {
    searcher.setSimilarity(sim);
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
    assertEquals(1, searcher.search(builder.build(), 10).totalHits.value);
  }
  reader.close();
  dir.close();
}
 
Example 2
Source File: TestSimilarity2.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** make sure we can retrieve when norms are disabled */
public void testNoNorms() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

  // Single doc with norms omitted (index options left at the TextField default).
  FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
  fieldType.setOmitNorms(true);
  fieldType.freeze();
  Document document = new Document();
  document.add(newField("foo", "bar", fieldType));
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();
  IndexSearcher searcher = newSearcher(reader);

  // All similarities must retrieve the doc even though norms are absent.
  for (Similarity sim : sims) {
    searcher.setSimilarity(sim);
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
    assertEquals(1, searcher.search(builder.build(), 10).totalHits.value);
  }
  reader.close();
  dir.close();
}
 
Example 3
Source File: CountingTermsTest.java    From lucene-query-example with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a stored field indexed with docs/freqs/positions/offsets, norms
 * omitted, and term vectors (with positions and payloads) enabled.
 * NOTE(review): term-vector offsets are NOT enabled despite the "AllOn"
 * name — confirm that is intentional.
 */
Field newFieldAllOn(String name, String value) {
	FieldType type = new FieldType();
	type.setStored(true);
	type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
	type.setOmitNorms(true);
	type.setStoreTermVectors(true);
	type.setStoreTermVectorPositions(true);
	type.setStoreTermVectorPayloads(true);
	return new Field(name, value, type);
}
 
Example 4
Source File: TestOmitNorms.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Indexing a field that omits norms must never produce a norms (.nrm) file. */
public void testNoNrmFile() throws Throwable {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer)
                                              .setMaxBufferedDocs(3)
                                              .setMergePolicy(newLogMergePolicy()));
  LogMergePolicy mergePolicy = (LogMergePolicy) writer.getConfig().getMergePolicy();
  mergePolicy.setMergeFactor(2);
  mergePolicy.setNoCFSRatio(0.0);

  FieldType noNormsType = new FieldType(TextField.TYPE_NOT_STORED);
  noNormsType.setOmitNorms(true);
  Document doc = new Document();
  doc.add(newField("f1", "This field has no norms", noNormsType));

  // Small doc buffer + merge factor 2 forces multiple flushes and merges.
  for (int i = 0; i < 30; i++) {
    writer.addDocument(doc);
  }

  writer.commit();
  assertNoNrm(dir);

  // A full merge must not resurrect a norms file either.
  writer.forceMerge(1);
  // flush
  writer.close();
  assertNoNrm(dir);
  dir.close();
}
 
Example 5
Source File: TestDocumentWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Test adding two fields with the same name, one indexed
 * the other stored only. The omitNorms and omitTermFreqAndPositions setting
 * of the stored field should not affect the indexed one (LUCENE-1590)
 */
public void testLUCENE_1590() throws Exception {
  Document doc = new Document();
  // f1 has no norms
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setOmitNorms(true);
  // customType2 is stored-only (not indexed); its settings must not leak
  // into the indexed instance of the same field name
  FieldType customType2 = new FieldType();
  customType2.setStored(true);
  doc.add(newField("f1", "v1", customType));
  doc.add(newField("f1", "v2", customType2));
  // f2 has no TF
  FieldType customType3 = new FieldType(TextField.TYPE_NOT_STORED);
  customType3.setIndexOptions(IndexOptions.DOCS);
  Field f = newField("f2", "v1", customType3);
  doc.add(f);
  doc.add(newField("f2", "v2", customType2));

  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  writer.addDocument(doc);
  writer.forceMerge(1); // be sure to have a single segment
  writer.close();

  TestUtil.checkIndex(dir);

  LeafReader reader = getOnlyLeafReader(DirectoryReader.open(dir));
  FieldInfos fi = reader.getFieldInfos();
  // f1: omitNorms from the indexed instance wins; the stored-only instance
  // does not re-enable norms or downgrade index options
  assertFalse("f1 should have no norms", fi.fieldInfo("f1").hasNorms());
  assertEquals("omitTermFreqAndPositions field bit should not be set for f1", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f1").getIndexOptions());
  // f2: norms stay on, but index options were reduced to DOCS
  assertTrue("f2 should have norms", fi.fieldInfo("f2").hasNorms());
  assertEquals("omitTermFreqAndPositions field bit should be set for f2", IndexOptions.DOCS, fi.fieldInfo("f2").getIndexOptions());
  reader.close();
}
 
Example 6
Source File: Test2BPostings.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes (Integer.MAX_VALUE / 26) + 1 docs so the total number of postings
 * exceeds Integer.MAX_VALUE, using a DOCS-only, norms-free field to keep the
 * index small, then force-merges down to a single segment.
 */
@Nightly
public void test() throws Exception {
  BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BPostings"));
  if (dir instanceof MockDirectoryWrapper) {
    // don't throttle I/O on an already enormous test
    ((MockDirectoryWrapper)dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }

  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()))
      .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
      .setRAMBufferSizeMB(256.0)
      .setMergeScheduler(new ConcurrentMergeScheduler())
      .setMergePolicy(newLogMergePolicy(false, 10))
      .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  
  IndexWriter w = new IndexWriter(dir, iwc);

  MergePolicy mp = w.getConfig().getMergePolicy();
  if (mp instanceof LogByteSizeMergePolicy) {
   // 1 petabyte:
   ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024*1024*1024);
  }

  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setOmitNorms(true);
  ft.setIndexOptions(IndexOptions.DOCS);
  Field field = new Field("field", new MyTokenStream(), ft);
  doc.add(field);
  
  final int numDocs = (Integer.MAX_VALUE / 26) + 1;
  for (int i = 0; i < numDocs; i++) {
    w.addDocument(doc);
    // progress output every 100k docs
    if (VERBOSE && i % 100000 == 0) {
      System.out.println(i + " of " + numDocs + "...");
    }
  }
  w.forceMerge(1);
  w.close();
  dir.close();
}
 
Example 7
Source File: TestDirectoryReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Creates a document with a stored, untokenized, norms-free "id" field. */
static Document createDocument(String id) {
  FieldType idType = new FieldType(TextField.TYPE_STORED);
  idType.setTokenized(false);
  idType.setOmitNorms(true);

  Document document = new Document();
  document.add(newField("id", id, idType));
  return document;
}
 
Example 8
Source File: TestOrdValues.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Adds one doc containing an id field (stored, untokenized, no norms), a
 * searchable text field, and legacy int/float fields with matching doc
 * values used for function scoring.
 */
private static void addDoc(RandomIndexWriter iw, int i) throws Exception {
  Document d = new Document();
  Field f;
  int scoreAndID = i + 1;

  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setTokenized(false);
  customType.setOmitNorms(true);
  
  f = newField(ID_FIELD, id2String(scoreAndID), customType); // for debug purposes
  d.add(f);
  d.add(new SortedDocValuesField(ID_FIELD, new BytesRef(id2String(scoreAndID))));

  FieldType customType2 = new FieldType(TextField.TYPE_NOT_STORED);
  customType2.setOmitNorms(true);
  f = newField(TEXT_FIELD, "text of doc" + scoreAndID + textLine(i), customType2); // for regular search
  d.add(f);

  f = new LegacyIntField(INT_FIELD, scoreAndID, Store.YES); // for function scoring
  d.add(f);
  d.add(new NumericDocValuesField(INT_FIELD, scoreAndID));

  f = new LegacyFloatField(FLOAT_FIELD, scoreAndID, Store.YES); // for function scoring
  d.add(f);
  // store the float's raw bits so the numeric doc value round-trips exactly
  d.add(new NumericDocValuesField(FLOAT_FIELD, Float.floatToRawIntBits(scoreAndID)));

  iw.addDocument(d);
  log("added: " + d);
}
 
Example 9
Source File: TestSloppyPhraseQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Wraps the given text in a single-field ("f") document with norms disabled. */
private static Document makeDocument(String docText) {
  FieldType type = new FieldType(TextField.TYPE_NOT_STORED);
  type.setOmitNorms(true);

  Document document = new Document();
  document.add(new Field("f", docText, type));
  return document;
}
 
Example 10
Source File: ResetableDocumentStoredFieldVisitor.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds a stored text field from the index, mirroring the original
 * field's term-vector, indexed, omit-norms and index-options flags onto the
 * new FieldType, and accumulates an approximate in-memory size.
 */
@Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
  final FieldType ft = new FieldType(TextField.TYPE_STORED);
  ft.setStoreTermVectors(fieldInfo.hasVectors());
  ft.setIndexed(fieldInfo.isIndexed());
  ft.setOmitNorms(fieldInfo.omitsNorms());
  ft.setIndexOptions(fieldInfo.getIndexOptions());
  doc.add(new Field(fieldInfo.name, value, ft));
  // size accounting at 2 bytes per char; _emptyString is presumably the
  // fixed overhead of an empty String — TODO confirm against its definition
  size += _emptyString * 2;
  size += fieldInfo.name.length() * 2;
  size += value.length() * 2;
}
 
Example 11
Source File: BaseSimilarityTestCase.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@BeforeClass
public static void beforeClass() throws Exception {
  // with norms
  // NOTE(review): the comment above says "with norms", but setOmitNorms(true)
  // below disables norms on the field — confirm which one is intended.
  DIR = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), DIR);
  Document doc = new Document();
  FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
  fieldType.setOmitNorms(true);
  doc.add(newField("field", "value", fieldType));
  writer.addDocument(doc);
  // keep only the single leaf reader; the writer is closed afterwards
  READER = getOnlyLeafReader(writer.getReader());
  writer.close();
}
 
Example 12
Source File: AnalyzingInfixSuggester.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Subclass can override this method to change the field type of the text field
 * e.g. to change the index options
 */
protected FieldType getTextFieldType(){
  // not stored, norms omitted, docs-only postings
  FieldType type = new FieldType(TextField.TYPE_NOT_STORED);
  type.setOmitNorms(true);
  type.setIndexOptions(IndexOptions.DOCS);
  return type;
}
 
Example 13
Source File: Test2BPositions.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes (Integer.MAX_VALUE / 26) + 1 docs whose tokens come from
 * MyTokenStream so the total position count overflows an int, with norms
 * omitted, then force-merges down to a single segment.
 */
public void test() throws Exception {
  BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BPositions"));
  if (dir instanceof MockDirectoryWrapper) {
    // don't throttle I/O on an already enormous test
    ((MockDirectoryWrapper)dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }
  
  IndexWriter w = new IndexWriter(dir,
      new IndexWriterConfig(new MockAnalyzer(random()))
      .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
      .setRAMBufferSizeMB(256.0)
      .setMergeScheduler(new ConcurrentMergeScheduler())
      .setMergePolicy(newLogMergePolicy(false, 10))
      .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
      .setCodec(TestUtil.getDefaultCodec()));

  MergePolicy mp = w.getConfig().getMergePolicy();
  if (mp instanceof LogByteSizeMergePolicy) {
    // 1 petabyte:
    ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024*1024*1024);
  }

  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setOmitNorms(true);
  Field field = new Field("field", new MyTokenStream(), ft);
  doc.add(field);
  
  final int numDocs = (Integer.MAX_VALUE / 26) + 1;
  for (int i = 0; i < numDocs; i++) {
    w.addDocument(doc);
    // progress output every 100k docs
    if (VERBOSE && i % 100000 == 0) {
      System.out.println(i + " of " + numDocs + "...");
    }
  }
  w.forceMerge(1);
  w.close();
  dir.close();
}
 
Example 14
Source File: TestOmitNorms.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Tests various combinations of omitNorms=true/false, the field not existing at all,
 * ensuring that only omitNorms is 'viral'.
 * Internally checks that MultiNorms.norms() is consistent (returns the same bytes)
 * as the fully merged equivalent.
 */
public void testOmitNormsCombos() throws IOException {
  // indexed with norms
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  Field norms = new Field("foo", "a", customType);
  // indexed without norms
  FieldType customType1 = new FieldType(TextField.TYPE_STORED);
  customType1.setOmitNorms(true);
  Field noNorms = new Field("foo", "a", customType1);
  // not indexed, but stored
  FieldType customType2 = new FieldType();
  customType2.setStored(true);
  Field noIndex = new Field("foo", "a", customType2);
  // not indexed but stored, omitNorms is set
  FieldType customType3 = new FieldType();
  customType3.setStored(true);
  customType3.setOmitNorms(true);
  Field noNormsNoIndex = new Field("foo", "a", customType3);
  // not indexed nor stored (doesnt exist at all, we index a different field instead)
  Field emptyNorms = new Field("bar", "a", customType);
  
  // norms survive only while every indexed instance of "foo" keeps them;
  // one indexed omitNorms=true instance removes them for the whole field,
  // while non-indexed instances have no effect either way
  assertNotNull(getNorms("foo", norms, norms));
  assertNull(getNorms("foo", norms, noNorms));
  assertNotNull(getNorms("foo", norms, noIndex));
  assertNotNull(getNorms("foo", norms, noNormsNoIndex));
  assertNotNull(getNorms("foo", norms, emptyNorms));
  assertNull(getNorms("foo", noNorms, noNorms));
  assertNull(getNorms("foo", noNorms, noIndex));
  assertNull(getNorms("foo", noNorms, noNormsNoIndex));
  assertNull(getNorms("foo", noNorms, emptyNorms));
  // if "foo" is never actually indexed, there are no norms at all
  assertNull(getNorms("foo", noIndex, noIndex));
  assertNull(getNorms("foo", noIndex, noNormsNoIndex));
  assertNull(getNorms("foo", noIndex, emptyNorms));
  assertNull(getNorms("foo", noNormsNoIndex, noNormsNoIndex));
  assertNull(getNorms("foo", noNormsNoIndex, emptyNorms));
  assertNull(getNorms("foo", emptyNorms, emptyNorms));
}
 
Example 15
Source File: TestMemoryIndex.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** A MemoryIndex field added with omitNorms=true must expose no norm values. */
@Test
public void testOmitNorms() throws IOException {
  MemoryIndex index = new MemoryIndex();

  FieldType type = new FieldType();
  type.setTokenized(true);
  type.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  type.setOmitNorms(true);
  index.addField(new Field("f1", "some text in here", type), analyzer);
  index.freeze();

  LeafReader reader = (LeafReader) index.createSearcher().getIndexReader();
  NumericDocValues norms = reader.getNormValues("f1");
  assertNull(norms);
}
 
Example 16
Source File: DocMaker.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Set the configuration parameters of this doc maker.
 * Reads the doc.* properties from {@code config} and builds two frozen
 * FieldTypes: {@code valType} for general fields and {@code bodyValType}
 * for the body field (which has its own stored/tokenized/norms settings).
 */
public void setConfig(Config config, ContentSource source) {
  this.config = config;
  this.source = source;

  // body settings default to the general settings unless overridden
  boolean stored = config.get("doc.stored", false);
  boolean bodyStored = config.get("doc.body.stored", stored);
  boolean tokenized = config.get("doc.tokenized", true);
  boolean bodyTokenized = config.get("doc.body.tokenized", tokenized);
  boolean norms = config.get("doc.tokenized.norms", false);
  boolean bodyNorms = config.get("doc.body.tokenized.norms", true);
  boolean bodyOffsets = config.get("doc.body.offsets", false);
  boolean termVec = config.get("doc.term.vector", false);
  boolean termVecPositions = config.get("doc.term.vector.positions", false);
  boolean termVecOffsets = config.get("doc.term.vector.offsets", false);
  
  valType = new FieldType(TextField.TYPE_NOT_STORED);
  valType.setStored(stored);
  valType.setTokenized(tokenized);
  // note the inversion: the config flag means "norms on", the API means "omit"
  valType.setOmitNorms(!norms);
  valType.setStoreTermVectors(termVec);
  valType.setStoreTermVectorPositions(termVecPositions);
  valType.setStoreTermVectorOffsets(termVecOffsets);
  valType.freeze();

  bodyValType = new FieldType(TextField.TYPE_NOT_STORED);
  bodyValType.setStored(bodyStored);
  bodyValType.setTokenized(bodyTokenized);
  bodyValType.setOmitNorms(!bodyNorms);
  // offsets in postings are only enabled for tokenized body text
  if (bodyTokenized && bodyOffsets) {
    bodyValType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  }
  bodyValType.setStoreTermVectors(termVec);
  bodyValType.setStoreTermVectorPositions(termVecPositions);
  bodyValType.setStoreTermVectorOffsets(termVecOffsets);
  bodyValType.freeze();

  storeBytes = config.get("doc.store.body.bytes", false);
  
  reuseFields = config.get("doc.reuse.fields", true);

  // In a multi-rounds run, it is important to reset DocState since settings
  // of fields may change between rounds, and this is the only way to reset
  // the cache of all threads.
  docState = new ThreadLocal<>();
  
  indexProperties = config.get("doc.index.props", false);

  updateDocIDLimit = config.get("doc.random.id.limit", -1);
  if (updateDocIDLimit != -1) {
    // fixed seed so update-doc-id runs are reproducible
    r = new Random(179);
  }
}
 
Example 17
Source File: TestDocsAndPositions.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Simple testcase for {@link PostingsEnum}
 */
public void testPositionsSimple() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
      newIndexWriterConfig(new MockAnalyzer(random())));
  // each doc repeats the terms "1".."10" four times, so every term has
  // freq=4 and occupies positions 0, 10, 20 and 30 within each doc
  for (int i = 0; i < 39; i++) {
    Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setOmitNorms(true);
    doc.add(newField(fieldName, "1 2 3 4 5 6 7 8 9 10 "
        + "1 2 3 4 5 6 7 8 9 10 " + "1 2 3 4 5 6 7 8 9 10 "
        + "1 2 3 4 5 6 7 8 9 10", customType));
    writer.addDocument(doc);
  }
  IndexReader reader = writer.getReader();
  writer.close();

  int num = atLeast(13);
  for (int i = 0; i < num; i++) {
    BytesRef bytes = new BytesRef("1");
    IndexReaderContext topReaderContext = reader.getContext();
    for (LeafReaderContext leafReaderContext : topReaderContext.leaves()) {
      PostingsEnum docsAndPosEnum = getDocsAndPositions(
          leafReaderContext.reader(), bytes);
      assertNotNull(docsAndPosEnum);
      if (leafReaderContext.reader().maxDoc() == 0) {
        continue;
      }
      // advance to a random doc, then walk the remaining docs verifying
      // freq and the expected positions of term "1"
      final int advance = docsAndPosEnum.advance(random().nextInt(leafReaderContext.reader().maxDoc()));
      do {
        String msg = "Advanced to: " + advance + " current doc: "
            + docsAndPosEnum.docID(); // TODO: + " usePayloads: " + usePayload;
        assertEquals(msg, 4, docsAndPosEnum.freq());
        assertEquals(msg, 0, docsAndPosEnum.nextPosition());
        assertEquals(msg, 4, docsAndPosEnum.freq());
        assertEquals(msg, 10, docsAndPosEnum.nextPosition());
        assertEquals(msg, 4, docsAndPosEnum.freq());
        assertEquals(msg, 20, docsAndPosEnum.nextPosition());
        assertEquals(msg, 4, docsAndPosEnum.freq());
        assertEquals(msg, 30, docsAndPosEnum.nextPosition());
      } while (docsAndPosEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    }
  }
  reader.close();
  directory.close();
}
 
Example 18
Source File: TestDirectoryReaderReopen.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Indexes batches of docs, committing after each batch, and after every
 * commit either reopens (openIfChanged) or fully recreates the reader
 * depending on {@code withReopen}; verifies that docs from the previous
 * batch are visible through the refreshed reader.
 */
private void doTestReopenWithCommit (Random random, Directory dir, boolean withReopen) throws IOException {
  IndexWriter iwriter = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random))
                                               .setOpenMode(OpenMode.CREATE)
                                               .setMergeScheduler(new SerialMergeScheduler())
                                               .setMergePolicy(newLogMergePolicy()));
  iwriter.commit();
  DirectoryReader reader = DirectoryReader.open(dir);
  try {
    int M = 3;
    // three flavors of id field: untokenized; untokenized + omitNorms; stored-only
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setTokenized(false);
    FieldType customType2 = new FieldType(TextField.TYPE_STORED);
    customType2.setTokenized(false);
    customType2.setOmitNorms(true);
    FieldType customType3 = new FieldType();
    customType3.setStored(true);
    for (int i=0; i<4; i++) {
      for (int j=0; j<M; j++) {
        Document doc = new Document();
        doc.add(newField("id", i+"_"+j, customType));
        doc.add(newField("id2", i+"_"+j, customType2));
        doc.add(newField("id3", i+"_"+j, customType3));
        iwriter.addDocument(doc);
        if (i>0) {
          // the reader was refreshed after the previous batch's commit, so
          // doc n from iteration i-1 must be retrievable with its id intact
          int k = i-1;
          int n = j + k*M;
          Document prevItereationDoc = reader.document(n);
          assertNotNull(prevItereationDoc);
          String id = prevItereationDoc.get("id");
          assertEquals(k+"_"+j, id);
        }
      }
      iwriter.commit();
      if (withReopen) {
        // reopen
        DirectoryReader r2 = DirectoryReader.openIfChanged(reader);
        if (r2 != null) {
          reader.close();
          reader = r2;
        }
      } else {
        // recreate
        reader.close();
        reader = DirectoryReader.open(dir);
      }
    }
  } finally {
    iwriter.close();
    reader.close();
  }
}
 
Example 19
Source File: Test2BTerms.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Builds an index with more than Integer.MAX_VALUE distinct terms (fed by a
 * random MyTokenStream), saves the last terms produced, then verifies they
 * are all findable and that CheckIndex reports a term count above 2^31-1.
 */
public void test2BTerms() throws IOException {

    System.out.println("Starting Test2B");
    final long TERM_COUNT = ((long) Integer.MAX_VALUE) + 100000000;

    final int TERMS_PER_DOC = TestUtil.nextInt(random(), 100000, 1000000);

    List<BytesRef> savedTerms = null;

    BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BTerms"));
    //MockDirectoryWrapper dir = newFSDirectory(new File("/p/lucene/indices/2bindex"));
    if (dir instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper)dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }
    dir.setCheckIndexOnClose(false); // don't double-checkindex

    if (true) {

      IndexWriter w = new IndexWriter(dir,
                                      new IndexWriterConfig(new MockAnalyzer(random()))
                                      .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                                      .setRAMBufferSizeMB(256.0)
                                      .setMergeScheduler(new ConcurrentMergeScheduler())
                                      .setMergePolicy(newLogMergePolicy(false, 10))
                                      .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
                                      .setCodec(TestUtil.getDefaultCodec()));

      MergePolicy mp = w.getConfig().getMergePolicy();
      if (mp instanceof LogByteSizeMergePolicy) {
        // 1 petabyte:
        ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024*1024*1024);
      }

      Document doc = new Document();
      final MyTokenStream ts = new MyTokenStream(random(), TERMS_PER_DOC);

      // DOCS-only postings with norms omitted: minimal per-term data at this scale
      FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
      customType.setIndexOptions(IndexOptions.DOCS);
      customType.setOmitNorms(true);
      Field field = new Field("field", ts, customType);
      doc.add(field);
      //w.setInfoStream(System.out);
      final int numDocs = (int) (TERM_COUNT/TERMS_PER_DOC);

      System.out.println("TERMS_PER_DOC=" + TERMS_PER_DOC);
      System.out.println("numDocs=" + numDocs);

      for(int i=0;i<numDocs;i++) {
        final long t0 = System.currentTimeMillis();
        w.addDocument(doc);
        System.out.println(i + " of " + numDocs + " " + (System.currentTimeMillis()-t0) + " msec");
      }
      savedTerms = ts.savedTerms;

      System.out.println("TEST: full merge");
      w.forceMerge(1);
      System.out.println("TEST: close writer");
      w.close();
    }

    System.out.println("TEST: open reader");
    final IndexReader r = DirectoryReader.open(dir);
    if (savedTerms == null) {
      savedTerms = findTerms(r);
    }
    final int numSavedTerms = savedTerms.size();
    // the last 10 saved terms have the highest term ordinals — exercise big ords
    final List<BytesRef> bigOrdTerms = new ArrayList<>(savedTerms.subList(numSavedTerms-10, numSavedTerms));
    System.out.println("TEST: test big ord terms...");
    testSavedTerms(r, bigOrdTerms);
    System.out.println("TEST: test all saved terms...");
    testSavedTerms(r, savedTerms);
    r.close();

    System.out.println("TEST: now CheckIndex...");
    CheckIndex.Status status = TestUtil.checkIndex(dir);
    final long tc = status.segmentInfos.get(0).termIndexStatus.termCount;
    assertTrue("count " + tc + " is not > " + Integer.MAX_VALUE, tc > Integer.MAX_VALUE);

    dir.close();
    System.out.println("TEST: done!");
  }
 
Example 20
Source File: TestIndexWriter.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Indexes docs whose fields do NOT enable term vectors and verifies that no
 * term-vector files are created or left unreferenced in the directory.
 */
public void testNoUnwantedTVFiles() throws Exception {

    Directory dir = newDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                                     .setRAMBufferSizeMB(0.01)
                                                     .setMergePolicy(newLogMergePolicy()));
    indexWriter.getConfig().getMergePolicy().setNoCFSRatio(0.0);

    // long value combined with the tiny RAM buffer forces real flushes
    String BIG="alskjhlaksjghlaksjfhalksvjepgjioefgjnsdfjgefgjhelkgjhqewlrkhgwlekgrhwelkgjhwelkgrhwlkejg";
    BIG=BIG+BIG+BIG+BIG;

    // several FieldType combos (omitNorms / untokenized), none with term vectors
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setOmitNorms(true);
    FieldType customType2 = new FieldType(TextField.TYPE_STORED);
    customType2.setTokenized(false);
    FieldType customType3 = new FieldType(TextField.TYPE_STORED);
    customType3.setTokenized(false);
    customType3.setOmitNorms(true);

    for (int i=0; i<2; i++) {
      Document doc = new Document();
      doc.add(new Field("id", Integer.toString(i)+BIG, customType3));
      doc.add(new Field("str", Integer.toString(i)+BIG, customType2));
      doc.add(new Field("str2", Integer.toString(i)+BIG, storedTextType));
      doc.add(new Field("str3", Integer.toString(i)+BIG, customType));
      indexWriter.addDocument(doc);
    }

    indexWriter.close();

    TestUtil.checkIndex(dir);

    assertNoUnreferencedFiles(dir, "no tv files");
    DirectoryReader r0 = DirectoryReader.open(dir);
    for (LeafReaderContext ctx : r0.leaves()) {
      SegmentReader sr = (SegmentReader) ctx.reader();
      assertFalse(sr.getFieldInfos().hasVectors());
    }

    r0.close();
    dir.close();
  }