Java Code Examples for org.apache.lucene.document.FieldType#setStoreTermVectorPayloads()

The following examples show how to use org.apache.lucene.document.FieldType#setStoreTermVectorPayloads(). They are drawn from open-source projects; the source file, project, and license are noted above each example.
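All of the examples below exercise the same constraint: term vector payloads can only be indexed for a field that is itself indexed, stores term vectors, and stores term vector positions (see the error messages asserted in Examples 1-3). A minimal sketch of a valid configuration, using an illustrative helper name and placeholder field values rather than code from the examples, might look like this:

// Minimal sketch (helper and field names are illustrative only).
Field newPayloadVectorField(String name, String value) {
  FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
  vectorType.setStoreTermVectors(true);          // required: payloads are rejected when term vectors are off
  vectorType.setStoreTermVectorPositions(true);  // required: payloads are rejected without positions
  vectorType.setStoreTermVectorPayloads(true);
  vectorType.freeze();                           // optional: make the FieldType immutable before reuse
  return new Field(name, value, vectorType);
}
 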
Example 1
Source File: TestTermVectorsReader.java    From lucene-solr with Apache License 2.0
public void testIllegalPayloadsWithoutPositions() throws Exception {
  Directory dir = newDirectory();
  MockAnalyzer a = new MockAnalyzer(random());
  a.setEnableChecks(false);
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, a);
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorPayloads(true);
  Document doc = new Document();
  doc.add(new Field("field", "value", ft));

  IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
    w.addDocument(doc);
  });
  assertEquals("cannot index term vector payloads without term vector positions (field=\"field\")", expected.getMessage());
  
  w.close();
  dir.close();
}
 
Example 2
Source File: TestTermVectorsReader.java    From lucene-solr with Apache License 2.0
public void testIllegalVectorPayloadsWithoutVectors() throws Exception {
  Directory dir = newDirectory();
  MockAnalyzer a = new MockAnalyzer(random());
  a.setEnableChecks(false);
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, a);
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setStoreTermVectors(false);
  ft.setStoreTermVectorPayloads(true);
  Document doc = new Document();
  doc.add(new Field("field", "value", ft));
  
  IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
    w.addDocument(doc);
  });
  assertEquals("cannot index term vector payloads when term vectors are not indexed (field=\"field\")", expected.getMessage());
  
  w.close();
  dir.close();
}
 
Example 3
Source File: TestTermVectorsReader.java    From lucene-solr with Apache License 2.0
public void testIllegalVectorPayloadsWithoutIndexed() throws Exception {
  Directory dir = newDirectory();
  MockAnalyzer a = new MockAnalyzer(random());
  a.setEnableChecks(false);
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, a);
  FieldType ft = new FieldType(StoredField.TYPE);
  ft.setStoreTermVectorPayloads(true);
  Document doc = new Document();
  doc.add(new Field("field", "value", ft));
  
  IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
    w.addDocument(doc);
  });
  assertEquals("cannot store term vector payloads for a field that is not indexed (field=\"field\")", expected.getMessage());

  w.close();
  dir.close();
}
 
Example 4
Source File: TestPayloadsOnVectors.java    From lucene-solr with Apache License 2.0
public void testPayloadsWithoutPositions() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(false);
  customType.setStoreTermVectorPayloads(true);
  customType.setStoreTermVectorOffsets(random().nextBoolean());
  doc.add(new Field("field", "foo", customType));

  expectThrows(IllegalArgumentException.class, () -> {
    writer.addDocument(doc);
  });

  writer.close();
  dir.close();
}
 
Example 5
Source File: TestBlockPostingsFormat2.java    From lucene-solr with Apache License 2.0
private Document newDocument() {
  Document doc = new Document();
  for (IndexOptions option : IndexOptions.values()) {
    if (option == IndexOptions.NONE) {
      continue;
    }
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    // turn on tvs for a cross-check, since we rely upon checkindex in this test (for now)
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorOffsets(true);
    ft.setStoreTermVectorPositions(true);
    ft.setStoreTermVectorPayloads(true);
    ft.setIndexOptions(option);
    doc.add(new Field(option.toString(), "", ft));
  }
  return doc;
}
 
Example 6
Source File: IndexOptionsDialogFactory.java    From lucene-solr with Apache License 2.0
private void saveOptions() {
  nf.setStored(storedCB.isSelected());
  if (nf.getType().equals(Field.class)) {
    FieldType ftype = (FieldType) nf.getFieldType();
    ftype.setStored(storedCB.isSelected());
    ftype.setTokenized(tokenizedCB.isSelected());
    ftype.setOmitNorms(omitNormsCB.isSelected());
    ftype.setIndexOptions(IndexOptions.valueOf((String) idxOptCombo.getSelectedItem()));
    ftype.setStoreTermVectors(storeTVCB.isSelected());
    ftype.setStoreTermVectorPositions(storeTVPosCB.isSelected());
    ftype.setStoreTermVectorOffsets(storeTVOffCB.isSelected());
    ftype.setStoreTermVectorPayloads(storeTVPayCB.isSelected());
  }
  dialog.dispose();
}
 
Example 7
Source File: BaseTermVectorsFormatTestCase.java    From lucene-solr with Apache License 2.0
protected FieldType fieldType(Options options) {
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorPositions(options.positions);
  ft.setStoreTermVectorOffsets(options.offsets);
  ft.setStoreTermVectorPayloads(options.payloads);
  ft.freeze();
  return ft;
}
 
Example 8
Source File: CountingTermsTest.java    From lucene-query-example with Apache License 2.0
Field newFieldAllOn(String name, String value) {
	FieldType tagsFieldType = new FieldType();
	tagsFieldType.setStored(true);
	IndexOptions opts = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
	tagsFieldType.setIndexOptions(opts);
	tagsFieldType.setOmitNorms(true);
	tagsFieldType.setStoreTermVectors(true);
	tagsFieldType.setStoreTermVectorPositions(true);
	tagsFieldType.setStoreTermVectorPayloads(true);
	return new Field(name, value, tagsFieldType);
}
 
Example 9
Source File: TokenSourcesTest.java    From lucene-solr with Apache License 2.0
public void testPayloads() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  FieldType myFieldType = new FieldType(TextField.TYPE_NOT_STORED);
  myFieldType.setStoreTermVectors(true);
  myFieldType.setStoreTermVectorOffsets(true);
  myFieldType.setStoreTermVectorPositions(true);
  myFieldType.setStoreTermVectorPayloads(true);

  curOffset = 0;

  Token[] tokens = new Token[] {
    getToken("foxes"),
    getToken("can"),
    getToken("jump"),
    getToken("high")
  };

  Document doc = new Document();
  doc.add(new Field("field", new CannedTokenStream(tokens), myFieldType));
  writer.addDocument(doc);

  IndexReader reader = writer.getReader();
  writer.close();
  assertEquals(1, reader.numDocs());

  TokenStream ts = TokenSources.getTermVectorTokenStreamOrNull("field", reader.getTermVectors(0), -1);

  CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
  PositionIncrementAttribute posIncAtt = ts.getAttribute(PositionIncrementAttribute.class);
  OffsetAttribute offsetAtt = ts.getAttribute(OffsetAttribute.class);
  PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);

  ts.reset();
  for(Token token : tokens) {
    assertTrue(ts.incrementToken());
    assertEquals(token.toString(), termAtt.toString());
    assertEquals(token.getPositionIncrement(), posIncAtt.getPositionIncrement());
    assertEquals(token.getPayload(), payloadAtt.getPayload());
    assertEquals(token.startOffset(), offsetAtt.startOffset());
    assertEquals(token.endOffset(), offsetAtt.endOffset());
  }

  assertFalse(ts.incrementToken());

  reader.close();
  dir.close();
}
 
Example 10
Source File: TokenSourcesTest.java    From lucene-solr with Apache License 2.0
@Repeat(iterations = 10)
//@Seed("947083AB20AB2D4F")
public void testRandomizedRoundTrip() throws Exception {
  final int distinct = TestUtil.nextInt(random(), 1, 10);

  String[] terms = new String[distinct];
  BytesRef[] termBytes = new BytesRef[distinct];
  for (int i = 0; i < distinct; ++i) {
    terms[i] = TestUtil.randomRealisticUnicodeString(random());
    termBytes[i] = new BytesRef(terms[i]);
  }

  final BaseTermVectorsFormatTestCase.RandomTokenStream rTokenStream =
      new BaseTermVectorsFormatTestCase.RandomTokenStream(TestUtil.nextInt(random(), 1, 10), terms, termBytes);
  //check to see if the token streams might have non-deterministic testable result
  final boolean storeTermVectorPositions = random().nextBoolean();
  final int[] startOffsets = rTokenStream.getStartOffsets();
  final int[] positionsIncrements = rTokenStream.getPositionsIncrements();
  for (int i = 1; i < positionsIncrements.length; i++) {
    if (storeTermVectorPositions && positionsIncrements[i] != 0) {
      continue;
    }
    //TODO should RandomTokenStream ensure endOffsets for tokens at same position and same startOffset are greater
    // than previous token's endOffset?  That would increase the testable possibilities.
    if (startOffsets[i] == startOffsets[i-1]) {
      if (VERBOSE)
        System.out.println("Skipping test because can't easily validate random token-stream is correct.");
      rTokenStream.close();
      return;
    }
  }

  //sanity check itself
  assertTokenStreamContents(rTokenStream,
      rTokenStream.getTerms(), rTokenStream.getStartOffsets(), rTokenStream.getEndOffsets(),
      rTokenStream.getPositionsIncrements());

  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  FieldType myFieldType = new FieldType(TextField.TYPE_NOT_STORED);
  myFieldType.setStoreTermVectors(true);
  myFieldType.setStoreTermVectorOffsets(true);
  myFieldType.setStoreTermVectorPositions(storeTermVectorPositions);
  //payloads require positions; it will throw an error otherwise
  myFieldType.setStoreTermVectorPayloads(storeTermVectorPositions && random().nextBoolean());

  Document doc = new Document();
  doc.add(new Field("field", rTokenStream, myFieldType));
  writer.addDocument(doc);

  IndexReader reader = writer.getReader();
  writer.close();
  assertEquals(1, reader.numDocs());

  TokenStream vectorTokenStream =
      TokenSources.getTermVectorTokenStreamOrNull("field", reader.getTermVectors(0), -1);

  //sometimes check payloads
  PayloadAttribute payloadAttribute = null;
  if (myFieldType.storeTermVectorPayloads() && usually()) {
    payloadAttribute = vectorTokenStream.addAttribute(PayloadAttribute.class);
  }
  assertTokenStreamContents(vectorTokenStream,
      rTokenStream.getTerms(), rTokenStream.getStartOffsets(), rTokenStream.getEndOffsets(),
      myFieldType.storeTermVectorPositions() ? rTokenStream.getPositionsIncrements() : null);
  //test payloads
  if (payloadAttribute != null) {
    vectorTokenStream.reset();
    for (int i = 0; vectorTokenStream.incrementToken(); i++) {
      assertEquals(rTokenStream.getPayloads()[i], payloadAttribute.getPayload());
    }
  }

  reader.close();
  dir.close();
  rTokenStream.close();
}
 
Example 11
Source File: TestMemoryIndexAgainstDirectory.java    From lucene-solr with Apache License 2.0
public void testDuelMemoryIndexCoreDirectoryWithArrayField() throws Exception {
  final String field_name = "text";
  MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
  if (random().nextBoolean()) {
    mockAnalyzer.setOffsetGap(random().nextInt(100));
  }
  //index into a random directory
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPayloads(false);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();

  Document doc = new Document();
  doc.add(new Field(field_name, "la la", type));
  doc.add(new Field(field_name, "foo bar foo bar foo", type));

  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
  writer.updateDocument(new Term("id", "1"), doc);
  writer.commit();
  writer.close();
  DirectoryReader reader = DirectoryReader.open(dir);

  //Index document in Memory index
  MemoryIndex memIndex = new MemoryIndex(true);
  memIndex.addField(field_name, "la la", mockAnalyzer);
  memIndex.addField(field_name, "foo bar foo bar foo", mockAnalyzer);

  //compare term vectors
  Terms ramTv = reader.getTermVector(0, field_name);
  IndexReader memIndexReader = memIndex.createSearcher().getIndexReader();
  TestUtil.checkReader(memIndexReader);
  Terms memTv = memIndexReader.getTermVector(0, field_name);

  compareTermVectors(ramTv, memTv, field_name);
  memIndexReader.close();
  reader.close();
  dir.close();
}
 
Example 12
Source File: LuceneTestCase.java    From lucene-solr with Apache License 2.0
public synchronized static Field newField(Random random, String name, Object value, FieldType type) {
  // Defeat any consumers that illegally rely on intern'd
  // strings (we removed this from Lucene a while back):
  name = new String(name);

  FieldType prevType = fieldToType.get(name);

  if (usually(random) || type.indexOptions() == IndexOptions.NONE || prevType != null) {
    // most of the time, don't modify the params
    if (prevType == null) {
      fieldToType.put(name, new FieldType(type));
    } else {
      type = mergeTermVectorOptions(type, prevType);
    }

    return createField(name, value, type);
  }

  // TODO: once all core & test codecs can index
  // offsets, sometimes randomly turn on offsets if we are
  // already indexing positions...

  FieldType newType = new FieldType(type);
  if (!newType.stored() && random.nextBoolean()) {
    newType.setStored(true); // randomly store it
  }

  // Randomly turn on term vector options, but always do
  // so consistently for the same field name:
  if (!newType.storeTermVectors() && random.nextBoolean()) {
    newType.setStoreTermVectors(true);
    if (!newType.storeTermVectorPositions()) {
      newType.setStoreTermVectorPositions(random.nextBoolean());

      if (newType.storeTermVectorPositions()) {
        if (!newType.storeTermVectorPayloads()) {
          newType.setStoreTermVectorPayloads(random.nextBoolean());
        }
      }
    }

    if (!newType.storeTermVectorOffsets()) {
      newType.setStoreTermVectorOffsets(random.nextBoolean());
    }

    if (VERBOSE) {
      System.out.println("NOTE: LuceneTestCase: upgrade name=" + name + " type=" + newType);
    }
  }
  newType.freeze();
  fieldToType.put(name, newType);

  // TODO: we need to do this, but smarter, ie, most of
  // the time we set the same value for a given field but
  // sometimes (rarely) we change it up:
  /*
  if (newType.omitNorms()) {
    newType.setOmitNorms(random.nextBoolean());
  }
  */

  return createField(name, value, newType);
}