Java Code Examples for org.apache.lucene.document.Document#getFields()

The following examples show how to use org.apache.lucene.document.Document#getFields(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: TestBlockPostingsFormat2.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** tests terms with ttf % blocksize = 0 */
public void testTTFBlockSizeMultiple() throws Exception {
  Document doc = newDocument();
  for (int i = 0; i < Lucene50PostingsFormat.BLOCK_SIZE/2; i++) {
    for (IndexableField field : doc.getFields()) {
      String name = field.name();
      // four occurrences of the base term and four of its "_2" variant per unit
      String unit = name + " " + name + " " + name + " " + name + " "
                  + name + "_2 " + name + "_2 " + name + "_2 " + name + "_2";
      // repeat the unit 16 times so total term frequency is a multiple of the block size
      StringBuilder value = new StringBuilder();
      int repeats = 16;
      while (repeats-- > 0) {
        value.append(unit).append(' ');
      }
      ((Field) field).setStringValue(value.toString());
    }
    iw.addDocument(doc);
  }
}
 
Example 2
Source File: TestStressIndexing2.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that the two documents contain the same fields with equal values.
 * Fields are compared pairwise after sorting by field name, so the order in
 * which fields were added to each document does not matter.
 *
 * @param d1 first document
 * @param d2 second document
 */
public static void verifyEquals(Document d1, Document d2) {
  List<IndexableField> ff1 = new ArrayList<>(d1.getFields());
  List<IndexableField> ff2 = new ArrayList<>(d2.getFields());

  Collections.sort(ff1, fieldNameComparator);
  Collections.sort(ff2, fieldNameComparator);

  assertEquals(ff1 + " : " + ff2, ff1.size(), ff2.size());

  for (int i = 0; i < ff1.size(); i++) {
    IndexableField f1 = ff1.get(i);
    IndexableField f2 = ff2.get(i);
    if (f1.binaryValue() != null) {
      // Use a JUnit assertion rather than the JVM `assert` keyword, which is
      // silently skipped unless the JVM is started with -ea.
      assertNotNull(ff1 + " : " + ff2, f2.binaryValue());
    } else {
      String s1 = f1.stringValue();
      String s2 = f2.stringValue();
      assertEquals(ff1 + " : " + ff2, s1, s2);
    }
  }
}
 
Example 3
Source File: TestFieldsReader.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
@BeforeClass
public static void beforeClass() throws Exception {
  testDoc = new Document();
  fieldInfos = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null));
  DocHelper.setupDoc(testDoc);
  // Mirror each document field's indexing options into the FieldInfos builder.
  for (IndexableField docField : testDoc.getFields()) {
    FieldInfo info = fieldInfos.getOrAdd(docField.name());
    IndexableFieldType type = docField.fieldType();
    info.setIndexOptions(type.indexOptions());
    if (type.omitNorms()) {
      info.setOmitsNorms();
    }
    info.setDocValuesType(type.docValuesType());
  }
  // Write the prepared document into a fresh directory without compound files.
  dir = newDirectory();
  IndexWriterConfig config =
      newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy());
  config.getMergePolicy().setNoCFSRatio(0.0);
  IndexWriter writer = new IndexWriter(dir, config);
  writer.addDocument(testDoc);
  writer.close();
}
 
Example 4
Source File: SimpleNaiveBayesDocumentClassifier.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Performs the analysis of the seed document, extracting per-field token arrays
 * and any boost encoded in the field name (as {@code fieldName^boost}).
 * This is done only once for the seed document.
 *
 * @param inputDocument         the unseen seed document
 * @param fieldName2tokensArray maps a field name to the token arrays of all its values
 * @param fieldName2boost       maps a field name to its boost
 * @throws IOException If there is a low-level I/O error
 */
private void analyzeSeedDocument(Document inputDocument, Map<String, List<String[]>> fieldName2tokensArray, Map<String, Float> fieldName2boost) throws IOException {
  for (int i = 0; i < textFieldNames.length; i++) {
    String currentField = textFieldNames[i];
    float boost = 1;
    List<String[]> tokenizedValues = new LinkedList<>();
    if (currentField.contains("^")) {
      // the boost is appended to the configured field name, e.g. "title^2.0"
      String[] nameAndBoost = currentField.split("\\^");
      currentField = nameAndBoost[0];
      boost = Float.parseFloat(nameAndBoost[1]);
    }
    for (IndexableField fieldValue : inputDocument.getFields(currentField)) {
      TokenStream stream = fieldValue.tokenStream(field2analyzer.get(currentField), null);
      tokenizedValues.add(getTokenArray(stream));
    }
    fieldName2tokensArray.put(currentField, tokenizedValues);
    fieldName2boost.put(currentField, boost);
    // keep the bare field name (boost suffix stripped) for later lookups
    textFieldNames[i] = currentField;
  }
}
 
Example 5
Source File: TestBlockPostingsFormat2.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** tests terms with df % blocksize = 0 */
public void testDFBlockSizeMultiple() throws Exception {
  Document doc = newDocument();
  // index enough documents that document frequency is a multiple of the block size
  int docCount = Lucene50PostingsFormat.BLOCK_SIZE * 16;
  for (int i = 0; i < docCount; i++) {
    for (IndexableField field : doc.getFields()) {
      String name = field.name();
      ((Field) field).setStringValue(name + " " + name + "_2");
    }
    iw.addDocument(doc);
  }
}
 
Example 6
Source File: AclDiscoverFieldTypeDefinitionTest.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a query against the secured searcher and asserts that exactly
 * {@code expected} hits come back, each exposing only discoverable fields.
 *
 * @param expected                number of hits the query must return
 * @param rowQuery                whether to parse as a row query
 * @param discoverAuthorizations  authorizations used for field discovery
 */
private void test(int expected, boolean rowQuery, Collection<String> discoverAuthorizations) throws IOException,
    ParseException {
  DirectoryReader reader = DirectoryReader.open(_dir);
  try {
    SuperParser parser = new SuperParser(Version.LUCENE_43, _fieldManager, rowQuery, null, ScoreType.SUPER, new Term(
        BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE));

    Query query = parser.parse("fam.string:value");

    Collection<String> readAuthorizations = null;
    Set<String> discoverableFields = new HashSet<String>();
    discoverableFields.add("rowid");
    discoverableFields.add("recordid");
    discoverableFields.add("family");
    IndexSearcher searcher = new SecureIndexSearcher(reader, getAccessControlFactory(), readAuthorizations,
        discoverAuthorizations, discoverableFields, null);

    TopDocs topDocs = searcher.search(query, 10);
    assertEquals(expected, topDocs.totalHits);
    for (int i = 0; i < expected; i++) {
      int doc = topDocs.scoreDocs[i].doc;
      Document document = searcher.doc(doc);
      List<IndexableField> fields = document.getFields();
      for (IndexableField field : fields) {
        assertTrue(discoverableFields.contains(field.name()));
      }
    }
  } finally {
    // close in a finally block so a failed assertion cannot leak the reader
    reader.close();
  }
}
 
Example 7
Source File: MutatableActionTest.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
@Test
public void testReplaceRecord() throws IOException {
  RAMDirectory directory = new RAMDirectory();
  DirectoryReader reader = getIndexReader(directory);
  IndexWriter writer = new IndexWriter(directory, _conf.clone());
  assertEquals(0, reader.numDocs());

  // Index an initial row containing one record with a single column.
  Row row = genRow();
  List<Column> cols = new ArrayList<Column>();
  cols.add(new Column("n", "v"));
  row.addToRecords(new Record("1", "fam", cols));

  _action.replaceRow(row);
  _action.performMutate(getSearcher(reader, directory), writer);
  reader = commitAndReopen(reader, writer);
  assertEquals(2, reader.numDocs());

  // Replace the record with one carrying an extra column.
  cols.add(new Column("n2", "v2"));
  Record record = new Record("1", "fam", cols);
  _action.replaceRecord(row.getId(), record);
  _action.performMutate(getSearcher(reader, directory), writer);
  reader = commitAndReopen(reader, writer);
  assertEquals(2, reader.numDocs());

  IndexSearcher searcher = new IndexSearcher(reader);
  TopDocs topDocs = searcher.search(new TermQuery(new Term(BlurConstants.ROW_ID, row.getId())), 10);
  Document doc2 = searcher.doc(topDocs.scoreDocs[1].doc);
  List<IndexableField> fields = doc2.getFields();
  // JUnit convention is assertEquals(expected, actual); the original had the
  // arguments swapped, producing a misleading message on failure.
  assertEquals(5, fields.size());
  String value = doc2.get("fam.n2");
  assertEquals("v2", value);
}
 
Example 8
Source File: MutatableActionTest.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
@Test
public void testReplaceColumns() throws IOException {
  RAMDirectory dir = new RAMDirectory();
  DirectoryReader indexReader = getIndexReader(dir);
  IndexWriter indexWriter = new IndexWriter(dir, _conf.clone());
  assertEquals(0, indexReader.numDocs());

  // Index an initial row containing one record with two columns.
  Row row = genRow();
  List<Column> columns = new ArrayList<Column>();
  columns.add(new Column("n", "v"));
  columns.add(new Column("n1", "v1"));
  row.addToRecords(new Record("1", "fam", columns));

  _action.replaceRow(row);
  _action.performMutate(getSearcher(indexReader, dir), indexWriter);
  indexReader = commitAndReopen(indexReader, indexWriter);
  assertEquals(2, indexReader.numDocs());

  // Replace only column "n1" with a new value.
  columns.clear();
  columns.add(new Column("n1", "v2"));
  Record replacement = new Record("1", "fam", columns);
  _action.replaceColumns(row.getId(), replacement);
  _action.performMutate(getSearcher(indexReader, dir), indexWriter);
  indexReader = commitAndReopen(indexReader, indexWriter);
  assertEquals(2, indexReader.numDocs());

  // Verify the mutated record still has five fields and the new value.
  IndexSearcher searcher = new IndexSearcher(indexReader);
  TermQuery rowQuery = new TermQuery(new Term(BlurConstants.ROW_ID, row.getId()));
  TopDocs hits = searcher.search(rowQuery, 10);
  Document mutated = searcher.doc(hits.scoreDocs[1].doc);
  List<IndexableField> storedFields = mutated.getFields();
  assertEquals(5, storedFields.size());
  assertEquals("v2", mutated.get("fam.n1"));
}
 
Example 9
Source File: MoreLikeThis.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Find words for a more-like-this query former.
 *
 * @param docNum the id of the lucene document from which to find terms
 */
private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
  Map<String, Map<String, Int>> field2termFreqMap = new HashMap<>();
  // Fetch the term vectors once: the call does not depend on the field name,
  // so keeping it inside the loop repeated the same (potentially costly) read
  // for every field.
  final Fields vectors = ir.getTermVectors(docNum);
  for (String fieldName : fieldNames) {
    final Terms vector;
    if (vectors != null) {
      vector = vectors.terms(fieldName);
    } else {
      vector = null;
    }

    // field does not store term vector info
    if (vector == null) {
      // fall back to re-analyzing the stored field values
      Document d = ir.document(docNum);
      IndexableField[] fields = d.getFields(fieldName);
      for (IndexableField field : fields) {
        final String stringValue = field.stringValue();
        if (stringValue != null) {
          addTermFrequencies(new StringReader(stringValue), field2termFreqMap, fieldName);
        }
      }
    } else {
      addTermFrequencies(field2termFreqMap, vector, fieldName);
    }
  }

  return createQueue(field2termFreqMap);
}
 
Example 10
Source File: LuceneIndexTest.java    From rdf4j with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Asserts that the statement's object value is NOT stored in the given
 * document under the statement's property field.
 *
 * @param statement the statement whose object must not appear in the document
 * @param document  the Lucene document to inspect
 */
private void assertNoStatement(Statement statement, Document document) {
	IndexableField[] fields = document.getFields(SearchFields.getPropertyField(statement.getPredicate()));
	if (fields == null) {
		return;
	}
	for (IndexableField f : fields) {
		// compare the literal's label against each stored field value
		if (((Literal) statement.getObject()).getLabel().equals(f.stringValue())) {
			fail("Statement should not be found in document " + statement);
		}
	}

}
 
Example 11
Source File: MutatableActionTest.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
@Test
public void testAppendColumns() throws IOException {
  RAMDirectory directory = new RAMDirectory();
  DirectoryReader reader = getIndexReader(directory);
  IndexWriter writer = new IndexWriter(directory, _conf.clone());
  assertEquals(0, reader.numDocs());

  // Index an initial row containing one record with a single column.
  Row row = genRow();
  List<Column> cols = new ArrayList<Column>();
  cols.add(new Column("n", "v"));
  row.addToRecords(new Record("1", "fam", cols));

  _action.replaceRow(row);
  _action.performMutate(getSearcher(reader, directory), writer);
  reader = commitAndReopen(reader, writer);
  assertEquals(2, reader.numDocs());

  // Append a new column to the existing record.
  cols.clear();
  cols.add(new Column("n2", "v2"));
  Record record = new Record("1", "fam", cols);
  _action.appendColumns(row.getId(), record);
  _action.performMutate(getSearcher(reader, directory), writer);
  reader = commitAndReopen(reader, writer);
  assertEquals(2, reader.numDocs());

  IndexSearcher searcher = new IndexSearcher(reader);
  TopDocs topDocs = searcher.search(new TermQuery(new Term(BlurConstants.ROW_ID, row.getId())), 10);
  Document doc2 = searcher.doc(topDocs.scoreDocs[1].doc);
  List<IndexableField> fields = doc2.getFields();
  // JUnit convention is assertEquals(expected, actual); the original had the
  // arguments swapped, producing a misleading message on failure.
  assertEquals(5, fields.size());
  String value = doc2.get("fam.n2");
  assertEquals("v2", value);
}
 
Example 12
Source File: LuceneIndex.java    From rdf4j with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
protected synchronized SearchDocument copyDocument(SearchDocument doc) {
	Document source = ((LuceneDocument) doc).getDocument();
	Document copy = new Document();

	// carry over every existing field (id, uri, context, text, ...) into the fresh document
	for (IndexableField field : source.getFields()) {
		copy.add(field);
	}
	return new LuceneDocument(copy, geoStrategyMapper);
}
 
Example 13
Source File: TestBlockPostingsFormat2.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** tests terms with ttf = blocksize */
public void testTTFBlockSize() throws Exception {
  Document doc = newDocument();
  // two occurrences per document of each term, over BLOCK_SIZE/2 docs = one full block
  int iterations = Lucene50PostingsFormat.BLOCK_SIZE/2;
  for (int i = 0; i < iterations; i++) {
    for (IndexableField field : doc.getFields()) {
      String name = field.name();
      ((Field) field).setStringValue(name + " " + name + " " + name + "_2 " + name + "_2");
    }
    iw.addDocument(doc);
  }
}
 
Example 14
Source File: ReferenceCountingReadOnlyIndexReaderFactory.java    From alfresco-repository with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Returns the value of the last "ID" field stored on document {@code n},
 * or {@code null} when the document carries no "ID" field.
 */
public String getPathLinkId(int n) throws IOException
{
    Document document = document(n, new SingleFieldSelector("ID", true));
    Field[] fields = document.getFields("ID");
    // Guard against an empty array: the original indexed
    // fields[fields.length - 1] unconditionally, which throws
    // ArrayIndexOutOfBoundsException when no "ID" field is stored,
    // leaving the (field == null) check unreachable for that case.
    if (fields.length == 0)
    {
        return null;
    }
    Field field = fields[fields.length - 1];
    return (field == null) ? null : field.stringValue();
}
 
Example 15
Source File: ReferenceCountingReadOnlyIndexReaderFactory.java    From alfresco-repository with GNU Lesser General Public License v3.0 5 votes vote down vote up
public List<Field> get(int n, FieldSelector fieldSelector) throws IOException
{
    Document document = ReferenceCountingReadOnlyIndexReader.super.document(n, fieldSelector);
    // copy the stored-field array into a list sized to fit
    Field[] stored = document.getFields(fieldName);
    ArrayList<Field> cacheable = new ArrayList<Field>(stored.length);
    for (int i = 0; i < stored.length; i++)
    {
        cacheable.add(stored[i]);
    }
    return cacheable;
}
 
Example 16
Source File: ReferenceCountingReadOnlyIndexReaderFactory.java    From alfresco-repository with GNU Lesser General Public License v3.0 5 votes vote down vote up
public List<Field> get(int n, FieldSelector fieldSelector) throws IOException
{
    Document document = ReferenceCountingReadOnlyIndexReader.super.document(n, fieldSelector);
    // copy every stored field of the document into an independent list
    List<Field> stored = (List<Field>) document.getFields();
    return new ArrayList<Field>(stored);
}
 
Example 17
Source File: IndexSearcherTest.java    From incubator-retired-blur with Apache License 2.0 4 votes vote down vote up
private void validateDiscoverFields(Document doc, Collection<String> discoverableFields) {
  Set<String> allowed = new HashSet<String>(discoverableFields);
  // every field present on the document must be one of the discoverable fields
  for (IndexableField field : doc.getFields()) {
    assertTrue(allowed.contains(field.name()));
  }
}
 
Example 18
Source File: LuceneRecord.java    From HongsCORE with MIT License 4 votes vote down vote up
/**
 * Populates the result data (fills values from doc into map).
 * Override this method to supply additional data.
 *
 * @param doc the Lucene document to read stored values from
 * @param map the output map to fill
 * @param rep the set of field names to include; null or empty means all fields
 */
protected void padDat(Document doc, Map map, Set rep) {
    // treat an empty selection as "no filter"
    if (rep != null && rep.isEmpty( )) {
        rep  = null;
    }

    Map<String, Map> fields = getFields();
    for(Map.Entry<String, Map> e : fields.entrySet()) {
        Map    m = e.getValue();
        String k = e.getKey  ();

        // skip fields not requested by the caller
        if (rep != null
        && !rep.contains(k)) {
            continue;
        }

        // skip the meta field "@" and fields that are unstated or unstored
        if (k == null
        ||  k.equals("@")
        ||  unstated( m )
        ||  unstored( m )) {
            continue;
        }

        IValue  v ;
        String  t = datatype(m);
        boolean r = repeated(m);
        IndexableField[] fs = doc.getFields(k);

        // choose a value converter based on the field's declared datatype
        if (t != null) switch (t) {
        case "search":
        case "sorted":
            continue; // purely functional fields have no visible value
        case "date":
            // timestamps need to be multiplied by 1000 when converted to a Date object
            String  y = Synt.declare(m.get("type"), "");
            if (OBJECT_MODE) {
                if ("time".equals(y) || "timestamp".equals(y)) {
                    v = new NumberValue( );
                } else {
                    v = new DatimeValue(m);
                }
            } else {
                if ("time".equals(y) || "timestamp".equals(y)) {
                    v = new NumeraValue( );
                } else {
                    v = new DatextValue(m);
                }
            }
            break;
        case "int":
        case "long":
        case "float":
        case "double":
        case "number":
            if (OBJECT_MODE) {
                v = new NumberValue();
            } else {
                v = new NumeraValue();
            }
            break;
        case "object":
            v = new ObjectValue();
            break;
        default:
            v = new StringValue();
        } else {
            v = new StringValue();
        }

        // repeated fields collect all values into a list; single-valued
        // fields take the first stored value (or null when none exists)
        if (r) {
            if (fs.length > 0) {
                for(IndexableField f : fs ) {
                    Dict.put(map , v.get(f), k, null);
                }
            } else {
                map.put(k , new ArrayList());
            }
        } else {
            if (fs.length > 0) {
                map.put(k , v.get ( fs[0] ));
            } else {
                map.put(k , null);
            }
        }
    }
}
 
Example 19
Source File: TestDirectoryReader.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
// Verifies that a stored binary field survives indexing, reopening, and a
// force-merge: the bytes read back must match the original array exactly.
public void testBinaryFields() throws IOException {
  Directory dir = newDirectory();
  byte[] bin = new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
    
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                              .setMergePolicy(newLogMergePolicy()));
    
  // Pad the index with assorted documents so the binary doc is not alone.
  for (int i = 0; i < 10; i++) {
    addDoc(writer, "document number " + (i + 1));
    addDocumentWithFields(writer);
    addDocumentWithDifferentFields(writer);
    addDocumentWithTermVectorFields(writer);
  }
  writer.close();
  // Append the document carrying the stored binary field.
  writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                  .setOpenMode(OpenMode.APPEND)
                                  .setMergePolicy(newLogMergePolicy()));
  Document doc = new Document();
  doc.add(new StoredField("bin1", bin));
  doc.add(new TextField("junk", "junk text", Field.Store.NO));
  writer.addDocument(doc);
  writer.close();
  DirectoryReader reader = DirectoryReader.open(dir);
  // The binary document was added last, so it is at maxDoc() - 1.
  Document doc2 = reader.document(reader.maxDoc() - 1);
  IndexableField[] fields = doc2.getFields("bin1");
  assertNotNull(fields);
  assertEquals(1, fields.length);
  IndexableField b1 = fields[0];
  assertTrue(b1.binaryValue() != null);
  BytesRef bytesRef = b1.binaryValue();
  assertEquals(bin.length, bytesRef.length);
  // Compare byte-for-byte, honoring the BytesRef offset.
  for (int i = 0; i < bin.length; i++) {
    assertEquals(bin[i], bytesRef.bytes[i + bytesRef.offset]);
  }
  reader.close();
  // force merge


  writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                  .setOpenMode(OpenMode.APPEND)
                                  .setMergePolicy(newLogMergePolicy()));
  writer.forceMerge(1);
  writer.close();
  // Re-read after the merge and verify the binary payload is still intact.
  reader = DirectoryReader.open(dir);
  doc2 = reader.document(reader.maxDoc() - 1);
  fields = doc2.getFields("bin1");
  assertNotNull(fields);
  assertEquals(1, fields.length);
  b1 = fields[0];
  assertTrue(b1.binaryValue() != null);
  bytesRef = b1.binaryValue();
  assertEquals(bin.length, bytesRef.length);
  for (int i = 0; i < bin.length; i++) {
    assertEquals(bin[i], bytesRef.bytes[i + bytesRef.offset]);
  }
  reader.close();
  dir.close();
}
 
Example 20
Source File: TestDocumentWriter.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
// Indexes DocHelper's standard document and verifies each stored field and
// its term-vector/norms configuration can be read back from the segment.
public void testAddDocument() throws Exception {
  Document testDoc = new Document();
  DocHelper.setupDoc(testDoc);
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  writer.addDocument(testDoc);
  writer.commit();
  SegmentCommitInfo info = writer.newestSegment();
  writer.close();
  //After adding the document, we should be able to read it back in
  SegmentReader reader = new SegmentReader(info, Version.LATEST.major, newIOContext(random()));
  // Use assertNotNull/assertEquals instead of assertTrue(x != null) /
  // assertTrue(x.equals(y)) so failures report the actual values.
  assertNotNull(reader);
  Document doc = reader.document(0);
  assertNotNull(doc);

  //System.out.println("Document: " + doc);
  IndexableField[] fields = doc.getFields("textField2");
  assertNotNull(fields);
  assertEquals(1, fields.length);
  assertEquals(DocHelper.FIELD_2_TEXT, fields[0].stringValue());
  assertTrue(fields[0].fieldType().storeTermVectors());

  fields = doc.getFields("textField1");
  assertNotNull(fields);
  assertEquals(1, fields.length);
  assertEquals(DocHelper.FIELD_1_TEXT, fields[0].stringValue());
  assertFalse(fields[0].fieldType().storeTermVectors());

  fields = doc.getFields("keyField");
  assertNotNull(fields);
  assertEquals(1, fields.length);
  assertEquals(DocHelper.KEYWORD_TEXT, fields[0].stringValue());

  fields = doc.getFields(DocHelper.NO_NORMS_KEY);
  assertNotNull(fields);
  assertEquals(1, fields.length);
  assertEquals(DocHelper.NO_NORMS_TEXT, fields[0].stringValue());

  fields = doc.getFields(DocHelper.TEXT_FIELD_3_KEY);
  assertNotNull(fields);
  assertEquals(1, fields.length);
  assertEquals(DocHelper.FIELD_3_TEXT, fields[0].stringValue());

  // test that the norms are not present in the segment if
  // omitNorms is true
  for (FieldInfo fi : reader.getFieldInfos()) {
    if (fi.getIndexOptions() != IndexOptions.NONE) {
      assertTrue(fi.omitsNorms() == (reader.getNormValues(fi.name) == null));
    }
  }
  reader.close();
}