org.apache.lucene.document.DocumentStoredFieldVisitor Java Examples

The following examples show how to use org.apache.lucene.document.DocumentStoredFieldVisitor. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Catalog.java (from the cxf project, Apache License 2.0; 6 votes)
/**
 * Returns the stored source of every indexed book as a JSON array.
 *
 * <p>Runs a match-all query capped at 1000 hits and, for each hit, loads only
 * the {@code LuceneDocumentMetadata.SOURCE_FIELD} stored field and appends its
 * string value to the result.
 *
 * @return a JSON array with one string entry per matching document
 * @throws IOException if the index cannot be opened or read
 */
@GET
@Produces(MediaType.APPLICATION_JSON)
public JsonArray getBooks() throws IOException {
    // try-with-resources guarantees the reader is closed even if creating the
    // searcher or the JSON builder throws.
    try (IndexReader reader = DirectoryReader.open(directory)) {
        final IndexSearcher searcher = new IndexSearcher(reader);
        final JsonArrayBuilder builder = Json.createArrayBuilder();
        final Query query = new MatchAllDocsQuery();

        for (final ScoreDoc scoreDoc: searcher.search(query, 1000).scoreDocs) {
            // Restrict loading to the single stored field we actually need.
            final DocumentStoredFieldVisitor fieldVisitor =
                new DocumentStoredFieldVisitor(LuceneDocumentMetadata.SOURCE_FIELD);

            reader.document(scoreDoc.doc, fieldVisitor);
            builder.add(fieldVisitor
                    .getDocument()
                    .getField(LuceneDocumentMetadata.SOURCE_FIELD)
                    .stringValue());
        }

        return builder.build();
    }
}
 
Example #2
Source File: SearchTravRetLoadFieldSelectorTask.java (from the lucene-solr project, Apache License 2.0; 5 votes)
/**
 * Loads document {@code id} from {@code ir}. When {@code fieldsToLoad} is set,
 * only those fields are collected through a {@link DocumentStoredFieldVisitor}.
 */
@Override
protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
  // No field restriction configured: delegate to the plain lookup.
  if (fieldsToLoad == null) {
    return ir.document(id);
  }

  final DocumentStoredFieldVisitor selectiveVisitor = new DocumentStoredFieldVisitor(fieldsToLoad);
  ir.document(id, selectiveVisitor);
  return selectiveVisitor.getDocument();
}
 
Example #3
Source File: IndexReader.java (from the lucene-solr project, Apache License 2.0; 5 votes)
/**
 * Loads the document with the given ID, collecting only the stored fields
 * named in {@code fieldsToLoad}. This is a convenience wrapper around {@link
 * DocumentStoredFieldVisitor#DocumentStoredFieldVisitor(Set)}; see also
 * {@link #document(int)}.
 */
public final Document document(int docID, Set<String> fieldsToLoad)
    throws IOException {
  final DocumentStoredFieldVisitor selectedFields =
      new DocumentStoredFieldVisitor(fieldsToLoad);
  document(docID, selectedFields);
  return selectedFields.getDocument();
}
 
Example #4
Source File: GenericRecordReader.java (from the incubator-retired-blur project, Apache License 2.0; 5 votes)
/**
 * Reads the stored fields of the current document ({@code _docId}) and
 * refreshes {@code _rowId} and {@code _tableBlurRecord} from them.
 */
private void fetchBlurRecord() throws IOException {
  // Collect every stored field of the current document.
  final DocumentStoredFieldVisitor storedFields = new DocumentStoredFieldVisitor();
  _fieldsReader.visitDocument(_docId, storedFields);

  // readRecord fills the record and hands back the row id, which is then set on the record.
  final BlurRecord record = new BlurRecord();
  final String id = RowDocumentUtil.readRecord(storedFields.getDocument(), record);
  record.setRowId(id);

  _rowId = new Text(id);
  _tableBlurRecord = new TableBlurRecord(_table, record);
}
 
Example #5
Source File: CheckIndex.java (from the lucene-solr project, Apache License 2.0; 4 votes)
/**
 * Test stored fields.
 *
 * <p>Visits the stored fields of every document slot up to {@code maxDoc}
 * (deleted ones included), counts live documents and their fields, and
 * verifies that the live-document count matches {@code reader.numDocs()}.
 *
 * @param reader     the reader whose stored fields are checked
 * @param infoStream destination for progress and error output; may be {@code null}
 * @param failFast   if {@code true}, any failure is rethrown instead of being
 *                   recorded in the returned status
 * @return the status with {@code docCount}, {@code totFields} and, on a
 *         non-fail-fast failure, {@code error} populated
 * @throws IOException if reading fails and {@code failFast} is set
 * @lucene.experimental
 */
public static Status.StoredFieldStatus testStoredFields(CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
  long startNS = System.nanoTime();
  final Status.StoredFieldStatus status = new Status.StoredFieldStatus();

  try {
    if (infoStream != null) {
      infoStream.print("    test: stored fields.......");
    }

    // Scan stored fields for all documents
    final Bits liveDocs = reader.getLiveDocs();
    StoredFieldsReader storedFields = reader.getFieldsReader().getMergeInstance();
    for (int j = 0; j < reader.maxDoc(); ++j) {
      // Intentionally pull even deleted documents to
      // make sure they too are not corrupt:
      DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
      storedFields.visitDocument(j, visitor);
      Document doc = visitor.getDocument();
      // Only live documents contribute to the reported totals.
      if (liveDocs == null || liveDocs.get(j)) {
        status.docCount++;
        status.totFields += doc.getFields().size();
      }
    }

    // Validate docCount
    if (status.docCount != reader.numDocs()) {
      // Report the expected count (numDocs) against the count actually seen.
      throw new RuntimeException("docCount=" + reader.numDocs() + " but saw " + status.docCount + " undeleted docs");
    }

    msg(infoStream, String.format(Locale.ROOT, "OK [%d total field count; avg %.1f fields per doc] [took %.3f sec]",
                                  status.totFields,
                                  (((float) status.totFields)/status.docCount),
                                  nsToSec(System.nanoTime() - startNS)));
  } catch (Throwable e) {
    if (failFast) {
      throw IOUtils.rethrowAlways(e);
    }
    // Not failing fast: record the error in the status and keep going.
    msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]");
    status.error = e;
    if (infoStream != null) {
      e.printStackTrace(infoStream);
    }
  }

  return status;
}
 
Example #6
Source File: IndexSizeEstimatorTest.java (from the lucene-solr project, Apache License 2.0; 4 votes)
/**
 * Exercises {@code IndexSizeEstimator} against the raw reader of a randomly
 * chosen core: checks the contents of a full estimate, then of a sampled
 * estimate, and finally that stored fields can still be visited through the
 * same reader afterwards.
 */
@Test
public void testEstimator() throws Exception {
  JettySolrRunner jetty = cluster.getRandomJetty(random());
  String randomCoreName = jetty.getCoreContainer().getAllCoreNames().iterator().next();
  SolrCore core = jetty.getCoreContainer().getCore(randomCoreName);
  RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
  try {
    SolrIndexSearcher searcher = searcherRef.get();
    // limit the max length
    // NOTE(review): the meaning of the 20 / 50 / true / true arguments is not visible
    // here - confirm against the IndexSizeEstimator constructor.
    IndexSizeEstimator estimator = new IndexSizeEstimator(searcher.getRawReader(), 20, 50, true, true);
    IndexSizeEstimator.Estimate estimate = estimator.estimate();
    // per-field sizes: one positive entry for each expected field
    Map<String, Long> fieldsBySize = estimate.getFieldsBySize();
    assertFalse("empty fieldsBySize", fieldsBySize.isEmpty());
    assertEquals(fieldsBySize.toString(), fields.size(), fieldsBySize.size());
    fieldsBySize.forEach((k, v) -> assertTrue("unexpected size of " + k + ": " + v, v > 0));
    // per-type sizes: at least 8 types, each with a positive size
    Map<String, Long> typesBySize = estimate.getTypesBySize();
    assertFalse("empty typesBySize", typesBySize.isEmpty());
    assertTrue("expected at least 8 types: " + typesBySize.toString(), typesBySize.size() >= 8);
    typesBySize.forEach((k, v) -> assertTrue("unexpected size of " + k + ": " + v, v > 0));
    // summary: expected to have one key per field
    Map<String, Object> summary = estimate.getSummary();
    assertNotNull("summary", summary);
    assertFalse("empty summary", summary.isEmpty());
    assertEquals(summary.keySet().toString(), fields.size(), summary.keySet().size());
    Map<String, Object> details = estimate.getDetails();
    assertNotNull("details", details);
    assertFalse("empty details", details.isEmpty());
    // by type
    assertEquals(details.keySet().toString(), 6, details.keySet().size());

    // check sampling
    // The threshold is set to half of maxDoc before estimating again on the same estimator.
    estimator.setSamplingThreshold(searcher.getRawReader().maxDoc() / 2);
    IndexSizeEstimator.Estimate sampledEstimate = estimator.estimate();
    Map<String, Long> sampledFieldsBySize = sampledEstimate.getFieldsBySize();
    assertFalse("empty fieldsBySize", sampledFieldsBySize.isEmpty());
    // verify that the sampled values are within 50% of the original values
    fieldsBySize.forEach((field, size) -> {
      Long sampledSize = sampledFieldsBySize.get(field);
      assertNotNull("sampled size for " + field + " is missing in " + sampledFieldsBySize, sampledSize);
      double delta = (double) size * 0.5;
      assertEquals("sampled size of " + field + " is wildly off", (double)size, (double)sampledSize, delta);
    });
    // verify the reader is still usable - SOLR-13694
    IndexReader reader = searcher.getRawReader();
    for (LeafReaderContext context : reader.leaves()) {
      LeafReader leafReader = context.reader();
      assertTrue("unexpected LeafReader class: " + leafReader.getClass().getName(), leafReader instanceof CodecReader);
      Bits liveDocs = leafReader.getLiveDocs();
      CodecReader codecReader = (CodecReader) leafReader;
      StoredFieldsReader storedFieldsReader = codecReader.getFieldsReader();
      // One visitor instance is reused for every document; the collected fields are never
      // inspected, so this loop only checks that visiting completes without throwing.
      StoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
      assertNotNull(storedFieldsReader);
      for (int docId = 0; docId < leafReader.maxDoc(); docId++) {
        // skip deleted documents
        if (liveDocs != null && !liveDocs.get(docId)) {
          continue;
        }
        storedFieldsReader.visitDocument(docId, visitor);
      }
    }
  } finally {
    // release the searcher reference and the core obtained at the top of the test
    searcherRef.decref();
    core.close();
  }
}
 
Example #7
Source File: IndexReader.java (from the lucene-solr project, Apache License 2.0; 3 votes)
/**
 * Loads the stored fields of the <code>n</code><sup>th</sup>
 * <code>Document</code> in this index. Convenience shortcut that
 * collects them with a {@link DocumentStoredFieldVisitor}.
 * <p>
 * <b>NOTE:</b> to keep this call fast, no check is made for whether the
 * requested document has been deleted; requesting a deleted document
 * may yield unspecified results. Such a check is usually unnecessary, but
 * if needed, consult the {@link Bits} returned from
 * {@link MultiBits#getLiveDocs}.
 * <p>
 * <b>NOTE:</b> only field content is returned, and only for
 * fields that were stored at indexing time. Metadata
 * such as boost, omitNorm, IndexOptions, tokenized, etc.,
 * is not preserved.
 *
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if there is a low-level IO error
 */
// TODO: introduce a separate StoredField class so that the
// Document returned here holds that type rather than
// IndexableField
public final Document document(int docID) throws IOException {
  final DocumentStoredFieldVisitor allFields = new DocumentStoredFieldVisitor();
  document(docID, allFields);
  return allFields.getDocument();
}