Java Code Examples for org.apache.lucene.index.FieldInfos#getIndexedFields()

The following examples show how to use org.apache.lucene.index.FieldInfos#getIndexedFields(), which returns the names of all indexed fields visible through an IndexReader. Each example links to the original project and source file it was taken from.
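Before the project examples, here is a minimal standalone sketch of the call. The index path is a placeholder, and a Lucene 8+ classpath (lucene-core) is assumed:

```java
import java.io.IOException;
import java.nio.file.Paths;
import java.util.Collection;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

public class ListIndexedFields {
  public static void main(String[] args) throws IOException {
    // "/path/to/index" is a placeholder; point it at a real Lucene index
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
      // Collect the names of all fields that are indexed in this reader
      Collection<String> fields = FieldInfos.getIndexedFields(reader);
      fields.forEach(System.out::println);
    }
  }
}
```

Note that FieldInfos.getIndexedFields(IndexReader) is the Lucene 8 replacement for the older MultiFields.getIndexedFields(IndexReader).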
Example 1
Source File: MoreLikeThis.java    From lucene-solr with Apache License 2.0
/**
 * Return a query that will return docs like the passed lucene document ID.
 *
 * @param docNum the document ID of the Lucene doc to generate the "More Like This" query for.
 * @return a query that will return docs like the passed lucene document ID.
 */
public Query like(int docNum) throws IOException {
  if (fieldNames == null) {
    // gather list of valid fields from lucene
    Collection<String> fields = FieldInfos.getIndexedFields(ir);
    fieldNames = fields.toArray(new String[fields.size()]);
  }

  return createQuery(retrieveTerms(docNum));
}
 
Example 2
Source File: MoreLikeThis.java    From lucene-solr with Apache License 2.0
/**
 * 
 * @param filteredDocument Document with field values extracted for selected fields.
 * @return a "More Like This" query for the passed document.
 */
public Query like(Map<String, Collection<Object>> filteredDocument) throws IOException {
  if (fieldNames == null) {
    // gather list of valid fields from lucene
    Collection<String> fields = FieldInfos.getIndexedFields(ir);
    fieldNames = fields.toArray(new String[fields.size()]);
  }
  return createQuery(retrieveTerms(filteredDocument));
}
 
Example 3
Source File: LuceneIndexer.java    From MtgDesktopCompanion with GNU General Public License v3.0
public String[] listFields()
{
	if (dir == null)
		open();
	
	try (IndexReader indexReader = DirectoryReader.open(dir))
	{
		Collection<String> fields = FieldInfos.getIndexedFields(indexReader);
		return fields.toArray(new String[fields.size()]);
	} catch (IOException e) {
		return new String[0];
	}
}
 
Example 4
Source File: TestPerfTasksLogic.java    From lucene-solr with Apache License 2.0
/**
 * Test ReadTokensTask
 */
public void testReadTokens() throws Exception {

  // We will call ReadTokens on this many docs
  final int NUM_DOCS = 20;

  // Read tokens from first NUM_DOCS docs from Reuters and
  // then build index from the same docs
  String algLines1[] = {
    "# ----- properties ",
    "analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer",
    "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
    "docs.file=" + getReuters20LinesFile(),
    "# ----- alg ",
    "{ReadTokens}: " + NUM_DOCS,
    "ResetSystemErase",
    "CreateIndex",
    "{AddDoc}: " + NUM_DOCS,
    "CloseIndex",
  };

  // Run algo
  Benchmark benchmark = execBenchmark(algLines1);

  List<TaskStats> stats = benchmark.getRunData().getPoints().taskStats();

  // Count how many tokens all ReadTokens saw
  int totalTokenCount1 = 0;
  for (final TaskStats stat : stats) {
    if (stat.getTask().getName().equals("ReadTokens")) {
      totalTokenCount1 += stat.getCount();
    }
  }

  // Separately count how many tokens are actually in the index:
  IndexReader reader = DirectoryReader.open(benchmark.getRunData().getDirectory());
  assertEquals(NUM_DOCS, reader.numDocs());

  int totalTokenCount2 = 0;

  Collection<String> fields = FieldInfos.getIndexedFields(reader);

  for (String fieldName : fields) {
    if (fieldName.equals(DocMaker.ID_FIELD) || fieldName.equals(DocMaker.DATE_MSEC_FIELD) || fieldName.equals(DocMaker.TIME_SEC_FIELD)) {
      continue;
    }
    Terms terms = MultiTerms.getTerms(reader, fieldName);
    if (terms == null) {
      continue;
    }
    TermsEnum termsEnum = terms.iterator();
    PostingsEnum docs = null;
    while (termsEnum.next() != null) {
      docs = TestUtil.docs(random(), termsEnum, docs, PostingsEnum.FREQS);
      while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        totalTokenCount2 += docs.freq();
      }
    }
  }
  reader.close();

  // Make sure they are the same
  assertEquals(totalTokenCount1, totalTokenCount2);
}
 
Example 5
Source File: QueryAutoStopWordAnalyzer.java    From lucene-solr with Apache License 2.0
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
 * indexed fields from terms with a document frequency greater than the given
 * maxDocFreq
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param maxDocFreq Document frequency terms should be above in order to be stopwords
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
    Analyzer delegate,
    IndexReader indexReader,
    int maxDocFreq) throws IOException {
  this(delegate, indexReader, FieldInfos.getIndexedFields(indexReader), maxDocFreq);
}
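As a hedged usage sketch of the constructor above (the threshold of 100 documents and the WhitespaceAnalyzer delegate are illustrative assumptions, not from the source; a lucene-analyzers-common dependency is assumed):

```java
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.query.QueryAutoStopWordAnalyzer;
import org.apache.lucene.index.IndexReader;

public class StopWordAnalyzerExample {
  // reader is assumed to be an already-open IndexReader over the target index
  static Analyzer buildAnalyzer(IndexReader reader) throws IOException {
    // Treat any term appearing in more than 100 documents (an arbitrary
    // threshold) as a stop word, computed across every indexed field --
    // internally the constructor calls FieldInfos.getIndexedFields(reader)
    return new QueryAutoStopWordAnalyzer(new WhitespaceAnalyzer(), reader, 100);
  }
}
```

Because this constructor delegates to the field-list variant with FieldInfos.getIndexedFields(indexReader), stop words are gathered from all indexed fields rather than a caller-supplied subset.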
 
Example 6
Source File: QueryAutoStopWordAnalyzer.java    From lucene-solr with Apache License 2.0
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
 * indexed fields from terms with a document frequency percentage greater than
 * the given maxPercentDocs
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param maxPercentDocs The maximum percentage (between 0.0 and 1.0) of index documents which
 *                      contain a term, after which the word is considered to be a stop word
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
    Analyzer delegate,
    IndexReader indexReader,
    float maxPercentDocs) throws IOException {
  this(delegate, indexReader, FieldInfos.getIndexedFields(indexReader), maxPercentDocs);
}