Java Code Examples for org.apache.lucene.index.TermsEnum

The following examples show how to use org.apache.lucene.index.TermsEnum. They are extracted from open source projects; the source project, file, and license are noted above each example.
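
Before the project examples, here is a minimal, self-contained sketch of the basic TermsEnum iteration pattern. It is an illustration only: the index path "index" and the field name "body" are hypothetical placeholders, and MultiTerms.getTerms assumes a recent (8.x) Lucene API; several older examples below use MultiFields or AtomicReader instead.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class TermsEnumIterationSketch {
  public static void main(String[] args) throws IOException {
    // hypothetical index directory and field name; substitute your own
    try (Directory dir = FSDirectory.open(Paths.get("index"));
         DirectoryReader reader = DirectoryReader.open(dir)) {
      Terms terms = MultiTerms.getTerms(reader, "body");
      if (terms == null) {
        return; // field absent or nothing indexed for it
      }
      TermsEnum termsEnum = terms.iterator();
      BytesRef term;
      while ((term = termsEnum.next()) != null) { // next() returns null once the enum is exhausted
        System.out.println(term.utf8ToString() + " docFreq=" + termsEnum.docFreq());
      }
    }
  }
}
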
Example 1
Source Project: liresolr   Source File: HashTermStatistics.java    License: GNU General Public License v2.0
public static void addToStatistics(SolrIndexSearcher searcher, String field) throws IOException {
    // check if this field is already in the stats.
    if (termstats.get(field) != null) return;
    // else add it to the stats.
    Terms terms = searcher.getSlowAtomicReader().terms(field);
    HashMap<String, Integer> term2docFreq = new HashMap<String, Integer>(1000);
    termstats.put(field, term2docFreq);
    if (terms != null) {
        TermsEnum termsEnum = terms.iterator();
        BytesRef term;
        while ((term = termsEnum.next()) != null) {
            term2docFreq.put(term.utf8ToString(), termsEnum.docFreq());
        }
    }
}
 
Example 2
public void testNoOrds() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setStoreTermVectors(true);
  doc.add(new Field("foo", "this is a test", ft));
  iw.addDocument(doc);
  LeafReader ir = getOnlyLeafReader(iw.getReader());
  Terms terms = ir.getTermVector(0, "foo");
  assertNotNull(terms);
  TermsEnum termsEnum = terms.iterator();
  assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("this")));

  expectThrows(UnsupportedOperationException.class, termsEnum::ord);
  expectThrows(UnsupportedOperationException.class, () -> termsEnum.seekExact(0));

  ir.close();
  iw.close();
  dir.close();
}
 
Example 3
Source Project: lucene-solr   Source File: DocToDoubleVectorUtils.java    License: Apache License 2.0
/**
 * Creates a sparse <code>Double</code> vector, given document and field term vectors, using the local frequency of the terms in the document
 *
 * @param docTerms   term vectors for a given document
 * @param fieldTerms field term vectors
 * @return a sparse vector of <code>Double</code>s as an array
 * @throws IOException in case accessing the underlying index fails
 */
public static Double[] toSparseLocalFreqDoubleArray(Terms docTerms, Terms fieldTerms) throws IOException {
  TermsEnum fieldTermsEnum = fieldTerms.iterator();
  Double[] freqVector = null;
  if (docTerms != null && fieldTerms.size() > -1) {
    freqVector = new Double[(int) fieldTerms.size()];
    int i = 0;
    TermsEnum docTermsEnum = docTerms.iterator();
    BytesRef term;
    while ((term = fieldTermsEnum.next()) != null) {
      TermsEnum.SeekStatus seekStatus = docTermsEnum.seekCeil(term);
      if (seekStatus.equals(TermsEnum.SeekStatus.END)) {
        docTermsEnum = docTerms.iterator(); // enum exhausted past the last doc term; reset so later seeks still work
      }
      if (seekStatus.equals(TermsEnum.SeekStatus.FOUND)) {
        long termFreqLocal = docTermsEnum.totalTermFreq(); // the total number of occurrences of this term in the given document
        freqVector[i] = Long.valueOf(termFreqLocal).doubleValue();
      } else {
        freqVector[i] = 0d;
      }
      i++;
    }
  }
  return freqVector;
}
 
Example 4
Source Project: lucene-solr   Source File: TestRTGBase.java    License: Apache License 2.0
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  Terms terms = MultiTerms.getTerms(r, t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator();
  if (!termsEnum.seekExact(termBytes)) {
    return -1;
  }
  PostingsEnum docs = termsEnum.postings(null, PostingsEnum.NONE);
  docs = BitsFilteredPostingsEnum.wrap(docs, MultiBits.getLiveDocs(r));
  int id = docs.nextDoc();
  if (id != DocIdSetIterator.NO_MORE_DOCS) {
    int next = docs.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
  }
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
 
Example 5
Source Project: Elasticsearch   Source File: DfsOnlyRequest.java    License: Apache License 2.0
public DfsOnlyRequest(Fields termVectorsFields, String[] indices, String[] types, Set<String> selectedFields) throws IOException {
    super(indices);

    // build a search request with a query of all the terms
    final BoolQueryBuilder boolBuilder = boolQuery();
    for (String fieldName : termVectorsFields) {
        if ((selectedFields != null) && (!selectedFields.contains(fieldName))) {
            continue;
        }
        Terms terms = termVectorsFields.terms(fieldName);
        TermsEnum iterator = terms.iterator();
        while (iterator.next() != null) {
            String text = iterator.term().utf8ToString();
            boolBuilder.should(QueryBuilders.termQuery(fieldName, text));
        }
    }
    // wrap a search request object
    this.searchRequest = new SearchRequest(indices).types(types).source(new SearchSourceBuilder().query(boolBuilder));
}
 
Example 6
Source Project: crate   Source File: ShardSplittingQuery.java    License: Apache License 2.0
private static void findSplitDocs(String idField, Predicate<BytesRef> includeInShard, LeafReader leafReader,
                                  IntConsumer consumer) throws IOException {
    Terms terms = leafReader.terms(idField);
    TermsEnum iterator = terms.iterator();
    BytesRef idTerm;
    PostingsEnum postingsEnum = null;
    while ((idTerm = iterator.next()) != null) {
        if (includeInShard.test(idTerm) == false) {
            postingsEnum = iterator.postings(postingsEnum);
            int doc;
            while ((doc = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                consumer.accept(doc);
            }
        }
    }
}
 
Example 7
Source Project: scava   Source File: ReadmeSimilarityCalculator.java    License: Eclipse Public License 2.0
private DocVector[] getDocumentVectors() throws IOException {
	DocVector[] docVector = new DocVector[getTotalDocumentInIndex()];
	for (int docId = 0; docId < getTotalDocumentInIndex(); docId++) {
		Terms vector = getIndexReader().getTermVector(docId, FIELD_CONTENT);
		TermsEnum termsEnum = vector.iterator();
		BytesRef text;
		docVector[docId] = new DocVector(getAllTerms());
		while ((text = termsEnum.next()) != null) {
			String term = text.utf8ToString();
			int freq = (int) termsEnum.totalTermFreq();
			docVector[docId].setEntry(term, freq);
		}
		docVector[docId].normalize();
	}
	getIndexReader().close();
	return docVector;
}
 
Example 8
Source Project: linden   Source File: LindenFieldCacheImpl.java    License: Apache License 2.0
@Override
protected Accountable createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField)
    throws IOException {
  final Map<String, Integer> uidMap = new HashMap<>();

  Uninvert u = new Uninvert() {
    private String currentValue;

    @Override
    public void visitTerm(BytesRef term) {
      currentValue = term.utf8ToString();
    }

    @Override
    public void visitDoc(int docID) {
      uidMap.put(currentValue, docID);
    }

    @Override
    protected TermsEnum termsEnum(Terms terms) throws IOException {
      return terms.iterator(null);
    }
  };
  u.uninvert(reader, key.field, setDocsWithField);
  return new PerReaderUIDMaps(reader.getContext().ord, uidMap);
}
 
Example 9
Source Project: incubator-retired-blur   Source File: IndexImporter.java    License: Apache License 2.0
private void runOldMergeSortRowIdCheckAndDelete(boolean emitDeletes, IndexReader currentIndexReader,
    BlurPartitioner blurPartitioner, Text key, int numberOfShards, int shardId, Action action,
    AtomicReader atomicReader) throws IOException {
  MergeSortRowIdLookup lookup = new MergeSortRowIdLookup(currentIndexReader);
  Fields fields = atomicReader.fields();
  Terms terms = fields.terms(BlurConstants.ROW_ID);
  if (terms != null) {
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef ref = null;
    while ((ref = termsEnum.next()) != null) {
      key.set(ref.bytes, ref.offset, ref.length);
      int partition = blurPartitioner.getPartition(key, null, numberOfShards);
      if (shardId != partition) {
        throw new IOException("Index is corrupted, RowIds are found in wrong shard, partition [" + partition
            + "] does not shard [" + shardId + "], this can happen when rows are not hashed correctly.");
      }
      if (emitDeletes) {
        lookup.lookup(ref, action);
      }
    }
  }
}
 
Example 10
Source Project: lucene-solr   Source File: TermVectorsAdapter.java    License: Apache License 2.0
/**
 * Returns the term vectors for the specified field in the specified document.
 * If no term vector is available for the field, an empty list is returned.
 *
 * @param docid - document id
 * @param field - field name
 * @return list of term vector elements
 * @throws IOException - if there is a low level IO error.
 */
List<TermVectorEntry> getTermVector(int docid, String field) throws IOException {
  Terms termVector = reader.getTermVector(docid, field);
  if (termVector == null) {
    // no term vector available
    log.warn("No term vector indexed for doc: #{} and field: {}", docid, field);
    return Collections.emptyList();
  }

  List<TermVectorEntry> res = new ArrayList<>();
  TermsEnum te = termVector.iterator();
  while (te.next() != null) {
    res.add(TermVectorEntry.of(te));
  }
  return res;
}
 
Example 11
Source Project: lucene-solr   Source File: SimpleNaiveBayesClassifier.java    License: Apache License 2.0
/**
 * Calculate probabilities for all classes for a given input text
 * @param inputDocument the input text as a {@code String}
 * @return a {@code List} of {@code ClassificationResult}, one for each existing class
 * @throws IOException if assigning probabilities fails
 */
protected List<ClassificationResult<BytesRef>> assignClassNormalizedList(String inputDocument) throws IOException {
  List<ClassificationResult<BytesRef>> assignedClasses = new ArrayList<>();

  Terms classes = MultiTerms.getTerms(indexReader, classFieldName);
  if (classes != null) {
    TermsEnum classesEnum = classes.iterator();
    BytesRef next;
    String[] tokenizedText = tokenize(inputDocument);
    int docsWithClassSize = countDocsWithClass();
    while ((next = classesEnum.next()) != null) {
      if (next.length > 0) {
        Term term = new Term(this.classFieldName, next);
        double clVal = calculateLogPrior(term, docsWithClassSize) + calculateLogLikelihood(tokenizedText, term, docsWithClassSize);
        assignedClasses.add(new ClassificationResult<>(term.bytes(), clVal));
      }
    }
  }
  // normalization; the values are transformed to a 0-1 range
  return normClassificationResults(assignedClasses);
}
 
Example 12
Source Project: lucene-solr   Source File: TestUtil.java    License: Apache License 2.0
public static PostingsEnum docs(Random random, TermsEnum termsEnum, PostingsEnum reuse, int flags) throws IOException {
  // TODO: simplify this method? it would be easier to randomly either use the flags passed, or do the random selection;
  // FREQS should be part of the random selection instead of outside on its own?
  if (random.nextBoolean()) {
    if (random.nextBoolean()) {
      final int posFlags;
      switch (random.nextInt(4)) {
        case 0: posFlags = PostingsEnum.POSITIONS; break;
        case 1: posFlags = PostingsEnum.OFFSETS; break;
        case 2: posFlags = PostingsEnum.PAYLOADS; break;
        default: posFlags = PostingsEnum.ALL; break;
      }
      return termsEnum.postings(null, posFlags);
    }
    flags |= PostingsEnum.FREQS;
  }
  return termsEnum.postings(reuse, flags);
}
 
Example 13
Source Project: lucene-solr   Source File: BlockTermsWriter.java    License: Apache License 2.0
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {

  for(String field : fields) {

    Terms terms = fields.terms(field);
    if (terms == null) {
      continue;
    }

    TermsEnum termsEnum = terms.iterator();

    TermsWriter termsWriter = addField(fieldInfos.fieldInfo(field));

    while (true) {
      BytesRef term = termsEnum.next();
      if (term == null) {
        break;
      }

      termsWriter.write(term, termsEnum, norms);
    }

    termsWriter.finish();
  }
}
 
Example 14
Source Project: clue   Source File: ReconstructCommand.java    License: Apache License 2.0
public String reconstructNoPositions(TermsEnum te, int docid, Bits liveDocs) throws IOException{
  List<String> textList = new ArrayList<String>();
  BytesRef text;
  PostingsEnum postings = null;
  while ((text = te.next()) != null) {
    postings = te.postings(postings, PostingsEnum.FREQS);
    int iterDoc = postings.advance(docid);
    if (iterDoc == docid) {
      textList.add(text.utf8ToString());
    }
  }
  StringBuilder buf = new StringBuilder();
  for (String s : textList) {
    buf.append(s+" ");
  }
  return buf.toString();
}
 
Example 15
Source Project: lucene-solr   Source File: DisjunctionMatchesIterator.java    License: Apache License 2.0
/**
 * Create a {@link DisjunctionMatchesIterator} over a list of terms extracted from a {@link BytesRefIterator}
 *
 * Only terms that have at least one match in the given document will be included
 */
static MatchesIterator fromTermsEnum(LeafReaderContext context, int doc, Query query, String field, BytesRefIterator terms) throws IOException {
  Objects.requireNonNull(field);
  Terms t = context.reader().terms(field);
  if (t == null)
    return null;
  TermsEnum te = t.iterator();
  PostingsEnum reuse = null;
  for (BytesRef term = terms.next(); term != null; term = terms.next()) {
    if (te.seekExact(term)) {
      PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
      if (pe.advance(doc) == doc) {
        return new TermsEnumDisjunctionMatchesIterator(new TermMatchesIterator(query, pe), terms, te, doc, query);
      }
      else {
        reuse = pe;
      }
    }
  }
  return null;
}
 
Example 16
Source Project: lucene-solr   Source File: UniformSplitTermsWriter.java    License: Apache License 2.0
@Override
public void write(Fields fields, NormsProducer normsProducer) throws IOException {
  BlockWriter blockWriter = new BlockWriter(blockOutput, targetNumBlockLines, deltaNumLines, blockEncoder);
  ByteBuffersDataOutput fieldsOutput = new ByteBuffersDataOutput();
  int fieldsNumber = 0;
  for (String field : fields) {
    Terms terms = fields.terms(field);
    if (terms != null) {
      TermsEnum termsEnum = terms.iterator();
      FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
      fieldsNumber += writeFieldTerms(blockWriter, fieldsOutput, termsEnum, fieldInfo, normsProducer);
    }
  }
  writeFieldsMetadata(fieldsNumber, fieldsOutput);
  CodecUtil.writeFooter(dictionaryOutput);
}
 
Example 17
Source Project: lucene-solr   Source File: SolrRangeQuery.java    License: Apache License 2.0
public RangeTermsEnum(Terms terms) throws IOException {
  if (terms == null) {
    positioned = true;
  } else {
    te = terms.iterator();
    if (lower != null) {
      TermsEnum.SeekStatus status = te.seekCeil(lower);
      if (status == TermsEnum.SeekStatus.END) {
        positioned = true;
        curr = null;
      } else if (status == SeekStatus.FOUND) {
        positioned = includeLower();
        curr = te.term();
      } else {
        // lower bound not found, so includeLower is irrelevant
        positioned = true;
        curr = te.term();
      }
    }
  }
}
 
Example 18
Source Project: lucene-solr   Source File: FeatureDoubleValuesSource.java    License: Apache License 2.0
@Override
public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
  Terms terms = ctx.reader().terms(field);
  if (terms == null) {
    return DoubleValues.EMPTY;
  } else {
    TermsEnum termsEnum = terms.iterator();
    if (termsEnum.seekExact(featureName) == false) {
      return DoubleValues.EMPTY;
    } else {
      PostingsEnum currentReaderPostingsValues = termsEnum.postings(null, PostingsEnum.FREQS);
      return new FeatureDoubleValues(currentReaderPostingsValues);
    }
  }
}
 
Example 19
Source Project: lucene-solr   Source File: FuzzyTermsEnum.java    License: Apache License 2.0
/**
 * return an automata-based enum for matching up to editDistance from
 * lastTerm, if possible
 */
private TermsEnum getAutomatonEnum(int editDistance, BytesRef lastTerm) throws IOException {
  assert editDistance < automata.length;
  final CompiledAutomaton compiled = automata[editDistance];
  BytesRef initialSeekTerm;
  if (lastTerm == null) {
    // This is the first enum we are pulling:
    initialSeekTerm = null;
  } else {
    // We are pulling this enum (e.g., ed=1) after iterating for a while already (e.g., ed=2):
    initialSeekTerm = compiled.floor(lastTerm, new BytesRefBuilder());
  }
  return terms.intersect(compiled, initialSeekTerm);
}
 
Example 20
Source Project: lucene-solr   Source File: LegacyNumericRangeQuery.java    License: Apache License 2.0
@Override @SuppressWarnings("unchecked")
protected TermsEnum getTermsEnum(final Terms terms, AttributeSource atts) throws IOException {
  // very strange: java.lang.Number itself is not Comparable, but all subclasses used here are
  if (min != null && max != null && ((Comparable<T>) min).compareTo(max) > 0) {
    return TermsEnum.EMPTY;
  }
  return new NumericRangeTermsEnum(terms.iterator());
}
 
Example 21
Source Project: Elasticsearch   Source File: IndexNumericFieldData.java    License: Apache License 2.0
public final TermsEnum wrapTermsEnum(TermsEnum termsEnum) {
    if (requiredBits() == 1) { // boolean, no prefix-terms
        return termsEnum;
    } else if (requiredBits() > 32) {
        return OrdinalsBuilder.wrapNumeric64Bit(termsEnum);
    } else {
        return OrdinalsBuilder.wrapNumeric32Bit(termsEnum);
    }
}
 
Example 22
Source Project: lucene-solr   Source File: TermGroupFacetCollector.java    License: Apache License 2.0
SegmentResult(int[] counts, int total, int missingCountIndex, TermsEnum tenum, int startFacetOrd, int endFacetOrd) throws IOException {
  super(counts, total - counts[missingCountIndex], counts[missingCountIndex],
      endFacetOrd == missingCountIndex + 1 ?  missingCountIndex : endFacetOrd);
  this.tenum = tenum;
  this.mergePos = startFacetOrd;
  if (tenum != null) {
    tenum.seekExact(mergePos);
    mergeTerm = tenum.term();
  }
}
 
Example 23
Source Project: SourcererCC   Source File: TermFreq.java    License: GNU General Public License v3.0
private void dummy() throws IOException {
    Fields fields = MultiFields.getFields(this.reader);
    Terms terms = fields.terms("field");
    TermsEnum iterator = terms.iterator(null);
    BytesRef byteRef = null;
    while ((byteRef = iterator.next()) != null) {
        String term = new String(byteRef.bytes, byteRef.offset,
                byteRef.length);
        Term termInstance = new Term("tokens", term);
        long termFreq = this.reader.totalTermFreq(termInstance);
        this.TermFreqMap.put(term, termFreq);
        System.out.println(termFreq);
    }
}
 
Example 24
Source Project: lucene-solr   Source File: TermInSetQuery.java    License: Apache License 2.0
TermAndState(String field, TermsEnum termsEnum) throws IOException {
  this.field = field;
  this.termsEnum = termsEnum;
  this.term = BytesRef.deepCopyOf(termsEnum.term());
  this.state = termsEnum.termState();
  this.docFreq = termsEnum.docFreq();
  this.totalTermFreq = termsEnum.totalTermFreq();
}
 
Example 25
Source Project: lucene-solr   Source File: TestTeeSinkTokenFilter.java    License: Apache License 2.0
public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer));
  Document doc = new Document();
  TokenStream tokenStream = analyzer.tokenStream("field", "abcd   ");
  TeeSinkTokenFilter tee = new TeeSinkTokenFilter(tokenStream);
  TokenStream sink = tee.newSinkTokenStream();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorOffsets(true);
  ft.setStoreTermVectorPositions(true);
  Field f1 = new Field("field", tee, ft);
  Field f2 = new Field("field", sink, ft);
  doc.add(f1);
  doc.add(f2);
  w.addDocument(doc);
  w.close();

  IndexReader r = DirectoryReader.open(dir);
  Terms vector = r.getTermVectors(0).terms("field");
  assertEquals(1, vector.size());
  TermsEnum termsEnum = vector.iterator();
  termsEnum.next();
  assertEquals(2, termsEnum.totalTermFreq());
  PostingsEnum positions = termsEnum.postings(null, PostingsEnum.ALL);
  assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(2, positions.freq());
  positions.nextPosition();
  assertEquals(0, positions.startOffset());
  assertEquals(4, positions.endOffset());
  positions.nextPosition();
  assertEquals(8, positions.startOffset());
  assertEquals(12, positions.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, positions.nextDoc());
  r.close();
  dir.close();
  analyzer.close();
}
 
Example 26
Source Project: lucene-solr   Source File: TermFilteredPresearcher.java    License: Apache License 2.0
private Query buildFilterClause(LeafReader reader, String field) throws IOException {

    Terms terms = reader.terms(field);
    if (terms == null)
      return null;

    BooleanQuery.Builder bq = new BooleanQuery.Builder();

    int docsInBatch = reader.maxDoc();

    BytesRef term;
    TermsEnum te = terms.iterator();
    while ((term = te.next()) != null) {
      // we need to check that every document in the batch has the same field values, otherwise
      // this filtering will not work
      if (te.docFreq() != docsInBatch)
        throw new IllegalArgumentException("Some documents in this batch do not have a term value of "
            + field + ":" + Term.toString(term));
      bq.add(new TermQuery(new Term(field, BytesRef.deepCopyOf(term))), BooleanClause.Occur.SHOULD);
    }

    BooleanQuery built = bq.build();

    if (built.clauses().size() == 0)
      return null;

    return built;
  }
 
Example 27
Source Project: lucene-solr   Source File: TermIntervalsSource.java    License: Apache License 2.0
@Override
public IntervalMatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
  Terms terms = ctx.reader().terms(field);
  if (terms == null)
    return null;
  if (terms.hasPositions() == false) {
    throw new IllegalArgumentException("Cannot create an IntervalIterator over field " + field + " because it has no indexed positions");
  }
  TermsEnum te = terms.iterator();
  if (te.seekExact(term) == false) {
    return null;
  }
  return matches(te, doc, field);
}
 
Example 28
@Override
public Boolean execute(IndexContext context) throws IOException, InterruptedException {
  try {
    IndexReader indexReader = context.getIndexReader();
    while (true) {
      long hash = 0;
      for (AtomicReaderContext atomicReaderContext : indexReader.leaves()) {
        AtomicReader reader = atomicReaderContext.reader();
        for (String field : reader.fields()) {
          Terms terms = reader.terms(field);
          BytesRef bytesRef;
          TermsEnum iterator = terms.iterator(null);
          while ((bytesRef = iterator.next()) != null) {
            hash += bytesRef.hashCode();
          }
        }
      }
      System.out.println("hashcode = " + hash);
    }
  } catch (IOException e) {
    e.printStackTrace();
    throw e;
  } catch (Throwable t) {
    t.printStackTrace();
    if (t instanceof InterruptedException) {
      throw t;
    } else if (t instanceof RuntimeException) {
      throw (RuntimeException) t;
    }
    throw new RuntimeException(t);
  }
}
 
Example 29
Source Project: lucene-solr   Source File: FieldReader.java    License: Apache License 2.0
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  // if (DEBUG) System.out.println("  FieldReader.intersect startTerm=" + BlockTreeTermsWriter.brToString(startTerm));
  //System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
  // TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
  // can we optimize knowing that...?
  if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
  }
  return new IntersectTermsEnum(this, compiled.automaton, compiled.runAutomaton, compiled.commonSuffixRef, startTerm);
}
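
Example 29 only accepts a CompiledAutomaton of type NORMAL. As a hedged sketch of what calling Terms.intersect looks like from user code (the reader, the field name "body", and the prefix "luc" below are assumptions for illustration, not part of the original source):

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.Operations;

public final class IntersectSketch {

  // Prints every term in the given field that starts with the given prefix.
  static void printTermsWithPrefix(IndexReader reader, String field, String prefix) throws IOException {
    Terms terms = MultiTerms.getTerms(reader, field);
    if (terms == null) {
      return;
    }
    // prefix + any-string compiles to AUTOMATON_TYPE.NORMAL, which intersect() requires
    Automaton a = Operations.concatenate(Automata.makeString(prefix), Automata.makeAnyString());
    CompiledAutomaton compiled = new CompiledAutomaton(a);
    TermsEnum te = terms.intersect(compiled, null); // null start term: begin at the first matching term
    BytesRef term;
    while ((term = te.next()) != null) {
      System.out.println(term.utf8ToString());
    }
  }
}

Called as printTermsWithPrefix(reader, "body", "luc"), this walks only the matching block of the term dictionary instead of scanning every term.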
 
Example 30
Source Project: lucene-solr   Source File: CompressingTermVectorsReader.java    License: Apache License 2.0
@Override
public TermsEnum iterator() throws IOException {
  TVTermsEnum termsEnum = new TVTermsEnum();
  termsEnum.reset(numTerms, flags, prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths,
      payloadIndex, payloadBytes,
      new ByteArrayDataInput(termBytes.bytes, termBytes.offset, termBytes.length));
  return termsEnum;
}