Java Code Examples for org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

The following examples show how to use org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute. These examples are extracted from open source projects; the source project, file, and license are listed above each example where available.
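
Most of the examples below share a single consumption pattern: obtain the TermToBytesRefAttribute from the TokenStream, call reset(), loop over incrementToken() reading getBytesRef() once per token, then end() and close() the stream. The following minimal, self-contained sketch shows that pattern in isolation; the StandardAnalyzer, field name, and sample text are illustrative placeholders, not taken from any project below.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.BytesRef;

public class TermBytesDemo {
  public static void main(String[] args) throws IOException {
    Analyzer analyzer = new StandardAnalyzer(); // placeholder; any Analyzer works
    List<BytesRef> terms = new ArrayList<>();
    try (TokenStream ts = analyzer.tokenStream("body", "Hello Lucene world")) {
      // addAttribute returns the existing instance, installing one if absent
      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      ts.reset();                 // mandatory before the first incrementToken()
      while (ts.incrementToken()) {
        // the attribute reuses its BytesRef, so deep-copy anything kept beyond this iteration
        terms.add(BytesRef.deepCopyOf(termAtt.getBytesRef()));
      }
      ts.end();                   // record the end-of-stream state
    }
    for (BytesRef term : terms) {
      System.out.println(term.utf8ToString());
    }
  }
}
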
Example 1
Source Project: lucene-solr   Source File: AbstractTestCase.java    License: Apache License 2.0
protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
  List<BytesRef> bytesRefs = new ArrayList<>();

  try (TokenStream tokenStream = analyzer.tokenStream(field, text)) {
    TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
    
    tokenStream.reset();
  
    while (tokenStream.incrementToken()) {
      bytesRefs.add(BytesRef.deepCopyOf(termAttribute.getBytesRef()));
    }

    tokenStream.end();
  }

  return bytesRefs;
}
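
Note the BytesRef.deepCopyOf call: the BytesRef returned by getBytesRef() is a buffer the attribute reuses, and its contents are overwritten on each incrementToken(), so any value kept beyond the loop must be copied.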
 
Example 2
Source Project: lucene-solr   Source File: ReadTokensTask.java    License: Apache License 2.0
@Override
public int doLogic() throws Exception {
  List<IndexableField> fields = doc.getFields();
  Analyzer analyzer = getRunData().getAnalyzer();
  int tokenCount = 0;
  for(final IndexableField field : fields) {
    if (field.fieldType().indexOptions() == IndexOptions.NONE ||
        field.fieldType().tokenized() == false) {
      continue;
    }
    
    final TokenStream stream = field.tokenStream(analyzer, null);
    // reset the TokenStream to the first token
    stream.reset();

    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    while(stream.incrementToken()) {
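      // the return value is intentionally unused: the call just forces the term
      // bytes to be produced, which is the work this benchmark task measures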
      termAtt.getBytesRef();
      tokenCount++;
    }
    stream.end();
    stream.close();
  }
  totalTokenCount += tokenCount;
  return tokenCount;
}
 
Example 3
Source Project: lucene-solr   Source File: TestPerfTasksLogic.java    License: Apache License 2.0
private void assertEqualCollation(Analyzer a1, Analyzer a2, String text)
    throws Exception {
  TokenStream ts1 = a1.tokenStream("bogus", text);
  TokenStream ts2 = a2.tokenStream("bogus", text);
  ts1.reset();
  ts2.reset();
  TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class);
  TermToBytesRefAttribute termAtt2 = ts2.addAttribute(TermToBytesRefAttribute.class);
  assertTrue(ts1.incrementToken());
  assertTrue(ts2.incrementToken());
  BytesRef bytes1 = termAtt1.getBytesRef();
  BytesRef bytes2 = termAtt2.getBytesRef();
  assertEquals(bytes1, bytes2);
  assertFalse(ts1.incrementToken());
  assertFalse(ts2.incrementToken());
  ts1.close();
  ts2.close();
}
 
Example 4
Source Project: lucene-solr   Source File: TestEmptyTokenStream.java    License: Apache License 2.0
public void testIndexWriter_LUCENE4656() throws IOException {
  Directory directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(null));

  TokenStream ts = new EmptyTokenStream();
  assertFalse(ts.hasAttribute(TermToBytesRefAttribute.class));

  Document doc = new Document();
  doc.add(new StringField("id", "0", Field.Store.YES));
  doc.add(new TextField("description", ts));
  
  // this should not fail because we have no TermToBytesRefAttribute
  writer.addDocument(doc);
  
  assertEquals(1, writer.getDocStats().numDocs);

  writer.close();
  directory.close();
}
 
Example 5
Source Project: lucene-solr   Source File: SpanOrTermsBuilder.java    License: Apache License 2.0
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
  String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
  String value = DOMUtils.getNonBlankTextOrFail(e);

  List<SpanQuery> clausesList = new ArrayList<>();

  try (TokenStream ts = analyzer.tokenStream(fieldName, value)) {
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(termAtt.getBytesRef())));
      clausesList.add(stq);
    }
    ts.end();
    SpanOrQuery soq = new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
    float boost = DOMUtils.getAttribute(e, "boost", 1.0f);
    return new SpanBoostQuery(soq, boost);
  }
  catch (IOException ioe) {
    throw new ParserException("IOException parsing value: " + value);
  }
}
 
Example 6
Source Project: lucene-solr   Source File: QueryBuilder.java    License: Apache License 2.0
/** 
 * Creates complex boolean query from the cached tokenstream contents 
 */
protected Query analyzeMultiBoolean(String field, TokenStream stream, BooleanClause.Occur operator) throws IOException {
  BooleanQuery.Builder q = newBooleanQuery();
  List<TermAndBoost> currentQuery = new ArrayList<>();
  
  TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
  BoostAttribute boostAtt = stream.addAttribute(BoostAttribute.class);

  stream.reset();
  while (stream.incrementToken()) {
    if (posIncrAtt.getPositionIncrement() != 0) {
      add(q, currentQuery, operator);
      currentQuery.clear();
    }
    currentQuery.add(new TermAndBoost(new Term(field, termAtt.getBytesRef()), boostAtt.getBoost()));
  }
  add(q, currentQuery, operator);
  
  return q.build();
}
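
A position increment of 0 means the current token occupies the same position as the previous one (a synonym, for example), so analyzeMultiBoolean accumulates such tokens in currentQuery and only flushes them into the boolean query once the position advances.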
 
Example 7
Source Project: lucene-solr   Source File: QueryBuilder.java    License: Apache License 2.0
/** 
 * Creates simple phrase query from the cached tokenstream contents 
 */
protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  builder.setSlop(slop);
  
  TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  BoostAttribute boostAtt = stream.addAttribute(BoostAttribute.class);
  PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
  int position = -1;
  float phraseBoost = DEFAULT_BOOST;
  stream.reset();
  while (stream.incrementToken()) {
    if (enablePositionIncrements) {
      position += posIncrAtt.getPositionIncrement();
    } else {
      position += 1;
    }
    builder.add(new Term(field, termAtt.getBytesRef()), position);
    phraseBoost *= boostAtt.getBoost();
  }
  PhraseQuery query = builder.build();
  if (phraseBoost == DEFAULT_BOOST) {
    return query;
  }
  return new BoostQuery(query, phraseBoost);
}
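
Per-token boosts reported through BoostAttribute are multiplied into phraseBoost; if the product is still DEFAULT_BOOST after the loop, the PhraseQuery is returned as-is, otherwise it is wrapped in a BoostQuery.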
 
Example 8
Source Project: lucene-solr   Source File: Tagger.java    License: Apache License 2.0
public Tagger(Terms terms, Bits liveDocs, TokenStream tokenStream,
              TagClusterReducer tagClusterReducer, boolean skipAltTokens,
              boolean ignoreStopWords) throws IOException {
  this.terms = terms;
  this.liveDocs = liveDocs;
  this.tokenStream = tokenStream;
  this.skipAltTokens = skipAltTokens;
  this.ignoreStopWords = ignoreStopWords;
  byteRefAtt = tokenStream.addAttribute(TermToBytesRefAttribute.class);
  posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
  offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
  taggingAtt = tokenStream.addAttribute(TaggingAttribute.class);
  tokenStream.reset();

  this.tagClusterReducer = tagClusterReducer;
}
 
Example 9
Source Project: lucene-solr   Source File: AnalysisRequestHandlerBase.java    License: Apache License 2.0
/**
 * Analyzes the given text using the given analyzer and returns the produced tokens.
 *
 * @param query    The query to analyze.
 * @param analyzer The analyzer to use.
 */
protected Set<BytesRef> getQueryTokenSet(String query, Analyzer analyzer) {
  try (TokenStream tokenStream = analyzer.tokenStream("", query)){
    final Set<BytesRef> tokens = new HashSet<>();
    final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);

    tokenStream.reset();

    while (tokenStream.incrementToken()) {
      tokens.add(BytesRef.deepCopyOf(bytesAtt.getBytesRef()));
    }

    tokenStream.end();
    return tokens;
  } catch (IOException ioe) {
    throw new RuntimeException("Error occurred while iterating over tokenstream", ioe);
  }
}
 
Example 10
Source Project: lucene-solr   Source File: TextField.java    License: Apache License 2.0
/**
 * Analyzes a text part using the provided {@link Analyzer} for a multi-term query.
 * <p>
 * Expects a single token to be used as the multi-term term. This single token might also be filtered out,
 * so zero tokens are supported and null is returned in that case.
 *
 * @return The multi-term term bytes; or null if there is no multi-term term.
 * @throws SolrException If the {@link Analyzer} tokenizes more than one token;
 * or if an underlying {@link IOException} occurs.
 */
public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) {
  if (part == null || analyzerIn == null) return null;

  try (TokenStream source = analyzerIn.tokenStream(field, part)){
    source.reset();

    TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);

    if (!source.incrementToken()) {
      // Accept no tokens because it may have been filtered out by a StopFilter for example.
      return null;
    }
    BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
    if (source.incrementToken())
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "analyzer returned too many terms for multiTerm term: " + part);

    source.end();
    return bytes;
  } catch (IOException e) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "error analyzing range part: " + part, e);
  }
}
 
Example 11
Source Project: lucene-solr   Source File: TestNumericTokenStream.java    License: Apache License 2.0
public void testLongStream() throws Exception {
  @SuppressWarnings("resource")
  final LegacyNumericTokenStream stream=new LegacyNumericTokenStream().setLongValue(lvalue);
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  assertNotNull(bytesAtt);
  final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
  assertNotNull(typeAtt);
  final LegacyNumericTokenStream.LegacyNumericTermAttribute numericAtt = stream.getAttribute(LegacyNumericTokenStream.LegacyNumericTermAttribute.class);
  assertNotNull(numericAtt);
  stream.reset();
  assertEquals(64, numericAtt.getValueSize());
  for (int shift=0; shift<64; shift+= LegacyNumericUtils.PRECISION_STEP_DEFAULT) {
    assertTrue("New token is available", stream.incrementToken());
    assertEquals("Shift value wrong", shift, numericAtt.getShift());
    assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), LegacyNumericUtils.prefixCodedToLong(bytesAtt.getBytesRef()));
    assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue());
    assertEquals("Type incorrect", (shift == 0) ? LegacyNumericTokenStream.TOKEN_TYPE_FULL_PREC : LegacyNumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
  }
  assertFalse("More tokens available", stream.incrementToken());
  stream.end();
  stream.close();
}
 
Example 12
Source Project: lucene-solr   Source File: TestNumericTokenStream.java    License: Apache License 2.0
public void testIntStream() throws Exception {
  @SuppressWarnings("resource")
  final LegacyNumericTokenStream stream=new LegacyNumericTokenStream().setIntValue(ivalue);
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  assertNotNull(bytesAtt);
  final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
  assertNotNull(typeAtt);
  final LegacyNumericTokenStream.LegacyNumericTermAttribute numericAtt = stream.getAttribute(LegacyNumericTokenStream.LegacyNumericTermAttribute.class);
  assertNotNull(numericAtt);
  stream.reset();
  assertEquals(32, numericAtt.getValueSize());
  for (int shift=0; shift<32; shift+= LegacyNumericUtils.PRECISION_STEP_DEFAULT) {
    assertTrue("New token is available", stream.incrementToken());
    assertEquals("Shift value wrong", shift, numericAtt.getShift());
    assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), LegacyNumericUtils.prefixCodedToInt(bytesAtt.getBytesRef()));
    assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue());
    assertEquals("Type incorrect", (shift == 0) ? LegacyNumericTokenStream.TOKEN_TYPE_FULL_PREC : LegacyNumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
  }
  assertFalse("More tokens available", stream.incrementToken());
  stream.end();
  stream.close();
}
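
Examples 11 and 12 exercise the legacy numeric encoding: the stream emits one token per precision step, each carrying the value with its lowest `shift` bits cleared, exposed both as prefix-coded term bytes via TermToBytesRefAttribute and as a raw value via the numeric attribute.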
 
Example 13
Source Project: yes-cart   Source File: LuceneSearchUtil.java    License: Apache License 2.0
List<String> analyse(String search) {

    final TokenStream stream = get().tokenStream("X", search);
    final TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);

    final List<String> result = new ArrayList<>();
    try {
        stream.reset();
        while (stream.incrementToken()) {
            result.add(termAtt.getBytesRef().utf8ToString());
        }
    } catch (IOException e) {
        // analysis errors are deliberately swallowed; callers get the tokens read so far
    } finally {
        try {
            stream.close(); // release the stream even when analysis fails
        } catch (IOException e) {
            // ignore
        }
    }

    return result;
}
 
Example 14
Source Project: pyramid   Source File: PhraseCountQueryBuilder.java    License: Apache License 2.0
protected Query doToQuery(QueryShardContext context) throws IOException {
//  Analyzer analyzer = context.getMapperService().searchAnalyzer();
    Analyzer analyzer = new WhitespaceAnalyzer();
    try (TokenStream source = analyzer.tokenStream(fieldName, value.toString())) {
        CachingTokenFilter stream = new CachingTokenFilter(new LowerCaseFilter(source));
        TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
        if (termAtt == null) {
            return null;
        }
        List<CustomSpanTermQuery> clauses = new ArrayList<>();
        stream.reset();
        while (stream.incrementToken()) {
            Term term = new Term(fieldName, termAtt.getBytesRef());
            clauses.add(new CustomSpanTermQuery(term));
        }
        return new PhraseCountQuery(clauses.toArray(new CustomSpanTermQuery[clauses.size()]), slop, inOrder, weightedCount);
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing query text", e);
    }
}
 
Example 15
Source Project: SolrTextTagger   Source File: Tagger.java    License: Apache License 2.0
public Tagger(Terms terms, Bits liveDocs, TokenStream tokenStream,
              TagClusterReducer tagClusterReducer, boolean skipAltTokens,
              boolean ignoreStopWords) throws IOException {
  this.terms = terms;
  this.liveDocs = liveDocs;
  this.tokenStream = tokenStream;
  this.skipAltTokens = skipAltTokens;
  this.ignoreStopWords = ignoreStopWords;
  byteRefAtt = tokenStream.addAttribute(TermToBytesRefAttribute.class);
  posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
  offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
  taggingAtt = tokenStream.addAttribute(TaggingAttribute.class);
  tokenStream.reset();

  this.tagClusterReducer = tagClusterReducer;
}
 
Example 16
Source Project: crate   Source File: StringFieldType.java    License: Apache License 2.0
@Override
public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements) throws IOException {

    PhraseQuery.Builder builder = new PhraseQuery.Builder();
    builder.setSlop(slop);

    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
    int position = -1;

    stream.reset();
    while (stream.incrementToken()) {
        if (enablePosIncrements) {
            position += posIncrAtt.getPositionIncrement();
        } else {
            position += 1;
        }
        builder.add(new Term(field, termAtt.getBytesRef()), position);
    }

    return builder.build();
}
 
Example 17
public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn)
{
    if (part == null || analyzerIn == null)
        return null;

    TokenStream source = null;
    try
    {
        source = analyzerIn.tokenStream(field, part);
        source.reset();

        TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);

        if (!source.incrementToken())
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "analyzer returned no terms for multiTerm term: " + part);
        // getBytesRef() is only meaningful after incrementToken(), and the returned
        // instance is reused per token, so deep-copy before advancing the stream again
        BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
        if (source.incrementToken())
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "analyzer returned too many terms for multiTerm term: " + part);

        source.end();
        return bytes;
    }
    catch (IOException e)
    {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "error analyzing range part: " + part, e);
    }
    finally
    {
        IOUtils.closeWhileHandlingException(source);
    }
}
 
Example 18
Source Project: lucene-solr   Source File: FieldInvertState.java    License: Apache License 2.0
/**
 * Sets attributeSource to a new instance.
 */
void setAttributeSource(AttributeSource attributeSource) {
  if (this.attributeSource != attributeSource) {
    this.attributeSource = attributeSource;
    termAttribute = attributeSource.getAttribute(TermToBytesRefAttribute.class);
    termFreqAttribute = attributeSource.addAttribute(TermFrequencyAttribute.class);
    posIncrAttribute = attributeSource.addAttribute(PositionIncrementAttribute.class);
    offsetAttribute = attributeSource.addAttribute(OffsetAttribute.class);
    payloadAttribute = attributeSource.getAttribute(PayloadAttribute.class);
  }
}
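
This setter also illustrates the difference between the two lookup styles used throughout these examples: addAttribute(...) installs the attribute on the source when it is not already present, whereas getAttribute(...) only retrieves an existing instance, which is why the optional payload attribute may legitimately end up null here.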
 
Example 19
Source Project: lucene-solr   Source File: QueryBuilder.java    License: Apache License 2.0
/**
 * Creates simple term query from the cached tokenstream contents 
 */
protected Query analyzeTerm(String field, TokenStream stream) throws IOException {
  TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  BoostAttribute boostAtt = stream.addAttribute(BoostAttribute.class);
  
  stream.reset();
  if (!stream.incrementToken()) {
    throw new AssertionError();
  }
  
  return newTermQuery(new Term(field, termAtt.getBytesRef()), boostAtt.getBoost());
}
 
Example 20
Source Project: lucene-solr   Source File: QueryBuilder.java    License: Apache License 2.0
/** 
 * Creates simple boolean query from the cached tokenstream contents 
 */
protected Query analyzeBoolean(String field, TokenStream stream) throws IOException {
  TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  BoostAttribute boostAtt = stream.addAttribute(BoostAttribute.class);
  
  stream.reset();
  List<TermAndBoost> terms = new ArrayList<>();
  while (stream.incrementToken()) {
    terms.add(new TermAndBoost(new Term(field, termAtt.getBytesRef()), boostAtt.getBoost()));
  }
  
  return newSynonymQuery(terms.toArray(new TermAndBoost[0]));
}
 
Example 21
Source Project: lucene-solr   Source File: QueryBuilder.java    License: Apache License 2.0
/** 
 * Creates complex phrase query from the cached tokenstream contents 
 */
protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException {
  MultiPhraseQuery.Builder mpqb = newMultiPhraseQueryBuilder();
  mpqb.setSlop(slop);
  
  TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);

  PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
  int position = -1;  
  
  List<Term> multiTerms = new ArrayList<>();
  stream.reset();
  while (stream.incrementToken()) {
    int positionIncrement = posIncrAtt.getPositionIncrement();
    
    if (positionIncrement > 0 && multiTerms.size() > 0) {
      if (enablePositionIncrements) {
        mpqb.add(multiTerms.toArray(new Term[0]), position);
      } else {
        mpqb.add(multiTerms.toArray(new Term[0]));
      }
      multiTerms.clear();
    }
    position += positionIncrement;
    multiTerms.add(new Term(field, termAtt.getBytesRef()));
  }
  
  if (enablePositionIncrements) {
    mpqb.add(multiTerms.toArray(new Term[0]), position);
  } else {
    mpqb.add(multiTerms.toArray(new Term[0]));
  }
  return mpqb.build();
}
 
Example 22
Source Project: lucene-solr   Source File: Test2BTerms.java    License: Apache License 2.0
public MyTokenStream(Random random, int tokensPerDoc) {
  super(new MyAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY));
  this.tokensPerDoc = tokensPerDoc;
  addAttribute(TermToBytesRefAttribute.class);
  bytes.length = TOKEN_LEN;
  this.random = random;
  nextSave = TestUtil.nextInt(random, 500000, 1000000);
}
 
Example 23
Source Project: lucene-solr   Source File: Test2BTerms.java    License: Apache License 2.0
@Override
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
  if (attClass == TermToBytesRefAttribute.class)
    return new MyTermAttributeImpl();
  if (CharTermAttribute.class.isAssignableFrom(attClass))
    throw new IllegalArgumentException("no");
  return delegate.createAttributeInstance(attClass);
}
 
Example 24
Source Project: lucene-solr   Source File: TestLongPostings.java    License: Apache License 2.0
private String getRandomTerm(String other) throws IOException {
  Analyzer a = new MockAnalyzer(random());
  while(true) {
    String s = TestUtil.randomRealisticUnicodeString(random());
    if (other != null && s.equals(other)) {
      continue;
    }
    try (TokenStream ts = a.tokenStream("foo", s)) {
      final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
      ts.reset();

      int count = 0;
      boolean changed = false;

      while(ts.incrementToken()) {
        final BytesRef termBytes = termAtt.getBytesRef();
        if (count == 0 && !termBytes.utf8ToString().equals(s)) {
          // The value was changed during analysis.  Keep iterating so the
          // tokenStream is exhausted.
          changed = true;
        }
        count++;
      }

      ts.end();
      // Did we iterate just once and the value was unchanged?
      if (!changed && count == 1) {
        return s;
      }
    }
  }
}
 
Example 25
Source Project: lucene-solr   Source File: LegacyNumericTokenStream.java    License: Apache License 2.0
@Override
public void reflectWith(AttributeReflector reflector) {
  reflector.reflect(TermToBytesRefAttribute.class, "bytes", getBytesRef());
  reflector.reflect(LegacyNumericTermAttribute.class, "shift", shift);
  reflector.reflect(LegacyNumericTermAttribute.class, "rawValue", getRawValue());
  reflector.reflect(LegacyNumericTermAttribute.class, "valueSize", valueSize);
}
 
Example 26
Source Project: lucene-solr   Source File: MinHashQParser.java    License: Apache License 2.0
private void getHashesFromTokenStream(String analyserField, ArrayList<BytesRef> hashes) throws Exception {
  TokenStream ts = getReq().getSchema().getIndexAnalyzer().tokenStream(analyserField, qstr);
  TermToBytesRefAttribute termAttribute = ts.getAttribute(TermToBytesRefAttribute.class);
  ts.reset();
  while (ts.incrementToken()) {
    BytesRef term = termAttribute.getBytesRef();
    hashes.add(BytesRef.deepCopyOf(term));
  }
  ts.end();
  ts.close();
}
 
Example 27
Source Project: lucene-solr   Source File: CursorMarkTest.java    License: Apache License 2.0
private static Object getRandomCollation(SchemaField sf) throws IOException {
  Object val;
  Analyzer analyzer = sf.getType().getIndexAnalyzer();
  String term = TestUtil.randomRealisticUnicodeString(random());
  try (TokenStream ts = analyzer.tokenStream("fake", term)) {
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    ts.reset();
    assertTrue(ts.incrementToken());
    val = BytesRef.deepCopyOf(termAtt.getBytesRef());
    assertFalse(ts.incrementToken());
    ts.end();
  }
  return val;
}
 
Example 28
private BytesRef bytesFromTokenStream(TokenStream stream) throws Exception {
    TermToBytesRefAttribute termAttr = stream.getAttribute(TermToBytesRefAttribute.class);
    stream.reset();
    BytesRefBuilder bytesRefBuilder = new BytesRefBuilder();
    while (stream.incrementToken()) {
        BytesRef bytesRef = termAttr.getBytesRef();
        bytesRefBuilder.append(bytesRef);
    }
    stream.close();
    return bytesRefBuilder.toBytesRef();
}
 
Example 29
private BytesRef sortKeyFromTokenStream(TokenStream stream) throws Exception {
    TermToBytesRefAttribute termAttr = stream.getAttribute(TermToBytesRefAttribute.class);
    BytesRefBuilder b = new BytesRefBuilder();
    stream.reset();
    while (stream.incrementToken()) {
        b.append(termAttr.getBytesRef());
    }
    stream.close();
    return b.get();
}
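
Examples 28 and 29 differ in one subtle point: toBytesRef() returns an independent copy of the accumulated bytes, while get() returns a BytesRef view backed by the builder's internal buffer, which is only safe as long as the builder is not modified afterwards.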
 
Example 30
Source Project: crate   Source File: StringFieldType.java    License: Apache License 2.0
@Override
public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {

    MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder();
    mpqb.setSlop(slop);

    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);

    PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
    int position = -1;

    List<Term> multiTerms = new ArrayList<>();
    stream.reset();
    while (stream.incrementToken()) {
        int positionIncrement = posIncrAtt.getPositionIncrement();

        if (positionIncrement > 0 && multiTerms.size() > 0) {
            if (enablePositionIncrements) {
                mpqb.add(multiTerms.toArray(new Term[0]), position);
            } else {
                mpqb.add(multiTerms.toArray(new Term[0]));
            }
            multiTerms.clear();
        }
        position += positionIncrement;
        multiTerms.add(new Term(field, termAtt.getBytesRef()));
    }

    if (enablePositionIncrements) {
        mpqb.add(multiTerms.toArray(new Term[0]), position);
    } else {
        mpqb.add(multiTerms.toArray(new Term[0]));
    }
    return mpqb.build();
}