org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute Java Examples

The following examples show how to use org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute. Each example notes the project and source file it is taken from.
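
All of the examples share one basic pattern: obtain the TermToBytesRefAttribute from a TokenStream, call reset(), consume tokens with incrementToken(), and copy each term's bytes if they are kept beyond the current token. The sketch below distills that pattern; the StandardAnalyzer and the "body" field name are illustrative placeholders, not taken from any example on this page.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.BytesRef;

public class TermBytesDemo {
  public static List<BytesRef> tokenize(String text) throws IOException {
    List<BytesRef> terms = new ArrayList<>();
    Analyzer analyzer = new StandardAnalyzer();   // placeholder; any Analyzer works
    try (TokenStream ts = analyzer.tokenStream("body", text)) {
      TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
      ts.reset();                                 // mandatory before the first incrementToken()
      while (ts.incrementToken()) {
        // the attribute's BytesRef may be reused per token, so snapshot it
        terms.add(BytesRef.deepCopyOf(termAtt.getBytesRef()));
      }
      ts.end();                                   // consume end-of-stream state
    }
    return terms;
  }
}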
Example #1
Source File: AbstractTestCase.java    From lucene-solr with Apache License 2.0
protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
  List<BytesRef> bytesRefs = new ArrayList<>();

  try (TokenStream tokenStream = analyzer.tokenStream(field, text)) {
    TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
    
    tokenStream.reset();
  
    while (tokenStream.incrementToken()) {
      bytesRefs.add(BytesRef.deepCopyOf(termAttribute.getBytesRef()));
    }

    tokenStream.end();
  }

  return bytesRefs;
}
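
The BytesRef.deepCopyOf call above is essential: the TermToBytesRefAttribute contract lets the implementation reuse the BytesRef returned by getBytesRef() for the next token, so storing the reference directly can leave every collected entry aliasing the same mutable buffer. Using the names from this example:

// Risky: keeps a reference to the stream's reusable buffer; after the loop,
// entries may all reflect the bytes of the last token.
bytesRefs.add(termAttribute.getBytesRef());

// Safe (as the example does): snapshot the current token's bytes.
bytesRefs.add(BytesRef.deepCopyOf(termAttribute.getBytesRef()));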
 
Example #2
Source File: Tagger.java    From lucene-solr with Apache License 2.0
public Tagger(Terms terms, Bits liveDocs, TokenStream tokenStream,
              TagClusterReducer tagClusterReducer, boolean skipAltTokens,
              boolean ignoreStopWords) throws IOException {
  this.terms = terms;
  this.liveDocs = liveDocs;
  this.tokenStream = tokenStream;
  this.skipAltTokens = skipAltTokens;
  this.ignoreStopWords = ignoreStopWords;
  byteRefAtt = tokenStream.addAttribute(TermToBytesRefAttribute.class);
  posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
  offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
  taggingAtt = tokenStream.addAttribute(TaggingAttribute.class);
  tokenStream.reset();

  this.tagClusterReducer = tagClusterReducer;
}
 
Example #3
Source File: AnalysisRequestHandlerBase.java    From lucene-solr with Apache License 2.0
/**
 * Analyzes the given text using the given analyzer and returns the produced tokens.
 *
 * @param query    The query to analyze.
 * @param analyzer The analyzer to use.
 */
protected Set<BytesRef> getQueryTokenSet(String query, Analyzer analyzer) {
  try (TokenStream tokenStream = analyzer.tokenStream("", query)) {
    final Set<BytesRef> tokens = new HashSet<>();
    final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);

    tokenStream.reset();

    while (tokenStream.incrementToken()) {
      tokens.add(BytesRef.deepCopyOf(bytesAtt.getBytesRef()));
    }

    tokenStream.end();
    return tokens;
  } catch (IOException ioe) {
    throw new RuntimeException("Error occurred while iterating over tokenstream", ioe);
  }
}
 
Example #4
Source File: TextField.java    From lucene-solr with Apache License 2.0
/**
 * Analyzes a text part using the provided {@link Analyzer} for a multi-term query.
 * <p>
 * Expects a single token to be used as the multi-term term. This single token might also be filtered out
 * (by a StopFilter, for example), so zero tokens are supported, in which case null is returned.
 *
 * @return The multi-term term bytes; or null if there is no multi-term term.
 * @throws SolrException If the {@link Analyzer} tokenizes more than one token;
 * or if an underlying {@link IOException} occurs.
 */
public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) {
  if (part == null || analyzerIn == null) return null;

  try (TokenStream source = analyzerIn.tokenStream(field, part)) {
    source.reset();

    TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);

    if (!source.incrementToken()) {
      // Accept no tokens because it may have been filtered out by a StopFilter for example.
      return null;
    }
    BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
    if (source.incrementToken())
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "analyzer returned too many terms for multiTerm term: " + part);

    source.end();
    return bytes;
  } catch (IOException e) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "error analyzing range part: " + part, e);
  }
}
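
A hypothetical call site, assuming a KeywordAnalyzer so the entire input survives analysis as a single token (the field name and input string here are made up for illustration):

Analyzer keyword = new KeywordAnalyzer();   // org.apache.lucene.analysis.core
BytesRef bytes = TextField.analyzeMultiTerm("title", "Foo*Bar", keyword);
// KeywordAnalyzer emits exactly one token, so bytes holds "Foo*Bar".
// An analyzer that filters the token away (e.g. via StopFilter) would yield null;
// one that splits the input into several tokens would raise the SolrException above.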
 
Example #5
Source File: TestNumericTokenStream.java    From lucene-solr with Apache License 2.0
public void testLongStream() throws Exception {
  @SuppressWarnings("resource")
  final LegacyNumericTokenStream stream = new LegacyNumericTokenStream().setLongValue(lvalue);
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  assertNotNull(bytesAtt);
  final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
  assertNotNull(typeAtt);
  final LegacyNumericTokenStream.LegacyNumericTermAttribute numericAtt = stream.getAttribute(LegacyNumericTokenStream.LegacyNumericTermAttribute.class);
  assertNotNull(numericAtt);
  stream.reset();
  assertEquals(64, numericAtt.getValueSize());
  for (int shift = 0; shift < 64; shift += LegacyNumericUtils.PRECISION_STEP_DEFAULT) {
    assertTrue("New token is available", stream.incrementToken());
    assertEquals("Shift value wrong", shift, numericAtt.getShift());
    assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), LegacyNumericUtils.prefixCodedToLong(bytesAtt.getBytesRef()));
    assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue());
    assertEquals("Type incorrect", (shift == 0) ? LegacyNumericTokenStream.TOKEN_TYPE_FULL_PREC : LegacyNumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
  }
  assertFalse("More tokens available", stream.incrementToken());
  stream.end();
  stream.close();
}
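
The expected term at each precision level is the original value with its lowest shift bits cleared: (1L << shift) - 1L builds a mask of shift low one-bits, and & ~mask zeroes them. A worked illustration with invented values:

long lvalue  = 0b1011_0110L;         // 182
int  shift   = 4;
long mask    = (1L << shift) - 1L;   // 0b0000_1111
long trimmed = lvalue & ~mask;       // 0b1011_0000 = 176; low 4 bits cleared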
 
Example #6
Source File: TestNumericTokenStream.java    From lucene-solr with Apache License 2.0
public void testIntStream() throws Exception {
  @SuppressWarnings("resource")
  final LegacyNumericTokenStream stream = new LegacyNumericTokenStream().setIntValue(ivalue);
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  assertNotNull(bytesAtt);
  final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
  assertNotNull(typeAtt);
  final LegacyNumericTokenStream.LegacyNumericTermAttribute numericAtt = stream.getAttribute(LegacyNumericTokenStream.LegacyNumericTermAttribute.class);
  assertNotNull(numericAtt);
  stream.reset();
  assertEquals(32, numericAtt.getValueSize());
  for (int shift = 0; shift < 32; shift += LegacyNumericUtils.PRECISION_STEP_DEFAULT) {
    assertTrue("New token is available", stream.incrementToken());
    assertEquals("Shift value wrong", shift, numericAtt.getShift());
    assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), LegacyNumericUtils.prefixCodedToInt(bytesAtt.getBytesRef()));
    assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue());
    assertEquals("Type incorrect", (shift == 0) ? LegacyNumericTokenStream.TOKEN_TYPE_FULL_PREC : LegacyNumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
  }
  assertFalse("More tokens available", stream.incrementToken());
  stream.end();
  stream.close();
}
 
Example #7
Source File: QueryBuilder.java    From lucene-solr with Apache License 2.0
/** 
 * Creates simple phrase query from the cached tokenstream contents 
 */
protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  builder.setSlop(slop);
  
  TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  BoostAttribute boostAtt = stream.addAttribute(BoostAttribute.class);
  PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
  int position = -1;
  float phraseBoost = DEFAULT_BOOST;
  stream.reset();
  while (stream.incrementToken()) {
    if (enablePositionIncrements) {
      position += posIncrAtt.getPositionIncrement();
    } else {
      position += 1;
    }
    builder.add(new Term(field, termAtt.getBytesRef()), position);
    phraseBoost *= boostAtt.getBoost();
  }
  PhraseQuery query = builder.build();
  if (phraseBoost == DEFAULT_BOOST) {
    return query;
  }
  return new BoostQuery(query, phraseBoost);
}
 
Example #8
Source File: QueryBuilder.java    From lucene-solr with Apache License 2.0
/** 
 * Creates complex boolean query from the cached tokenstream contents 
 */
protected Query analyzeMultiBoolean(String field, TokenStream stream, BooleanClause.Occur operator) throws IOException {
  BooleanQuery.Builder q = newBooleanQuery();
  List<TermAndBoost> currentQuery = new ArrayList<>();
  
  TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
  BoostAttribute boostAtt = stream.addAttribute(BoostAttribute.class);

  stream.reset();
  while (stream.incrementToken()) {
    if (posIncrAtt.getPositionIncrement() != 0) {
      add(q, currentQuery, operator);
      currentQuery.clear();
    }
    currentQuery.add(new TermAndBoost(new Term(field, termAtt.getBytesRef()), boostAtt.getBoost()));
  }
  add(q, currentQuery, operator);
  
  return q.build();
}
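
The position-increment check is what folds stacked tokens (synonyms, which analyzers emit at the same position with increment 0) into a single group, which the add(...) helper then turns into one clause. A sketch of how a token stream would be grouped, assuming Occur.MUST (the stream contents are invented for illustration):

// Tokens:  "wifi" (posInc=1), "wireless" (posInc=0), "network" (posInc=1)
// Groups:  [wifi, wireless] then [network]
// Result, roughly:  +(wifi|wireless as one synonym clause) +network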
 
Example #9
Source File: LuceneSearchUtil.java    From yes-cart with Apache License 2.0
List<String> analyse(String search) {

    final List<String> result = new ArrayList<>();

    try (TokenStream stream = get().tokenStream("X", search)) {
        final TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            result.add(termAtt.getBytesRef().utf8ToString());
        }
        stream.end();
    } catch (IOException e) {
        // swallow analysis errors and return whatever tokens were collected
    }

    return result;
}
 
Example #10
Source File: PhraseCountQueryBuilder.java    From pyramid with Apache License 2.0
protected Query doToQuery(QueryShardContext context) throws IOException {
//  Analyzer analyzer = context.getMapperService().searchAnalyzer();
    Analyzer analyzer = new WhitespaceAnalyzer();
    try (TokenStream source = analyzer.tokenStream(fieldName, value.toString())) {
        CachingTokenFilter stream = new CachingTokenFilter(new LowerCaseFilter(source));
        TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
        if (termAtt == null) {
            return null;
        }
        List<CustomSpanTermQuery> clauses = new ArrayList<>();
        stream.reset();
        while (stream.incrementToken()) {
            Term term = new Term(fieldName, termAtt.getBytesRef());
            clauses.add(new CustomSpanTermQuery(term));
        }
        return new PhraseCountQuery(clauses.toArray(new CustomSpanTermQuery[clauses.size()]), slop, inOrder, weightedCount);
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing query text", e);
    }
}
 
Example #11
Source File: SpanOrTermsBuilder.java    From lucene-solr with Apache License 2.0
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
  String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
  String value = DOMUtils.getNonBlankTextOrFail(e);

  List<SpanQuery> clausesList = new ArrayList<>();

  try (TokenStream ts = analyzer.tokenStream(fieldName, value)) {
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(termAtt.getBytesRef())));
      clausesList.add(stq);
    }
    ts.end();
    SpanOrQuery soq = new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
    float boost = DOMUtils.getAttribute(e, "boost", 1.0f);
    return new SpanBoostQuery(soq, boost);
  }
  catch (IOException ioe) {
    throw new ParserException("IOException parsing value:" + value);
  }
}
 
Example #12
Source File: Tagger.java    From SolrTextTagger with Apache License 2.0
public Tagger(Terms terms, Bits liveDocs, TokenStream tokenStream,
              TagClusterReducer tagClusterReducer, boolean skipAltTokens,
              boolean ignoreStopWords) throws IOException {
  this.terms = terms;
  this.liveDocs = liveDocs;
  this.tokenStream = tokenStream;
  this.skipAltTokens = skipAltTokens;
  this.ignoreStopWords = ignoreStopWords;
  byteRefAtt = tokenStream.addAttribute(TermToBytesRefAttribute.class);
  posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
  offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
  taggingAtt = tokenStream.addAttribute(TaggingAttribute.class);
  tokenStream.reset();

  this.tagClusterReducer = tagClusterReducer;
}
 
Example #13
Source File: TestEmptyTokenStream.java    From lucene-solr with Apache License 2.0
public void testIndexWriter_LUCENE4656() throws IOException {
  Directory directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(null));

  TokenStream ts = new EmptyTokenStream();
  assertFalse(ts.hasAttribute(TermToBytesRefAttribute.class));

  Document doc = new Document();
  doc.add(new StringField("id", "0", Field.Store.YES));
  doc.add(new TextField("description", ts));
  
  // this should not fail because we have no TermToBytesRefAttribute
  writer.addDocument(doc);
  
  assertEquals(1, writer.getDocStats().numDocs);

  writer.close();
  directory.close();
}
 
Example #14
Source File: StringFieldType.java    From crate with Apache License 2.0
@Override
public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements) throws IOException {

    PhraseQuery.Builder builder = new PhraseQuery.Builder();
    builder.setSlop(slop);

    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
    int position = -1;

    stream.reset();
    while (stream.incrementToken()) {
        if (enablePosIncrements) {
            position += posIncrAtt.getPositionIncrement();
        } else {
            position += 1;
        }
        builder.add(new Term(field, termAtt.getBytesRef()), position);
    }

    return builder.build();
}
 
Example #15
Source File: TestPerfTasksLogic.java    From lucene-solr with Apache License 2.0
private void assertEqualCollation(Analyzer a1, Analyzer a2, String text)
    throws Exception {
  TokenStream ts1 = a1.tokenStream("bogus", text);
  TokenStream ts2 = a2.tokenStream("bogus", text);
  ts1.reset();
  ts2.reset();
  TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class);
  TermToBytesRefAttribute termAtt2 = ts2.addAttribute(TermToBytesRefAttribute.class);
  assertTrue(ts1.incrementToken());
  assertTrue(ts2.incrementToken());
  BytesRef bytes1 = termAtt1.getBytesRef();
  BytesRef bytes2 = termAtt2.getBytesRef();
  assertEquals(bytes1, bytes2);
  assertFalse(ts1.incrementToken());
  assertFalse(ts2.incrementToken());
  ts1.close();
  ts2.close();
}
 
Example #16
Source File: ReadTokensTask.java    From lucene-solr with Apache License 2.0
@Override
public int doLogic() throws Exception {
  List<IndexableField> fields = doc.getFields();
  Analyzer analyzer = getRunData().getAnalyzer();
  int tokenCount = 0;
  for(final IndexableField field : fields) {
    if (field.fieldType().indexOptions() == IndexOptions.NONE ||
        field.fieldType().tokenized() == false) {
      continue;
    }
    
    final TokenStream stream = field.tokenStream(analyzer, null);
    // reset the TokenStream to the first token
    stream.reset();

    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    while(stream.incrementToken()) {
      termAtt.getBytesRef();
      tokenCount++;
    }
    stream.end();
    stream.close();
  }
  totalTokenCount += tokenCount;
  return tokenCount;
}
 
Example #17
Source File: IcuCollationAnalyzerTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0
private BytesRef bytesFromTokenStream(TokenStream stream) throws Exception {
    TermToBytesRefAttribute termAttr = stream.getAttribute(TermToBytesRefAttribute.class);
    stream.reset();
    BytesRefBuilder bytesRefBuilder = new BytesRefBuilder();
    while (stream.incrementToken()) {
        BytesRef bytesRef = termAttr.getBytesRef();
        bytesRefBuilder.append(bytesRef);
    }
    stream.close();
    return bytesRefBuilder.toBytesRef();
}
 
Example #18
Source File: SortFormTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0
private BytesRef sortKeyFromTokenStream(TokenStream stream) throws Exception {
    TermToBytesRefAttribute termAttr = stream.getAttribute(TermToBytesRefAttribute.class);
    BytesRefBuilder b = new BytesRefBuilder();
    stream.reset();
    while (stream.incrementToken()) {
        b.append(termAttr.getBytesRef());
    }
    stream.close();
    return b.get();
}
 
Example #19
Source File: CursorMarkTest.java    From lucene-solr with Apache License 2.0
private static Object getRandomCollation(SchemaField sf) throws IOException {
  Object val;
  Analyzer analyzer = sf.getType().getIndexAnalyzer();
  String term = TestUtil.randomRealisticUnicodeString(random());
  try (TokenStream ts = analyzer.tokenStream("fake", term)) {
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    ts.reset();
    assertTrue(ts.incrementToken());
    val = BytesRef.deepCopyOf(termAtt.getBytesRef());
    assertFalse(ts.incrementToken());
    ts.end();
  }
  return val;
}
 
Example #20
Source File: AlfrescoFieldType.java    From SearchServices with GNU Lesser General Public License v3.0
public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn)
{
    if (part == null || analyzerIn == null)
        return null;

    TokenStream source = null;
    try
    {
        source = analyzerIn.tokenStream(field, part);
        source.reset();

        TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
        BytesRef bytes = termAtt.getBytesRef();

        if (!source.incrementToken())
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "analyzer returned no terms for multiTerm term: " + part);
        if (source.incrementToken())
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "analyzer returned too many terms for multiTerm term: " + part);

        source.end();
        return BytesRef.deepCopyOf(bytes);
    }
    catch (IOException e)
    {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "error analyzing range part: " + part, e);
    }
    finally
    {
        IOUtils.closeWhileHandlingException(source);
    }
}
 
Example #21
Source File: StringFieldType.java    From crate with Apache License 2.0
@Override
public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {

    MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder();
    mpqb.setSlop(slop);

    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);

    PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
    int position = -1;

    List<Term> multiTerms = new ArrayList<>();
    stream.reset();
    while (stream.incrementToken()) {
        int positionIncrement = posIncrAtt.getPositionIncrement();

        if (positionIncrement > 0 && multiTerms.size() > 0) {
            if (enablePositionIncrements) {
                mpqb.add(multiTerms.toArray(new Term[0]), position);
            } else {
                mpqb.add(multiTerms.toArray(new Term[0]));
            }
            multiTerms.clear();
        }
        position += positionIncrement;
        multiTerms.add(new Term(field, termAtt.getBytesRef()));
    }

    if (enablePositionIncrements) {
        mpqb.add(multiTerms.toArray(new Term[0]), position);
    } else {
        mpqb.add(multiTerms.toArray(new Term[0]));
    }
    return mpqb.build();
}
 
Example #22
Source File: MinHashQParser.java    From lucene-solr with Apache License 2.0
private void getHashesFromTokenStream(String analyserField, ArrayList<BytesRef> hashes) throws Exception {
  TokenStream ts = getReq().getSchema().getIndexAnalyzer().tokenStream(analyserField, qstr);
  TermToBytesRefAttribute termAttribute = ts.getAttribute(TermToBytesRefAttribute.class);
  ts.reset();
  while (ts.incrementToken()) {
    BytesRef term = termAttribute.getBytesRef();
    hashes.add(BytesRef.deepCopyOf(term));
  }
  ts.end();
  ts.close();
}
 
Example #23
Source File: TestLongPostings.java    From lucene-solr with Apache License 2.0
private String getRandomTerm(String other) throws IOException {
  Analyzer a = new MockAnalyzer(random());
  while(true) {
    String s = TestUtil.randomRealisticUnicodeString(random());
    if (other != null && s.equals(other)) {
      continue;
    }
    try (TokenStream ts = a.tokenStream("foo", s)) {
      final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
      ts.reset();

      int count = 0;
      boolean changed = false;

      while(ts.incrementToken()) {
        final BytesRef termBytes = termAtt.getBytesRef();
        if (count == 0 && !termBytes.utf8ToString().equals(s)) {
          // The value was changed during analysis.  Keep iterating so the
          // tokenStream is exhausted.
          changed = true;
        }
        count++;
      }

      ts.end();
      // Did we iterate just once and the value was unchanged?
      if (!changed && count == 1) {
        return s;
      }
    }
  }
}
 
Example #24
Source File: FieldInvertState.java    From lucene-solr with Apache License 2.0
/**
 * Sets attributeSource to a new instance.
 */
void setAttributeSource(AttributeSource attributeSource) {
  if (this.attributeSource != attributeSource) {
    this.attributeSource = attributeSource;
    termAttribute = attributeSource.getAttribute(TermToBytesRefAttribute.class);
    termFreqAttribute = attributeSource.addAttribute(TermFrequencyAttribute.class);
    posIncrAttribute = attributeSource.addAttribute(PositionIncrementAttribute.class);
    offsetAttribute = attributeSource.addAttribute(OffsetAttribute.class);
    payloadAttribute = attributeSource.getAttribute(PayloadAttribute.class);
  }
}
 
Example #25
Source File: QueryBuilder.java    From lucene-solr with Apache License 2.0
/**
 * Creates simple term query from the cached tokenstream contents 
 */
protected Query analyzeTerm(String field, TokenStream stream) throws IOException {
  TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  BoostAttribute boostAtt = stream.addAttribute(BoostAttribute.class);
  
  stream.reset();
  if (!stream.incrementToken()) {
    throw new AssertionError();
  }
  
  return newTermQuery(new Term(field, termAtt.getBytesRef()), boostAtt.getBoost());
}
 
Example #26
Source File: QueryBuilder.java    From lucene-solr with Apache License 2.0
/** 
 * Creates simple boolean query from the cached tokenstream contents 
 */
protected Query analyzeBoolean(String field, TokenStream stream) throws IOException {
  TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  BoostAttribute boostAtt = stream.addAttribute(BoostAttribute.class);
  
  stream.reset();
  List<TermAndBoost> terms = new ArrayList<>();
  while (stream.incrementToken()) {
    terms.add(new TermAndBoost(new Term(field, termAtt.getBytesRef()), boostAtt.getBoost()));
  }
  
  return newSynonymQuery(terms.toArray(new TermAndBoost[0]));
}
 
Example #27
Source File: QueryBuilder.java    From lucene-solr with Apache License 2.0
/** 
 * Creates complex phrase query from the cached tokenstream contents 
 */
protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException {
  MultiPhraseQuery.Builder mpqb = newMultiPhraseQueryBuilder();
  mpqb.setSlop(slop);
  
  TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);

  PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
  int position = -1;  
  
  List<Term> multiTerms = new ArrayList<>();
  stream.reset();
  while (stream.incrementToken()) {
    int positionIncrement = posIncrAtt.getPositionIncrement();
    
    if (positionIncrement > 0 && multiTerms.size() > 0) {
      if (enablePositionIncrements) {
        mpqb.add(multiTerms.toArray(new Term[0]), position);
      } else {
        mpqb.add(multiTerms.toArray(new Term[0]));
      }
      multiTerms.clear();
    }
    position += positionIncrement;
    multiTerms.add(new Term(field, termAtt.getBytesRef()));
  }
  
  if (enablePositionIncrements) {
    mpqb.add(multiTerms.toArray(new Term[0]), position);
  } else {
    mpqb.add(multiTerms.toArray(new Term[0]));
  }
  return mpqb.build();
}
 
Example #28
Source File: Test2BTerms.java    From lucene-solr with Apache License 2.0
public MyTokenStream(Random random, int tokensPerDoc) {
  super(new MyAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY));
  this.tokensPerDoc = tokensPerDoc;
  addAttribute(TermToBytesRefAttribute.class);
  bytes.length = TOKEN_LEN;
  this.random = random;
  nextSave = TestUtil.nextInt(random, 500000, 1000000);
}
 
Example #29
Source File: Test2BTerms.java    From lucene-solr with Apache License 2.0
@Override
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
  if (attClass == TermToBytesRefAttribute.class)
    return new MyTermAttributeImpl();
  if (CharTermAttribute.class.isAssignableFrom(attClass))
    throw new IllegalArgumentException("no");
  return delegate.createAttributeInstance(attClass);
}
 
Example #30
Source File: LegacyNumericTokenStream.java    From lucene-solr with Apache License 2.0
@Override
public void reflectWith(AttributeReflector reflector) {
  reflector.reflect(TermToBytesRefAttribute.class, "bytes", getBytesRef());
  reflector.reflect(LegacyNumericTermAttribute.class, "shift", shift);
  reflector.reflect(LegacyNumericTermAttribute.class, "rawValue", getRawValue());
  reflector.reflect(LegacyNumericTermAttribute.class, "valueSize", valueSize);
}