Java Code Examples for org.apache.lucene.util.CharsRefBuilder#clear()

The following examples show how to use org.apache.lucene.util.CharsRefBuilder#clear(). The project and source file each example comes from are noted above it.
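Before the project examples, here is a minimal stand-alone sketch (not taken from any project below; the class name is illustrative) of what clear() does: it resets the builder's length to zero so the same instance can be reused, while values already copied out via toCharsRef() remain valid.

import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;

public class CharsRefBuilderClearDemo {
  public static void main(String[] args) {
    CharsRefBuilder builder = new CharsRefBuilder();

    builder.append("first value");
    // toCharsRef() copies the current contents, so the result stays valid
    // after the builder is cleared and reused.
    CharsRef first = builder.toCharsRef();

    builder.clear();                // length goes back to 0; the internal buffer is kept
    builder.append("second value");
    CharsRef second = builder.toCharsRef();

    System.out.println(first + " / " + second);   // first value / second value
  }
}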
Example 1
Source File: TestContextSuggestField.java    From lucene-solr with Apache License 2.0
@Test
public void testTokenStream() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  ContextSuggestField field = new ContextSuggestField("field", "input", 1, "context1", "context2");
  BytesRef surfaceForm = new BytesRef("input");
  // Build the payload bytes the suggest field is expected to produce:
  // the surface form, weight + 1, and the type byte.
  ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
  try (OutputStreamDataOutput output = new OutputStreamDataOutput(byteArrayOutputStream)) {
    output.writeVInt(surfaceForm.length);
    output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length);
    output.writeVInt(1 + 1);
    output.writeByte(ContextSuggestField.TYPE);
  }
  BytesRef payload = new BytesRef(byteArrayOutputStream.toByteArray());
  String[] expectedOutputs = new String[2];
  // Expected analyzed forms: each context, the context separator, SEP_LABEL, then the input.
  CharsRefBuilder builder = new CharsRefBuilder();
  builder.append("context1");
  builder.append(((char) ContextSuggestField.CONTEXT_SEPARATOR));
  builder.append((char) ConcatenateGraphFilter.SEP_LABEL);
  builder.append("input");
  expectedOutputs[0] = builder.toCharsRef().toString();
  builder.clear(); // reset the builder so it can be reused for the second expected output
  builder.append("context2");
  builder.append(((char) ContextSuggestField.CONTEXT_SEPARATOR));
  builder.append((char) ConcatenateGraphFilter.SEP_LABEL);
  builder.append("input");
  expectedOutputs[1] = builder.toCharsRef().toString();
  TokenStream stream = new TestSuggestField.PayloadAttrToTypeAttrFilter(field.tokenStream(analyzer, null));
  assertTokenStreamContents(stream, expectedOutputs, null, null, new String[]{payload.utf8ToString(), payload.utf8ToString()}, new int[]{1, 0}, null, null);

  CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer);
  stream = new TestSuggestField.PayloadAttrToTypeAttrFilter(field.tokenStream(completionAnalyzer, null));
  assertTokenStreamContents(stream, expectedOutputs, null, null, new String[]{payload.utf8ToString(), payload.utf8ToString()}, new int[]{1, 0}, null, null);
}
 
Example 2
Source File: SynonymMap.java    From lucene-solr with Apache License 2.0
/** Sugar: analyzes the text with the analyzer and
 *  separates tokens by {@link SynonymMap#WORD_SEPARATOR}.
 *  The reuse builder and its chars must not be null. */
public CharsRef analyze(String text, CharsRefBuilder reuse) throws IOException {
  try (TokenStream ts = analyzer.tokenStream("", text)) {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
    ts.reset();
    reuse.clear(); // start from an empty builder; the caller's instance is reused
    while (ts.incrementToken()) {
      int length = termAtt.length();
      if (length == 0) {
        throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
      }
      if (posIncAtt.getPositionIncrement() != 1) {
        throw new IllegalArgumentException("term: " + text + " analyzed to a token (" + termAtt +
                                           ") with position increment != 1 (got: " + posIncAtt.getPositionIncrement() + ")");
      }
      reuse.grow(reuse.length() + length + 1); /* current + word + separator */
      int end = reuse.length();
      if (reuse.length() > 0) {
        reuse.setCharAt(end++, SynonymMap.WORD_SEPARATOR);
        reuse.setLength(reuse.length() + 1);
      }
      System.arraycopy(termAtt.buffer(), 0, reuse.chars(), end, length);
      reuse.setLength(reuse.length() + length);
    }
    ts.end();
  }
  if (reuse.length() == 0) {
    throw new IllegalArgumentException("term: " + text + " was completely eliminated by analyzer");
  }
  return reuse.get();
}
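For orientation, a hypothetical caller of this method might look like the sketch below; the parser variable stands in for some concrete SynonymMap.Parser subclass and the input strings are made up. Because analyze() calls reuse.clear() and returns reuse.get(), which shares the builder's buffer, the result is deep-copied before the builder is reused.

// Hypothetical usage sketch, not from the Lucene sources.
static void analyzeTwoEntries(SynonymMap.Parser parser) throws IOException {
  CharsRefBuilder reuse = new CharsRefBuilder();

  // analyze() returns reuse.get(), which points at the builder's buffer,
  // so deep-copy each result before the builder is cleared by the next call.
  CharsRef first = CharsRef.deepCopyOf(parser.analyze("wi fi", reuse));
  CharsRef second = CharsRef.deepCopyOf(parser.analyze("wireless", reuse));

  System.out.println(first + " then " + second);
}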
 
Example 3
Source File: TestConcatenateGraphFilter.java    From lucene-solr with Apache License 2.0
@Test
public void testWithSynonyms() throws Exception {
  SynonymMap.Builder builder = new SynonymMap.Builder(true);
  builder.add(new CharsRef("mykeyword"), new CharsRef("mysynonym"), true);
  Tokenizer tokenStream = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  String input = "mykeyword another keyword";
  tokenStream.setReader(new StringReader(input));
  @SuppressWarnings("deprecation")
  SynonymFilter filter = new SynonymFilter(tokenStream, builder.build(), true);
  ConcatenateGraphFilter stream = new ConcatenateGraphFilter(filter, SEP_LABEL, false, 100);
  String[] expectedOutputs = new String[2];
  CharsRefBuilder expectedOutput = new CharsRefBuilder();
  expectedOutput.append("mykeyword");
  expectedOutput.append(SEP_LABEL);
  expectedOutput.append("another");
  expectedOutput.append(SEP_LABEL);
  expectedOutput.append("keyword");
  expectedOutputs[0] = expectedOutput.toCharsRef().toString();
  expectedOutput.clear(); // clear() resets the builder so it can build the synonym path
  expectedOutput.append("mysynonym");
  expectedOutput.append(SEP_LABEL);
  expectedOutput.append("another");
  expectedOutput.append(SEP_LABEL);
  expectedOutput.append("keyword");
  expectedOutputs[1] = expectedOutput.toCharsRef().toString();
  assertTokenStreamContents(stream, expectedOutputs, null, null, new int[]{1, 0});
}