Java Code Examples for org.apache.lucene.util.CharsRefBuilder#append()

The following examples show how to use org.apache.lucene.util.CharsRefBuilder#append(). You can vote up the examples you like or vote down the ones you don't like, and you can go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: TestSuggestField.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
@Test
public void testReservedChars() throws Exception {
  // Start from a valid surface form, then corrupt one char with each reserved
  // code point in turn; SuggestField must reject every one of them.
  CharsRefBuilder charsRefBuilder = new CharsRefBuilder();
  charsRefBuilder.append("sugg");
  assertReservedCharRejected(charsRefBuilder, (char) ConcatenateGraphFilter.SEP_LABEL, "[0x1f]");
  assertReservedCharRejected(charsRefBuilder, (char) CompletionAnalyzer.HOLE_CHARACTER, "[0x1e]");
  assertReservedCharRejected(charsRefBuilder, (char) NRTSuggesterBuilder.END_BYTE, "[0x0]");
}

/**
 * Overwrites index 2 of {@code builder} with {@code reserved} and verifies that
 * constructing a {@link SuggestField} with the resulting value throws an
 * {@link IllegalArgumentException} whose message names the offending code point.
 */
private static void assertReservedCharRejected(CharsRefBuilder builder, char reserved, String expectedCode) {
  builder.setCharAt(2, reserved);
  IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
    new SuggestField("name", builder.toString(), 1);
  });
  assertTrue(expected.getMessage().contains(expectedCode));
}
 
Example 2
Source File: TestConcatenateGraphFilter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
@Test
  public void testWithStopword() throws Exception {
    // Exercise both modes: with and without preserved position increments.
    for (boolean preservePosInc : new boolean[]{true, false}) {
      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, true);
      // LUCENE-8344: leading/trailing stopword "a" exercises position-increment holes.
      source.setReader(new StringReader("a mykeyword a keyword"));
      TokenFilter stopped = new StopFilter(source, StopFilter.makeStopSet("a"));
      ConcatenateGraphFilter concatStream = new ConcatenateGraphFilter(stopped, SEP_LABEL, preservePosInc, 10);
      // Build the expected concatenation; each removed stopword contributes an
      // extra separator only when position increments are preserved.
      CharsRefBuilder expected = new CharsRefBuilder();
      if (preservePosInc) {
        expected.append(SEP_LABEL);
      }
      expected.append("mykeyword");
      expected.append(SEP_LABEL);
      if (preservePosInc) {
        expected.append(SEP_LABEL);
      }
      expected.append("keyword");
      // NOTE: LUCENE-8344 — a trailing separator for the final stopword hole is
      // not yet emitted; add it here (when preservePosInc) once that is fixed.
      assertTokenStreamContents(concatStream, new String[]{expected.toCharsRef().toString()});
    }
  }
 
Example 3
Source File: TestContextSuggestField.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Test
public void testTokenStream() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  ContextSuggestField field = new ContextSuggestField("field", "input", 1, "context1", "context2");
  // Serialize the expected payload: surface form, then the context count marker + type byte.
  BytesRef surfaceForm = new BytesRef("input");
  ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
  try (OutputStreamDataOutput output = new OutputStreamDataOutput(byteArrayOutputStream)) {
    output.writeVInt(surfaceForm.length);
    output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length);
    output.writeVInt(1 + 1);
    output.writeByte(ContextSuggestField.TYPE);
  }
  BytesRef payload = new BytesRef(byteArrayOutputStream.toByteArray());
  // One token per context: <context><CONTEXT_SEPARATOR><SEP_LABEL><surface form>.
  String[] contexts = {"context1", "context2"};
  String[] expectedOutputs = new String[contexts.length];
  CharsRefBuilder token = new CharsRefBuilder();
  for (int i = 0; i < contexts.length; i++) {
    token.clear();
    token.append(contexts[i]);
    token.append((char) ContextSuggestField.CONTEXT_SEPARATOR);
    token.append((char) ConcatenateGraphFilter.SEP_LABEL);
    token.append("input");
    expectedOutputs[i] = token.toCharsRef().toString();
  }
  TokenStream stream = new TestSuggestField.PayloadAttrToTypeAttrFilter(field.tokenStream(analyzer, null));
  assertTokenStreamContents(stream, expectedOutputs, null, null, new String[]{payload.utf8ToString(), payload.utf8ToString()}, new int[]{1, 0}, null, null);

  // Same expectations must hold when the field is analyzed through a CompletionAnalyzer.
  CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer);
  stream = new TestSuggestField.PayloadAttrToTypeAttrFilter(field.tokenStream(completionAnalyzer, null));
  assertTokenStreamContents(stream, expectedOutputs, null, null, new String[]{payload.utf8ToString(), payload.utf8ToString()}, new int[]{1, 0}, null, null);
}
 
Example 4
Source File: TestConcatenateGraphFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Test
public void testWithMultipleTokens() throws Exception {
  Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  tokenizer.setReader(new StringReader("mykeyword another keyword"));
  ConcatenateGraphFilter stream = new ConcatenateGraphFilter(tokenizer);
  // Expected output: every token joined by SEP_LABEL in input order.
  String[] tokens = {"mykeyword", "another", "keyword"};
  CharsRefBuilder expected = new CharsRefBuilder();
  for (int i = 0; i < tokens.length; i++) {
    if (i > 0) {
      expected.append(SEP_LABEL);
    }
    expected.append(tokens[i]);
  }
  assertTokenStreamContents(stream, new String[]{expected.toCharsRef().toString()}, null, null, new int[]{1});
}
 
Example 5
Source File: TestConcatenateGraphFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Test
public void testWithSynonyms() throws Exception {
  SynonymMap.Builder synonyms = new SynonymMap.Builder(true);
  synonyms.add(new CharsRef("mykeyword"), new CharsRef("mysynonym"), true);
  Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  tokenizer.setReader(new StringReader("mykeyword another keyword"));
  @SuppressWarnings("deprecation")
  SynonymFilter filter = new SynonymFilter(tokenizer, synonyms.build(), true);
  ConcatenateGraphFilter stream = new ConcatenateGraphFilter(filter, SEP_LABEL, false, 100);
  // The synonym on the first token yields two graph paths; both share the tail.
  String[] firstTokens = {"mykeyword", "mysynonym"};
  String[] expectedOutputs = new String[firstTokens.length];
  CharsRefBuilder path = new CharsRefBuilder();
  for (int i = 0; i < firstTokens.length; i++) {
    path.clear();
    path.append(firstTokens[i]);
    path.append(SEP_LABEL);
    path.append("another");
    path.append(SEP_LABEL);
    path.append("keyword");
    expectedOutputs[i] = path.toCharsRef().toString();
  }
  assertTokenStreamContents(stream, expectedOutputs, null, null, new int[]{1, 0});
}
 
Example 6
Source File: MoreLikeThisHandler.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public DocListAndSet getMoreLikeThis( Reader reader, int start, int rows, List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
{
  // SOLR-5351: with a single MLT field the reader can be handed over directly;
  // with several fields the stream content has to be duplicated per field so
  // query terms can be pulled from any of them.
  String[] fields = mlt.getFieldNames();
  if (fields.length == 1) {
    rawMLTQuery = mlt.like(fields[0], reader);
  } else {
    // Drain the reader once into a char buffer, then share the resulting string
    // across all fields of the synthetic multi-field document.
    CharsRefBuilder content = new CharsRefBuilder();
    char[] buf = new char[1024];
    for (int read = reader.read(buf); read >= 0; read = reader.read(buf)) {
      content.append(buf, 0, read);
    }

    Collection<Object> streamValue = Collections.singleton(content.get().toString());
    Map<String, Collection<Object>> multifieldDoc = new HashMap<>(fields.length);
    for (String field : fields) {
      multifieldDoc.put(field, streamValue);
    }

    rawMLTQuery = mlt.like(multifieldDoc);
  }

  boostedMLTQuery = getBoostedQuery(rawMLTQuery);
  if (terms != null) {
    fillInterestingTermsFromMLTQuery(boostedMLTQuery, terms);
  }
  // Fetch the doc set too only when a caller downstream needs it.
  DocListAndSet results = new DocListAndSet();
  if (this.needDocSet) {
    results = searcher.getDocListAndSet(boostedMLTQuery, filters, null, start, rows, flags);
  } else {
    results.docList = searcher.getDocList(boostedMLTQuery, filters, null, start, rows, flags);
  }
  return results;
}