Java Code Examples for org.apache.lucene.util.BytesRefBuilder#copyChars()

The following examples show how to use org.apache.lucene.util.BytesRefBuilder#copyChars(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TrieBuilder.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an FST over the given strings, mapping each one to a running counter.
 *
 * <p>Every string is copied into a reusable byte scratch buffer, converted to an
 * ints reference and added to the builder. An {@link AssertionError} raised while
 * adding a string is logged at debug level and that string is skipped.
 *
 * @param sortedStrings the strings to add, iterated in the set's own order
 * @return the FST produced by the builder
 * @throws IOException declared because the FST builder calls may throw it
 */
public static FST<Long> buildTrie(Set<String> sortedStrings) throws IOException {
  IntsRefBuilder intsScratch = new IntsRefBuilder();
  BytesRefBuilder bytesScratch = new BytesRefBuilder();
  PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton();
  Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, fstOutputs);
  long nextOutput = 0;
  for (String mention : sortedStrings) {
    bytesScratch.copyChars(mention);
    try {
      // The counter is advanced while the arguments are evaluated, i.e. before
      // add() itself runs, so a failing add() still consumes its output value.
      fstBuilder.add(Util.toIntsRef(bytesScratch.get(), intsScratch), nextOutput++);
    } catch (java.lang.AssertionError ae) {
      logger.debug("Assertion error for mention " + mention);
    }
  }
  return fstBuilder.finish();
}
 
Example 2
Source File: CommonTermsQueryParser.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Analyzes {@code queryString} and adds one term per emitted token to {@code query}.
 *
 * <p>The token stream is consumed following the {@code TokenStream} consumer
 * workflow: {@code reset()}, {@code incrementToken()} until it returns false,
 * {@code end()}, and finally {@code close()} (via try-with-resources).
 *
 * @param query the query that receives the terms and the minimum-should-match settings
 * @param queryString the text to analyze
 * @param field the field used both for analysis and for the created terms
 * @param parseContext not used by this method
 * @param analyzer the analyzer that tokenizes {@code queryString}
 * @param lowFreqMinimumShouldMatch value passed to {@code setLowFreqMinimumNumberShouldMatch}
 * @param highFreqMinimumShouldMatch value passed to {@code setHighFreqMinimumNumberShouldMatch}
 * @return {@code query} with its terms and settings applied, or {@code null} when
 *         the analyzer produced no tokens
 * @throws IOException if the token stream fails
 */
private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String field, QueryParseContext parseContext,
        Analyzer analyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException {
    // Logic similar to QueryParser#getFieldQuery
    int count = 0;
    try (TokenStream source = analyzer.tokenStream(field, queryString)) {
        source.reset();
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        BytesRefBuilder builder = new BytesRefBuilder();
        while (source.incrementToken()) {
            // UTF-8
            builder.copyChars(termAtt);
            // toBytesRef() is used so each Term gets its own bytes while the builder is reused.
            query.add(new Term(field, builder.toBytesRef()));
            count++;
        }
        // Signal end-of-stream before the stream is closed, as the consumer workflow requires.
        source.end();
    }

    if (count == 0) {
        // No tokens were produced, so there is nothing to query for.
        return null;
    }
    query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch);
    query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch);
    return query;
}
 
Example 3
Source File: Uid.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Creates one uid for every combination of a type and an id.
 *
 * <p>The result is ordered by type first, then by id, following the iteration
 * order of the two collections.
 *
 * @param types the types to combine
 * @param ids the ids to combine with each type
 * @return an array of {@code types.size() * ids.size()} uids
 */
public static BytesRef[] createUidsForTypesAndIds(Collection<String> types, Collection<?> ids) {
    final BytesRefBuilder typeScratch = new BytesRefBuilder();
    final BytesRefBuilder idScratch = new BytesRefBuilder();
    final BytesRef[] result = new BytesRef[types.size() * ids.size()];
    int next = 0;
    for (String type : types) {
        // Encode the type once and reuse it for every id.
        typeScratch.copyChars(type);
        for (Object id : ids) {
            result[next] = Uid.createUidAsBytes(typeScratch.get(), BytesRefs.toBytesRef(id, idScratch));
            next++;
        }
    }
    return result;
}
 
Example 4
Source File: BytesRefs.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an arbitrary value to a {@code BytesRef}.
 *
 * @param value the value to convert; may be {@code null}
 * @param spare scratch builder that receives {@code value.toString()} when the
 *              value is neither {@code null} nor already a {@code BytesRef}
 * @return {@code null} for a {@code null} value, the value itself when it already
 *         is a {@code BytesRef}, otherwise the result of {@code spare.get()}
 */
public static BytesRef toBytesRef(Object value, BytesRefBuilder spare) {
    // instanceof is false for null, so checking it first does not change the outcome.
    if (value instanceof BytesRef) {
        return (BytesRef) value;
    }
    if (value == null) {
        return null;
    }
    final String text = value.toString();
    spare.copyChars(text);
    return spare.get();
}
 
Example 5
Source File: FunctionValues.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the bytes representation of the string value of {@code doc} into {@code target}.
 * TODO: should this return the indexed raw bytes instead?
 *
 * @param doc the document whose string value is read via {@code strVal}
 * @param target builder that receives the value; cleared when there is no value
 * @return {@code true} if a value was copied, {@code false} if {@code strVal} returned {@code null}
 */
public boolean bytesVal(int doc, BytesRefBuilder target) throws IOException {
  final String value = strVal(doc);
  if (value != null) {
    target.copyChars(value);
    return true;
  }
  // No value for this document: leave the target empty.
  target.clear();
  return false;
}
 
Example 6
Source File: BooleanPerceptronClassifier.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds the {@code fst} field from the given weights.
 *
 * <p>Each key is added with its weight converted through {@code Double.longValue()},
 * in the iteration order of the map's entry set.
 *
 * @param weights the key-to-weight mapping to compile
 * @throws IOException declared because the FST compiler calls may throw it
 */
private void updateFST(SortedMap<String, Double> weights) throws IOException {
  final IntsRefBuilder intsScratch = new IntsRefBuilder();
  final BytesRefBuilder keyScratch = new BytesRefBuilder();
  final FSTCompiler<Long> compiler =
      new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, PositiveIntOutputs.getSingleton());
  for (Map.Entry<String, Double> weight : weights.entrySet()) {
    final String key = weight.getKey();
    keyScratch.copyChars(key);
    compiler.add(Util.toIntsRef(keyScratch.get(), intsScratch), weight.getValue().longValue());
  }
  fst = compiler.compile();
}
 
Example 7
Source File: FieldType.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Unmarshals a string-based field value.
 *
 * @param value the value to unmarshal; cast to {@code String} when non-null
 * @return {@code null} for a {@code null} value, otherwise the contents of a
 *         fresh {@code BytesRefBuilder} filled from the string
 */
protected static Object unmarshalStringSortValue(Object value) {
  if (value == null) {
    return null;
  }
  final BytesRefBuilder encoded = new BytesRefBuilder();
  encoded.copyChars((String) value);
  return encoded.get();
}
 
Example 8
Source File: BytesRefs.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a {@code BytesRef} view of the given value.
 *
 * @param value the value to convert; may be {@code null}
 * @param spare scratch builder used only when {@code value} has to be converted
 *              from its {@code toString()} form
 * @return {@code null} if {@code value} is {@code null}, {@code value} itself if it
 *         is a {@code BytesRef}, otherwise {@code spare.get()} after copying the string form
 */
public static BytesRef toBytesRef(Object value, BytesRefBuilder spare) {
    final BytesRef result;
    if (value == null) {
        result = null;
    } else if (value instanceof BytesRef) {
        result = (BytesRef) value;
    } else {
        spare.copyChars(value.toString());
        result = spare.get();
    }
    return result;
}
 
Example 9
Source File: SuggestUtils.java    From Elasticsearch with Apache License 2.0 4 votes vote down vote up
/**
 * Copies the current contents of {@code charTermAttr} into {@code spare}.
 *
 * @param spare builder that receives the characters
 * @return the result of {@code spare.get()} after the copy
 */
protected BytesRef fillBytesRef(BytesRefBuilder spare) {
    spare.copyChars(charTermAttr);
    final BytesRef filled = spare.get();
    return filled;
}
 
Example 10
Source File: Strings.java    From Elasticsearch with Apache License 2.0 4 votes vote down vote up
/**
 * Copies a character sequence into {@code spare} and returns its bytes as a new array.
 *
 * @param charSequence the characters to convert
 * @param spare scratch builder that is overwritten with the converted characters
 * @return a copy of the builder's bytes, exactly {@code spare.length()} long
 */
public static byte[] toUTF8Bytes(CharSequence charSequence, BytesRefBuilder spare) {
    spare.copyChars(charSequence);
    final byte[] backing = spare.bytes();
    final int encodedLength = spare.length();
    // Copy so the caller gets an array trimmed to the builder's current length.
    return Arrays.copyOf(backing, encodedLength);
}
 
Example 11
Source File: SimpleTextUtil.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Writes a string to {@code out} by copying it into {@code scratch} first and
 * delegating to the bytes-based {@code write} overload.
 *
 * @param out the output to write to
 * @param s the string to write
 * @param scratch scratch builder that is overwritten with the characters of {@code s}
 * @throws IOException if the delegated write fails
 */
public static void write(DataOutput out, String s, BytesRefBuilder scratch) throws IOException {
  final int charCount = s.length();
  scratch.copyChars(s, 0, charCount);
  write(out, scratch.get());
}
 
Example 12
Source File: FieldType.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Given the readable value, return the term value that will match it.
 * The conversion is delegated to the {@code String}-based {@code readableToIndexed}
 * overload, and its result is copied into {@code result}. This method will modify
 * the size and length of the {@code result} parameter and write from offset 0.
 *
 * @param val the readable value
 * @param result builder that receives the indexed form
 */
public void readableToIndexed(CharSequence val, BytesRefBuilder result) {
  result.copyChars(readableToIndexed(val.toString()));
}