Java Code Examples for org.apache.lucene.util.IntsRefBuilder#clear()

The following examples show how to use org.apache.lucene.util.IntsRefBuilder#clear(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: FSTTester.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Fills {@code ir} with the Unicode code points of {@code s}, one int per code point
 * (a surrogate pair yields a single entry), and returns {@code ir.get()}.
 * Any previous content of {@code ir} is discarded.
 */
static IntsRef toIntsRefUTF32(String s, IntsRefBuilder ir) {
  final int end = s.length();
  ir.clear();
  // pos walks the UTF-16 chars of s; emitted counts the code points appended so far.
  for (int pos = 0, emitted = 0; pos < end; emitted++) {
    ir.grow(emitted + 1);
    final int codePoint = s.codePointAt(pos);
    ir.append(codePoint);
    // Advance by 1 or 2 chars depending on whether the code point is supplementary.
    pos += Character.charCount(codePoint);
  }
  return ir.get();
}
 
Example 2
Source File: FSTTester.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Copies the {@code offset}/{@code length} window of {@code br} into {@code ir},
 * widening every byte to its unsigned value (0-255), and returns {@code ir.get()}.
 */
static IntsRef toIntsRef(BytesRef br, IntsRefBuilder ir) {
  ir.grow(br.length);
  ir.clear();
  final int start = br.offset;
  final int count = br.length;
  for (int k = 0; k < count; k++) {
    // Mask with 0xFF so negative bytes become 128..255 instead of sign-extending.
    final int unsigned = br.bytes[start + k] & 0xFF;
    ir.append(unsigned);
  }
  return ir.get();
}
 
Example 3
Source File: FSTTester.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Walks the FST from its first arc, choosing a random outgoing arc at every step,
 * until an arc labelled {@code FST.END_LABEL} is chosen.
 *
 * <p>{@code in} is cleared first and then receives the labels of the chosen arcs
 * (the end label itself is not appended). The outputs of all chosen arcs, including
 * the final end-label arc, are combined with {@code fst.outputs.add}.
 *
 * @param fst the FST to walk
 * @param in  builder that receives the labels of the randomly chosen path
 * @return the output accumulated along the chosen path
 * @throws IOException declared on the signature; presumably raised by the FST read calls
 */
private T randomAcceptedWord(FST<T> fst, IntsRefBuilder in) throws IOException {
  FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());

  // Scratch list of candidate arcs for the current step; emptied after each pick.
  final List<FST.Arc<T>> arcs = new ArrayList<>();
  in.clear();
  final T NO_OUTPUT = fst.outputs.getNoOutput();
  T output = NO_OUTPUT;
  final FST.BytesReader fstReader = fst.getBytesReader();

  while(true) {
    // read all arcs:
    // The same Arc instance is passed as both arguments, so it is overwritten in place.
    fst.readFirstTargetArc(arc, arc, fstReader);
    // Store a copy: arc is handed to readNextArc below and presumably mutated by it.
    arcs.add(new FST.Arc<T>().copyFrom(arc));
    while(!arc.isLast()) {
      fst.readNextArc(arc, fstReader);
      arcs.add(new FST.Arc<T>().copyFrom(arc));
    }
    
    // pick one
    // NOTE(review): `random` is not declared in this method; presumably a field of the enclosing class.
    arc = arcs.get(random.nextInt(arcs.size()));
    arcs.clear();

    // accumulate output
    output = fst.outputs.add(output, arc.output());

    // append label
    // The end label terminates the walk and is not added to `in`.
    if (arc.label() == FST.END_LABEL) {
      break;
    }

    in.append(arc.label());
  }

  return output;
}
 
Example 4
Source File: Util.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a {@link BytesRef} into an {@link IntsRef} by widening each byte to its
 * unsigned value (0-255).
 *
 * @param input   bytes to convert; only the {@code offset}/{@code length} window is read
 * @param scratch builder that is cleared and then filled with the converted values
 * @return the result of {@code scratch.get()}
 */
public static IntsRef toIntsRef(BytesRef input, IntsRefBuilder scratch) {
  scratch.clear();
  final int count = input.length;
  final int start = input.offset;
  int k = 0;
  while (k < count) {
    // 0xFF mask prevents sign extension of bytes >= 0x80.
    scratch.append(input.bytes[start + k] & 0xFF);
    k++;
  }
  return scratch.get();
}
 
Example 5
Source File: FstDecompounder.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Decodes a UTF-16 character sequence into Unicode code points.
 *
 * @param s       the characters to decode
 * @param builder cleared first, then receives one int per code point of {@code s}
 * @return the same {@code builder} instance that was passed in
 */
private static IntsRefBuilder fromUTF16ToUTF32(CharSequence s, IntsRefBuilder builder) {
    builder.clear();
    final int limit = s.length();
    int pos = 0;
    while (pos < limit) {
        final int codePoint = Character.codePointAt(s, pos);
        builder.append(codePoint);
        // Skip one char for BMP code points, two for supplementary ones.
        pos += Character.charCount(codePoint);
    }
    return builder;
}
 
Example 6
Source File: FstCompiler.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Compiles a word list into an FST and writes it to {@code outputStream}.
 *
 * <p>The input is read line by line as UTF-8. Lines containing {@code '#'} are skipped.
 * For every other line, the first token produced by {@code pattern.split} is trimmed and
 * lower-cased with {@link Locale#ROOT}; a word equal to the previously accepted word is
 * skipped. Each word is stored twice: as-is followed by {@code '>'}, and reversed followed
 * by {@code '<'}. The collected entries are sorted and added, in order, to an FST builder
 * whose result is saved to the output.
 *
 * <p>The reader wrapping {@code inputStream} is closed when reading ends, whether normally
 * or with an exception. The data output wrapping {@code outputStream} is closed after the
 * FST has been saved.
 *
 * @param inputStream the input stream
 * @param outputStream the output stream
 * @throws IOException if compilation fails
 */
public void compile(InputStream inputStream, OutputStream outputStream) throws IOException {
    final HashSet<BytesRef> words = new HashSet<>();
    // try-with-resources guarantees the reader is released even if readLine() throws.
    try (BufferedReader reader =
             new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
        final StringBuilder stringBuilder = new StringBuilder();
        String last = null;
        String line;
        while ((line = reader.readLine()) != null) {
            // Any line containing '#' is skipped entirely.
            if (line.indexOf('#') >= 0) {
                continue;
            }
            line = pattern.split(line)[0].trim();
            line = line.toLowerCase(Locale.ROOT);
            // Cheap skip for consecutive duplicates; the set removes the rest.
            if (line.equals(last)) {
                continue;
            }
            last = line;
            /*
             * Add the word to the hash set in left-to-right characters order and reversed
             * for easier matching later on.
             */
            stringBuilder.setLength(0);
            stringBuilder.append(line);
            final int len = stringBuilder.length();
            stringBuilder.append('>');
            words.add(new BytesRef(stringBuilder));
            // Drop the '>' marker again before reversing the bare word.
            stringBuilder.setLength(len);
            stringBuilder.reverse().append('<');
            words.add(new BytesRef(stringBuilder));
        }
    }

    // Entries are sorted before being handed to the builder.
    final BytesRef[] all = words.toArray(new BytesRef[0]);
    Arrays.sort(all, BytesRef::compareTo);

    final Object nothing = NoOutputs.getSingleton().getNoOutput();
    final Builder<Object> builder = new Builder<>(INPUT_TYPE.BYTE4, NoOutputs.getSingleton());
    final IntsRefBuilder intsRef = new IntsRefBuilder();
    for (BytesRef bytesRef : all) {
        intsRef.clear();
        intsRef.copyUTF8Bytes(bytesRef);
        builder.add(intsRef.get(), nothing);
    }
    final FST<Object> fst = builder.finish();
    try (OutputStreamDataOutput out = new OutputStreamDataOutput(outputStream)) {
        fst.save(out);
    }
}