Java Code Examples for org.apache.lucene.util.IntsRefBuilder#append()

The following examples show how to use org.apache.lucene.util.IntsRefBuilder#append(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestFSTs.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Adds six single-label inputs, each mapped to a 300-byte output whose first byte
 * equals the label, then looks every input up again and checks the stored output.
 */
public void testLargeOutputsOnArrayArcs() throws Exception {
  final int arcCount = 6;
  final int outputLength = 300;

  final FSTCompiler<BytesRef> compiler =
      new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, ByteSequenceOutputs.getSingleton());

  // One-int key that is overwritten in place for every label.
  final IntsRefBuilder key = new IntsRefBuilder();
  key.append(0);

  // Only the first byte varies between outputs; the remaining bytes stay zero.
  final BytesRef template = new BytesRef(new byte[outputLength]);
  for (int label = 0; label < arcCount; label++) {
    key.setIntAt(0, label);
    template.bytes[0] = (byte) label;
    // Deep copy, because the template is mutated again on the next iteration.
    compiler.add(key.get(), BytesRef.deepCopyOf(template));
  }

  final FST<BytesRef> fst = compiler.compile();
  for (int label = 0; label < arcCount; label++) {
    key.setIntAt(0, label);
    final BytesRef found = Util.get(fst, key.get());
    assertNotNull(found);
    assertEquals(outputLength, found.length);
    assertEquals(found.bytes[found.offset], label);
    int pos = 1;
    while (pos < found.length) {
      assertEquals(0, found.bytes[found.offset + pos]);
      pos++;
    }
  }
}
 
Example 2
Source File: AutomatonTestUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Recursively collects into <code>strings</code> every string that leads from state
 * <code>s</code> to an accept state, using <code>path</code> as the shared prefix buffer
 * and <code>pathstates</code> as the set of states on the current path.
 *
 * @return <code>false</code> if a transition leads to a state already on the current
 *     path, or if <code>limit</code> is non-negative and more than <code>limit</code>
 *     strings have been collected; <code>true</code> otherwise.
 *     <code>limit</code>&lt;0 means "infinite".
 */
private static boolean getFiniteStrings(Automaton a, int s, HashSet<Integer> pathstates, 
    HashSet<IntsRef> strings, IntsRefBuilder path, int limit) {
  pathstates.add(s);
  final Transition transition = new Transition();
  for (int remaining = a.initTransition(s, transition); remaining > 0; remaining--) {
    a.getNextTransition(transition);
    final int dest = transition.dest;
    if (pathstates.contains(dest)) {
      return false;
    }
    final int lastLabel = transition.max;
    for (int label = transition.min; label <= lastLabel; label++) {
      path.append(label);
      if (a.isAccept(dest)) {
        strings.add(path.toIntsRef());
        final boolean overLimit = limit >= 0 && strings.size() > limit;
        if (overLimit) {
          return false;
        }
      }
      final boolean finite = getFiniteStrings(a, dest, pathstates, strings, path, limit);
      if (finite == false) {
        return false;
      }
      // Drop the label appended above before trying the next one.
      path.setLength(path.length() - 1);
    }
  }
  pathstates.remove(s);
  return true;
}
 
Example 3
Source File: FSTTester.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Clears {@code ir}, appends every Unicode code point of {@code s} to it in order,
 * and returns {@code ir.get()}.
 */
static IntsRef toIntsRefUTF32(String s, IntsRefBuilder ir) {
  final int end = s.length();
  ir.clear();
  for (int pos = 0, count = 0; pos < end; count++) {
    ir.grow(count + 1);
    final int codePoint = s.codePointAt(pos);
    ir.append(codePoint);
    // A supplementary code point occupies two chars.
    pos += Character.charCount(codePoint);
  }
  return ir.get();
}
 
Example 4
Source File: FSTTester.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Clears {@code ir} and appends each byte of {@code br}, read as an unsigned value
 * (0..255), then returns {@code ir.get()}.
 */
static IntsRef toIntsRef(BytesRef br, IntsRefBuilder ir) {
  ir.grow(br.length);
  ir.clear();
  final int end = br.offset + br.length;
  for (int pos = br.offset; pos < end; pos++) {
    final int unsigned = br.bytes[pos] & 0xFF;
    ir.append(unsigned);
  }
  return ir.get();
}
 
Example 5
Source File: FSTTester.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Walks the FST from its first arc, choosing a random arc at every step until an
 * arc labelled {@code FST.END_LABEL} is chosen. The labels of the chosen arcs are
 * written to {@code in} (which is cleared first).
 *
 * @return the outputs of all chosen arcs combined with {@code fst.outputs.add}
 */
private T randomAcceptedWord(FST<T> fst, IntsRefBuilder in) throws IOException {
  FST.Arc<T> current = fst.getFirstArc(new FST.Arc<T>());

  final List<FST.Arc<T>> candidates = new ArrayList<>();
  in.clear();
  T accumulated = fst.outputs.getNoOutput();
  final FST.BytesReader reader = fst.getBytesReader();

  for (;;) {
    // Read all arcs, storing a copy of each because `current` is reused as the cursor.
    fst.readFirstTargetArc(current, current, reader);
    for (;;) {
      candidates.add(new FST.Arc<T>().copyFrom(current));
      if (current.isLast()) {
        break;
      }
      fst.readNextArc(current, reader);
    }

    // Pick one and reset the list for the next step.
    final int choice = random.nextInt(candidates.size());
    current = candidates.get(choice);
    candidates.clear();

    accumulated = fst.outputs.add(accumulated, current.output());

    // The end label terminates the word and is not appended to it.
    if (current.label() == FST.END_LABEL) {
      return accumulated;
    }
    in.append(current.label());
  }
}
 
Example 6
Source File: Operations.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the single input accepted by this automaton, or null if it does not
 * accept exactly one input. The automaton must be deterministic.
 *
 * @throws IllegalArgumentException if {@code a} is not deterministic
 */
public static IntsRef getSingleton(Automaton a) {
  if (!a.isDeterministic()) {
    throw new IllegalArgumentException("input automaton must be deterministic");
  }
  final IntsRefBuilder accepted = new IntsRefBuilder();
  final HashSet<Integer> seen = new HashSet<>();
  final Transition only = new Transition();
  int state = 0;
  while (true) {
    seen.add(state);

    if (a.isAccept(state)) {
      // An accept state ends the single string only if nothing continues past it.
      return a.getNumTransitions(state) == 0 ? accepted.get() : null;
    }

    if (a.getNumTransitions(state) != 1) {
      return null;
    }
    a.getTransition(state, 0, only);

    // A label range or a transition back to a visited state rules out a single string.
    if (only.min != only.max || seen.contains(only.dest)) {
      return null;
    }
    accepted.append(only.min);
    state = only.dest;
  }
}
 
Example 7
Source File: Util.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the bytes referenced by {@code input} into ints, one unsigned byte value
 * (0..255) per int, reusing {@code scratch} as the destination.
 *
 * @return {@code scratch.get()} after it has been cleared and refilled
 */
public static IntsRef toIntsRef(BytesRef input, IntsRefBuilder scratch) {
  scratch.clear();
  final int end = input.offset + input.length;
  for (int pos = input.offset; pos < end; pos++) {
    scratch.append(Byte.toUnsignedInt(input.bytes[pos]));
  }
  return scratch.get();
}
 
Example 8
Source File: FiniteStringsIteratorTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a two-state automaton with one 'a' transition into an accept state and
 * checks that the only finite string reported for it is "a".
 */
public void testSingleString() {
  final Automaton automaton = new Automaton();
  final int from = automaton.createState();
  final int to = automaton.createState();
  automaton.setAccept(to, true);
  automaton.addTransition(from, to, 'a', 'a');
  automaton.finishState();

  final IntsRefBuilder expected = new IntsRefBuilder();
  expected.append('a');

  final Set<IntsRef> accepted = TestOperations.getFiniteStrings(automaton);
  assertEquals(1, accepted.size());
  assertTrue(accepted.contains(expected.toIntsRef()));
}
 
Example 9
Source File: TestAutomaton.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Returns the Unicode code points of {@code s}, in order, as an {@code IntsRef}. */
private static IntsRef toIntsRef(String s) {
  final IntsRefBuilder codePoints = new IntsRefBuilder();
  int pos = 0;
  while (pos < s.length()) {
    final int codePoint = s.codePointAt(pos);
    codePoints.append(codePoint);
    // Advance by one or two chars depending on the code point just read.
    pos += Character.charCount(codePoint);
  }

  return codePoints.get();
}
 
Example 10
Source File: FstDecompounder.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Clears {@code builder} and fills it with the Unicode code points of {@code s}.
 *
 * @return the same {@code builder} instance that was passed in
 */
private static IntsRefBuilder fromUTF16ToUTF32(CharSequence s, IntsRefBuilder builder) {
    builder.clear();
    final int end = s.length();
    int pos = 0;
    while (pos < end) {
        final int codePoint = Character.codePointAt(s, pos);
        builder.append(codePoint);
        pos += Character.charCount(codePoint);
    }
    return builder;
}
 
Example 11
Source File: FacetsConfig.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * For every index field in {@code byField}: adds each facet field's category to the
 * taxonomy writer, adds drill-down {@code StringField}s for the label's path prefixes,
 * and finally adds one {@code BinaryDocValuesField} holding the collected ordinals.
 *
 * @throws IllegalArgumentException if a facet field has more than one path component
 *     but its dimension is not configured as hierarchical
 */
private void processFacetFields(TaxonomyWriter taxoWriter, Map<String,List<FacetField>> byField, Document doc) throws IOException {
  for (Map.Entry<String,List<FacetField>> entry : byField.entrySet()) {
    final String indexFieldName = entry.getKey();
    final IntsRefBuilder ordinals = new IntsRefBuilder();

    for (FacetField facetField : entry.getValue()) {
      final FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
      if (facetField.path.length > 1 && dimConfig.hierarchical == false) {
        throw new IllegalArgumentException("dimension \"" + facetField.dim + "\" is not hierarchical yet has " + facetField.path.length + " components");
      }

      final FacetLabel label = new FacetLabel(facetField.dim, facetField.path);

      checkTaxoWriter(taxoWriter);
      final int ordinal = taxoWriter.addCategory(label);
      ordinals.append(ordinal);

      if (dimConfig.multiValued && (dimConfig.hierarchical || dimConfig.requireDimCount)) {
        // Add all parents too:
        for (int parent = taxoWriter.getParent(ordinal); parent > 0; parent = taxoWriter.getParent(parent)) {
          ordinals.append(parent);
        }

        if (dimConfig.requireDimCount == false) {
          // Remove last (dimension) ord:
          ordinals.setLength(ordinals.length() - 1);
        }
      }

      // Drill down: one term per path prefix, optionally skipping the dimension-only prefix.
      final int firstPrefixLength = dimConfig.requireDimensionDrillDown ? 1 : 2;
      for (int prefixLength = firstPrefixLength; prefixLength <= label.length; prefixLength++) {
        doc.add(new StringField(indexFieldName, pathToString(label.components, prefixLength), Field.Store.NO));
      }
    }

    // Facet counts:
    // DocValues are considered stored fields:
    doc.add(new BinaryDocValuesField(indexFieldName, dedupAndEncode(ordinals.get())));
  }
}