org.apache.lucene.util.IntsRefBuilder#append

Source File: TestFSTs.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Compiles an FST from six single-label inputs (labels 0 through 5), each mapped to a
 * 300-byte output whose first byte equals the label and whose remaining bytes are zero,
 * then looks every input up again and verifies the output is returned unchanged.
 */
public void testLargeOutputsOnArrayArcs() throws Exception {
  final int numArcs = 6;
  final int outputLength = 300;

  final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
  final FSTCompiler<BytesRef> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);

  final byte[] bytes = new byte[outputLength];
  final IntsRefBuilder input = new IntsRefBuilder();
  input.append(0);
  final BytesRef output = new BytesRef(bytes);
  for (int arc = 0; arc < numArcs; arc++) {
    input.setIntAt(0, arc);
    output.bytes[0] = (byte) arc;
    // Hand over a deep copy: the scratch output is overwritten on the next iteration.
    fstCompiler.add(input.get(), BytesRef.deepCopyOf(output));
  }

  final FST<BytesRef> fst = fstCompiler.compile();
  for (int arc = 0; arc < numArcs; arc++) {
    input.setIntAt(0, arc);
    final BytesRef result = Util.get(fst, input.get());
    assertNotNull(result);
    assertEquals(outputLength, result.length);
    // Expected value goes first so a failure message reports the values the right way round.
    assertEquals(arc, result.bytes[result.offset]);
    for (int byteIDX = 1; byteIDX < result.length; byteIDX++) {
      assertEquals(0, result.bytes[result.offset + byteIDX]);
    }
  }
}

Source File: AutomatonTestUtil.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Walks every path leaving state <code>s</code> and adds to <code>strings</code> the
 * label sequence of each path prefix that ends in an accept state.
 * <p>
 * Returns <code>false</code> as soon as a transition leads back to a state already on
 * the current path, or once <code>strings</code> holds more than <code>limit</code>
 * entries; a negative <code>limit</code> disables the size check. Returns
 * <code>true</code> when the whole exploration from <code>s</code> completed.
 */
private static boolean getFiniteStrings(Automaton a, int s, HashSet<Integer> pathstates, 
    HashSet<IntsRef> strings, IntsRefBuilder path, int limit) {
  pathstates.add(s);
  final Transition transition = new Transition();
  final int numTransitions = a.initTransition(s, transition);
  for (int idx = 0; idx < numTransitions; idx++) {
    a.getNextTransition(transition);
    final int target = transition.dest;
    if (pathstates.contains(target)) {
      // The target is already on the current path.
      return false;
    }
    for (int label = transition.min; label <= transition.max; label++) {
      path.append(label);
      if (a.isAccept(target)) {
        strings.add(path.toIntsRef());
        final boolean overLimit = limit >= 0 && strings.size() > limit;
        if (overLimit) {
          return false;
        }
      }
      final boolean completed = getFiniteStrings(a, target, pathstates, strings, path, limit);
      if (!completed) {
        return false;
      }
      // Drop the label appended above before trying the next one.
      path.setLength(path.length() - 1);
    }
  }
  pathstates.remove(s);
  return true;
}

Source File: FSTTester.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Replaces the contents of {@code ir} with the Unicode code points of {@code s}, one int
 * per code point, and returns {@code ir.get()}.
 *
 * @param s  the string to convert
 * @param ir scratch builder; cleared before the code points are appended
 * @return the result of {@code ir.get()} after all code points have been appended
 */
static IntsRef toIntsRefUTF32(String s, IntsRefBuilder ir) {
  final int charLength = s.length();
  ir.clear();
  int charIdx = 0;
  while (charIdx < charLength) {
    final int utf32 = s.codePointAt(charIdx);
    // No explicit grow() here: append() is relied on to make room for the new value.
    ir.append(utf32);
    // Advance by one or two chars depending on whether this was a surrogate pair.
    charIdx += Character.charCount(utf32);
  }
  return ir.get();
}

Source File: FSTTester.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Replaces the contents of {@code ir} with the bytes of {@code br}, each taken as an
 * unsigned value (0..255), and returns {@code ir.get()}.
 */
static IntsRef toIntsRef(BytesRef br, IntsRefBuilder ir) {
  // Size the builder for the whole input before refilling it.
  ir.grow(br.length);
  ir.clear();
  int i = 0;
  while (i < br.length) {
    final int unsigned = br.bytes[br.offset + i] & 0xFF;
    ir.append(unsigned);
    i++;
  }
  return ir.get();
}

Source File: FSTTester.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Takes a random walk through {@code fst}: at each step every arc leaving the current
 * arc's target is read, one is chosen at random, and its output is added to the running
 * total. The walk stops when the chosen arc carries {@code FST.END_LABEL}; the labels of
 * all other chosen arcs are appended to {@code in}, which is cleared first.
 *
 * @return the accumulated output of the chosen arcs
 */
private T randomAcceptedWord(FST<T> fst, IntsRefBuilder in) throws IOException {
  FST.Arc<T> current = fst.getFirstArc(new FST.Arc<T>());

  final List<FST.Arc<T>> candidates = new ArrayList<>();
  in.clear();
  T accumulated = fst.outputs.getNoOutput();
  final FST.BytesReader reader = fst.getBytesReader();

  for (;;) {
    // Gather a copy of every arc leaving the current target.
    fst.readFirstTargetArc(current, current, reader);
    for (;;) {
      candidates.add(new FST.Arc<T>().copyFrom(current));
      if (current.isLast()) {
        break;
      }
      fst.readNextArc(current, reader);
    }

    // Choose one of them at random and follow it.
    final int choice = random.nextInt(candidates.size());
    current = candidates.get(choice);
    candidates.clear();

    accumulated = fst.outputs.add(accumulated, current.output());

    final int label = current.label();
    if (label == FST.END_LABEL) {
      return accumulated;
    }
    in.append(label);
  }
}

Source File: Operations.java From lucene-solr with Apache License 2.0

5 votes

/** Returns the single input accepted by this automaton, or null if it does
 *  not accept exactly one input.  The automaton must be deterministic;
 *  otherwise an IllegalArgumentException is thrown. */
public static IntsRef getSingleton(Automaton a) {
  if (a.isDeterministic() == false) {
    throw new IllegalArgumentException("input automaton must be deterministic");
  }
  final IntsRefBuilder builder = new IntsRefBuilder();
  final HashSet<Integer> visited = new HashSet<>();
  final Transition t = new Transition();
  int s = 0;
  while (true) {
    visited.add(s);

    if (a.isAccept(s)) {
      // An accept state only ends a singleton if nothing leaves it.
      return a.getNumTransitions(s) == 0 ? builder.get() : null;
    }

    if (a.getNumTransitions(s) != 1) {
      return null;
    }

    a.getTransition(s, 0, t);
    if (t.min != t.max || visited.contains(t.dest)) {
      // Either a label range or a loop back to a state already seen.
      return null;
    }

    builder.append(t.min);
    s = t.dest;
  }
}

Source File: Util.java From lucene-solr with Apache License 2.0

5 votes

/** Clears <code>scratch</code>, appends each byte of <code>input</code> as an
 *  unsigned value (0..255), and returns <code>scratch.get()</code>. */
public static IntsRef toIntsRef(BytesRef input, IntsRefBuilder scratch) {
  scratch.clear();
  int idx = 0;
  while (idx < input.length) {
    final int unsignedByte = input.bytes[idx + input.offset] & 0xFF;
    scratch.append(unsignedByte);
    idx++;
  }
  return scratch.get();
}

Source File: FiniteStringsIteratorTest.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds a two-state automaton with a single 'a' transition into an accept state and
 * checks that the finite strings reported for it are exactly the one-label string 'a'.
 */
public void testSingleString() {
  final Automaton automaton = new Automaton();
  final int from = automaton.createState();
  final int to = automaton.createState();
  automaton.setAccept(to, true);
  automaton.addTransition(from, to, 'a', 'a');
  automaton.finishState();

  final IntsRefBuilder expected = new IntsRefBuilder();
  expected.append('a');

  final Set<IntsRef> accepted = TestOperations.getFiniteStrings(automaton);
  assertEquals(1, accepted.size());
  assertTrue(accepted.contains(expected.toIntsRef()));
}

Source File: TestAutomaton.java From lucene-solr with Apache License 2.0

5 votes

/** Converts {@code s} into a sequence of its Unicode code points, one int per code point. */
private static IntsRef toIntsRef(String s) {
  final IntsRefBuilder codePoints = new IntsRefBuilder();
  int pos = 0;
  while (pos < s.length()) {
    final int codePoint = s.codePointAt(pos);
    codePoints.append(codePoint);
    // Step over one or two chars depending on the code point just read.
    pos += Character.charCount(codePoint);
  }
  return codePoints.get();
}

Source File: FstDecompounder.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0

5 votes

/**
 * Clears {@code builder} and fills it with the Unicode code points of {@code s},
 * in order, one int per code point. Returns the same builder.
 */
private static IntsRefBuilder fromUTF16ToUTF32(CharSequence s, IntsRefBuilder builder) {
    builder.clear();
    // codePoints() combines surrogate pairs and passes every other code unit through.
    s.codePoints().forEachOrdered(builder::append);
    return builder;
}

Source File: FacetsConfig.java From lucene-solr with Apache License 2.0

4 votes

/**
 * For each index field in {@code byField}: registers every facet field's label with
 * {@code taxoWriter}, collects the resulting ordinals (plus parent ordinals for
 * multi-valued dimensions that are hierarchical or require a dimension count), adds the
 * drill-down terms to {@code doc}, and finally adds one binary doc-values field holding
 * the de-duplicated, encoded ordinals.
 *
 * @throws IllegalArgumentException if a facet field has more than one path component
 *         but its dimension is not configured as hierarchical
 */
private void processFacetFields(TaxonomyWriter taxoWriter, Map<String,List<FacetField>> byField, Document doc) throws IOException {
  for (Map.Entry<String,List<FacetField>> entry : byField.entrySet()) {
    final String indexFieldName = entry.getKey();
    final IntsRefBuilder ordinals = new IntsRefBuilder();

    for (FacetField facetField : entry.getValue()) {
      final FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
      final boolean hasSubPath = facetField.path.length > 1;
      if (hasSubPath && dimConfig.hierarchical == false) {
        throw new IllegalArgumentException("dimension \"" + facetField.dim + "\" is not hierarchical yet has " + facetField.path.length + " components");
      }

      final FacetLabel label = new FacetLabel(facetField.dim, facetField.path);

      checkTaxoWriter(taxoWriter);
      final int ordinal = taxoWriter.addCategory(label);
      ordinals.append(ordinal);

      if (dimConfig.multiValued && (dimConfig.hierarchical || dimConfig.requireDimCount)) {
        // Add all parents too:
        for (int parent = taxoWriter.getParent(ordinal); parent > 0; parent = taxoWriter.getParent(parent)) {
          ordinals.append(parent);
        }

        if (dimConfig.requireDimCount == false) {
          // Remove last (dimension) ord:
          ordinals.setLength(ordinals.length() - 1);
        }
      }

      // Drill down: one term per label prefix, starting at prefix length 1 when
      // requireDimensionDrillDown is set and at 2 otherwise.
      final int firstPrefixLength = dimConfig.requireDimensionDrillDown ? 1 : 2;
      for (int prefixLength = firstPrefixLength; prefixLength <= label.length; prefixLength++) {
        doc.add(new StringField(indexFieldName, pathToString(label.components, prefixLength), Field.Store.NO));
      }
    }

    // Facet counts:
    // DocValues are considered stored fields:
    doc.add(new BinaryDocValuesField(indexFieldName, dedupAndEncode(ordinals.get())));
  }
}

Java Code Examples for org.apache.lucene.util.IntsRefBuilder#append()