Java Code Examples for org.apache.lucene.util.BytesRefBuilder#append()

The following examples show how to use org.apache.lucene.util.BytesRefBuilder#append(). You can go to the original project or source file by following the links above each example.
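
As background, BytesRefBuilder is a growable byte buffer; append() is overloaded to take a single byte, a BytesRef, a byte[] slice, or another BytesRefBuilder. Here is a minimal standalone sketch of typical usage (illustrative values, not taken from any of the projects below):

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

BytesRefBuilder builder = new BytesRefBuilder();
builder.append((byte) 'f');             // append a single byte
builder.append(new BytesRef("oo"));     // append the bytes of a BytesRef
byte[] tail = {'b', 'a', 'r'};
builder.append(tail, 0, tail.length);   // append a byte[] slice
BytesRef result = builder.toBytesRef(); // independent copy: "foobar"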
Example 1
Source File: Correction.java    From Elasticsearch with Apache License 2.0
public BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef preTag, BytesRef postTag) {
    BytesRef[] toJoin = new BytesRef[this.candidates.length];
    int len = separator.length * this.candidates.length - 1;
    for (int i = 0; i < toJoin.length; i++) {
        Candidate candidate = candidates[i];
        if (preTag == null || candidate.userInput) {
            toJoin[i] = candidate.term;
        } else {
            final int maxLen = preTag.length + postTag.length + candidate.term.length;
            final BytesRefBuilder highlighted = new BytesRefBuilder();
            highlighted.grow(maxLen); // size the buffer once up front so the appends below never reallocate
            if (i == 0 || candidates[i-1].userInput) {
                highlighted.append(preTag);
            }
            highlighted.append(candidate.term);
            if (toJoin.length == i + 1 || candidates[i+1].userInput) {
                highlighted.append(postTag);
            }
            toJoin[i] = highlighted.get();
        }
        len += toJoin[i].length;
    }
    result.grow(len);
    return SuggestUtils.join(separator, result, toJoin);
}
 
Example 2
Source File: Operations.java    From lucene-solr with Apache License 2.0
/**
 * Returns the longest BytesRef that is a prefix of all accepted strings and
 * visits each state at most once.  The automaton must be deterministic.
 * 
 * @return common prefix, which can be an empty (length 0) BytesRef (never null)
 */
public static BytesRef getCommonPrefixBytesRef(Automaton a) {
  BytesRefBuilder builder = new BytesRefBuilder();
  HashSet<Integer> visited = new HashSet<>();
  int s = 0;
  boolean done;
  Transition t = new Transition();
  do {
    done = true;
    visited.add(s);
    if (a.isAccept(s) == false && a.getNumTransitions(s) == 1) {
      a.getTransition(s, 0, t);
      if (t.min == t.max && !visited.contains(t.dest)) {
        builder.append((byte) t.min);
        s = t.dest;
        done = false;
      }
    }
  } while (!done);

  return builder.get();
}
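
For illustration, a minimal call site (a sketch against the lucene-solr 8.x automaton API): build an automaton that accepts exactly "foobar" and "foobaz", determinize it as the method requires, and extract the shared prefix.

import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;

Automaton a = Operations.determinize(
    Operations.union(Automata.makeString("foobar"), Automata.makeString("foobaz")),
    Operations.DEFAULT_MAX_DETERMINIZED_STATES);
BytesRef prefix = Operations.getCommonPrefixBytesRef(a);
// prefix.utf8ToString() -> "fooba"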
 
Example 3
Source File: ESBlobStoreContainerTestCase.java    From crate with Apache License 2.0
public void testWriteRead() throws IOException {
    try(BlobStore store = newBlobStore()) {
        final BlobContainer container = store.blobContainer(new BlobPath());
        byte[] data = randomBytes(randomIntBetween(10, scaledRandomIntBetween(1024, 1 << 16)));
        writeBlob(container, "foobar", new BytesArray(data), randomBoolean());
        if (randomBoolean()) {
            // override file, to check if we get latest contents
            data = randomBytes(randomIntBetween(10, scaledRandomIntBetween(1024, 1 << 16)));
            writeBlob(container, "foobar", new BytesArray(data), false);
        }
        try (InputStream stream = container.readBlob("foobar")) {
            BytesRefBuilder target = new BytesRefBuilder();
            while (target.length() < data.length) {
                byte[] buffer = new byte[scaledRandomIntBetween(1, data.length - target.length())];
                int offset = scaledRandomIntBetween(0, buffer.length - 1);
                int read = stream.read(buffer, offset, buffer.length - offset);
                target.append(new BytesRef(buffer, offset, read));
            }
            assertEquals(data.length, target.length());
            assertArrayEquals(data, Arrays.copyOfRange(target.bytes(), 0, target.length()));
        }
    }
}
 
Example 4
Source File: SuggestUtils.java    From Elasticsearch with Apache License 2.0
public static BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef... toJoin) {
    result.clear();
    for (int i = 0; i < toJoin.length - 1; i++) {
        result.append(toJoin[i]);
        result.append(separator);
    }
    result.append(toJoin[toJoin.length-1]);
    return result.get();
}
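
Note that this helper assumes toJoin holds at least one element; an empty array would make the final append throw ArrayIndexOutOfBoundsException. A quick usage sketch with hypothetical values:

BytesRefBuilder result = new BytesRefBuilder();
BytesRef joined = join(new BytesRef(" "), result, new BytesRef("foo"), new BytesRef("bar"));
// joined.utf8ToString() -> "foo bar"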
 
Example 5
Source File: UTF8TaxonomyWriterCache.java    From lucene-solr with Apache License 2.0
private BytesRef toBytes(FacetLabel label) {
  BytesRefBuilder bytes = this.bytes.get();
  bytes.clear();
  for (int i = 0; i < label.length; i++) {
    String part = label.components[i];
    if (i > 0) {
      bytes.append(DELIM_CHAR);
    }
    bytes.grow(bytes.length() + UnicodeUtil.maxUTF8Length(part.length()));
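    // UTF16toUTF8 returns the end offset of the bytes it wrote into bytes(), which becomes the builder's new length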
    bytes.setLength(UnicodeUtil.UTF16toUTF8(part, 0, part.length(), bytes.bytes(), bytes.length()));
  }
  return bytes.get();
}
 
Example 6
Source File: Dictionary.java    From lucene-solr with Apache License 2.0
static void encodeFlags(BytesRefBuilder b, char flags[]) {
  int len = flags.length << 1;
  b.grow(len);
  b.clear();
  for (int i = 0; i < flags.length; i++) {
    int flag = flags[i];
    b.append((byte) ((flag >> 8) & 0xff));
    b.append((byte) (flag & 0xff));
  }
}
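
The encoding writes each char big-endian, high byte first. For illustration, a hypothetical inverse (a sketch, not necessarily the project's actual decoder):

static char[] decodeFlags(BytesRef b) {
  char[] flags = new char[b.length >>> 1];
  for (int i = 0; i < flags.length; i++) {
    int hi = b.bytes[b.offset + (i << 1)] & 0xff;
    int lo = b.bytes[b.offset + (i << 1) + 1] & 0xff;
    flags[i] = (char) ((hi << 8) | lo);
  }
  return flags;
}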
 
Example 7
Source File: IcuCollationAnalyzerTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0
private BytesRef bytesFromTokenStream(TokenStream stream) throws Exception {
    TermToBytesRefAttribute termAttr = stream.getAttribute(TermToBytesRefAttribute.class);
    stream.reset();
    BytesRefBuilder bytesRefBuilder = new BytesRefBuilder();
    while (stream.incrementToken()) {
        BytesRef bytesRef = termAttr.getBytesRef();
        bytesRefBuilder.append(bytesRef);
    }
    stream.close();
    return bytesRefBuilder.toBytesRef();
}
 
Example 8
Source File: SortFormTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0
private BytesRef sortKeyFromTokenStream(TokenStream stream) throws Exception {
    TermToBytesRefAttribute termAttr = stream.getAttribute(TermToBytesRefAttribute.class);
    BytesRefBuilder b = new BytesRefBuilder();
    stream.reset();
    while (stream.incrementToken()) {
        b.append(termAttr.getBytesRef());
    }
    stream.close();
    return b.get();
}
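
Examples 7 and 8 accumulate token bytes the same way but return the result differently: toBytesRef() allocates an independent copy of the accumulated bytes, while get() returns a BytesRef backed by the builder's internal buffer, which later append() or clear() calls will mutate. A minimal sketch of the difference:

BytesRefBuilder b = new BytesRefBuilder();
b.append(new BytesRef("foo"));
BytesRef view = b.get();        // backed by b's internal buffer
BytesRef copy = b.toBytesRef(); // independent copy
b.clear();                      // view now has length 0; copy still holds "foo"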
 
Example 9
Source File: CompositeBytesReference.java    From crate with Apache License 2.0
@Override
public BytesRef toBytesRef() {
    BytesRefBuilder builder = new BytesRefBuilder();
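    // pre-size the builder to the total composite length so the append loop below never reallocates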
    builder.grow(length());
    BytesRef spare;
    BytesRefIterator iterator = iterator();
    try {
        while ((spare = iterator.next()) != null) {
            builder.append(spare);
        }
    } catch (IOException ex) {
        throw new AssertionError("won't happen", ex); // this is really an error since we don't do any I/O in our BytesReferences
    }
    return builder.toBytesRef();
}
 
Example 10
Source File: IndexNumericFieldData.java    From Elasticsearch with Apache License 2.0
@Override
public void toIndexForm(Number number, BytesRefBuilder bytes) {
    bytes.append(number.intValue() != 0 ? BooleanFieldMapper.Values.TRUE : BooleanFieldMapper.Values.FALSE);
}
 
Example 11
Source File: Uid.java    From Elasticsearch with Apache License 2.0
public static BytesRef typePrefixAsBytes(BytesRef type) {
    BytesRefBuilder bytesRef = new BytesRefBuilder();
    bytesRef.append(type);
    bytesRef.append(DELIMITER_BYTES);
    return bytesRef.toBytesRef();
}
 
Example 12
Source File: Uid.java    From Elasticsearch with Apache License 2.0
public static BytesRef createUidAsBytes(BytesRef type, BytesRef id, BytesRefBuilder spare) {
    spare.copyBytes(type);
    spare.append(DELIMITER_BYTES);
    spare.append(id);
    return spare.get();
}
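
A quick sketch with hypothetical values (Elasticsearch uids take the form type + '#' + id):

BytesRefBuilder spare = new BytesRefBuilder();
BytesRef uid = createUidAsBytes(new BytesRef("my_type"), new BytesRef("1"), spare);
// uid.utf8ToString() -> "my_type#1"

Since the method returns spare.get(), the result is a view over spare's internal buffer and stays valid only until spare is reused.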
 
Example 13
Source File: TermQueryPrefixTreeStrategy.java    From lucene-solr with Apache License 2.0
@Override
public Query makeQuery(SpatialArgs args) {
  final SpatialOperation op = args.getOperation();
  if (op != SpatialOperation.Intersects)
    throw new UnsupportedSpatialOperation(op);

  Shape shape = args.getShape();
  int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct));

  //--get a List of BytesRef for each term we want (no parents, no leaf bytes)
  final int GUESS_NUM_TERMS;
  if (shape instanceof Point)
    GUESS_NUM_TERMS = detailLevel;//perfect guess
  else
    GUESS_NUM_TERMS = 4096;//should this be a method on SpatialPrefixTree?

  BytesRefBuilder masterBytes = new BytesRefBuilder();//shared byte array for all terms
  List<BytesRef> terms = new ArrayList<>(GUESS_NUM_TERMS);

  CellIterator cells = grid.getTreeCellIterator(shape, detailLevel);
  while (cells.hasNext()) {
    Cell cell = cells.next();
    if (!cell.isLeaf())
      continue;
    BytesRef term = cell.getTokenBytesNoLeaf(null);//null because we want a new BytesRef
    //We copy out the bytes because it may be re-used across the iteration. This also gives us the opportunity
    // to use one contiguous block of memory for the bytes of all terms we need.
    masterBytes.grow(masterBytes.length() + term.length);
    masterBytes.append(term);
    term.bytes = null;//don't need; will reset later
    term.offset = masterBytes.length() - term.length;
    terms.add(term);
  }
  //doing this now because if we did earlier, it's possible the bytes needed to grow()
  for (BytesRef byteRef : terms) {
    byteRef.bytes = masterBytes.bytes();
  }
  //unfortunately TermsQuery will needlessly sort & dedupe
  //TODO an automatonQuery might be faster?
  return new TermInSetQuery(getFieldName(), terms);
}
 
Example 14
Source File: TermGroupFacetCollector.java    From lucene-solr with Apache License 2.0
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
  if (segmentFacetCounts != null) {
    segmentResults.add(createSegmentResult());
  }

  groupFieldTermsIndex = DocValues.getSorted(context.reader(), groupField);
  facetFieldDocTermOrds = DocValues.getSortedSet(context.reader(), facetField);
  facetFieldNumTerms = (int) facetFieldDocTermOrds.getValueCount();
  if (facetFieldNumTerms == 0) {
    facetOrdTermsEnum = null;
  } else {
    facetOrdTermsEnum = facetFieldDocTermOrds.termsEnum();
  }
  // [facetFieldNumTerms + 1] buckets: one for each facet value, plus one for docs not containing the facet field
  segmentFacetCounts = new int[facetFieldNumTerms + 1];
  segmentTotalCount = 0;

  segmentGroupedFacetHits.clear();
  for (GroupedFacetHit groupedFacetHit : groupedFacetHits) {
    int groupOrd = groupedFacetHit.groupValue == null ? -1 : groupFieldTermsIndex.lookupTerm(groupedFacetHit.groupValue);
    if (groupedFacetHit.groupValue != null && groupOrd < 0) {
      continue;
    }

    int facetOrd;
    if (groupedFacetHit.facetValue != null) {
      if (facetOrdTermsEnum == null || !facetOrdTermsEnum.seekExact(groupedFacetHit.facetValue)) {
        continue;
      }
      facetOrd = (int) facetOrdTermsEnum.ord();
    } else {
      facetOrd = facetFieldNumTerms;
    }

    // (facetFieldNumTerms + 1) buckets: one for each facet value, plus one for docs not containing the facet field
    int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1) + facetOrd;
    segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
  }

  if (facetPrefix != null) {
    TermsEnum.SeekStatus seekStatus;
    if (facetOrdTermsEnum != null) {
      seekStatus = facetOrdTermsEnum.seekCeil(facetPrefix);
    } else {
      seekStatus = TermsEnum.SeekStatus.END;
    }

    if (seekStatus != TermsEnum.SeekStatus.END) {
      startFacetOrd = (int) facetOrdTermsEnum.ord();
    } else {
      startFacetOrd = 0;
      endFacetOrd = 0;
      return;
    }

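    // UnicodeUtil.BIG_TERM sorts after any valid UTF-8 term, so prefix + BIG_TERM acts as an
    // exclusive upper bound: seekCeil lands on the first term beyond the facetPrefix range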
    BytesRefBuilder facetEndPrefix = new BytesRefBuilder();
    facetEndPrefix.append(facetPrefix);
    facetEndPrefix.append(UnicodeUtil.BIG_TERM);
    seekStatus = facetOrdTermsEnum.seekCeil(facetEndPrefix.get());
    if (seekStatus != TermsEnum.SeekStatus.END) {
      endFacetOrd = (int) facetOrdTermsEnum.ord();
    } else {
      endFacetOrd = facetFieldNumTerms; // Don't include null...
    }
  } else {
    startFacetOrd = 0;
    endFacetOrd = facetFieldNumTerms + 1;
  }
}
 
Example 15
Source File: PathHierarchyAggregatorFactory.java    From elasticsearch-aggregation-pathhierarchy with MIT License
/**
 * Handles iteration over doc values:
 * advances the iterator to exactly docId and returns whether that document has a value.
 * docId must be greater than or equal to the current doc ID, and must be a valid doc ID, i.e. >= 0 and < maxDoc.
 * After this method returns, docID() returns docId.
 */
@Override
public boolean advanceExact(int docId) throws IOException {
    if (valuesSource.advanceExact(docId)) {
        count = 0;
        int t = 0;
        for (int i=0; i < valuesSource.docValueCount(); i++) {
            int depth = 0;
            BytesRef val = valuesSource.nextValue();
            BytesRefBuilder cleanVal = new BytesRefBuilder();
            int startNewValOffset = -1;

            for (int offset=0; offset < val.length; offset++) {
                // it is a separator
                if (val.length - offset >= separator.length &&
                        FutureArrays.equals(
                                separator.bytes, separator.offset, separator.offset + separator.length,
                                val.bytes, val.offset + offset, val.offset + offset + separator.length)) {
                    // ignore separator at the beginning
                    if (offset == 0) {
                        offset += separator.length - 1;
                        continue;
                    }

                    // A new path needs to be added
                    if (startNewValOffset != -1) {
                        cleanVal.append(val.bytes, val.offset + startNewValOffset, offset - startNewValOffset);
                        if (depth >= minDepth) {
                            values[t++].copyBytes(cleanVal);
                        }
                        startNewValOffset = -1;
                        cleanVal.append(separator);
                        depth++;
                    // two separators following each other
                    } else if (keepBlankPath) {
                        count++;
                        growExact();
                        values[t++].copyBytes(cleanVal);
                        cleanVal.append(separator);
                        depth++;
                    }

                    if (maxDepth >= 0 && depth > maxDepth) {
                        break;
                    }
                    offset += separator.length - 1;
                } else {
                    if (startNewValOffset == -1) {
                        startNewValOffset = offset;
                        if (depth >= minDepth) {
                            count++;
                            growExact();
                        }
                    }
                }
            }

            if (startNewValOffset != -1 && minDepth <= depth) {
                cleanVal.append(val.bytes, val.offset + startNewValOffset, val.length - startNewValOffset);
                values[t++].copyBytes(cleanVal);
            }

        }
        sort();  // sort values that are stored between offsets 0 and count of values
        return true;
    } else {
        return false;
    }
}
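
For intuition, with a hypothetical doc value of "/a/b/c", separator "/", minDepth 0 and no maxDepth limit: the leading separator is skipped, and each time a later separator (or the end of the value) is reached the accumulated prefix is copied out, yielding the cumulative paths "a", "a/b" and "a/b/c".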