org.apache.lucene.util.BytesRefBuilder Java Examples
The following examples show how to use
org.apache.lucene.util.BytesRefBuilder.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TrieBuilder.java From ambiverse-nlu with Apache License 2.0 | 6 votes |
/**
 * Builds a byte-based FST from the given strings, mapping each string to a
 * running counter value.
 *
 * <p>An {@link AssertionError} raised while adding a mention is logged at debug
 * level and the mention is skipped; the remaining mentions are still processed.
 *
 * @param sortedStrings the mentions to add, visited in the set's iteration order
 * @return the result of finishing the FST builder
 * @throws IOException if the underlying FST builder fails
 */
public static FST<Long> buildTrie(Set<String> sortedStrings) throws IOException {
  final PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, fstOutputs);
  final BytesRefBuilder utf8Scratch = new BytesRefBuilder();
  final IntsRefBuilder intsScratch = new IntsRefBuilder();

  long nextOutput = 0;
  for (String mention : sortedStrings) {
    utf8Scratch.copyChars(mention);
    try {
      // The counter is post-incremented as part of argument evaluation, i.e. before add() runs.
      fstBuilder.add(Util.toIntsRef(utf8Scratch.get(), intsScratch), nextOutput++);
    } catch (java.lang.AssertionError ae) {
      logger.debug("Assertion error for mention " + mention);
    }
  }
  return fstBuilder.finish();
}
Example #2
Source File: TestLegacyNumericUtils.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testIntConversionAndOrdering() throws Exception { // generate a series of encoded ints, each numerical one bigger than the one before BytesRefBuilder act = new BytesRefBuilder(); BytesRefBuilder last = new BytesRefBuilder(); for (int i=-100000; i<100000; i++) { LegacyNumericUtils.intToPrefixCoded(i, 0, act); if (last!=null) { // test if smaller assertTrue("actual bigger than last (BytesRef)", last.get().compareTo(act.get()) < 0 ); assertTrue("actual bigger than last (as String)", last.get().utf8ToString().compareTo(act.get().utf8ToString()) < 0 ); } // test is back and forward conversion works assertEquals("forward and back conversion should generate same int", i, LegacyNumericUtils.prefixCodedToInt(act.get())); // next step last.copyBytes(act.get()); } }
Example #3
Source File: TermBuilder.java From Elasticsearch with Apache License 2.0 | 6 votes |
/**
 * Encodes the given long as a prefix-coded term with shift 0.
 *
 * @param value the value to encode
 * @return the bytes produced by the encoder
 */
@Override
public BytesRef term(Long value) {
  final BytesRefBuilder encoded = new BytesRefBuilder();
  NumericUtils.longToPrefixCoded(value.longValue(), 0, encoded);
  return encoded.get();
}
Example #4
Source File: Correction.java From Elasticsearch with Apache License 2.0 | 6 votes |
public BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef preTag, BytesRef postTag) { BytesRef[] toJoin = new BytesRef[this.candidates.length]; int len = separator.length * this.candidates.length - 1; for (int i = 0; i < toJoin.length; i++) { Candidate candidate = candidates[i]; if (preTag == null || candidate.userInput) { toJoin[i] = candidate.term; } else { final int maxLen = preTag.length + postTag.length + candidate.term.length; final BytesRefBuilder highlighted = new BytesRefBuilder();// just allocate once highlighted.grow(maxLen); if (i == 0 || candidates[i-1].userInput) { highlighted.append(preTag); } highlighted.append(candidate.term); if (toJoin.length == i + 1 || candidates[i+1].userInput) { highlighted.append(postTag); } toJoin[i] = highlighted.get(); } len += toJoin[i].length; } result.grow(len); return SuggestUtils.join(separator, result, toJoin); }
Example #5
Source File: CommonTermsQueryParser.java From Elasticsearch with Apache License 2.0 | 6 votes |
private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String field, QueryParseContext parseContext, Analyzer analyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException { // Logic similar to QueryParser#getFieldQuery int count = 0; try (TokenStream source = analyzer.tokenStream(field, queryString.toString())) { source.reset(); CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class); BytesRefBuilder builder = new BytesRefBuilder(); while (source.incrementToken()) { // UTF-8 builder.copyChars(termAtt); query.add(new Term(field, builder.toBytesRef())); count++; } } if (count == 0) { return null; } query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch); query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch); return query; }
Example #6
Source File: TestLegacyNumericUtils.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testLongConversionAndOrdering() throws Exception { // generate a series of encoded longs, each numerical one bigger than the one before BytesRefBuilder last = new BytesRefBuilder(); BytesRefBuilder act = new BytesRefBuilder(); for (long l=-100000L; l<100000L; l++) { LegacyNumericUtils.longToPrefixCoded(l, 0, act); if (last!=null) { // test if smaller assertTrue("actual bigger than last (BytesRef)", last.get().compareTo(act.get()) < 0 ); assertTrue("actual bigger than last (as String)", last.get().utf8ToString().compareTo(act.get().utf8ToString()) < 0 ); } // test is back and forward conversion works assertEquals("forward and back conversion should generate same long", l, LegacyNumericUtils.prefixCodedToLong(act.get())); // next step last.copyBytes(act); } }
Example #7
Source File: LegacyNumericUtils.java From lucene-solr with Apache License 2.0 | 6 votes |
/** * Returns prefix coded bits after reducing the precision by <code>shift</code> bits. * This is method is used by {@link org.apache.solr.legacy.LegacyNumericTokenStream}. * After encoding, {@code bytes.offset} will always be 0. * @param val the numeric value * @param shift how many bits to strip from the right * @param bytes will contain the encoded value */ public static void intToPrefixCoded(final int val, final int shift, final BytesRefBuilder bytes) { // ensure shift is 0..31 if ((shift & ~0x1f) != 0) { throw new IllegalArgumentException("Illegal shift value, must be 0..31; got shift=" + shift); } int nChars = (((31-shift)*37)>>8) + 1; // i/7 is the same as (i*37)>>8 for i in 0..63 bytes.setLength(nChars+1); // one extra for the byte that contains the shift info bytes.grow(LegacyNumericUtils.BUF_SIZE_LONG); // use the max bytes.setByteAt(0, (byte)(SHIFT_START_INT + shift)); int sortableBits = val ^ 0x80000000; sortableBits >>>= shift; while (nChars > 0) { // Store 7 bits per byte for compatibility // with UTF-8 encoding of terms bytes.setByteAt(nChars--, (byte)(sortableBits & 0x7f)); sortableBits >>>= 7; } }
Example #8
Source File: TrieField.java From lucene-solr with Apache License 2.0 | 6 votes |
private void storedToIndexed(IndexableField f, final BytesRefBuilder bytes) { final Number val = f.numericValue(); if (val != null) { switch (type) { case INTEGER: LegacyNumericUtils.intToPrefixCoded(val.intValue(), 0, bytes); break; case FLOAT: LegacyNumericUtils.intToPrefixCoded(NumericUtils.floatToSortableInt(val.floatValue()), 0, bytes); break; case LONG: //fallthrough! case DATE: LegacyNumericUtils.longToPrefixCoded(val.longValue(), 0, bytes); break; case DOUBLE: LegacyNumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(val.doubleValue()), 0, bytes); break; default: throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name()); } } else { // the old BinaryField encoding is no longer supported throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid field contents: "+f.name()); } }
Example #9
Source File: ExpandComponent.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Builds a {@code TermInSetQuery} on {@code fname} containing the indexed form
 * of every value in {@code groupSet}.
 *
 * @param fname the field to query
 * @param ft the field type used to convert each value to its indexed form
 * @param size the length of the term array; presumably the size of {@code groupSet} (confirm with callers)
 * @param groupSet the numeric group values
 * @return the query over all converted terms
 */
private Query getGroupQuery(String fname, FieldType ft, int size, LongHashSet groupSet) {
  final BytesRef[] terms = new BytesRef[size];
  final BytesRefBuilder scratch = new BytesRefBuilder();

  int next = 0;
  for (Iterator<LongCursor> it = groupSet.iterator(); it.hasNext(); ) {
    final LongCursor cursor = it.next();
    final String readable = numericToString(ft, cursor.value);
    ft.readableToIndexed(readable, scratch);
    // the scratch builder is reused across iterations; store the result of toBytesRef()
    terms[next++] = scratch.toBytesRef();
  }
  return new TermInSetQuery(fname, terms);
}
Example #10
Source File: DateFieldMapper.java From Elasticsearch with Apache License 2.0 | 6 votes |
@Override public BytesRef indexedValueForSearch(Object value) { BytesRefBuilder bytesRef = new BytesRefBuilder(); NumericUtils.longToPrefixCoded(parseValue(value), 0, bytesRef); // 0 because of exact match return bytesRef.get(); }
Example #11
Source File: GroupConverter.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Converts top groups keyed by {@code MutableValue} into top groups keyed by
 * the indexed {@code BytesRef} form of each group value.
 *
 * @param field the schema field whose type performs the readable-to-indexed conversion
 * @param values the groups to convert; may be null
 * @return the converted groups, or null if {@code values} is null
 */
@SuppressWarnings({"unchecked", "rawtypes"})
static TopGroups<BytesRef> fromMutable(SchemaField field, TopGroups<MutableValue> values) {
  if (values == null) {
    return null;
  }

  final FieldType fieldType = field.getType();
  GroupDocs<BytesRef> converted[] = new GroupDocs[values.groups.length];

  int slot = 0;
  for (GroupDocs<MutableValue> original : values.groups) {
    // groups whose value does not exist keep a null group value
    BytesRef groupValue = null;
    if (original.groupValue.exists) {
      final BytesRefBuilder binary = new BytesRefBuilder();
      final String readable = Utils.OBJECT_TO_STRING.apply(original.groupValue.toObject());
      fieldType.readableToIndexed(readable, binary);
      groupValue = binary.get();
    }
    converted[slot++] = new GroupDocs<>(
        original.score,
        original.maxScore,
        original.totalHits,
        original.scoreDocs,
        groupValue,
        original.groupSortValues);
  }

  return new TopGroups<>(
      values.groupSort,
      values.withinGroupSort,
      values.totalHitCount,
      values.totalGroupedHitCount,
      converted,
      values.maxScore);
}
Example #12
Source File: SearchGroupsResultTransformer.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Rebuilds a single search group from its serialized parts.
 *
 * @param groupField the grouping field, or null to use the raw string bytes as the group value
 * @param groupValue the readable group value; null yields a null group value
 * @param groupSortField the sort fields, index-aligned with {@code rawSearchGroupData}
 * @param rawSearchGroupData the marshalled sort values
 * @return the populated search group
 */
@SuppressWarnings({"rawtypes"})
private SearchGroup<BytesRef> deserializeOneSearchGroup(SchemaField groupField, String groupValue,
                                                        SortField[] groupSortField, List<Comparable> rawSearchGroupData) {
  final SearchGroup<BytesRef> group = new SearchGroup<>();

  if (groupValue == null) {
    group.groupValue = null;
  } else if (groupField == null) {
    group.groupValue = new BytesRef(groupValue);
  } else {
    final BytesRefBuilder indexed = new BytesRefBuilder();
    groupField.getType().readableToIndexed(groupValue, indexed);
    group.groupValue = indexed.get();
  }

  group.sortValues = rawSearchGroupData.toArray(new Comparable[rawSearchGroupData.size()]);
  for (int i = 0; i < group.sortValues.length; i++) {
    // a sort entry without a field name has no schema field to unmarshal against
    final String sortFieldName = groupSortField[i].getField();
    SchemaField field = null;
    if (sortFieldName != null) {
      field = searcher.getSchema().getFieldOrNull(sortFieldName);
    }
    group.sortValues[i] = ShardResultTransformerUtils.unmarshalSortValue(group.sortValues[i], field);
  }
  return group;
}
Example #13
Source File: DocumentBuilder.java From modernmt with Apache License 2.0 | 6 votes |
/**
 * Creates a term for {@code field} whose bytes are the prefix-coded (shift 0)
 * form of {@code value}.
 *
 * @param value the long to encode
 * @param field the term's field name
 * @return the new term
 */
private static Term makeLongTerm(long value, String field) {
  final BytesRefBuilder encoded = new BytesRefBuilder();
  NumericUtils.longToPrefixCoded(value, 0, encoded);
  final BytesRef termBytes = encoded.toBytesRef();
  return new Term(field, termBytes);
}
Example #14
Source File: SimpleTextPointsReader.java From lucene-solr with Apache License 2.0 | 6 votes |
@Override public void checkIntegrity() throws IOException { BytesRefBuilder scratch = new BytesRefBuilder(); IndexInput clone = dataIn.clone(); clone.seek(0); // checksum is fixed-width encoded with 20 bytes, plus 1 byte for newline (the space is included in SimpleTextUtil.CHECKSUM): long footerStartPos = dataIn.length() - (SimpleTextUtil.CHECKSUM.length + 21); ChecksumIndexInput input = new BufferedChecksumIndexInput(clone); while (true) { SimpleTextUtil.readLine(input, scratch); if (input.getFilePointer() >= footerStartPos) { // Make sure we landed at precisely the right location: if (input.getFilePointer() != footerStartPos) { throw new CorruptIndexException("SimpleText failure: footer does not start at expected position current=" + input.getFilePointer() + " vs expected=" + footerStartPos, input); } SimpleTextUtil.checkFooter(input); break; } } }
Example #15
Source File: SimpleTextDocValuesReader.java From lucene-solr with Apache License 2.0 | 6 votes |
@Override public void checkIntegrity() throws IOException { BytesRefBuilder scratch = new BytesRefBuilder(); IndexInput clone = data.clone(); clone.seek(0); // checksum is fixed-width encoded with 20 bytes, plus 1 byte for newline (the space is included in SimpleTextUtil.CHECKSUM): long footerStartPos = data.length() - (SimpleTextUtil.CHECKSUM.length + 21); ChecksumIndexInput input = new BufferedChecksumIndexInput(clone); while (true) { SimpleTextUtil.readLine(input, scratch); if (input.getFilePointer() >= footerStartPos) { // Make sure we landed at precisely the right location: if (input.getFilePointer() != footerStartPos) { throw new CorruptIndexException("SimpleText failure: footer does not start at expected position current=" + input.getFilePointer() + " vs expected=" + footerStartPos, input); } SimpleTextUtil.checkFooter(input); break; } } }
Example #16
Source File: XJoinQParserPlugin.java From BioSolr with Apache License 2.0 | 6 votes |
static private Transformer transformer(final FieldType ft) { return new Transformer() { BytesRefBuilder term = new BytesRefBuilder(); @Override public BytesRef transform(Object joinId) { String joinStr = joinId.toString(); // logic same as TermQParserPlugin if (ft != null) { ft.readableToIndexed(joinStr, term); } else { term.copyChars(joinStr); } return term.toBytesRef(); } }; }
Example #17
Source File: BytesRefTermStream.java From siren-join with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Returns the prefix-coded (shift 0) bytes of the value at the current position
 * and advances the position by one.
 *
 * @return the encoded bytes of the next value
 */
@Override
public BytesRef next() {
  final BytesRefBuilder encoded = new BytesRefBuilder();
  // NOTE(review): the value is narrowed to int and then encoded with the long
  // encoder -- confirm the narrowing is intended.
  final int narrowed = (int) values.valueAt(this.count++);
  NumericUtils.longToPrefixCoded(narrowed, 0, encoded);
  return encoded.toBytesRef();
}
Example #18
Source File: EnumField.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Creates the index fields for an enum value.
 *
 * <p>Without doc values only the regular field is produced. With doc values a
 * second field is added: a {@code SortedSetDocValuesField} holding the indexed
 * form of the mapped int for multi-valued fields, otherwise a
 * {@code NumericDocValuesField} holding the field's int value.
 *
 * @param sf the schema field
 * @param value the external enum value
 * @return the fields to index
 */
@Override
public List<IndexableField> createFields(SchemaField sf, Object value) {
  if (!sf.hasDocValues()) {
    return Collections.singletonList(createField(sf, value));
  }

  final List<IndexableField> fields = new ArrayList<>();
  final IndexableField field = createField(sf, value);
  fields.add(field);

  if (sf.multiValued()) {
    final BytesRefBuilder bytes = new BytesRefBuilder();
    final String mappedInt = enumMapping.stringValueToIntValue(value.toString()).toString();
    readableToIndexed(mappedInt, bytes);
    fields.add(new SortedSetDocValuesField(sf.getName(), bytes.toBytesRef()));
  } else {
    final long bits = field.numericValue().intValue();
    fields.add(new NumericDocValuesField(sf.getName(), bits));
  }
  return fields;
}
Example #19
Source File: TrieField.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Parses the readable value according to this field's {@code type} and writes
 * its prefix-coded (shift 0) form into {@code result}.
 *
 * @param val the readable value
 * @param result receives the encoded value
 * @throws SolrException if the type is not one of the handled trie types
 */
@Override
public void readableToIndexed(CharSequence val, BytesRefBuilder result) {
  final String text = val.toString();
  switch (type) {
    case INTEGER: {
      final int parsed = parseIntFromUser(null, text);
      LegacyNumericUtils.intToPrefixCoded(parsed, 0, result);
      break;
    }
    case FLOAT: {
      final int sortable = NumericUtils.floatToSortableInt(parseFloatFromUser(null, text));
      LegacyNumericUtils.intToPrefixCoded(sortable, 0, result);
      break;
    }
    case LONG: {
      final long parsed = parseLongFromUser(null, text);
      LegacyNumericUtils.longToPrefixCoded(parsed, 0, result);
      break;
    }
    case DOUBLE: {
      final long sortable = NumericUtils.doubleToSortableLong(parseDoubleFromUser(null, text));
      LegacyNumericUtils.longToPrefixCoded(sortable, 0, result);
      break;
    }
    case DATE: {
      final long millis = DateMathParser.parseMath(null, text).getTime();
      LegacyNumericUtils.longToPrefixCoded(millis, 0, result);
      break;
    }
    default:
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
  }
}
Example #20
Source File: LegacyNumericUtils.java From lucene-solr with Apache License 2.0 | 6 votes |
/** * Returns prefix coded bits after reducing the precision by <code>shift</code> bits. * This is method is used by {@link org.apache.solr.legacy.LegacyNumericTokenStream}. * After encoding, {@code bytes.offset} will always be 0. * @param val the numeric value * @param shift how many bits to strip from the right * @param bytes will contain the encoded value */ public static void longToPrefixCoded(final long val, final int shift, final BytesRefBuilder bytes) { // ensure shift is 0..63 if ((shift & ~0x3f) != 0) { throw new IllegalArgumentException("Illegal shift value, must be 0..63; got shift=" + shift); } int nChars = (((63-shift)*37)>>8) + 1; // i/7 is the same as (i*37)>>8 for i in 0..63 bytes.setLength(nChars+1); // one extra for the byte that contains the shift info bytes.grow(BUF_SIZE_LONG); bytes.setByteAt(0, (byte)(SHIFT_START_LONG + shift)); long sortableBits = val ^ 0x8000000000000000L; sortableBits >>>= shift; while (nChars > 0) { // Store 7 bits per byte for compatibility // with UTF-8 encoding of terms bytes.setByteAt(nChars--, (byte)(sortableBits & 0x7f)); sortableBits >>>= 7; } }
Example #21
Source File: FSTCompletionBuilder.java From lucene-solr with Apache License 2.0 | 6 votes |
/** * Builds the final automaton from a list of entries. */ private FST<Object> buildAutomaton(BytesRefSorter sorter) throws IOException { // Build the automaton. final Outputs<Object> outputs = NoOutputs.getSingleton(); final Object empty = outputs.getNoOutput(); final FSTCompiler<Object> fstCompiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs) .shareMaxTailLength(shareMaxTailLength).build(); BytesRefBuilder scratch = new BytesRefBuilder(); BytesRef entry; final IntsRefBuilder scratchIntsRef = new IntsRefBuilder(); int count = 0; BytesRefIterator iter = sorter.iterator(); while((entry = iter.next()) != null) { count++; if (scratch.get().compareTo(entry) != 0) { fstCompiler.add(Util.toIntsRef(entry, scratchIntsRef), empty); scratch.copyBytes(entry); } } return count == 0 ? null : fstCompiler.compile(); }
Example #22
Source File: GroupConverter.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Converts search groups keyed by {@code MutableValue} into search groups keyed
 * by the indexed {@code BytesRef} form of each group value.
 *
 * @param field the schema field whose type performs the readable-to-indexed conversion
 * @param values the groups to convert; may be null
 * @return the converted groups in iteration order, or null if {@code values} is null
 */
static Collection<SearchGroup<BytesRef>> fromMutable(SchemaField field, Collection<SearchGroup<MutableValue>> values) {
  if (values == null) {
    return null;
  }

  final FieldType fieldType = field.getType();
  final List<SearchGroup<BytesRef>> convertedGroups = new ArrayList<>(values.size());

  for (SearchGroup<MutableValue> source : values) {
    final SearchGroup<BytesRef> target = new SearchGroup<>();
    target.sortValues = source.sortValues;

    // groups whose value does not exist keep a null group value
    target.groupValue = null;
    if (source.groupValue.exists) {
      final BytesRefBuilder binary = new BytesRefBuilder();
      final String readable = Utils.OBJECT_TO_STRING.apply(source.groupValue.toObject());
      fieldType.readableToIndexed(readable, binary);
      target.groupValue = binary.get();
    }
    convertedGroups.add(target);
  }
  return convertedGroups;
}
Example #23
Source File: BBoxStrategy.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Creates an exact-match query for {@code number} on {@code field}.
 *
 * <p>Uses a {@code DoublePoint} exact query when point values are enabled,
 * otherwise a term query on the prefix-coded sortable long when a legacy numeric
 * field type is configured.
 *
 * @param field the field to query
 * @param number the value to match
 * @return the exact-match query
 * @throws UnsupportedOperationException if neither index form is available
 */
private Query makeNumberTermQuery(String field, double number) {
  if (hasPointVals) {
    return DoublePoint.newExactQuery(field, number);
  }
  if (legacyNumericFieldType == null) {
    throw new UnsupportedOperationException("An index is required for this operation.");
  }

  final BytesRefBuilder encoded = new BytesRefBuilder();
  final long sortable = NumericUtils.doubleToSortableLong(number);
  LegacyNumericUtils.longToPrefixCoded(sortable, 0, encoded);
  return new TermQuery(new Term(field, encoded.get()));
}
Example #24
Source File: Store.java From crate with Apache License 2.0 | 5 votes |
/** * Computes a strong hash value for small files. Note that this method should only be used for files < 1MB */ public static void hashFile(BytesRefBuilder fileHash, InputStream in, long size) throws IOException { final int len = (int) Math.min(1024 * 1024, size); // for safety we limit this to 1MB fileHash.grow(len); fileHash.setLength(len); final int readBytes = Streams.readFully(in, fileHash.bytes(), 0, len); assert readBytes == len : Integer.toString(readBytes) + " != " + Integer.toString(len); assert fileHash.length() == len : Integer.toString(fileHash.length()) + " != " + Integer.toString(len); }
Example #25
Source File: TermBuilder.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Encodes the given float as a prefix-coded term (shift 0) over its sortable
 * int representation.
 *
 * @param value the value to encode
 * @return the bytes produced by the encoder
 */
@Override
public BytesRef term(Float value) {
  final int sortable = NumericUtils.floatToSortableInt(value.floatValue());
  final BytesRefBuilder encoded = new BytesRefBuilder();
  NumericUtils.intToPrefixCoded(sortable, 0, encoded);
  return encoded.get();
}
Example #26
Source File: SuggestUtils.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Concatenates {@code toJoin} into {@code result}, placing {@code separator}
 * between adjacent elements.
 *
 * <p>{@code result} is cleared first. An empty {@code toJoin} yields an empty
 * result instead of failing with an {@code ArrayIndexOutOfBoundsException}.
 *
 * @param separator bytes inserted between adjacent elements
 * @param result the builder that receives the joined bytes
 * @param toJoin the elements to join, in order
 * @return the bytes held by {@code result} after joining
 */
public static BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef... toJoin) {
  result.clear();
  for (int i = 0; i < toJoin.length; i++) {
    if (i > 0) {
      // separator goes before every element except the first
      result.append(separator);
    }
    result.append(toJoin[i]);
  }
  return result.get();
}
Example #27
Source File: TermSuggester.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Analyzes the suggestion text and collects one {@code Token} per analyzed
 * token, carrying the term for the suggestion's field and the token's start and
 * end offsets.
 *
 * @param suggestion supplies the analyzer, text and field
 * @param spare scratch chars passed through to {@code SuggestUtils.analyze}
 * @return the collected tokens in analysis order
 * @throws IOException if analysis fails
 */
private List<Token> queryTerms(SuggestionContext suggestion, CharsRefBuilder spare) throws IOException {
  final List<Token> tokens = new ArrayList<>();
  final String field = suggestion.getField();

  final SuggestUtils.TokenConsumer collector = new SuggestUtils.TokenConsumer() {
    @Override
    public void nextToken() {
      // each token gets its own builder and a deep copy of the filled bytes
      final BytesRef termBytes = BytesRef.deepCopyOf(fillBytesRef(new BytesRefBuilder()));
      final Term term = new Term(field, termBytes);
      tokens.add(new Token(term, offsetAttr.startOffset(), offsetAttr.endOffset()));
    }
  };

  SuggestUtils.analyze(suggestion.getAnalyzer(), suggestion.getText(), field, collector, spare);
  return tokens;
}
Example #28
Source File: DirectCandidateGenerator.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Runs {@code term} through the configured pre-filter analyzer, if any.
 *
 * <p>Every token the analyzer emits is written into {@code byteSpare}.
 *
 * @param term the term to filter
 * @param spare scratch chars passed through to {@code SuggestUtils.analyze}
 * @param byteSpare the builder that receives the filtered bytes
 * @return {@code term} itself when no pre-filter is set, otherwise the bytes held by {@code byteSpare}
 * @throws IOException if analysis fails
 */
protected BytesRef preFilter(final BytesRef term, final CharsRefBuilder spare, final BytesRefBuilder byteSpare) throws IOException {
  if (preFilter == null) {
    return term;
  }

  final SuggestUtils.TokenConsumer intoSpare = new SuggestUtils.TokenConsumer() {
    @Override
    public void nextToken() throws IOException {
      this.fillBytesRef(byteSpare);
    }
  };
  SuggestUtils.analyze(preFilter, term, field, intoSpare, spare);
  return byteSpare.get();
}
Example #29
Source File: TestLegacyNumericUtils.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testIntSpecialValues() throws Exception { int[] vals=new int[]{ Integer.MIN_VALUE, Integer.MIN_VALUE+1, Integer.MIN_VALUE+2, -64765767, -4000, -3000, -2000, -1000, -1, 0, 1, 10, 300, 765878989, Integer.MAX_VALUE-2, Integer.MAX_VALUE-1, Integer.MAX_VALUE }; BytesRefBuilder[] prefixVals=new BytesRefBuilder[vals.length]; for (int i=0; i<vals.length; i++) { prefixVals[i] = new BytesRefBuilder(); LegacyNumericUtils.intToPrefixCoded(vals[i], 0, prefixVals[i]); // check forward and back conversion assertEquals( "forward and back conversion should generate same int", vals[i], LegacyNumericUtils.prefixCodedToInt(prefixVals[i].get()) ); // test if decoding values as long fails correctly final int index = i; expectThrows(NumberFormatException.class, () -> { LegacyNumericUtils.prefixCodedToLong(prefixVals[index].get()); }); } // check sort order (prefixVals should be ascending) for (int i=1; i<prefixVals.length; i++) { assertTrue( "check sort order", prefixVals[i-1].get().compareTo(prefixVals[i].get()) < 0 ); } // check the prefix encoding, lower precision should have the difference to original value equal to the lower removed bits final BytesRefBuilder ref = new BytesRefBuilder(); for (int i=0; i<vals.length; i++) { for (int j=0; j<32; j++) { LegacyNumericUtils.intToPrefixCoded(vals[i], j, ref); int prefixVal= LegacyNumericUtils.prefixCodedToInt(ref.get()); int mask=(1 << j) - 1; assertEquals( "difference between prefix val and original value for "+vals[i]+" with shift="+j, vals[i] & mask, vals[i]-prefixVal ); } } }
Example #30
Source File: Uid.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Creates one uid for every combination of type and id.
 *
 * <p>The result is ordered type-major: all ids for the first type, then all ids
 * for the second type, and so on.
 *
 * @param types the type names
 * @param ids the ids, converted to bytes with {@code BytesRefs.toBytesRef}
 * @return an array of {@code types.size() * ids.size()} uids
 */
public static BytesRef[] createUidsForTypesAndIds(Collection<String> types, Collection<?> ids) {
  final BytesRef[] uids = new BytesRef[types.size() * ids.size()];
  final BytesRefBuilder typeScratch = new BytesRefBuilder();
  final BytesRefBuilder idScratch = new BytesRefBuilder();

  int next = 0;
  for (String type : types) {
    // encode the type once and reuse it for every id
    typeScratch.copyChars(type);
    for (Object id : ids) {
      final BytesRef idBytes = BytesRefs.toBytesRef(id, idScratch);
      uids[next] = Uid.createUidAsBytes(typeScratch.get(), idBytes);
      next++;
    }
  }
  return uids;
}