org.apache.lucene.util.BytesRef Java Examples
The following examples show how to use
org.apache.lucene.util.BytesRef.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: GlobalOrdinalsQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Explains whether {@code doc} matches, based on the ordinal of its value in {@code joinField}.
 *
 * <p>The segment ordinal is translated through {@code globalOrds} when that mapping is present,
 * and the resulting ordinal is looked up in {@code foundOrds}.
 *
 * @param context the leaf whose doc values are consulted
 * @param doc the document id to explain
 * @return a match explanation carrying {@code score()}, or a no-match explanation
 * @throws IOException if reading the doc values fails
 */
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
  final SortedDocValues docValues = DocValues.getSorted(context.reader(), joinField);
  // Either no doc values at all, or the iterator could not land exactly on the requested doc.
  if (docValues == null || docValues.advance(doc) != doc) {
    return Explanation.noMatch("Not a match");
  }

  final int segmentOrd = docValues.ordValue();
  final BytesRef joinValue = docValues.lookupOrd(segmentOrd);
  final int ord = (globalOrds == null)
      ? segmentOrd
      : (int) globalOrds.getGlobalOrds(context.ord).get(segmentOrd);

  if (foundOrds.get(ord)) {
    return Explanation.match(score(), "A match, join value " + Term.toString(joinValue));
  }
  return Explanation.noMatch("Not a match, join value " + Term.toString(joinValue));
}
Example #2
Source File: ParentChildFilteredTermsEnum.java From Elasticsearch with Apache License 2.0 | 6 votes |
/**
 * Accepts {@code term} when its type part is one of {@code parentTypes}; otherwise requests a
 * seek to the next parent type, or ends the enumeration when there is none.
 *
 * <p>On acceptance the {@code type} and {@code id} fields are updated from the split uid.
 *
 * @param term the uid term to test
 * @return {@code YES}, {@code NO_AND_SEEK} (with {@code seekTerm} set) or {@code END}
 * @throws IOException declared by the overridden method
 */
@Override
protected AcceptStatus accept(BytesRef term) throws IOException {
  if (parentTypes.isEmpty()) {
    return AcceptStatus.END;
  }

  final BytesRef[] typeAndId = Uid.splitUidIntoTypeAndId(term);
  final BytesRef termType = typeAndId[0];
  if (parentTypes.contains(termType)) {
    type = termType.utf8ToString();
    id = typeAndId[1];
    return AcceptStatus.YES;
  }

  // Not a parent type: jump to the smallest parent type that is >= this term's type, if any.
  final BytesRef nextType = parentTypes.ceiling(termType);
  if (nextType == null) {
    return AcceptStatus.END;
  }
  seekTerm = nextType;
  return AcceptStatus.NO_AND_SEEK;
}
Example #3
Source File: TestTermVectorsReader.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testDocsEnum() throws IOException { TermVectorsReader reader = Codec.getDefault().termVectorsFormat().vectorsReader(dir, seg.info, fieldInfos, newIOContext(random())); for (int j = 0; j < 5; j++) { Terms vector = reader.get(j).terms(testFields[0]); assertNotNull(vector); assertEquals(testTerms.length, vector.size()); TermsEnum termsEnum = vector.iterator(); PostingsEnum postingsEnum = null; for (int i = 0; i < testTerms.length; i++) { final BytesRef text = termsEnum.next(); assertNotNull(text); String term = text.utf8ToString(); //System.out.println("Term: " + term); assertEquals(testTerms[i], term); postingsEnum = TestUtil.docs(random(), termsEnum, postingsEnum, PostingsEnum.NONE); assertNotNull(postingsEnum); int doc = postingsEnum.docID(); assertEquals(-1, doc); assertTrue(postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(DocIdSetIterator.NO_MORE_DOCS, postingsEnum.nextDoc()); } assertNull(termsEnum.next()); } reader.close(); }
Example #4
Source File: STUniformSplitTermsWriter.java From lucene-solr with Apache License 2.0 | 6 votes |
private void combinePostingsPerField(BytesRef term, Map<String, MergingFieldTerms> fieldTermsMap, Map<String, List<SegmentPostings>> fieldPostingsMap, List<MergingFieldTerms> groupedFieldTerms) { groupedFieldTerms.clear(); for (Map.Entry<String, List<SegmentPostings>> fieldPostingsEntry : fieldPostingsMap.entrySet()) { // The field defined in fieldPostingsMap comes from the FieldInfos of the SegmentReadState. // The fieldTermsMap contains entries for fields coming from the SegmentMergeSate. // So it is possible that the field is not present in fieldTermsMap because it is removed. MergingFieldTerms fieldTerms = fieldTermsMap.get(fieldPostingsEntry.getKey()); if (fieldTerms != null) { fieldTerms.resetIterator(term, fieldPostingsEntry.getValue()); groupedFieldTerms.add(fieldTerms); } } // Keep the fields ordered by their number in the target merge segment. groupedFieldTerms.sort(Comparator.comparingInt(fieldTerms -> fieldTerms.fieldMetadata.getFieldInfo().number)); }
Example #5
Source File: TestBKDRadixSelector.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Scans the points of slice {@code p} and returns the smallest doc id among those whose bytes for
 * {@code dimension} equal {@code partitionPoint} and whose data-only dimensions equal
 * {@code dataDim}.
 *
 * @return the smallest matching doc id, or {@code Integer.MAX_VALUE} when nothing matches
 * @throws IOException if the point reader fails
 */
private int getMinDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint, int dataDims, int indexDims, byte[] dataDim) throws IOException {
  // These offsets depend only on the arguments, so compute them once.
  final int dimOffset = dimension * bytesPerDimension;
  final int dataOffset = indexDims * bytesPerDimension;
  final int dataLength = (dataDims - indexDims) * bytesPerDimension;

  int minDocId = Integer.MAX_VALUE;
  try (PointReader reader = p.writer.getReader(p.start, p.count)) {
    while (reader.next()) {
      final PointValue point = reader.pointValue();
      final BytesRef packed = point.packedValue();

      final int dimStart = packed.offset + dimOffset;
      final boolean sameDimension = Arrays.compareUnsigned(
          packed.bytes, dimStart, dimStart + bytesPerDimension,
          partitionPoint, 0, bytesPerDimension) == 0;
      if (!sameDimension) {
        continue;
      }

      final int dataStart = packed.offset + dataOffset;
      final boolean sameData = Arrays.compareUnsigned(
          packed.bytes, dataStart, dataStart + dataLength,
          dataDim, 0, dataLength) == 0;
      if (sameData) {
        minDocId = Math.min(minDocId, point.docID());
      }
    }
  }
  return minDocId;
}
Example #6
Source File: LindenFieldCacheImpl.java From linden with Apache License 2.0 | 6 votes |
/**
 * Builds a map from each term of {@code key.field} (as a UTF-8 decoded string) to a doc id that
 * was visited for it, and wraps it in a {@code PerReaderUIDMaps}.
 *
 * <p>If several docs are visited for the same term, later visits overwrite earlier ones.
 *
 * @param reader the reader to uninvert
 * @param key cache key; its {@code field} names the field to uninvert
 * @param setDocsWithField forwarded unchanged to {@code uninvert}
 * @return the per-reader map, tagged with the reader context's {@code ord}
 * @throws IOException if uninverting fails
 */
@Override
protected Accountable createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField)
    throws IOException {
  final Map<String, Integer> docIdByUid = new HashMap<>();

  final Uninvert uninverter = new Uninvert() {
    /** Term most recently reported through {@link #visitTerm}. */
    private String pendingUid;

    @Override
    public void visitTerm(BytesRef term) {
      pendingUid = term.utf8ToString();
    }

    @Override
    public void visitDoc(int docID) {
      docIdByUid.put(pendingUid, docID);
    }

    @Override
    protected TermsEnum termsEnum(Terms terms) throws IOException {
      return terms.iterator(null);
    }
  };

  uninverter.uninvert(reader, key.field, setDocsWithField);
  return new PerReaderUIDMaps(reader.getContext().ord, docIdByUid);
}
Example #7
Source File: TestLuceneIndexer.java From dremio-oss with Apache License 2.0 | 6 votes |
/**
 * Adds 10000 documents to {@code index}, one per millisecond-spaced iteration, and records each
 * key/value pair in {@code data}.
 *
 * <p>Each document carries a stored string field and a sorted doc-values field under the same
 * name. The loop stops early if the thread is interrupted while sleeping.
 */
@Override
public void run() {
  try {
    for (int i = 0; i < 10000; ++i) {
      final Document document = new Document();
      final String key = "key" + i;
      final String val = "value" + i;
      document.add(new StringField(key, val, Field.Store.YES));
      // Encode explicitly as UTF-8 rather than with the platform default charset.
      document.add(new SortedDocValuesField(
          key, new BytesRef(val.getBytes(java.nio.charset.StandardCharsets.UTF_8))));
      index.add(document);
      data.put(key, val);
      sleep(1);
    }
  } catch (InterruptedException e) {
    // Stop indexing, but restore the interrupt flag so whoever owns this thread can still
    // observe the interruption instead of having it silently swallowed.
    Thread.currentThread().interrupt();
  }
}
Example #8
Source File: TermBuilder.java From Elasticsearch with Apache License 2.0 | 6 votes |
/**
 * Encodes {@code value} with {@code NumericUtils.longToPrefixCoded} using a shift of zero.
 *
 * @param value the value to encode; unboxed, so {@code null} results in a NullPointerException
 * @return the bytes produced by the builder
 */
@Override
public BytesRef term(Long value) {
  final int shift = 0;
  final BytesRefBuilder encoded = new BytesRefBuilder();
  NumericUtils.longToPrefixCoded(value, shift, encoded);
  return encoded.get();
}
Example #9
Source File: SimpleTextUtil.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Inverse of {@link BytesRef#toString}.
 *
 * <p>Expects a bracketed, space-separated list of hexadecimal byte values, for example
 * {@code [6c 75 63]}. An empty pair of brackets ({@code []}) is parsed as zero bytes.
 *
 * @param s the bracketed string to parse
 * @return a {@link BytesRef} over the decoded bytes
 * @throws IllegalArgumentException if {@code s} is shorter than two characters or is not
 *         enclosed in square brackets
 * @throws NumberFormatException if an element between the brackets is not valid hexadecimal
 */
public static BytesRef fromBytesRefString(String s) {
  final boolean bracketed =
      s.length() >= 2 && s.charAt(0) == '[' && s.charAt(s.length() - 1) == ']';
  if (!bracketed) {
    throw new IllegalArgumentException("string " + s + " was not created from BytesRef.toString?");
  }

  final String payload = s.substring(1, s.length() - 1);
  if (payload.isEmpty()) {
    // "".split(" ") yields a single empty element, which Integer.parseInt would reject;
    // treat "[]" as the empty byte sequence instead.
    return new BytesRef(new byte[0]);
  }

  final String[] parts = payload.split(" ");
  final byte[] bytes = new byte[parts.length];
  for (int i = 0; i < parts.length; i++) {
    bytes[i] = (byte) Integer.parseInt(parts[i], 16);
  }
  return new BytesRef(bytes);
}
Example #10
Source File: GroupingSearchTest.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Asserts that the group's value corresponds to {@code expected}.
 *
 * <p>With a {@code null} expectation, a {@code null} group value, a value whose class passes the
 * {@code MutableValueStr} check, or a zero-length {@code BytesRef} is accepted. Otherwise the
 * value is compared as a {@code BytesRef} or a {@code MutableValueStr}, depending on its class;
 * any other class fails the test.
 *
 * @param expected the expected group value as a string, may be {@code null}
 * @param group the group whose {@code groupValue} is checked
 */
private void compareGroupValue(String expected, GroupDocs<?> group) {
  final Object actual = group.groupValue;

  if (expected == null) {
    final boolean acceptableForNull = actual == null
        || actual.getClass().isAssignableFrom(MutableValueStr.class)
        || ((BytesRef) actual).length == 0;
    if (acceptableForNull) {
      return;
    }
    fail();
  }

  final Class<?> actualClass = actual.getClass();
  if (actualClass.isAssignableFrom(BytesRef.class)) {
    assertEquals(new BytesRef(expected), actual);
    return;
  }
  if (actualClass.isAssignableFrom(MutableValueStr.class)) {
    final MutableValueStr expectedValue = new MutableValueStr();
    expectedValue.value.copyChars(expected);
    assertEquals(expectedValue, actual);
    return;
  }
  fail();
}
Example #11
Source File: IpColumnReference.java From crate with Apache License 2.0 | 6 votes |
/**
 * Returns the IP-formatted value of the current document, or {@code null} when the document has
 * no value.
 *
 * @return the formatted value, or {@code null}
 * @throws GroupByOnArrayUnsupportedException if the document holds more than one value
 * @throws UncheckedIOException if reading the doc values fails
 */
@Override
public String value() {
  try {
    if (!values.advanceExact(docId)) {
      return null;
    }
    final long firstOrd = values.nextOrd();
    // A second ordinal means the column is multi-valued for this document.
    if (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
      throw new GroupByOnArrayUnsupportedException(columnName);
    }
    final BytesRef encoded = values.lookupOrd(firstOrd);
    return (String) DocValueFormat.IP.format(encoded);
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  }
}
Example #12
Source File: MutatableAction.java From incubator-retired-blur with Apache License 2.0 | 6 votes |
private IterableRow getIterableRow(String rowId, IndexSearcherCloseable searcher) throws IOException { IndexReader indexReader = searcher.getIndexReader(); BytesRef rowIdRef = new BytesRef(rowId); List<AtomicReaderTermsEnum> possibleRowIds = new ArrayList<AtomicReaderTermsEnum>(); for (AtomicReaderContext atomicReaderContext : indexReader.leaves()) { AtomicReader atomicReader = atomicReaderContext.reader(); Fields fields = atomicReader.fields(); if (fields == null) { continue; } Terms terms = fields.terms(BlurConstants.ROW_ID); if (terms == null) { continue; } TermsEnum termsEnum = terms.iterator(null); if (!termsEnum.seekExact(rowIdRef, true)) { continue; } // need atomic read as well... possibleRowIds.add(new AtomicReaderTermsEnum(atomicReader, termsEnum)); } if (possibleRowIds.isEmpty()) { return null; } return new IterableRow(rowId, getRecords(possibleRowIds)); }
Example #13
Source File: IndexNumericFieldData.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Maps the indexed boolean form to a number: {@code FALSE} becomes 0 and {@code TRUE} becomes 1.
 *
 * @param indexForm the indexed representation to convert
 * @return 0 or 1
 * @throws IllegalArgumentException if {@code indexForm} equals neither boolean constant
 */
@Override
public long toLong(BytesRef indexForm) {
  if (indexForm.equals(BooleanFieldMapper.Values.FALSE)) {
    return 0;
  }
  if (indexForm.equals(BooleanFieldMapper.Values.TRUE)) {
    return 1;
  }
  throw new IllegalArgumentException("Cannot convert " + indexForm + " to a boolean");
}
Example #14
Source File: TestBinaryResponseWriter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Asserts that {@code ByteUtils.UTF16toUTF8} and {@code new BytesRef(String)} produce the same
 * length and the same leading bytes for {@code input}.
 *
 * @param input the string to encode both ways; the scratch buffer holds 1024 bytes
 */
private void compareStringFormat(String input) {
  final byte[] utf8Scratch = new byte[1024];
  final int utf8Length = ByteUtils.UTF16toUTF8(input, 0, input.length(), utf8Scratch, 0);
  final BytesRef encoded = new BytesRef(input);
  System.out.println();
  assertEquals(utf8Length, encoded.length);

  int pos = 0;
  while (pos < utf8Length) {
    assertEquals(input + " not matching char at :" + pos, encoded.bytes[pos], utf8Scratch[pos]);
    pos++;
  }
}
Example #15
Source File: CustomSpellCheckListner.java From customized-symspell with MIT License | 5 votes |
/**
 * Reloads the spell-check dictionary from the index of the given searcher.
 *
 * <p>For every indexed field that is listed in {@code fieldArr}, each term is converted to its
 * readable form, trimmed, and added to the checker's data holder together with the term's total
 * frequency.
 *
 * @param newSearcher the searcher whose index and schema are read
 * @param checker the spell checker whose data holder receives the dictionary items
 * @throws IOException if reading the index fails
 * @throws SpellCheckException declared for failures while adding dictionary items
 */
public void reload(SolrIndexSearcher newSearcher, SpellChecker checker)
    throws IOException, SpellCheckException {
  final DirectoryReader indexReader = newSearcher.getIndexReader();
  final Fields indexedFields = MultiFields.getFields(indexReader);
  final IndexSchema schema = newSearcher.getCore().getLatestSchema();
  final long startMillis = System.currentTimeMillis();

  for (String field : indexedFields) {
    if (!fieldArr.contains(field)) {
      continue;
    }
    final FieldType type = schema.getField(field).getType();
    int insertionsCount = 0;

    final TermsEnum termsEnum = indexedFields.terms(field).iterator();
    while (termsEnum.next() != null) {
      final BytesRef term = termsEnum.term();
      final CharsRefBuilder readable = new CharsRefBuilder();
      type.indexedToReadable(term, readable);
      insertionsCount++;
      checker.getDataHolder().addItem(
          new DictionaryItem(readable.toString().trim(), (double) termsEnum.totalTermFreq(), 0.0));
    }
    log.info("Spellcheck Dictionary populated for Field Name {}, Count {}", field, insertionsCount);
  }

  log.info("Data for SpellChecker was populated. Time={} ms", (System.currentTimeMillis() - startMillis));
}
Example #16
Source File: FSTTermsReader.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public BytesRef next() throws IOException { if (seekPending) { // previously positioned, but termOutputs not fetched seekPending = false; SeekStatus status = seekCeil(term); assert status == SeekStatus.FOUND; // must positioned on valid term } updateEnum(fstEnum.next()); return term; }
Example #17
Source File: CompressingTermVectorsReader.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Returns the payload at the current position.
 *
 * @return {@code payload}, or {@code null} when there is no payload index or the payload is empty
 * @throws IOException declared by the overridden method
 */
@Override
public BytesRef getPayload() throws IOException {
  checkPosition();
  final boolean hasPayload = payloadIndex != null && payload.length != 0;
  return hasPayload ? payload : null;
}
Example #18
Source File: TokenStreamOffsetStrategy.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Wraps a token stream and the matchers to run against it.
 *
 * <p>Registers the char-term and offset attributes on the stream and resets it, so the stream
 * is ready for consumption once construction completes.
 *
 * @param ts the token stream to consume
 * @param matchers the matchers; one description slot is allocated per matcher
 * @throws IOException if resetting the stream fails
 */
TokenStreamOffsetsEnum(TokenStream ts, CharArrayMatcher[] matchers) throws IOException {
  this.stream = ts;
  this.matchers = matchers;
  this.matchDescriptions = new BytesRef[matchers.length];
  this.charTermAtt = ts.addAttribute(CharTermAttribute.class);
  this.offsetAtt = ts.addAttribute(OffsetAttribute.class);
  ts.reset();
}
Example #19
Source File: BKDWriter.java From lucene-solr with Apache License 2.0 | 5 votes |
private void writeLeafBlockPackedValues(DataOutput out, int[] commonPrefixLengths, int count, int sortedDim, IntFunction<BytesRef> packedValues, int leafCardinality) throws IOException { int prefixLenSum = Arrays.stream(commonPrefixLengths).sum(); if (prefixLenSum == packedBytesLength) { // all values in this block are equal out.writeByte((byte) -1); } else { assert commonPrefixLengths[sortedDim] < bytesPerDim; // estimate if storing the values with cardinality is cheaper than storing all values. int compressedByteOffset = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim]; int highCardinalityCost; int lowCardinalityCost; if (count == leafCardinality) { // all values in this block are different highCardinalityCost = 0; lowCardinalityCost = 1; } else { // compute cost of runLen compression int numRunLens = 0; for (int i = 0; i < count; ) { // do run-length compression on the byte at compressedByteOffset int runLen = runLen(packedValues, i, Math.min(i + 0xff, count), compressedByteOffset); assert runLen <= 0xff; numRunLens++; i += runLen; } // Add cost of runLen compression highCardinalityCost = count * (packedBytesLength - prefixLenSum - 1) + 2 * numRunLens; // +1 is the byte needed for storing the cardinality lowCardinalityCost = leafCardinality * (packedBytesLength - prefixLenSum + 1); } if (lowCardinalityCost <= highCardinalityCost) { out.writeByte((byte) -2); writeLowCardinalityLeafBlockPackedValues(out, commonPrefixLengths, count, packedValues); } else { out.writeByte((byte) sortedDim); writeHighCardinalityLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues, compressedByteOffset); } } }
Example #20
Source File: NodeOsInfoExpression.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Registers the child expressions: available processors (read from {@code info}), OS name,
 * architecture and version (from the corresponding constants), and the JVM sub-expression.
 *
 * @param info the OS info backing the available-processors expression
 */
private void addChildImplementations(final OsInfo info) {
  final OsInfoExpression availableProcessors = new OsInfoExpression() {
    @Override
    public Integer value() {
      return info.getAvailableProcessors();
    }
  };
  childImplementations.put(AVAILABLE_PROCESSORS, availableProcessors);

  final OsInfoExpression osName = new OsInfoExpression() {
    @Override
    public BytesRef value() {
      return OS_NAME;
    }
  };
  childImplementations.put(OS, osName);

  final OsInfoExpression osArch = new OsInfoExpression() {
    @Override
    public BytesRef value() {
      return OS_ARCH;
    }
  };
  childImplementations.put(ARCH, osArch);

  final OsInfoExpression osVersion = new OsInfoExpression() {
    @Override
    public BytesRef value() {
      return OS_VERSION;
    }
  };
  childImplementations.put(VERSION, osVersion);

  childImplementations.put(SysNodesTableInfo.SYS_COL_OS_INFO_JVM, new NodeOsJvmExpression());
}
Example #21
Source File: GraphTermsQParserPlugin.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Builds the set of documents matching any of {@code sortedPackedPoints} in {@code field},
 * skipping every point whose hit count exceeds {@code maxDocFreq}.
 *
 * <p>Each point is intersected with the point values of every segment that has them; the
 * collected global ids are added to the result only when the count stayed within the limit.
 *
 * @param searcher the searcher whose top-level reader and live docs are used
 * @return the doc set built by the {@code DocSetBuilder} with the searcher's live docs
 * @throws IOException if reading point values fails
 */
public DocSet getDocSet(IndexSearcher searcher) throws IOException {
  final IndexReaderContext top = ReaderUtil.getTopLevelContext(searcher.getTopReaderContext());
  final List<LeafReaderContext> leaves = top.leaves();
  final DocSetBuilder builder =
      new DocSetBuilder(top.reader().maxDoc(), Math.min(64, (top.reader().maxDoc() >>> 10) + 4));

  final PointValues[] segPoints = new PointValues[leaves.size()];
  for (int seg = 0; seg < segPoints.length; seg++) {
    segPoints[seg] = leaves.get(seg).reader().getPointValues(field);
  }

  final int maxCollect = Math.min(maxDocFreq, top.reader().maxDoc());
  final PointSetQuery.CutoffPointVisitor visitor = new PointSetQuery.CutoffPointVisitor(maxCollect);
  final PrefixCodedTerms.TermIterator points = sortedPackedPoints.iterator();

  BytesRef point;
  while ((point = points.next()) != null) {
    visitor.setPoint(point);

    boolean overLimit = false;
    for (int seg = 0; seg < leaves.size() && !overLimit; seg++) {
      if (segPoints[seg] == null) {
        continue;
      }
      visitor.setBase(leaves.get(seg).docBase);
      segPoints[seg].intersect(visitor);
      overLimit = visitor.getCount() > maxDocFreq;
    }
    if (overLimit) {
      // Too frequent: drop this point entirely and move on to the next one.
      continue;
    }

    final int collected = visitor.getCount();
    final int[] globalIds = visitor.getGlobalIds();
    for (int k = 0; k < collected; k++) {
      builder.add(globalIds[k]);
    }
  }

  final FixedBitSet liveDocs = getLiveDocs(searcher);
  return builder.build(liveDocs);
}
Example #22
Source File: MatchQueryBuilder.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Builds the match query for the single entry of {@code fields} and applies the boost from the
 * options, when one is set.
 *
 * @param fields exactly one entry: field name mapped to a value passed through
 *        {@code floatOrNull}
 * @param queryString the text to match
 * @return the resulting query
 * @throws IOException if building the query fails
 */
public Query query(Map<String, Object> fields, BytesRef queryString) throws IOException {
  assert fields.size() == 1;
  final Map.Entry<String, Object> onlyField = fields.entrySet().iterator().next();

  final Query built = singleQueryAndApply(
      matchType.matchQueryType(),
      onlyField.getKey(),
      queryString,
      floatOrNull(onlyField.getValue()));

  final Float optionsBoost = this.options.boost();
  if (optionsBoost == null) {
    return built;
  }
  built.setBoost(optionsBoost);
  return built;
}
Example #23
Source File: DocumentsImpl.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public Optional<Term> seekTerm(String termText) { Objects.requireNonNull(termText); if (curField == null) { // field is not selected log.warn("Field not selected."); return Optional.empty(); } try { Terms terms = IndexUtils.getTerms(reader, curField); setTermsIterator(terms.iterator()); if (tenum.seekCeil(new BytesRef(termText)) == TermsEnum.SeekStatus.END) { // reached to the end of the iterator resetTermsIterator(); log.info("Reached the end of the term iterator for field: {}.", curField); return Optional.empty(); } else { return Optional.of(new Term(curField, tenum.term())); } } catch (IOException e) { resetTermsIterator(); throw new LukeException(String.format(Locale.ENGLISH, "Terms not available for field: %s.", curField), e); } finally { // discard current postings enum resetPostingsIterator(); } }
Example #24
Source File: DocumentValueSourceDictionaryTest.java From lucene-solr with Apache License 2.0 | 5 votes |
private Map<String, Document> generateIndexDocuments(int ndocs) { Map<String, Document> docs = new HashMap<>(); for(int i = 0; i < ndocs ; i++) { Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES); Field weight1 = new NumericDocValuesField(WEIGHT_FIELD_NAME_1, 10 + i); Field weight2 = new NumericDocValuesField(WEIGHT_FIELD_NAME_2, 20 + i); Field weight3 = new NumericDocValuesField(WEIGHT_FIELD_NAME_3, 30 + i); Field contexts = new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_" + i + "_0")); Document doc = new Document(); doc.add(field); // even if payload is not required usually have it if (usually()) { Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i)); doc.add(payload); } doc.add(weight1); doc.add(weight2); doc.add(weight3); doc.add(contexts); for(int j = 1; j < atLeast(3); j++) { contexts.setBytesValue(new BytesRef("ctx_" + i + "_" + j)); doc.add(contexts); } docs.put(field.stringValue(), doc); } return docs; }
Example #25
Source File: LegacyNumericUtils.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Extracts the shift from a prefix encoded {@code long}.
 *
 * <p>The shift is the first byte of {@code val} (at {@code val.offset}) minus
 * {@code SHIFT_START_LONG}.
 *
 * @param val the prefix encoded bytes
 * @return the shift, in the range 0..63
 * @throws NumberFormatException if the supplied {@link BytesRef} is not correctly prefix
 *         encoded, i.e. the computed shift falls outside 0..63
 */
public static int getPrefixCodedLongShift(final BytesRef val) {
  final int shift = val.bytes[val.offset] - SHIFT_START_LONG;
  final boolean inRange = shift >= 0 && shift <= 63;
  if (!inRange) {
    throw new NumberFormatException("Invalid shift value (" + shift + ") in prefixCoded bytes (is encoded value really an INT?)");
  }
  return shift;
}
Example #26
Source File: TestSegmentReader.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testTerms() throws IOException { final Collection<String> fields = FieldInfos.getIndexedFields(reader); for (String field : fields) { Terms terms = MultiTerms.getTerms(reader, field); assertNotNull(terms); TermsEnum termsEnum = terms.iterator(); while(termsEnum.next() != null) { BytesRef term = termsEnum.term(); assertTrue(term != null); String fieldValue = (String) DocHelper.nameValues.get(field); assertTrue(fieldValue.indexOf(term.utf8ToString()) != -1); } } PostingsEnum termDocs = TestUtil.docs(random(), reader, DocHelper.TEXT_FIELD_1_KEY, new BytesRef("field"), null, 0); assertTrue(termDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); termDocs = TestUtil.docs(random(), reader, DocHelper.NO_NORMS_KEY, new BytesRef(DocHelper.NO_NORMS_TEXT), null, 0); assertTrue(termDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); PostingsEnum positions = MultiTerms.getTermPostingsEnum(reader, DocHelper.TEXT_FIELD_1_KEY, new BytesRef("field")); // NOTE: prior rev of this test was failing to first // call next here: assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertTrue(positions.docID() == 0); assertTrue(positions.nextPosition() >= 0); }
Example #27
Source File: BaseExplanationTestCase.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Creates the document for {@code docFields[index]}.
 *
 * <p>The index is stored under {@code KEY} both as a string field and as sorted doc values; the
 * text is added under {@code FIELD} and again under {@code ALTFIELD}.
 *
 * @param index position in {@code docFields}; also used as the document key
 * @return the populated document
 */
public static Document createDoc(int index) {
  final Document doc = new Document();
  doc.add(newStringField(KEY, "" + index, Field.Store.NO));
  doc.add(new SortedDocValuesField(KEY, new BytesRef("" + index)));
  doc.add(newTextField(FIELD, docFields[index], Field.Store.NO));
  doc.add(newTextField(ALTFIELD, docFields[index], Field.Store.NO));
  return doc;
}
Example #28
Source File: SignificantTermsAggregatorFactory.java From Elasticsearch with Apache License 2.0 | 5 votes |
public long getBackgroundFrequency(BytesRef termBytes) { assert termsEnum != null; // having failed to find a field in the index we don't expect any calls for frequencies long result = 0; try { if (termsEnum.seekExact(termBytes)) { result = termsEnum.docFreq(); } } catch (IOException e) { throw new ElasticsearchException("IOException loading background document frequency info", e); } return result; }
Example #29
Source File: TermInSetQueryTest.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testRamBytesUsed() { List<BytesRef> terms = new ArrayList<>(); final int numTerms = 10000 + random().nextInt(1000); for (int i = 0; i < numTerms; ++i) { terms.add(new BytesRef(RandomStrings.randomUnicodeOfLength(random(), 10))); } TermInSetQuery query = new TermInSetQuery("f", terms); final long actualRamBytesUsed = RamUsageTester.sizeOf(query); final long expectedRamBytesUsed = query.ramBytesUsed(); // error margin within 5% assertEquals(expectedRamBytesUsed, actualRamBytesUsed, actualRamBytesUsed / 20); }
Example #30
Source File: FreeTextSuggester.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public List<LookupResult> lookup(final CharSequence key, Set<BytesRef> contexts, /* ignored */ boolean onlyMorePopular, int num) { try { return lookup(key, contexts, num); } catch (IOException ioe) { // bogus: throw new RuntimeException(ioe); } }