org.apache.lucene.util.BytesRef Java Examples

The following examples show how to use org.apache.lucene.util.BytesRef. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: SimpleTextUtil.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Inverse of {@link BytesRef#toString}.
 *
 * <p>The input must be wrapped in square brackets and contain space-separated hexadecimal
 * byte values. The empty form {@code "[]"} yields an empty {@link BytesRef}.
 *
 * @param s the string to parse
 * @return a new {@link BytesRef} holding the parsed bytes
 * @throws IllegalArgumentException if {@code s} is shorter than two characters or is not
 *         enclosed in {@code '['} and {@code ']'}
 * @throws NumberFormatException if a token between the brackets is not a hexadecimal number
 */
public static BytesRef fromBytesRefString(String s) {
  if (s.length() < 2 || s.charAt(0) != '[' || s.charAt(s.length() - 1) != ']') {
    throw new IllegalArgumentException("string " + s + " was not created from BytesRef.toString?");
  }
  String hex = s.substring(1, s.length() - 1);
  if (hex.isEmpty()) {
    // "".split(" ") yields one empty token, which Integer.parseInt would reject.
    return new BytesRef(new byte[0]);
  }
  String[] parts = hex.split(" ");
  byte[] bytes = new byte[parts.length];
  for (int i = 0; i < parts.length; i++) {
    bytes[i] = (byte) Integer.parseInt(parts[i], 16);
  }

  return new BytesRef(bytes);
}

Example #2

Source File: ParentChildFilteredTermsEnum.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Accepts a term when its type part is contained in {@code parentTypes}; otherwise either
 * ends the enumeration or requests a seek to the type returned by
 * {@code parentTypes.ceiling(...)}.
 */
@Override
protected AcceptStatus accept(BytesRef term) throws IOException {
    if (parentTypes.isEmpty()) {
        return AcceptStatus.END;
    }

    final BytesRef[] uidParts = Uid.splitUidIntoTypeAndId(term);
    final BytesRef termType = uidParts[0];
    if (parentTypes.contains(termType)) {
        // Record the type and id of the accepted term.
        type = termType.utf8ToString();
        id = uidParts[1];
        return AcceptStatus.YES;
    }

    final BytesRef following = parentTypes.ceiling(termType);
    if (following == null) {
        return AcceptStatus.END;
    }
    seekTerm = following;
    return AcceptStatus.NO_AND_SEEK;
}

Example #3

Source File: TestBKDRadixSelector.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Returns the smallest doc ID among the points of the slice whose bytes for {@code dimension}
 * equal {@code partitionPoint} and whose data-dimension bytes equal {@code dataDim}, or
 * {@link Integer#MAX_VALUE} when no point matches.
 */
private int getMinDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint, int dataDims, int indexDims, byte[] dataDim) throws  IOException {
  // Positions inside a packed value; they do not depend on the point being read.
  final int dimStart = dimension * bytesPerDimension;
  final int dataStart = indexDims * bytesPerDimension;
  final int dataLen = (dataDims - indexDims) * bytesPerDimension;

  int minDocId = Integer.MAX_VALUE;
  try (PointReader points = p.writer.getReader(p.start, p.count)) {
    while (points.next()) {
      final PointValue point = points.pointValue();
      final BytesRef packed = point.packedValue();
      final int base = packed.offset;

      final boolean sameDimension = Arrays.compareUnsigned(
          packed.bytes, base + dimStart, base + dimStart + bytesPerDimension,
          partitionPoint, 0, bytesPerDimension) == 0;
      if (!sameDimension) {
        continue;
      }
      final boolean sameData = Arrays.compareUnsigned(
          packed.bytes, base + dataStart, base + dataStart + dataLen,
          dataDim, 0, dataLen) == 0;
      if (sameData) {
        minDocId = Math.min(minDocId, point.docID());
      }
    }
  }
  return minDocId;
}

Example #4

Source File: MutatableAction.java From incubator-retired-blur with Apache License 2.0

6 votes

/**
 * Looks up {@code rowId} in the {@code BlurConstants.ROW_ID} terms of every leaf reader and
 * wraps the records of the matching leaves in an {@link IterableRow}.
 *
 * @return the row, or {@code null} when no leaf contains the row id
 */
private IterableRow getIterableRow(String rowId, IndexSearcherCloseable searcher) throws IOException {
  final IndexReader topReader = searcher.getIndexReader();
  final BytesRef target = new BytesRef(rowId);
  final List<AtomicReaderTermsEnum> matches = new ArrayList<AtomicReaderTermsEnum>();

  for (AtomicReaderContext leaf : topReader.leaves()) {
    final AtomicReader segment = leaf.reader();
    final Fields segmentFields = segment.fields();
    final Terms rowIdTerms = segmentFields == null ? null : segmentFields.terms(BlurConstants.ROW_ID);
    if (rowIdTerms == null) {
      continue;
    }
    final TermsEnum cursor = rowIdTerms.iterator(null);
    if (cursor.seekExact(target, true)) {
      // need atomic read as well...
      matches.add(new AtomicReaderTermsEnum(segment, cursor));
    }
  }

  return matches.isEmpty() ? null : new IterableRow(rowId, getRecords(matches));
}

Example #5

Source File: STUniformSplitTermsWriter.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Refills {@code groupedFieldTerms} with the {@link MergingFieldTerms} of every field in
 * {@code fieldPostingsMap} that also has an entry in {@code fieldTermsMap}, resetting each
 * one's iterator for {@code term}, then sorts the list by field number.
 */
private void combinePostingsPerField(BytesRef term,
                                     Map<String, MergingFieldTerms> fieldTermsMap,
                                     Map<String, List<SegmentPostings>> fieldPostingsMap,
                                     List<MergingFieldTerms> groupedFieldTerms) {
  groupedFieldTerms.clear();
  fieldPostingsMap.forEach((fieldName, segmentPostings) -> {
    // The field defined in fieldPostingsMap comes from the FieldInfos of the SegmentReadState.
    // The fieldTermsMap contains entries for fields coming from the SegmentMergeState.
    // So it is possible that the field is not present in fieldTermsMap because it is removed.
    final MergingFieldTerms merging = fieldTermsMap.get(fieldName);
    if (merging == null) {
      return;
    }
    merging.resetIterator(term, segmentPostings);
    groupedFieldTerms.add(merging);
  });
  // Keep the fields ordered by their number in the target merge segment.
  final Comparator<MergingFieldTerms> byFieldNumber =
      Comparator.comparingInt(merging -> merging.fieldMetadata.getFieldInfo().number);
  groupedFieldTerms.sort(byFieldNumber);
}

Example #6

Source File: TestTermVectorsReader.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Checks, for the first five documents, that the term vector of {@code testFields[0]} lists
 * exactly {@code testTerms} in order and that each term's postings contain a single document.
 */
public void testDocsEnum() throws IOException {
  // try-with-resources: the reader is closed even when an assertion below fails.
  try (TermVectorsReader reader = Codec.getDefault().termVectorsFormat().vectorsReader(dir, seg.info, fieldInfos, newIOContext(random()))) {
    for (int j = 0; j < 5; j++) {
      Terms vector = reader.get(j).terms(testFields[0]);
      assertNotNull(vector);
      assertEquals(testTerms.length, vector.size());
      TermsEnum termsEnum = vector.iterator();
      PostingsEnum postingsEnum = null;
      for (int i = 0; i < testTerms.length; i++) {
        final BytesRef text = termsEnum.next();
        assertNotNull(text);
        String term = text.utf8ToString();
        assertEquals(testTerms[i], term);

        postingsEnum = TestUtil.docs(random(), termsEnum, postingsEnum, PostingsEnum.NONE);
        assertNotNull(postingsEnum);
        // A fresh postings enum is unpositioned until nextDoc() is called.
        int doc = postingsEnum.docID();
        assertEquals(-1, doc);
        assertTrue(postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, postingsEnum.nextDoc());
      }
      assertNull(termsEnum.next());
    }
  }
}

Example #7

Source File: TermBuilder.java From Elasticsearch with Apache License 2.0

6 votes

/** Prefix-codes {@code value} with a shift of 0 and returns the resulting bytes. */
@Override
public BytesRef term(Long value) {
    final BytesRefBuilder encoded = new BytesRefBuilder();
    NumericUtils.longToPrefixCoded(value, 0, encoded);
    return encoded.get();
}

Example #8

Source File: LindenFieldCacheImpl.java From linden with Apache License 2.0

6 votes

/**
 * Uninverts {@code key.field} of {@code reader} into a map from term string to doc ID and
 * wraps it, together with the reader's ordinal, in a {@link PerReaderUIDMaps}.
 */
@Override
protected Accountable createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField)
    throws IOException {
  final Map<String, Integer> docIdByUid = new HashMap<>();

  final Uninvert collector = new Uninvert() {
    // Most recently visited term, decoded as UTF-8.
    // NOTE(review): presumably visitTerm is invoked before the visitDoc calls for that term — confirm against Uninvert.
    private String lastTerm;

    @Override
    protected TermsEnum termsEnum(Terms terms) throws IOException {
      return terms.iterator(null);
    }

    @Override
    public void visitTerm(BytesRef term) {
      lastTerm = term.utf8ToString();
    }

    @Override
    public void visitDoc(int docID) {
      // A later doc for the same term replaces the earlier mapping.
      docIdByUid.put(lastTerm, docID);
    }
  };
  collector.uninvert(reader, key.field, setDocsWithField);

  return new PerReaderUIDMaps(reader.getContext().ord, docIdByUid);
}

Example #9

Source File: TestLuceneIndexer.java From dremio-oss with Apache License 2.0

6 votes

/**
 * Adds 10000 documents ({@code "key" + i} / {@code "value" + i}) to {@code index}, recording
 * each pair in {@code data} and sleeping between additions. Stops early when interrupted.
 */
@Override
public void run() {
  try {
    for (int i = 0; i < 10000; ++i) {
      final Document document = new Document();
      final String key = "key" + i;
      final String val = "value" + i;
      document.add(new StringField(key, val, Field.Store.YES));
      // BytesRef(CharSequence) encodes as UTF-8, independent of the platform default charset
      // that String.getBytes() would use.
      document.add(new SortedDocValuesField(key, new BytesRef(val)));
      index.add(document);
      data.put(key, val);
      sleep(1);
    }
  } catch (InterruptedException e) {
    // Stop indexing, but restore the interrupt flag so the interruption is not lost.
    Thread.currentThread().interrupt();
  }
}

Example #10

Source File: IpColumnReference.java From crate with Apache License 2.0

6 votes

/**
 * Returns the IP-formatted value of the current document, or {@code null} when the document
 * has no value.
 *
 * @throws GroupByOnArrayUnsupportedException if the document has more than one value
 * @throws UncheckedIOException if reading the doc values fails
 */
@Override
public String value() {
    try {
        if (!values.advanceExact(docId)) {
            return null;
        }
        final long firstOrd = values.nextOrd();
        final boolean hasSecondValue = values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS;
        if (hasSecondValue) {
            throw new GroupByOnArrayUnsupportedException(columnName);
        }
        final BytesRef encoded = values.lookupOrd(firstOrd);
        return (String) DocValueFormat.IP.format(encoded);
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}

Example #11

Source File: GlobalOrdinalsQuery.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Explains whether {@code doc} matches: it matches when its join value's ordinal (mapped to a
 * global ordinal when {@code globalOrds} is set) is flagged in {@code foundOrds}.
 */
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
  final SortedDocValues joinValues = DocValues.getSorted(context.reader(), joinField);
  if (joinValues == null || joinValues.advance(doc) != doc) {
    return Explanation.noMatch("Not a match");
  }

  final int segmentOrd = joinValues.ordValue();
  final BytesRef joinValue = joinValues.lookupOrd(segmentOrd);

  // Without a global ordinal map the segment ordinal is used directly.
  final int ord = globalOrds == null
      ? segmentOrd
      : (int) globalOrds.getGlobalOrds(context.ord).get(segmentOrd);

  if (!foundOrds.get(ord)) {
    return Explanation.noMatch("Not a match, join value " + Term.toString(joinValue));
  }
  return Explanation.match(score(), "A match, join value " + Term.toString(joinValue));
}

Example #12

Source File: GroupingSearchTest.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Asserts that the group's value corresponds to {@code expected}. A {@code null} expectation
 * is satisfied by a {@code null} value, a {@link MutableValueStr} value, or a zero-length
 * {@link BytesRef}.
 */
private void compareGroupValue(String expected, GroupDocs<?> group) {
  final Object actual = group.groupValue;

  if (expected == null) {
    final boolean treatedAsMissing = actual == null
        || actual.getClass().isAssignableFrom(MutableValueStr.class)
        || ((BytesRef) actual).length == 0;
    if (treatedAsMissing) {
      return;
    }
    fail();
  }

  final Class<?> actualType = actual.getClass();
  if (actualType.isAssignableFrom(BytesRef.class)) {
    assertEquals(new BytesRef(expected), actual);
    return;
  }
  if (actualType.isAssignableFrom(MutableValueStr.class)) {
    final MutableValueStr wanted = new MutableValueStr();
    wanted.value.copyChars(expected);
    assertEquals(wanted, actual);
    return;
  }
  fail();
}

Example #13

Source File: LegacyNumericUtils.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Reads the shift value stored in the first byte of a prefix encoded {@code long}.
 *
 * @param val the prefix coded bytes; the byte at {@code val.offset} carries the shift
 * @return the shift, in the range {@code 0..63}
 * @throws NumberFormatException if the supplied {@link BytesRef} is
 * not correctly prefix encoded.
 */
public static int getPrefixCodedLongShift(final BytesRef val) {
  final int shift = val.bytes[val.offset] - SHIFT_START_LONG;
  final boolean inRange = shift >= 0 && shift <= 63;
  if (!inRange) {
    throw new NumberFormatException("Invalid shift value (" + shift + ") in prefixCoded bytes (is encoded value really an INT?)");
  }
  return shift;
}

Example #14

Source File: TestFSTs.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds a three-entry FST and checks that {@code Util.shortestPaths} returns all three
 * inputs ordered by ascending output.
 */
public void testShortestPaths() throws Exception {
  final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
  final FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);

  final IntsRefBuilder scratch = new IntsRefBuilder();
  fstCompiler.add(Util.toIntsRef(new BytesRef("aab"), scratch), 22L);
  fstCompiler.add(Util.toIntsRef(new BytesRef("aac"), scratch), 7L);
  fstCompiler.add(Util.toIntsRef(new BytesRef("ax"), scratch), 17L);
  final FST<Long> fst = fstCompiler.compile();

  final Util.TopResults<Long> paths = Util.shortestPaths(
      fst, fst.getFirstArc(new FST.Arc<Long>()), outputs.getNoOutput(), minLongComparator, 3, true);
  assertTrue(paths.isComplete);
  assertEquals(3, paths.topN.size());

  // Expected results, cheapest output first.
  final String[] expectedInputs = {"aac", "ax", "aab"};
  final long[] expectedOutputs = {7L, 17L, 22L};
  for (int rank = 0; rank < expectedInputs.length; rank++) {
    // scratch is reused, so the expected IntsRef is built right before it is compared.
    assertEquals(Util.toIntsRef(new BytesRef(expectedInputs[rank]), scratch), paths.topN.get(rank).input);
    assertEquals(expectedOutputs[rank], paths.topN.get(rank).output.longValue());
  }
}

Example #15

Source File: MergeSortRowIdLookup.java From incubator-retired-blur with Apache License 2.0

5 votes

/**
 * Orders readers by the current term of their terms enums.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised while reading a term
 */
@Override
public int compareTo(TermsEnumReader o) {
  final BytesRef mine;
  final BytesRef theirs;
  try {
    mine = _termsEnum.term();
    theirs = o._termsEnum.term();
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  return mine.compareTo(theirs);
}

Example #16

Source File: DocumentValueSourceDictionaryTest.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Generates {@code ndocs} documents keyed by the string value of their text field.
 *
 * <p>Each document carries a text field, three numeric weights, usually a payload, and at
 * least three context fields named {@code ctx_<i>_<j>}.
 *
 * @param ndocs number of documents to generate
 * @return map from the text field's value to the document
 */
private Map<String, Document> generateIndexDocuments(int ndocs) {
  Map<String, Document> docs = new HashMap<>();
  for (int i = 0; i < ndocs; i++) {
    Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
    Field weight1 = new NumericDocValuesField(WEIGHT_FIELD_NAME_1, 10 + i);
    Field weight2 = new NumericDocValuesField(WEIGHT_FIELD_NAME_2, 20 + i);
    Field weight3 = new NumericDocValuesField(WEIGHT_FIELD_NAME_3, 30 + i);
    Document doc = new Document();
    doc.add(field);
    // even if payload is not required usually have it
    if (usually()) {
      Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
      doc.add(payload);
    }
    doc.add(weight1);
    doc.add(weight2);
    doc.add(weight3);
    // Draw the random bound once; re-evaluating it in the loop condition made the number of
    // contexts depend on a fresh random value at every iteration.
    final int numContexts = atLeast(3);
    for (int j = 0; j < numContexts; j++) {
      // A new field per context: re-adding one mutated instance would leave every entry of
      // the document pointing at the same field holding only the last value.
      doc.add(new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_" + i + "_" + j)));
    }
    docs.put(field.stringValue(), doc);
  }
  return docs;
}

Example #17

Source File: CustomSpellCheckListner.java From customized-symspell with MIT License

5 votes

/**
 * Reloads the spell-check dictionary from the terms of the given searcher's index.
 *
 * <p>For every indexed field contained in {@code fieldArr}, each term is converted to its
 * readable form, trimmed, and added to the checker's data holder together with its total
 * term frequency. Nothing is added when the reader exposes no fields.
 *
 * @param newSearcher searcher whose index reader and latest schema are read
 * @param checker spell checker whose data holder receives the dictionary items
 * @throws IOException if reading terms from the index fails
 * @throws SpellCheckException declared by the signature; presumably raised while adding
 *         items to the data holder — TODO confirm
 */
public void reload(SolrIndexSearcher newSearcher, SpellChecker checker)
    throws IOException, SpellCheckException {

  DirectoryReader productsIndexReader = newSearcher.getIndexReader();
  Fields fields = MultiFields.getFields(productsIndexReader);
  IndexSchema schema = newSearcher.getCore().getLatestSchema();
  long time = System.currentTimeMillis();
  // MultiFields.getFields may return null for a reader without postings (e.g. an empty
  // index); iterating it unguarded would throw a NullPointerException.
  if (fields != null) {
    for (String field : fields) {
      if (!fieldArr.contains(field)) {
        continue;
      }
      FieldType type = schema.getField(field).getType();
      int insertionsCount = 0;
      TermsEnum iterator = fields.terms(field).iterator();
      for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
        CharsRefBuilder charsRefBuilder = new CharsRefBuilder();
        type.indexedToReadable(term, charsRefBuilder);
        insertionsCount++;
        checker.getDataHolder().addItem(
            new DictionaryItem(charsRefBuilder.toString().trim(), (double) iterator.totalTermFreq(),
                0.0));
      }
      log.info("Spellcheck Dictionary populated for Field Name {}, Count {}", field,
          insertionsCount);
    }
  }
  log.info("Data for SpellChecker  was populated. Time={} ms",
      (System.currentTimeMillis() - time));
}

Example #18

Source File: Uid.java From crate with Apache License 2.0

5 votes

/**
 * Encodes a URL-safe base64 id as its decoded binary form. When the first decoded byte,
 * read as unsigned, is at least {@code BASE64_ESCAPE}, an escape byte is prepended so the
 * result cannot be mixed up with the numeric or utf8 encodings. In the majority of cases
 * (253/256) the encoded id is exactly the binary form.
 */
private static BytesRef encodeBase64Id(String id) {
    final byte[] decoded = Base64.getUrlDecoder().decode(id);
    if (Byte.toUnsignedInt(decoded[0]) < BASE64_ESCAPE) {
        // No clash possible: use the decoded bytes as they are.
        return new BytesRef(decoded, 0, decoded.length);
    }
    final byte[] escaped = new byte[decoded.length + 1];
    escaped[0] = (byte) BASE64_ESCAPE;
    System.arraycopy(decoded, 0, escaped, 1, decoded.length);
    return new BytesRef(escaped, 0, escaped.length);
}

Example #19

Source File: TestBinaryResponseWriter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Asserts that {@code ByteUtils.UTF16toUTF8} and {@link BytesRef} produce the same UTF-8
 * bytes for {@code input}.
 */
private void compareStringFormat(String input) {
  // Worst case is three UTF-8 bytes per UTF-16 code unit, so inputs of any length fit
  // (a fixed 1024-byte buffer overflows for long inputs).
  byte[] bytes1 = new byte[input.length() * 3];
  int len1 = ByteUtils.UTF16toUTF8(input, 0, input.length(), bytes1, 0);
  BytesRef bytesref = new BytesRef(input);
  assertEquals(len1, bytesref.length);
  for (int i = 0; i < len1; i++) {
    // Valid bytes of a BytesRef start at its offset, not necessarily at index 0.
    assertEquals(input + " not matching char at :" + i, bytesref.bytes[bytesref.offset + i], bytes1[i]);
  }
}

Example #20

Source File: SecureAtomicReader.java From incubator-retired-blur with Apache License 2.0

5 votes

/**
 * Advances the wrapped enum to the next term that is either absent from
 * {@code _maskTermsEnum} or for which {@code checkDocs()} returns {@code true}.
 *
 * @return that term, or {@code null} when the wrapped enum is exhausted
 */
@Override
public BytesRef next() throws IOException {
  for (BytesRef candidate = in.next(); candidate != null; candidate = in.next()) {
    final boolean masked = _maskTermsEnum.seekExact(candidate, true);
    if (!masked || checkDocs()) {
      return candidate;
    }
  }
  return null;
}

Example #21

Source File: TestSegmentReader.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks that every indexed term occurs in the corresponding {@code DocHelper} field value
 * and that postings can be read for known terms.
 */
public void testTerms() throws IOException {
  final Collection<String> indexedFields = FieldInfos.getIndexedFields(reader);
  for (String fieldName : indexedFields) {
    final Terms fieldTerms = MultiTerms.getTerms(reader, fieldName);
    assertNotNull(fieldTerms);
    final TermsEnum cursor = fieldTerms.iterator();
    while (cursor.next() != null) {
      final BytesRef current = cursor.term();
      assertTrue(current != null);
      final String expectedText = (String) DocHelper.nameValues.get(fieldName);
      assertTrue(expectedText.contains(current.utf8ToString()));
    }
  }

  PostingsEnum docsEnum = TestUtil.docs(
      random(), reader, DocHelper.TEXT_FIELD_1_KEY, new BytesRef("field"), null, 0);
  assertTrue(docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

  docsEnum = TestUtil.docs(
      random(), reader, DocHelper.NO_NORMS_KEY, new BytesRef(DocHelper.NO_NORMS_TEXT), null, 0);
  assertTrue(docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

  final PostingsEnum positionsEnum =
      MultiTerms.getTermPostingsEnum(reader, DocHelper.TEXT_FIELD_1_KEY, new BytesRef("field"));
  // NOTE: prior rev of this test was failing to first
  // call next here:
  assertTrue(positionsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertTrue(positionsEnum.docID() == 0);
  assertTrue(positionsEnum.nextPosition() >= 0);
}

Example #22

Source File: BaseExplanationTestCase.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Creates the document for {@code docFields[index]}: the index is stored under {@code KEY}
 * as both a string field and sorted doc values, and the text under {@code FIELD} and
 * {@code ALTFIELD}.
 */
public static Document createDoc(int index) {
  final String key = String.valueOf(index);
  final Document result = new Document();
  result.add(newStringField(KEY, key, Field.Store.NO));
  result.add(new SortedDocValuesField(KEY, new BytesRef(key)));
  result.add(newTextField(FIELD, docFields[index], Field.Store.NO));
  result.add(newTextField(ALTFIELD, docFields[index], Field.Store.NO));
  return result;
}

Example #23

Source File: SignificantTermsAggregatorFactory.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Returns the document frequency of {@code termBytes} according to {@code termsEnum}, or 0
 * when the term is not found.
 *
 * @throws ElasticsearchException if reading the terms fails
 */
public long getBackgroundFrequency(BytesRef termBytes) {
    assert termsEnum != null; // having failed to find a field in the index we don't expect any calls for frequencies
    try {
        return termsEnum.seekExact(termBytes) ? termsEnum.docFreq() : 0;
    } catch (IOException e) {
        throw new ElasticsearchException("IOException loading background document frequency info", e);
    }
}

Example #24

Source File: TermInSetQueryTest.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks that {@code TermInSetQuery.ramBytesUsed()} is within 5% of the size measured by
 * {@code RamUsageTester} for a query over 10000 to 10999 random terms.
 */
public void testRamBytesUsed() {
  final List<BytesRef> randomTerms = new ArrayList<>();
  final int termCount = 10000 + random().nextInt(1000);
  int added = 0;
  while (added < termCount) {
    randomTerms.add(new BytesRef(RandomStrings.randomUnicodeOfLength(random(), 10)));
    ++added;
  }
  final TermInSetQuery query = new TermInSetQuery("f", randomTerms);
  final long measured = RamUsageTester.sizeOf(query);
  final long reported = query.ramBytesUsed();
  // error margin within 5%
  assertEquals(reported, measured, measured / 20);
}

Example #25

Source File: FreeTextSuggester.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Delegates to {@code lookup(key, contexts, num)}; {@code onlyMorePopular} is ignored.
 *
 * @throws RuntimeException wrapping any {@link IOException} from the delegate
 */
@Override
public List<LookupResult> lookup(final CharSequence key, Set<BytesRef> contexts, /* ignored */ boolean onlyMorePopular, int num) {
  final List<LookupResult> results;
  try {
    results = lookup(key, contexts, num);
  } catch (IOException e) {
    // bogus:
    throw new RuntimeException(e);
  }
  return results;
}

Example #26

Source File: CompositeBytesReference.java From crate with Apache License 2.0

5 votes

/**
 * Concatenates every slice returned by {@code iterator()} into a single new
 * {@link BytesRef}, pre-sizing the builder to {@code length()}.
 */
@Override
public BytesRef toBytesRef() {
    final BytesRefBuilder combined = new BytesRefBuilder();
    combined.grow(length());
    final BytesRefIterator slices = iterator();
    try {
        for (BytesRef slice = slices.next(); slice != null; slice = slices.next()) {
            combined.append(slice);
        }
    } catch (IOException ex) {
        throw new AssertionError("won't happen", ex); // this is really an error since we don't do IO in our bytesreferences
    }
    return combined.toBytesRef();
}

Example #27

Source File: LegacyCell.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns the token bytes of this cell, with {@code LEAF_BYTE} appended when the cell is a
 * leaf that is not at the maximum level.
 *
 * @param result passed to {@code getTokenBytesNoLeaf}; the value it returns is the one that
 *        is extended and returned
 * @return the token bytes, possibly followed by the leaf byte
 */
@Override
public BytesRef getTokenBytesWithLeaf(BytesRef result) {
  result = getTokenBytesNoLeaf(result);
  if (!isLeaf || getLevel() == getMaxLevels()) {
    return result;
  }
  if (result.bytes.length < result.offset + result.length + 1) {
    assert false : "Not supposed to happen; performance bug";
    // Fallback when assertions are disabled: move the token into a buffer with room for one
    // more byte. All result.length bytes must be copied; copying length - 1 lost the last one.
    byte[] copy = new byte[result.length + 1];
    System.arraycopy(result.bytes, result.offset, copy, 0, result.length);
    result.bytes = copy;
    result.offset = 0;
  }
  result.bytes[result.offset + result.length++] = LEAF_BYTE;
  return result;
}

Example #28

Source File: PerSegmentSingleValuedFaceting.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Stores the faceting parameters in the corresponding fields; performs no other work.
 *
 * @param searcher stored in {@code this.searcher}
 * @param docs stored in {@code this.docs}
 * @param fieldName stored in {@code this.fieldName}
 * @param offset stored in {@code this.offset}
 * @param limit stored in {@code this.limit}
 * @param mincount stored in {@code this.mincount}
 * @param missing stored in {@code this.missing}
 * @param sort stored in {@code this.sort}
 * @param prefix stored in {@code this.prefix}
 * @param filter stored in {@code this.termFilter}
 */
public PerSegmentSingleValuedFaceting(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix, Predicate<BytesRef> filter) {
  // Source of the data.
  this.searcher = searcher;
  this.docs = docs;
  this.fieldName = fieldName;

  // Term selection.
  this.prefix = prefix;
  this.termFilter = filter;

  // Paging, thresholds and ordering.
  this.offset = offset;
  this.limit = limit;
  this.mincount = mincount;
  this.missing = missing;
  this.sort = sort;
}

Example #29

Source File: Operations.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Reverses, in place, the {@code ref.length} bytes of {@code ref.bytes} starting at
 * {@code ref.offset}.
 */
private static void reverseBytes(BytesRef ref) {
  if (ref.length <= 1) {
    return;
  }
  // Swap from both ends towards the middle.
  int low = ref.offset;
  int high = ref.offset + ref.length - 1;
  while (low < high) {
    final byte swapped = ref.bytes[low];
    ref.bytes[low] = ref.bytes[high];
    ref.bytes[high] = swapped;
    low++;
    high--;
  }
}

Example #30

Source File: BufferedInputIterator.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns the payload at the current position, or {@code null} when payloads are disabled
 * or the position is beyond the stored payloads.
 */
@Override
public BytesRef payload() {
  final boolean available = hasPayloads && curPos < payloads.size();
  return available ? payloads.get(payloadSpare, curPos) : null;
}