org.apache.lucene.util.BytesRef Java Examples

Source File: SimpleTextUtil.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Inverse of {@link BytesRef#toString}.
 *
 * <p>Parses text of the form {@code [hh hh ...]}: hexadecimal byte values separated by single
 * spaces and enclosed in square brackets. The empty form {@code []} yields a zero-length
 * {@link BytesRef}.
 *
 * @param s the bracketed hex string to parse
 * @return a new {@link BytesRef} wrapping the decoded bytes
 * @throws IllegalArgumentException if {@code s} is shorter than two characters or is not
 *         enclosed in {@code [} and {@code ]}
 * @throws NumberFormatException if a token between the brackets is not a hexadecimal number
 */
public static BytesRef fromBytesRefString(String s) {
  if (s.length() < 2 || s.charAt(0) != '[' || s.charAt(s.length() - 1) != ']') {
    throw new IllegalArgumentException("string " + s + " was not created from BytesRef.toString?");
  }
  String body = s.substring(1, s.length() - 1);
  // "".split(" ") returns a single empty token, which Integer.parseInt would reject;
  // treat an empty body as "no bytes" instead.
  String[] parts = body.isEmpty() ? new String[0] : body.split(" ");
  byte[] bytes = new byte[parts.length];
  for (int i = 0; i < parts.length; i++) {
    // Values above 0x7f wrap into the negative byte range via the narrowing cast.
    bytes[i] = (byte) Integer.parseInt(parts[i], 16);
  }

  return new BytesRef(bytes);
}

Source File: ParentChildFilteredTermsEnum.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Accepts a term when the type part of its uid is one of {@code parentTypes}, recording the
 * type and id as a side effect; otherwise seeks to the next candidate type or ends the
 * enumeration when there is none.
 */
@Override
protected AcceptStatus accept(BytesRef term) throws IOException {
    if (parentTypes.isEmpty()) {
        return AcceptStatus.END;
    }

    final BytesRef[] parts = Uid.splitUidIntoTypeAndId(term);
    final BytesRef termType = parts[0];

    if (!parentTypes.contains(termType)) {
        // Not a wanted type: jump to the smallest wanted type >= this one, if any.
        final BytesRef candidate = parentTypes.ceiling(termType);
        if (candidate == null) {
            return AcceptStatus.END;
        }
        seekTerm = candidate;
        return AcceptStatus.NO_AND_SEEK;
    }

    type = termType.utf8ToString();
    id = parts[1];
    return AcceptStatus.YES;
}

Source File: TestBKDRadixSelector.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Scans the points of the given slice and returns the smallest doc ID among those whose value
 * in {@code dimension} equals {@code partitionPoint} and whose data-only dimensions equal
 * {@code dataDim}. Returns {@link Integer#MAX_VALUE} when no point matches.
 */
private int getMinDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint, int dataDims, int indexDims, byte[] dataDim) throws  IOException {
  // Offsets relative to the start of each packed value; they do not change per point.
  final int dimOffset = dimension * bytesPerDimension;
  final int dataOffset = indexDims * bytesPerDimension;
  final int dataLength = (dataDims - indexDims) * bytesPerDimension;

  int minDocId = Integer.MAX_VALUE;
  try (PointReader reader = p.writer.getReader(p.start, p.count)) {
    while (reader.next()) {
      final PointValue point = reader.pointValue();
      final BytesRef packed = point.packedValue();

      final int dimStart = packed.offset + dimOffset;
      if (Arrays.compareUnsigned(packed.bytes, dimStart, dimStart + bytesPerDimension, partitionPoint, 0, bytesPerDimension) != 0) {
        continue;
      }
      final int dataStart = packed.offset + dataOffset;
      if (Arrays.compareUnsigned(packed.bytes, dataStart, dataStart + dataLength, dataDim, 0, dataLength) != 0) {
        continue;
      }
      minDocId = Math.min(minDocId, point.docID());
    }
  }
  return minDocId;
}

Source File: MutatableAction.java From incubator-retired-blur with Apache License 2.0

6 votes

/**
 * Looks up {@code rowId} in the row-id field of every leaf reader of the searcher and wraps
 * the matching segments in an {@link IterableRow}.
 *
 * @return the row, or {@code null} when no segment contains the row id
 */
private IterableRow getIterableRow(String rowId, IndexSearcherCloseable searcher) throws IOException {
  final IndexReader topReader = searcher.getIndexReader();
  final BytesRef rowIdTerm = new BytesRef(rowId);
  final List<AtomicReaderTermsEnum> matches = new ArrayList<AtomicReaderTermsEnum>();

  for (AtomicReaderContext leaf : topReader.leaves()) {
    final AtomicReader segment = leaf.reader();
    final Fields fields = segment.fields();
    final Terms terms = (fields == null) ? null : fields.terms(BlurConstants.ROW_ID);
    if (terms == null) {
      // Segment has no fields at all, or no row-id field.
      continue;
    }
    final TermsEnum termsEnum = terms.iterator(null);
    if (termsEnum.seekExact(rowIdTerm, true)) {
      // need atomic read as well...
      matches.add(new AtomicReaderTermsEnum(segment, termsEnum));
    }
  }

  return matches.isEmpty() ? null : new IterableRow(rowId, getRecords(matches));
}

Source File: STUniformSplitTermsWriter.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Rebuilds {@code groupedFieldTerms} for {@code term}: for every field in
 * {@code fieldPostingsMap} that also has an entry in {@code fieldTermsMap}, resets that
 * entry's iterator with the field's postings and collects it. The resulting list is sorted by
 * field number.
 */
private void combinePostingsPerField(BytesRef term,
                                     Map<String, MergingFieldTerms> fieldTermsMap,
                                     Map<String, List<SegmentPostings>> fieldPostingsMap,
                                     List<MergingFieldTerms> groupedFieldTerms) {
  groupedFieldTerms.clear();
  for (Map.Entry<String, List<SegmentPostings>> entry : fieldPostingsMap.entrySet()) {
    // The field defined in fieldPostingsMap comes from the FieldInfos of the SegmentReadState.
    // The fieldTermsMap contains entries for fields coming from the SegmentMergeState.
    // So it is possible that the field is not present in fieldTermsMap because it is removed.
    final MergingFieldTerms merging = fieldTermsMap.get(entry.getKey());
    if (merging == null) {
      continue;
    }
    merging.resetIterator(term, entry.getValue());
    groupedFieldTerms.add(merging);
  }
  // Keep the fields ordered by their number in the target merge segment.
  groupedFieldTerms.sort(Comparator.comparingInt(ft -> ft.fieldMetadata.getFieldInfo().number));
}

Source File: TestTermVectorsReader.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Verifies, for the first five documents, that the term vector of {@code testFields[0]}
 * enumerates exactly {@code testTerms} in order and that each term's postings contain exactly
 * one document.
 */
public void testDocsEnum() throws IOException {
  // try-with-resources so the reader is closed even when an assertion fails part-way through.
  try (TermVectorsReader reader = Codec.getDefault().termVectorsFormat().vectorsReader(dir, seg.info, fieldInfos, newIOContext(random()))) {
    for (int j = 0; j < 5; j++) {
      Terms vector = reader.get(j).terms(testFields[0]);
      assertNotNull(vector);
      assertEquals(testTerms.length, vector.size());
      TermsEnum termsEnum = vector.iterator();
      PostingsEnum postingsEnum = null;
      for (int i = 0; i < testTerms.length; i++) {
        final BytesRef text = termsEnum.next();
        assertNotNull(text);
        String term = text.utf8ToString();
        assertEquals(testTerms[i], term);

        postingsEnum = TestUtil.docs(random(), termsEnum, postingsEnum, PostingsEnum.NONE);
        assertNotNull(postingsEnum);
        // A fresh postings enum is unpositioned until nextDoc() is called.
        int doc = postingsEnum.docID();
        assertEquals(-1, doc);
        assertTrue(postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, postingsEnum.nextDoc());
      }
      assertNull(termsEnum.next());
    }
  }
}

Source File: TermBuilder.java From Elasticsearch with Apache License 2.0

6 votes

/** Prefix-codes {@code value} with a shift of 0 and returns the resulting bytes. */
@Override
public BytesRef term(Long value) {
    final BytesRefBuilder prefixCoded = new BytesRefBuilder();
    NumericUtils.longToPrefixCoded(value, 0, prefixCoded);
    return prefixCoded.get();
}

Source File: LindenFieldCacheImpl.java From linden with Apache License 2.0

6 votes

/**
 * Uninverts {@code key.field} of the reader into a map from term text to doc ID and wraps it,
 * together with the reader's context ordinal, in a {@link PerReaderUIDMaps}.
 */
@Override
protected Accountable createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField)
    throws IOException {
  final Map<String, Integer> docByUid = new HashMap<>();

  new Uninvert() {
    // Text of the term most recently passed to visitTerm.
    private String lastTerm;

    @Override
    protected TermsEnum termsEnum(Terms terms) throws IOException {
      return terms.iterator(null);
    }

    @Override
    public void visitTerm(BytesRef term) {
      lastTerm = term.utf8ToString();
    }

    @Override
    public void visitDoc(int docID) {
      // A later doc for the same term overwrites the earlier mapping.
      docByUid.put(lastTerm, docID);
    }
  }.uninvert(reader, key.field, setDocsWithField);

  return new PerReaderUIDMaps(reader.getContext().ord, docByUid);
}

Source File: TestLuceneIndexer.java From dremio-oss with Apache License 2.0

6 votes

/**
 * Adds 10000 documents to {@code index}, one per millisecond-sleep, each with a stored string
 * field and a sorted doc-values field named {@code key<i>} holding {@code value<i>}, and
 * records every pair in {@code data}. Stops early if the thread is interrupted.
 */
@Override
public void run() {
  try {
    for (int i = 0; i < 10000; ++i) {
      final Document document = new Document();
      final String key = "key" + i;
      final String val = "value" + i;
      document.add(new StringField(key, val, Field.Store.YES));
      // BytesRef(CharSequence) encodes as UTF-8, independent of the platform default charset.
      document.add(new SortedDocValuesField(key, new BytesRef(val)));
      index.add(document);
      data.put(key, val);
      sleep(1);
    }
  } catch (InterruptedException e) {
    // Stop indexing quietly, but restore the interrupt flag so it is not lost.
    Thread.currentThread().interrupt();
  }
}

Source File: IpColumnReference.java From crate with Apache License 2.0

6 votes

/**
 * Returns the IP-formatted value of the current document, or {@code null} when the document
 * has no value.
 *
 * @throws GroupByOnArrayUnsupportedException if the document has more than one value
 * @throws UncheckedIOException if reading the doc values fails
 */
@Override
public String value() {
    try {
        if (!values.advanceExact(docId)) {
            return null;
        }
        final long firstOrd = values.nextOrd();
        // A second ordinal means the column holds an array for this document.
        if (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
            throw new GroupByOnArrayUnsupportedException(columnName);
        }
        final BytesRef encoded = values.lookupOrd(firstOrd);
        return (String) DocValueFormat.IP.format(encoded);
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}

Source File: GlobalOrdinalsQuery.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Explains whether {@code doc} matches: the document must have a value in the join field and
 * the (global, when available) ordinal of that value must be set in {@code foundOrds}.
 */
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
  final SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
  if (values == null || values.advance(doc) != doc) {
    return Explanation.noMatch("Not a match");
  }

  final int segmentOrd = values.ordValue();
  final BytesRef joinValue = values.lookupOrd(segmentOrd);

  // Translate the segment-local ordinal to a global one when a global mapping exists.
  final int ord = (globalOrds == null)
      ? segmentOrd
      : (int) globalOrds.getGlobalOrds(context.ord).get(segmentOrd);

  if (!foundOrds.get(ord)) {
    return Explanation.noMatch("Not a match, join value " + Term.toString(joinValue));
  }
  return Explanation.match(score(), "A match, join value " + Term.toString(joinValue));
}

Source File: GroupingSearchTest.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Asserts that the group's value corresponds to {@code expected}. A {@code null} expectation
 * is satisfied by a {@code null} group value, a {@code MutableValueStr}-typed value, or an
 * empty {@link BytesRef}; otherwise the value must equal {@code expected} in its own
 * representation.
 */
private void compareGroupValue(String expected, GroupDocs<?> group) {
  if (expected == null) {
    final Object value = group.groupValue;
    // Short-circuit order matters: the BytesRef cast is only attempted as a last resort.
    final boolean acceptable = value == null
        || value.getClass().isAssignableFrom(MutableValueStr.class)
        || ((BytesRef) value).length == 0;
    if (acceptable) {
      return;
    }
    fail();
  }

  final Class<?> valueClass = group.groupValue.getClass();
  if (valueClass.isAssignableFrom(BytesRef.class)) {
    assertEquals(new BytesRef(expected), group.groupValue);
    return;
  }
  if (valueClass.isAssignableFrom(MutableValueStr.class)) {
    final MutableValueStr wanted = new MutableValueStr();
    wanted.value.copyChars(expected);
    assertEquals(wanted, group.groupValue);
    return;
  }
  fail();
}

Source File: LegacyNumericUtils.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns the shift value from a prefix encoded {@code long}, read from the first byte of
 * {@code val} relative to {@code SHIFT_START_LONG}.
 *
 * @param val the prefix encoded bytes
 * @return the shift, in the range 0..63
 * @throws NumberFormatException if the supplied {@link BytesRef} is
 * not correctly prefix encoded.
 */
public static int getPrefixCodedLongShift(final BytesRef val) {
  final int shift = val.bytes[val.offset] - SHIFT_START_LONG;
  if (shift >= 0 && shift <= 63) {
    return shift;
  }
  throw new NumberFormatException("Invalid shift value (" + shift + ") in prefixCoded bytes (is encoded value really an INT?)");
}

Source File: TestFSTs.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds a three-entry FST ("aab"=22, "aac"=7, "ax"=17) and checks that the top-3 shortest
 * paths come back complete and ordered by ascending output: "aac", "ax", "aab".
 */
public void testShortestPaths() throws Exception {
  final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
  final FSTCompiler<Long> compiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
  final IntsRefBuilder scratch = new IntsRefBuilder();

  compiler.add(Util.toIntsRef(new BytesRef("aab"), scratch), 22L);
  compiler.add(Util.toIntsRef(new BytesRef("aac"), scratch), 7L);
  compiler.add(Util.toIntsRef(new BytesRef("ax"), scratch), 17L);
  final FST<Long> fst = compiler.compile();
  // For debugging, the FST can be dumped with Util.toDot(fst, writer, false, false).

  final FST.Arc<Long> firstArc = fst.getFirstArc(new FST.Arc<Long>());
  final Long noOutput = outputs.getNoOutput();
  final Util.TopResults<Long> paths =
      Util.shortestPaths(fst, firstArc, noOutput, minLongComparator, 3, true);

  assertTrue(paths.isComplete);
  assertEquals(3, paths.topN.size());

  // The shared scratch is refilled for each expected key right before the comparison.
  assertEquals(Util.toIntsRef(new BytesRef("aac"), scratch), paths.topN.get(0).input);
  assertEquals(7L, paths.topN.get(0).output.longValue());

  assertEquals(Util.toIntsRef(new BytesRef("ax"), scratch), paths.topN.get(1).input);
  assertEquals(17L, paths.topN.get(1).output.longValue());

  assertEquals(Util.toIntsRef(new BytesRef("aab"), scratch), paths.topN.get(2).input);
  assertEquals(22L, paths.topN.get(2).output.longValue());
}

Source File: MergeSortRowIdLookup.java From incubator-retired-blur with Apache License 2.0

5 votes

/**
 * Orders readers by the current term of their terms enums.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised while reading a term
 */
@Override
public int compareTo(TermsEnumReader o) {
  try {
    return _termsEnum.term().compareTo(o._termsEnum.term());
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}

Source File: DocumentValueSourceDictionaryTest.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Creates {@code ndocs} documents keyed by the string value of their text field. Each document
 * carries the text field, usually a payload, three numeric weights and one or more additions
 * of the contexts field.
 */
private Map<String, Document> generateIndexDocuments(int ndocs) {
  final Map<String, Document> docsByName = new HashMap<>();
  for (int i = 0; i < ndocs; i++) {
    final Document doc = new Document();

    final Field nameField = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
    doc.add(nameField);

    // even if payload is not required usually have it
    if (usually()) {
      doc.add(new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i)));
    }

    doc.add(new NumericDocValuesField(WEIGHT_FIELD_NAME_1, 10 + i));
    doc.add(new NumericDocValuesField(WEIGHT_FIELD_NAME_2, 20 + i));
    doc.add(new NumericDocValuesField(WEIGHT_FIELD_NAME_3, 30 + i));

    final Field contexts = new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_" + i + "_0"));
    doc.add(contexts);
    // NOTE: the loop bound atLeast(3) is re-evaluated on every iteration, and the same
    // Field instance is mutated and re-added each time.
    for (int j = 1; j < atLeast(3); j++) {
      contexts.setBytesValue(new BytesRef("ctx_" + i + "_" + j));
      doc.add(contexts);
    }

    docsByName.put(nameField.stringValue(), doc);
  }
  return docsByName;
}

Source File: CustomSpellCheckListner.java From customized-symspell with MIT License

5 votes

/**
 * Reload method of the spellcheck listener: repopulates the spell checker's data holder with
 * one dictionary item per indexed term of every field listed in {@code fieldArr}.
 *
 * @param newSearcher searcher supplying the index reader and the latest schema
 * @param checker spell checker whose data holder receives the dictionary items
 * @throws IOException if reading the index terms fails
 * @throws SpellCheckException declared by the signature; presumably raised while adding
 *         items to the data holder
 */
public void reload(SolrIndexSearcher newSearcher, SpellChecker checker)
    throws IOException, SpellCheckException {

  final DirectoryReader reader = newSearcher.getIndexReader();
  final Fields fields = MultiFields.getFields(reader);
  final IndexSchema schema = newSearcher.getCore().getLatestSchema();
  final long startMillis = System.currentTimeMillis();

  for (String field : fields) {
    if (fieldArr.contains(field)) {
      final FieldType type = schema.getField(field).getType();
      int insertionsCount = 0;
      final TermsEnum iterator = fields.terms(field).iterator();
      while (iterator.next() != null) {
        final BytesRef term = iterator.term();
        final CharsRefBuilder readable = new CharsRefBuilder();
        // Convert the indexed form of the term back to its readable form.
        type.indexedToReadable(term, readable);
        insertionsCount++;
        checker.getDataHolder().addItem(
            new DictionaryItem(readable.toString().trim(), (double) iterator.totalTermFreq(),
                0.0));
      }
      log.info("Spellcheck Dictionary populated for Field Name {}, Count {}", field,
          insertionsCount);
    }
  }
  log.info("Data for SpellChecker  was populated. Time={} ms",
      (System.currentTimeMillis() - startMillis));
}

Source File: Uid.java From crate with Apache License 2.0

5 votes

/** With base64 ids, we decode and prepend an escape char in the cases that
 *  it could be mixed up with numeric or utf8 encoding. In the majority of
 *  cases (253/256) the encoded id is exactly the binary form.
 *  The escape byte is prepended when the first decoded byte, read as unsigned,
 *  is at least {@code BASE64_ESCAPE}. */
private static BytesRef encodeBase64Id(String id) {
    final byte[] decoded = Base64.getUrlDecoder().decode(id);
    if (Byte.toUnsignedInt(decoded[0]) < BASE64_ESCAPE) {
        // Common case: the binary form is used as-is.
        return new BytesRef(decoded, 0, decoded.length);
    }
    final byte[] escaped = new byte[decoded.length + 1];
    escaped[0] = (byte) BASE64_ESCAPE;
    System.arraycopy(decoded, 0, escaped, 1, decoded.length);
    return new BytesRef(escaped, 0, escaped.length);
}

Source File: TestBinaryResponseWriter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Asserts that {@code ByteUtils.UTF16toUTF8} and {@code new BytesRef(String)} produce the
 * same UTF-8 bytes for {@code input}.
 *
 * @param input the string to encode with both implementations
 */
private void compareStringFormat(String input) {
  // Size the buffer for the worst case of 3 UTF-8 bytes per UTF-16 char (a surrogate pair is
  // 2 chars -> 4 bytes) so long inputs cannot overflow it; never smaller than before.
  byte[] bytes1 = new byte[Math.max(1024, 3 * input.length())];
  int len1 = ByteUtils.UTF16toUTF8(input, 0, input.length(), bytes1, 0);
  BytesRef bytesref = new BytesRef(input);
  assertEquals(len1, bytesref.length);
  for (int i = 0; i < len1; i++) {
    // Index relative to bytesref.offset: the valid bytes start there, not necessarily at 0.
    assertEquals(input + " not matching char at :" + i, bytesref.bytes[bytesref.offset + i], bytes1[i]);
  }
}

Source File: SecureAtomicReader.java From incubator-retired-blur with Apache License 2.0

5 votes

/**
 * Advances the wrapped enum to the next term that is either absent from the mask terms enum
 * or passes {@code checkDocs()}.
 *
 * @return that term, or {@code null} when the wrapped enum is exhausted
 */
@Override
public BytesRef next() throws IOException {
  BytesRef candidate;
  while ((candidate = in.next()) != null) {
    // checkDocs() is only consulted when the mask enum actually contains the term.
    if (!_maskTermsEnum.seekExact(candidate, true) || checkDocs()) {
      return candidate;
    }
  }
  return null;
}

Source File: TestSegmentReader.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks that every term of every indexed field occurs in the field's source text from
 * {@code DocHelper.nameValues}, and that postings can be obtained and advanced for known terms.
 */
public void testTerms() throws IOException {
  for (String field : FieldInfos.getIndexedFields(reader)) {
    final Terms terms = MultiTerms.getTerms(reader, field);
    assertNotNull(terms);
    final TermsEnum termsEnum = terms.iterator();
    while (termsEnum.next() != null) {
      final BytesRef term = termsEnum.term();
      assertTrue(term != null);
      final String fieldValue = (String) DocHelper.nameValues.get(field);
      assertTrue(fieldValue.contains(term.utf8ToString()));
    }
  }

  final PostingsEnum textDocs =
      TestUtil.docs(random(), reader, DocHelper.TEXT_FIELD_1_KEY, new BytesRef("field"), null, 0);
  assertTrue(textDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

  final PostingsEnum noNormsDocs =
      TestUtil.docs(random(), reader, DocHelper.NO_NORMS_KEY, new BytesRef(DocHelper.NO_NORMS_TEXT), null, 0);
  assertTrue(noNormsDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

  final PostingsEnum positions =
      MultiTerms.getTermPostingsEnum(reader, DocHelper.TEXT_FIELD_1_KEY, new BytesRef("field"));
  // NOTE: prior rev of this test was failing to first
  // call next here:
  assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertTrue(positions.docID() == 0);
  assertTrue(positions.nextPosition() >= 0);
}

Source File: BaseExplanationTestCase.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds the document for {@code docFields[index]}: the index as a string key (string field
 * plus sorted doc values) and the text under both {@code FIELD} and {@code ALTFIELD}.
 */
public static Document createDoc(int index) {
  final String key = "" + index;
  final String text = docFields[index];

  final Document doc = new Document();
  doc.add(newStringField(KEY, key, Field.Store.NO));
  doc.add(new SortedDocValuesField(KEY, new BytesRef(key)));
  doc.add(newTextField(FIELD, text, Field.Store.NO));
  doc.add(newTextField(ALTFIELD, text, Field.Store.NO));
  return doc;
}

Source File: SignificantTermsAggregatorFactory.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Returns the document frequency of {@code termBytes} according to {@code termsEnum}, or 0
 * when the term is not found.
 *
 * @throws ElasticsearchException wrapping any {@link IOException} raised by the lookup
 */
public long getBackgroundFrequency(BytesRef termBytes) {
    assert termsEnum != null; // having failed to find a field in the index we don't expect any calls for frequencies
    try {
        return termsEnum.seekExact(termBytes) ? termsEnum.docFreq() : 0;
    } catch (IOException e) {
        throw new ElasticsearchException("IOException loading background document frequency info", e);
    }
}

Source File: TermInSetQueryTest.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks that {@code TermInSetQuery.ramBytesUsed()} is within 5% of the size measured by
 * {@code RamUsageTester} for a query over 10000-10999 random terms.
 */
public void testRamBytesUsed() {
  final int termCount = 10000 + random().nextInt(1000);
  final List<BytesRef> terms = new ArrayList<>();
  while (terms.size() < termCount) {
    terms.add(new BytesRef(RandomStrings.randomUnicodeOfLength(random(), 10)));
  }

  final TermInSetQuery query = new TermInSetQuery("f", terms);
  final long measured = RamUsageTester.sizeOf(query);
  final long reported = query.ramBytesUsed();
  // error margin within 5%
  assertEquals(reported, measured, measured / 20);
}

Source File: FreeTextSuggester.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Delegates to {@code lookup(key, contexts, num)}; {@code onlyMorePopular} is ignored.
 *
 * @throws RuntimeException wrapping any {@link IOException} from the delegate
 */
@Override
public List<LookupResult> lookup(final CharSequence key, Set<BytesRef> contexts, /* ignored */ boolean onlyMorePopular, int num) {
  final List<LookupResult> results;
  try {
    results = lookup(key, contexts, num);
  } catch (IOException e) {
    // bogus:
    throw new RuntimeException(e);
  }
  return results;
}

Source File: CompositeBytesReference.java From crate with Apache License 2.0

5 votes

/**
 * Concatenates every slice produced by {@code iterator()} into a single {@link BytesRef},
 * pre-sizing the builder to {@code length()}.
 */
@Override
public BytesRef toBytesRef() {
    final BytesRefBuilder joined = new BytesRefBuilder();
    joined.grow(length());
    final BytesRefIterator slices = iterator();
    try {
        for (BytesRef slice = slices.next(); slice != null; slice = slices.next()) {
            joined.append(slice);
        }
    } catch (IOException ex) {
        throw new AssertionError("won't happen", ex); // this is really an error since we don't do IO in our bytesreferences
    }
    return joined.toBytesRef();
}

Source File: LegacyCell.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns the token bytes of this cell, with {@code LEAF_BYTE} appended when the cell is a
 * leaf that is not at the maximum level.
 *
 * @param result passed through to {@code getTokenBytesNoLeaf}; the returned reference is then
 *        extended in place
 * @return the token bytes, including the trailing leaf byte where applicable
 */
@Override
public BytesRef getTokenBytesWithLeaf(BytesRef result) {
  result = getTokenBytesNoLeaf(result);
  if (!isLeaf || getLevel() == getMaxLevels()) {
    return result;
  }
  if (result.bytes.length < result.offset + result.length + 1) {
    // No spare byte after the token: fall back to a copy with room for the leaf byte.
    assert false : "Not supposed to happen; performance bug";
    byte[] copy = new byte[result.length + 1];
    // Copy the entire token (all result.length bytes); copying length - 1 would silently
    // drop the last token byte when assertions are disabled.
    System.arraycopy(result.bytes, result.offset, copy, 0, result.length);
    result.bytes = copy;
    result.offset = 0;
  }
  // Append the leaf marker and grow the logical length by one.
  result.bytes[result.offset + result.length++] = LEAF_BYTE;
  return result;
}

Source File: PerSegmentSingleValuedFaceting.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Captures the faceting request; every argument is stored as-is in the field of the same
 * purpose ({@code filter} is stored as {@code termFilter}). No work is done here.
 */
public PerSegmentSingleValuedFaceting(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix, Predicate<BytesRef> filter) {
  // What to facet over.
  this.searcher = searcher;
  this.docs = docs;
  this.fieldName = fieldName;

  // Term selection.
  this.prefix = prefix;
  this.termFilter = filter;

  // Result shaping.
  this.sort = sort;
  this.offset = offset;
  this.limit = limit;
  this.mincount = mincount;
  this.missing = missing;
}

Source File: Operations.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Reverses, in place, the {@code ref.length} bytes of {@code ref.bytes} starting at
 * {@code ref.offset}. References of length 0 or 1 are left untouched.
 */
private static void reverseBytes(BytesRef ref) {
  if (ref.length <= 1) {
    return;
  }
  final byte[] data = ref.bytes;
  int lo = ref.offset;
  int hi = ref.offset + ref.length - 1;
  // Swap the outermost pair and walk inwards until the cursors meet.
  while (lo < hi) {
    final byte tmp = data[lo];
    data[lo] = data[hi];
    data[hi] = tmp;
    lo++;
    hi--;
  }
}

Source File: BufferedInputIterator.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns the payload at the current position, or {@code null} when payloads are disabled or
 * the position is past the last stored payload.
 */
@Override
public BytesRef payload() {
  final boolean available = hasPayloads && curPos < payloads.size();
  if (!available) {
    return null;
  }
  return payloads.get(payloadSpare, curPos);
}

org.apache.lucene.util.BytesRef Java Examples