Java Code Examples for org.apache.lucene.util.BytesRef#deepCopyOf()

The following examples show how to use org.apache.lucene.util.BytesRef#deepCopyOf(). The source file and originating project are listed above each example.
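
BytesRef.deepCopyOf(BytesRef) returns a new BytesRef backed by its own freshly allocated array, with offset 0 and the same length as the source. Most of the examples below call it because Lucene APIs such as TermsEnum.term() and TermToBytesRefAttribute.getBytesRef() return a shared buffer that is overwritten on the next iteration, so any reference kept beyond that iteration must be deep-copied first. The following minimal sketch illustrates the hazard; the class and variable names are illustrative and not taken from the examples below:

import java.nio.charset.StandardCharsets;

import org.apache.lucene.util.BytesRef;

public class DeepCopyDemo {
  public static void main(String[] args) {
    byte[] buffer = "first".getBytes(StandardCharsets.UTF_8);
    BytesRef shared = new BytesRef(buffer);

    BytesRef alias = shared;                     // still points at the producer's array
    BytesRef copy = BytesRef.deepCopyOf(shared); // private copy with offset 0

    buffer[0] = 'X'; // the producer reuses its buffer, as a TermsEnum does between calls
    System.out.println(alias.utf8ToString());    // "Xirst" -- silently corrupted
    System.out.println(copy.utf8ToString());     // "first" -- unaffected
  }
}

The examples below apply the same rule whenever a term or value outlives the current iteration, for example as a map key, in a cached array, or inside a Query.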
Example 1
Source File: TextField.java    From lucene-solr with Apache License 2.0
/**
 * Analyzes a text part using the provided {@link Analyzer} for a multi-term query.
 * <p>
 * Expects a single token to be used as the multi-term term. This single token might also be
 * filtered out (by a StopFilter, for example), in which case null is returned.
 *
 * @return The multi-term term bytes, or null if there is no multi-term term.
 * @throws SolrException if the {@link Analyzer} tokenizes more than one token,
 * or if an underlying {@link IOException} occurs.
 */
public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) {
  if (part == null || analyzerIn == null) return null;

  try (TokenStream source = analyzerIn.tokenStream(field, part)) {
    source.reset();

    TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);

    if (!source.incrementToken()) {
      // Accept no tokens because it may have been filtered out by a StopFilter for example.
      return null;
    }
    BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
    if (source.incrementToken())
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "analyzer returned too many terms for multiTerm term: " + part);

    source.end();
    return bytes;
  } catch (IOException e) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "error analyzing range part: " + part, e);
  }
}
 
Example 2
Source File: TermAutomatonQuery.java    From lucene-solr with Apache License 2.0
private int getTermID(BytesRef term) {
  Integer id = termToID.get(term);
  if (id == null) {
    id = termToID.size();
    if (term != null) {
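      // Deep-copy before storing: the caller may reuse or mutate the incoming BytesRef.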
      term = BytesRef.deepCopyOf(term);
    }
    termToID.put(term, id);
    idToTerm.put(id, term);
    if (term == null) {
      anyTermID = id;
    }
  }

  return id;
}
 
Example 3
Source File: XJoinQParserPlugin.java    From BioSolr with Apache License 2.0
private static Transformer transformer(final FieldType ft) {
  return new Transformer() {
    
    BytesRef term = new BytesRef();
    
    @Override
    public BytesRef transform(Object joinId) {
      if (joinId == null) {
        throw new RuntimeException("joinId is null! (weird)");
      }
      String joinStr = joinId.toString();
      // logic same as TermQParserPlugin
      if (ft != null) {
        ft.readableToIndexed(joinStr, term);
      } else {
        term.copyChars(joinStr);
      }
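      // 'term' is a reused scratch buffer, so hand back a stable copy.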
      return BytesRef.deepCopyOf(term);
    }
    
  };
}
 
Example 4
Source File: SpanOrTermsBuilder.java    From lucene-solr with Apache License 2.0
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
  String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
  String value = DOMUtils.getNonBlankTextOrFail(e);

  List<SpanQuery> clausesList = new ArrayList<>();

  try (TokenStream ts = analyzer.tokenStream(fieldName, value)) {
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(termAtt.getBytesRef())));
      clausesList.add(stq);
    }
    ts.end();
    SpanOrQuery soq = new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
    float boost = DOMUtils.getAttribute(e, "boost", 1.0f);
    return new SpanBoostQuery(soq, boost);
  } catch (IOException ioe) {
    throw new ParserException("IOException parsing value: " + value);
  }
}
 
Example 5
Source File: SortingLeafReader.java    From lucene-solr with Apache License 2.0
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
  final BinaryDocValues oldDocValues = in.getBinaryDocValues(field);
  if (oldDocValues == null) return null;
  CachedBinaryDVs dvs;
  synchronized (cachedBinaryDVs) {
    dvs = cachedBinaryDVs.get(field);
    if (dvs == null) {
      FixedBitSet docsWithField = new FixedBitSet(maxDoc());
      BytesRef[] values = new BytesRef[maxDoc()];
      while (true) {
        int docID = oldDocValues.nextDoc();
        if (docID == NO_MORE_DOCS) {
          break;
        }
        int newDocID = docMap.oldToNew(docID);
        docsWithField.set(newDocID);
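        // binaryValue() returns a reused buffer, so cache a private copy per document.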
        values[newDocID] = BytesRef.deepCopyOf(oldDocValues.binaryValue());
      }
      dvs = new CachedBinaryDVs(values, docsWithField);
      cachedBinaryDVs.put(field, dvs);
    }
  }
  return new SortingBinaryDocValues(dvs);
}
 
Example 6
Source File: TestTopDocsCollector.java    From lucene-solr with Apache License 2.0
public void testRealisticConcurrentMinimumScore() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  try (LineFileDocs docs = new LineFileDocs(random())) {
    int numDocs = atLeast(100);
    for (int i = 0; i < numDocs; i++) {
      writer.addDocument(docs.nextDoc());
    }
  }

  IndexReader reader = writer.getReader();
  writer.close();

  Terms terms = MultiTerms.getTerms(reader, "body");
  int termCount = 0;
  TermsEnum termsEnum = terms.iterator();
  while (termsEnum.next() != null) {
    termCount++;
  }
  assertTrue(termCount > 0);

  // Target ~10 terms to search:
  double chance = 10.0 / termCount;
  termsEnum = terms.iterator();
  while (termsEnum.next() != null) {
    if (random().nextDouble() <= chance) {
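      // termsEnum.term() returns a reused buffer; copy it so the TermQuery survives next().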
      BytesRef term = BytesRef.deepCopyOf(termsEnum.term());
      Query query = new TermQuery(new Term("body", term));

      TopDocsCollector<ScoreDoc> collector = doSearchWithThreshold(5, 0, query, reader);
      TopDocs tdc = doConcurrentSearchWithThreshold(5, 0, query, reader);
      TopDocs tdc2 = collector.topDocs();

      CheckHits.checkEqual(query, tdc.scoreDocs, tdc2.scoreDocs);
    }
  }

  reader.close();
  dir.close();
}
 
Example 7
Source File: BytesArray.java    From crate with Apache License 2.0
public BytesArray(BytesRef bytesRef, boolean deepCopy) {
    if (deepCopy) {
        bytesRef = BytesRef.deepCopyOf(bytesRef);
    }
    bytes = bytesRef.bytes;
    offset = bytesRef.offset;
    length = bytesRef.length;
}
 
Example 8
Source File: AlfrescoFieldType.java    From SearchServices with GNU Lesser General Public License v3.0
public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn)
{
    if (part == null || analyzerIn == null)
        return null;

    TokenStream source = null;
    try
    {
        source = analyzerIn.tokenStream(field, part);
        source.reset();

        TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
        BytesRef bytes = termAtt.getBytesRef();

        if (!source.incrementToken())
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "analyzer returned no terms for multiTerm term: " + part);
        if (source.incrementToken())
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "analyzer returned too many terms for multiTerm term: " + part);

        source.end();
        return BytesRef.deepCopyOf(bytes);
    }
    catch (IOException e)
    {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "error analyzing range part: " + part, e);
    }
    finally
    {
        IOUtils.closeWhileHandlingException(source);
    }
}
 
Example 9
Source File: MemoryIndex.java    From lucene-solr with Apache License 2.0
private void storePointValues(Info info, BytesRef pointValue) {
  if (info.pointValues == null) {
    info.pointValues = new BytesRef[4];
  }
  info.pointValues = ArrayUtil.grow(info.pointValues, info.pointValuesCount + 1);
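  // Store a private copy: the incoming pointValue buffer may be reused by the caller.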
  info.pointValues[info.pointValuesCount++] = BytesRef.deepCopyOf(pointValue);
}
 
Example 10
Source File: UniformSplitTermsWriter.java    From lucene-solr with Apache License 2.0
/**
 * @return 1 if the field was written; 0 otherwise.
 */
protected int writeFieldTerms(BlockWriter blockWriter, DataOutput fieldsOutput, TermsEnum termsEnum,
                            FieldInfo fieldInfo, NormsProducer normsProducer) throws IOException {

  FieldMetadata fieldMetadata = new FieldMetadata(fieldInfo, maxDoc);
  fieldMetadata.setDictionaryStartFP(dictionaryOutput.getFilePointer());

  postingsWriter.setField(fieldInfo);
  blockWriter.setField(fieldMetadata);
  IndexDictionary.Builder dictionaryBuilder = new FSTDictionary.Builder();
  BytesRef lastTerm = null;
  while (termsEnum.next() != null) {
    BlockTermState blockTermState = writePostingLine(termsEnum, fieldMetadata, normsProducer);
    if (blockTermState != null) {
      lastTerm = BytesRef.deepCopyOf(termsEnum.term());
      blockWriter.addLine(lastTerm, blockTermState, dictionaryBuilder);
    }
  }

  // Flush remaining terms.
  blockWriter.finishLastBlock(dictionaryBuilder);

  if (fieldMetadata.getNumTerms() > 0) {
    fieldMetadata.setLastTerm(lastTerm);
    fieldMetadataWriter.write(fieldsOutput, fieldMetadata);
    writeDictionary(dictionaryBuilder);
    return 1;
  }
  return 0;
}
 
Example 11
Source File: Token.java    From lucene-solr with Apache License 2.0
@Override
public Token clone() {
  final Token t = (Token) super.clone();
  if (payload != null) {
    t.payload = BytesRef.deepCopyOf(payload);
  }
  return t;
}
 
Example 12
Source File: TestMultiDocValues.java    From lucene-solr with Apache License 2.0
public void testSortedSet() throws Exception {
  Directory dir = newDirectory();
  
  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    int numValues = random().nextInt(5);
    for (int j = 0; j < numValues; j++) {
      doc.add(new SortedSetDocValuesField("bytes", new BytesRef(TestUtil.randomUnicodeString(random()))));
    }
    iw.addDocument(doc);
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();
  
  SortedSetDocValues multi = MultiDocValues.getSortedSetValues(ir, "bytes");
  SortedSetDocValues single = merged.getSortedSetDocValues("bytes");
  if (multi == null) {
    assertNull(single);
  } else {
    assertEquals(single.getValueCount(), multi.getValueCount());
    // check values
    for (long i = 0; i < single.getValueCount(); i++) {
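      // lookupOrd() may return a reused BytesRef; copy 'expected' before the next lookup.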
      final BytesRef expected = BytesRef.deepCopyOf(single.lookupOrd(i));
      final BytesRef actual = multi.lookupOrd(i);
      assertEquals(expected, actual);
    }
    // check ord list
    while (true) {
      int docID = single.nextDoc();
      assertEquals(docID, multi.nextDoc());
      if (docID == NO_MORE_DOCS) {
        break;
      }

      ArrayList<Long> expectedList = new ArrayList<>();
      long ord;
      while ((ord = single.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        expectedList.add(ord);
      }
      
      int upto = 0;
      while ((ord = multi.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        assertEquals(expectedList.get(upto).longValue(), ord);
        upto++;
      }
      assertEquals(expectedList.size(), upto);
    }
  }
  testRandomAdvance(merged.getSortedSetDocValues("bytes"), MultiDocValues.getSortedSetValues(ir, "bytes"));
  testRandomAdvanceExact(merged.getSortedSetDocValues("bytes"), MultiDocValues.getSortedSetValues(ir, "bytes"), merged.maxDoc());
  
  ir.close();
  ir2.close();
  dir.close();
}
 
Example 13
Source File: MaskedField.java    From deprecated-security-advanced-modules with Apache License 2.0
private BytesRef customHash(BytesRef in) {
    final BytesRef copy = BytesRef.deepCopyOf(in);
    return new BytesRef(customHash(copy.bytes));
}
 
Example 14
Source File: TermGroupFacetCollector.java    From lucene-solr with Apache License 2.0
@Override
public void collect(int doc) throws IOException {
  if (doc > groupFieldTermsIndex.docID()) {
    groupFieldTermsIndex.advance(doc);
  }

  int groupOrd;
  if (doc == groupFieldTermsIndex.docID()) {
    groupOrd = groupFieldTermsIndex.ordValue();
  } else {
    groupOrd = -1;
  }
  
  if (facetFieldNumTerms == 0) {
    int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1);
    if (facetPrefix != null || segmentGroupedFacetHits.exists(segmentGroupedFacetsIndex)) {
      return;
    }

    segmentTotalCount++;
    segmentFacetCounts[facetFieldNumTerms]++;

    segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
    BytesRef groupKey;
    if (groupOrd == -1) {
      groupKey = null;
    } else {
      groupKey = BytesRef.deepCopyOf(groupFieldTermsIndex.lookupOrd(groupOrd));
    }
    groupedFacetHits.add(new GroupedFacetHit(groupKey, null));
    return;
  }

  if (doc > facetFieldDocTermOrds.docID()) {
    facetFieldDocTermOrds.advance(doc);
  }
  boolean empty = true;
  if (doc == facetFieldDocTermOrds.docID()) {
    long ord;
    while ((ord = facetFieldDocTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
      process(groupOrd, (int) ord);
      empty = false;
    }
  }
  
  if (empty) {
    process(groupOrd, facetFieldNumTerms); // this facet ord is reserved for docs not containing facet field.
  }
}
 
Example 15
Source File: TermGroupFacetCollector.java    From lucene-solr with Apache License 2.0
@Override
public void collect(int doc) throws IOException {
  if (doc > facetFieldTermsIndex.docID()) {
    facetFieldTermsIndex.advance(doc);
  }

  int facetOrd;
  if (doc == facetFieldTermsIndex.docID()) {
    facetOrd = facetFieldTermsIndex.ordValue();
  } else {
    facetOrd = -1;
  }
  
  if (facetOrd < startFacetOrd || facetOrd >= endFacetOrd) {
    return;
  }

  if (doc > groupFieldTermsIndex.docID()) {
    groupFieldTermsIndex.advance(doc);
  }

  int groupOrd;
  if (doc == groupFieldTermsIndex.docID()) {
    groupOrd = groupFieldTermsIndex.ordValue();
  } else {
    groupOrd = -1;
  }
  int segmentGroupedFacetsIndex = groupOrd * (facetFieldTermsIndex.getValueCount()+1) + facetOrd;
  if (segmentGroupedFacetHits.exists(segmentGroupedFacetsIndex)) {
    return;
  }

  segmentTotalCount++;
  segmentFacetCounts[facetOrd+1]++;

  segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);

  BytesRef groupKey;
  if (groupOrd == -1) {
    groupKey = null;
  } else {
    groupKey = BytesRef.deepCopyOf(groupFieldTermsIndex.lookupOrd(groupOrd));
  }

  BytesRef facetKey;
  if (facetOrd == -1) {
    facetKey = null;
  } else {
    facetKey = BytesRef.deepCopyOf(facetFieldTermsIndex.lookupOrd(facetOrd));
  }

  groupedFacetHits.add(new GroupedFacetHit(groupKey, facetKey));
}
 
Example 16
Source File: TermGroupSelector.java    From lucene-solr with Apache License 2.0
@Override
public BytesRef copyValue() {
  if (groupId == -1)
    return null;
  return BytesRef.deepCopyOf(currentValue());
}
 
Example 17
Source File: OrdsIntersectTermsEnum.java    From lucene-solr with Apache License 2.0
private boolean setSavedStartTerm(BytesRef startTerm) {
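  // Always returns true so it can be invoked inside an assert statement.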
  savedStartTerm = startTerm == null ? null : BytesRef.deepCopyOf(startTerm);
  return true;
}
 
Example 18
Source File: RandomPostingsTester.java    From lucene-solr with Apache License 2.0
public FieldAndTerm(String field, BytesRef term, long ord) {
  this.field = field;
  this.term = BytesRef.deepCopyOf(term);
  this.ord = ord;
}
 
Example 19
Source File: AbstractTestCompressionMode.java    From lucene-solr with Apache License 2.0
byte[] decompress(byte[] compressed, int originalLength, int offset, int length) throws IOException {
  Decompressor decompressor = mode.newDecompressor();
  final BytesRef bytes = new BytesRef();
  decompressor.decompress(new ByteArrayDataInput(compressed), originalLength, offset, length, bytes);
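  // deepCopyOf copies exactly [offset, offset + length), so .bytes holds just the decompressed slice.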
  return BytesRef.deepCopyOf(bytes).bytes;
}
 
Example 20
Source File: FuzzyTermsEnum.java    From lucene-solr with Apache License 2.0
@Override
public BytesRef next() throws IOException {

  if (queuedBottom != null) {
    bottomChanged(queuedBottom);
    queuedBottom = null;
  }

  BytesRef term = actualEnum.next();
  if (term == null) {
    // end
    return null;
  }

  int ed = maxEdits;
    
  // we know the outer DFA always matches.
  // now compute exact edit distance
  while (ed > 0) {
    if (matches(term, ed - 1)) {
      ed--;
    } else {
      break;
    }
  }
    
  if (ed == 0) { // exact match
    boostAtt.setBoost(1.0F);
  } else {
    final int codePointCount = UnicodeUtil.codePointCount(term);
    int minTermLength = Math.min(codePointCount, termLength);

    float similarity = 1.0f - (float) ed / (float) minTermLength;
    boostAtt.setBoost(similarity);
  }
    
  final float bottom = maxBoostAtt.getMaxNonCompetitiveBoost();
  final BytesRef bottomTerm = maxBoostAtt.getCompetitiveTerm();
  if (bottom != this.bottom || bottomTerm != this.bottomTerm) {
    this.bottom = bottom;
    this.bottomTerm = bottomTerm;
    // clone the term before potentially doing something with it
    // this is a rare but wonderful occurrence anyway

    // We must delay bottomChanged until the next next() call otherwise we mess up docFreq(), etc., for the current term:
    queuedBottom = BytesRef.deepCopyOf(term);
  }
  
  return term;
}