org.apache.lucene.util.StringHelper Java Examples

The following examples show how to use org.apache.lucene.util.StringHelper. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestSegmentInfos.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Commits a single segment written with {@code Version.LUCENE_9_0_0}, reads the
 * commit back and checks that the minimum segment version is that version while
 * the commit version is {@code Version.LATEST}.
 */
public void testVersionsOneSegment() throws IOException {
  // try-with-resources: the directory is closed even when an assertion below fails
  try (BaseDirectoryWrapper dir = newDirectory()) {
    dir.setCheckIndexOnClose(false);
    byte[] id = StringHelper.randomId();
    Codec codec = Codec.getDefault();

    SegmentInfos sis = new SegmentInfos(Version.LATEST.major);
    SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_0", 1, false, Codec.getDefault(),
                                       Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null);
    info.setFiles(Collections.<String>emptySet());
    codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
    SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, StringHelper.randomId());

    sis.add(commitInfo);
    sis.commit(dir);
    // re-read from the directory so the assertions see what was actually persisted
    sis = SegmentInfos.readLatestCommit(dir);
    assertEquals(Version.LUCENE_9_0_0, sis.getMinSegmentLuceneVersion());
    assertEquals(Version.LATEST, sis.getCommitLuceneVersion());
  }
}
 
Example #2
Source File: CodecUtil.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the complete index header found at the start of the given
 * {@link IndexInput} and returns it as a byte array.
 * <p>
 * The input is first scanned to learn the codec name and the suffix length,
 * then rewound so that the whole header can be copied in one read.
 *
 * @param in input positioned anywhere; it is rewound to offset 0 by this method
 * @return the raw header bytes
 * @throws CorruptIndexException if the first int of the input is not {@code CODEC_MAGIC}
 * @throws IOException if reading from the input fails
 */
public static byte[] readIndexHeader(IndexInput in) throws IOException {
  in.seek(0);
  final int magic = in.readInt();
  if (magic != CODEC_MAGIC) {
    throw new CorruptIndexException("codec header mismatch: actual header=" + magic + " vs expected header=" + CODEC_MAGIC, in);
  }
  final String codecName = in.readString();
  // the next int is read only to advance past it (presumably the version field)
  in.readInt();
  // jump over the object id without reading it
  final long afterId = in.getFilePointer() + StringHelper.ID_LENGTH;
  in.seek(afterId);
  // the suffix length is stored as a single unsigned byte
  final int suffixLen = in.readByte() & 0xFF;
  final int totalLength = headerLength(codecName) + StringHelper.ID_LENGTH + 1 + suffixLen;
  final byte[] header = new byte[totalLength];
  in.seek(0);
  in.readBytes(header, 0, header.length);
  return header;
}
 
Example #3
Source File: SegmentCommitInfo.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Sole constructor.
 * @param info
 *          {@link SegmentInfo} that we wrap
 * @param delCount
 *          number of deleted documents in this segment
 * @param softDelCount
 *          soft-delete count for this segment (presumably the number of soft-deleted documents)
 * @param delGen
 *          deletion generation number (used to name deletion files); {@code -1} makes the next write generation 1
 * @param fieldInfosGen
 *          FieldInfos generation number (used to name field-infos files); {@code -1} makes the next write generation 1
 * @param docValuesGen
 *          DocValues generation number (used to name doc-values updates files); {@code -1} makes the next write generation 1
 * @param id Id that uniquely identifies this segment commit. May be {@code null}; otherwise it must be
 *          {@link StringHelper#ID_LENGTH} bytes long. See {@link StringHelper#randomId()}
 * @throws IllegalArgumentException if {@code id} is non-null and has the wrong length
 */
public SegmentCommitInfo(SegmentInfo info, int delCount, int softDelCount, long delGen, long fieldInfosGen, long docValuesGen, byte[] id) {
  // validate first so no field is touched when the argument is rejected
  if (id != null && id.length != StringHelper.ID_LENGTH) {
    throw new IllegalArgumentException("invalid id: " + Arrays.toString(id));
  }
  this.info = info;
  this.delCount = delCount;
  this.softDelCount = softDelCount;
  this.delGen = delGen;
  // a generation of -1 means "none yet", so the first generation to write is 1
  this.nextWriteDelGen = delGen == -1 ? 1 : delGen + 1;
  this.fieldInfosGen = fieldInfosGen;
  this.nextWriteFieldInfosGen = fieldInfosGen == -1 ? 1 : fieldInfosGen + 1;
  this.docValuesGen = docValuesGen;
  this.nextWriteDocValuesGen = docValuesGen == -1 ? 1 : docValuesGen + 1;
  this.id = id;
}
 
Example #4
Source File: SegmentCommitInfo.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a description of this segment.
 * <p>
 * The text starts with the wrapped info's own description (computed for
 * {@code delCount + pendingDelCount}) and is followed by one tag for every
 * generation that is set, for a positive soft-delete count and for a non-null id.
 *
 * @param pendingDelCount added to this instance's delete count before describing the wrapped info
 */
public String toString(int pendingDelCount) {
  final String base = info.toString(delCount + pendingDelCount);
  final StringBuilder tags = new StringBuilder();
  if (delGen != -1) {
    tags.append(":delGen=").append(delGen);
  }
  if (fieldInfosGen != -1) {
    tags.append(":fieldInfosGen=").append(fieldInfosGen);
  }
  if (docValuesGen != -1) {
    tags.append(":dvGen=").append(docValuesGen);
  }
  if (softDelCount > 0) {
    tags.append(" :softDel=").append(softDelCount);
  }
  if (this.id != null) {
    tags.append(" :id=").append(StringHelper.idToString(id));
  }

  // nothing to add: hand back the base description untouched
  return tags.length() == 0 ? base : base + tags;
}
 
Example #5
Source File: BaseSegmentInfoFormatTestCase.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Test versions: for every supported version, writes a segment info with that
 * version and with either the same or a {@code null} minimum version, reads it
 * back and checks that both values survive the round trip.
 */
public void testVersions() throws Exception {
  Codec codec = getCodec();
  for (Version v : getVersions()) {
    for (Version minV : new Version[] { v, null}) {
      // try-with-resources: the directory is closed even when an assertion fails
      try (Directory dir = newDirectory()) {
        byte[] id = StringHelper.randomId();
        SegmentInfo info = new SegmentInfo(dir, v, minV, "_123", 1, false, codec,
                                           Collections.<String,String>emptyMap(), id, Collections.emptyMap(), null);
        info.setFiles(Collections.<String>emptySet());
        codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
        SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
        // expected value first, so failure messages read correctly
        assertEquals(v, info2.getVersion());
        if (supportsMinVersion()) {
          assertEquals(minV, info2.getMinVersion());
        } else {
          assertEquals(null, info2.getMinVersion());
        }
      }
    }
  }
}
 
Example #6
Source File: BaseSegmentInfoFormatTestCase.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Test attributes map: the attributes given to a segment info must be returned
 * unchanged after a write/read round trip, and the map handed back by the
 * reader must reject modification.
 */
public void testAttributes() throws Exception {
  // try-with-resources: the directory is closed even when an assertion fails
  try (Directory dir = newDirectory()) {
    Codec codec = getCodec();
    byte[] id = StringHelper.randomId();
    Map<String,String> attributes = new HashMap<>();
    attributes.put("key1", "value1");
    attributes.put("key2", "value2");
    SegmentInfo info = new SegmentInfo(dir, getVersions()[0], getVersions()[0], "_123", 1, false, codec,
                                       Collections.emptyMap(), id, attributes, null);
    info.setFiles(Collections.<String>emptySet());
    codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
    SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
    assertEquals(attributes, info2.getAttributes());

    // attributes map should be immutable
    expectThrows(UnsupportedOperationException.class, () -> {
      info2.getAttributes().put("bogus", "bogus");
    });
  }
}
 
Example #7
Source File: BaseSegmentInfoFormatTestCase.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Test diagnostics map: the diagnostics given to a segment info must be
 * returned unchanged after a write/read round trip, and the map handed back by
 * the reader must reject modification.
 */
public void testDiagnostics() throws Exception {
  // try-with-resources: the directory is closed even when an assertion fails
  try (Directory dir = newDirectory()) {
    Codec codec = getCodec();
    byte[] id = StringHelper.randomId();
    Map<String,String> diagnostics = new HashMap<>();
    diagnostics.put("key1", "value1");
    diagnostics.put("key2", "value2");
    SegmentInfo info = new SegmentInfo(dir, getVersions()[0], getVersions()[0], "_123", 1, false, codec,
                                       diagnostics, id, Collections.emptyMap(), null);
    info.setFiles(Collections.<String>emptySet());
    codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
    SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
    assertEquals(diagnostics, info2.getDiagnostics());

    // diagnostics map should be immutable
    expectThrows(UnsupportedOperationException.class, () -> {
      info2.getDiagnostics().put("bogus", "bogus");
    });
  }
}
 
Example #8
Source File: BaseSegmentInfoFormatTestCase.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Tests SI writer adds itself to files: after writing, the info's file set must
 * contain the original files plus at least one more, the set read back must be
 * equal to it, and the set handed back by the reader must reject modification.
 */
public void testAddsSelfToFiles() throws Exception {
  // try-with-resources: the directory is closed even when an assertion fails
  try (Directory dir = newDirectory()) {
    Codec codec = getCodec();
    byte[] id = StringHelper.randomId();
    SegmentInfo info = new SegmentInfo(dir, getVersions()[0], getVersions()[0], "_123", 1, false, codec,
                                       Collections.emptyMap(), id, Collections.emptyMap(), null);
    Set<String> originalFiles = Collections.singleton("_123.a");
    info.setFiles(originalFiles);
    codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);

    Set<String> modifiedFiles = info.files();
    assertTrue(modifiedFiles.containsAll(originalFiles));
    assertTrue("did you forget to add yourself to files()", modifiedFiles.size() > originalFiles.size());

    SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
    assertEquals(info.files(), info2.files());

    // files set should be immutable
    expectThrows(UnsupportedOperationException.class, () -> {
      info2.files().add("bogus");
    });
  }
}
 
Example #9
Source File: TestCodecUtil.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes an index header whose suffix is 255 characters long, then checks that
 * the header verifies and that it ends exactly at the end of the written data
 * and at the length reported by {@code CodecUtil.indexHeaderLength}.
 */
public void testWriteVeryLongSuffix() throws Exception {
  final int suffixLength = 255;
  final StringBuilder builder = new StringBuilder();
  while (builder.length() < suffixLength) {
    builder.append('a');
  }
  final String suffix = builder.toString();

  final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  final IndexOutput indexOut = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  final byte[] headerId = StringHelper.randomId();
  CodecUtil.writeIndexHeader(indexOut, "foobar", 5, headerId, suffix);
  indexOut.close();

  final IndexInput indexIn = new ByteBuffersIndexInput(buffer.toDataInput(), "temp");
  CodecUtil.checkIndexHeader(indexIn, "foobar", 5, 5, headerId, suffix);
  // the header is the only content, so the pointer must sit at the very end
  assertEquals(indexIn.getFilePointer(), indexIn.length());
  assertEquals(indexIn.getFilePointer(), CodecUtil.indexHeaderLength("foobar", suffix));
  indexIn.close();
}
 
Example #10
Source File: SimpleTextFieldsReader.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Scans the input line by line and records, for every line starting with
 * {@code FIELD}, the field name (the rest of the line, decoded as UTF-8) mapped
 * to the file pointer reached right after that line was read. Scanning stops
 * at the {@code END} line, after which the footer is checked.
 *
 * @param in input to scan; it is wrapped in a checksumming input
 * @return field names mapped to file pointers, sorted by name
 */
private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
  final ChecksumIndexInput checksumIn = new BufferedChecksumIndexInput(in);
  final BytesRefBuilder line = new BytesRefBuilder();
  final TreeMap<String,Long> pointersByField = new TreeMap<>();

  for (;;) {
    SimpleTextUtil.readLine(checksumIn, line);
    if (line.get().equals(END)) {
      break;
    }
    if (StringHelper.startsWith(line.get(), FIELD)) {
      // the name is whatever follows the FIELD marker on this line
      final int nameStart = FIELD.length;
      final int nameLength = line.length() - nameStart;
      final String name = new String(line.bytes(), nameStart, nameLength, StandardCharsets.UTF_8);
      pointersByField.put(name, checksumIn.getFilePointer());
    }
  }

  SimpleTextUtil.checkFooter(checksumIn);
  return pointersByField;
}
 
Example #11
Source File: TestPendingDeletes.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Deletes the three documents of a segment one by one (randomly writing live
 * docs in between) and checks that the segment is reported as fully deleted
 * only after the last delete.
 */
public void testIsFullyDeleted() throws IOException {
  // the directory was never closed before; try-with-resources releases it on every path
  try (Directory dir = new ByteBuffersDirectory()) {
    SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 3, false, Codec.getDefault(),
        Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
    SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId());
    FieldInfos fieldInfos = FieldInfos.EMPTY;
    si.getCodec().fieldInfosFormat().write(dir, si, "", fieldInfos, IOContext.DEFAULT);
    PendingDeletes deletes = newPendingDeletes(commitInfo);
    for (int i = 0; i < 3; i++) {
      assertTrue(deletes.delete(i));
      if (random().nextBoolean()) {
        assertTrue(deletes.writeLiveDocs(dir));
      }
      // only the third delete (i == 2) empties the 3-document segment
      assertEquals(i == 2, deletes.isFullyDeleted(() -> null));
    }
  }
}
 
Example #12
Source File: TestMergePolicy.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link MergePolicy.MergeSpecification} holding {@code numMerges}
 * merges, each over a single commit info that wraps a randomly generated
 * segment info.
 *
 * @param dir directory handed to every generated segment info
 * @param numMerges number of merges to add
 * @return the populated specification (never {@code null})
 */
private static MergePolicy.MergeSpecification createRandomMergeSpecification(Directory dir, int numMerges) {
  final MergePolicy.MergeSpecification spec = new MergePolicy.MergeSpecification();
  for (int merge = 0; merge < numMerges; merge++) {
    // random values are drawn in the same order as the constructor arguments they feed
    final String name = TestUtil.randomSimpleString(random());
    final int maxDoc = random().nextInt(1000);
    final boolean isCompoundFile = random().nextBoolean();
    final byte[] segmentId = TestUtil
        .randomSimpleString(random(), StringHelper.ID_LENGTH, StringHelper.ID_LENGTH)
        .getBytes(StandardCharsets.US_ASCII);

    final SegmentInfo segmentInfo = new SegmentInfo(
        dir,
        Version.LATEST, // version
        Version.LATEST, // min version
        name,
        maxDoc,
        isCompoundFile,
        null, // codec
        Collections.emptyMap(), // diagnostics
        segmentId,
        Collections.emptyMap(), // attributes
        null /* indexSort */);

    final List<SegmentCommitInfo> toMerge = new LinkedList<>();
    toMerge.add(new SegmentCommitInfo(segmentInfo, 0, 0, 0, 0, 0, StringHelper.randomId()));
    spec.add(new MergePolicy.OneMerge(toMerge));
  }
  return spec;
}
 
Example #13
Source File: TestSegmentInfos.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the id of a commit info equals the one passed at construction,
 * changes on each of {@code advanceDelGen}, {@code advanceDocValuesGen} and
 * {@code advanceFieldInfosGen}, and that a clone keeps its id when the original
 * advances afterwards.
 */
public void testIDChangesOnAdvance() throws IOException {
  try (BaseDirectoryWrapper dir = newDirectory()) {
    dir.setCheckIndexOnClose(false);
    byte[] lastId = StringHelper.randomId();
    final SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_0", 1, false, Codec.getDefault(),
        Collections.<String, String>emptyMap(), StringHelper.randomId(), Collections.<String, String>emptyMap(), null);
    final SegmentCommitInfo commitInfo = new SegmentCommitInfo(segmentInfo, 0, 0, -1, -1, -1, lastId);

    // freshly constructed: the id is the one that was passed in
    assertEquals(StringHelper.idToString(lastId), StringHelper.idToString(commitInfo.getId()));

    commitInfo.advanceDelGen();
    assertNotEquals(StringHelper.idToString(lastId), StringHelper.idToString(commitInfo.getId()));

    lastId = commitInfo.getId();
    commitInfo.advanceDocValuesGen();
    assertNotEquals(StringHelper.idToString(lastId), StringHelper.idToString(commitInfo.getId()));

    lastId = commitInfo.getId();
    commitInfo.advanceFieldInfosGen();
    assertNotEquals(StringHelper.idToString(lastId), StringHelper.idToString(commitInfo.getId()));

    // a clone starts out with the same id as its source
    final SegmentCommitInfo copy = commitInfo.clone();
    lastId = commitInfo.getId();
    assertEquals(StringHelper.idToString(lastId), StringHelper.idToString(commitInfo.getId()));
    assertEquals(StringHelper.idToString(lastId), StringHelper.idToString(copy.getId()));

    // advancing the source afterwards must leave the clone alone
    commitInfo.advanceFieldInfosGen();
    assertNotEquals(StringHelper.idToString(lastId), StringHelper.idToString(commitInfo.getId()));
    assertEquals("clone changed but shouldn't", StringHelper.idToString(lastId), StringHelper.idToString(copy.getId()));
  }
}
 
Example #14
Source File: TestSegmentInfos.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Commits two segments written with {@code Version.LUCENE_9_0_0}, reads the
 * commit back and checks the minimum segment version, the commit version and
 * that both commit-info ids survive the round trip.
 */
public void testVersionsTwoSegments() throws IOException {
  // try-with-resources: the directory is closed even when an assertion below fails
  try (BaseDirectoryWrapper dir = newDirectory()) {
    dir.setCheckIndexOnClose(false);
    byte[] id = StringHelper.randomId();
    Codec codec = Codec.getDefault();

    SegmentInfos sis = new SegmentInfos(Version.LATEST.major);
    SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_0", 1, false, Codec.getDefault(),
                                       Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null);
    info.setFiles(Collections.<String>emptySet());
    codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
    SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, StringHelper.randomId());
    sis.add(commitInfo);

    info = new SegmentInfo(dir, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_1", 1, false, Codec.getDefault(),
                           Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null);
    info.setFiles(Collections.<String>emptySet());
    codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
    commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, StringHelper.randomId());
    sis.add(commitInfo);

    sis.commit(dir);
    // remember the in-memory ids before replacing sis with the re-read commit
    byte[] commitInfoId0 = sis.info(0).getId();
    byte[] commitInfoId1 = sis.info(1).getId();
    sis = SegmentInfos.readLatestCommit(dir);
    assertEquals(Version.LUCENE_9_0_0, sis.getMinSegmentLuceneVersion());
    assertEquals(Version.LATEST, sis.getCommitLuceneVersion());
    assertEquals(StringHelper.idToString(commitInfoId0), StringHelper.idToString(sis.info(0).getId()));
    assertEquals(StringHelper.idToString(commitInfoId1), StringHelper.idToString(sis.info(1).getId()));
  }
}
 
Example #15
Source File: AutomatonTermsEnum.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether the given term is accepted by the automaton.
 * <p>
 * A term is accepted when it ends with the common suffix (if one is set) and
 * the run automaton matches its bytes. The returned status additionally says
 * whether the enumeration should seek: no seek is requested while in linear
 * mode (and, for rejections, while the term is still below the linear upper
 * bound).
 */
@Override
protected AcceptStatus accept(final BytesRef term) {
  final boolean suffixOk = commonSuffixRef == null || StringHelper.endsWith(term, commonSuffixRef);
  if (suffixOk && runAutomaton.run(term.bytes, term.offset, term.length)) {
    return linear ? AcceptStatus.YES : AcceptStatus.YES_AND_SEEK;
  }
  // both rejection paths (bad suffix, automaton mismatch) share the same answer
  final boolean stayLinear = linear && term.compareTo(linearUpperBound) < 0;
  return stayLinear ? AcceptStatus.NO : AcceptStatus.NO_AND_SEEK;
}
 
Example #16
Source File: TestCodecUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writing an index header with a non-ASCII suffix must be rejected with an
 * {@link IllegalArgumentException}.
 */
public void testWriteNonAsciiSuffix() throws Exception {
  final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  final IndexOutput indexOut = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  expectThrows(IllegalArgumentException.class,
      () -> CodecUtil.writeIndexHeader(indexOut, "foobar", 5, StringHelper.randomId(), "\u1234"));
}
 
Example #17
Source File: TestOneMergeWrappingMergePolicy.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a random merge specification for tests.
 * <p>
 * Roughly one call in ten returns {@code null}; otherwise the result holds
 * between 0 and 9 merges, each over a single commit info that wraps a randomly
 * generated (non-sensical) segment info.
 *
 * @param dir directory handed to every generated segment info
 * @return the random specification, or {@code null}
 */
private static MergePolicy.MergeSpecification createRandomMergeSpecification(Directory dir) {
  MergePolicy.MergeSpecification ms = null;
  if (0 < random().nextInt(10)) { // ~ 1 in 10 times return null
    ms = new MergePolicy.MergeSpecification();
    // append up to 9 (random non-sensical) one merge objects; the count is drawn
    // once instead of being re-rolled on every loop-condition check
    final int numMerges = random().nextInt(10);
    for (int ii = 0; ii < numMerges; ++ii) {
      final SegmentInfo si = new SegmentInfo(
          dir, // dir
          Version.LATEST, // version
          Version.LATEST, // min version
          TestUtil.randomSimpleString(random()), // name
          random().nextInt(1000), // maxDoc (bounded so it is never negative)
          random().nextBoolean(), // isCompoundFile
          null, // codec
          Collections.emptyMap(), // diagnostics
          TestUtil.randomSimpleString(// id
              random(),
              StringHelper.ID_LENGTH,
              StringHelper.ID_LENGTH).getBytes(StandardCharsets.US_ASCII),
          Collections.emptyMap(), // attributes
          null /* indexSort */);
      final List<SegmentCommitInfo> segments = new LinkedList<>();
      segments.add(new SegmentCommitInfo(si, 0, 0, 0, 0, 0, StringHelper.randomId()));
      ms.add(new MergePolicy.OneMerge(segments));
    }
  }
  // previously this returned null unconditionally, discarding the specification built above
  return ms;
}
 
Example #18
Source File: TestCodecUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writing an index header with a 256-character suffix must be rejected with an
 * {@link IllegalArgumentException}.
 */
public void testWriteTooLongSuffix() throws Exception {
  final int suffixLength = 256;
  final StringBuilder builder = new StringBuilder();
  while (builder.length() < suffixLength) {
    builder.append('a');
  }
  final String suffix = builder.toString();

  final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  final IndexOutput indexOut = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  expectThrows(IllegalArgumentException.class,
      () -> CodecUtil.writeIndexHeader(indexOut, "foobar", 5, StringHelper.randomId(), suffix));
}
 
Example #19
Source File: TestIndexWriterThreadsToSegments.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the named file and returns the {@code StringHelper.ID_LENGTH} id bytes
 * that follow its three leading header fields.
 *
 * @param dir directory containing the file
 * @param file name of the file to open
 * @return the id bytes read from the header
 */
byte[] readSegmentInfoID(Directory dir, String file) throws IOException {
  try (IndexInput header = dir.openInput(file, IOContext.DEFAULT)) {
    // the three leading fields are read only to advance past them
    header.readInt();    // magic
    header.readString(); // codec name
    header.readInt();    // version
    final byte[] segmentId = new byte[StringHelper.ID_LENGTH];
    header.readBytes(segmentId, 0, segmentId.length);
    return segmentId;
  }
}
 
Example #20
Source File: TestPendingDeletes.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Deletes three documents of a 10-document segment and checks the pending
 * delete count, that a repeated delete is reported as a no-op, and that a
 * previously obtained live-docs instance is a snapshot that does not see later
 * deletes.
 */
public void testDeleteDoc() throws IOException {
  // try-with-resources: the directory is closed even when an assertion below fails
  try (Directory dir = new ByteBuffersDirectory()) {
    SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 10, false, Codec.getDefault(),
        Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
    SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId());
    PendingDeletes deletes = newPendingDeletes(commitInfo);
    assertNull(deletes.getLiveDocs());
    // stays within 0..7 so it never collides with docs 8 and 9 deleted below
    int docToDelete = TestUtil.nextInt(random(), 0, 7);
    assertTrue(deletes.delete(docToDelete));
    assertNotNull(deletes.getLiveDocs());
    assertEquals(1, deletes.numPendingDeletes());

    Bits liveDocs = deletes.getLiveDocs();
    assertFalse(liveDocs.get(docToDelete));
    assertFalse(deletes.delete(docToDelete)); // delete again

    assertTrue(liveDocs.get(8));
    assertTrue(deletes.delete(8));
    assertTrue(liveDocs.get(8)); // we have a snapshot
    assertEquals(2, deletes.numPendingDeletes());

    assertTrue(liveDocs.get(9));
    assertTrue(deletes.delete(9));
    assertTrue(liveDocs.get(9));

    // now make sure new live docs see the deletions
    liveDocs = deletes.getLiveDocs();
    assertFalse(liveDocs.get(9));
    assertFalse(liveDocs.get(8));
    assertFalse(liveDocs.get(docToDelete));
    assertEquals(3, deletes.numPendingDeletes());
  }
}
 
Example #21
Source File: TestCodecUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes an index header followed by a string, then seeks to the offset
 * reported by {@code CodecUtil.indexHeaderLength} and checks that the string is
 * read back from exactly that position.
 */
public void testSegmentHeaderLength() throws Exception {
  final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  final IndexOutput indexOut = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  CodecUtil.writeIndexHeader(indexOut, "FooBar", 5, StringHelper.randomId(), "xyz");
  indexOut.writeString("this is the data");
  indexOut.close();

  final IndexInput indexIn = new ByteBuffersIndexInput(buffer.toDataInput(), "temp");
  // the payload must start right where the computed header length says it does
  final long payloadStart = CodecUtil.indexHeaderLength("FooBar", "xyz");
  indexIn.seek(payloadStart);
  assertEquals("this is the data", indexIn.readString());
  indexIn.close();
}
 
Example #22
Source File: TestDoc.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Merges the two given segments into a new segment named {@code merged} and
 * returns a commit info wrapping it.
 *
 * @param dir directory the compound file is written to when {@code useCompoundFile} is set
 * @param si1 first segment to merge; its directory also receives the merged files
 * @param si2 second segment to merge
 * @param merged name of the new segment
 * @param useCompoundFile when {@code true}, the merged files are packed into a
 *          compound file and the individual files are deleted afterwards
 * @return commit info for the merged segment, with a fresh random id
 */
private SegmentCommitInfo merge(Directory dir, SegmentCommitInfo si1, SegmentCommitInfo si2, String merged, boolean useCompoundFile)
  throws Exception {
  IOContext context = newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1)));

  final Codec codec = Codec.getDefault();
  TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
  final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, null, merged, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);

  // try-with-resources: both readers are closed even when opening the second one or the merge itself throws
  try (SegmentReader r1 = new SegmentReader(si1, Version.LATEST.major, context);
       SegmentReader r2 = new SegmentReader(si2, Version.LATEST.major, context)) {
    SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(r1, r2),
                                             si, InfoStream.getDefault(), trackingDir,
                                             new FieldInfos.FieldNumbers(null), context);
    // the returned merge state is not needed here
    merger.merge();
  }
  // the tracking wrapper recorded every file created through it
  si.setFiles(new HashSet<>(trackingDir.getCreatedFiles()));

  if (useCompoundFile) {
    // capture the file list before writing the compound file
    Collection<String> filesToDelete = si.files();
    codec.compoundFormat().write(dir, si, context);
    si.setUseCompoundFile(true);
    for (String name : filesToDelete) {
      si1.info.dir.deleteFile(name);
    }
  }

  return new SegmentCommitInfo(si, 0, 0, -1L, -1L, -1L, StringHelper.randomId());
}
 
Example #23
Source File: DocTermOrds.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Refreshes {@code term} from the terms enum and clears it again when a prefix
 * is set and the current term does not start with it.
 *
 * @return the new value of {@code term}; {@code null} when the current term is
 *         outside the prefix
 */
private BytesRef setTerm() throws IOException {
  term = termsEnum.term();
  final boolean withinPrefix = prefix == null || StringHelper.startsWith(term, prefix);
  if (!withinPrefix) {
    term = null;
  }
  return term;
}
 
Example #24
Source File: CodecUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Expert: reads {@code StringHelper.ID_LENGTH} bytes from the input and
 * verifies that they equal the expected object ID of an index header.
 *
 * @param in input positioned at the ID
 * @param expectedID the ID the header is required to carry
 * @return the ID that was read
 * @throws CorruptIndexException if the ID read differs from {@code expectedID}
 */
public static byte[] checkIndexHeaderID(DataInput in, byte[] expectedID) throws IOException {
  final byte[] actualID = new byte[StringHelper.ID_LENGTH];
  in.readBytes(actualID, 0, actualID.length);
  if (Arrays.equals(actualID, expectedID)) {
    return actualID;
  }
  throw new CorruptIndexException("file mismatch, expected id=" + StringHelper.idToString(expectedID)
                                                     + ", got=" + StringHelper.idToString(actualID), in);
}
 
Example #25
Source File: MultiPhrasePrefixQuery.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Collects into {@code terms} every term of {@code field} that starts with the
 * bytes of {@code prefix}, visiting the reader's leaves one by one, and stops
 * as soon as the set holds {@code maxExpansions} entries.
 *
 * @param terms set receiving a deep copy of each matching term
 * @param prefix term whose bytes are the prefix to match
 * @param reader reader whose leaves are scanned
 */
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // Leaves are walked individually: per the original author's note, SlowCompositeReaderWrapper would merge
    // the terms of every segment into a single instance, which is very expensive.
    for (LeafReaderContext context : reader.leaves()) {
        final Terms leafTerms = context.reader().terms(field);
        if (leafTerms == null) {
            continue;
        }

        final TermsEnum leafEnum = leafTerms.iterator();
        if (leafEnum.seekCeil(prefix.bytes()) == TermsEnum.SeekStatus.END) {
            continue;
        }

        // the enum now sits on the first term >= prefix; walk forward while the prefix still matches
        BytesRef candidate = leafEnum.term();
        while (candidate != null && StringHelper.startsWith(candidate, prefix.bytes())) {
            terms.add(new Term(field, BytesRef.deepCopyOf(candidate)));
            if (terms.size() >= maxExpansions) {
                return;
            }
            candidate = leafEnum.next();
        }
    }
}
 
Example #26
Source File: ModuloBucketBuilder.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Computes a hash code for the given value: {@code 0} for {@code null}, a
 * murmurhash3 with the fixed seed {@code 1} for a {@link BytesRef}, and the
 * value's own {@code hashCode()} otherwise.
 */
private static int hashCode(@Nullable Object value) {
    if (value instanceof BytesRef) {
        // Original author's note: since lucene 4.8 BytesRef.hashCode() uses a seed that differs between
        // JVMs, so the hashCode / routing would differ on each node and break the group by redistribution
        // logic. A fixed seed keeps the result consistent.
        return StringHelper.murmurhash3_x86_32((BytesRef) value, 1);
    }
    return value == null ? 0 : value.hashCode();
}
 
Example #27
Source File: SrndPrefixQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Visits every term of the given field that matches this prefix query
 * (inspired by PrefixQuery.rewrite()).
 * <p>
 * The terms enum is positioned at the prefix; the term found there is visited
 * if it is the prefix itself or starts with it, and the enum is then advanced
 * for as long as the following terms still start with the prefix.
 */
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  final Terms fieldTerms = MultiTerms.getTerms(reader, fieldName);
  if (fieldTerms == null) {
    return;
  }
  final TermsEnum termsEnum = fieldTerms.iterator();

  final TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getPrefix()));
  if (status == TermsEnum.SeekStatus.FOUND) {
    // the prefix itself is a term
    mtv.visitMatchingTerm(getLucenePrefixTerm(fieldName));
  } else if (status == TermsEnum.SeekStatus.NOT_FOUND
      && StringHelper.startsWith(termsEnum.term(), prefixRef)) {
    mtv.visitMatchingTerm(new Term(fieldName, termsEnum.term().utf8ToString()));
  } else {
    // EOF, or the first term at/after the prefix does not start with it
    return;
  }

  for (BytesRef text = termsEnum.next();
       text != null && StringHelper.startsWith(text, prefixRef);
       text = termsEnum.next()) {
    mtv.visitMatchingTerm(new Term(fieldName, text.utf8ToString()));
  }
}
 
Example #28
Source File: SrndTruncQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Visits every term of the given field that starts with the prefix and whose
 * remainder (the text after the prefix) matches the pattern.
 * <p>
 * Terms are enumerated from the prefix onwards and enumeration ends at the
 * first term that no longer starts with the prefix.
 */
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  final int prefixLength = prefix.length();
  final Terms fieldTerms = MultiTerms.getTerms(reader, fieldName);
  if (fieldTerms == null) {
    return;
  }

  // one matcher is reused for all terms and reset for each candidate
  final Matcher matcher = pattern.matcher("");
  try {
    final TermsEnum termsEnum = fieldTerms.iterator();
    final TermsEnum.SeekStatus status = termsEnum.seekCeil(prefixRef);

    // starting term: the prefix itself, the next term after it, or nothing at EOF
    BytesRef text = status == TermsEnum.SeekStatus.FOUND ? prefixRef
        : status == TermsEnum.SeekStatus.NOT_FOUND ? termsEnum.term()
        : null;

    for (; text != null && StringHelper.startsWith(text, prefixRef); text = termsEnum.next()) {
      final String textString = text.utf8ToString();
      matcher.reset(textString.substring(prefixLength));
      if (matcher.matches()) {
        mtv.visitMatchingTerm(new Term(fieldName, textString));
      }
    }
  } finally {
    matcher.reset();
  }
}
 
Example #29
Source File: SimpleTextUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the next line of the input and verifies that it is a checksum line
 * whose value equals the checksum the input reported before that line was
 * read, and that nothing follows it.
 *
 * @param input checksumming input positioned at the footer line
 * @throws CorruptIndexException if the line is not a checksum line, the
 *         checksums differ, or the file continues past the footer
 */
public static void checkFooter(ChecksumIndexInput input) throws IOException {
  // taken before the footer line is read, as in the original statement order
  final String expected = String.format(Locale.ROOT, "%020d", input.getChecksum());
  final BytesRefBuilder line = new BytesRefBuilder();
  readLine(input, line);
  if (!StringHelper.startsWith(line.get(), CHECKSUM)) {
    throw new CorruptIndexException("SimpleText failure: expected checksum line but got " + line.get().utf8ToString(), input);
  }
  // the checksum value is whatever follows the CHECKSUM marker on the line
  final int valueStart = CHECKSUM.length;
  final String actual = new BytesRef(line.bytes(), valueStart, line.length() - valueStart).utf8ToString();
  if (!expected.equals(actual)) {
    throw new CorruptIndexException("SimpleText checksum failure: " + actual + " != " + expected, input);
  }
  if (input.length() != input.getFilePointer()) {
    throw new CorruptIndexException("Unexpected stuff at the end of file, please be careful with your text editor!", input);
  }
}
 
Example #30
Source File: MultiPhrasePrefixQuery.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Gathers, leaf by leaf, the terms of {@code field} that begin with the bytes
 * of {@code prefix} and adds a deep copy of each to {@code terms}. Collection
 * ends early once {@code terms} has reached {@code maxExpansions} entries.
 *
 * @param terms set receiving the matching terms
 * @param prefix term whose bytes are the prefix to match
 * @param reader reader whose leaves are scanned
 */
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // Each leaf is iterated on its own: per the original author's note, SlowCompositeReaderWrapper would
    // merge all terms from each segment into one terms instance, which is very expensive.
    for (LeafReaderContext leafContext : reader.leaves()) {
        final Terms segmentTerms = leafContext.reader().terms(field);
        if (segmentTerms == null) {
            continue;
        }

        final TermsEnum segmentEnum = segmentTerms.iterator();
        final TermsEnum.SeekStatus seekResult = segmentEnum.seekCeil(prefix.bytes());
        if (seekResult == TermsEnum.SeekStatus.END) {
            continue;
        }

        // positioned on the first term >= prefix; keep going while terms still carry the prefix
        BytesRef current = segmentEnum.term();
        while (current != null && StringHelper.startsWith(current, prefix.bytes())) {
            terms.add(new Term(field, BytesRef.deepCopyOf(current)));
            if (terms.size() >= maxExpansions) {
                return;
            }
            current = segmentEnum.next();
        }
    }
}