Java Code Examples for org.apache.lucene.util.StringHelper

The following examples show how to use org.apache.lucene.util.StringHelper. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: lucene-solr   Source File: SimpleTextFieldsReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Scans the input line by line until the {@code END} marker and collects one entry per
 * line that starts with {@code FIELD}.
 *
 * @param in the input to scan; it is wrapped in a checksumming input before reading
 * @return a sorted map from field name (the UTF-8 text following the {@code FIELD} prefix)
 *         to the file pointer observed right after that line was read
 * @throws IOException if reading fails or the footer check fails
 */
private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
  final ChecksumIndexInput checksumInput = new BufferedChecksumIndexInput(in);
  final BytesRefBuilder line = new BytesRefBuilder();
  final TreeMap<String,Long> fieldOffsets = new TreeMap<>();

  for (;;) {
    SimpleTextUtil.readLine(checksumInput, line);
    if (line.get().equals(END)) {
      break;
    }
    if (StringHelper.startsWith(line.get(), FIELD)) {
      // everything after the FIELD prefix is the field name
      final int nameLength = line.length() - FIELD.length;
      final String fieldName = new String(line.bytes(), FIELD.length, nameLength, StandardCharsets.UTF_8);
      fieldOffsets.put(fieldName, checksumInput.getFilePointer());
    }
  }

  // the END marker was reached: verify the footer before handing back the result
  SimpleTextUtil.checkFooter(checksumInput);
  return fieldOffsets;
}
 
Example 2
Source Project: lucene-solr   Source File: BaseSegmentInfoFormatTestCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that writing a segment info grows the info's file set beyond the files that were
 * set explicitly, and that the file set read back is equal and cannot be modified.
 */
public void testAddsSelfToFiles() throws Exception {
  final Directory directory = newDirectory();
  final Codec codec = getCodec();
  final byte[] segmentId = StringHelper.randomId();
  final Version version = getVersions()[0];
  final Version minVersion = getVersions()[0];
  final SegmentInfo written = new SegmentInfo(directory, version, minVersion, "_123", 1, false, codec,
                                              Collections.emptyMap(), segmentId, Collections.emptyMap(), null);
  final Set<String> filesBeforeWrite = Collections.singleton("_123.a");
  written.setFiles(filesBeforeWrite);
  codec.segmentInfoFormat().write(directory, written, IOContext.DEFAULT);

  // the writer is expected to have registered at least one additional file
  final Set<String> filesAfterWrite = written.files();
  assertTrue(filesAfterWrite.containsAll(filesBeforeWrite));
  assertTrue("did you forget to add yourself to files()", filesAfterWrite.size() > filesBeforeWrite.size());

  final SegmentInfo reread = codec.segmentInfoFormat().read(directory, "_123", segmentId, IOContext.DEFAULT);
  assertEquals(written.files(), reread.files());

  // the files set read back must reject modification
  expectThrows(UnsupportedOperationException.class, () -> reread.files().add("bogus"));

  directory.close();
}
 
Example 3
Source Project: lucene-solr   Source File: BaseSegmentInfoFormatTestCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips a segment info carrying two diagnostics entries and checks that the
 * diagnostics read back are equal and cannot be modified.
 */
public void testDiagnostics() throws Exception {
  final Directory directory = newDirectory();
  final Codec codec = getCodec();
  final byte[] segmentId = StringHelper.randomId();

  final Map<String,String> expectedDiagnostics = new HashMap<>();
  expectedDiagnostics.put("key1", "value1");
  expectedDiagnostics.put("key2", "value2");

  final Version version = getVersions()[0];
  final Version minVersion = getVersions()[0];
  final SegmentInfo written = new SegmentInfo(directory, version, minVersion, "_123", 1, false, codec,
                                              expectedDiagnostics, segmentId, Collections.emptyMap(), null);
  written.setFiles(Collections.<String>emptySet());
  codec.segmentInfoFormat().write(directory, written, IOContext.DEFAULT);

  final SegmentInfo reread = codec.segmentInfoFormat().read(directory, "_123", segmentId, IOContext.DEFAULT);
  assertEquals(expectedDiagnostics, reread.getDiagnostics());

  // the diagnostics map read back must reject modification
  expectThrows(UnsupportedOperationException.class, () -> reread.getDiagnostics().put("bogus", "bogus"));

  directory.close();
}
 
Example 4
Source Project: lucene-solr   Source File: BaseSegmentInfoFormatTestCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips a segment info carrying two attributes and checks that the attributes
 * read back are equal and cannot be modified.
 */
public void testAttributes() throws Exception {
  final Directory directory = newDirectory();
  final Codec codec = getCodec();
  final byte[] segmentId = StringHelper.randomId();

  final Map<String,String> expectedAttributes = new HashMap<>();
  expectedAttributes.put("key1", "value1");
  expectedAttributes.put("key2", "value2");

  final Version version = getVersions()[0];
  final Version minVersion = getVersions()[0];
  final SegmentInfo written = new SegmentInfo(directory, version, minVersion, "_123", 1, false, codec,
                                              Collections.emptyMap(), segmentId, expectedAttributes, null);
  written.setFiles(Collections.<String>emptySet());
  codec.segmentInfoFormat().write(directory, written, IOContext.DEFAULT);

  final SegmentInfo reread = codec.segmentInfoFormat().read(directory, "_123", segmentId, IOContext.DEFAULT);
  assertEquals(expectedAttributes, reread.getAttributes());

  // the attributes map read back must reject modification
  expectThrows(UnsupportedOperationException.class, () -> reread.getAttributes().put("bogus", "bogus"));

  directory.close();
}
 
Example 5
Source Project: lucene-solr   Source File: BaseSegmentInfoFormatTestCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips a segment info for every version returned by {@code getVersions()}, once with
 * the min version equal to the version and once with a {@code null} min version, and checks
 * the versions read back.
 *
 * <p>When {@code supportsMinVersion()} is false the min version read back must be
 * {@code null} regardless of what was written.
 */
public void testVersions() throws Exception {
  Codec codec = getCodec();
  for (Version v : getVersions()) {
    for (Version minV : new Version[] { v, null}) {
      Directory dir = newDirectory();
      byte id[] = StringHelper.randomId();
      SegmentInfo info = new SegmentInfo(dir, v, minV, "_123", 1, false, codec, 
                                         Collections.<String,String>emptyMap(), id, Collections.emptyMap(), null);
      info.setFiles(Collections.<String>emptySet());
      codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
      SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
      // assertEquals takes (expected, actual); keeping that order makes failure messages accurate
      assertEquals(v, info2.getVersion());
      if (supportsMinVersion()) {
        assertEquals(minV, info2.getMinVersion());
      } else {
        assertNull(info2.getMinVersion());
      }
      dir.close();
    }
  }
}
 
Example 6
Source Project: lucene-solr   Source File: CodecUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads the complete index header from the start of the given {@link IndexInput} and
 * returns it as a byte array.
 *
 * <p>The input is first rewound to position 0 and the magic number is validated. The
 * header is then walked to discover its total length before it is read in one go.
 *
 * @param in the input to read from; on return it is positioned just past the header
 * @return the raw header bytes
 * @throws CorruptIndexException if the leading int is not {@code CODEC_MAGIC}
 * @throws IOException if reading fails
 */
public static byte[] readIndexHeader(IndexInput in) throws IOException {
  in.seek(0);
  final int magic = in.readInt();
  if (magic != CODEC_MAGIC) {
    throw new CorruptIndexException("codec header mismatch: actual header=" + magic + " vs expected header=" + CODEC_MAGIC, in);
  }

  final String codecName = in.readString();
  in.readInt(); // value is skipped here; only the position matters
  in.seek(in.getFilePointer() + StringHelper.ID_LENGTH); // jump over the id bytes
  final int suffixLength = in.readByte() & 0xFF; // length byte treated as unsigned

  // header + id + one length byte + suffix
  final int totalLength = headerLength(codecName) + StringHelper.ID_LENGTH + 1 + suffixLength;
  final byte[] header = new byte[totalLength];
  in.seek(0);
  in.readBytes(header, 0, totalLength);
  return header;
}
 
Example 7
Source Project: lucene-solr   Source File: SegmentCommitInfo.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Sole constructor.
 * @param info
 *          {@link SegmentInfo} that we wrap
 * @param delCount
 *          number of deleted documents in this segment
 * @param delGen
 *          deletion generation number (used to name deletion files)
 * @param fieldInfosGen
 *          FieldInfos generation number (used to name field-infos files)
 * @param docValuesGen
 *          DocValues generation number (used to name doc-values updates files)
 * @param id Id that uniquely identifies this segment commit. This id must be 16 bytes long. See {@link StringHelper#randomId()}
 */
public SegmentCommitInfo(SegmentInfo info, int delCount, int softDelCount, long delGen, long fieldInfosGen, long docValuesGen, byte[] id) {
  this.info = info;
  this.delCount = delCount;
  this.softDelCount = softDelCount;
  this.delGen = delGen;
  this.nextWriteDelGen = delGen == -1 ? 1 : delGen + 1;
  this.fieldInfosGen = fieldInfosGen;
  this.nextWriteFieldInfosGen = fieldInfosGen == -1 ? 1 : fieldInfosGen + 1;
  this.docValuesGen = docValuesGen;
  this.nextWriteDocValuesGen = docValuesGen == -1 ? 1 : docValuesGen + 1;
  this.id = id;
  if (id != null && id.length != StringHelper.ID_LENGTH) {
    throw new IllegalArgumentException("invalid id: " + Arrays.toString(id));
  }
}
 
Example 8
Source Project: lucene-solr   Source File: SegmentCommitInfo.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns a description of this segment.
 *
 * <p>The description starts with the wrapped info's own description, computed for
 * {@code delCount + pendingDelCount}, followed by one suffix for each generation that is
 * not {@code -1}, the soft-delete count when positive, and the id when present.
 *
 * @param pendingDelCount number added to this commit's delete count for the description
 */
public String toString(int pendingDelCount) {
  final String base = info.toString(delCount + pendingDelCount);

  final StringBuilder suffix = new StringBuilder();
  if (delGen != -1) {
    suffix.append(":delGen=").append(delGen);
  }
  if (fieldInfosGen != -1) {
    suffix.append(":fieldInfosGen=").append(fieldInfosGen);
  }
  if (docValuesGen != -1) {
    suffix.append(":dvGen=").append(docValuesGen);
  }
  if (softDelCount > 0) {
    suffix.append(" :softDel=").append(softDelCount);
  }
  if (this.id != null) {
    suffix.append(" :id=").append(StringHelper.idToString(id));
  }

  // nothing to add: hand back the base description untouched
  return suffix.length() == 0 ? base : base + suffix;
}
 
Example 9
Source Project: lucene-solr   Source File: TestCodecUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes an index header with a 255-character suffix, then checks that the header can be
 * verified and that its length matches both the input length and
 * {@code CodecUtil.indexHeaderLength}.
 */
public void testWriteVeryLongSuffix() throws Exception {
  final StringBuilder suffixBuilder = new StringBuilder();
  while (suffixBuilder.length() < 255) {
    suffixBuilder.append('a');
  }
  final String suffix = suffixBuilder.toString();

  final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  final IndexOutput headerOut = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  final byte[] headerId = StringHelper.randomId();
  CodecUtil.writeIndexHeader(headerOut, "foobar", 5, headerId, suffix);
  headerOut.close();

  final IndexInput headerIn = new ByteBuffersIndexInput(buffer.toDataInput(), "temp");
  CodecUtil.checkIndexHeader(headerIn, "foobar", 5, 5, headerId, suffix);
  // the header is the only content, so checking it must consume the whole input
  assertEquals(headerIn.getFilePointer(), headerIn.length());
  assertEquals(headerIn.getFilePointer(), CodecUtil.indexHeaderLength("foobar", suffix));
  headerIn.close();
}
 
Example 10
Source Project: lucene-solr   Source File: TestSegmentInfos.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Commits a single {@code LUCENE_9_0_0} segment and checks the minimum segment version and
 * the commit version reported after reading the latest commit back.
 */
public void testVersionsOneSegment() throws IOException {
  final BaseDirectoryWrapper directory = newDirectory();
  directory.setCheckIndexOnClose(false);
  final byte[] segmentId = StringHelper.randomId();
  final Codec codec = Codec.getDefault();

  final SegmentInfos written = new SegmentInfos(Version.LATEST.major);
  final SegmentInfo segment = new SegmentInfo(directory, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_0", 1, false,
                                              Codec.getDefault(), Collections.<String,String>emptyMap(), segmentId,
                                              Collections.<String,String>emptyMap(), null);
  segment.setFiles(Collections.<String>emptySet());
  codec.segmentInfoFormat().write(directory, segment, IOContext.DEFAULT);

  written.add(new SegmentCommitInfo(segment, 0, 0, -1, -1, -1, StringHelper.randomId()));
  written.commit(directory);

  final SegmentInfos reread = SegmentInfos.readLatestCommit(directory);
  assertEquals(Version.LUCENE_9_0_0, reread.getMinSegmentLuceneVersion());
  assertEquals(Version.LATEST, reread.getCommitLuceneVersion());
  directory.close();
}
 
Example 11
Source Project: lucene-solr   Source File: TestPendingDeletes.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Deletes the three documents of a segment one at a time, optionally writing live docs
 * after each delete, and checks that {@code isFullyDeleted} reports {@code true} only
 * after the last document has been deleted.
 */
public void testIsFullyDeleted() throws IOException {
  // try-with-resources so the directory is closed even when an assertion fails
  try (Directory dir = new ByteBuffersDirectory()) {
    SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 3, false, Codec.getDefault(),
        Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
    SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId());
    FieldInfos fieldInfos = FieldInfos.EMPTY;
    si.getCodec().fieldInfosFormat().write(dir, si, "", fieldInfos, IOContext.DEFAULT);
    PendingDeletes deletes = newPendingDeletes(commitInfo);
    for (int i = 0; i < 3; i++) {
      assertTrue(deletes.delete(i));
      if (random().nextBoolean()) {
        assertTrue(deletes.writeLiveDocs(dir));
      }
      // only the third delete (i == 2) empties the segment
      assertEquals(i == 2, deletes.isFullyDeleted(() -> null));
    }
  }
}
 
Example 12
Source Project: lucene-solr   Source File: TestMergePolicy.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a merge specification holding {@code numMerges} merges, each made of a single
 * randomly parameterised segment.
 *
 * @param dir directory handed to every generated {@link SegmentInfo}
 * @param numMerges number of one-segment merges to add
 * @return the populated specification (never {@code null})
 */
private static MergePolicy.MergeSpecification createRandomMergeSpecification(Directory dir, int numMerges) {
  final MergePolicy.MergeSpecification specification = new MergePolicy.MergeSpecification();
  for (int merge = 0; merge < numMerges; merge++) {
    // random values are drawn in the same order as the constructor arguments below
    final String name = TestUtil.randomSimpleString(random());
    final int maxDoc = random().nextInt(1000);
    final boolean isCompoundFile = random().nextBoolean();
    final byte[] id = TestUtil
        .randomSimpleString(random(), StringHelper.ID_LENGTH, StringHelper.ID_LENGTH)
        .getBytes(StandardCharsets.US_ASCII);

    final SegmentInfo segment = new SegmentInfo(dir, Version.LATEST, Version.LATEST, name, maxDoc, isCompoundFile,
        null /* codec */, Collections.emptyMap() /* diagnostics */, id,
        Collections.emptyMap() /* attributes */, null /* indexSort */);

    final List<SegmentCommitInfo> mergeSegments = new LinkedList<>();
    mergeSegments.add(new SegmentCommitInfo(segment, 0, 0, 0, 0, 0, StringHelper.randomId()));
    specification.add(new MergePolicy.OneMerge(mergeSegments));
  }
  return specification;
}
 
Example 13
Source Project: Elasticsearch   Source File: ModuloBucketBuilder.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Computes a hash code for {@code value}.
 *
 * @param value the value to hash, may be {@code null}
 * @return {@code 0} for {@code null}, a murmurhash3 with fixed seed {@code 1} for a
 *         {@link BytesRef}, and {@code value.hashCode()} for anything else
 */
private static int hashCode(@Nullable Object value) {
    if (value instanceof BytesRef) {
        // Since lucene 4.8 BytesRef.hashCode() uses a seed that differs between JVMs, so the
        // hashCode / routing would differ on each node and break the group by redistribution
        // logic. A fixed seed keeps the result consistent.
        final BytesRef bytes = (BytesRef) value;
        return StringHelper.murmurhash3_x86_32(bytes, 1);
    }
    return value == null ? 0 : value.hashCode();
}
 
Example 14
Source Project: Elasticsearch   Source File: MultiPhrasePrefixQuery.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Collects into {@code terms} the terms of this query's field that start with the bytes of
 * {@code prefix}, visiting the reader's leaves one by one and stopping as soon as
 * {@code maxExpansions} terms have been collected.
 *
 * @param terms  the set receiving the matching terms (each one is deep-copied)
 * @param prefix the prefix to expand
 * @param reader the reader whose leaves are scanned
 * @throws IOException if reading the terms fails
 */
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // SlowCompositeReaderWrapper could be used instead, but it would merge the terms of all
    // segments into a single instance, which is very expensive. Each leaf is therefore
    // iterated on its own.
    for (LeafReaderContext context : reader.leaves()) {
        final Terms leafTerms = context.reader().terms(field);
        if (leafTerms == null) {
            continue;
        }

        final TermsEnum iterator = leafTerms.iterator();
        if (iterator.seekCeil(prefix.bytes()) == TermsEnum.SeekStatus.END) {
            continue;
        }

        // walk forward from the seek position until a term no longer carries the prefix
        BytesRef current = iterator.term();
        while (current != null && StringHelper.startsWith(current, prefix.bytes())) {
            terms.add(new Term(field, BytesRef.deepCopyOf(current)));
            if (terms.size() >= maxExpansions) {
                return;
            }
            current = iterator.next();
        }
    }
}
 
Example 15
Source Project: lucene-solr   Source File: FixedGapTermsIndexWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns how many leading bytes of {@code indexedTerm} are kept, as computed by
 * {@link StringHelper#sortKeyLength} for the pair ({@code priorTerm}, {@code indexedTerm}).
 *
 * <p>NOTE: a codec that does not sort terms in unicode code point order must override
 * this method and simply return {@code indexedTerm.length}.
 */
protected int indexedTermPrefixLength(final BytesRef priorTerm, final BytesRef indexedTerm) {
  // With terms sorted in unicode codepoint order the non-distinguishing suffix can be
  // dropped safely, which saves RAM in the loaded terms index.
  final int prefixLength = StringHelper.sortKeyLength(priorTerm, indexedTerm);
  return prefixLength;
}
 
Example 16
Source Project: lucene-solr   Source File: SimpleTextUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the next line of {@code input} and verifies that it is a checksum line whose value
 * matches the checksum accumulated by the input before that line, and that nothing
 * follows it.
 *
 * @param input the checksumming input positioned at the footer line
 * @throws CorruptIndexException if the line is not a checksum line, the checksum differs,
 *         or there is data after the footer
 * @throws IOException if reading fails
 */
public static void checkFooter(ChecksumIndexInput input) throws IOException {
  // capture the checksum before the footer line itself is consumed
  final String expected = String.format(Locale.ROOT, "%020d", input.getChecksum());

  final BytesRefBuilder footerLine = new BytesRefBuilder();
  readLine(input, footerLine);
  if (!StringHelper.startsWith(footerLine.get(), CHECKSUM)) {
    throw new CorruptIndexException("SimpleText failure: expected checksum line but got " + footerLine.get().utf8ToString(), input);
  }

  final int valueLength = footerLine.length() - CHECKSUM.length;
  final String actual = new BytesRef(footerLine.bytes(), CHECKSUM.length, valueLength).utf8ToString();
  if (expected.equals(actual) == false) {
    throw new CorruptIndexException("SimpleText checksum failure: " + actual + " != " + expected, input);
  }

  if (input.getFilePointer() != input.length()) {
    throw new CorruptIndexException("Unexpected stuff at the end of file, please be careful with your text editor!", input);
  }
}
 
Example 17
Source Project: lucene-solr   Source File: BaseMergePolicyTestCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Feeds the merge policy a random number of segments whose delete count is zero and checks
 * that {@code findForcedDeletesMerges} either returns {@code null} or proposes no merges.
 * The test is skipped when the policy wraps a {@code MockRandomMergePolicy}.
 */
public void testFindForcedDeletesMerges() throws IOException {
  final MergePolicy policy = mergePolicy();
  if (policy instanceof FilterMergePolicy) {
    final FilterMergePolicy filter = (FilterMergePolicy) policy;
    assumeFalse("test doesn't work with MockRandomMP",
        filter.in instanceof MockRandomMergePolicy);
  }

  final SegmentInfos segmentInfos = new SegmentInfos(Version.LATEST.major);
  try (Directory directory = newDirectory()) {
    final MergePolicy.MergeContext mergeContext = new MockMergeContext(s -> 0);
    final int segmentCount = random().nextInt(10);
    for (int segment = 0; segment < segmentCount; segment++) {
      // random values are drawn in the same order as the constructor arguments below
      final String name = TestUtil.randomSimpleString(random());
      final int maxDoc = random().nextInt(Integer.MAX_VALUE);
      final boolean isCompoundFile = random().nextBoolean();
      final byte[] id = TestUtil
          .randomSimpleString(random(), StringHelper.ID_LENGTH, StringHelper.ID_LENGTH)
          .getBytes(StandardCharsets.US_ASCII);

      final SegmentInfo segmentInfo = new SegmentInfo(directory, Version.LATEST, Version.LATEST, name, maxDoc,
          isCompoundFile, null /* codec */, Collections.emptyMap() /* diagnostics */, id,
          Collections.emptyMap() /* attributes */, null /* indexSort */);
      segmentInfo.setFiles(Collections.emptyList());

      // nextInt(1) can only yield 0, so every segment is added without deletes
      final int delCount = random().nextInt(1);
      segmentInfos.add(new SegmentCommitInfo(segmentInfo, delCount, 0, -1, -1, -1, StringHelper.randomId()));
    }

    final MergePolicy.MergeSpecification proposed = policy.findForcedDeletesMerges(segmentInfos, mergeContext);
    if (proposed != null) {
      assertEquals(0, proposed.merges.size());
    }
  }
}
 
Example 18
Source Project: lucene-solr   Source File: BaseMergePolicyTestCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Make a new {@link SegmentCommitInfo} with the given {@code maxDoc},
 * {@code numDeletedDocs} and {@code sizeMB}, which are usually the
 * numbers that merge policies care about.
 *
 * <p>The size is not stored directly: it is converted to bytes and embedded in the name of
 * the single fake file attached to the segment.
 *
 * @param name segment name, must start with an underscore
 * @param maxDoc value passed as the segment's maxDoc
 * @param numDeletedDocs value passed as the commit's delete count
 * @param sizeMB size in megabytes, truncated to whole bytes for the fake file name
 * @param source value stored under the {@code IndexWriter.SOURCE} attribute
 * @throws IllegalArgumentException if {@code name} does not start with {@code _}
 */
protected static SegmentCommitInfo makeSegmentCommitInfo(String name, int maxDoc, int numDeletedDocs, double sizeMB, String source) {
  if (!name.startsWith("_")) {
    throw new IllegalArgumentException("name must start with an _, got " + name);
  }

  final byte[] segmentId = new byte[StringHelper.ID_LENGTH];
  random().nextBytes(segmentId);

  final SegmentInfo segment = new SegmentInfo(FAKE_DIRECTORY, Version.LATEST, Version.LATEST,
      name, maxDoc, false, TestUtil.getDefaultCodec(), Collections.emptyMap(), segmentId,
      Collections.singletonMap(IndexWriter.SOURCE, source), null);

  final long sizeInBytes = (long) (sizeMB * 1024 * 1024);
  final String fakeFileName = name + "_size=" + sizeInBytes + ".fake";
  segment.setFiles(Collections.singleton(fakeFileName));

  return new SegmentCommitInfo(segment, numDeletedDocs, 0, 0, 0, 0, StringHelper.randomId());
}
 
Example 19
Source Project: lucene-solr   Source File: BaseSegmentInfoFormatTestCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips a segment info that was given an empty file set and checks that the file set
 * read back equals the writer-side file set after the write.
 */
public void testFiles() throws Exception {
  final Directory directory = newDirectory();
  final Codec codec = getCodec();
  final byte[] segmentId = StringHelper.randomId();
  final Version version = getVersions()[0];
  final Version minVersion = getVersions()[0];

  final SegmentInfo written = new SegmentInfo(directory, version, minVersion, "_123", 1, false, codec,
                                              Collections.emptyMap(), segmentId, Collections.emptyMap(), null);
  written.setFiles(Collections.<String>emptySet());
  codec.segmentInfoFormat().write(directory, written, IOContext.DEFAULT);

  final SegmentInfo reread = codec.segmentInfoFormat().read(directory, "_123", segmentId, IOContext.DEFAULT);
  assertEquals(written.files(), reread.files());
  directory.close();
}
 
Example 20
Source Project: lucene-solr   Source File: BaseSegmentInfoFormatTestCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips a segment info and checks that the id read back matches the random id it was
 * written with.
 */
public void testUniqueID() throws Exception {
  final Codec codec = getCodec();
  final Directory directory = newDirectory();
  final byte[] expectedId = StringHelper.randomId();
  final Version version = getVersions()[0];
  final Version minVersion = getVersions()[0];

  final SegmentInfo written = new SegmentInfo(directory, version, minVersion, "_123", 1, false, codec,
                                              Collections.<String,String>emptyMap(), expectedId,
                                              Collections.emptyMap(), null);
  written.setFiles(Collections.<String>emptySet());
  codec.segmentInfoFormat().write(directory, written, IOContext.DEFAULT);

  final SegmentInfo reread = codec.segmentInfoFormat().read(directory, "_123", expectedId, IOContext.DEFAULT);
  assertIDEquals(expectedId, reread.getId());
  directory.close();
}
 
Example 21
Source Project: lucene-solr   Source File: SrndTruncQuery.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Visits every term of {@code fieldName} that starts with {@code prefixRef} and whose
 * remainder after the prefix matches {@code pattern}.
 *
 * <p>Iteration starts at the first term at or after the prefix and stops at the first term
 * that no longer starts with it. The shared matcher is reset when the scan ends, whether
 * normally or exceptionally.
 *
 * @param reader    the reader supplying the terms
 * @param fieldName the field whose terms are scanned
 * @param mtv       the visitor notified of each matching term
 * @throws IOException if reading the terms fails
 */
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  final int prefixLength = prefix.length();
  final Terms terms = MultiTerms.getTerms(reader, fieldName);
  if (terms == null) {
    return;
  }

  final Matcher matcher = pattern.matcher("");
  try {
    final TermsEnum termsEnum = terms.iterator();
    final TermsEnum.SeekStatus status = termsEnum.seekCeil(prefixRef);

    // pick the first candidate according to where the seek landed
    final BytesRef first;
    if (status == TermsEnum.SeekStatus.FOUND) {
      first = prefixRef;
    } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
      first = termsEnum.term();
    } else {
      first = null;
    }

    for (BytesRef text = first;
         text != null && StringHelper.startsWith(text, prefixRef);
         text = termsEnum.next()) {
      final String textString = text.utf8ToString();
      matcher.reset(textString.substring(prefixLength));
      if (matcher.matches()) {
        mtv.visitMatchingTerm(new Term(fieldName, textString));
      }
    }
  } finally {
    matcher.reset();
  }
}
 
Example 22
Source Project: lucene-solr   Source File: SrndPrefixQuery.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Visits every term of {@code fieldName} that starts with this query's prefix.
 *
 * <p>The enumeration seeks to the prefix. An exact hit is reported through
 * {@code getLucenePrefixTerm}; otherwise the term the seek landed on is reported if it
 * carries the prefix. The following terms are then reported until one no longer carries
 * the prefix or the enumeration is exhausted.
 *
 * @param reader    the reader supplying the terms
 * @param fieldName the field whose terms are scanned
 * @param mtv       the visitor notified of each matching term
 * @throws IOException if reading the terms fails
 */
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  /* inspired by PrefixQuery.rewrite(): */
  final Terms terms = MultiTerms.getTerms(reader, fieldName);
  if (terms == null) {
    return;
  }

  final TermsEnum termsEnum = terms.iterator();
  final TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getPrefix()));
  if (status == TermsEnum.SeekStatus.FOUND) {
    mtv.visitMatchingTerm(getLucenePrefixTerm(fieldName));
  } else if (status == TermsEnum.SeekStatus.NOT_FOUND
      && StringHelper.startsWith(termsEnum.term(), prefixRef)) {
    mtv.visitMatchingTerm(new Term(fieldName, termsEnum.term().utf8ToString()));
  } else {
    // either EOF or the landing term does not carry the prefix: nothing to visit
    return;
  }

  for (BytesRef text = termsEnum.next();
       text != null && StringHelper.startsWith(text, prefixRef);
       text = termsEnum.next()) {
    mtv.visitMatchingTerm(new Term(fieldName, text.utf8ToString()));
  }
}
 
Example 23
Source Project: lucene-solr   Source File: CodecUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Expert: reads {@link StringHelper#ID_LENGTH} bytes from the input and verifies that they
 * equal the expected object ID of an index header.
 *
 * @param in         the input positioned at the ID
 * @param expectedID the ID the header must carry
 * @return the ID that was read
 * @throws CorruptIndexException if the ID read differs from {@code expectedID}
 * @throws IOException if reading fails
 */
public static byte[] checkIndexHeaderID(DataInput in, byte[] expectedID) throws IOException {
  final byte[] actualID = new byte[StringHelper.ID_LENGTH];
  in.readBytes(actualID, 0, actualID.length);
  if (Arrays.equals(actualID, expectedID)) {
    return actualID;
  }
  throw new CorruptIndexException("file mismatch, expected id=" + StringHelper.idToString(expectedID) 
                                                     + ", got=" + StringHelper.idToString(actualID), in);
}
 
Example 24
Source Project: lucene-solr   Source File: AutomatonTermsEnum.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether the term is accepted by the automaton.
 *
 * <p>The automaton is only run when there is no common suffix or the term ends with it.
 * An accepted term yields {@code YES} in linear mode and {@code YES_AND_SEEK} otherwise.
 * A rejected term yields {@code NO} while in linear mode and below the linear upper bound,
 * and {@code NO_AND_SEEK} otherwise.
 */
@Override
protected AcceptStatus accept(final BytesRef term) {
  final boolean suffixMatches = commonSuffixRef == null || StringHelper.endsWith(term, commonSuffixRef);
  if (suffixMatches && runAutomaton.run(term.bytes, term.offset, term.length)) {
    return linear ? AcceptStatus.YES : AcceptStatus.YES_AND_SEEK;
  }

  // rejected, either by the suffix check or by the automaton itself
  final boolean stillInLinearRange = linear && term.compareTo(linearUpperBound) < 0;
  return stillInLinearRange ? AcceptStatus.NO : AcceptStatus.NO_AND_SEEK;
}
 
Example 25
Source Project: lucene-solr   Source File: TestCodecUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes an index header followed by a string, then seeks past the header using
 * {@code CodecUtil.indexHeaderLength} and checks that the string is read back intact.
 */
public void testSegmentHeaderLength() throws Exception {
  final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  final IndexOutput headerOut = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  CodecUtil.writeIndexHeader(headerOut, "FooBar", 5, StringHelper.randomId(), "xyz");
  headerOut.writeString("this is the data");
  headerOut.close();

  final IndexInput headerIn = new ByteBuffersIndexInput(buffer.toDataInput(), "temp");
  final int headerLength = CodecUtil.indexHeaderLength("FooBar", "xyz");
  headerIn.seek(headerLength);
  assertEquals("this is the data", headerIn.readString());
  headerIn.close();
}
 
Example 26
Source Project: lucene-solr   Source File: TestCodecUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that writing an index header with a 256-character suffix is rejected with an
 * {@link IllegalArgumentException}.
 */
public void testWriteTooLongSuffix() throws Exception {
  final StringBuilder tooLong = new StringBuilder();
  while (tooLong.length() < 256) {
    tooLong.append('a');
  }

  final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  final IndexOutput headerOut = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  expectThrows(IllegalArgumentException.class,
      () -> CodecUtil.writeIndexHeader(headerOut, "foobar", 5, StringHelper.randomId(), tooLong.toString()));
}
 
Example 27
Source Project: lucene-solr   Source File: TestCodecUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that writing an index header whose suffix is the single character
 * {@code U+1234} is rejected with an {@link IllegalArgumentException}.
 */
public void testWriteNonAsciiSuffix() throws Exception {
  final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  final IndexOutput headerOut = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  expectThrows(IllegalArgumentException.class,
      () -> CodecUtil.writeIndexHeader(headerOut, "foobar", 5, StringHelper.randomId(), "\u1234"));
}
 
Example 28
Source Project: lucene-solr   Source File: TestSegmentInfos.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Commits two {@code LUCENE_9_0_0} segments and checks, after reading the latest commit
 * back, the minimum segment version, the commit version, and that each segment commit id
 * equals the id observed right after the commit.
 */
public void testVersionsTwoSegments() throws IOException {
  final BaseDirectoryWrapper directory = newDirectory();
  directory.setCheckIndexOnClose(false);
  final byte[] segmentId = StringHelper.randomId();
  final Codec codec = Codec.getDefault();

  final SegmentInfos written = new SegmentInfos(Version.LATEST.major);

  // first segment
  final SegmentInfo firstSegment = new SegmentInfo(directory, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_0", 1, false,
                                                   Codec.getDefault(), Collections.<String,String>emptyMap(), segmentId,
                                                   Collections.<String,String>emptyMap(), null);
  firstSegment.setFiles(Collections.<String>emptySet());
  codec.segmentInfoFormat().write(directory, firstSegment, IOContext.DEFAULT);
  written.add(new SegmentCommitInfo(firstSegment, 0, 0, -1, -1, -1, StringHelper.randomId()));

  // second segment, sharing the same segment id bytes
  final SegmentInfo secondSegment = new SegmentInfo(directory, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_1", 1, false,
                                                    Codec.getDefault(), Collections.<String,String>emptyMap(), segmentId,
                                                    Collections.<String,String>emptyMap(), null);
  secondSegment.setFiles(Collections.<String>emptySet());
  codec.segmentInfoFormat().write(directory, secondSegment, IOContext.DEFAULT);
  written.add(new SegmentCommitInfo(secondSegment, 0, 0, -1, -1, -1, StringHelper.randomId()));

  written.commit(directory);
  // capture the ids after the commit, before re-reading from the directory
  final String expectedId0 = StringHelper.idToString(written.info(0).getId());
  final String expectedId1 = StringHelper.idToString(written.info(1).getId());

  final SegmentInfos reread = SegmentInfos.readLatestCommit(directory);
  assertEquals(Version.LUCENE_9_0_0, reread.getMinSegmentLuceneVersion());
  assertEquals(Version.LATEST, reread.getCommitLuceneVersion());
  assertEquals(expectedId0, StringHelper.idToString(reread.info(0).getId()));
  assertEquals(expectedId1, StringHelper.idToString(reread.info(1).getId()));
  directory.close();
}
 
Example 29
Source Project: lucene-solr   Source File: TestSegmentInfos.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the id reported by a {@code SegmentCommitInfo} differs after each call to
 * {@code advanceDelGen}, {@code advanceDocValuesGen} and {@code advanceFieldInfosGen}, and
 * that a clone keeps the id it was cloned with when the original is advanced afterwards.
 */
public void testIDChangesOnAdvance() throws IOException {
  try (BaseDirectoryWrapper dir = newDirectory()) {
    dir.setCheckIndexOnClose(false);
    byte id[] = StringHelper.randomId();
    // the segment info gets its own random id; `id` is used for the commit info only
    SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_0", 1, false, Codec.getDefault(),
        Collections.<String, String>emptyMap(), StringHelper.randomId(), Collections.<String, String>emptyMap(), null);
    SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, id);
    // a freshly constructed commit info reports the id it was given
    assertEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId()));
    commitInfo.advanceDelGen();
    assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId()));

    // re-capture the current id before each further advance so every step is compared
    // against the immediately preceding id
    id = commitInfo.getId();
    commitInfo.advanceDocValuesGen();
    assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId()));

    id = commitInfo.getId();
    commitInfo.advanceFieldInfosGen();
    assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId()));
    SegmentCommitInfo clone = commitInfo.clone();
    id = commitInfo.getId();
    // NOTE(review): `id` was just read from commitInfo, so this first assertion compares the
    // value with itself; the second one is what checks the clone's id
    assertEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId()));
    assertEquals(StringHelper.idToString(id), StringHelper.idToString(clone.getId()));

    // advancing the original must change its id but leave the clone's id alone
    commitInfo.advanceFieldInfosGen();
    assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId()));
    assertEquals("clone changed but shouldn't", StringHelper.idToString(id), StringHelper.idToString(clone.getId()));
  }
}
 
Example 30
/**
 * Builds a random merge specification for tests.
 *
 * <p>Roughly one call in ten returns {@code null}. Otherwise the result holds between zero
 * and nine (random, non-sensical) merges, each made of a single randomly parameterised
 * segment.
 *
 * @param dir directory handed to every generated {@link SegmentInfo}
 * @return the populated specification, or {@code null} about one time in ten
 */
private static MergePolicy.MergeSpecification createRandomMergeSpecification(Directory dir) {
  if (random().nextInt(10) == 0) { // ~ 1 in 10 times return null
    return null;
  }

  final MergePolicy.MergeSpecification ms = new MergePolicy.MergeSpecification();
  // draw the bound once; re-rolling it in the loop condition skews the count towards small values
  final int numMerges = random().nextInt(10);
  for (int ii = 0; ii < numMerges; ++ii) {
    final SegmentInfo si = new SegmentInfo(
        dir, // dir
        Version.LATEST, // version
        Version.LATEST, // min version
        TestUtil.randomSimpleString(random()), // name
        random().nextInt(1000), // maxDoc (bounded so it can never be negative)
        random().nextBoolean(), // isCompoundFile
        null, // codec
        Collections.emptyMap(), // diagnostics
        TestUtil.randomSimpleString(// id
            random(),
            StringHelper.ID_LENGTH,
            StringHelper.ID_LENGTH).getBytes(StandardCharsets.US_ASCII),
        Collections.emptyMap(), // attributes
        null /* indexSort */);
    final List<SegmentCommitInfo> segments = new LinkedList<SegmentCommitInfo>();
    segments.add(new SegmentCommitInfo(si, 0, 0, 0, 0, 0, StringHelper.randomId()));
    ms.add(new MergePolicy.OneMerge(segments));
  }
  // return what was built instead of unconditionally returning null
  return ms;
}