org.apache.lucene.util.StringHelper#ID_LENGTH

Source File: CodecUtil.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Reads the complete index header found at the start of the given {@link IndexInput}
 * and returns it as a byte array.
 * <p>
 * The input is first positioned at offset 0. The header is then parsed once to learn
 * its total size (magic, codec name, version, object ID, suffix length), after which
 * the input is rewound to offset 0 and the whole header is copied out.
 *
 * @param in input whose header should be read
 * @return a new array of {@code headerLength(codec) + StringHelper.ID_LENGTH + 1 + suffixLength} bytes
 *         read from the beginning of {@code in}
 * @throws CorruptIndexException if the first int of the file is not {@code CODEC_MAGIC},
 *         i.e. this does not appear to be an index file
 * @throws IOException if reading from {@code in} fails
 */
public static byte[] readIndexHeader(IndexInput in) throws IOException {
  in.seek(0);
  final int magic = in.readInt();
  if (magic != CODEC_MAGIC) {
    throw new CorruptIndexException("codec header mismatch: actual header=" + magic + " vs expected header=" + CODEC_MAGIC, in);
  }
  final String codecName = in.readString();
  in.readInt(); // version: consumed only to advance past it
  // Skip over the object ID so the next byte read is the suffix length.
  final long suffixLengthPosition = in.getFilePointer() + StringHelper.ID_LENGTH;
  in.seek(suffixLengthPosition);
  final int suffixLength = in.readByte() & 0xFF; // length is stored as an unsigned byte
  final int totalLength = headerLength(codecName) + StringHelper.ID_LENGTH + 1 + suffixLength;
  final byte[] header = new byte[totalLength];
  // Rewind and copy the whole header in one go.
  in.seek(0);
  in.readBytes(header, 0, header.length);
  return header;
}

Source File: SegmentCommitInfo.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Sole constructor.
 * @param info
 *          {@link SegmentInfo} that we wrap
 * @param delCount
 *          number of deleted documents in this segment
 * @param delGen
 *          deletion generation number (used to name deletion files)
 * @param fieldInfosGen
 *          FieldInfos generation number (used to name field-infos files)
 * @param docValuesGen
 *          DocValues generation number (used to name doc-values updates files)
 * @param id Id that uniquely identifies this segment commit. This id must be 16 bytes long. See {@link StringHelper#randomId()}
 */
public SegmentCommitInfo(SegmentInfo info, int delCount, int softDelCount, long delGen, long fieldInfosGen, long docValuesGen, byte[] id) {
  this.info = info;
  this.delCount = delCount;
  this.softDelCount = softDelCount;
  this.delGen = delGen;
  this.nextWriteDelGen = delGen == -1 ? 1 : delGen + 1;
  this.fieldInfosGen = fieldInfosGen;
  this.nextWriteFieldInfosGen = fieldInfosGen == -1 ? 1 : fieldInfosGen + 1;
  this.docValuesGen = docValuesGen;
  this.nextWriteDocValuesGen = docValuesGen == -1 ? 1 : docValuesGen + 1;
  this.id = id;
  if (id != null && id.length != StringHelper.ID_LENGTH) {
    throw new IllegalArgumentException("invalid id: " + Arrays.toString(id));
  }
}

Source File: BaseMergePolicyTestCase.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds a {@link SegmentCommitInfo} for a fake segment described by the given
 * {@code maxDoc}, {@code numDeletedDocs} and {@code sizeMB}, which are usually the
 * numbers that merge policies care about.
 * <p>
 * The size is not backed by real data: it is encoded, in bytes, into the name of the
 * single fake file registered on the segment.
 *
 * @param name segment name; must start with {@code _}
 * @param maxDoc document count passed to the {@link SegmentInfo}
 * @param numDeletedDocs deleted-document count passed to the {@link SegmentCommitInfo}
 * @param sizeMB segment size in megabytes, converted to bytes for the fake file name
 * @param source value stored under the {@link IndexWriter#SOURCE} key
 * @return the newly created commit info
 * @throws IllegalArgumentException if {@code name} does not start with {@code _}
 */
protected static SegmentCommitInfo makeSegmentCommitInfo(String name, int maxDoc, int numDeletedDocs, double sizeMB, String source) {
  if (!name.startsWith("_")) {
    throw new IllegalArgumentException("name must start with an _, got " + name);
  }

  // Segment id comes from the test's random source.
  final byte[] segmentId = new byte[StringHelper.ID_LENGTH];
  random().nextBytes(segmentId);

  final SegmentInfo segmentInfo = new SegmentInfo(
      FAKE_DIRECTORY,
      Version.LATEST,
      Version.LATEST,
      name,
      maxDoc,
      false,
      TestUtil.getDefaultCodec(),
      Collections.emptyMap(),
      segmentId,
      Collections.singletonMap(IndexWriter.SOURCE, source),
      null);

  // Encode the requested size (in bytes) in the name of the only file of the segment.
  final long sizeInBytes = (long) (sizeMB * 1024 * 1024);
  final String fakeFileName = name + "_size=" + sizeInBytes + ".fake";
  segmentInfo.setFiles(Collections.singleton(fakeFileName));

  final byte[] commitId = StringHelper.randomId();
  return new SegmentCommitInfo(segmentInfo, numDeletedDocs, 0, 0, 0, 0, commitId);
}

Source File: CodecUtil.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Expert: reads {@link StringHelper#ID_LENGTH} bytes from the input and verifies
 * that they match the expected object ID of an index header.
 *
 * @param in input positioned at the object ID
 * @param expectedID the ID the file is expected to carry
 * @return the ID bytes that were read
 * @throws CorruptIndexException if the bytes read differ from {@code expectedID}
 * @throws IOException if reading from {@code in} fails
 */
public static byte[] checkIndexHeaderID(DataInput in, byte[] expectedID) throws IOException {
  final byte[] actualID = new byte[StringHelper.ID_LENGTH];
  in.readBytes(actualID, 0, actualID.length);
  if (Arrays.equals(actualID, expectedID)) {
    return actualID;
  }
  throw new CorruptIndexException("file mismatch, expected id=" + StringHelper.idToString(expectedID)
                                                     + ", got=" + StringHelper.idToString(actualID), in);
}

Source File: TestIndexWriterThreadsToSegments.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Opens {@code file} in {@code dir} and returns the {@link StringHelper#ID_LENGTH}
 * bytes that follow the magic, codec name and version at the start of the file.
 *
 * @param dir directory containing the file
 * @param file name of the file to read
 * @return the ID bytes read from the file
 * @throws IOException if the file cannot be opened or read
 */
byte[] readSegmentInfoID(Directory dir, String file) throws IOException {
  try (IndexInput input = dir.openInput(file, IOContext.DEFAULT)) {
    // Consume the three fields that precede the ID; their values are not needed.
    input.readInt();    // magic
    input.readString(); // codec name
    input.readInt();    // version

    final byte[] segmentId = new byte[StringHelper.ID_LENGTH];
    input.readBytes(segmentId, 0, segmentId.length);
    return segmentId;
  }
}

Source File: BaseSegmentInfoFormatTestCase.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Sets some otherwise hard-to-test properties (random segment names, ID values,
 * document count, file sets, diagnostics and attributes) and round-trips them:
 * each randomly built {@link SegmentInfo} is written with the codec's segment-info
 * format, read back, and compared with the original.
 */
public void testRandom() throws Exception {
  final Codec codec = getCodec();
  final Version[] candidateVersions = getVersions();

  for (int iteration = 0; iteration < 10; iteration++) {
    Directory directory = newDirectory();
    Version chosenVersion = candidateVersions[random().nextInt(candidateVersions.length)];

    // Math.abs(Long.MIN_VALUE) is still negative, so fall back to a non-negative int then.
    long segmentIndex = Math.abs(random().nextLong());
    if (segmentIndex == Long.MIN_VALUE) {
      segmentIndex = random().nextInt(Integer.MAX_VALUE);
    }
    String segmentName = "_" + Long.toString(segmentIndex, Character.MAX_RADIX);

    int maxDoc = TestUtil.nextInt(random(), 1, IndexWriter.MAX_DOCS);
    boolean compound = random().nextBoolean();

    // Register a random number of files and create each of them, empty, in the directory.
    Set<String> segmentFiles = new HashSet<>();
    int fileCount = random().nextInt(10);
    for (int f = 0; f < fileCount; f++) {
      String fileName = IndexFileNames.segmentFileName(segmentName, "", Integer.toString(f));
      segmentFiles.add(fileName);
      dir(directory, fileName);
    }

    Map<String,String> diagnostics = new HashMap<>();
    int diagnosticCount = random().nextInt(10);
    for (int d = 0; d < diagnosticCount; d++) {
      diagnostics.put(TestUtil.randomUnicodeString(random()),
                      TestUtil.randomUnicodeString(random()));
    }

    byte[] segmentId = new byte[StringHelper.ID_LENGTH];
    random().nextBytes(segmentId);

    Map<String,String> attributes = new HashMap<>();
    int attributeCount = random().nextInt(10);
    for (int a = 0; a < attributeCount; a++) {
      attributes.put(TestUtil.randomUnicodeString(random()),
                     TestUtil.randomUnicodeString(random()));
    }

    SegmentInfo written = new SegmentInfo(directory, chosenVersion, null, segmentName, maxDoc, compound, codec, diagnostics, segmentId, attributes, null);
    written.setFiles(segmentFiles);
    codec.segmentInfoFormat().write(directory, written, IOContext.DEFAULT);

    SegmentInfo readBack = codec.segmentInfoFormat().read(directory, segmentName, segmentId, IOContext.DEFAULT);
    assertEquals(written, readBack);

    directory.close();
  }
}

/** Creates an empty file called {@code fileName} in {@code directory}. */
private static void dir(Directory directory, String fileName) throws IOException {
  directory.createOutput(fileName, IOContext.DEFAULT).close();
}

Source File: CodecUtil.java From lucene-solr with Apache License 2.0

3 votes

/**
 * Writes a codec header for an index file, which records a string to
 * identify the format of the file, a version number, and data to identify
 * the file instance (ID and auxiliary suffix such as generation).
 * <p>
 * This header can be parsed and validated with 
 * {@link #checkIndexHeader(DataInput, String, int, int, byte[], String) checkIndexHeader()}.
 * <p>
 * IndexHeader --&gt; CodecHeader,ObjectID,ObjectSuffix
 * <ul>
 *    <li>CodecHeader   --&gt; {@link #writeHeader}
 *    <li>ObjectID     --&gt; {@link DataOutput#writeByte byte}<sup>16</sup>
 *    <li>ObjectSuffix --&gt; SuffixLength,SuffixBytes
 *    <li>SuffixLength  --&gt; {@link DataOutput#writeByte byte}
 *    <li>SuffixBytes   --&gt; {@link DataOutput#writeByte byte}<sup>SuffixLength</sup>
 * </ul>
 * <p>
 * Note that the length of an index header depends only upon the
 * name of the codec and suffix, so this length can be computed at any time
 * with {@link #indexHeaderLength(String,String)}.
 * <p>
 * The {@code id} and {@code suffix} arguments are both validated before anything is
 * written, so an invalid id or suffix never leaves a partially written header in
 * {@code out}.
 * 
 * @param out Output stream
 * @param codec String to identify the format of this file. It should be simple ASCII, 
 *              less than 128 characters in length.
 * @param version Version number
 * @param id Unique identifier for this particular file instance. Must be exactly
 *           {@link StringHelper#ID_LENGTH} bytes long.
 * @param suffix auxiliary suffix information for the file. It should be simple ASCII,
 *              less than 256 characters in length.
 * @throws IOException If there is an I/O error writing to the underlying medium.
 * @throws IllegalArgumentException If the codec name is not simple ASCII, or 
 *         is more than 127 characters in length, or if id is invalid,
 *         or if the suffix is not simple ASCII, or more than 255 characters
 *         in length.
 */
public static void writeIndexHeader(DataOutput out, String codec, int version, byte[] id, String suffix) throws IOException {
  if (id.length != StringHelper.ID_LENGTH) {
    throw new IllegalArgumentException("Invalid id: " + StringHelper.idToString(id));
  }
  // Validate the suffix BEFORE emitting any bytes: previously this check ran after the
  // codec header and ID had already been written, so a bad suffix left a truncated
  // header in the output.
  BytesRef suffixBytes = new BytesRef(suffix);
  // The encoded byte count differs from the character count when the suffix is not
  // simple ASCII; the length must also fit in the single unsigned length byte.
  if (suffixBytes.length != suffix.length() || suffixBytes.length >= 256) {
    throw new IllegalArgumentException("suffix must be simple ASCII, less than 256 characters in length [got " + suffix + "]");
  }
  writeHeader(out, codec, version);
  out.writeBytes(id, 0, id.length);
  out.writeByte((byte) suffixBytes.length);
  out.writeBytes(suffixBytes.bytes, suffixBytes.offset, suffixBytes.length);
}

Source File: CodecUtil.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Computes the length of an index header: the codec header, the object ID
 * ({@link StringHelper#ID_LENGTH} bytes), one suffix-length byte, and the suffix itself.
 * 
 * @param codec Codec name.
 * @param suffix Suffix of the header; its character count is used as its length.
 * @return length of the entire index header.
 * @see #writeIndexHeader(DataOutput, String, int, byte[], String)
 */
public static int indexHeaderLength(String codec, String suffix) {
  final int codecHeaderLength = headerLength(codec);
  final int suffixLengthByte = 1; // single byte that stores the suffix length
  final int suffixLength = suffix.length();
  return codecHeaderLength + StringHelper.ID_LENGTH + suffixLengthByte + suffixLength;
}

Java Code Examples for org.apache.lucene.util.StringHelper#ID_LENGTH