Java Code Examples for org.apache.lucene.util.StringHelper#ID_LENGTH

The following examples show how to use org.apache.lucene.util.StringHelper#ID_LENGTH. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.

Example 1

Source File: CodecUtil.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Reads the complete index header from the start of the provided {@link IndexInput}
 * and returns it as a raw byte array.
 * <p>
 * The input is positioned at offset 0, the leading magic number is verified, and the
 * fields that follow are walked only far enough to learn the suffix length; the input
 * is then rewound and the whole header is read in a single call.
 *
 * @param in input to read the header from
 * @return the header bytes, starting at file offset 0
 * @throws CorruptIndexException if the first int of the file is not {@code CODEC_MAGIC}
 * @throws IOException if reading from {@code in} fails
 */
public static byte[] readIndexHeader(IndexInput in) throws IOException {
  in.seek(0);
  final int magic = in.readInt();
  if (magic != CODEC_MAGIC) {
    throw new CorruptIndexException("codec header mismatch: actual header=" + magic + " vs expected header=" + CODEC_MAGIC, in);
  }
  final String codecName = in.readString();
  // The int after the codec name is consumed but its value is not needed here.
  in.readInt();
  // Jump over the ID_LENGTH bytes of the object ID to land on the suffix-length byte.
  final long suffixLengthPosition = in.getFilePointer() + StringHelper.ID_LENGTH;
  in.seek(suffixLengthPosition);
  // The suffix length is stored as a single unsigned byte.
  final int suffixLength = in.readByte() & 0xFF;
  // codec header + object ID + one suffix-length byte + suffix bytes
  final int totalLength = headerLength(codecName) + StringHelper.ID_LENGTH + 1 + suffixLength;
  final byte[] header = new byte[totalLength];
  in.seek(0);
  in.readBytes(header, 0, totalLength);
  return header;
}

Example 2

Source File: SegmentCommitInfo.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Sole constructor.
 * <p>
 * For each generation argument the matching "next write" generation is initialised to
 * {@code 1} when the supplied generation is {@code -1}, and to the supplied generation
 * plus one otherwise.
 *
 * @param info
 *          {@link SegmentInfo} that we wrap
 * @param delCount
 *          number of deleted documents in this segment
 * @param softDelCount
 *          soft-delete count recorded for this segment
 * @param delGen
 *          deletion generation number (used to name deletion files)
 * @param fieldInfosGen
 *          FieldInfos generation number (used to name field-infos files)
 * @param docValuesGen
 *          DocValues generation number (used to name doc-values updates files)
 * @param id
 *          Id that uniquely identifies this segment commit; may be {@code null}, otherwise
 *          it must be exactly {@link StringHelper#ID_LENGTH} bytes long.
 *          See {@link StringHelper#randomId()}
 * @throws IllegalArgumentException if {@code id} is non-null and has the wrong length
 */
public SegmentCommitInfo(SegmentInfo info, int delCount, int softDelCount, long delGen, long fieldInfosGen, long docValuesGen, byte[] id) {
  this.info = info;
  this.delCount = delCount;
  this.softDelCount = softDelCount;

  this.delGen = delGen;
  if (delGen == -1) {
    this.nextWriteDelGen = 1;
  } else {
    this.nextWriteDelGen = delGen + 1;
  }

  this.fieldInfosGen = fieldInfosGen;
  if (fieldInfosGen == -1) {
    this.nextWriteFieldInfosGen = 1;
  } else {
    this.nextWriteFieldInfosGen = fieldInfosGen + 1;
  }

  this.docValuesGen = docValuesGen;
  if (docValuesGen == -1) {
    this.nextWriteDocValuesGen = 1;
  } else {
    this.nextWriteDocValuesGen = docValuesGen + 1;
  }

  this.id = id;
  // A null id is accepted; only a non-null id of the wrong size is rejected.
  if (id != null) {
    if (id.length != StringHelper.ID_LENGTH) {
      throw new IllegalArgumentException("invalid id: " + Arrays.toString(id));
    }
  }
}

Example 3

Source File: BaseMergePolicyTestCase.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds a fake {@link SegmentCommitInfo} from the handful of numbers merge policies
 * usually look at: {@code maxDoc}, {@code numDeletedDocs} and the segment size.
 * <p>
 * The size is not stored directly; it is encoded into the name of the single file
 * registered on the segment ({@code <name>_size=<bytes>.fake}).
 *
 * @param name segment name; must start with an underscore
 * @param maxDoc value passed to the {@link SegmentInfo} as its document count
 * @param numDeletedDocs value passed to the {@link SegmentCommitInfo} as its delete count
 * @param sizeMB segment size in megabytes, truncated to whole bytes
 * @param source value stored under {@link IndexWriter#SOURCE} in the last map passed to the
 *        {@link SegmentInfo} constructor
 * @return the newly created commit info
 * @throws IllegalArgumentException if {@code name} does not start with {@code _}
 */
protected static SegmentCommitInfo makeSegmentCommitInfo(String name, int maxDoc, int numDeletedDocs, double sizeMB, String source) {
  if (!name.startsWith("_")) {
    throw new IllegalArgumentException("name must start with an _, got " + name);
  }

  final byte[] segmentId = new byte[StringHelper.ID_LENGTH];
  random().nextBytes(segmentId);

  final SegmentInfo segmentInfo = new SegmentInfo(
      FAKE_DIRECTORY,
      Version.LATEST,
      Version.LATEST,
      name,
      maxDoc,
      false,
      TestUtil.getDefaultCodec(),
      Collections.emptyMap(),
      segmentId,
      Collections.singletonMap(IndexWriter.SOURCE, source),
      null);

  // Encode the requested size, in bytes, into the fake file name.
  final long sizeInBytes = (long) (sizeMB * 1024 * 1024);
  final String fakeFileName = name + "_size=" + Long.toString(sizeInBytes) + ".fake";
  segmentInfo.setFiles(Collections.singleton(fakeFileName));

  return new SegmentCommitInfo(segmentInfo, numDeletedDocs, 0, 0, 0, 0, StringHelper.randomId());
}

Example 4

Source File: CodecUtil.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Expert: reads the object ID of an index header from the current position of
 * {@code in} and verifies that it equals {@code expectedID}.
 *
 * @param in input positioned at the object ID
 * @param expectedID the ID the header is required to carry
 * @return the ID that was read
 * @throws CorruptIndexException if the ID read differs from {@code expectedID}
 * @throws IOException if reading from {@code in} fails
 */
public static byte[] checkIndexHeaderID(DataInput in, byte[] expectedID) throws IOException {
  final byte[] actualID = new byte[StringHelper.ID_LENGTH];
  in.readBytes(actualID, 0, actualID.length);
  if (Arrays.equals(actualID, expectedID)) {
    return actualID;
  }
  throw new CorruptIndexException("file mismatch, expected id=" + StringHelper.idToString(expectedID) 
                                                     + ", got=" + StringHelper.idToString(actualID), in);
}

Example 5

Source File: TestIndexWriterThreadsToSegments.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Opens {@code file} in {@code dir}, skips the leading header fields and returns the
 * {@link StringHelper#ID_LENGTH} bytes of ID that follow them.
 *
 * @param dir directory containing the file
 * @param file name of the file to read
 * @return the ID bytes read from the file
 * @throws IOException if the file cannot be opened or read
 */
byte[] readSegmentInfoID(Directory dir, String file) throws IOException {
  try (IndexInput input = dir.openInput(file, IOContext.DEFAULT)) {
    // Consume the three fields that precede the ID; their values are not needed.
    input.readInt();    // magic
    input.readString(); // codec name
    input.readInt();    // version
    final byte[] segmentId = new byte[StringHelper.ID_LENGTH];
    input.readBytes(segmentId, 0, segmentId.length);
    return segmentId;
  }
}

Example 6

Source File: BaseSegmentInfoFormatTestCase.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Round-trips randomly generated {@link SegmentInfo} instances through the codec's
 * segment info format, covering properties that are otherwise hard to test:
 * random segment names, ID values, document counts, file sets, diagnostics and attributes.
 */
public void testRandom() throws Exception {
  Codec codec = getCodec();
  Version[] versions = getVersions();
  for (int iter = 0; iter < 10; iter++) {
    Directory dir = newDirectory();
    Version version = versions[random().nextInt(versions.length)];

    // Math.abs(Long.MIN_VALUE) is still Long.MIN_VALUE, so draw a non-negative int in that one case.
    long segmentIndex = Math.abs(random().nextLong());
    if (segmentIndex == Long.MIN_VALUE) {
      segmentIndex = random().nextInt(Integer.MAX_VALUE);
    }
    String name = "_" + Long.toString(segmentIndex, Character.MAX_RADIX);

    int docCount = TestUtil.nextInt(random(), 1, IndexWriter.MAX_DOCS);
    boolean isCompoundFile = random().nextBoolean();

    // Register up to nine files on the segment and create each one, empty, in the directory.
    Set<String> files = new HashSet<>();
    int numFiles = random().nextInt(10);
    for (int fileOrd = 0; fileOrd < numFiles; fileOrd++) {
      String fileName = IndexFileNames.segmentFileName(name, "", Integer.toString(fileOrd));
      files.add(fileName);
      dir.createOutput(fileName, IOContext.DEFAULT).close();
    }

    // Random diagnostics; the key is drawn before the value.
    Map<String,String> diagnostics = new HashMap<>();
    for (int left = random().nextInt(10); left > 0; left--) {
      String key = TestUtil.randomUnicodeString(random());
      String value = TestUtil.randomUnicodeString(random());
      diagnostics.put(key, value);
    }

    byte[] id = new byte[StringHelper.ID_LENGTH];
    random().nextBytes(id);

    // Random attributes; the key is drawn before the value.
    Map<String,String> attributes = new HashMap<>();
    for (int left = random().nextInt(10); left > 0; left--) {
      String key = TestUtil.randomUnicodeString(random());
      String value = TestUtil.randomUnicodeString(random());
      attributes.put(key, value);
    }

    SegmentInfo written = new SegmentInfo(dir, version, null, name, docCount, isCompoundFile, codec, diagnostics, id, attributes, null);
    written.setFiles(files);
    codec.segmentInfoFormat().write(dir, written, IOContext.DEFAULT);

    // Reading back with the same name and id must yield an equal SegmentInfo.
    SegmentInfo readBack = codec.segmentInfoFormat().read(dir, name, id, IOContext.DEFAULT);
    assertEquals(written, readBack);

    dir.close();
  }
}

Example 7

Source File: CodecUtil.java From lucene-solr with Apache License 2.0

3 votes

/**
 * Writes a codec header for an index file, which records a string to
 * identify the format of the file, a version number, and data to identify
 * the file instance (ID and auxiliary suffix such as generation).
 * <p>
 * This header can be parsed and validated with 
 * {@link #checkIndexHeader(DataInput, String, int, int, byte[], String) checkIndexHeader()}.
 * <p>
 * IndexHeader --&gt; CodecHeader,ObjectID,ObjectSuffix
 * <ul>
 *    <li>CodecHeader   --&gt; {@link #writeHeader}
 *    <li>ObjectID     --&gt; {@link DataOutput#writeByte byte}<sup>16</sup>
 *    <li>ObjectSuffix --&gt; SuffixLength,SuffixBytes
 *    <li>SuffixLength  --&gt; {@link DataOutput#writeByte byte}
 *    <li>SuffixBytes   --&gt; {@link DataOutput#writeByte byte}<sup>SuffixLength</sup>
 * </ul>
 * <p>
 * Note that the length of an index header depends only upon the
 * name of the codec and suffix, so this length can be computed at any time
 * with {@link #indexHeaderLength(String,String)}.
 * <p>
 * Both {@code id} and {@code suffix} are validated before anything is written, so a
 * rejected id or suffix never leaves a partially written header in {@code out}.
 * 
 * @param out Output stream
 * @param codec String to identify the format of this file. It should be simple ASCII, 
 *              less than 128 characters in length.
 * @param version Version number
 * @param id Unique identifier for this particular file instance. It must be exactly
 *           {@link StringHelper#ID_LENGTH} bytes long.
 * @param suffix auxiliary suffix information for the file. It should be simple ASCII,
 *              less than 256 characters in length.
 * @throws IOException If there is an I/O error writing to the underlying medium.
 * @throws IllegalArgumentException If the codec name is not simple ASCII, or 
 *         is more than 127 characters in length, or if id is invalid,
 *         or if the suffix is not simple ASCII, or more than 255 characters
 *         in length.
 */
public static void writeIndexHeader(DataOutput out, String codec, int version, byte[] id, String suffix) throws IOException {
  if (id.length != StringHelper.ID_LENGTH) {
    throw new IllegalArgumentException("Invalid id: " + StringHelper.idToString(id));
  }
  // Validate the suffix up front: checking it only after the codec header and ID have
  // been written would leave a truncated header in the output when the suffix is rejected.
  // The encoded byte count must equal the character count (i.e. simple ASCII), and the
  // length must fit in the single unsigned length byte written below.
  BytesRef suffixBytes = new BytesRef(suffix);
  if (suffixBytes.length != suffix.length() || suffixBytes.length >= 256) {
    throw new IllegalArgumentException("suffix must be simple ASCII, less than 256 characters in length [got " + suffix + "]");
  }
  writeHeader(out, codec, version);
  out.writeBytes(id, 0, id.length);
  out.writeByte((byte) suffixBytes.length);
  out.writeBytes(suffixBytes.bytes, suffixBytes.offset, suffixBytes.length);
}

Example 8

Source File: CodecUtil.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Computes the length of an index header.
 * <p>
 * The result is the codec header length plus {@link StringHelper#ID_LENGTH} bytes of
 * object ID, one suffix-length byte, and one byte per character of {@code suffix}.
 * 
 * @param codec Codec name.
 * @param suffix Suffix of the header; its character count is used as its byte count.
 * @return length of the entire index header.
 * @see #writeIndexHeader(DataOutput, String, int, byte[], String)
 */
public static int indexHeaderLength(String codec, String suffix) {
  final int codecHeaderLength = headerLength(codec);
  final int suffixLength = suffix.length();
  // codec header + object ID + suffix-length byte + suffix bytes
  return codecHeaderLength + StringHelper.ID_LENGTH + 1 + suffixLength;
}