Java Code Examples for org.apache.lucene.store.DataOutput#writeByte()

The following examples show how to use org.apache.lucene.store.DataOutput#writeByte(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: XAnalyzingSuggester.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes this suggester to {@code output}: the FST first, then
 * {@code maxAnalyzedPathsForOneInput} (via {@code writeVInt}) and finally a
 * single byte that is 1 when payloads are present and 0 otherwise.
 * <p>
 * The given stream is always closed before this method returns, even when
 * nothing was written.
 *
 * @param output stream to write to; closed by this method
 * @return {@code false} if there is no FST to store, {@code true} otherwise
 * @throws IOException if writing or closing the stream fails
 */
@Override
public boolean store(OutputStream output) throws IOException {
  final DataOutput sink = new OutputStreamDataOutput(output);
  try {
    if (fst != null) {
      fst.save(sink);
      sink.writeVInt(maxAnalyzedPathsForOneInput);
      final byte payloadFlag = hasPayloads ? (byte) 1 : (byte) 0;
      sink.writeByte(payloadFlag);
      return true;
    }
    return false;
  } finally {
    // Runs on every path, including the early "no FST" exit.
    IOUtils.close(output);
  }
}
 
Example 2
Source File: CompressingStoredFieldsWriter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** 
 * Writes a double in a variable-length format.  Writes between one and 
 * nine bytes. Small integral values typically take fewer bytes.
 * <p>
 * ZDouble --&gt; Header, Bytes*?
 * <ul>
 *    <li>Header --&gt; {@link DataOutput#writeByte Uint8}. When it is
 *       equal to 0xFF then the value is negative and stored in the next
 *       8 bytes. When it is equal to 0xFE then the value is stored as a
 *       float in the next 4 bytes. Otherwise if the first bit is set
 *       then the other bits in the header encode the value plus one and
 *       no other bytes are read. Otherwise, the value is a positive double
 *       value whose first byte is the header, and 7 bytes need to be read
 *       to complete it.
 *    <li>Bytes --&gt; Potential additional bytes to read depending on the
 *       header.
 * </ul>
 *
 * @param out destination of the encoded bytes
 * @param d   value to encode
 * @throws IOException if writing to {@code out} fails
 */
static void writeZDouble(DataOutput out, double d) throws IOException {
  int intVal = (int) d;
  final long doubleBits = Double.doubleToLongBits(d);
  
  if (d == intVal &&
      intVal >= -1 && 
      intVal <= 0x7C &&
      doubleBits != NEGATIVE_ZERO_DOUBLE) {
    // small integer value [-1..124]: single byte
    // (negative zero is excluded so that its sign bit is not lost)
    out.writeByte((byte) (0x80 | (intVal + 1)));
  } else if (d == (float) d) {
    // d has an accurate float representation: 5 bytes
    out.writeByte((byte) 0xFE);
    out.writeInt(Float.floatToIntBits((float) d));
  } else if ((doubleBits >>> 63) == 0) {
    // other positive doubles: 8 bytes
    out.writeLong(doubleBits);
  } else {
    // other negative doubles: 9 bytes
    out.writeByte((byte) 0xFF);
    out.writeLong(doubleBits);
  }
}
 
Example 3
Source File: CompressingStoredFieldsWriter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** 
 * Encodes a float using between one and five bytes, favouring small
 * integral values.
 * <p>
 * ZFloat --&gt; Header, Bytes*?
 * <ul>
 *    <li>Header --&gt; {@link DataOutput#writeByte Uint8}. A header of
 *       0xFF means the value is negative and follows in the next 4 bytes.
 *       Otherwise, if the first bit is set, the remaining header bits hold
 *       the value plus one and nothing else is read. Otherwise the value
 *       is a positive float whose first byte is the header itself, and
 *       3 more bytes complete it.
 *    <li>Bytes --&gt; Potential additional bytes to read depending on the
 *       header.
 * </ul>
 */
static void writeZFloat(DataOutput out, float f) throws IOException {
  final int bits = Float.floatToIntBits(f);
  final int truncated = (int) f;

  // Integral values in [-1..125], excluding negative zero, fit in the header.
  final boolean fitsInHeader =
      f == truncated
          && truncated >= -1
          && truncated <= 0x7D
          && bits != NEGATIVE_ZERO_FLOAT;

  if (fitsInHeader) {
    out.writeByte((byte) (0x80 | (1 + truncated)));
    return;
  }

  // Sign bit set: prefix the raw bits with the 0xFF marker (5 bytes total).
  // Sign bit clear: the raw bits alone are written (4 bytes).
  if (bits < 0) {
    out.writeByte((byte) 0xFF);
  }
  out.writeInt(bits);
}
 
Example 4
Source File: JaspellLookup.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes {@code node} and then its low, equal and high relatives, in that
 * order. Each node is emitted as its split character (as a one-character
 * string), a mask byte recording which relatives and data are present, and,
 * when data is present, the data's {@code long} value. A {@code null} node
 * writes nothing.
 */
private void writeRecursively(DataOutput out, TSTNode node) throws IOException {
  if (node != null) {
    final TSTNode loKid = node.relatives[TSTNode.LOKID];
    final TSTNode eqKid = node.relatives[TSTNode.EQKID];
    final TSTNode hiKid = node.relatives[TSTNode.HIKID];
    final Object payload = node.data;

    out.writeString(String.valueOf(node.splitchar));

    byte presence = 0;
    if (loKid != null) {
      presence |= LO_KID;
    }
    if (eqKid != null) {
      presence |= EQ_KID;
    }
    if (hiKid != null) {
      presence |= HI_KID;
    }
    if (payload != null) {
      presence |= HAS_VALUE;
    }
    out.writeByte(presence);

    if (payload != null) {
      out.writeLong(((Number) payload).longValue());
    }

    writeRecursively(out, loKid);
    writeRecursively(out, eqKid);
    writeRecursively(out, hiKid);
  }
}
 
Example 5
Source File: TSTLookup.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes {@code node} followed by its non-null children in low, equal, high
 * order. A node is emitted as its split character (as a one-character
 * string), a mask byte describing which children, token and value exist,
 * then the token and the value's {@code long} form when present.
 *
 * @param node node to write; dereferenced without a null check
 */
private void writeRecursively(DataOutput out, TernaryTreeNode node) throws IOException {
  final boolean hasToken = node.token != null;
  final boolean hasValue = node.val != null;

  out.writeString(String.valueOf(node.splitchar));

  // Collect the presence bits for children and payload.
  byte presence = 0;
  if (node.eqKid != null) {
    presence |= EQ_KID;
  }
  if (node.loKid != null) {
    presence |= LO_KID;
  }
  if (node.hiKid != null) {
    presence |= HI_KID;
  }
  if (hasToken) {
    presence |= HAS_TOKEN;
  }
  if (hasValue) {
    presence |= HAS_VALUE;
  }
  out.writeByte(presence);

  if (hasToken) {
    out.writeString(node.token);
  }
  if (hasValue) {
    out.writeLong(((Number) node.val).longValue());
  }

  // Descend into whichever children exist.
  if (node.loKid != null) {
    writeRecursively(out, node.loKid);
  }
  if (node.eqKid != null) {
    writeRecursively(out, node.eqKid);
  }
  if (node.hiKid != null) {
    writeRecursively(out, node.hiKid);
  }
}
 
Example 6
Source File: BinaryDictionaryWriter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the part-of-speech dictionary to {@code path}, creating parent
 * directories as needed. The file holds a codec header, the number of
 * entries, and then per entry either three zero bytes (for a {@code null}
 * entry) or the three CSV fields of the entry as strings.
 *
 * @throws IllegalArgumentException if an entry does not parse to exactly three fields
 */
private void writePosDict(Path path) throws IOException {
  Files.createDirectories(path.getParent());
  try (OutputStream raw = Files.newOutputStream(path);
       OutputStream buffered = new BufferedOutputStream(raw)) {
    final DataOutput out = new OutputStreamDataOutput(buffered);
    CodecUtil.writeHeader(out, BinaryDictionary.POSDICT_HEADER, BinaryDictionary.VERSION);
    out.writeVInt(posDict.size());
    for (String entry : posDict) {
      if (entry != null) {
        final String[] fields = CSVUtil.parse(entry);
        if (fields.length != 3) {
          throw new IllegalArgumentException("Malformed pos/inflection: " + entry + "; expected 3 characters");
        }
        for (String field : fields) {
          out.writeString(field);
        }
        continue;
      }
      // Missing entry: one zero byte in place of each of the three fields.
      for (int slot = 0; slot < 3; slot++) {
        out.writeByte((byte) 0);
      }
    }
  }
}
 
Example 7
Source File: CharacterDefinitionWriter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the character definition data beneath {@code baseDir}. The target
 * file is named after {@link CharacterDefinition}'s class name (dots
 * replaced by slashes) plus {@code CharacterDefinition.FILENAME_SUFFIX};
 * parent directories are created as needed.
 * <p>
 * The file contains a codec header, the raw character category map, and
 * one flag byte per character class: bit 0x01 mirrors {@code invokeMap}
 * and bit 0x02 mirrors {@code groupMap}.
 */
public void write(Path baseDir) throws IOException {
  final String relativeName =
      CharacterDefinition.class.getName().replace('.', '/') + CharacterDefinition.FILENAME_SUFFIX;
  final Path target = baseDir.resolve(relativeName);
  Files.createDirectories(target.getParent());

  try (OutputStream stream = new BufferedOutputStream(Files.newOutputStream(target))) {
    final DataOutput out = new OutputStreamDataOutput(stream);
    CodecUtil.writeHeader(out, CharacterDefinition.HEADER, CharacterDefinition.VERSION);
    out.writeBytes(characterCategoryMap, 0, characterCategoryMap.length);

    for (int cls = 0; cls < CharacterDefinition.CLASS_COUNT; cls++) {
      int flags = 0x00;
      if (invokeMap[cls]) {
        flags |= 0x01;
      }
      if (groupMap[cls]) {
        flags |= 0x02;
      }
      out.writeByte((byte) flags);
    }
  }
}
 
Example 8
Source File: BinaryDictionaryWriter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the part-of-speech dictionary to {@code path}, creating parent
 * directories as needed. After a codec header and the entry count, each
 * entry is stored as a single byte holding the ordinal of its
 * {@code POS.Tag}; {@code null} entries are stored as
 * {@code POS.Tag.UNKNOWN}.
 *
 * @throws IllegalArgumentException if an entry does not parse to exactly two fields
 */
private void writePosDict(Path path) throws IOException {
  Files.createDirectories(path.getParent());
  try (OutputStream raw = Files.newOutputStream(path);
       OutputStream buffered = new BufferedOutputStream(raw)) {
    final DataOutput out = new OutputStreamDataOutput(buffered);
    CodecUtil.writeHeader(out, BinaryDictionary.POSDICT_HEADER, BinaryDictionary.VERSION);
    out.writeVInt(posDict.size());
    for (String entry : posDict) {
      final POS.Tag tag;
      if (entry == null) {
        tag = POS.Tag.UNKNOWN;
      } else {
        final String[] fields = CSVUtil.parse(entry);
        if (fields.length != 2) {
          throw new IllegalArgumentException("Malformed pos/inflection: " + entry + "; expected 2 characters");
        }
        // Only the first field (the tag name) is written.
        tag = POS.Tag.valueOf(fields[0]);
      }
      out.writeByte((byte) tag.ordinal());
    }
  }
}
 
Example 9
Source File: CharacterDefinitionWriter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes the character definitions into a file under {@code baseDir}.
 * The file path is {@link CharacterDefinition}'s fully qualified class name
 * with dots turned into slashes, followed by
 * {@code CharacterDefinition.FILENAME_SUFFIX}. Missing parent directories
 * are created.
 * <p>
 * Layout: codec header, the character category map bytes, then one byte
 * per character class where 0x01 is set if {@code invokeMap} is true and
 * 0x02 is set if {@code groupMap} is true for that class.
 */
public void write(Path baseDir) throws IOException {
  final String classAsPath = CharacterDefinition.class.getName().replace('.', '/');
  final Path outputFile = baseDir.resolve(classAsPath + CharacterDefinition.FILENAME_SUFFIX);
  Files.createDirectories(outputFile.getParent());

  try (OutputStream buffered = new BufferedOutputStream(Files.newOutputStream(outputFile))) {
    final DataOutput out = new OutputStreamDataOutput(buffered);
    CodecUtil.writeHeader(out, CharacterDefinition.HEADER, CharacterDefinition.VERSION);
    out.writeBytes(characterCategoryMap, 0, characterCategoryMap.length);

    int classIndex = 0;
    while (classIndex < CharacterDefinition.CLASS_COUNT) {
      final int invokeBit = invokeMap[classIndex] ? 0x01 : 0x00;
      final int groupBit = groupMap[classIndex] ? 0x02 : 0x00;
      out.writeByte((byte) (invokeBit | groupBit));
      classIndex++;
    }
  }
}
 
Example 10
Source File: LZ4.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes one literal run: the token byte, then (only when the literal
 * length is at least 0x0F) the remainder of the length via
 * {@code encodeLen}, then {@code literalLen} bytes of {@code bytes}
 * starting at {@code anchor}.
 */
private static void encodeLiterals(byte[] bytes, int token, int anchor, int literalLen, DataOutput out) throws IOException {
  out.writeByte((byte) token);

  // Lengths below 0x0F need no extension bytes.
  final boolean needsLengthExtension = literalLen >= 0x0F;
  if (needsLengthExtension) {
    encodeLen(literalLen - 0x0F, out);
  }

  out.writeBytes(bytes, anchor, literalLen);
}
 
Example 11
Source File: LZ4.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code l} as a run of 0xFF bytes, one for each full 0xFF contained
 * in the value, followed by a final byte holding the remainder.
 */
private static void encodeLen(int l, DataOutput out) throws IOException {
  int remaining = l;
  for (; remaining >= 0xFF; remaining -= 0xFF) {
    out.writeByte((byte) 0xFF);
  }
  out.writeByte((byte) remaining);
}
 
Example 12
Source File: CodecUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Expert: checks that the incoming {@link IndexInput} starts with an index
 * header whose segment ID equals the expected one, then re-emits that
 * header to the given {@link DataOutput}.  This is useful when building
 * compound files.
 * <p>
 * The codec name, version and suffix cannot be verified here and are
 * copied through unchanged.
 *
 * @param in Input stream, positioned at the point where the
 *        index header was previously written. Typically this is located
 *        at the beginning of the file.
 * @param out Output stream, where the header will be copied to.
 * @param expectedID Expected segment ID
 * @throws CorruptIndexException If the file is too small to hold a header
 *         and footer, if the first four bytes are not
 *         {@link #CODEC_MAGIC}, or if the <code>expectedID</code>
 *         does not match.
 * @throws IOException If there is an I/O error reading from the underlying medium.
 *
 * @lucene.internal 
 */
public static void verifyAndCopyIndexHeader(IndexInput in, DataOutput out, byte[] expectedID) throws IOException {
  // A valid sub-file must at least hold a footer plus the smallest possible header.
  final boolean tooSmall = in.length() < footerLength() + headerLength("");
  if (tooSmall) {
    throw new CorruptIndexException("compound sub-files must have a valid codec header and footer: file is too small (" + in.length() + " bytes)", in);
  }

  final int magic = in.readInt();
  if (magic != CODEC_MAGIC) {
    throw new CorruptIndexException("compound sub-files must have a valid codec header and footer: codec header mismatch: actual header=" + magic + " vs expected header=" + CodecUtil.CODEC_MAGIC, in);
  }

  // Not verifiable here; simply carried over to the output.
  final String codecName = in.readString();
  final int codecVersion = in.readInt();

  // The ID is the part that is actually checked.
  checkIndexHeaderID(in, expectedID);

  // The suffix is also carried over as-is: one unsigned length byte, then the bytes.
  final byte[] suffix = new byte[in.readByte() & 0xFF];
  in.readBytes(suffix, 0, suffix.length);

  // Re-emit the header that was just read and checked.
  out.writeInt(CodecUtil.CODEC_MAGIC);
  out.writeString(codecName);
  out.writeInt(codecVersion);
  out.writeBytes(expectedID, 0, expectedID.length);
  out.writeByte((byte) suffix.length);
  out.writeBytes(suffix, 0, suffix.length);
}
 
Example 13
Source File: CompressingStoredFieldsWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** 
 * Writes a long in a variable-length format.  Writes between one and 
 * ten bytes. Small values or values representing timestamps with day,
 * hour or second precision typically require fewer bytes.
 * <p>
 * ZLong --&gt; Header, Bytes*?
 * <ul>
 *    <li>Header --&gt; The first two bits indicate the compression scheme:
 *       <ul>
 *          <li>00 - uncompressed
 *          <li>01 - multiple of 1000 (second)
 *          <li>10 - multiple of 3600000 (hour)
 *          <li>11 - multiple of 86400000 (day)
 *       </ul>
 *       The next bit is a continuation bit telling whether more bytes
 *       follow, and the last 5 bits are the lower bits of the encoded
 *       value. To reconstruct the value, combine the 5 lower bits of the
 *       header with a vLong read from the following bytes (when the
 *       continuation bit is 1), zigzag-decode the result (see
 *       {@link BitUtil}) and multiply it by the multiple that corresponds
 *       to the compression scheme.
 *    <li>Bytes --&gt; Potential additional bytes to read depending on the
 *       header.
 * </ul>
 */
// T for "timestamp"
static void writeTLong(DataOutput out, long l) throws IOException {
  int header = 0;
  long scaled = l;

  // Only multiples of SECOND are scaled down; the coarsest matching unit wins.
  if (l % SECOND == 0) {
    if (l % DAY == 0) {
      // timestamp with day precision
      header = DAY_ENCODING;
      scaled = l / DAY;
    } else if (l % HOUR == 0) {
      // timestamp with hour precision, or day precision with a timezone
      header = HOUR_ENCODING;
      scaled = l / HOUR;
    } else {
      // timestamp with second precision
      header = SECOND_ENCODING;
      scaled = l / SECOND;
    }
  }

  final long zigZag = BitUtil.zigZagEncode(scaled);
  final long highBits = zigZag >>> 5;
  final boolean hasContinuation = highBits != 0;

  header |= (zigZag & 0x1F); // low 5 bits live in the header
  if (hasContinuation) {
    header |= 0x20;
  }

  out.writeByte((byte) header);
  if (hasContinuation) {
    out.writeVLong(highBits);
  }
}
 
Example 14
Source File: AnalyzingSuggester.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes this suggester to {@code output}. The entry count is always
 * written first; if an FST exists it is saved next, followed by
 * {@code maxAnalyzedPathsForOneInput} and a byte that is 1 when payloads
 * are present and 0 otherwise.
 *
 * @return {@code true} if an FST was written, {@code false} if only the count was
 */
@Override
public boolean store(DataOutput output) throws IOException {
  output.writeVLong(count);

  final boolean hasFst = fst != null;
  if (hasFst) {
    fst.save(output, output);
    output.writeVInt(maxAnalyzedPathsForOneInput);
    final byte payloadFlag = hasPayloads ? (byte) 1 : (byte) 0;
    output.writeByte(payloadFlag);
  }
  return hasFst;
}
 
Example 15
Source File: AbstractBlockPackedWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code i} in groups of 7 bits, lowest group first, setting the
 * high bit of every byte except the last. At most eight such continuation
 * bytes are produced; whatever remains afterwards is written as one final
 * byte, so the encoding never exceeds nine bytes.
 */
static void writeVLong(DataOutput out, long i) throws IOException {
  long remaining = i;
  for (int groups = 0; groups < 8 && (remaining & ~0x7FL) != 0L; groups++) {
    out.writeByte((byte) ((remaining & 0x7FL) | 0x80L));
    remaining >>>= 7;
  }
  out.writeByte((byte) remaining);
}
 
Example 16
Source File: SimpleTextUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Copies the bytes referenced by {@code b} to {@code out}, emitting an
 * {@code ESCAPE} byte in front of every byte equal to {@code NEWLINE} or
 * {@code ESCAPE}.
 */
public static void write(DataOutput out, BytesRef b) throws IOException {
  final byte[] data = b.bytes;
  final int base = b.offset;
  final int count = b.length;
  for (int i = 0; i < count; i++) {
    final byte current = data[base + i];
    final boolean needsEscape = current == NEWLINE || current == ESCAPE;
    if (needsEscape) {
      out.writeByte(ESCAPE);
    }
    out.writeByte(current);
  }
}
 
Example 17
Source File: DocIdsWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code count} doc IDs taken from {@code docIds} starting at
 * {@code start}. A leading marker byte selects the layout:
 * <ul>
 *   <li>0 - the IDs are in non-decreasing order and are written as vInt
 *       deltas from the previous ID (the first delta is from 0)
 *   <li>24 - every ID fits in 24 bits and is written as a short holding
 *       the upper 16 bits followed by a byte holding the lowest 8 bits
 *   <li>32 - each ID is written as a full int
 * </ul>
 */
static void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOException {
  // docs can be sorted either when all docs in a block have the same value
  // or when a segment is sorted
  boolean ascending = true;
  for (int i = 1; ascending && i < count; ++i) {
    ascending = docIds[start + i - 1] <= docIds[start + i];
  }

  if (ascending) {
    out.writeByte((byte) 0);
    int last = 0;
    for (int i = 0; i < count; ++i) {
      final int current = docIds[start + i];
      out.writeVInt(current - last);
      last = current;
    }
    return;
  }

  // OR all IDs together (as unsigned) to find the highest bit in use.
  long usedBits = 0;
  for (int i = 0; i < count; ++i) {
    usedBits |= Integer.toUnsignedLong(docIds[start + i]);
  }

  final boolean fitsIn24Bits = usedBits <= 0xffffff;
  out.writeByte((byte) (fitsIn24Bits ? 24 : 32));
  for (int i = 0; i < count; ++i) {
    final int current = docIds[start + i];
    if (fitsIn24Bits) {
      out.writeShort((short) (current >>> 8));
      out.writeByte((byte) current);
    } else {
      out.writeInt(current);
    }
  }
}
 
Example 18
Source File: SimpleTextUtil.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Writes a single {@code NEWLINE} byte to {@code out}.
 *
 * @param out destination to write to
 * @throws IOException if the write fails
 */
public static void writeNewline(DataOutput out) throws IOException {
  out.writeByte(NEWLINE);
}
 
Example 19
Source File: SimplePrimaryNode.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Serves indexing commands read from {@code in} until the client sends
 * {@code CMD_INDEXING_DONE}, the input reaches end-of-file, or {@code stop}
 * is set while waiting for data.
 * <p>
 * Every recognized command is acknowledged by writing a single byte with
 * value 1 to {@code out} and flushing {@code bos}.
 *
 * @throws IllegalArgumentException if an unrecognized command byte is received
 * @throws InterruptedException if interrupted while sleeping between polls
 */
private void handleIndexing(Socket socket, AtomicBoolean stop, InputStream is, DataInput in, DataOutput out, BufferedOutputStream bos) throws IOException, InterruptedException {
  Thread.currentThread().setName("indexing");
  message("start handling indexing socket=" + socket);

  boolean finished = false;
  while (!finished) {
    // Poll for input so that the stop flag is re-checked every 10 ms.
    while (is.available() <= 0) {
      if (stop.get()) {
        return;
      }
      Thread.sleep(10);
    }

    byte cmd;
    try {
      cmd = in.readByte();
    } catch (EOFException eofe) {
      // done
      return;
    }

    if (cmd == CMD_ADD_DOC) {
      handleAddDocument(in, out);
    } else if (cmd == CMD_UPDATE_DOC) {
      handleUpdateDocument(in, out);
    } else if (cmd == CMD_DELETE_DOC) {
      handleDeleteDocument(in, out);
    } else if (cmd == CMD_DELETE_ALL_DOCS) {
      writer.deleteAll();
    } else if (cmd == CMD_FORCE_MERGE) {
      writer.forceMerge(1);
    } else if (cmd == CMD_INDEXING_DONE) {
      finished = true;
    } else {
      throw new IllegalArgumentException("cmd must be add, update or delete; got " + cmd);
    }

    // Acknowledge the command that was just handled.
    out.writeByte((byte) 1);
    bos.flush();
  }
}
 
Example 20
Source File: CodecUtil.java    From lucene-solr with Apache License 2.0 3 votes vote down vote up
/**
 * Writes a codec header for an index file: a string identifying the file
 * format, a version number, and data identifying the file instance (an ID
 * plus an auxiliary suffix such as a generation).
 * <p>
 * This header can be parsed and validated with 
 * {@link #checkIndexHeader(DataInput, String, int, int, byte[], String) checkIndexHeader()}.
 * <p>
 * IndexHeader --&gt; CodecHeader,ObjectID,ObjectSuffix
 * <ul>
 *    <li>CodecHeader   --&gt; {@link #writeHeader}
 *    <li>ObjectID     --&gt; {@link DataOutput#writeByte byte}<sup>16</sup>
 *    <li>ObjectSuffix --&gt; SuffixLength,SuffixBytes
 *    <li>SuffixLength  --&gt; {@link DataOutput#writeByte byte}
 *    <li>SuffixBytes   --&gt; {@link DataOutput#writeByte byte}<sup>SuffixLength</sup>
 * </ul>
 * <p>
 * Note that the length of an index header depends only upon the
 * name of the codec and suffix, so this length can be computed at any time
 * with {@link #indexHeaderLength(String,String)}.
 * <p>
 * The suffix is validated only after the codec header and ID have been
 * written, so a rejected suffix leaves those bytes in {@code out}.
 * 
 * @param out Output stream
 * @param codec String to identify the format of this file. It should be simple ASCII, 
 *              less than 128 characters in length.
 * @param version Version number
 * @param id Unique identifier for this particular file instance.
 * @param suffix auxiliary suffix information for the file. It should be simple ASCII,
 *              less than 256 characters in length.
 * @throws IOException If there is an I/O error writing to the underlying medium.
 * @throws IllegalArgumentException If the codec name is not simple ASCII, or 
 *         is more than 127 characters in length, or if id is invalid,
 *         or if the suffix is not simple ASCII, or more than 255 characters
 *         in length.
 */
public static void writeIndexHeader(DataOutput out, String codec, int version, byte[] id, String suffix) throws IOException {
  final boolean idHasExpectedLength = id.length == StringHelper.ID_LENGTH;
  if (!idHasExpectedLength) {
    throw new IllegalArgumentException("Invalid id: " + StringHelper.idToString(id));
  }

  writeHeader(out, codec, version);
  out.writeBytes(id, 0, id.length);

  final BytesRef encodedSuffix = new BytesRef(suffix);
  final int encodedLength = encodedSuffix.length;
  // A byte count different from the char count means the suffix was not plain ASCII.
  final boolean simpleAscii = encodedLength == suffix.length();
  if (!simpleAscii || encodedLength >= 256) {
    throw new IllegalArgumentException("suffix must be simple ASCII, less than 256 characters in length [got " + suffix + "]");
  }

  out.writeByte((byte) encodedLength);
  out.writeBytes(encodedSuffix.bytes, encodedSuffix.offset, encodedLength);
}