org.apache.lucene.store.DataOutput Java Examples

The following examples show how to use org.apache.lucene.store.DataOutput. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BinaryDictionaryWriter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the part-of-speech dictionary to {@code path}, creating the parent
 * directories of {@code path} first.
 *
 * <p>Layout: codec header, a vInt holding {@code posDict.size()}, then one byte per
 * entry containing the ordinal of the entry's {@code POS.Tag}. A {@code null} entry
 * is written as {@code POS.Tag.UNKNOWN}.
 *
 * @param path destination file
 * @throws IOException if the file cannot be created or written
 * @throws IllegalArgumentException if {@code CSVUtil.parse} does not yield exactly two
 *         values for an entry
 */
private void writePosDict(Path path) throws IOException {
  Files.createDirectories(path.getParent());
  try (OutputStream fileStream = Files.newOutputStream(path);
       OutputStream buffered = new BufferedOutputStream(fileStream)) {
    final DataOutput out = new OutputStreamDataOutput(buffered);
    CodecUtil.writeHeader(out, BinaryDictionary.POSDICT_HEADER, BinaryDictionary.VERSION);
    out.writeVInt(posDict.size());
    for (String entry : posDict) {
      final POS.Tag tag;
      if (entry == null) {
        tag = POS.Tag.UNKNOWN;
      } else {
        final String[] fields = CSVUtil.parse(entry);
        if (fields.length != 2) {
          throw new IllegalArgumentException("Malformed pos/inflection: " + entry + "; expected 2 characters");
        }
        // only the first value is persisted
        tag = POS.Tag.valueOf(fields[0]);
      }
      out.writeByte((byte) tag.ordinal());
    }
  }
}
 
Example #2
Source File: ConnectionCostsWriter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the connection cost matrix below {@code baseDir}.
 *
 * <p>The file name is the fully qualified name of {@link ConnectionCosts} with dots
 * replaced by slashes, plus {@code ConnectionCosts.FILENAME_SUFFIX}, so the file lives
 * in package sub-directories of {@code baseDir}. Those directories (and
 * {@code baseDir} itself) are created if they do not exist yet.
 *
 * <p>Layout: codec header, {@code forwardSize} and {@code backwardSize} as vInts, then
 * every 16-bit cost as a zig-zag encoded delta against the previous cost.
 *
 * @param baseDir root directory of the generated resources
 * @throws IOException if the directories or the file cannot be created or written
 */
public void write(Path baseDir) throws IOException {
  String fileName = ConnectionCosts.class.getName().replace('.', '/') + ConnectionCosts.FILENAME_SUFFIX;
  Path path = baseDir.resolve(fileName);
  // Create the package directories of the target file, not just baseDir; otherwise
  // opening the stream fails when no other writer has created them beforehand.
  Files.createDirectories(path.getParent());
  try (OutputStream os = Files.newOutputStream(path);
       OutputStream bos = new BufferedOutputStream(os)) {
    final DataOutput out = new OutputStreamDataOutput(bos);
    CodecUtil.writeHeader(out, ConnectionCosts.HEADER, ConnectionCosts.VERSION);
    out.writeVInt(forwardSize);
    out.writeVInt(backwardSize);
    int last = 0;
    for (int i = 0; i < costs.limit() / 2; i++) {
      short cost = costs.getShort(i * 2);
      int delta = (int) cost - last;
      out.writeZInt(delta);
      last = cost;
    }
  }
}
 
Example #3
Source File: UniformSplitRot13PostingsFormat.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a block encoder that passes each block through
 * {@code Rot13CypherTestUtil.encode} and exposes the result as writable bytes.
 * Writing the encoded bytes also sets the {@code encoderCalled} flag.
 */
protected BlockEncoder getBlockEncoder() {
  return (input, inputLength) -> {
    final int blockLength = Math.toIntExact(inputLength);
    final byte[] encoded = Rot13CypherTestUtil.encode(input, blockLength);
    return new BlockEncoder.WritableBytes() {
      @Override
      public long size() {
        return encoded.length;
      }

      @Override
      public void writeTo(DataOutput dataOutput) throws IOException {
        // record that the encoder was actually exercised
        encoderCalled = true;
        dataOutput.writeBytes(encoded, 0, encoded.length);
      }
    };
  };
}
 
Example #4
Source File: BinaryDictionaryWriter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the part-of-speech dictionary to {@code path}, creating the parent
 * directories of {@code path} first.
 *
 * <p>Layout: codec header, a vInt holding {@code posDict.size()}, then three values per
 * entry. A {@code null} entry is written as three zero bytes; any other entry is parsed
 * with {@code CSVUtil.parse} and its three values are written as strings.
 *
 * @param path destination file
 * @throws IOException if the file cannot be created or written
 * @throws IllegalArgumentException if {@code CSVUtil.parse} does not yield exactly three
 *         values for an entry
 */
private void writePosDict(Path path) throws IOException {
  Files.createDirectories(path.getParent());
  try (OutputStream fileStream = Files.newOutputStream(path);
       OutputStream buffered = new BufferedOutputStream(fileStream)) {
    final DataOutput out = new OutputStreamDataOutput(buffered);
    CodecUtil.writeHeader(out, BinaryDictionary.POSDICT_HEADER, BinaryDictionary.VERSION);
    out.writeVInt(posDict.size());
    for (String entry : posDict) {
      if (entry == null) {
        for (int slot = 0; slot < 3; slot++) {
          out.writeByte((byte)0);
        }
        continue;
      }
      final String[] fields = CSVUtil.parse(entry);
      if (fields.length != 3) {
        throw new IllegalArgumentException("Malformed pos/inflection: " + entry + "; expected 3 characters");
      }
      // exactly three values at this point, written in order
      for (String field : fields) {
        out.writeString(field);
      }
    }
  }
}
 
Example #5
Source File: ForUtil.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link ForUtil} and records its configuration in <code>out</code>.
 *
 * <p>Writes {@code PackedInts.VERSION_CURRENT} as a vInt, then, for every bits-per-value
 * from 1 to 32, one vInt packing the chosen format id (shifted left by 5) together with
 * the chosen bits-per-value minus one. The per-bpv encoded sizes, encoders, decoders and
 * iteration counts are cached in arrays indexed by bits-per-value (index 0 is unused).
 *
 * @param acceptableOverheadRatio passed to {@code PackedInts.fastestFormatAndBits}
 * @param out destination for the saved state
 * @throws IOException if writing to {@code out} fails
 */
ForUtil(float acceptableOverheadRatio, DataOutput out) throws IOException {
  out.writeVInt(PackedInts.VERSION_CURRENT);
  encodedSizes = new int[33];
  encoders = new PackedInts.Encoder[33];
  decoders = new PackedInts.Decoder[33];
  iterations = new int[33];

  int bpv = 1;
  while (bpv <= 32) {
    final FormatAndBits chosen = PackedInts.fastestFormatAndBits(BLOCK_SIZE, bpv, acceptableOverheadRatio);
    assert chosen.format.isSupported(chosen.bitsPerValue);
    assert chosen.bitsPerValue <= 32;

    encodedSizes[bpv] = encodedSize(chosen.format, PackedInts.VERSION_CURRENT, chosen.bitsPerValue);
    encoders[bpv] = PackedInts.getEncoder(chosen.format, PackedInts.VERSION_CURRENT, chosen.bitsPerValue);
    decoders[bpv] = PackedInts.getDecoder(chosen.format, PackedInts.VERSION_CURRENT, chosen.bitsPerValue);
    iterations[bpv] = computeIterations(decoders[bpv]);

    // format id in the upper bits, (bitsPerValue - 1) in the low 5 bits
    final int packed = (chosen.format.getId() << 5) | (chosen.bitsPerValue - 1);
    out.writeVInt(packed);
    ++bpv;
  }
}
 
Example #6
Source File: CharacterDefinitionWriter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the character definition below {@code baseDir}, creating the parent
 * directories of the target file first.
 *
 * <p>Layout: codec header, the raw {@code characterCategoryMap} bytes, then one flag
 * byte per character class: bit 0 is set from {@code invokeMap}, bit 1 from
 * {@code groupMap}.
 *
 * @param baseDir root directory of the generated resources
 * @throws IOException if the directories or the file cannot be created or written
 */
public void write(Path baseDir) throws IOException {
  final String resourceName = CharacterDefinition.class.getName().replace('.', '/') + CharacterDefinition.FILENAME_SUFFIX;
  final Path target = baseDir.resolve(resourceName);
  Files.createDirectories(target.getParent());
  try (OutputStream stream = new BufferedOutputStream(Files.newOutputStream(target))) {
    final DataOutput out = new OutputStreamDataOutput(stream);
    CodecUtil.writeHeader(out, CharacterDefinition.HEADER, CharacterDefinition.VERSION);
    out.writeBytes(characterCategoryMap, 0, characterCategoryMap.length);
    for (int classId = 0; classId < CharacterDefinition.CLASS_COUNT; classId++) {
      int flags = 0x00;
      if (invokeMap[classId]) {
        flags |= 0x01;
      }
      if (groupMap[classId]) {
        flags |= 0x02;
      }
      out.writeByte((byte) flags);
    }
  }
}
 
Example #7
Source File: SimplePrimaryNode.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Reads one document from {@code in} and applies it to {@code writer} as an update
 * keyed on the {@code docid} term.
 *
 * <p>The input is a vInt field count followed by that many (name, value) string pairs.
 * Only the field names {@code docid}, {@code marker}, {@code title} and {@code body}
 * are accepted. {@code out} is not used by this method.
 *
 * @throws IOException if reading from {@code in} or updating the index fails
 * @throws IllegalArgumentException if an unknown field name is received
 */
private void handleUpdateDocument(DataInput in, DataOutput out) throws IOException {
  final int fieldCount = in.readVInt();
  final Document doc = new Document();
  String docid = null;
  for (int remaining = fieldCount; remaining > 0; remaining--) {
    final String name = in.readString();
    final String value = in.readString();
    // NOTE: clearly NOT general!
    switch (name) {
      case "docid":
        docid = value;
        doc.add(new StringField("docid", value, Field.Store.YES));
        break;
      case "marker":
        doc.add(new StringField("marker", value, Field.Store.YES));
        break;
      case "title":
        doc.add(new StringField("title", value, Field.Store.YES));
        doc.add(new Field("titleTokenized", value, tokenizedWithTermVectors));
        break;
      case "body":
        doc.add(new Field("body", value, tokenizedWithTermVectors));
        break;
      default:
        throw new IllegalArgumentException("unhandled field name " + name);
    }
  }

  // NOTE(review): docid is still null here if no "docid" field was received
  writer.updateDocument(new Term("docid", docid), doc);
}
 
Example #8
Source File: SimplePrimaryNode.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Reads one document from {@code in} and adds it to {@code writer}.
 *
 * <p>The input is a vInt field count followed by that many (name, value) string pairs.
 * Only the field names {@code docid}, {@code marker}, {@code title} and {@code body}
 * are accepted. {@code out} is not used by this method.
 *
 * @throws IOException if reading from {@code in} or adding to the index fails
 * @throws IllegalArgumentException if an unknown field name is received
 */
private void handleAddDocument(DataInput in, DataOutput out) throws IOException {
  final int fieldCount = in.readVInt();
  final Document doc = new Document();
  for (int remaining = fieldCount; remaining > 0; remaining--) {
    final String name = in.readString();
    final String value = in.readString();
    // NOTE: clearly NOT general!
    switch (name) {
      case "docid":
      case "marker":
        doc.add(new StringField(name, value, Field.Store.YES));
        break;
      case "title":
        doc.add(new StringField("title", value, Field.Store.YES));
        doc.add(new Field("titleTokenized", value, tokenizedWithTermVectors));
        break;
      case "body":
        doc.add(new Field("body", value, tokenizedWithTermVectors));
        break;
      default:
        throw new IllegalArgumentException("unhandled field name " + name);
    }
  }
  writer.addDocument(doc);
}
 
Example #9
Source File: ByteSliceReader.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Copies the remaining bytes of this reader to {@code out}, slice by slice.
 *
 * <p>For each slice the bytes from {@code upto} up to {@code limit} of the current
 * buffer are written; {@code nextSlice()} is called after every slice except the one
 * where {@code limit + bufferOffset} equals {@code endIndex}.
 *
 * @param out destination of the bytes
 * @return total number of bytes written
 * @throws IOException if writing to {@code out} fails
 */
public long writeTo(DataOutput out) throws IOException {
  long written = 0;
  boolean lastSlice;
  do {
    lastSlice = (limit + bufferOffset == endIndex);
    assert !lastSlice || endIndex - bufferOffset >= upto;
    final int chunkLength = limit - upto;
    out.writeBytes(buffer, upto, chunkLength);
    written += chunkLength;
    if (!lastSlice) {
      nextSlice();
    }
  } while (!lastSlice);

  return written;
}
 
Example #10
Source File: LZ4.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes one sequence: token and literals (via {@code encodeLiterals}), the match
 * distance as two bytes (low byte first), and, when the match length does not fit in
 * the token's low nibble, the remaining match length (via {@code encodeLen}).
 *
 * @param bytes    source buffer
 * @param anchor   offset of the first literal byte
 * @param matchRef offset of the earlier occurrence the match refers back to
 * @param matchOff offset where the match starts; literals span {@code [anchor, matchOff)}
 * @param matchLen length of the match, asserted to be at least 4
 * @param out      destination
 * @throws IOException if writing to {@code out} fails
 */
private static void encodeSequence(byte[] bytes, int anchor, int matchRef, int matchOff, int matchLen, DataOutput out) throws IOException {
  assert matchLen >= 4;
  final int literalLen = matchOff - anchor;

  // token: literal length in the high nibble, (match length - 4) in the low nibble,
  // each capped at 0x0F
  final int literalNibble = Math.min(literalLen, 0x0F);
  final int matchNibble = Math.min(matchLen - 4, 0x0F);
  final int token = (literalNibble << 4) | matchNibble;
  encodeLiterals(bytes, token, anchor, literalLen, out);

  // match distance, low byte then high byte
  final int matchDec = matchOff - matchRef;
  assert matchDec > 0 && matchDec < 1 << 16;
  final byte lowByte = (byte) matchDec;
  final byte highByte = (byte) (matchDec >>> 8);
  out.writeByte(lowByte);
  out.writeByte(highByte);

  // remainder of the match length that did not fit in the token
  if (matchLen >= MIN_MATCH + 0x0F) {
    encodeLen(matchLen - 0x0F - MIN_MATCH, out);
  }
}
 
Example #11
Source File: Lucene84SkipWriter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the competitive (freq, norm) pairs of {@code acc} to {@code out} as deltas
 * against the previous pair, starting from (0, 0).
 *
 * <p>Each pair starts with a vInt holding {@code freqDelta << 1}; its lowest bit is set
 * when a zig-zag long with the norm delta follows. Both deltas are stored minus one.
 *
 * @throws IOException if writing to {@code out} fails
 */
static void writeImpacts(CompetitiveImpactAccumulator acc, DataOutput out) throws IOException {
  final Collection<Impact> pairs = acc.getCompetitiveFreqNormPairs();
  Impact prev = new Impact(0, 0);
  for (Impact current : pairs) {
    assert current.freq > prev.freq;
    assert Long.compareUnsigned(current.norm, prev.norm) > 0;
    final int freqDelta = current.freq - prev.freq - 1;
    final long normDelta = current.norm - prev.norm - 1;
    if (normDelta != 0) {
      out.writeVInt((freqDelta << 1) | 1);
      out.writeZLong(normDelta);
    } else {
      // most of time, norm only increases by 1, so we can fold everything in a single byte
      out.writeVInt(freqDelta << 1);
    }
    prev = current;
  }
}
 
Example #12
Source File: CharacterDefinitionWriter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the character definition below {@code baseDir}, creating the parent
 * directories of the target file first.
 *
 * <p>Layout: codec header, the raw {@code characterCategoryMap} bytes, then one flag
 * byte per character class: bit 0 is set from {@code invokeMap}, bit 1 from
 * {@code groupMap}.
 *
 * @param baseDir root directory of the generated resources
 * @throws IOException if the directories or the file cannot be created or written
 */
public void write(Path baseDir) throws IOException {
  final String relativeName = CharacterDefinition.class.getName().replace('.', '/') + CharacterDefinition.FILENAME_SUFFIX;
  final Path destination = baseDir.resolve(relativeName);
  Files.createDirectories(destination.getParent());
  try (OutputStream buffered = new BufferedOutputStream(Files.newOutputStream(destination))) {
    final DataOutput out = new OutputStreamDataOutput(buffered);
    CodecUtil.writeHeader(out, CharacterDefinition.HEADER, CharacterDefinition.VERSION);
    out.writeBytes(characterCategoryMap, 0, characterCategoryMap.length);
    for (int cls = 0; cls < CharacterDefinition.CLASS_COUNT; cls++) {
      final int invokeBit = invokeMap[cls] ? 0x01 : 0x00;
      final int groupBit = groupMap[cls] ? 0x02 : 0x00;
      out.writeByte((byte) (invokeBit | groupBit));
    }
  }
}
 
Example #13
Source File: JaspellLookup.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes {@code node} and its subtree to {@code out}; does nothing for a {@code null}
 * node.
 *
 * <p>Per node: the split character as a one-character string, a mask byte flagging
 * which of the low/equal/high kids exist and whether the node has a value, then the
 * value as a long if present. The kids follow recursively in low, equal, high order.
 *
 * @throws IOException if writing to {@code out} fails
 */
private void writeRecursively(DataOutput out, TSTNode node) throws IOException {
  if (node == null) {
    return;
  }
  out.writeString(Character.toString(node.splitchar));

  final boolean hasValue = node.data != null;
  int flags = 0;
  if (node.relatives[TSTNode.LOKID] != null) {
    flags |= LO_KID;
  }
  if (node.relatives[TSTNode.EQKID] != null) {
    flags |= EQ_KID;
  }
  if (node.relatives[TSTNode.HIKID] != null) {
    flags |= HI_KID;
  }
  if (hasValue) {
    flags |= HAS_VALUE;
  }
  out.writeByte((byte) flags);

  if (hasValue) {
    out.writeLong(((Number)node.data).longValue());
  }
  writeRecursively(out, node.relatives[TSTNode.LOKID]);
  writeRecursively(out, node.relatives[TSTNode.EQKID]);
  writeRecursively(out, node.relatives[TSTNode.HIKID]);
}
 
Example #14
Source File: Lucene50PostingsWriter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the metadata of one term to {@code out}.
 *
 * <p>File pointers are written as vLong deltas against {@code lastState}; when
 * {@code absolute} is set, {@code lastState} is first reset to {@code emptyState}.
 * The singleton doc id, last position block offset and skip offset are only written
 * when they differ from {@code -1}. Afterwards the given state becomes
 * {@code lastState}.
 *
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
  final IntBlockTermState current = (IntBlockTermState)_state;
  if (absolute) {
    lastState = emptyState;
  }

  // file pointer deltas
  out.writeVLong(current.docStartFP - lastState.docStartFP);
  if (writePositions) {
    out.writeVLong(current.posStartFP - lastState.posStartFP);
    if (writePayloads || writeOffsets) {
      out.writeVLong(current.payStartFP - lastState.payStartFP);
    }
  }

  // optional values, present only when not -1
  if (current.singletonDocID != -1) {
    out.writeVInt(current.singletonDocID);
  }
  if (writePositions && current.lastPosBlockOffset != -1) {
    out.writeVLong(current.lastPosBlockOffset);
  }
  if (current.skipOffset != -1) {
    out.writeVLong(current.skipOffset);
  }

  lastState = current;
}
 
Example #15
Source File: AbstractBlockPackedWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code i} in a variable-length format: 7 bits per byte, lowest bits first,
 * with the high bit of a byte set when more bytes follow.
 *
 * <p>At most eight such continuation bytes are written; whatever remains of {@code i}
 * afterwards is written as one final byte, so the encoding never exceeds nine bytes.
 *
 * @throws IOException if writing to {@code out} fails
 */
static void writeVLong(DataOutput out, long i) throws IOException {
  for (int emitted = 0; emitted < 8 && (i & ~0x7FL) != 0L; emitted++) {
    out.writeByte((byte)((i & 0x7FL) | 0x80L));
    i >>>= 7;
  }
  out.writeByte((byte) i);
}
 
Example #16
Source File: ForDeltaUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Encode deltas of a strictly monotonically increasing sequence of integers.
 * The provided {@code longs} are expected to be deltas between consecutive values.
 *
 * <p>If every delta equals 1, only a single zero byte is written. Otherwise one byte
 * with the number of bits per value is written, followed by the values encoded through
 * {@code forUtil}.
 *
 * @throws IOException if writing to {@code out} fails
 */
void encodeDeltas(long[] longs, DataOutput out) throws IOException {
  final boolean allOnes = longs[0] == 1 && PForUtil.allEqual(longs);
  if (allOnes) { // happens with very dense postings
    out.writeByte((byte) 0);
    return;
  }

  // OR all deltas together to find the widest value
  long combined = 0;
  for (int idx = 0; idx < longs.length; idx++) {
    combined |= longs[idx];
  }
  assert combined != 0;
  final int bitsPerValue = PackedInts.bitsRequired(combined);
  out.writeByte((byte) bitsPerValue);
  forUtil.encode(longs, bitsPerValue, out);
}
 
Example #17
Source File: PackedInts.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Save this mutable into <code>out</code>. Instantiating a reader from
 * the generated data will return a reader with the same number of bits
 * per value.
 *
 * <p>A header is written first, then every value in index order.
 *
 * @param out destination of the serialized values
 * @throws IOException if writing to {@code out} fails
 */
public void save(DataOutput out) throws IOException {
  final Writer sink = getWriterNoHeader(out, getFormat(), size(), getBitsPerValue(), DEFAULT_BUFFER_SIZE);
  sink.writeHeader();
  int index = 0;
  while (index < size()) {
    sink.add(get(index));
    ++index;
  }
  sink.finish();
}
 
Example #18
Source File: DirectWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns an instance suitable for encoding {@code numValues} using {@code bitsPerValue}.
 *
 * @throws IllegalArgumentException if {@code bitsPerValue} is not one of
 *         {@code SUPPORTED_BITS_PER_VALUE}
 */
public static DirectWriter getInstance(DataOutput output, long numValues, int bitsPerValue) {
  final boolean supported = Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
  if (supported) {
    return new DirectWriter(output, numValues, bitsPerValue);
  }
  throw new IllegalArgumentException("Unsupported bitsPerValue " + bitsPerValue + ". Did you use bitsRequired?");
}
 
Example #19
Source File: LZ4.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a length as a run of {@code 0xFF} bytes, one for every full 255 contained in
 * {@code l}, followed by one byte holding the remainder.
 *
 * @throws IOException if writing to {@code out} fails
 */
private static void encodeLen(int l, DataOutput out) throws IOException {
  int remaining = l;
  for (; remaining >= 0xFF; remaining -= 0xFF) {
    out.writeByte((byte) 0xFF);
  }
  out.writeByte((byte) remaining);
}
 
Example #20
Source File: CompressingStoredFieldsWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a long in a variable-length format.  Writes between one and
 * ten bytes. Small values or values representing timestamps with day,
 * hour or second precision typically require fewer bytes.
 * <p>
 * TLong --&gt; Header, Bytes*?
 * <ul>
 *    <li>Header --&gt; The first two bits indicate the compression scheme:
 *       <ul>
 *          <li>00 - uncompressed
 *          <li>01 - multiple of 1000 (second)
 *          <li>10 - multiple of 3600000 (hour)
 *          <li>11 - multiple of 86400000 (day)
 *       </ul>
 *       The next bit is a continuation bit telling whether more bytes
 *       follow, and the last 5 bits are the lower bits of the encoded
 *       value. To reconstruct the value, combine the 5 lower bits of the
 *       header with a vLong read from the following bytes (only if the
 *       continuation bit is 1), then
 *       {@link BitUtil#zigZagDecode(long) zigzag-decode} the result and
 *       finally multiply by the multiple of the compression scheme.
 *    <li>Bytes --&gt; Potential additional bytes to read depending on the
 *       header.
 * </ul>
 */
// T for "timestamp"
static void writeTLong(DataOutput out, long l) throws IOException {
  final int scheme;
  final long scaled;
  if (l % SECOND != 0) {
    // not a whole number of seconds: stored as is
    scheme = 0;
    scaled = l;
  } else if (l % DAY == 0) {
    // timestamp with day precision
    scheme = DAY_ENCODING;
    scaled = l / DAY;
  } else if (l % HOUR == 0) {
    // timestamp with hour precision, or day precision with a timezone
    scheme = HOUR_ENCODING;
    scaled = l / HOUR;
  } else {
    // timestamp with second precision
    scheme = SECOND_ENCODING;
    scaled = l / SECOND;
  }

  final long zigZag = BitUtil.zigZagEncode(scaled);
  final int lowBits = (int) (zigZag & 0x1F); // last 5 bits
  final long upperBits = zigZag >>> 5;
  if (upperBits == 0) {
    out.writeByte((byte) (scheme | lowBits));
  } else {
    // 0x20 is the continuation bit: the upper bits follow as a vLong
    out.writeByte((byte) (scheme | lowBits | 0x20));
    out.writeVLong(upperBits);
  }
}
 
Example #21
Source File: IntSequenceOutputs.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code prefix} to {@code out}: its length as a vInt, followed by each of its
 * ints (starting at {@code prefix.offset}) as a vInt.
 *
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(IntsRef prefix, DataOutput out) throws IOException {
  assert prefix != null;
  out.writeVInt(prefix.length);
  int cursor = prefix.offset;
  for (int remaining = prefix.length; remaining > 0; remaining--) {
    out.writeVInt(prefix.ints[cursor++]);
  }
}
 
Example #22
Source File: FreeTextSuggester.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes this instance to {@code output}: a codec header, then {@code count}
 * (vLong), {@code separator} (byte), {@code grams} (vInt) and {@code totTokens}
 * (vLong), followed by the FST.
 *
 * @param output destination of the serialized state
 * @return always {@code true}
 * @throws IOException if writing to {@code output} fails
 */
@Override
public boolean store(DataOutput output) throws IOException {
  CodecUtil.writeHeader(output, CODEC_NAME, VERSION_CURRENT);
  output.writeVLong(count);
  output.writeByte(separator);
  output.writeVInt(grams);
  output.writeVLong(totTokens);
  // the same output is passed for both arguments of fst.save
  fst.save(output, output);
  return true;
}
 
Example #23
Source File: SortedSetSortField.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes this sort field to {@code out}: the field name, the reverse flag (1 or 0),
 * the selector ordinal, and a code for the missing value
 * (1 = {@code STRING_FIRST}, 2 = {@code STRING_LAST}, 0 = anything else).
 *
 * @throws IOException if writing to {@code out} fails
 */
private void serialize(DataOutput out) throws IOException {
  out.writeString(getField());
  out.writeInt(reverse ? 1 : 0);
  out.writeInt(selector.ordinal());

  // the sentinels are compared by identity
  final int missingCode;
  if (missingValue == SortField.STRING_FIRST) {
    missingCode = 1;
  } else if (missingValue == SortField.STRING_LAST) {
    missingCode = 2;
  } else {
    missingCode = 0;
  }
  out.writeInt(missingCode);
}
 
Example #24
Source File: LZ4.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the token byte, the extended literal length if one is needed, and then the
 * literal bytes themselves.
 *
 * @param bytes      source buffer holding the literals
 * @param token      token value; it is written as a single byte
 * @param anchor     offset in {@code bytes} of the first literal
 * @param literalLen number of literal bytes to write
 * @param out        destination
 * @throws IOException if writing to {@code out} fails
 */
private static void encodeLiterals(byte[] bytes, int token, int anchor, int literalLen, DataOutput out) throws IOException {
  out.writeByte((byte) token);

  // encode literal length
  // (only the part at or above 0x0F is written here, via encodeLen)
  if (literalLen >= 0x0F) {
    encodeLen(literalLen - 0x0F, out);
  }

  // encode literals
  out.writeBytes(bytes, anchor, literalLen);
}
 
Example #25
Source File: BKDWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * For every index dimension whose values are not fully covered by the common prefix,
 * writes the minimum and then the maximum suffix found among the {@code count} packed
 * values. Dimensions with an empty suffix are skipped.
 *
 * @param out                 destination
 * @param commonPrefixLengths common prefix length per dimension
 * @param count               number of values, passed on to {@code computeMinMax}
 * @param packedValues        supplier of packed values, passed on to {@code computeMinMax}
 * @throws IOException if writing to {@code out} fails
 */
private void writeActualBounds(DataOutput out, int[] commonPrefixLengths, int count, IntFunction<BytesRef> packedValues) throws IOException {
  for (int dim = 0; dim < numIndexDims; ++dim) {
    final int prefixLength = commonPrefixLengths[dim];
    final int suffixLength = bytesPerDim - prefixLength;
    if (suffixLength <= 0) {
      continue;
    }
    final int suffixStart = dim * bytesPerDim + prefixLength;
    final BytesRef[] bounds = computeMinMax(count, packedValues, suffixStart, suffixLength);
    final BytesRef lower = bounds[0];
    final BytesRef upper = bounds[1];
    out.writeBytes(lower.bytes, lower.offset, lower.length);
    out.writeBytes(upper.bytes, upper.offset, upper.length);
  }
}
 
Example #26
Source File: IDVersionPostingsWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the metadata of one term: the doc id as a vInt, then the id version, either
 * as a vLong (when {@code absolute}) or as a zig-zag delta against the previously
 * encoded version. The id version is remembered for the next call.
 *
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
  final IDVersionTermState termState = (IDVersionTermState) _state;
  out.writeVInt(termState.docID);
  if (!absolute) {
    out.writeZLong(termState.idVersion - lastEncodedVersion);
  } else {
    out.writeVLong(termState.idVersion);
  }
  lastEncodedVersion = termState.idVersion;
}
 
Example #27
Source File: NRTSuggesterBuilder.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds and stores a FST that can be loaded with
 * {@link NRTSuggester#load(IndexInput, CompletionPostingsFormat.FSTLoadMode)}.
 *
 * <p>After the FST, {@code maxAnalyzedPathsPerOutput}, {@code END_BYTE} and
 * {@code PAYLOAD_SEP} are written as vInts.
 *
 * @param output destination of the FST and the meta-info
 * @return {@code false} if the compiled FST is {@code null} (nothing is written in
 *         that case), {@code true} otherwise
 * @throws IOException if writing to {@code output} fails
 */
public boolean store(DataOutput output) throws IOException {
  final FST<PairOutputs.Pair<Long, BytesRef>> fst = fstCompiler.compile();
  if (fst == null) {
    return false;
  }
  // the same output is passed for both arguments of fst.save
  fst.save(output, output);

  /* write some more  meta-info */
  assert maxAnalyzedPathsPerOutput > 0;
  output.writeVInt(maxAnalyzedPathsPerOutput);
  output.writeVInt(END_BYTE);
  output.writeVInt(PAYLOAD_SEP);
  return true;
}
 
Example #28
Source File: Lookup.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Calls {@link #store(DataOutput)} after converting
 * {@link OutputStream} to {@link DataOutput}.
 *
 * <p>{@code output} is always closed before this method returns. If both storing and
 * closing fail, the exception from storing is propagated and the exception from
 * closing is attached to it as a suppressed exception, so the original cause is not
 * lost.
 *
 * @param output stream to write to; closed by this method
 * @return the result of {@link #store(DataOutput)}
 * @throws IOException if storing or closing fails
 */
public boolean store(OutputStream output) throws IOException {
  // try-with-resources keeps the primary failure and also covers the wrapper creation
  try (OutputStream toClose = output) {
    return store(new OutputStreamDataOutput(toClose));
  }
}
 
Example #29
Source File: DirectWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a writer for {@code numValues} values of {@code bitsPerValue} bits each,
 * using the {@code PACKED} format, and allocates the block and value buffers.
 *
 * @param output       destination of the encoded values
 * @param numValues    number of values this writer is created for
 * @param bitsPerValue number of bits used per value
 */
DirectWriter(DataOutput output, long numValues, int bitsPerValue) {
  this.output = output;
  this.numValues = numValues;
  this.bitsPerValue = bitsPerValue;
  encoder = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);
  // numValues is a long; it is clamped to Integer.MAX_VALUE before the int cast
  iterations = encoder.computeIterations((int) Math.min(numValues, Integer.MAX_VALUE), PackedInts.DEFAULT_BUFFER_SIZE);
  // buffers sized for `iterations` encoder iterations
  nextBlocks = new byte[iterations * encoder.byteBlockCount()];
  nextValues = new long[iterations * encoder.byteValueCount()];
}
 
Example #30
Source File: PackedWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a writer for {@code valueCount} values of {@code bitsPerValue} bits each in
 * the given {@code format}, and allocates the block and value buffers.
 *
 * @param format       packed format to encode with
 * @param out          destination of the encoded values
 * @param valueCount   number of values, passed to the superclass and to
 *                     {@code computeIterations}
 * @param bitsPerValue number of bits used per value
 * @param mem          second argument of {@code computeIterations}
 *                     (presumably a memory budget for the buffers; verify against BulkOperation)
 */
PackedWriter(PackedInts.Format format, DataOutput out, int valueCount, int bitsPerValue, int mem) {
  super(out, valueCount, bitsPerValue);
  this.format = format;
  encoder = BulkOperation.of(format, bitsPerValue);
  iterations = encoder.computeIterations(valueCount, mem);
  // buffers sized for `iterations` encoder iterations
  nextBlocks = new byte[iterations * encoder.byteBlockCount()];
  nextValues = new long[iterations * encoder.byteValueCount()];
  // initial writer state
  off = 0;
  written = 0;
  finished = false;
}