Java Code Examples for org.apache.lucene.codecs.DocValuesProducer

The following examples show how to use org.apache.lucene.codecs.DocValuesProducer. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: lucene-solr   Source File: AssertingDocValuesFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Walks every binary value of {@code field}, asserting that doc IDs are in range and strictly
 * increasing and that each value is valid, then forwards the field to {@code in}.
 */
@Override
public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  final BinaryDocValues binary = valuesProducer.getBinary(field);

  int previousDoc = -1;
  for (int doc = binary.nextDoc(); doc != NO_MORE_DOCS; doc = binary.nextDoc()) {
    assert doc >= 0 && doc < maxDoc;
    assert doc > previousDoc;
    previousDoc = doc;
    // fetched unconditionally so the producer is exercised even with assertions off
    final BytesRef payload = binary.binaryValue();
    assert payload.isValid();
  }

  in.addBinaryField(field, valuesProducer);
}
 
Example 2
Source Project: lucene-solr   Source File: AssertingDocValuesFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Iterates all sorted-numeric values of {@code field}, asserting that doc IDs strictly increase,
 * that every visited document has at least one value, and that the values of a document come
 * back in non-decreasing order. Afterwards the field is forwarded to {@code in}.
 *
 * @param field the field being written
 * @param valuesProducer source of the values; it is read once here and then passed on unchanged
 * @throws IOException if reading the values or the forwarded call fails
 */
@Override
public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);

  int lastDocID = -1;
  while (values.nextDoc() != NO_MORE_DOCS) {
    assert values.docID() > lastDocID;
    lastDocID = values.docID();
    int count = values.docValueCount();
    assert count > 0;
    // start below every possible long so the first value always passes the ordering check
    long previous = Long.MIN_VALUE;
    for (int i = 0; i < count; i++) {
      long nextValue = values.nextValue();
      assert nextValue >= previous;
      previous = nextValue;
    }
  }
  in.addSortedNumericField(field, valuesProducer);
}
 
Example 3
Source Project: lucene-solr   Source File: SegmentDocValues.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Opens the doc-values producer for generation {@code gen} of segment {@code si} and wraps it in
 * a {@link RefCount} whose release closes the producer and removes {@code gen} from
 * {@code genDVProducers}.
 */
private RefCount<DocValuesProducer> newDocValuesProducer(SegmentCommitInfo si, Directory dir, final Long gen, FieldInfos infos) throws IOException {
  final boolean hasGen = gen.longValue() != -1;
  // gen'd files are written outside CFS, so use SegInfo directory
  final Directory dvDir = hasGen ? si.info.dir : dir;
  final String segmentSuffix = hasGen ? Long.toString(gen.longValue(), Character.MAX_RADIX) : "";

  // set SegmentReadState to list only the fields that are relevant to that gen
  final SegmentReadState readState = new SegmentReadState(dvDir, si.info, infos, IOContext.READ, segmentSuffix);
  final DocValuesFormat format = si.info.getCodec().docValuesFormat();
  final DocValuesProducer producer = format.fieldsProducer(readState);
  return new RefCount<DocValuesProducer>(producer) {
    @SuppressWarnings("synthetic-access")
    @Override
    protected void release() throws IOException {
      // close first, then drop the map entry under the owner's lock
      object.close();
      synchronized (SegmentDocValues.this) {
        genDVProducers.remove(gen);
      }
    }
  };
}
 
Example 4
Source Project: lucene-solr   Source File: SegmentReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Initializes the most recent DocValues for the current commit.
 *
 * @return {@code null} when no field has doc values; otherwise a producer that either layers
 *         field updates on top of the segment or is the shared base-generation producer
 */
private DocValuesProducer initDocValuesProducer() throws IOException {
  if (!fieldInfos.hasDocValues()) {
    return null;
  }

  // read through the compound-file reader when there is one, else from the segment directory
  final Directory dir = core.cfsReader != null ? core.cfsReader : si.info.dir;

  if (!si.hasFieldUpdates()) {
    // simple case, no DocValues updates
    return segDocValues.getDocValuesProducer(-1L, si, dir, fieldInfos);
  }
  return new SegmentDocValuesProducer(si, dir, core.coreFieldInfos, fieldInfos, segDocValues);
}
 
Example 5
Source Project: Elasticsearch   Source File: VersionFieldUpgrader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the wrapped reader's doc-values producer decorated with {@code UninvertedVersions};
 * a {@code null} producer is replaced by {@code FilterDocValuesProducer.EMPTY} first.
 */
@Override
public DocValuesProducer getDocValuesReader() {
    final DocValuesProducer wrapped = in.getDocValuesReader();
    // TODO: move this nullness stuff out
    if (wrapped != null) {
        return new UninvertedVersions(wrapped, this);
    }
    return new UninvertedVersions(FilterDocValuesProducer.EMPTY, this);
}
 
Example 6
Source Project: lucene-solr   Source File: AssertingDocValuesFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Asks {@code in} for its producer and wraps it in an {@code AssertingDocValuesProducer} sized to
 * the segment's {@code maxDoc}, with the merging flag off.
 */
@Override
public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
  assert state.fieldInfos.hasDocValues();
  final DocValuesProducer delegate = in.fieldsProducer(state);
  assert delegate != null;
  final int segmentMaxDoc = state.segmentInfo.maxDoc();
  return new AssertingDocValuesProducer(delegate, segmentMaxDoc, false);
}
 
Example 7
Source Project: lucene-solr   Source File: AssertingDocValuesFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Walks every numeric value of {@code field}, asserting that doc IDs are in range and strictly
 * increasing, then forwards the field to {@code in}.
 */
@Override
public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  final NumericDocValues numeric = valuesProducer.getNumeric(field);

  int previousDoc = -1;
  for (int doc = numeric.nextDoc(); doc != NO_MORE_DOCS; doc = numeric.nextDoc()) {
    assert doc >= 0 && doc < maxDoc;
    assert doc > previousDoc;
    previousDoc = doc;
    // the value itself is not checked; reading it just exercises the producer
    numeric.longValue();
  }

  in.addNumericField(field, valuesProducer);
}
 
Example 8
Source Project: lucene-solr   Source File: AssertingDocValuesFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks the sorted values of {@code field} before forwarding it to {@code in}: the term
 * dictionary must be valid and strictly increasing, doc IDs must be in range and strictly
 * increasing, every ord must be within the dictionary, and every ord must be used by some doc.
 */
@Override
public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  final SortedDocValues sorted = valuesProducer.getSorted(field);

  final int valueCount = sorted.getValueCount();
  assert valueCount <= maxDoc;

  // dictionary pass: each term must compare greater than the one before it
  BytesRef previousTerm = null;
  for (int ord = 0; ord < valueCount; ord++) {
    final BytesRef term = sorted.lookupOrd(ord);
    assert term != null;
    assert term.isValid();
    assert ord == 0 || term.compareTo(previousTerm) > 0;
    // copy, because the next lookup may reuse the returned instance
    previousTerm = BytesRef.deepCopyOf(term);
  }

  // document pass: remember which ords are actually referenced
  final FixedBitSet usedOrds = new FixedBitSet(valueCount);
  int previousDoc = -1;
  for (int doc = sorted.nextDoc(); doc != NO_MORE_DOCS; doc = sorted.nextDoc()) {
    assert doc >= 0 && doc < maxDoc;
    assert doc > previousDoc;
    previousDoc = doc;
    final int docOrd = sorted.ordValue();
    assert docOrd >= 0 && docOrd < valueCount;
    usedOrds.set(docOrd);
  }

  assert usedOrds.cardinality() == valueCount;
  in.addSortedField(field, valuesProducer);
}
 
Example 9
Source Project: lucene-solr   Source File: AssertingDocValuesFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps {@code in}, remembering the document bound, the merging flag and the constructing thread,
 * and runs a few sanity assertions against this instance.
 */
AssertingDocValuesProducer(DocValuesProducer in, int maxDoc, boolean merging) {
  this.creationThread = Thread.currentThread();
  this.merging = merging;
  this.maxDoc = maxDoc;
  this.in = in;
  // do a few simple checks on init (all fields are assigned by this point)
  assert toString() != null;
  assert ramBytesUsed() >= 0;
  assert getChildResources() != null;
}
 
Example 10
Source Project: lucene-solr   Source File: CrankyDocValuesFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Forwards to {@code delegate}, unless {@code random.nextInt(100)} yields 0, in which case a fake
 * {@link IOException} is thrown instead.
 */
@Override
public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  final boolean injectFailure = random.nextInt(100) == 0;
  if (injectFailure) {
    throw new IOException("Fake IOException from DocValuesConsumer.addNumericField()");
  }
  delegate.addNumericField(field, valuesProducer);
}
 
Example 11
Source Project: lucene-solr   Source File: CrankyDocValuesFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Forwards to {@code delegate}, unless {@code random.nextInt(100)} yields 0, in which case a fake
 * {@link IOException} is thrown instead.
 */
@Override
public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  final boolean injectFailure = random.nextInt(100) == 0;
  if (injectFailure) {
    throw new IOException("Fake IOException from DocValuesConsumer.addBinaryField()");
  }
  delegate.addBinaryField(field, valuesProducer);
}
 
Example 12
Source Project: lucene-solr   Source File: CrankyDocValuesFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Forwards to {@code delegate}, unless {@code random.nextInt(100)} yields 0, in which case a fake
 * {@link IOException} is thrown instead.
 */
@Override
public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  final boolean injectFailure = random.nextInt(100) == 0;
  if (injectFailure) {
    throw new IOException("Fake IOException from DocValuesConsumer.addSortedField()");
  }
  delegate.addSortedField(field, valuesProducer);
}
 
Example 13
Source Project: lucene-solr   Source File: CrankyDocValuesFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Forwards to {@code delegate}, unless {@code random.nextInt(100)} yields 0, in which case a fake
 * {@link IOException} is thrown instead.
 */
@Override
public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  final boolean injectFailure = random.nextInt(100) == 0;
  if (injectFailure) {
    throw new IOException("Fake IOException from DocValuesConsumer.addSortedNumericField()");
  }
  delegate.addSortedNumericField(field, valuesProducer);
}
 
Example 14
Source Project: lucene-solr   Source File: CrankyDocValuesFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Forwards to {@code delegate}, unless {@code random.nextInt(100)} yields 0, in which case a fake
 * {@link IOException} is thrown instead.
 */
@Override
public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  final boolean injectFailure = random.nextInt(100) == 0;
  if (injectFailure) {
    throw new IOException("Fake IOException from DocValuesConsumer.addSortedSetField()");
  }
  delegate.addSortedSetField(field, valuesProducer);
}
 
Example 15
Source Project: lucene-solr   Source File: Lucene80DocValuesConsumer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes a sorted-numeric field: the field number and type marker go to {@code meta}, the values
 * are written by {@code writeValues}, and, only when some document has more than one value, a
 * monotonic table of cumulative per-document value counts is appended.
 */
@Override
public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  meta.writeInt(field.number);
  meta.writeByte(Lucene80DocValuesFormat.SORTED_NUMERIC);

  final long[] stats = writeValues(field, valuesProducer);
  final int numDocsWithField = Math.toIntExact(stats[0]);
  final long numValues = stats[1];
  assert numValues >= numDocsWithField;

  meta.writeInt(numDocsWithField);
  if (numValues <= numDocsWithField) {
    // no more values than documents: no address table is written
    return;
  }

  final long start = data.getFilePointer();
  meta.writeLong(start);
  meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);

  // one entry per document plus the leading zero
  final DirectMonotonicWriter addressesWriter = DirectMonotonicWriter.getInstance(meta, data, numDocsWithField + 1L, DIRECT_MONOTONIC_BLOCK_SHIFT);
  long addr = 0;
  addressesWriter.add(addr);
  final SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
  while (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    addr += values.docValueCount();
    addressesWriter.add(addr);
  }
  addressesWriter.finish();
  meta.writeLong(data.getFilePointer() - start);
}
 
Example 16
Source Project: lucene-solr   Source File: PerFieldDocValuesFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Sums, over every entry of {@code formats}, the key's length times {@link Character#BYTES} plus
 * the value's own {@code ramBytesUsed()}.
 */
@Override
public long ramBytesUsed() {
  long total = 0;
  for (Map.Entry<String,DocValuesProducer> format : formats.entrySet()) {
    final int keyBytes = format.getKey().length() * Character.BYTES;
    final long producerBytes = format.getValue().ramBytesUsed();
    total += keyBytes + producerBytes;
  }
  return total;
}
 
Example 17
Source Project: lucene-solr   Source File: CheckIndex.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Dispatches on the doc-values type of {@code fi}: increments the matching counter on
 * {@code status}, runs the generic iterator check, then the type-specific check(s). Each check
 * receives freshly obtained iterators from {@code dvReader}.
 *
 * <p>{@code infoStream} is accepted but not used by this method.
 *
 * @throws AssertionError if the type is none of the five handled ones
 */
private static void checkDocValues(FieldInfo fi, DocValuesProducer dvReader, int maxDoc, PrintStream infoStream, DocValuesStatus status) throws Exception {
  final String fieldName = fi.name;
  switch(fi.getDocValuesType()) {
    case NUMERIC:
      status.totalNumericFields++;
      checkDVIterator(fi, maxDoc, dvReader::getNumeric);
      checkNumericDocValues(fieldName, dvReader.getNumeric(fi), dvReader.getNumeric(fi));
      break;
    case BINARY:
      status.totalBinaryFields++;
      checkDVIterator(fi, maxDoc, dvReader::getBinary);
      checkBinaryDocValues(fieldName, maxDoc, dvReader.getBinary(fi), dvReader.getBinary(fi));
      break;
    case SORTED:
      status.totalSortedFields++;
      checkDVIterator(fi, maxDoc, dvReader::getSorted);
      // sorted values are put through the binary check as well as the sorted one
      checkBinaryDocValues(fieldName, maxDoc, dvReader.getSorted(fi), dvReader.getSorted(fi));
      checkSortedDocValues(fieldName, maxDoc, dvReader.getSorted(fi), dvReader.getSorted(fi));
      break;
    case SORTED_NUMERIC:
      status.totalSortedNumericFields++;
      checkDVIterator(fi, maxDoc, dvReader::getSortedNumeric);
      checkSortedNumericDocValues(fieldName, maxDoc, dvReader.getSortedNumeric(fi), dvReader.getSortedNumeric(fi));
      break;
    case SORTED_SET:
      status.totalSortedSetFields++;
      checkDVIterator(fi, maxDoc, dvReader::getSortedSet);
      checkSortedSetDocValues(fieldName, maxDoc, dvReader.getSortedSet(fi), dvReader.getSortedSet(fi));
      break;
    default:
      throw new AssertionError();
  }
}
 
Example 18
Source Project: lucene-solr   Source File: MergeReaderWrapper.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps {@code in} and, for each of its five component readers (postings, norms, doc values,
 * stored fields, term vectors), stores that reader's merge instance, or {@code null} when the
 * component itself is {@code null}.
 */
MergeReaderWrapper(CodecReader in) throws IOException {
  this.in = in;

  final FieldsProducer postings = in.getPostingsReader();
  this.fields = postings == null ? null : postings.getMergeInstance();

  final NormsProducer normsReader = in.getNormsReader();
  this.norms = normsReader == null ? null : normsReader.getMergeInstance();

  final DocValuesProducer docValuesReader = in.getDocValuesReader();
  this.docValues = docValuesReader == null ? null : docValuesReader.getMergeInstance();

  final StoredFieldsReader storedFields = in.getFieldsReader();
  this.store = storedFields == null ? null : storedFields.getMergeInstance();

  final TermVectorsReader termVectors = in.getTermVectorsReader();
  this.vectors = termVectors == null ? null : termVectors.getMergeInstance();
}
 
Example 19
Source Project: lucene-solr   Source File: SegmentDocValues.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the {@link DocValuesProducer} for the given generation.
 *
 * <p>A producer already registered in {@code genDVProducers} gets its reference count
 * incremented; otherwise a new one is created and registered under {@code gen}.
 */
synchronized DocValuesProducer getDocValuesProducer(long gen, SegmentCommitInfo si, Directory dir, FieldInfos infos) throws IOException {
  final RefCount<DocValuesProducer> existing = genDVProducers.get(gen);
  if (existing != null) {
    existing.incRef();
    return existing.get();
  }

  final RefCount<DocValuesProducer> created = newDocValuesProducer(si, dir, gen, infos);
  assert created != null;
  genDVProducers.put(gen, created);
  return created.get();
}
 
Example 20
Source Project: crate   Source File: RecoverySourcePruneMergePolicy.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the parent's doc-values producer behind a filter that restricts the numeric values of
 * {@code recoverySourceField} to the documents in {@code recoverySourceToKeep}. All other fields
 * are returned unchanged. The restricted view only supports {@code nextDoc()}.
 */
@Override
public DocValuesProducer getDocValuesReader() {
    final DocValuesProducer delegate = super.getDocValuesReader();
    return new FilterDocValuesProducer(delegate) {
        @Override
        public NumericDocValues getNumeric(FieldInfo field) throws IOException {
            final NumericDocValues source = super.getNumeric(field);
            if (!recoverySourceField.equals(field.name)) {
                return source;
            }

            assert source != null : recoverySourceField + " must have numeric DV but was null";
            final DocIdSetIterator kept;
            if (recoverySourceToKeep != null) {
                kept = ConjunctionDISI.intersectIterators(Arrays.asList(source,
                                                                        new BitSetIterator(recoverySourceToKeep, recoverySourceToKeep.length())));
            } else {
                // we can't return null here lucenes DocIdMerger expects an instance
                kept = DocIdSetIterator.empty();
            }

            return new FilterNumericDocValues(source) {
                @Override
                public int nextDoc() throws IOException {
                    // iteration is driven by the intersection, not by the raw values
                    return kept.nextDoc();
                }

                @Override
                public int advance(int target) {
                    throw new UnsupportedOperationException();
                }

                @Override
                public boolean advanceExact(int target) {
                    throw new UnsupportedOperationException();
                }
            };
        }
    };
}
 
Example 21
Source Project: Elasticsearch   Source File: VersionFieldUpgrader.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a wrapper around {@code in}.
 *
 * @param in producer handed to the superclass constructor
 * @param reader reader stored in the {@code reader} field
 */
UninvertedVersions(DocValuesProducer in, CodecReader reader) {
    super(in);
    this.reader = reader;
}
 
Example 22
Source Project: Elasticsearch   Source File: VersionFieldUpgrader.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns a new {@code UninvertedVersions} built around the merge instance of {@code in} and the
 * same {@code reader}.
 */
@Override
public DocValuesProducer getMergeInstance() throws IOException {
    final DocValuesProducer mergeDelegate = in.getMergeInstance();
    return new UninvertedVersions(mergeDelegate, reader);
}
 
Example 23
Source Project: lucene-solr   Source File: LegacyDocValuesIterables.java    License: Apache License 2.0 4 votes vote down vote up
/** Converts all concatenated ords (in docID order) from {@link SortedSetDocValues} into {@code Iterable<Number>}.
 *
 * <p>Each call to {@code iterator()} obtains a fresh {@link SortedSetDocValues} from
 * {@code valuesProducer}. Any {@link IOException} raised while reading is rethrown wrapped in a
 * {@link RuntimeException}.
 *
 * @deprecated Consume {@link SortedSetDocValues} instead. */
@Deprecated
public static Iterable<Number> sortedSetOrdsIterable(final DocValuesProducer valuesProducer, final FieldInfo fieldInfo) {

  return new Iterable<Number>() {

    @Override
    public Iterator<Number> iterator() {

      final SortedSetDocValues values;
      try {
        values = valuesProducer.getSortedSet(fieldInfo);
      } catch (IOException ioe) {
        throw new RuntimeException(ioe);
      }

      return new Iterator<Number>() {
        // true while nextOrd holds a result that next() has not yet consumed
        private boolean nextIsSet;
        // the pending ord, or -1 once the values are exhausted
        private long nextOrd;

        // Loads the next ord into nextOrd unless one is already pending.
        private void setNext() {
          try {
            if (nextIsSet == false) {
              // a docID of -1 means the values have not been positioned yet
              if (values.docID() == -1) {
                values.nextDoc();
              }
              while (true) {
                if (values.docID() == NO_MORE_DOCS) {
                  nextOrd = -1;
                  break;
                }
                nextOrd = values.nextOrd();
                if (nextOrd != -1) {
                  break;
                }
                // -1 here means the current document has no more ords: move on
                values.nextDoc();
              }
              nextIsSet = true;
            }
          } catch (IOException ioe) {
            throw new RuntimeException(ioe);
          }
        }
        
        @Override
        public boolean hasNext() {
          setNext();
          return nextOrd != -1;
        }

        @Override
        public Number next() {
          setNext();
          assert nextOrd != -1;
          // mark the pending ord as consumed so the next call advances
          nextIsSet = false;
          return nextOrd;
        }
      };
    }
  };
}
 
Example 24
Source Project: lucene-solr   Source File: LegacyDocValuesIterables.java    License: Apache License 2.0 4 votes vote down vote up
/** Converts all concatenated values (in docID order) from {@link SortedNumericDocValues} into {@code Iterable<Number>}.
 *
 * <p>Each call to {@code iterator()} obtains a fresh {@link SortedNumericDocValues} from
 * {@code valuesProducer}. Any {@link IOException} raised while reading is rethrown wrapped in a
 * {@link RuntimeException}. Once the values are exhausted, {@code hasNext()} may be called any
 * number of times without touching the underlying values again.
 *
 * @param valuesProducer source of the sorted-numeric values
 * @param fieldInfo field whose values are iterated
 * @return an iterable over every value of every document, in docID order
 * @deprecated Consume {@link SortedNumericDocValues} instead. */
@Deprecated
public static Iterable<Number> sortedNumericToValues(final DocValuesProducer valuesProducer, final FieldInfo fieldInfo) {
  return new Iterable<Number>() {

    @Override
    public Iterator<Number> iterator() {

      final SortedNumericDocValues values;
      try {
        values = valuesProducer.getSortedNumeric(fieldInfo);
      } catch (IOException ioe) {
        throw new RuntimeException(ioe);
      }

      return new Iterator<Number>() {
        // true while nextValue holds a result that next() has not yet consumed
        private boolean nextIsSet;
        // number of values of the current document; 0 before the first document and after the last
        private int nextCount;
        // how many values of the current document have been read so far
        private int upto;
        private long nextValue;

        // Loads the next value into nextValue unless one is already pending.
        private void setNext() {
          try {
            if (nextIsSet == false) {
              if (upto == nextCount) {
                // Current document is used up. Advance only while the source still has
                // documents: after NO_MORE_DOCS it must be neither advanced nor read.
                if (values.docID() != NO_MORE_DOCS) {
                  values.nextDoc();
                }
                if (values.docID() == NO_MORE_DOCS) {
                  // Reset both counters so every later call lands in this branch again
                  // instead of falling through to nextValue().
                  nextCount = 0;
                  upto = 0;
                  return;
                }
                nextCount = values.docValueCount();
                upto = 0;
              }
              nextValue = values.nextValue();
              upto++;
              nextIsSet = true;
            }
          } catch (IOException ioe) {
            throw new RuntimeException(ioe);
          }
        }

        @Override
        public boolean hasNext() {
          setNext();
          return nextCount != 0;
        }

        @Override
        public Number next() {
          setNext();
          assert nextCount != 0;
          // mark the pending value as consumed so the next call advances
          nextIsSet = false;
          return nextValue;
        }
      };
    }
  };
}
 
Example 25
Source Project: lucene-solr   Source File: SimpleTextDocValuesWriter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Writes a numeric field as text in two passes: the first finds the value range, the second
 * writes, for each of the {@code numDocs} documents, the zero-padded delta from the minimum
 * followed by a "T"/"F" line telling whether the document has a value.
 */
@Override
public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  assert fieldSeen(field.name);
  assert field.getDocValuesType() == DocValuesType.NUMERIC || field.hasNorms();
  writeFieldEntry(field, DocValuesType.NUMERIC);

  // first pass to find min/max
  long minValue = Long.MAX_VALUE;
  long maxValue = Long.MIN_VALUE;
  int numValues = 0;
  NumericDocValues values = valuesProducer.getNumeric(field);
  while (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    final long current = values.longValue();
    minValue = Math.min(minValue, current);
    maxValue = Math.max(maxValue, current);
    numValues++;
  }
  if (numValues != numDocs) {
    // documents without a value are written as 0, so 0 must lie inside the range
    minValue = Math.min(minValue, 0);
    maxValue = Math.max(maxValue, 0);
  }

  // write our minimum value to the .dat, all entries are deltas from that
  SimpleTextUtil.write(data, MINVALUE);
  SimpleTextUtil.write(data, Long.toString(minValue), scratch);
  SimpleTextUtil.writeNewline(data);

  // build up our fixed-width "simple text packed ints" format:
  // one '0' per character of the largest possible delta
  final BigInteger range = BigInteger.valueOf(maxValue).subtract(BigInteger.valueOf(minValue));
  final int maxBytesPerValue = range.toString().length();
  final StringBuilder zeros = new StringBuilder();
  for (int i = 0; i < maxBytesPerValue; i++) {
    zeros.append('0');
  }
  final String patternString = zeros.toString();

  // write our pattern to the .dat
  SimpleTextUtil.write(data, PATTERN);
  SimpleTextUtil.write(data, patternString, scratch);
  SimpleTextUtil.writeNewline(data);

  final DecimalFormat encoder = new DecimalFormat(patternString, new DecimalFormatSymbols(Locale.ROOT));

  // second pass to write the values
  int numDocsWritten = 0;
  values = valuesProducer.getNumeric(field);
  for (int doc = 0; doc < numDocs; ++doc) {
    if (values.docID() < doc) {
      values.nextDoc();
      assert values.docID() >= doc;
    }
    final boolean hasValue = values.docID() == doc;
    final long value = hasValue ? values.longValue() : 0;
    assert value >= minValue;
    final Number delta = BigInteger.valueOf(value).subtract(BigInteger.valueOf(minValue));
    final String encoded = encoder.format(delta);
    assert encoded.length() == patternString.length();
    SimpleTextUtil.write(data, encoded, scratch);
    SimpleTextUtil.writeNewline(data);
    SimpleTextUtil.write(data, hasValue ? "T" : "F", scratch);
    SimpleTextUtil.writeNewline(data);
    numDocsWritten++;
    assert numDocsWritten <= numDocs;
  }

  assert numDocs == numDocsWritten: "numDocs=" + numDocs + " numDocsWritten=" + numDocsWritten;
}
 
Example 26
Source Project: lucene-solr   Source File: SimpleTextDocValuesWriter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Writes a binary field by delegating to {@code doAddBinaryField}, after asserting that the field
 * passes {@code fieldSeen} and is declared with the {@code BINARY} doc-values type.
 */
@Override
public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  assert fieldSeen(field.name);
  assert field.getDocValuesType() == DocValuesType.BINARY;
  doAddBinaryField(field, valuesProducer);
}
 
Example 27
Source Project: lucene-solr   Source File: SimpleTextDocValuesWriter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Writes binary values as text in two passes: the first finds the longest value, the second
 * writes, for each of the {@code numDocs} documents, the encoded length, the raw bytes padded with
 * spaces to the maximum length, and a "T"/"F" line telling whether the document has a value.
 */
private void doAddBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  // first pass: longest value
  BinaryDocValues values = valuesProducer.getBinary(field);
  int maxLength = 0;
  while (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    maxLength = Math.max(maxLength, values.binaryValue().length);
  }
  writeFieldEntry(field, DocValuesType.BINARY);

  // write maxLength
  SimpleTextUtil.write(data, MAXLENGTH);
  SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
  SimpleTextUtil.writeNewline(data);

  // one '0' per decimal digit of maxLength
  final int maxBytesLength = Long.toString(maxLength).length();
  final StringBuilder zeros = new StringBuilder();
  for (int i = 0; i < maxBytesLength; i++) {
    zeros.append('0');
  }
  final String lengthPattern = zeros.toString();

  // write our pattern for encoding lengths
  SimpleTextUtil.write(data, PATTERN);
  SimpleTextUtil.write(data, lengthPattern, scratch);
  SimpleTextUtil.writeNewline(data);
  final DecimalFormat encoder = new DecimalFormat(lengthPattern, new DecimalFormatSymbols(Locale.ROOT));

  // second pass: one record per document
  values = valuesProducer.getBinary(field);
  int numDocsWritten = 0;
  for (int doc = 0; doc < numDocs; ++doc) {
    if (values.docID() < doc) {
      values.nextDoc();
      assert values.docID() >= doc;
    }
    final boolean hasValue = values.docID() == doc;

    // write length
    final int length = hasValue ? values.binaryValue().length : 0;
    SimpleTextUtil.write(data, LENGTH);
    SimpleTextUtil.write(data, encoder.format(length), scratch);
    SimpleTextUtil.writeNewline(data);

    // write bytes -- don't use SimpleText.write
    // because it escapes:
    if (hasValue) {
      final BytesRef value = values.binaryValue();
      data.writeBytes(value.bytes, value.offset, value.length);
    }

    // pad to fit
    for (int remaining = maxLength - length; remaining > 0; remaining--) {
      data.writeByte((byte)' ');
    }
    SimpleTextUtil.writeNewline(data);
    SimpleTextUtil.write(data, hasValue ? "T" : "F", scratch);
    SimpleTextUtil.writeNewline(data);
    numDocsWritten++;
  }

  assert numDocs == numDocsWritten;
}
 
Example 28
Source Project: lucene-solr   Source File: SimpleTextDocValuesWriter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Writes a sorted field as text: a header (value count, maximum length, the two zero patterns),
 * then every term with its encoded length and space padding, then one encoded {@code ord + 1}
 * line for each of the {@code numDocs} documents (0 for a document without a value).
 */
@Override
public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  assert fieldSeen(field.name);
  assert field.getDocValuesType() == DocValuesType.SORTED;
  writeFieldEntry(field, DocValuesType.SORTED);

  // first pass over the terms: count them and find the longest
  int valueCount = 0;
  int maxLength = -1;
  TermsEnum terms = valuesProducer.getSorted(field).termsEnum();
  BytesRef term;
  while ((term = terms.next()) != null) {
    maxLength = Math.max(maxLength, term.length);
    valueCount++;
  }

  // write numValues
  SimpleTextUtil.write(data, NUMVALUES);
  SimpleTextUtil.write(data, Integer.toString(valueCount), scratch);
  SimpleTextUtil.writeNewline(data);

  // write maxLength
  SimpleTextUtil.write(data, MAXLENGTH);
  SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
  SimpleTextUtil.writeNewline(data);

  final int maxBytesLength = Integer.toString(maxLength).length();
  final StringBuilder lengthZeros = new StringBuilder();
  for (int i = 0; i < maxBytesLength; i++) {
    lengthZeros.append('0');
  }
  final String lengthPattern = lengthZeros.toString();

  // write our pattern for encoding lengths
  SimpleTextUtil.write(data, PATTERN);
  SimpleTextUtil.write(data, lengthPattern, scratch);
  SimpleTextUtil.writeNewline(data);
  final DecimalFormat encoder = new DecimalFormat(lengthPattern, new DecimalFormatSymbols(Locale.ROOT));

  // ords are written shifted by one, hence the +1 when sizing the pattern
  final int maxOrdBytes = Long.toString(valueCount+1L).length();
  final StringBuilder ordZeros = new StringBuilder();
  for (int i = 0; i < maxOrdBytes; i++) {
    ordZeros.append('0');
  }
  final String ordPattern = ordZeros.toString();

  // write our pattern for ords
  SimpleTextUtil.write(data, ORDPATTERN);
  SimpleTextUtil.write(data, ordPattern, scratch);
  SimpleTextUtil.writeNewline(data);
  final DecimalFormat ordEncoder = new DecimalFormat(ordPattern, new DecimalFormatSymbols(Locale.ROOT));

  // for asserts:
  int valuesSeen = 0;

  // second pass over the terms: write each one
  terms = valuesProducer.getSorted(field).termsEnum();
  while ((term = terms.next()) != null) {
    // write length
    SimpleTextUtil.write(data, LENGTH);
    SimpleTextUtil.write(data, encoder.format(term.length), scratch);
    SimpleTextUtil.writeNewline(data);

    // write bytes -- don't use SimpleText.write
    // because it escapes:
    data.writeBytes(term.bytes, term.offset, term.length);

    // pad to fit
    for (int remaining = maxLength - term.length; remaining > 0; remaining--) {
      data.writeByte((byte)' ');
    }
    SimpleTextUtil.writeNewline(data);
    valuesSeen++;
    assert valuesSeen <= valueCount;
  }

  assert valuesSeen == valueCount;

  // per-document ords
  final SortedDocValues values = valuesProducer.getSorted(field);
  for (int doc = 0; doc < numDocs; ++doc) {
    if (values.docID() < doc) {
      values.nextDoc();
      assert values.docID() >= doc;
    }
    final int ord = values.docID() == doc ? values.ordValue() : -1;
    SimpleTextUtil.write(data, ordEncoder.format(ord+1L), scratch);
    SimpleTextUtil.writeNewline(data);
  }
}
 
Example 29
Source Project: lucene-solr   Source File: SimpleTextDocValuesWriter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Writes a sorted-numeric field by presenting it to {@code doAddBinaryField} as binary values:
 * each document's values are rendered as decimal numbers joined by commas.
 */
@Override
public void addSortedNumericField(FieldInfo field, final DocValuesProducer valuesProducer) throws IOException {
  assert fieldSeen(field.name);
  assert field.getDocValuesType() == DocValuesType.SORTED_NUMERIC;
  doAddBinaryField(field, new EmptyDocValuesProducer() {
    @Override
    public BinaryDocValues getBinary(FieldInfo field) throws IOException {
      final SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
      return new BinaryDocValues() {

        // scratch buffer reused for every document
        final StringBuilder builder = new StringBuilder();
        // text form of the document the iterator is positioned on
        BytesRef binaryValue;

        // Rebuilds binaryValue from the values of the current document, if there is one.
        private void setCurrentDoc() throws IOException {
          if (docID() == NO_MORE_DOCS) {
            return;
          }
          builder.setLength(0);
          final int count = values.docValueCount();
          for (int i = 0; i < count; ++i) {
            if (i != 0) {
              builder.append(',');
            }
            builder.append(values.nextValue());
          }
          binaryValue = new BytesRef(builder.toString());
        }

        @Override
        public int docID() {
          return values.docID();
        }

        @Override
        public int nextDoc() throws IOException {
          final int doc = values.nextDoc();
          setCurrentDoc();
          return doc;
        }

        @Override
        public int advance(int target) throws IOException {
          final int doc = values.advance(target);
          setCurrentDoc();
          return doc;
        }

        @Override
        public boolean advanceExact(int target) throws IOException {
          final boolean found = values.advanceExact(target);
          if (found) {
            setCurrentDoc();
          }
          return found;
        }

        @Override
        public long cost() {
          return values.cost();
        }

        @Override
        public BytesRef binaryValue() throws IOException {
          return binaryValue;
        }
      };
    }
  });
}
 
Example 30
Source Project: lucene-solr   Source File: SimpleTextDocValuesFormat.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@code SimpleTextDocValuesReader} for {@code state}.
 *
 * <p>NOTE(review): the {@code "dat"} argument is presumably the data-file extension; confirm
 * against the reader's constructor.
 */
@Override
public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
  return new SimpleTextDocValuesReader(state, "dat");
}