Java Code Examples for org.apache.lucene.index.CodecReader

The following examples show how to use org.apache.lucene.index.CodecReader. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Elasticsearch   Source File: VersionFieldUpgrader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns the reader to merge from: either {@code reader} itself or a
 * {@link VersionFieldUpgrader} around it.
 *
 * <p>The reader is returned untouched when it already carries the version field as
 * doc values, or when its _uid terms are missing or have no payloads. Only a segment
 * with _uid payloads and no version doc values gets wrapped.
 *
 * @param reader the segment reader to inspect
 * @return {@code reader}, or a {@code VersionFieldUpgrader} wrapping it
 * @throws IOException if reading the terms of the segment fails
 */
static CodecReader wrap(CodecReader reader) throws IOException {
    final FieldInfo versionField = reader.getFieldInfos().fieldInfo(VersionFieldMapper.NAME);
    final boolean versionsInDocValues =
        versionField != null && versionField.getDocValuesType() != DocValuesType.NONE;
    if (versionsInDocValues) {
        // recent segment: versions are already stored as doc values
        return reader;
    }

    // old segment: versions, if any, live in the payloads of the _uid terms
    final Terms uidTerms = reader.terms(UidFieldMapper.NAME);
    final boolean uidHasPayloads = uidTerms != null && uidTerms.hasPayloads();

    // Without _uid payloads there is nothing to convert. If another segment has
    // versions, every version of this segment will be initialized to 0.
    return uidHasPayloads ? new VersionFieldUpgrader(reader) : reader;
}
 
Example 2
Source Project: lucene-solr   Source File: AddIndexesTask.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Adds the index found in {@code inputDir} to the run's {@code IndexWriter}, either
 * directly as a directory or leaf by leaf through wrapped {@code CodecReader}s,
 * depending on {@code useAddIndexesDir}.
 *
 * @return always {@code 1}
 */
@Override
public int doLogic() throws Exception {
  final IndexWriter target = getRunData().getIndexWriter();

  if (useAddIndexesDir) {
    target.addIndexes(inputDir);
    return 1;
  }

  // reader-based path: wrap every leaf so it can be passed to addIndexes(CodecReader...)
  try (IndexReader source = DirectoryReader.open(inputDir)) {
    final CodecReader[] wrapped = new CodecReader[source.leaves().size()];
    int next = 0;
    for (LeafReaderContext ctx : source.leaves()) {
      wrapped[next] = SlowCodecReaderWrapper.wrap(ctx.reader());
      next++;
    }
    target.addIndexes(wrapped);
  }
  return 1;
}
 
Example 3
Source Project: lucene-solr   Source File: TaxonomyMergeUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Merges the given taxonomy and index directories and commits the changes to
 * the given writers.
 *
 * <p>The source taxonomy is added to {@code destTaxoWriter} first; the ordinal mapping
 * produced by that step is then used to wrap each leaf of the source index before the
 * leaves are added to {@code destIndexWriter}. The taxonomy writer is committed before
 * the index writer.
 *
 * @param srcIndexDir     directory of the source index, opened read-only for the merge
 * @param srcTaxoDir      directory of the source taxonomy
 * @param map             ordinal map passed to {@code addTaxonomy} and read back via {@code getMap()}
 * @param destIndexWriter writer receiving the wrapped source leaves
 * @param destTaxoWriter  writer receiving the source taxonomy
 * @param srcConfig       facets configuration handed to each {@code OrdinalMappingLeafReader}
 * @throws IOException if any of the underlying index operations fails
 */
public static void merge(Directory srcIndexDir, Directory srcTaxoDir, OrdinalMap map, IndexWriter destIndexWriter,
    DirectoryTaxonomyWriter destTaxoWriter, FacetsConfig srcConfig) throws IOException {

  // merge the taxonomies
  destTaxoWriter.addTaxonomy(srcTaxoDir, map);
  int[] ordinalMap = map.getMap();

  // try-with-resources closes the reader on every path and, unlike a manual
  // finally { close() }, keeps the primary exception if close() fails as well
  try (DirectoryReader reader = DirectoryReader.open(srcIndexDir)) {
    List<LeafReaderContext> leaves = reader.leaves();
    int numReaders = leaves.size();
    CodecReader[] wrappedLeaves = new CodecReader[numReaders];
    for (int i = 0; i < numReaders; i++) {
      wrappedLeaves[i] = SlowCodecReaderWrapper.wrap(new OrdinalMappingLeafReader(leaves.get(i).reader(), ordinalMap, srcConfig));
    }
    destIndexWriter.addIndexes(wrappedLeaves);

    // commit changes to taxonomy and index respectively.
    destTaxoWriter.commit();
    destIndexWriter.commit();
  }
}
 
Example 4
/**
 * Wraps the reader with an uninverting reader if any of its fields has an uninversion
 * type according to {@code getUninversionType}; otherwise returns the reader unchanged.
 *
 * @param reader the reader about to be merged
 * @return {@code reader}, or an {@code UninvertingFilterCodecReader} around it
 */
@Override
public CodecReader wrapForMerge(CodecReader reader) throws IOException {
  // allocated lazily: stays null unless at least one field needs uninverting
  Map<String,UninvertingReader.Type> typesByField = null;

  for (FieldInfo fieldInfo : reader.getFieldInfos()) {
    final UninvertingReader.Type uninversionType = getUninversionType(fieldInfo);
    if (uninversionType == null) {
      continue;
    }
    if (typesByField == null) {
      typesByField = new HashMap<>();
    }
    typesByField.put(fieldInfo.name, uninversionType);
  }

  if (typesByField != null) {
    return new UninvertingFilterCodecReader(reader, typesByField);
  }
  // nothing to uninvert: merge from the plain reader
  return reader;
}
 
Example 5
Source Project: crate   Source File: RecoverySourcePruneMergePolicy.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Decides whether {@code reader} needs a {@code SourcePruningFilterCodecReader} around it.
 *
 * <p>The reader is returned as-is when the recovery source field has no values, or when
 * the retain query matches as many documents as {@code reader.maxDoc()}. Otherwise it is
 * wrapped, with the set of matching documents or with {@code null} when the query yields
 * no scorer for this reader.
 *
 * @param recoverySourceField       name of the numeric doc values field marking recovery source
 * @param reader                    the reader about to be merged
 * @param retainSourceQuerySupplier supplies the query selecting documents whose source is kept
 * @return {@code reader} or a pruning wrapper around it
 * @throws IOException if reading doc values or running the query fails
 */
static CodecReader wrapReader(String recoverySourceField, CodecReader reader, Supplier<Query> retainSourceQuerySupplier)
    throws IOException {
    final NumericDocValues recoverySource = reader.getNumericDocValues(recoverySourceField);
    if (recoverySource == null || recoverySource.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
        // early exit: none of the docs has a recovery source anymore
        return reader;
    }

    final IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null);
    final Query retainQuery = searcher.rewrite(retainSourceQuerySupplier.get());
    final Weight weight = searcher.createWeight(retainQuery, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
    final Scorer scorer = weight.scorer(reader.getContext());

    if (scorer == null) {
        return new SourcePruningFilterCodecReader(recoverySourceField, reader, null);
    }

    final BitSet recoverySourceToKeep = BitSet.of(scorer.iterator(), reader.maxDoc());
    // Computing the cardinality is significantly cheaper than losing the bulk-merging
    // we would otherwise skip; with high retention most of the source is kept anyway.
    if (recoverySourceToKeep.cardinality() == reader.maxDoc()) {
        return reader; // keep all source
    }
    return new SourcePruningFilterCodecReader(recoverySourceField, reader, recoverySourceToKeep);
}
 
Example 6
Source Project: Elasticsearch   Source File: ElasticsearchMergePolicy.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Return an "upgraded" view of the reader.
 *
 * <p>Currently the only upgrade applied is {@link VersionFieldUpgrader#wrap}, which
 * converts 0.90.x _uid payloads to _version docvalues if needed.
 */
static CodecReader filter(CodecReader reader) throws IOException {
    // TODO: remove 0.90.x/1.x freqs/prox/payloads from _uid?
    // the previous code never did this, so some indexes carry around trash.
    return VersionFieldUpgrader.wrap(reader);
}
 
Example 7
Source Project: Elasticsearch   Source File: ElasticsearchMergePolicy.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the superclass' merge readers, each passed through {@code filter}, in the
 * same order and in a new list.
 */
@Override
public List<CodecReader> getMergeReaders() throws IOException {
    final List<CodecReader> originals = super.getMergeReaders();
    final List<CodecReader> filtered = new ArrayList<>();
    for (final CodecReader original : originals) {
        final CodecReader upgraded = filter(original);
        filtered.add(upgraded);
    }
    return filtered;
}
 
Example 8
Source Project: Elasticsearch   Source File: VersionFieldUpgrader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps {@code in} and builds the {@code infos} this reader exposes: every field info
 * of {@code in} except one named {@code VersionFieldMapper.NAME}, followed by a newly
 * created numeric doc values field info with that name.
 *
 * @param in the reader being wrapped
 */
VersionFieldUpgrader(CodecReader in) {
    super(in);

    // One pass over the wrapped reader's fields: find a free field number (one past
    // the highest number in use) and collect every field except an existing version field.
    int freeFieldNumber = 0;
    final ArrayList<FieldInfo> exposed = new ArrayList<>();
    for (FieldInfo existing : in.getFieldInfos()) {
        freeFieldNumber = Math.max(freeFieldNumber, existing.number + 1);
        if (VersionFieldMapper.NAME.equals(existing.name)) {
            continue;
        }
        exposed.add(existing);
    }

    // TODO: lots of things can go wrong here...
    final FieldInfo versionInfo = new FieldInfo(
        VersionFieldMapper.NAME,                // field name
        freeFieldNumber,                        // field number
        false,                                  // store term vectors
        false,                                  // omit norms
        false,                                  // store payloads
        IndexOptions.NONE,                      // index options
        DocValuesType.NUMERIC,                  // docvalues
        -1,                                     // docvalues generation
        Collections.<String, String>emptyMap()  // attributes
    );
    versionInfo.checkConsistency(); // fail merge immediately if above code is wrong

    exposed.add(versionInfo);
    infos = new FieldInfos(exposed.toArray(new FieldInfo[0]));
}
 
Example 9
Source Project: lucene-solr   Source File: TestUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adds every leaf of the given readers to {@code writer}, wrapping each leaf with
 * {@code SlowCodecReaderWrapper} and passing them all in a single {@code addIndexes} call.
 *
 * @param writer  the writer receiving the leaves
 * @param readers the readers whose leaves are added, in argument order
 * @throws IOException if wrapping a leaf or adding the indexes fails
 */
public static void addIndexesSlowly(IndexWriter writer, DirectoryReader... readers) throws IOException {
  final List<CodecReader> wrapped = new ArrayList<>();
  for (int r = 0; r < readers.length; r++) {
    for (LeafReaderContext ctx : readers[r].leaves()) {
      final CodecReader codecReader = SlowCodecReaderWrapper.wrap(ctx.reader());
      wrapped.add(codecReader);
    }
  }
  final CodecReader[] asArray = wrapped.toArray(new CodecReader[0]);
  writer.addIndexes(asArray);
}
 
Example 10
Source Project: lucene-solr   Source File: DirectUpdateHandler2.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adds the leaves of all readers in {@code cmd} to the core's index writer and, when
 * something was merged, schedules a commit on the first tracker that has a positive
 * time upper bound (hard commit tracker first, then soft commit tracker).
 *
 * @param cmd the merge command carrying the source readers
 * @return {@code 1} if {@code cmd.readers} was non-empty and merged, {@code 0} otherwise
 * @throws IOException if wrapping the leaves or adding the indexes fails
 */
@Override
public int mergeIndexes(MergeIndexesCommand cmd) throws IOException {
  TestInjection.injectDirectUpdateLatch();
  mergeIndexesCommands.mark();

  log.info("start {}", cmd);

  final List<DirectoryReader> sources = cmd.readers;
  final boolean merged = sources != null && !sources.isEmpty();

  if (merged) {
    final List<CodecReader> wrapped = new ArrayList<>();
    for (DirectoryReader source : sources) {
      for (LeafReaderContext ctx : source.leaves()) {
        wrapped.add(SlowCodecReaderWrapper.wrap(ctx.reader()));
      }
    }
    final RefCounted<IndexWriter> writerRef = solrCoreState.getIndexWriter(core);
    try {
      writerRef.get().addIndexes(wrapped.toArray(new CodecReader[0]));
    } finally {
      // always give the writer reference back, even when the merge fails
      writerRef.decref();
    }
  }
  log.info("end_mergeIndexes");

  // TODO: consider soft commit issues
  if (merged) {
    if (commitTracker.getTimeUpperBound() > 0) {
      commitTracker.scheduleCommitWithin(commitTracker.getTimeUpperBound());
    } else if (softCommitTracker.getTimeUpperBound() > 0) {
      softCommitTracker.scheduleCommitWithin(softCommitTracker.getTimeUpperBound());
    }
  }

  return merged ? 1 : 0;
}
 
Example 11
Source Project: lucene-solr   Source File: IndexSizeEstimator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Visits the stored fields of every {@code samplingStep}-th live document in each leaf
 * of {@code reader} with an {@code EstimatingVisitor}, and stores the collected
 * statistics in {@code result} under {@code STORED_FIELDS}.
 *
 * <p>For {@code CodecReader} leaves the stored fields reader's merge instance is used
 * for the pass; other leaves fall back to {@code LeafReader.document}.
 *
 * @param result map receiving the per-field statistics
 * @throws IOException if reading stored fields fails
 */
private void estimateStoredFields(Map<String, Object> result) throws IOException {
  log.info("- estimating stored fields...");
  Map<String, Map<String, Object>> stats = new HashMap<>();
  for (LeafReaderContext context : reader.leaves()) {
    LeafReader leafReader = context.reader();
    EstimatingVisitor visitor = new EstimatingVisitor(stats, topN, maxLength, samplingStep);
    Bits liveDocs = leafReader.getLiveDocs();
    if (leafReader instanceof CodecReader) {
      CodecReader codecReader = (CodecReader)leafReader;
      StoredFieldsReader storedFieldsReader = codecReader.getFieldsReader();
      // this instance may be faster for a full sequential pass
      StoredFieldsReader mergeInstance = storedFieldsReader.getMergeInstance();
      try {
        for (int docId = 0; docId < leafReader.maxDoc(); docId += samplingStep) {
          if (liveDocs != null && !liveDocs.get(docId)) {
            continue;
          }
          mergeInstance.visitDocument(docId, visitor);
        }
      } finally {
        // Release the merge instance even when a visit fails, but never close the
        // leaf's own stored fields reader: it is not owned by this method.
        if (mergeInstance != storedFieldsReader) {
          mergeInstance.close();
        }
      }
    } else {
      for (int docId = 0; docId < leafReader.maxDoc(); docId += samplingStep) {
        if (liveDocs != null && !liveDocs.get(docId)) {
          continue;
        }
        leafReader.document(docId, visitor);
      }
    }
  }
  result.put(STORED_FIELDS, stats);
}
 
Example 12
Source Project: crate   Source File: RecoverySourcePruneMergePolicy.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a merge policy around {@code in} whose merges first apply the wrapped merge's
 * own {@code wrapForMerge} and then pass the result through {@code wrapReader}.
 *
 * @param recoverySourceField       field name forwarded to {@code wrapReader}
 * @param retainSourceQuerySupplier query supplier forwarded to {@code wrapReader}
 * @param in                        the merge policy being wrapped
 */
RecoverySourcePruneMergePolicy(String recoverySourceField, Supplier<Query> retainSourceQuerySupplier, MergePolicy in) {
    super(in, innerMerge -> new OneMerge(innerMerge.segments) {
        @Override
        public CodecReader wrapForMerge(CodecReader reader) throws IOException {
            // let the wrapped merge do its own wrapping first, then apply the pruning on top
            return wrapReader(recoverySourceField, innerMerge.wrapForMerge(reader), retainSourceQuerySupplier);
        }
    });
}
 
Example 13
Source Project: Elasticsearch   Source File: VersionFieldUpgrader.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a producer that passes {@code in} to the superclass constructor and keeps a
 * reference to {@code reader}.
 *
 * @param in     the doc values producer handed to the superclass
 * @param reader the reader stored in the {@code reader} field; its use is not visible in
 *               this constructor
 */
UninvertedVersions(DocValuesProducer in, CodecReader reader) {
    super(in);
    this.reader = reader;
}
 
Example 14
Source Project: lucene-solr   Source File: SolrIndexSplitter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a reader around {@code in} that records the given live docs and derives its
 * document count from them.
 *
 * @param in       the reader handed to the superclass constructor
 * @param liveDocs bit set stored as this reader's live docs; its cardinality becomes
 *                 {@code numDocs}
 */
public LiveDocsReader(CodecReader in, FixedBitSet liveDocs) {
  super(in);
  // numDocs is the number of set bits, computed once up front
  this.numDocs = liveDocs.cardinality();
  this.liveDocs = liveDocs;
}
 
Example 15
Source Project: lucene-solr   Source File: IndexSizeEstimatorTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Runs {@code IndexSizeEstimator} against a randomly chosen core, checks the shape of
 * the full estimate, compares it with a sampled estimate, and finally verifies that the
 * underlying reader is still usable afterwards (SOLR-13694).
 */
@Test
public void testEstimator() throws Exception {
  JettySolrRunner jetty = cluster.getRandomJetty(random());
  String randomCoreName = jetty.getCoreContainer().getAllCoreNames().iterator().next();
  SolrCore core = jetty.getCoreContainer().getCore(randomCoreName);
  // Nested try/finally: the core is closed even if acquiring or releasing the
  // searcher reference throws.
  try {
    RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
    try {
      SolrIndexSearcher searcher = searcherRef.get();
      // limit the max length
      IndexSizeEstimator estimator = new IndexSizeEstimator(searcher.getRawReader(), 20, 50, true, true);
      IndexSizeEstimator.Estimate estimate = estimator.estimate();
      Map<String, Long> fieldsBySize = estimate.getFieldsBySize();
      assertFalse("empty fieldsBySize", fieldsBySize.isEmpty());
      assertEquals(fieldsBySize.toString(), fields.size(), fieldsBySize.size());
      fieldsBySize.forEach((k, v) -> assertTrue("unexpected size of " + k + ": " + v, v > 0));
      Map<String, Long> typesBySize = estimate.getTypesBySize();
      assertFalse("empty typesBySize", typesBySize.isEmpty());
      assertTrue("expected at least 8 types: " + typesBySize.toString(), typesBySize.size() >= 8);
      typesBySize.forEach((k, v) -> assertTrue("unexpected size of " + k + ": " + v, v > 0));
      Map<String, Object> summary = estimate.getSummary();
      assertNotNull("summary", summary);
      assertFalse("empty summary", summary.isEmpty());
      assertEquals(summary.keySet().toString(), fields.size(), summary.keySet().size());
      Map<String, Object> details = estimate.getDetails();
      assertNotNull("details", details);
      assertFalse("empty details", details.isEmpty());
      // by type
      assertEquals(details.keySet().toString(), 6, details.keySet().size());

      // check sampling
      estimator.setSamplingThreshold(searcher.getRawReader().maxDoc() / 2);
      IndexSizeEstimator.Estimate sampledEstimate = estimator.estimate();
      Map<String, Long> sampledFieldsBySize = sampledEstimate.getFieldsBySize();
      assertFalse("empty fieldsBySize", sampledFieldsBySize.isEmpty());
      // verify that the sampled values are within 50% of the original values
      fieldsBySize.forEach((field, size) -> {
        Long sampledSize = sampledFieldsBySize.get(field);
        assertNotNull("sampled size for " + field + " is missing in " + sampledFieldsBySize, sampledSize);
        double delta = (double) size * 0.5;
        assertEquals("sampled size of " + field + " is wildly off", (double)size, (double)sampledSize, delta);
      });
      // verify the reader is still usable - SOLR-13694
      IndexReader reader = searcher.getRawReader();
      for (LeafReaderContext context : reader.leaves()) {
        LeafReader leafReader = context.reader();
        assertTrue("unexpected LeafReader class: " + leafReader.getClass().getName(), leafReader instanceof CodecReader);
        Bits liveDocs = leafReader.getLiveDocs();
        CodecReader codecReader = (CodecReader) leafReader;
        StoredFieldsReader storedFieldsReader = codecReader.getFieldsReader();
        StoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
        assertNotNull(storedFieldsReader);
        // visit every live document through the leaf's own stored fields reader
        for (int docId = 0; docId < leafReader.maxDoc(); docId++) {
          if (liveDocs != null && !liveDocs.get(docId)) {
            continue;
          }
          storedFieldsReader.visitDocument(docId, visitor);
        }
      }
    } finally {
      searcherRef.decref();
    }
  } finally {
    core.close();
  }
}
 
Example 16
Source Project: crate   Source File: RecoverySourcePruneMergePolicy.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a filter reader around {@code reader} that remembers which field holds the
 * recovery source and which documents keep it.
 *
 * @param recoverySourceField  name of the recovery source field
 * @param reader               the reader handed to the superclass constructor
 * @param recoverySourceToKeep bit set stored as-is; callers in this file also pass
 *                             {@code null} here
 */
SourcePruningFilterCodecReader(String recoverySourceField, CodecReader reader, BitSet recoverySourceToKeep) {
    super(reader);
    this.recoverySourceToKeep = recoverySourceToKeep;
    this.recoverySourceField = recoverySourceField;
}