org.apache.lucene.codecs.PostingsFormat Java Examples

Source File: CompletionFieldsConsumer.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Records the codec and delegate names, opens the delegate fields consumer and
 * creates the dictionary output for the segment, writing its index header.
 * If any of those steps throws, the dictionary output and the delegate consumer
 * are handed to {@code IOUtils.closeWhileHandlingException} before the
 * exception propagates.
 *
 * @param codecName codec name written into the dictionary header
 * @param delegatePostingsFormat format supplying the delegate fields consumer
 * @param state write state of the segment being flushed
 * @throws IOException if the delegate consumer or the dictionary file cannot be set up
 */
CompletionFieldsConsumer(String codecName, PostingsFormat delegatePostingsFormat, SegmentWriteState state) throws IOException {
  this.codecName = codecName;
  this.delegatePostingsFormatName = delegatePostingsFormat.getName();
  this.state = state;
  final String dictFileName =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, DICT_EXTENSION);
  boolean initialized = false;
  try {
    this.delegateFieldsConsumer = delegatePostingsFormat.fieldsConsumer(state);
    dictOut = state.directory.createOutput(dictFileName, state.context);
    CodecUtil.writeIndexHeader(
        dictOut, codecName, COMPLETION_VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    initialized = true;
  } finally {
    if (!initialized) {
      IOUtils.closeWhileHandlingException(dictOut, delegateFieldsConsumer);
    }
  }
}

Source File: TestRuleSetupAndRestoreClassEnv.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Verifies, through assumptions, that neither the codec nor any postings
 * format it may use is on the avoid list.
 *
 * @param codec the codec to check
 * @throws AssumptionViolatedException if the class does not work with a given codec.
 */
private void checkCodecRestrictions(Codec codec) {
  final String codecName = codec.getName();
  assumeFalse("Class not allowed to use codec: " + codecName + ".", shouldAvoidCodec(codecName));

  // A RandomCodec carries a list of format names; each one is checked when an avoid list exists.
  if (codec instanceof RandomCodec && !avoidCodecs.isEmpty()) {
    final RandomCodec randomCodec = (RandomCodec) codec;
    for (String formatName : randomCodec.formatNames) {
      assumeFalse("Class not allowed to use postings format: " + formatName + ".",
          shouldAvoidCodec(formatName));
    }
  }

  // The codec's own postings format.
  final String postingsName = codec.postingsFormat().getName();
  assumeFalse("Class not allowed to use postings format: " + postingsName + ".",
      shouldAvoidCodec(postingsName));

  // The postings format named by LuceneTestCase.TEST_POSTINGSFORMAT.
  final String testPostingsFormat = LuceneTestCase.TEST_POSTINGSFORMAT;
  assumeFalse("Class not allowed to use postings format: " + testPostingsFormat + ".",
      shouldAvoidCodec(testPostingsFormat));
}

Source File: MtasCodecPostingsFormat.java From mtas with Apache License 2.0

6 votes

/**
 * Instantiates a new mtas codec postings format.
 *
 * @param delegate the delegate
 */
public MtasCodecPostingsFormat(PostingsFormat delegate) {
  super(MtasCodec.MTAS_CODEC_NAME);
  delegateCodecName = delegate.getName();
  delegatePostingsFormat = delegate;
  // preload to prevent NoClassDefFoundErrors
  try {
    Class.forName("mtas.codec.payload.MtasPayloadDecoder");
    Class.forName("mtas.codec.payload.MtasBitInputStream");
    Class.forName("mtas.analysis.token.MtasPosition");
    Class.forName("mtas.analysis.token.MtasOffset");
    Class.forName("mtas.codec.tree.MtasRBTree");
    Class.forName("mtas.codec.MtasTerms");
    Class.forName("mtas.codec.util.CodecInfo");
    Class.forName("mtas.codec.tree.MtasTreeNodeId");
  } catch (ClassNotFoundException e) {
    log.error(e);
  }
}

Source File: MtasCodec.java From mtas with Apache License 2.0

6 votes

/**
 * Returns a new {@code MtasCodecPostingsFormat} wrapping a concrete postings format.
 * When the delegate codec's format is not per-field it is wrapped directly;
 * otherwise the default codec's format is used, resolved through
 * {@code getPostingsFormatForField(null)} if it is itself per-field, with
 * "Lucene70" as the last resort.
 */
@Override
public PostingsFormat postingsFormat() {
  initDelegate();
  if (!(delegate.postingsFormat() instanceof PerFieldPostingsFormat)) {
    return new MtasCodecPostingsFormat(delegate.postingsFormat());
  }

  PostingsFormat wrapped = Codec.getDefault().postingsFormat();
  if (wrapped instanceof PerFieldPostingsFormat) {
    wrapped = ((PerFieldPostingsFormat) wrapped).getPostingsFormatForField(null);
    if (wrapped == null || wrapped instanceof PerFieldPostingsFormat) {
      // fallback option
      wrapped = PostingsFormat.forName("Lucene70");
    }
  }
  return new MtasCodecPostingsFormat(wrapped);
}

Source File: SolrResourceLoader.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after {@link #addToClassLoader(List)}
 * and before using this ResourceLoader.
 * <p>
 * The reload runs only while {@code needToReloadLuceneSPI} is set. The flag is
 * cleared before the reload calls are made, so a later call returns immediately
 * unless the flag has been set again in the meantime.
 */
synchronized void reloadLuceneSPI() {
  // TODO improve to use a static Set<URL> to check when we need to
  if (!needToReloadLuceneSPI) {
    // Nothing has requested a reload since the last one.
    return;
  }
  needToReloadLuceneSPI = false; // reset
  log.debug("Reloading Lucene SPI");

  // Codecs:
  PostingsFormat.reloadPostingsFormats(this.classLoader);
  DocValuesFormat.reloadDocValuesFormats(this.classLoader);
  Codec.reloadCodecs(this.classLoader);
  // Analysis:
  CharFilterFactory.reloadCharFilters(this.classLoader);
  TokenFilterFactory.reloadTokenFilters(this.classLoader);
  TokenizerFactory.reloadTokenizers(this.classLoader);
}

Source File: DirectPostingsFormat.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Opens the "Lucene84" postings for the segment. For a merge context that
 * producer is returned as is; otherwise its integrity is checked, a
 * {@code DirectFields} is built from it, and the opened producer is closed.
 *
 * @param state the segment read state
 * @return the opened producer for merges, else the {@code DirectFields} built from it
 * @throws IOException if opening, checking, loading or closing fails
 */
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
  final FieldsProducer onDisk = PostingsFormat.forName("Lucene84").fieldsProducer(state);
  if (state.context.context == IOContext.Context.MERGE) {
    // Don't load postings for merge:
    return onDisk;
  }
  try {
    onDisk.checkIntegrity();
    return new DirectFields(state, onDisk, minSkipCount, lowFreqCutoff);
  } finally {
    // Closed on both the success and the failure path.
    onDisk.close();
  }
}

Source File: TestSuggestField.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Builds an {@code IndexWriterConfig} whose codec uses a
 * {@code Completion84PostingsFormat} (with a randomly picked FST load mode)
 * for the given suggest fields and the codec's regular format for all others.
 *
 * @param analyzer analyzer for the writer config
 * @param suggestFields names of the fields that get the completion format
 * @return the configured writer config
 */
static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) {
  final IndexWriterConfig config = newIndexWriterConfig(random(), analyzer);
  config.setMergePolicy(newLogMergePolicy());
  config.setCodec(new Lucene86Codec() {
    private final CompletionPostingsFormat.FSTLoadMode loadMode =
        RandomPicks.randomFrom(random(), CompletionPostingsFormat.FSTLoadMode.values());
    private final PostingsFormat suggestFormat = new Completion84PostingsFormat(loadMode);

    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      return suggestFields.contains(field) ? suggestFormat : super.getPostingsFormatForField(field);
    }
  });
  return config;
}

Source File: PerFieldMappingPostingFormatCodec.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Picks the postings format for a field from its mapping: completion fields get
 * the format produced by their field type around the default one, every other
 * field gets the default. A field without a mapping is logged and also gets the default.
 */
@Override
public PostingsFormat getPostingsFormatForField(String field) {
    final MappedFieldType mappedType = mapperService.indexName(field);
    if (mappedType instanceof CompletionFieldMapper.CompletionFieldType) {
        // CompletionFieldMapper needs a special postings format
        final PostingsFormat base = super.getPostingsFormatForField(field);
        return ((CompletionFieldMapper.CompletionFieldType) mappedType).postingsFormat(base);
    }
    if (mappedType == null) {
        logger.warn("no index mapper found for field: [{}] returning default postings format", field);
    }
    return super.getPostingsFormatForField(field);
}

Source File: TestUtil.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns a Codec that can read any of the default codecs and formats,
 * but always writes in the specified format.
 *
 * @param format the postings format returned for every field
 * @return an {@code AssertingCodec} whose per-field format is always {@code format}
 */
public static Codec alwaysPostingsFormat(final PostingsFormat format) {
  // TODO: we really need for postings impls etc to announce themselves
  // (and maybe their params, too) to infostream on flush and merge.
  // otherwise in a real debugging situation we won't know whats going on!
  if (LuceneTestCase.VERBOSE) {
    System.out.println("forcing postings format to:" + format);
  }
  final Codec fixedFormatCodec = new AssertingCodec() {
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      // The field name is ignored on purpose.
      return format;
    }
  };
  return fixedFormatCodec;
}

Source File: RandomCodec.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Registers each given postings format, together with its name, unless that
 * name appears in {@code avoidCodecs}.
 *
 * @param avoidCodecs names of formats that must be skipped
 * @param postings candidate formats, processed in the order given
 */
private final void add(Set<String> avoidCodecs, PostingsFormat... postings) {
  for (PostingsFormat candidate : postings) {
    final String candidateName = candidate.getName();
    if (avoidCodecs.contains(candidateName)) {
      continue;
    }
    formats.add(candidate);
    formatNames.add(candidateName);
  }
}

Source File: RandomCodec.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns the postings format assigned to the given field. The first request
 * for a field picks a format from {@code formats} based on
 * {@code perFieldSeed} and the field name's hash, and remembers the choice in
 * {@code previousMappings} so later requests return the same format.
 *
 * @param name the field name
 * @return the format mapped to {@code name}
 */
@Override
public PostingsFormat getPostingsFormatForField(String name) {
  PostingsFormat codec = previousMappings.get(name);
  if (codec == null) {
    // Reduce modulo the list size BEFORE taking the absolute value:
    // Math.abs(Integer.MIN_VALUE) is still negative, so abs-then-mod could yield a
    // negative index for that one hash value. For every other value the result is
    // identical to the previous abs-then-mod form.
    codec = formats.get(Math.abs((perFieldSeed ^ name.hashCode()) % formats.size()));
    previousMappings.put(name, codec);
    // Safety:
    assert previousMappings.size() < 10000: "test went insane";
  }
  return codec;
}

Source File: CrankyPostingsFormat.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Creates a postings format that is registered under the delegate's own name
 * and keeps the delegate and the random source for later use.
 *
 * @param delegate the format whose name is reused
 * @param random the random source stored on this instance
 */
CrankyPostingsFormat(PostingsFormat delegate, Random random) {
  // we impersonate the passed-in codec, so we don't need to be in SPI,
  // and so we dont change file formats
  super(delegate.getName());
  this.random = random;
  this.delegate = delegate;
}

Source File: CompletionFieldsProducer.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Opens the dictionary and index files of a segment, validates their headers,
 * opens the delegate fields producer named in the index file and registers one
 * {@code CompletionsTermsReader} per suggest field listed there.
 * If anything fails, the delegate producer and the dictionary input are closed
 * through {@code IOUtils.closeWhileHandlingException}; the index input is
 * always closed by the try-with-resources statement.
 *
 * @param codecName expected codec name in both file headers
 * @param state read state of the segment
 * @param fstLoadMode load mode passed on to every terms reader
 * @throws IOException if a file cannot be opened or fails validation
 */
CompletionFieldsProducer(String codecName, SegmentReadState state, FSTLoadMode fstLoadMode) throws IOException {
  final String indexFileName =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, INDEX_EXTENSION);
  delegateFieldsProducer = null;
  boolean opened = false;

  try (ChecksumIndexInput indexIn = state.directory.openChecksumInput(indexFileName, state.context)) {
    // open up dict file containing all fsts
    final String dictFileName =
        IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, DICT_EXTENSION);
    dictIn = state.directory.openInput(dictFileName, state.context);
    CodecUtil.checkIndexHeader(
        dictIn, codecName, COMPLETION_CODEC_VERSION, COMPLETION_VERSION_CURRENT,
        state.segmentInfo.getId(), state.segmentSuffix);
    // just validate the footer for the dictIn
    CodecUtil.retrieveChecksum(dictIn);

    // open up index file (fieldNumber, offset)
    CodecUtil.checkIndexHeader(
        indexIn, codecName, COMPLETION_CODEC_VERSION, COMPLETION_VERSION_CURRENT,
        state.segmentInfo.getId(), state.segmentSuffix);
    // load delegate PF
    final String delegateName = indexIn.readString();
    delegateFieldsProducer = PostingsFormat.forName(delegateName).fieldsProducer(state);

    // read suggest field numbers and their offsets in the terms file from index
    final int fieldCount = indexIn.readVInt();
    readers = new HashMap<>(fieldCount);
    for (int n = 0; n < fieldCount; n++) {
      // The values are read in exactly the order they appear in the index file.
      final int fieldNumber = indexIn.readVInt();
      final long offset = indexIn.readVLong();
      final long minWeight = indexIn.readVLong();
      final long maxWeight = indexIn.readVLong();
      final byte type = indexIn.readByte();
      final FieldInfo info = state.fieldInfos.fieldInfo(fieldNumber);
      // we don't load the FST yet
      readers.put(info.name,
          new CompletionsTermsReader(dictIn, offset, minWeight, maxWeight, type, fstLoadMode));
    }
    CodecUtil.checkFooter(indexIn);
    opened = true;
  } finally {
    if (!opened) {
      IOUtils.closeWhileHandlingException(delegateFieldsProducer, dictIn);
    }
  }
}

Source File: TestUtil.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns the name of the postings format the codec uses for a field: the
 * per-field choice when the codec's format is a {@code PerFieldPostingsFormat},
 * otherwise the codec's single format.
 *
 * @param codec the codec to inspect
 * @param field the field name
 * @return the name of the applicable postings format
 */
public static String getPostingsFormat(Codec codec, String field) {
  final PostingsFormat top = codec.postingsFormat();
  final PostingsFormat effective = (top instanceof PerFieldPostingsFormat)
      ? ((PerFieldPostingsFormat) top).getPostingsFormatForField(field)
      : top;
  return effective.getName();
}

Source File: CompletionFieldMapper.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Returns the completion postings format of this field type, creating it around
 * {@code in} on the first call and returning that same instance afterwards.
 *
 * @param in the format to wrap on first use
 * @return the cached {@code Completion090PostingsFormat}
 * @throws IllegalStateException if {@code in} is already a {@code Completion090PostingsFormat}
 */
public synchronized PostingsFormat postingsFormat(PostingsFormat in) {
    if (in instanceof Completion090PostingsFormat) {
        throw new IllegalStateException("Double wrapping of " + Completion090PostingsFormat.class);
    }
    if (postingsFormat != null) {
        return postingsFormat;
    }
    postingsFormat = new Completion090PostingsFormat(in, analyzingSuggestLookupProvider);
    return postingsFormat;
}

Source File: Completion090PostingsFormat.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Opens the suggest FST file of a segment, validates its header, opens the
 * delegate fields producer named in the file and, unless the segment is being
 * read for a merge, loads the lookup factory through the provider named in the file.
 * The suggest input is closed before the constructor returns; on failure the
 * delegate producer is closed as well.
 *
 * @param state read state of the segment
 * @throws IOException if the file cannot be opened or read
 * @throws IllegalStateException if the provider named in the file is not registered
 */
public CompletionFieldsProducer(SegmentReadState state) throws IOException {
    String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
    IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
    FieldsProducer delegateProducer = null;
    boolean success = false;
    try {
        // The header check runs inside the try so that a failed check still closes the
        // already-open input through the finally block instead of leaking it.
        version = CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT);
        PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString());
        String providerName = input.readString();
        CompletionLookupProvider completionLookupProvider = providers.get(providerName);
        if (completionLookupProvider == null) {
            throw new IllegalStateException("no provider with name [" + providerName + "] registered");
        }
        // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent unecessary heap usage?
        delegateProducer = delegatePostingsFormat.fieldsProducer(state);
        /*
         * If we are merging we don't load the FSTs at all such that we
         * don't consume so much memory during merge
         */
        if (state.context.context != Context.MERGE) {
            // TODO: maybe we can do this in a fully lazy fashion based on some configuration
            // eventually we should have some kind of curciut breaker that prevents us from going OOM here
            // with some configuration
            this.lookupFactory = completionLookupProvider.load(input);
        } else {
            this.lookupFactory = null;
        }
        this.delegateProducer = delegateProducer;
        success = true;
    } finally {
        if (!success) {
            IOUtils.closeWhileHandlingException(delegateProducer, input);
        } else {
            IOUtils.close(input);
        }
    }
}

Source File: PluginsService.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after the new classloader has been created to
 * register the services for use.
 *
 * @param loader the classloader passed to every reload call
 */
static void reloadLuceneSPI(ClassLoader loader) {
    // do NOT change the order of these method calls!

    // Codecs:
    PostingsFormat.reloadPostingsFormats(loader);
    DocValuesFormat.reloadDocValuesFormats(loader);
    Codec.reloadCodecs(loader);
    // Analysis:
    CharFilterFactory.reloadCharFilters(loader);
    TokenFilterFactory.reloadTokenFilters(loader);
    TokenizerFactory.reloadTokenizers(loader);
}

Source File: BloomFilteringPostingsFormat.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Reads the bloom filter file of a segment: validates its header, opens the
 * delegate fields producer named in the file, deserializes one {@code FuzzySet}
 * per recorded field and validates the footer. On failure the bloom input and
 * the delegate producer are closed through
 * {@code IOUtils.closeWhileHandlingException}.
 *
 * @param state read state of the segment
 * @throws IOException if the file cannot be opened, read or validated
 */
public BloomFilteredFieldsProducer(SegmentReadState state)
    throws IOException {

  final String bloomFile = IndexFileNames.segmentFileName(
      state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
  ChecksumIndexInput in = null;
  boolean loaded = false;
  try {
    in = state.directory.openChecksumInput(bloomFile, state.context);
    CodecUtil.checkIndexHeader(
        in, BLOOM_CODEC_NAME, VERSION_START, VERSION_CURRENT,
        state.segmentInfo.getId(), state.segmentSuffix);
    // // Load the hash function used in the BloomFilter
    // hashFunction = HashFunction.forName(bloomIn.readString());
    // Load the delegate postings format
    final String delegateName = in.readString();
    this.delegateFieldsProducer = PostingsFormat.forName(delegateName).fieldsProducer(state);

    final int bloomCount = in.readInt();
    for (int n = 0; n < bloomCount; n++) {
      // Field number first, then the serialized filter, as laid out in the file.
      final int fieldNumber = in.readInt();
      final FuzzySet filter = FuzzySet.deserialize(in);
      final FieldInfo info = state.fieldInfos.fieldInfo(fieldNumber);
      bloomsByFieldName.put(info.name, filter);
    }
    CodecUtil.checkFooter(in);
    IOUtils.close(in);
    loaded = true;
  } finally {
    if (!loaded) {
      IOUtils.closeWhileHandlingException(in, delegateFieldsProducer);
    }
  }
}

Source File: TestUtil.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns a random postings format that supports term ordinals.
 *
 * @param r source of the random choice; exactly one {@code nextInt(2)} is drawn
 * @return a new {@code LuceneFixedGap} or {@code BlockTreeOrdsPostingsFormat}
 */
public static PostingsFormat getPostingsFormatWithOrds(Random r) {
  final int choice = r.nextInt(2);
  if (choice == 0) {
    return new LuceneFixedGap();
  }
  if (choice == 1) {
    return new BlockTreeOrdsPostingsFormat();
  }
  // TODO: these don't actually support ords!
  //case 2: return new FSTOrdPostingsFormat();
  throw new AssertionError();
}

Source File: PerFieldPostingsFormat.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Groups the fields by postings format and writes each group with a consumer
 * obtained from its format. Every consumer is added to {@code toClose} as soon
 * as it is created; if writing fails, everything in {@code toClose} is closed
 * through {@code IOUtils.closeWhileHandlingException}.
 *
 * @param fields the fields to write
 * @param norms norms passed through to each consumer
 * @throws IOException if a consumer cannot be created or fails to write
 */
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
  final Map<PostingsFormat, FieldsGroup> groupsByFormat = buildFieldsGroupMapping(fields);

  // Write postings
  boolean finished = false;
  try {
    for (Map.Entry<PostingsFormat, FieldsGroup> entry : groupsByFormat.entrySet()) {
      final FieldsGroup group = entry.getValue();

      // Exposes only the fields from this group:
      final Fields groupView = new FilterFields(fields) {
        @Override
        public Iterator<String> iterator() {
          return group.fields.iterator();
        }
      };

      final FieldsConsumer consumer = entry.getKey().fieldsConsumer(group.state);
      toClose.add(consumer);
      consumer.write(groupView, norms);
    }
    finished = true;
  } finally {
    if (!finished) {
      IOUtils.closeWhileHandlingException(toClose);
    }
  }
}

Source File: PerFieldPostingsFormat.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Groups the field names of all merged producers by postings format and merges
 * each group with a consumer obtained from its format. Every consumer is added
 * to {@code toClose} when created. The per-field merge state is reset in all
 * cases; on failure everything in {@code toClose} is closed through
 * {@code IOUtils.closeWhileHandlingException}.
 *
 * @param mergeState the state of the merge in progress
 * @param norms norms passed through to each consumer
 * @throws IOException if a consumer cannot be created or fails to merge
 */
@Override
public void merge(MergeState mergeState, NormsProducer norms) throws IOException {
  @SuppressWarnings("unchecked") Iterable<String> mergedFieldNames = () ->
      new MergedIterator<>(true,
          Arrays.stream(mergeState.fieldsProducers).map(FieldsProducer::iterator).toArray(Iterator[]::new));
  final Map<PostingsFormat, FieldsGroup> groupsByFormat = buildFieldsGroupMapping(mergedFieldNames);

  // Merge postings
  final PerFieldMergeState perFieldState = new PerFieldMergeState(mergeState);
  boolean finished = false;
  try {
    for (Map.Entry<PostingsFormat, FieldsGroup> entry : groupsByFormat.entrySet()) {
      final FieldsGroup group = entry.getValue();

      final FieldsConsumer consumer = entry.getKey().fieldsConsumer(group.state);
      toClose.add(consumer);
      consumer.merge(perFieldState.apply(group.fields), norms);
    }
    finished = true;
  } finally {
    perFieldState.reset();
    if (!finished) {
      IOUtils.closeWhileHandlingException(toClose);
    }
  }
}

Source File: PerFieldPostingsFormat.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Opens the fields producers of a segment from the per-field attributes stored
 * in its field infos. One producer is opened per distinct segment suffix and
 * shared by all fields that map to it. If anything fails, the producers opened
 * so far are closed through {@code IOUtils.closeWhileHandlingException}.
 *
 * @param readState read state of the segment
 * @throws IOException if a producer cannot be opened
 * @throws IllegalStateException if a field has a format attribute but no suffix attribute
 */
public FieldsReader(final SegmentReadState readState) throws IOException {

  // Read _X.per and init each format:
  boolean initialized = false;
  try {
    // Read field name -> format name
    for (FieldInfo info : readState.fieldInfos) {
      if (info.getIndexOptions() == IndexOptions.NONE) {
        continue;
      }
      final String formatName = info.getAttribute(PER_FIELD_FORMAT_KEY);
      if (formatName == null) {
        // null formatName means the field is in fieldInfos, but has no postings!
        continue;
      }
      final String suffix = info.getAttribute(PER_FIELD_SUFFIX_KEY);
      if (suffix == null) {
        throw new IllegalStateException("missing attribute: " + PER_FIELD_SUFFIX_KEY + " for field: " + info.name);
      }
      final PostingsFormat format = PostingsFormat.forName(formatName);
      final String segmentSuffix = getSuffix(formatName, suffix);
      if (!formats.containsKey(segmentSuffix)) {
        final SegmentReadState suffixedState = new SegmentReadState(readState, segmentSuffix);
        formats.put(segmentSuffix, format.fieldsProducer(suffixedState));
      }
      fields.put(info.name, formats.get(segmentSuffix));
    }
    initialized = true;
  } finally {
    if (!initialized) {
      IOUtils.closeWhileHandlingException(formats.values());
    }
  }

  this.segment = readState.segmentInfo.name;
}

Source File: TestPerFieldPostingsFormat2.java From lucene-solr with Apache License 2.0

5 votes

/** Returns {@code direct} for the "id" field and {@code luceneDefault} for every other field. */
@Override
public PostingsFormat getPostingsFormatForField(String field) {
  return field.equals("id") ? direct : luceneDefault;
}

Source File: TestPerFieldPostingsFormat2.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Runs the mixed-postings test with a codec that hands out a separate
 * {@code DirectPostingsFormat} instance for "id" and for "date".
 */
public void testSameCodecDifferentInstance() throws Exception {
  final Codec perFieldCodec = new AssertingCodec() {
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      // A fresh instance per call, so "id" and "date" never share one.
      if ("id".equals(field) || "date".equals(field)) {
        return new DirectPostingsFormat();
      }
      return super.getPostingsFormatForField(field);
    }
  };
  doTestMixedPostings(perFieldCodec);
}

Source File: TestPerFieldPostingsFormat2.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Runs the mixed-postings test with a codec that uses
 * {@code LuceneVarGapFixedInterval} for both "id" and "date", constructed with
 * 1 and 2 respectively.
 */
public void testSameCodecDifferentParams() throws Exception {
  final Codec perFieldCodec = new AssertingCodec() {
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      if ("id".equals(field)) {
        return new LuceneVarGapFixedInterval(1);
      }
      if ("date".equals(field)) {
        return new LuceneVarGapFixedInterval(2);
      }
      return super.getPostingsFormatForField(field);
    }
  };
  doTestMixedPostings(perFieldCodec);
}

Source File: TestAddIndexes.java From lucene-solr with Apache License 2.0

5 votes

/** Returns {@code directFormat} for the "id" field and {@code defaultFormat} for every other field. */
@Override
public PostingsFormat getPostingsFormatForField(String field) {
  return field.equals("id") ? directFormat : defaultFormat;
}

Source File: TestExternalCodecs.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns {@code defaultFormat} for the fields "field2", "field1" and "id",
 * and {@code ramFormat} for every other field.
 */
@Override
public PostingsFormat getPostingsFormatForField(String field) {
  final boolean useDefault =
      field.equals("field2") || field.equals("field1") || field.equals("id");
  return useDefault ? defaultFormat : ramFormat;
}

Source File: MtasCodecPostingsFormat.java From mtas with Apache License 2.0

5 votes

/**
 * Creates the fields consumer for a segment. With a delegate postings format
 * present, its consumer is wrapped in a {@code MtasFieldsConsumer}; without
 * one, the consumer of the postings format of the codec named
 * {@code delegateCodecName} is returned directly.
 *
 * @param state write state of the segment
 * @return the consumer to write the segment's postings with
 * @throws IOException if the underlying consumer cannot be created
 */
@Override
public final FieldsConsumer fieldsConsumer(SegmentWriteState state)
    throws IOException {
  if (delegatePostingsFormat == null) {
    return Codec.forName(delegateCodecName).postingsFormat().fieldsConsumer(state);
  }
  final FieldsConsumer delegateConsumer = delegatePostingsFormat.fieldsConsumer(state);
  return new MtasFieldsConsumer(
      delegateConsumer, state, getName(), delegatePostingsFormat.getName());
}

Source File: PerFieldMappingPostingFormatCodec.java From crate with Apache License 2.0

5 votes

/**
 * Returns the default postings format for the field, logging a warning first
 * when the mapper service has no field type under that name.
 */
@Override
public PostingsFormat getPostingsFormatForField(String field) {
    final boolean unmapped = mapperService.fullName(field) == null;
    if (unmapped) {
        logger.warn("no index mapper found for field: [{}] returning default postings format", field);
    }
    return super.getPostingsFormatForField(field);
}

Source File: PluginsService.java From crate with Apache License 2.0

5 votes

/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after the new classloader has been created to
 * register the services for use.
 *
 * @param loader the classloader passed to every reload call
 */
static void reloadLuceneSPI(ClassLoader loader) {
    // do NOT change the order of these method calls!

    // Codecs:
    PostingsFormat.reloadPostingsFormats(loader);
    DocValuesFormat.reloadDocValuesFormats(loader);
    Codec.reloadCodecs(loader);
    // Analysis:
    CharFilterFactory.reloadCharFilters(loader);
    TokenFilterFactory.reloadTokenFilters(loader);
    TokenizerFactory.reloadTokenizers(loader);
}

org.apache.lucene.codecs.PostingsFormat Java Examples