org.apache.lucene.codecs.PostingsFormat Java Examples
The following examples show how to use
org.apache.lucene.codecs.PostingsFormat.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CompletionFieldsConsumer.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Sets up the consumer: remembers the codec name, the delegate format's name and the
 * write state, asks the delegate format for its fields consumer, then creates the
 * dictionary output and writes its index header.
 *
 * <p>If any of those steps throws, whatever was opened so far is closed
 * (suppressing secondary exceptions) before the original exception propagates.
 *
 * @param codecName name written into the dictionary file's index header
 * @param delegatePostingsFormat format that supplies the delegate fields consumer
 * @param state write state of the segment being written
 * @throws IOException if creating the consumer or the dictionary output fails
 */
CompletionFieldsConsumer(String codecName, PostingsFormat delegatePostingsFormat, SegmentWriteState state) throws IOException {
  this.codecName = codecName;
  this.delegatePostingsFormatName = delegatePostingsFormat.getName();
  this.state = state;

  final String dictFileName =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, DICT_EXTENSION);

  boolean initialized = false;
  try {
    this.delegateFieldsConsumer = delegatePostingsFormat.fieldsConsumer(state);
    dictOut = state.directory.createOutput(dictFileName, state.context);
    CodecUtil.writeIndexHeader(
        dictOut, codecName, COMPLETION_VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    initialized = true;
  } finally {
    if (!initialized) {
      // Construction failed part-way: release what we managed to open.
      IOUtils.closeWhileHandlingException(dictOut, delegateFieldsConsumer);
    }
  }
}
Example #2
Source File: TestRuleSetupAndRestoreClassEnv.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Verifies that neither the codec nor any postings format it may use is on the
 * avoid list of the current test class.
 *
 * @param codec the codec selected for the test class
 * @throws AssumptionViolatedException if the class does not work with a given codec.
 */
private void checkCodecRestrictions(Codec codec) {
  final String codecName = codec.getName();
  assumeFalse("Class not allowed to use codec: " + codecName + ".", shouldAvoidCodec(codecName));

  // A RandomCodec can pick any of its registered formats, so each one is checked.
  final boolean inspectRandomFormats = codec instanceof RandomCodec && !avoidCodecs.isEmpty();
  if (inspectRandomFormats) {
    for (String formatName : ((RandomCodec) codec).formatNames) {
      assumeFalse("Class not allowed to use postings format: " + formatName + ".",
          shouldAvoidCodec(formatName));
    }
  }

  final String defaultFormatName = codec.postingsFormat().getName();
  assumeFalse("Class not allowed to use postings format: " + defaultFormatName + ".",
      shouldAvoidCodec(defaultFormatName));

  assumeFalse("Class not allowed to use postings format: " + LuceneTestCase.TEST_POSTINGSFORMAT + ".",
      shouldAvoidCodec(LuceneTestCase.TEST_POSTINGSFORMAT));
}
Example #3
Source File: MtasCodecPostingsFormat.java From mtas with Apache License 2.0 | 6 votes |
/** * Instantiates a new mtas codec postings format. * * @param delegate the delegate */ public MtasCodecPostingsFormat(PostingsFormat delegate) { super(MtasCodec.MTAS_CODEC_NAME); delegateCodecName = delegate.getName(); delegatePostingsFormat = delegate; // preload to prevent NoClassDefFoundErrors try { Class.forName("mtas.codec.payload.MtasPayloadDecoder"); Class.forName("mtas.codec.payload.MtasBitInputStream"); Class.forName("mtas.analysis.token.MtasPosition"); Class.forName("mtas.analysis.token.MtasOffset"); Class.forName("mtas.codec.tree.MtasRBTree"); Class.forName("mtas.codec.MtasTerms"); Class.forName("mtas.codec.util.CodecInfo"); Class.forName("mtas.codec.tree.MtasTreeNodeId"); } catch (ClassNotFoundException e) { log.error(e); } }
Example #4
Source File: MtasCodec.java From mtas with Apache License 2.0 | 6 votes |
@Override public PostingsFormat postingsFormat() { initDelegate(); if (delegate.postingsFormat() instanceof PerFieldPostingsFormat) { Codec defaultCodec = Codec.getDefault(); PostingsFormat defaultPostingsFormat = defaultCodec.postingsFormat(); if (defaultPostingsFormat instanceof PerFieldPostingsFormat) { defaultPostingsFormat = ((PerFieldPostingsFormat) defaultPostingsFormat) .getPostingsFormatForField(null); if ((defaultPostingsFormat == null) || (defaultPostingsFormat instanceof PerFieldPostingsFormat)) { // fallback option return new MtasCodecPostingsFormat( PostingsFormat.forName("Lucene70")); } else { return new MtasCodecPostingsFormat(defaultPostingsFormat); } } else { return new MtasCodecPostingsFormat(defaultPostingsFormat); } } else { return new MtasCodecPostingsFormat(delegate.postingsFormat()); } }
Example #5
Source File: SolrResourceLoader.java From lucene-solr with Apache License 2.0 | 6 votes |
/** * Reloads all Lucene SPI implementations using the new classloader. * This method must be called after {@link #addToClassLoader(List)} * and before using this ResourceLoader. */ synchronized void reloadLuceneSPI() { // TODO improve to use a static Set<URL> to check when we need to if (!needToReloadLuceneSPI) { return; } needToReloadLuceneSPI = false; // reset log.debug("Reloading Lucene SPI"); // Codecs: PostingsFormat.reloadPostingsFormats(this.classLoader); DocValuesFormat.reloadDocValuesFormats(this.classLoader); Codec.reloadCodecs(this.classLoader); // Analysis: CharFilterFactory.reloadCharFilters(this.classLoader); TokenFilterFactory.reloadTokenFilters(this.classLoader); TokenizerFactory.reloadTokenizers(this.classLoader); }
Example #6
Source File: DirectPostingsFormat.java From lucene-solr with Apache License 2.0 | 6 votes |
@Override public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { FieldsProducer postings = PostingsFormat.forName("Lucene84").fieldsProducer(state); if (state.context.context != IOContext.Context.MERGE) { FieldsProducer loadedPostings; try { postings.checkIntegrity(); loadedPostings = new DirectFields(state, postings, minSkipCount, lowFreqCutoff); } finally { postings.close(); } return loadedPostings; } else { // Don't load postings for merge: return postings; } }
Example #7
Source File: TestSuggestField.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Builds an {@code IndexWriterConfig} whose codec routes the given suggest fields to a
 * {@code Completion84PostingsFormat} (with a randomly picked FST load mode) and leaves
 * every other field on the codec's regular per-field format.
 *
 * @param analyzer analyzer for the writer config
 * @param suggestFields names of the fields that use the completion postings format
 * @return the configured writer config
 */
static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) {
  final IndexWriterConfig config = newIndexWriterConfig(random(), analyzer);
  config.setMergePolicy(newLogMergePolicy());

  config.setCodec(new Lucene86Codec() {
    final CompletionPostingsFormat.FSTLoadMode loadMode =
        RandomPicks.randomFrom(random(), CompletionPostingsFormat.FSTLoadMode.values());
    final PostingsFormat suggestFormat = new Completion84PostingsFormat(loadMode);

    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      return suggestFields.contains(field)
          ? suggestFormat
          : super.getPostingsFormatForField(field);
    }
  });
  return config;
}
Example #8
Source File: PerFieldMappingPostingFormatCodec.java From Elasticsearch with Apache License 2.0 | 5 votes |
@Override public PostingsFormat getPostingsFormatForField(String field) { final MappedFieldType indexName = mapperService.indexName(field); if (indexName == null) { logger.warn("no index mapper found for field: [{}] returning default postings format", field); } else if (indexName instanceof CompletionFieldMapper.CompletionFieldType) { // CompletionFieldMapper needs a special postings format final CompletionFieldMapper.CompletionFieldType fieldType = (CompletionFieldMapper.CompletionFieldType) indexName; final PostingsFormat defaultFormat = super.getPostingsFormatForField(field); return fieldType.postingsFormat(defaultFormat); } return super.getPostingsFormatForField(field); }
Example #9
Source File: TestUtil.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Return a Codec that can read any of the * default codecs and formats, but always writes in the specified * format. */ public static Codec alwaysPostingsFormat(final PostingsFormat format) { // TODO: we really need for postings impls etc to announce themselves // (and maybe their params, too) to infostream on flush and merge. // otherwise in a real debugging situation we won't know whats going on! if (LuceneTestCase.VERBOSE) { System.out.println("forcing postings format to:" + format); } return new AssertingCodec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return format; } }; }
Example #10
Source File: RandomCodec.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Registers every given postings format whose name is not on the avoid list, recording
 * both the format and its name.
 *
 * @param avoidCodecs names of formats that must not be registered
 * @param postings candidate formats
 */
private final void add(Set<String> avoidCodecs, PostingsFormat... postings) {
  for (PostingsFormat candidate : postings) {
    final String candidateName = candidate.getName();
    if (avoidCodecs.contains(candidateName)) {
      continue;
    }
    formats.add(candidate);
    formatNames.add(candidateName);
  }
}
Example #11
Source File: RandomCodec.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public PostingsFormat getPostingsFormatForField(String name) { PostingsFormat codec = previousMappings.get(name); if (codec == null) { codec = formats.get(Math.abs(perFieldSeed ^ name.hashCode()) % formats.size()); previousMappings.put(name, codec); // Safety: assert previousMappings.size() < 10000: "test went insane"; } return codec; }
Example #12
Source File: CrankyPostingsFormat.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Wraps {@code delegate}, registering under the delegate's own name.
 *
 * @param delegate the postings format being wrapped
 * @param random source of randomness kept for later use by this format
 */
CrankyPostingsFormat(PostingsFormat delegate, Random random) {
  // we impersonate the passed-in codec, so we don't need to be in SPI,
  // and so we dont change file formats
  super(delegate.getName());
  this.random = random;
  this.delegate = delegate;
}
Example #13
Source File: CompletionFieldsProducer.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Opens the completion index and dictionary files of a segment.
 *
 * <p>Validates the headers of both files, retrieves the dictionary's checksum, opens
 * the delegate fields producer named in the index file, and registers one terms reader
 * per suggest field listed there. The index file is closed when the constructor
 * finishes; on failure the delegate producer and dictionary input are closed as well.
 *
 * @param codecName codec name expected in both file headers
 * @param state read state of the segment
 * @param fstLoadMode load mode handed to each terms reader
 * @throws IOException if a file cannot be opened or fails validation
 */
CompletionFieldsProducer(String codecName, SegmentReadState state, FSTLoadMode fstLoadMode) throws IOException {
  final String indexFileName =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, INDEX_EXTENSION);
  delegateFieldsProducer = null;
  boolean opened = false;

  try (ChecksumIndexInput indexIn = state.directory.openChecksumInput(indexFileName, state.context)) {
    // open up dict file containing all fsts
    final String dictFileName =
        IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, DICT_EXTENSION);
    dictIn = state.directory.openInput(dictFileName, state.context);
    CodecUtil.checkIndexHeader(dictIn, codecName, COMPLETION_CODEC_VERSION, COMPLETION_VERSION_CURRENT,
        state.segmentInfo.getId(), state.segmentSuffix);
    // just validate the footer for the dictIn
    CodecUtil.retrieveChecksum(dictIn);

    // open up index file (fieldNumber, offset)
    CodecUtil.checkIndexHeader(indexIn, codecName, COMPLETION_CODEC_VERSION, COMPLETION_VERSION_CURRENT,
        state.segmentInfo.getId(), state.segmentSuffix);

    // load delegate PF
    final String delegateName = indexIn.readString();
    delegateFieldsProducer = PostingsFormat.forName(delegateName).fieldsProducer(state);

    // read suggest field numbers and their offsets in the terms file from index
    final int fieldCount = indexIn.readVInt();
    readers = new HashMap<>(fieldCount);
    for (int n = 0; n < fieldCount; n++) {
      final int fieldNumber = indexIn.readVInt();
      final long offset = indexIn.readVLong();
      final long minWeight = indexIn.readVLong();
      final long maxWeight = indexIn.readVLong();
      final byte type = indexIn.readByte();
      final FieldInfo info = state.fieldInfos.fieldInfo(fieldNumber);
      // we don't load the FST yet
      readers.put(info.name,
          new CompletionsTermsReader(dictIn, offset, minWeight, maxWeight, type, fstLoadMode));
    }
    CodecUtil.checkFooter(indexIn);
    opened = true;
  } finally {
    if (!opened) {
      IOUtils.closeWhileHandlingException(delegateFieldsProducer, dictIn);
    }
  }
}
Example #14
Source File: TestUtil.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Returns the name of the postings format the codec uses for a field.
 *
 * @param codec the codec to inspect
 * @param field the field name, consulted only when the codec's format is per-field
 * @return the name of the format that applies to {@code field}
 */
public static String getPostingsFormat(Codec codec, String field) {
  final PostingsFormat codecFormat = codec.postingsFormat();
  final PostingsFormat effective = (codecFormat instanceof PerFieldPostingsFormat)
      ? ((PerFieldPostingsFormat) codecFormat).getPostingsFormatForField(field)
      : codecFormat;
  return effective.getName();
}
Example #15
Source File: CompletionFieldMapper.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Returns the completion postings format for this field type, creating it around
 * {@code in} on the first call and returning the cached instance afterwards.
 *
 * @param in the format to wrap; must not already be a {@code Completion090PostingsFormat}
 * @return the cached completion postings format
 * @throws IllegalStateException if {@code in} is already a {@code Completion090PostingsFormat}
 */
public synchronized PostingsFormat postingsFormat(PostingsFormat in) {
  if (in instanceof Completion090PostingsFormat) {
    throw new IllegalStateException("Double wrapping of " + Completion090PostingsFormat.class);
  }
  if (postingsFormat != null) {
    return postingsFormat;
  }
  postingsFormat = new Completion090PostingsFormat(in, analyzingSuggestLookupProvider);
  return postingsFormat;
}
Example #16
Source File: Completion090PostingsFormat.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Opens the suggest FST file of a segment and the delegate fields producer named in it.
 *
 * <p>The file is read in order: header, delegate postings format name, lookup provider
 * name, and (unless the read is for a merge) the lookup data itself. The input is
 * always closed before the constructor returns; on failure the delegate producer is
 * closed too.
 *
 * @param state read state of the segment
 * @throws IOException if the file cannot be opened or read
 * @throws IllegalStateException if the provider named in the file is not registered
 */
public CompletionFieldsProducer(SegmentReadState state) throws IOException {
  String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
  IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
  FieldsProducer delegateProducer = null;
  boolean success = false;
  try {
    // The header check must run inside the try block: it can throw on a bad or
    // truncated file, and the already-open input would otherwise never be closed.
    version = CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT);

    PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString());
    String providerName = input.readString();
    CompletionLookupProvider completionLookupProvider = providers.get(providerName);
    if (completionLookupProvider == null) {
      throw new IllegalStateException("no provider with name [" + providerName + "] registered");
    }
    // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent unnecessary heap usage?
    delegateProducer = delegatePostingsFormat.fieldsProducer(state);
    /*
     * If we are merging we don't load the FSTs at all such that we
     * don't consume so much memory during merge
     */
    if (state.context.context != Context.MERGE) {
      // TODO: maybe we can do this in a fully lazy fashion based on some configuration
      // eventually we should have some kind of circuit breaker that prevents us from going OOM here
      // with some configuration
      this.lookupFactory = completionLookupProvider.load(input);
    } else {
      this.lookupFactory = null;
    }
    this.delegateProducer = delegateProducer;
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(delegateProducer, input);
    } else {
      IOUtils.close(input);
    }
  }
}
Example #17
Source File: PluginsService.java From Elasticsearch with Apache License 2.0 | 5 votes |
/** * Reloads all Lucene SPI implementations using the new classloader. * This method must be called after the new classloader has been created to * register the services for use. */ static void reloadLuceneSPI(ClassLoader loader) { // do NOT change the order of these method calls! // Codecs: PostingsFormat.reloadPostingsFormats(loader); DocValuesFormat.reloadDocValuesFormats(loader); Codec.reloadCodecs(loader); // Analysis: CharFilterFactory.reloadCharFilters(loader); TokenFilterFactory.reloadTokenFilters(loader); TokenizerFactory.reloadTokenizers(loader); }
Example #18
Source File: BloomFilteringPostingsFormat.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Reads the bloom filter file of a segment.
 *
 * <p>After validating the header, the delegate postings format named in the file is
 * opened, then each stored filter is deserialized and registered under its field name.
 * The footer is verified and the input closed on success; on failure both the input and
 * the delegate producer are closed, suppressing secondary exceptions.
 *
 * @param state read state of the segment
 * @throws IOException if the file cannot be opened, read or validated
 */
public BloomFilteredFieldsProducer(SegmentReadState state) throws IOException {
  final String bloomFile =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
  ChecksumIndexInput bloomIn = null;
  boolean loaded = false;
  try {
    bloomIn = state.directory.openChecksumInput(bloomFile, state.context);
    CodecUtil.checkIndexHeader(bloomIn, BLOOM_CODEC_NAME, VERSION_START, VERSION_CURRENT,
        state.segmentInfo.getId(), state.segmentSuffix);

    // Load the delegate postings format
    final String delegateName = bloomIn.readString();
    this.delegateFieldsProducer = PostingsFormat.forName(delegateName).fieldsProducer(state);

    final int bloomCount = bloomIn.readInt();
    for (int n = 0; n < bloomCount; n++) {
      final int fieldNumber = bloomIn.readInt();
      final FuzzySet filter = FuzzySet.deserialize(bloomIn);
      final FieldInfo info = state.fieldInfos.fieldInfo(fieldNumber);
      bloomsByFieldName.put(info.name, filter);
    }

    CodecUtil.checkFooter(bloomIn);
    IOUtils.close(bloomIn);
    loaded = true;
  } finally {
    if (!loaded) {
      IOUtils.closeWhileHandlingException(bloomIn, delegateFieldsProducer);
    }
  }
}
Example #19
Source File: TestUtil.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Returns a random postings format that supports term ordinals */ public static PostingsFormat getPostingsFormatWithOrds(Random r) { switch (r.nextInt(2)) { case 0: return new LuceneFixedGap(); case 1: return new BlockTreeOrdsPostingsFormat(); // TODO: these don't actually support ords! //case 2: return new FSTOrdPostingsFormat(); default: throw new AssertionError(); } }
Example #20
Source File: PerFieldPostingsFormat.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public void write(Fields fields, NormsProducer norms) throws IOException { Map<PostingsFormat, FieldsGroup> formatToGroups = buildFieldsGroupMapping(fields); // Write postings boolean success = false; try { for (Map.Entry<PostingsFormat, FieldsGroup> ent : formatToGroups.entrySet()) { PostingsFormat format = ent.getKey(); final FieldsGroup group = ent.getValue(); // Exposes only the fields from this group: Fields maskedFields = new FilterFields(fields) { @Override public Iterator<String> iterator() { return group.fields.iterator(); } }; FieldsConsumer consumer = format.fieldsConsumer(group.state); toClose.add(consumer); consumer.write(maskedFields, norms); } success = true; } finally { if (!success) { IOUtils.closeWhileHandlingException(toClose); } } }
Example #21
Source File: PerFieldPostingsFormat.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public void merge(MergeState mergeState, NormsProducer norms) throws IOException { @SuppressWarnings("unchecked") Iterable<String> indexedFieldNames = () -> new MergedIterator<>(true, Arrays.stream(mergeState.fieldsProducers).map(FieldsProducer::iterator).toArray(Iterator[]::new)); Map<PostingsFormat, FieldsGroup> formatToGroups = buildFieldsGroupMapping(indexedFieldNames); // Merge postings PerFieldMergeState pfMergeState = new PerFieldMergeState(mergeState); boolean success = false; try { for (Map.Entry<PostingsFormat, FieldsGroup> ent : formatToGroups.entrySet()) { PostingsFormat format = ent.getKey(); final FieldsGroup group = ent.getValue(); FieldsConsumer consumer = format.fieldsConsumer(group.state); toClose.add(consumer); consumer.merge(pfMergeState.apply(group.fields), norms); } success = true; } finally { pfMergeState.reset(); if (!success) { IOUtils.closeWhileHandlingException(toClose); } } }
Example #22
Source File: PerFieldPostingsFormat.java From lucene-solr with Apache License 2.0 | 5 votes |
public FieldsReader(final SegmentReadState readState) throws IOException { // Read _X.per and init each format: boolean success = false; try { // Read field name -> format name for (FieldInfo fi : readState.fieldInfos) { if (fi.getIndexOptions() != IndexOptions.NONE) { final String fieldName = fi.name; final String formatName = fi.getAttribute(PER_FIELD_FORMAT_KEY); if (formatName != null) { // null formatName means the field is in fieldInfos, but has no postings! final String suffix = fi.getAttribute(PER_FIELD_SUFFIX_KEY); if (suffix == null) { throw new IllegalStateException("missing attribute: " + PER_FIELD_SUFFIX_KEY + " for field: " + fieldName); } PostingsFormat format = PostingsFormat.forName(formatName); String segmentSuffix = getSuffix(formatName, suffix); if (!formats.containsKey(segmentSuffix)) { formats.put(segmentSuffix, format.fieldsProducer(new SegmentReadState(readState, segmentSuffix))); } fields.put(fieldName, formats.get(segmentSuffix)); } } } success = true; } finally { if (!success) { IOUtils.closeWhileHandlingException(formats.values()); } } this.segment = readState.segmentInfo.name; }
Example #23
Source File: TestPerFieldPostingsFormat2.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Uses {@code direct} for the {@code "id"} field and {@code luceneDefault} for every other field. */
@Override
public PostingsFormat getPostingsFormatForField(String field) {
  return field.equals("id") ? direct : luceneDefault;
}
Example #24
Source File: TestPerFieldPostingsFormat2.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Runs the mixed-postings test with a codec that hands out a separate, newly created
 * {@code DirectPostingsFormat} for the {@code "id"} and {@code "date"} fields.
 */
public void testSameCodecDifferentInstance() throws Exception {
  final Codec separateInstancesCodec = new AssertingCodec() {
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      final boolean usesDirect = "id".equals(field) || "date".equals(field);
      if (usesDirect) {
        // A fresh instance on every call.
        return new DirectPostingsFormat();
      }
      return super.getPostingsFormatForField(field);
    }
  };
  doTestMixedPostings(separateInstancesCodec);
}
Example #25
Source File: TestPerFieldPostingsFormat2.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Runs the mixed-postings test with a codec that uses the same format class with
 * different parameters: interval 1 for {@code "id"} and interval 2 for {@code "date"}.
 */
public void testSameCodecDifferentParams() throws Exception {
  final Codec differentParamsCodec = new AssertingCodec() {
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      if ("id".equals(field)) {
        return new LuceneVarGapFixedInterval(1);
      }
      if ("date".equals(field)) {
        return new LuceneVarGapFixedInterval(2);
      }
      return super.getPostingsFormatForField(field);
    }
  };
  doTestMixedPostings(differentParamsCodec);
}
Example #26
Source File: TestAddIndexes.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Uses {@code directFormat} for the {@code "id"} field and {@code defaultFormat} for every other field. */
@Override
public PostingsFormat getPostingsFormatForField(String field) {
  return field.equals("id") ? directFormat : defaultFormat;
}
Example #27
Source File: TestExternalCodecs.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Uses {@code defaultFormat} for the fields {@code "field2"}, {@code "field1"} and
 * {@code "id"}, and {@code ramFormat} for every other field.
 */
@Override
public PostingsFormat getPostingsFormatForField(String field) {
  final boolean usesDefault =
      field.equals("field2") || field.equals("field1") || field.equals("id");
  return usesDefault ? defaultFormat : ramFormat;
}
Example #28
Source File: MtasCodecPostingsFormat.java From mtas with Apache License 2.0 | 5 votes |
/**
 * Creates the fields consumer for a segment.
 *
 * <p>When a delegate postings format is set, its consumer is wrapped in an
 * {@code MtasFieldsConsumer}. Otherwise the postings format of the codec looked up by
 * {@code delegateCodecName} supplies the consumer directly.
 *
 * @param state write state of the segment
 * @return the consumer to write postings with
 * @throws IOException if the underlying consumer cannot be created
 */
@Override
public final FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  if (delegatePostingsFormat == null) {
    final PostingsFormat codecFormat = Codec.forName(delegateCodecName).postingsFormat();
    return codecFormat.fieldsConsumer(state);
  }
  final FieldsConsumer delegateConsumer = delegatePostingsFormat.fieldsConsumer(state);
  return new MtasFieldsConsumer(delegateConsumer, state, getName(), delegatePostingsFormat.getName());
}
Example #29
Source File: PerFieldMappingPostingFormatCodec.java From crate with Apache License 2.0 | 5 votes |
/**
 * Always returns the default postings format for the field, logging a warning first
 * when the mapper service has no mapping for it.
 */
@Override
public PostingsFormat getPostingsFormatForField(String field) {
  final boolean unmapped = mapperService.fullName(field) == null;
  if (unmapped) {
    logger.warn("no index mapper found for field: [{}] returning default postings format", field);
  }
  return super.getPostingsFormatForField(field);
}
Example #30
Source File: PluginsService.java From crate with Apache License 2.0 | 5 votes |
/** * Reloads all Lucene SPI implementations using the new classloader. * This method must be called after the new classloader has been created to * register the services for use. */ static void reloadLuceneSPI(ClassLoader loader) { // do NOT change the order of these method calls! // Codecs: PostingsFormat.reloadPostingsFormats(loader); DocValuesFormat.reloadDocValuesFormats(loader); Codec.reloadCodecs(loader); // Analysis: CharFilterFactory.reloadCharFilters(loader); TokenFilterFactory.reloadTokenFilters(loader); TokenizerFactory.reloadTokenizers(loader); }