Java Code Examples for org.apache.lucene.search.similarities.DefaultSimilarity

The following examples show how to use org.apache.lucene.search.similarities.DefaultSimilarity. These examples are extracted from open source projects; the source project, source file, and license are noted above each example where available.
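Before the extracted examples, here is a minimal, hypothetical sketch of the two most common ways a DefaultSimilarity instance is wired in: on the IndexWriterConfig at index time and on the IndexSearcher at query time. It is not taken from any of the projects below; it assumes a Lucene 5.x classpath on which DefaultSimilarity is still available, and the in-memory directory, class name, and field name are placeholders.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class DefaultSimilarityUsageSketch {

    public static void main(String[] args) throws Exception {
        Directory directory = new RAMDirectory();

        // Index time: the configured similarity controls how field norms are encoded.
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        config.setSimilarity(new DefaultSimilarity());
        try (IndexWriter writer = new IndexWriter(directory, config)) {
            Document doc = new Document();
            doc.add(new TextField("body", "hello default similarity", Field.Store.NO));
            writer.addDocument(doc);
        }

        // Query time: the similarity set on the searcher is used for scoring.
        try (DirectoryReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            searcher.setSimilarity(new DefaultSimilarity());
        }
    }
}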
Example 1
Source Project: Elasticsearch   Source File: MoreLikeThisQuery.java    License: Apache License 2.0
@Override
public Query rewrite(IndexReader reader) throws IOException {
    XMoreLikeThis mlt = new XMoreLikeThis(reader, similarity == null ? new DefaultSimilarity() : similarity);

    mlt.setFieldNames(moreLikeFields);
    mlt.setAnalyzer(analyzer);
    mlt.setMinTermFreq(minTermFrequency);
    mlt.setMinDocFreq(minDocFreq);
    mlt.setMaxDocFreq(maxDocFreq);
    mlt.setMaxQueryTerms(maxQueryTerms);
    mlt.setMinWordLen(minWordLen);
    mlt.setMaxWordLen(maxWordLen);
    mlt.setStopWords(stopWords);
    mlt.setBoost(boostTerms);
    mlt.setBoostFactor(boostTermsFactor);

    if (this.unlikeText != null || this.unlikeFields != null) {
        handleUnlike(mlt, this.unlikeText, this.unlikeFields);
    }
    
    return createQuery(mlt);
}
 
Example 2
Source Project: modernmt   Source File: ContextAnalyzerIndex.java    License: Apache License 2.0
public ContextAnalyzerIndex(Directory directory, Rescorer rescorer) throws IOException {
    this.indexDirectory = directory;
    this.analyzer = new CorpusAnalyzer();
    this.rescorer = rescorer;

    // Index writer setup
    IndexWriterConfig indexConfig = new IndexWriterConfig(Version.LUCENE_4_10_4, this.analyzer);
    indexConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    indexConfig.setSimilarity(new DefaultSimilarity() {

        @Override
        public float lengthNorm(FieldInvertState state) {
            // Disable length normalization: every field gets a norm of 1
            // regardless of its length.
            return 1.f;
        }

    });

    this.indexWriter = new IndexWriter(this.indexDirectory, indexConfig);

    // Ensure index exists
    if (!DirectoryReader.indexExists(directory))
        this.indexWriter.commit();
}
 
Example 3
Source Project: Elasticsearch   Source File: TermVectorsFilter.java    License: Apache License 2.0
public TermVectorsFilter(Fields termVectorsByField, Fields topLevelFields, Set<String> selectedFields, @Nullable AggregatedDfs dfs) {
    this.fields = termVectorsByField;
    this.topLevelFields = topLevelFields;
    this.selectedFields = selectedFields;

    this.dfs = dfs;
    this.scoreTerms = new HashMap<>();
    this.sizes = AtomicLongMap.create();
    this.similarity = new DefaultSimilarity();
}
 
Example 4
Source Project: Elasticsearch   Source File: DefaultSimilarityProvider.java    License: Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public DefaultSimilarity get() {
    return similarity;
}
 
Example 5
Source Project: Elasticsearch   Source File: XMoreLikeThis.java    License: Apache License 2.0
/**
 * Constructor requiring an IndexReader.
 */
public XMoreLikeThis(IndexReader ir) {
    this(ir, new DefaultSimilarity());
}
 
Example 6
protected ITermDocKey<V> deserialize(final ITuple tuple,
            final boolean keyOnly) {
    
        // key is {term,docId,fieldId}
        // final byte[] key = tuple.getKey();
        //      
        // // decode the document identifier.
        // final long docId = KeyBuilder.decodeLong(key, key.length
        // - Bytes.SIZEOF_LONG /*docId*/ - Bytes.SIZEOF_INT/*fieldId*/);

        final ByteArrayBuffer kbuf = tuple.getKeyBuffer();

        /*
         * The byte offset of the docId in the key.
         * 
         * Note: This is also the byte length of the match on the unicode sort
         * key, which appears at the head of the key.
         */
        final int docIdOffset = kbuf.limit() - Bytes.SIZEOF_LONG /* docId */
                - (fieldsEnabled ? Bytes.SIZEOF_INT/* fieldId */: 0);

        final V docId = (V) (Object)Long.valueOf(KeyBuilder.decodeLong(kbuf.array(),
                docIdOffset));

        // Decode field when present
        final int fieldId;
        if (fieldsEnabled) {
            fieldId = KeyBuilder.decodeShort(kbuf.array(), kbuf.limit()
                    - Bytes.SIZEOF_INT);
        } else {
            fieldId = -1;
        }
        
        final int termWeightOffset = docIdOffset - Bytes.SIZEOF_BYTE;
        
        final byte termWeightCompact = kbuf.getByte(termWeightOffset);
        
        /*
         * See http://lucene.apache.org/core/5_1_0/core/org/apache/lucene/search/similarities/DefaultSimilarity.html
         * for more information on the round-trip of the normalized term weight.
         */
        
        final DefaultSimilarity similarity = new DefaultSimilarity();

        final double termWeight = similarity.decodeNormValue(termWeightCompact);

        if (keyOnly) {

            return new ReadOnlyTermDocKey(docId, fieldId, termWeight);
            
        }
        
//        final int termFreq;
//        final double termWeight;
//        try {
//
//            final DataInputBuffer dis = tuple.getValueStream();
//
//            termFreq = dis.readShort();
//
//            if(doublePrecision)
//                termWeight = dis.readDouble();
//            else
//                termWeight = dis.readFloat();
//            
//        } catch (IOException ex) {
//            
//            throw new RuntimeException(ex);
//
//        }
//
        return new ReadOnlyTermDocRecord<V>(null/* token */, docId, fieldId,
                /* termFreq, */ termWeight);

    }
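The comment in the example above points at the DefaultSimilarity javadoc for the round-trip of the normalized term weight. Below is a self-contained sketch of that round-trip, assuming the Lucene 4.x/5.x TFIDFSimilarity API in which encodeNormValue takes a float and returns a long and decodeNormValue does the reverse; the single-byte SmallFloat encoding is lossy, so the decoded value is only an approximation of the original weight. The class name and sample weight are placeholders.

import org.apache.lucene.search.similarities.DefaultSimilarity;

public class NormValueRoundTripSketch {

    public static void main(String[] args) {
        DefaultSimilarity similarity = new DefaultSimilarity();

        float termWeight = 0.123f;

        // Encode the weight into the compact single-byte norm representation.
        long termWeightCompact = similarity.encodeNormValue(termWeight);

        // Decode it back; precision is lost, so this is only approximately 0.123.
        float decoded = similarity.decodeNormValue(termWeightCompact);

        System.out.println(termWeight + " -> " + termWeightCompact + " -> " + decoded);
    }
}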
 
Example 7
protected ITermDocKey deserialize(final ITuple tuple, final boolean keyOnly) {

        final ByteArrayBuffer kbuf = tuple.getKeyBuffer();

        // The byte length of the docId IV.
        final int byteLength;
        try {
//            byteLength = LongPacker.unpackInt((DataInput) tuple
//                    .getValueStream());
            byteLength = ShortPacker.unpackShort((DataInput) tuple
                    .getValueStream());
        } catch (IOException ex) {
            throw new RuntimeException(ex);
        }
        
        final int docIdOffset = kbuf.limit() - byteLength;

        // Decode the IV.
        final IV docId = (IV) IVUtility.decodeFromOffset(kbuf.array(),
                docIdOffset);

        final int termWeightOffset = docIdOffset - Bytes.SIZEOF_BYTE;
        
        final byte termWeightCompact = kbuf.getByte(termWeightOffset);
        
        /*
         * See http://lucene.apache.org/core/old_versioned_docs/versions/3_0_2/api/all/org/apache/lucene/search/Similarity.html
         * for more information on the round-trip of the normalized term weight.
         */
        
        final DefaultSimilarity similarity = new DefaultSimilarity(); 

        final double termWeight = similarity.decodeNormValue(termWeightCompact);

        if (keyOnly) {

            return new ReadOnlyTermDocKey(docId, NO_FIELD, termWeight);

        }

//        final int termFreq;
//        final double termWeight;
//        try {
//
//            final DataInputBuffer dis = tuple.getValueStream();
//
//            // skip the byte length of the IV.
//            LongPacker.unpackInt((DataInput) dis);
//            
//            termFreq = dis.readShort();
//            termFreq = LongPacker.unpackInt((DataInput) dis);

//            if (doublePrecision)
//                termWeight = dis.readDouble();
//            else
//                termWeight = dis.readFloat();
//
//        } catch (IOException ex) {
//
//            throw new RuntimeException(ex);
//
//        }

        return new ReadOnlyTermDocRecord(null/* token */, docId, NO_FIELD,
                /* termFreq, */ termWeight);

    }
 
Example 8
@Override
public byte[] serializeKey(final Object obj) {

    @SuppressWarnings("unchecked")
    final ITermDocKey<V> entry = (ITermDocKey<V>) obj;

    final String termText = entry.getToken();
    
    final double termWeight = entry.getLocalTermWeight();
    
    /*
     * See http://lucene.apache.org/core/5_1_0/core/org/apache/lucene/search/similarities/DefaultSimilarity.html
     * for more information on the round-trip of the normalized term weight.
     */
    
    final DefaultSimilarity similarity = new DefaultSimilarity();
    
    final long termWeightCompact = similarity.encodeNormValue((float) termWeight);
    
    final V docId = entry.getDocId();

    final IKeyBuilder keyBuilder = getKeyBuilder();

    keyBuilder.reset();

    // the token text (or its successor as desired).
    keyBuilder
            .appendText(termText, true/* unicode */, false/* successor */);
    
    keyBuilder.append(termWeightCompact);

    keyBuilder.append((V) docId);

    if (fieldsEnabled)
        keyBuilder.append(entry.getFieldId());

    final byte[] key = keyBuilder.getKey();

    if (log.isDebugEnabled()) {

        log.debug("{" + termText + "," + docId
                + (fieldsEnabled ? "," + entry.getFieldId() : "")
                + "}, key=" + BytesUtil.toString(key));

    }

    return key;

}
 
Example 9
@Override
public byte[] serializeKey(final Object obj) {

    final ITermDocKey entry = (ITermDocKey) obj;

    final String termText = entry.getToken();
    
    final double termWeight = entry.getLocalTermWeight();
    
    /*
     * See http://lucene.apache.org/core/old_versioned_docs/versions/3_0_2/api/all/org/apache/lucene/search/Similarity.html
     * for more information on the round-trip of the normalized term weight.
     */
    final DefaultSimilarity similarity = new DefaultSimilarity(); 
    final long termWeightCompact = similarity.encodeNormValue((float) termWeight);
    
    final IV docId = (IV)entry.getDocId();

    final IKeyBuilder keyBuilder = getKeyBuilder();

    keyBuilder.reset();

    // the token text (or its successor as desired).
    keyBuilder
            .appendText(termText, true/* unicode */, false/* successor */);

    keyBuilder.append(termWeightCompact);

    IVUtility.encode(keyBuilder, docId);

    final byte[] key = keyBuilder.getKey();

    if (log.isDebugEnabled()) {

        log.debug("{" + termText + "," + docId + "}, key="
                + BytesUtil.toString(key));

    }

    return key;

}