Java Code Examples for org.apache.lucene.document.Document#getField()

The following examples show how to use org.apache.lucene.document.Document#getField(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: LuceneSearchIndex.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Turns raw search hits into {@code Doc} objects.
 * For every hit the document is loaded through the searcher and the bytes of its "_id"
 * field are copied into a fresh array; hits without an "_id" field are left out.
 *
 * @param hits the hits to convert
 * @param searcher used to load the document behind each hit
 * @return one {@code Doc} per hit that still has an "_id" field, in hit order
 * @throws IOException if loading a document fails
 */
private List<Doc> toDocs(ScoreDoc[] hits, Searcher searcher) throws IOException{
  final List<Doc> results = new ArrayList<>();
  for (final ScoreDoc hit : hits) {
    final IndexableField idField = searcher.doc(hit.doc).getField("_id");
    // a missing id means: deleted between index hit and retrieval.
    if (idField != null) {
      // copy only the [offset, offset + length) window of the BytesRef
      final BytesRef idRef = idField.binaryValue();
      final byte[] idBytes = new byte[idRef.length];
      System.arraycopy(idRef.bytes, idRef.offset, idBytes, 0, idRef.length);
      results.add(new Doc(hit, idBytes, 0 /*version*/));
    }
  }
  return results;
}
 
Example 2
Source File: SnapshotTagResult.java    From spacewalk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Builds a result from a lucene hit, copying whichever of the
 * "id", "name", "serverId" and "snapshotId" fields the document has.
 * @param rankIn order of results returned from lucene
 * @param scoreIn score of this hit as defined by lucene query
 * @param doc lucene document containing data fields
 */
public SnapshotTagResult(int rankIn, float scoreIn, Document doc) {
    if (fieldPresent(doc, "id")) {
        setId(fieldText(doc, "id"));
    }
    if (fieldPresent(doc, "name")) {
        setName(fieldText(doc, "name"));
    }
    if (fieldPresent(doc, "serverId")) {
        setServerId(fieldText(doc, "serverId"));
    }
    if (fieldPresent(doc, "snapshotId")) {
        setSnapshotId(fieldText(doc, "snapshotId"));
    }
    setRank(rankIn);
    setScore(scoreIn);
}

/**
 * Tells whether the document has a field with the given name.
 * @param doc lucene document to inspect
 * @param name field name
 * @return true if the lookup yields a field
 */
private static boolean fieldPresent(Document doc, String name) {
    return doc.getField(name) != null;
}

/**
 * Reads the string value of a field; only call after fieldPresent returned true.
 * @param doc lucene document to read from
 * @param name field name
 * @return string value of the field
 */
private static String fieldText(Document doc, String name) {
    return doc.getField(name).stringValue();
}
 
Example 3
Source File: DocResult.java    From uyuni with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Constructs a result object
 * @param rankIn order of results returned from lucene
 * @param scoreIn score of this hit as defined by lucene query
 * @param doc lucene document containing data fields
 */
public DocResult(int rankIn, float scoreIn, Document doc) {
    if (doc.getField("url") != null) {
        setUrl(doc.getField("url").stringValue());
        setId(doc.getField("url").stringValue());
    }
    if (doc.getField("title") != null) {
        setTitle(doc.getField("title").stringValue());
        setName(doc.getField("title").stringValue());
    }
    else {
        setTitle("EMPTY");
        setName("EMPTY");
    }
    setRank(rankIn);
    setScore(scoreIn);
}
 
Example 4
Source File: DocResult.java    From spacewalk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Creates a result for one lucene hit.
 * Url and id both come from the "url" field if the document has one; title and
 * name both come from the "title" field, or are set to "EMPTY" if it is missing.
 * @param rankIn order of results returned from lucene
 * @param scoreIn score of this hit as defined by lucene query
 * @param doc lucene document containing data fields
 */
public DocResult(int rankIn, float scoreIn, Document doc) {
    if (fieldPresent(doc, "url")) {
        setUrl(fieldText(doc, "url"));
        setId(fieldText(doc, "url"));
    }
    if (!fieldPresent(doc, "title")) {
        // no title available: fall back to the placeholder
        setTitle("EMPTY");
        setName("EMPTY");
    }
    else {
        setTitle(fieldText(doc, "title"));
        setName(fieldText(doc, "title"));
    }
    setRank(rankIn);
    setScore(scoreIn);
}

/**
 * Tells whether the document has a field with the given name.
 * @param doc lucene document to inspect
 * @param name field name
 * @return true if the lookup yields a field
 */
private static boolean fieldPresent(Document doc, String name) {
    return doc.getField(name) != null;
}

/**
 * Reads the string value of a field; only call after fieldPresent returned true.
 * @param doc lucene document to read from
 * @param name field name
 * @return string value of the field
 */
private static String fieldText(Document doc, String name) {
    return doc.getField(name).stringValue();
}
 
Example 5
Source File: DocumentDictionary.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the weight of the current document.
 * A <code>weightField</code> value present on <code>doc</code> takes precedence: its numeric
 * value is returned as a long, or 0 if it has no numeric value. Otherwise, when
 * {@link NumericDocValues} are available in <code>weightValues</code>, the value for
 * <code>docId</code> is returned if there is one. In every remaining case the weight is 0.
 */
protected long getWeight(Document doc, int docId) throws IOException {
  final IndexableField storedWeight = doc.getField(weightField);
  if (storedWeight != null) {
    // found weight on the document itself
    final Number value = storedWeight.numericValue();
    return value == null ? 0 : value.longValue();
  }
  if (weightValues == null) {
    // nothing to fall back on
    return 0;
  }
  // move the doc values up to docId when they are still positioned before it
  if (weightValues.docID() < docId) {
    weightValues.advance(docId);
  }
  // a position other than docId means the document has no value
  return weightValues.docID() == docId ? weightValues.longValue() : 0;
}
 
Example 6
Source File: HardwareDeviceResult.java    From spacewalk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Builds a result from a lucene hit: the "description" field becomes the name,
 * and "serverId" and "id" are copied over, each only if the document has it.
 * @param rankIn order of results returned from lucene
 * @param scoreIn score of this hit as defined by lucene query
 * @param doc lucene document containing data fields
 */
public HardwareDeviceResult(int rankIn, float scoreIn, Document doc) {
    if (fieldPresent(doc, "description")) {
        setName(fieldText(doc, "description"));
    }
    if (fieldPresent(doc, "serverId")) {
        setServerId(fieldText(doc, "serverId"));
    }
    if (fieldPresent(doc, "id")) {
        setId(fieldText(doc, "id"));
    }
    setRank(rankIn);
    setScore(scoreIn);
}

/**
 * Tells whether the document has a field with the given name.
 * @param doc lucene document to inspect
 * @param name field name
 * @return true if the lookup yields a field
 */
private static boolean fieldPresent(Document doc, String name) {
    return doc.getField(name) != null;
}

/**
 * Reads the string value of a field; only call after fieldPresent returned true.
 * @param doc lucene document to read from
 * @param name field name
 * @return string value of the field
 */
private static String fieldText(Document doc, String name) {
    return doc.getField(name).stringValue();
}
 
Example 7
Source File: DocumentDictionaryTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes a single document that has the text field and a doc-values weight but no payload
 * field, then checks that the {@link DocumentDictionary} iterator still returns it with the
 * expected term, the expected weight and an empty payload.
 */
@Test
public void testWithOptionalPayload() throws IOException {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);

  // Create a document that is missing the payload field
  Document doc = new Document();
  Field field = new TextField(FIELD_NAME, "some field", Field.Store.YES);
  doc.add(field);
  // do not store the payload or the contexts
  Field weight = new NumericDocValuesField(WEIGHT_FIELD_NAME, 100);
  doc.add(weight);
  writer.addDocument(doc);
  writer.commit();
  writer.close();
  IndexReader ir = DirectoryReader.open(dir);

  // Even though the payload field is missing, the dictionary iterator should not skip the document
  // because the payload field is optional.
  Dictionary dictionaryOptionalPayload =
      new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
  InputIterator inputIterator = dictionaryOptionalPayload.getEntryIterator();
  BytesRef f = inputIterator.next();
  // assertEquals(expected, actual) reports both values on failure and copes with a null f
  assertEquals(new BytesRef(doc.get(FIELD_NAME)), f);
  IndexableField weightField = doc.getField(WEIGHT_FIELD_NAME);
  assertEquals(weightField.numericValue().longValue(), inputIterator.weight());
  IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
  assertNull(payloadField);
  assertEquals(0, inputIterator.payload().length);
  IOUtils.close(ir, analyzer, dir);
}
 
Example 8
Source File: ServerCustomInfoResult.java    From spacewalk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Builds a result from a lucene hit, copying whichever of the
 * "value", "serverId" and "id" fields the document has.
 * @param rankIn order of results returned from lucene
 * @param scoreIn score of this hit as defined by lucene query
 * @param doc lucene document containing data fields
 */
public ServerCustomInfoResult(int rankIn, float scoreIn, Document doc) {
    if (fieldPresent(doc, "value")) {
        setValue(fieldText(doc, "value"));
    }
    if (fieldPresent(doc, "serverId")) {
        setServerId(fieldText(doc, "serverId"));
    }
    if (fieldPresent(doc, "id")) {
        setId(fieldText(doc, "id"));
    }
    setRank(rankIn);
    setScore(scoreIn);
}

/**
 * Tells whether the document has a field with the given name.
 * @param doc lucene document to inspect
 * @param name field name
 * @return true if the lookup yields a field
 */
private static boolean fieldPresent(Document doc, String name) {
    return doc.getField(name) != null;
}

/**
 * Reads the string value of a field; only call after fieldPresent returned true.
 * @param doc lucene document to read from
 * @param name field name
 * @return string value of the field
 */
private static String fieldText(Document doc, String name) {
    return doc.getField(name).stringValue();
}
 
Example 9
Source File: SolrInformationServer.java    From SearchServices with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Collects the ids of all documents matching the error-node doc type.
 * Each matching document's {@code FIELD_SOLR4_ID} value is read, a leading
 * {@code PREFIX_ERROR} is removed if present, and the remainder is parsed as a {@code Long}.
 * The searcher reference obtained from the core is released before returning.
 *
 * @return the parsed ids of the matching documents
 * @throws IOException if searching or loading a document fails
 */
@Override
public Set<Long> getErrorDocIds() throws IOException
{
    final Set<Long> ids = new HashSet<>();
    RefCounted<SolrIndexSearcher> searcherRef = null;
    try
    {
        searcherRef = this.core.getSearcher();
        final SolrIndexSearcher searcher = searcherRef.get();

        final TermQuery errorNodes = new TermQuery(new Term(FIELD_DOC_TYPE, DOC_TYPE_ERROR_NODE));
        final DocListCollector collector = new DocListCollector();
        searcher.search(errorNodes, collector);
        final IntArrayList hits = collector.getDocs();

        for (int i = 0, count = hits.size(); i < count; ++i)
        {
            final String rawId = searcher.doc(hits.get(i), REQUEST_ONLY_ID_FIELD)
                    .getField(FIELD_SOLR4_ID)
                    .stringValue();
            // drop the error prefix, if any, so that only the part to parse remains
            final String parsableId = rawId.startsWith(PREFIX_ERROR)
                    ? rawId.substring(PREFIX_ERROR.length())
                    : rawId;
            ids.add(Long.valueOf(parsableId));
        }
    }
    finally
    {
        // the reference is still null when getSearcher() itself failed
        if (searcherRef != null)
        {
            searcherRef.decref();
        }
    }
    return ids;
}
 
Example 10
Source File: DocsReader.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Determines the byte offset for the given document.
 * When the document has a {@code BLOCK_INDEX} field, that index is converted to an offset
 * through the reader's {@code TocReader}; otherwise the value of the document's
 * {@code STORAGE_FILE_OFFSET} field is returned.
 *
 * @param d the document to read the fields from
 * @param reader supplies the {@code TocReader} used to map a block index to an offset
 * @return the byte offset
 */
private long getByteOffset(final Document d, final RecordReader reader) {
    final IndexableField blockField = d.getField(FieldNames.BLOCK_INDEX);
    if ( blockField == null ) {
        // no block index on this document: use the offset held by the document itself
        return d.getField(FieldNames.STORAGE_FILE_OFFSET).numericValue().longValue();
    }

    final int blockIndex = blockField.numericValue().intValue();
    return reader.getTocReader().getBlockOffset(blockIndex);
}
 
Example 11
Source File: FieldValueFeature.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Scores the current document from the value of {@code field}.
 * A numeric value is returned as a float. A one-character string equal to the first
 * character of {@code BoolField.TRUE_TOKEN} / {@code BoolField.FALSE_TOKEN} yields 1 / 0.
 * A missing field or any other value yields {@code getDefaultValue()}.
 *
 * @throws FeatureException wrapping any {@link IOException} raised while reading the document
 */
@Override
public float score() throws IOException {

  try {
    final Document stored = context.reader().document(itr.docID(),
        fieldAsSet);
    final IndexableField value = stored.getField(field);
    if (value == null) {
      return getDefaultValue();
    }
    final Number numeric = value.numericValue();
    if (numeric != null) {
      return numeric.floatValue();
    }
    final String text = value.stringValue();
    if (text.length() == 1) {
      // boolean values in the index are encoded with
      // a single char contained in TRUE_TOKEN or FALSE_TOKEN
      // (see BoolField)
      final char token = text.charAt(0);
      if (token == BoolField.TRUE_TOKEN[0]) {
        return 1;
      }
      if (token == BoolField.FALSE_TOKEN[0]) {
        return 0;
      }
    }
  } catch (final IOException e) {
    throw new FeatureException(
        e.toString() + ": " +
            "Unable to extract feature for "
            + name, e);
  }
  // neither numeric nor a recognised boolean token
  return getDefaultValue();
}
 
Example 12
Source File: SpatialClusteringComponent.java    From solr-spatial-clustering with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the string value of the named field, or {@code null} when the
 * document has no field with that name.
 *
 * @param document the document to look in
 * @param name the field name
 * @return the field's string value, or {@code null} if the field is absent
 */
private static String getFieldString(Document document, String name) {
    final IndexableField match = document.getField(name);
    return match == null ? null : match.stringValue();
}
 
Example 13
Source File: IndexManager.java    From dependency-track with Apache License 2.0 5 votes vote down vote up
/**
 * Sets the string value of an existing Field in a Document.
 * A blank value (as judged by {@code StringUtils.isBlank}) is stored as the empty string.
 * NOTE(review): the looked-up field is not null-checked; presumably callers only pass
 * names of fields already on the document. Confirm against callers.
 * @param doc the Lucene Document to update the field in
 * @param name the name of the field
 * @param value the value of the field
 * @since 3.0.0
 */
protected void updateField(final Document doc, final String name, String value) {
    final String normalized = StringUtils.isBlank(value) ? "" : value;
    ((Field) doc.getField(name)).setStringValue(normalized);
}
 
Example 14
Source File: LargeFieldTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that the named field of {@code d} is a lazy field whose value has been loaded.
 * The field named {@code BIG_FIELD} must be a {@link SolrDocumentFetcher.LargeLazyField};
 * every other field must be a {@link LazyDocument.LazyField}.
 *
 * @param d the document holding the field
 * @param fieldName the name of the field to check
 */
private void assertLazyLoaded(Document d, String fieldName) {
  IndexableField field = d.getField(fieldName);
  // Compare by value: == only matches when the caller passes the very same String instance.
  if (BIG_FIELD.equals(fieldName)) {
    assertTrue(field instanceof SolrDocumentFetcher.LargeLazyField);
    assertTrue(((SolrDocumentFetcher.LargeLazyField)field).hasBeenLoaded());
  } else {
    assertTrue(field instanceof LazyDocument.LazyField);
    assertTrue(((LazyDocument.LazyField)field).hasBeenLoaded());
  }
}
 
Example 15
Source File: HardwareDeviceResult.java    From uyuni with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Creates a result for one lucene hit. The name is taken from the "description"
 * field, server id and id from "serverId" and "id"; absent fields are skipped.
 * @param rankIn order of results returned from lucene
 * @param scoreIn score of this hit as defined by lucene query
 * @param doc lucene document containing data fields
 */
public HardwareDeviceResult(int rankIn, float scoreIn, Document doc) {
    if (fieldPresent(doc, "description")) {
        setName(fieldText(doc, "description"));
    }
    if (fieldPresent(doc, "serverId")) {
        setServerId(fieldText(doc, "serverId"));
    }
    if (fieldPresent(doc, "id")) {
        setId(fieldText(doc, "id"));
    }
    setRank(rankIn);
    setScore(scoreIn);
}

/**
 * Tells whether the document has a field with the given name.
 * @param doc lucene document to inspect
 * @param name field name
 * @return true if the lookup yields a field
 */
private static boolean fieldPresent(Document doc, String name) {
    return doc.getField(name) != null;
}

/**
 * Reads the string value of a field; only call after fieldPresent returned true.
 * @param doc lucene document to read from
 * @param name field name
 * @return string value of the field
 */
private static String fieldText(Document doc, String name) {
    return doc.getField(name).stringValue();
}
 
Example 16
Source File: LukeRequestHandler.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Collects per-field information for the fields known to the request's index reader,
 * in alphabetical order of field name.
 * <p>
 * When the {@code fl} request parameter is given, only the listed fields (or all fields if the
 * list contains {@code *}) are reported, and detailed info is added for each of them.
 * For every reported field the entry contains its schema type name and schema flags, the
 * dynamic base pattern when it is a dynamic field, and, for fields that have terms and are
 * indexed according to the schema, the index flags taken from the first live document plus
 * the number of documents with the field.
 *
 * @param req the current request; supplies the searcher, schema and parameters
 * @return a map from field name to that field's info map
 * @throws Exception if reading from the index fails
 */
private static SimpleOrderedMap<Object> getIndexedFieldsInfo(SolrQueryRequest req)
    throws Exception {

  SolrIndexSearcher searcher = req.getSearcher();
  SolrParams params = req.getParams();

  // Optional restriction of the reported fields, split on commas and/or whitespace
  Set<String> fields = null;
  String fl = params.get(CommonParams.FL);
  if (fl != null) {
    fields = new TreeSet<>(Arrays.asList(fl.split( "[,\\s]+" )));
  }

  LeafReader reader = searcher.getSlowAtomicReader();
  IndexSchema schema = searcher.getSchema();

  // Don't be tempted to put this in the loop below, the whole point here is to alphabetize the fields!
  Set<String> fieldNames = new TreeSet<>();
  for(FieldInfo fieldInfo : reader.getFieldInfos()) {
    fieldNames.add(fieldInfo.name);
  }

  // Walk the term enum and keep a priority queue for each map in our set
  SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<>();

  for (String fieldName : fieldNames) {
    if (fields != null && ! fields.contains(fieldName) && ! fields.contains("*")) {
      continue; //we're not interested in this field Still an issue here
    }

    SimpleOrderedMap<Object> fieldMap = new SimpleOrderedMap<>();

    SchemaField sfield = schema.getFieldOrNull( fieldName );
    FieldType ftype = (sfield==null)?null:sfield.getType();

    fieldMap.add( "type", (ftype==null)?null:ftype.getTypeName() );
    fieldMap.add("schema", getFieldFlags(sfield));
    if (sfield != null && schema.isDynamicField(sfield.getName()) && schema.getDynamicPattern(sfield.getName()) != null) {
      fieldMap.add("dynamicBase", schema.getDynamicPattern(sfield.getName()));
    }
    Terms terms = reader.terms(fieldName);
    if (terms == null) { // Not indexed, so we need to report what we can (it made it through the fl param if specified)
      finfo.add( fieldName, fieldMap );
      continue;
    }

    if(sfield != null && sfield.indexed() ) {
      if (params.getBool(INCLUDE_INDEX_FIELD_FLAGS,true)) {
        Document doc = getFirstLiveDoc(terms, reader);

        if (doc != null) {
          // Found a document with this field
          try {
            IndexableField fld = doc.getField(fieldName);
            if (fld != null) {
              fieldMap.add("index", getFieldFlags(fld));
            } else {
              // it is a non-stored field...
              fieldMap.add("index", "(unstored field)");
            }
          } catch (Exception ex) {
            // Best effort: keep going without the "index" entry, but keep the cause in the log
            log.warn("error reading field: {}", fieldName, ex);
          }
        }
      }
      fieldMap.add("docs", terms.getDocCount());
    }
    if (fields != null && (fields.contains(fieldName) || fields.contains("*"))) {
      getDetailedFieldInfo(req, fieldName, fieldMap);
    }
    // Add the field
    finfo.add( fieldName, fieldMap );
  }
  return finfo;
}
 
Example 17
Source File: RoutingFieldMapper.java    From Elasticsearch with Apache License 2.0 4 votes vote down vote up
/**
 * Reads this mapper's value from the given document.
 * The field is looked up under the field type's index name; its value is converted
 * through {@code fieldType().value(...)} and returned as a String.
 *
 * @param document the document to read from
 * @return the converted value, or {@code null} when the document has no such field
 */
public String value(Document document) {
    final Field found = (Field) document.getField(fieldType().names().indexName());
    if (found == null) {
        return null;
    }
    return (String) fieldType().value(found);
}
 
Example 18
Source File: DocumentUtil.java    From netbeans with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the string value of the document's {@code FIELD_SOURCE} field,
 * or {@code null} when the document has no such field.
 */
@Override
public String convert(Document doc) {
    final Field source = doc.getField(FIELD_SOURCE);
    if (source != null) {
        return source.stringValue();
    }
    return null;
}
 
Example 19
Source File: LumongoSegment.java    From lumongo with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the {@code ScoredResult.Builder} for the hit at position {@code i} of {@code results}.
 * The document is loaded with a field set chosen from the fetch type (only when the index
 * settings say documents are stored in the index), its timestamp and unique id are read,
 * and the builder is filled with score, ids, timestamp, segment, index name and result index.
 * Stored document content is handled for any fetch type other than NONE, and sort values
 * are added when {@code sorting} is set.
 *
 * @throws Exception if highlighters or analysis handlers are supplied without a FULL fetch,
 *                   or if loading or handling the document fails
 */
private ScoredResult.Builder handleDocResult(IndexSearcher is, SortRequest sortRequest, boolean sorting, ScoreDoc[] results, int i,
		FetchType resultFetchType, List<String> fieldsToReturn, List<String> fieldsToMask, List<LumongoHighlighter> highlighterList,
		List<AnalysisHandler> analysisHandlerList) throws Exception {
	final ScoreDoc hit = results[i];
	final int docId = hit.doc;
	final boolean fullFetch = FetchType.FULL.equals(resultFetchType);

	// the wider field sets only apply when documents are stored in the index
	Set<String> fieldsToFetch = fetchSet;
	if (indexConfig.getIndexSettings().getStoreDocumentInIndex()) {
		if (fullFetch) {
			fieldsToFetch = fetchSetWithDocument;
		}
		else if (FetchType.META.equals(resultFetchType)) {
			fieldsToFetch = fetchSetWithMeta;
		}
	}

	final Document stored = is.doc(docId, fieldsToFetch);
	final long timestamp = stored.getField(LumongoConstants.TIMESTAMP_FIELD).numericValue().longValue();
	final String uniqueId = stored.get(LumongoConstants.ID_FIELD);

	// both features work on the full document, so reject them for any lesser fetch
	if (!highlighterList.isEmpty() && !fullFetch) {
		throw new Exception("Highlighting requires a full fetch of the document");
	}
	if (!analysisHandlerList.isEmpty() && !fullFetch) {
		throw new Exception("Analysis requires a full fetch of the document");
	}

	final ScoredResult.Builder srBuilder = ScoredResult.newBuilder();
	if (!FetchType.NONE.equals(resultFetchType)) {
		handleStoredDoc(srBuilder, uniqueId, stored, resultFetchType, fieldsToReturn, fieldsToMask, highlighterList, analysisHandlerList);
	}

	srBuilder.setScore(hit.score);
	srBuilder.setUniqueId(uniqueId);
	srBuilder.setTimestamp(timestamp);
	srBuilder.setDocId(docId);
	srBuilder.setSegment(segmentNumber);
	srBuilder.setIndexName(indexName);
	srBuilder.setResultIndex(i);

	if (sorting) {
		handleSortValues(sortRequest, hit, srBuilder);
	}
	return srBuilder;
}
 
Example 20
Source File: BooleanPerceptronClassifier.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link BooleanPerceptronClassifier} and trains it on the index.
 * <p>
 * The initial weights are the total term frequencies of the terms of the text field. Every
 * document that has a value for the class field (and matches {@code query}, if given) is then
 * classified, and the weights are updated whenever the assigned class differs from the one
 * found in the class field.
 *
 * @param indexReader     the reader on the index to be used for classification
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the classifier, or {@code null}
 *                       if all the indexed docs should be used
 * @param batchSize      the size of the batch of docs to use for updating the perceptron weights
 * @param bias      the bias used for class separation; when {@code null} or 0 it is derived from the index
 * @param classFieldName the name of the field used as the output for the classifier
 * @param textFieldName  the name of the field used as input for the classifier
 * @throws IOException if the building of the underlying {@link FST} fails and / or {@link TermsEnum} for the text field
 *                     cannot be found
 */
public BooleanPerceptronClassifier(IndexReader indexReader, Analyzer analyzer, Query query, Integer batchSize,
                                   Double bias, String classFieldName, String textFieldName) throws IOException {
  this.textTerms = MultiTerms.getTerms(indexReader, textFieldName);

  if (textTerms == null) {
    throw new IOException("term vectors need to be available for field " + textFieldName);
  }

  this.analyzer = analyzer;
  this.textFieldName = textFieldName;

  if (bias != null && bias != 0d) {
    this.bias = bias;
  } else {
    // no usable bias given: use the average total term freq per document instead
    double averageTermFreq =
        (double) indexReader.getSumTotalTermFreq(textFieldName) / (double) indexReader.getDocCount(textFieldName);
    if (averageTermFreq == -1) {
      throw new IOException(
              "bias cannot be assigned since term vectors for field "
                      + textFieldName + " do not exist");
    }
    this.bias = averageTermFreq;
  }

  // TODO : remove this map as soon as we have a writable FST
  SortedMap<String, Double> weights = new ConcurrentSkipListMap<>();

  // seed every term's weight with its total term frequency
  TermsEnum termsEnum = textTerms.iterator();
  for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
    weights.put(term.utf8ToString(), (double) termsEnum.totalTermFreq());
  }
  updateFST(weights);

  IndexSearcher indexSearcher = new IndexSearcher(indexReader);

  int batchCount = 0;

  // training set: docs with any value in the class field, narrowed by the optional query
  BooleanQuery.Builder trainingQuery = new BooleanQuery.Builder();
  trainingQuery.add(new BooleanClause(new WildcardQuery(new Term(classFieldName, "*")), BooleanClause.Occur.MUST));
  if (query != null) {
    trainingQuery.add(new BooleanClause(query, BooleanClause.Occur.MUST));
  }

  // run the search and use stored field values
  ScoreDoc[] trainingHits = indexSearcher.search(trainingQuery.build(), Integer.MAX_VALUE).scoreDocs;
  for (ScoreDoc scoreDoc : trainingHits) {
    Document doc = indexSearcher.doc(scoreDoc.doc);
    IndexableField textField = doc.getField(textFieldName);
    // the class field holds the expected result
    IndexableField classField = doc.getField(classFieldName);
    if (textField == null || classField == null) {
      continue;
    }

    // assign class to the doc and compare it with the expected one
    Boolean assignedClass = assignClass(textField.stringValue()).getAssignedClass();
    Boolean correctClass = Boolean.valueOf(classField.stringValue());
    long modifier = correctClass.compareTo(assignedClass);
    if (modifier != 0) {
      updateWeights(indexReader, scoreDoc.doc, assignedClass,
              weights, modifier, batchCount % batchSize == 0);
    }
    batchCount++;
  }
  weights.clear(); // free memory while waiting for GC
}