Java Code Examples for org.apache.solr.common.SolrInputDocument#getFieldValue()

The following examples show how to use org.apache.solr.common.SolrInputDocument#getFieldValue(). They are drawn from a variety of open source projects; the source file and originating project are listed above each example.
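
Before the project examples, here is a minimal, self-contained sketch of the method's basic contract. The class name and field names below are illustrative only and are not taken from any of the projects that follow: getFieldValue(name) returns the first (or only) value of a field as a plain Object, or null when the field is absent, which is why most of the examples cast the result and guard it with a null check.

import java.util.Collection;

import org.apache.solr.common.SolrInputDocument;

public class GetFieldValueSketch {

  public static void main(String[] args) {
    SolrInputDocument doc = new SolrInputDocument();
    doc.addField("id", "doc-1");
    doc.addField("title", "Hello Solr");
    doc.addField("tags", "alpha");
    doc.addField("tags", "beta");   // "tags" now holds two values

    // The return type is Object, so callers typically cast to the expected type.
    String id = (String) doc.getFieldValue("id");

    // For a multi-valued field, getFieldValue() returns only the first value;
    // getFieldValues() returns the whole collection.
    Object firstTag = doc.getFieldValue("tags");              // "alpha"
    Collection<Object> allTags = doc.getFieldValues("tags");  // [alpha, beta]

    // A missing field yields null rather than an exception, hence the null checks
    // seen throughout the examples below.
    Object missing = doc.getFieldValue("does-not-exist");     // null

    System.out.println(id + " " + firstTag + " " + allTags + " " + missing);
  }
}

The project examples that follow use the same pattern inside update request processors, document routers, and indexing pipelines.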
Example 1
Source File: BaseCloudSolrClient.java    From lucene-solr with Apache License 2.0
private static boolean hasInfoToFindLeaders(UpdateRequest updateRequest, String idField) {
  final Map<SolrInputDocument,Map<String,Object>> documents = updateRequest.getDocumentsMap();
  final Map<String,Map<String,Object>> deleteById = updateRequest.getDeleteByIdMap();

  final boolean hasNoDocuments = (documents == null || documents.isEmpty());
  final boolean hasNoDeleteById = (deleteById == null || deleteById.isEmpty());
  if (hasNoDocuments && hasNoDeleteById) {
    // no documents and no delete-by-id, so no info to find leader(s)
    return false;
  }

  if (documents != null) {
    for (final Map.Entry<SolrInputDocument,Map<String,Object>> entry : documents.entrySet()) {
      final SolrInputDocument doc = entry.getKey();
      final Object fieldValue = doc.getFieldValue(idField);
      if (fieldValue == null) {
        // a document with no id field value, so can't find leader for it
        return false;
      }
    }
  }

  return true;
}
 
Example 2
Source File: AddCoordinatesUpdateRequestProcessor.java    From apache-solr-essentials with Apache License 2.0
@Override
public void processAdd(final AddUpdateCommand command) throws IOException  {
	final SolrInputDocument document = command.getSolrInputDocument();
	final String address = (String) document.getFieldValue("address");
	if (address != null && address.trim().length() != 0) {
		try {
			final String id = String.valueOf(document.getFieldValue("id"));
			final String coordinates = service.getCoordinates(id, address);
			if (coordinates != null && coordinates.trim().length() != 0) {
				document.addField("coordinates", coordinates);
			} else {
				LOGGER.error("Document " + id + " with address \"" + address + "\" hasn't been translated (no coordinates returned)");
			}
		
			sleep();
		} catch (final Exception exception) {
			LOGGER.error("Unable to get coordinates for " + document, exception);
		}
	}

	// Forward the document to the next processor in the chain regardless of whether
	// an address was present; otherwise documents without an address would never be indexed.
	super.processAdd(command);
}
 
Example 3
Source File: RemoveTrailingUnderscoreProcessor.java    From apache-solr-essentials with Apache License 2.0
/**
 * Intercept the add document operation.
 * Here this process gets a chance to change the incoming {@link SolrInputDocument}.
 * 
 * @param command the update command.
 * @throws IOException in case of I/O failure.
 */
@Override
public void processAdd(final AddUpdateCommand command) throws IOException {
	// 1. Retrieve the SolrInputDocument that contains data to be indexed.
	final SolrInputDocument document = command.getSolrInputDocument();
	
	// 2. Loop through the target fields
	for (final String fieldName : fields) {
		
		// 3. Get the field value (for simplicity we assume fields are single-valued strings)
		final String fieldValue = (String) document.getFieldValue(fieldName);
		
		// 4. Check the value and, if it ends with an underscore, strip that trailing character.
		if (fieldValue != null && fieldValue.endsWith("_")) {
			document.setField(fieldName, fieldValue.substring(0, fieldValue.length() -1));
		}
	}
	
	// 5. IMPORTANT: forward the control to the next processor in the chain.
	super.processAdd(command);
}
 
Example 4
Source File: RegexpBoostProcessor.java    From lucene-solr with Apache License 2.0
public void processBoost(AddUpdateCommand command) {
  SolrInputDocument document = command.getSolrInputDocument();
  if (document.containsKey(inputFieldname)) {
    String value = (String) document.getFieldValue(inputFieldname);
    double boost = 1.0f;
    for (BoostEntry boostEntry : boostEntries) {
      if (boostEntry.getPattern().matcher(value).matches()) {
        if (log.isDebugEnabled()) {
          log.debug("Pattern match {} for {}", boostEntry.getPattern().pattern(), value);
        }
        boost = (boostEntry.getBoost() * 1000) * (boost * 1000) / 1000000;
      }
    }
    document.setField(boostFieldname, boost);

    if (log.isDebugEnabled()) {
      log.debug("Value {}, applied to field {}", boost, boostFieldname);
    }
  }
}
 
Example 5
Source File: MorphlineMapper.java    From examples with Apache License 2.0
@Override
public void load(SolrInputDocument doc) throws IOException, SolrServerException {
  String uniqueKeyFieldName = getSchema().getUniqueKeyField().getName();
  Object id = doc.getFieldValue(uniqueKeyFieldName);
  if (id == null) {
    throw new IllegalArgumentException("Missing value for (required) unique document key: " + uniqueKeyFieldName
        + " (see Solr schema.xml)");
  }
  try {
    context.write(new Text(id.toString()), new SolrInputDocumentWritable(doc));
  } catch (InterruptedException e) {
    throw new IOException("Interrupted while writing " + doc, e);
  }

  if (LOG.isDebugEnabled()) {
    long numParserOutputBytes = 0;
    for (SolrInputField field : doc.values()) {
      numParserOutputBytes += sizeOf(field.getValue());
    }
    context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.PARSER_OUTPUT_BYTES.toString()).increment(numParserOutputBytes);
  }
  context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.DOCS_READ.toString()).increment(1);
}
 
Example 6
Source File: ClassificationUpdateProcessor.java    From lucene-solr with Apache License 2.0
/**
 * @param cmd the incoming update command containing the document to classify
 * @throws IOException If there is a low-level I/O error
 */
@Override
public void processAdd(AddUpdateCommand cmd)
    throws IOException {
  SolrInputDocument doc = cmd.getSolrInputDocument();
  Document luceneDocument = cmd.getLuceneDocument();
  String assignedClass;
  Object documentClass = doc.getFieldValue(trainingClassField);
  if (documentClass == null) {
    List<ClassificationResult<BytesRef>> assignedClassifications = classifier.getClasses(luceneDocument, maxOutputClasses);
    if (assignedClassifications != null) {
      for (ClassificationResult<BytesRef> singleClassification : assignedClassifications) {
        assignedClass = singleClassification.getAssignedClass().utf8ToString();
        doc.addField(predictedClassField, assignedClass);
      }
    }
  }
  super.processAdd(cmd);
}
 
Example 7
Source File: URLClassifyProcessor.java    From lucene-solr with Apache License 2.0
@Override
public void processAdd(AddUpdateCommand command) throws IOException {
  if (isEnabled()) {
    SolrInputDocument document = command.getSolrInputDocument();
    if (document.containsKey(urlFieldname)) {
      String url = (String) document.getFieldValue(urlFieldname);
      try {
        URL normalizedURL = getNormalizedURL(url);
        document.setField(lengthFieldname, length(normalizedURL));
        document.setField(levelsFieldname, levels(normalizedURL));
        document.setField(toplevelpageFieldname, isTopLevelPage(normalizedURL) ? 1 : 0);
        document.setField(landingpageFieldname, isLandingPage(normalizedURL) ? 1 : 0);
        if (domainFieldname != null) {
          document.setField(domainFieldname, normalizedURL.getHost());
        }
        if (canonicalUrlFieldname != null) {
          document.setField(canonicalUrlFieldname, getCanonicalUrl(normalizedURL));
        }
        log.debug("{}", document);
      } catch (MalformedURLException | URISyntaxException e) {
        log.warn("cannot get the normalized url for '{}' due to {}", url, e.getMessage());
      }
    }
  }
  super.processAdd(command);
}
 
Example 8
Source File: TestRandomFlRTGCloud.java    From lucene-solr with Apache License 2.0
public Collection<String> assertRTGResults(final Collection<FlValidator> validators,
                                           final SolrInputDocument expected,
                                           final SolrDocument actual) {
  final Object origVal = expected.getFieldValue(fieldName);
  assertTrue(fl + ": orig field value is not supported: " + origVal, VALUES.containsKey(origVal));
  
  assertEquals(fl, VALUES.get(origVal), actual.getFirstValue(resultKey));
  return Collections.<String>singleton(resultKey);
}
 
Example 9
Source File: SplitCompoundFieldProcessor.java    From apache-solr-essentials with Apache License 2.0
@Override
public void processAdd(final AddUpdateCommand command) throws IOException {
	// 1. Get the Solr (Input) document
	final SolrInputDocument document = command.getSolrInputDocument();
	
	// 2. Get the value of the compound field 
	final String compoundValue = (String) document.getFieldValue(COMPOUND_FIELD_NAME);

	// 3. Split the value and create the other fields
	if (compoundValue != null) {
		
		// 4. Create and populate the "year" field.
		if (compoundValue.length() >=4) {
			final String year = compoundValue.substring(0, 4);
			document.setField("year", year);
		}
		
		// 5. Create and populate the "language" field.
		if (compoundValue.length() >=39) {
			final String language = compoundValue.substring(36, 39);
			document.setField("language", language);				
		}
		
		// 6. Remove the compound field.
		document.remove(COMPOUND_FIELD_NAME);
	}
	
	// 7. IMPORTANT: forward the control to the next processor in the chain.
	super.processAdd(command);
}
 
Example 10
Source File: TimeRoutedAlias.java    From lucene-solr with Apache License 2.0
@Override
String[] formattedRouteValues(SolrInputDocument doc) {
  String routeField = getRouteField();
  Date fieldValue = (Date) doc.getFieldValue(routeField);
  String dest = calcCandidateCollection(fieldValue.toInstant()).getDestinationCollection();
  int nonValuePrefix = getAliasName().length() + getRoutedAliasType().getSeparatorPrefix().length();
  return new String[]{dest.substring(nonValuePrefix)};
}
 
Example 11
Source File: DocBasedVersionConstraintsProcessor.java    From lucene-solr with Apache License 2.0
private DocFoundAndOldUserAndSolrVersions getUserVersionAndSolrVersionFromDocument(SolrInputDocument oldDoc) {
  Object[] oldUserVersions = getUserVersionsFromDocument(oldDoc);

  Object o = oldDoc.getFieldValue(solrVersionField.getName());
  if (o == null) {
    throw new SolrException(SERVER_ERROR, "No _version_ for document " + oldDoc);
  }
  long solrVersion = o instanceof Number ? ((Number) o).longValue() : Long.parseLong(o.toString());

  return new DocFoundAndOldUserAndSolrVersions(oldUserVersions, solrVersion);
}
 
Example 12
Source File: AtomicUpdateDocumentMerger.java    From lucene-solr with Apache License 2.0
/**
 *
 * Merges an Atomic Update inside a document hierarchy
 * @param sdoc the doc containing update instructions
 * @param oldDocWithChildren the doc (children included) before the update
 * @param sdocWithChildren the updated doc prior to the update (children included)
 * @return root doc (children included) after update
 */
public SolrInputDocument mergeChildDoc(SolrInputDocument sdoc, SolrInputDocument oldDocWithChildren,
                                       SolrInputDocument sdocWithChildren) {
  // get path of document to be updated
  String updatedDocPath = (String) sdocWithChildren.getFieldValue(IndexSchema.NEST_PATH_FIELD_NAME);
  // get the SolrInputField containing the document which the AddUpdateCommand updates
  SolrInputField sifToReplace = getFieldFromHierarchy(oldDocWithChildren, updatedDocPath);
  // update SolrInputField, either appending or replacing the updated document
  updateDocInSif(sifToReplace, sdocWithChildren, sdoc);
  return oldDocWithChildren;
}
 
Example 13
Source File: DocumentBuilder.java    From lucene-solr with Apache License 2.0
private static String getID( SolrInputDocument doc, IndexSchema schema )
{
  String id = "";
  SchemaField sf = schema.getUniqueKeyField();
  if( sf != null ) {
    id = "[doc="+doc.getFieldValue( sf.getName() )+"] ";
  }
  return id;
}
 
Example 14
Source File: RowMutationHelper.java    From incubator-retired-blur with Apache License 2.0
private static void validateAsRow(SolrInputDocument doc) {
  Object rowid = doc.getFieldValue(BlurConstants.ROW_ID);

  if (rowid == null)
    throw new IllegalArgumentException("Document must have rowid field.");

  for (String field : doc.getFieldNames()) {
    if (!BlurConstants.ROW_ID.equals(field)) {
      throw new IllegalArgumentException("Parent documents act as rows and can't have fields.");
    }
  }

}
 
Example 15
Source File: CompositeIdRouter.java    From lucene-solr with Apache License 2.0
@Override
public int sliceHash(String id, SolrInputDocument doc, SolrParams params, DocCollection collection) {
  String shardFieldName = getRouteField(collection);
  if (shardFieldName != null && doc != null) {
    Object o = doc.getFieldValue(shardFieldName);
    if (o == null)
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No value for :" + shardFieldName + ". Unable to identify shard");
    id = o.toString();
  }
  if (id.indexOf(SEPARATOR) < 0) {
    return Hash.murmurhash3_x86_32(id, 0, id.length(), 0);
  }

  return new KeyParser(id).getHash();
}
 
Example 16
Source File: DeleteStream.java    From lucene-solr with Apache License 2.0
/**
 * Helper method that can handle String values when dealing with odd 
 * {@link Tuple} -&gt; {@link SolrInputDocument} conversions 
 * (ie: <code>tuple(..)</code> in tests)
 */
private static Long getVersion(final SolrInputDocument doc) throws NumberFormatException {
  if (! doc.containsKey(VERSION_FIELD)) {
    return null;
  }
  final Object v = doc.getFieldValue(VERSION_FIELD);
  if (null == v) {
    return null;
  }
  if (v instanceof Long) {
    return (Long)v;
  }
  return Long.parseLong(v.toString());
}
 
Example 17
Source File: DistributedUpdateProcessor.java    From lucene-solr with Apache License 2.0
boolean getUpdatedDocument(AddUpdateCommand cmd, long versionOnUpdate) throws IOException {
  if (!AtomicUpdateDocumentMerger.isAtomicUpdate(cmd)) return false;

  Set<String> inPlaceUpdatedFields = AtomicUpdateDocumentMerger.computeInPlaceUpdatableFields(cmd);
  if (inPlaceUpdatedFields.size() > 0) { // non-empty means this is suitable for in-place updates
    if (docMerger.doInPlaceUpdateMerge(cmd, inPlaceUpdatedFields)) {
      return true;
    } else {
      // in-place update failed, so fall through and re-try the same with a full atomic update
    }
  }
  
  // full (non-inplace) atomic update
  SolrInputDocument sdoc = cmd.getSolrInputDocument();
  BytesRef idBytes = cmd.getIndexedId();
  String idString = cmd.getPrintableId();
  SolrInputDocument oldRootDocWithChildren = RealTimeGetComponent.getInputDocument(cmd.getReq().getCore(), idBytes, RealTimeGetComponent.Resolution.ROOT_WITH_CHILDREN);

  if (oldRootDocWithChildren == null) {
    if (versionOnUpdate > 0) {
      // could just let the optimistic locking throw the error
      throw new SolrException(ErrorCode.CONFLICT, "Document not found for update.  id=" + idString);
    } else if (req.getParams().get(ShardParams._ROUTE_) != null) {
      // the specified document could not be found in this shard
      // and was explicitly routed using _route_
      throw new SolrException(ErrorCode.BAD_REQUEST,
          "Could not find document id=" + idString +
              ", perhaps the wrong \"_route_\" param was supplied");
    }
  } else {
    oldRootDocWithChildren.remove(CommonParams.VERSION_FIELD);
  }


  SolrInputDocument mergedDoc;
  if(idField == null || oldRootDocWithChildren == null) {
    // create a new doc by default if an old one wasn't found
    mergedDoc = docMerger.merge(sdoc, new SolrInputDocument());
  } else {
    // Safety check: don't allow an update to an existing doc that has children, unless we actually support this.
    if (req.getSchema().isUsableForChildDocs() // however, next line we see it doesn't support child docs
        && req.getSchema().supportsPartialUpdatesOfChildDocs() == false
        && req.getSearcher().count(new TermQuery(new Term(IndexSchema.ROOT_FIELD_NAME, idBytes))) > 1) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "This schema does not support partial updates to nested docs. See ref guide.");
    }

    String oldRootDocRootFieldVal = (String) oldRootDocWithChildren.getFieldValue(IndexSchema.ROOT_FIELD_NAME);
    if(req.getSchema().savesChildDocRelations() && oldRootDocRootFieldVal != null &&
        !idString.equals(oldRootDocRootFieldVal)) {
      // this is an update where the updated doc is not the root document
      SolrInputDocument sdocWithChildren = RealTimeGetComponent.getInputDocument(cmd.getReq().getCore(),
          idBytes, RealTimeGetComponent.Resolution.DOC_WITH_CHILDREN);
      mergedDoc = docMerger.mergeChildDoc(sdoc, oldRootDocWithChildren, sdocWithChildren);
    } else {
      mergedDoc = docMerger.merge(sdoc, oldRootDocWithChildren);
    }
  }
  cmd.solrDoc = mergedDoc;
  return true;
}
 
Example 18
Source File: AtomicUpdateDocumentMerger.java    From lucene-solr with Apache License 2.0
/**
 * Merges the fromDoc into the toDoc using the atomic update syntax.
 * 
 * @param fromDoc SolrInputDocument which will be merged into the toDoc
 * @param toDoc the final SolrInputDocument that will be mutated with the values from the fromDoc atomic commands
 * @return toDoc with mutated values
 */
@SuppressWarnings({"unchecked"})
public SolrInputDocument merge(final SolrInputDocument fromDoc, SolrInputDocument toDoc) {
  for (SolrInputField sif : fromDoc.values()) {
    Object val = sif.getValue();
    if (val instanceof Map) {
      for (Entry<String,Object> entry : ((Map<String,Object>) val).entrySet()) {
        String key = entry.getKey();
        Object fieldVal = entry.getValue();
        switch (key) {
          case "add":
            doAdd(toDoc, sif, fieldVal);
            break;
          case "set":
            doSet(toDoc, sif, fieldVal);
            break;
          case "remove":
            doRemove(toDoc, sif, fieldVal);
            break;
          case "removeregex":
            doRemoveRegex(toDoc, sif, fieldVal);
            break;
          case "inc":
            doInc(toDoc, sif, fieldVal);
            break;
          case "add-distinct":
            doAddDistinct(toDoc, sif, fieldVal);
            break;
          default:
            Object id = toDoc.containsKey(idField.getName())? toDoc.getFieldValue(idField.getName()):
                fromDoc.getFieldValue(idField.getName());
            String err = "Unknown operation for the an atomic update, operation ignored: " + key;
            if (id != null) {
              err = err + " for id:" + id;
            }
            throw new SolrException(ErrorCode.BAD_REQUEST, err);
        }
        // validate that the field being modified is not the id field.
        if (idField.getName().equals(sif.getName())) {
          throw new SolrException(ErrorCode.BAD_REQUEST, "Invalid update of id field: " + sif);
        }

      }
    } else {
      // normal fields are treated as a "set"
      toDoc.put(sif.getName(), sif);
    }
  }
  
  return toDoc;
}
 
Example 19
Source File: CollectionPerTimeFrameAssignmentStrategy.java    From storm-solr with Apache License 2.0
public String getCollectionForDoc(CloudSolrClient cloudSolrClient, SolrInputDocument doc) throws Exception {
  Object obj = doc.getFieldValue(fieldName);
  if (obj == null)
    throw new IllegalArgumentException("Document " + doc + " cannot be routed because " + fieldName + " is null!");

  Date timestamp = null;
  try {
    timestamp = asTimestamp(obj);
  } catch (ParseException pe) {
    throw new IllegalArgumentException("Cannot parse "+obj+" for "+fieldName+" into a java.util.Date due to: "+pe);
  }

  Date startFrom = null;
  // if they use a multi-day time frame, then we need a start date from which to start calculating offsets
  if (timeUnit.equals(TimeUnit.DAYS) && timeFrame > 1) {
    if (this.startDate == null) {
      throw new IllegalStateException("Must specify a start date for multi-day time frames!");
    }
    startFrom = this.startDate;
  } else {
    // start back at the beginning of the current day and increment from there until we find the correct "bucket"
    Calendar cal = Calendar.getInstance();
    cal.setTimeZone(TimeZone.getTimeZone(timezoneId));
    cal.setTimeInMillis(timestamp.getTime());
    cal.set(Calendar.HOUR_OF_DAY, 0);
    cal.set(Calendar.MINUTE, 0);
    cal.set(Calendar.SECOND, 0);
    cal.set(Calendar.MILLISECOND, 0);
    startFrom = cal.getTime();
  }

  // increment by the length of each timeframe (in millis) until we are past the timestamp,
  // the resulting value of prev will determine the collection
  Date next = startFrom;
  long prev = next.getTime();
  final long timeFrameMs = TimeUnit.MILLISECONDS.convert(timeFrame, timeUnit);
  while (timestamp.after(next)) {
    prev = next.getTime();
    next.setTime(prev + timeFrameMs);
  }

  String collection = collectionNameBase+dateFormatter.get().format(new Date(prev));
  checkCollectionExists(cloudSolrClient, collection);
  return collection;
}
 
Example 20
Source File: SolrProductIndexer.java    From scipio-erp with Apache License 2.0
public String getDocId(SolrInputDocument doc) {
    return (String) doc.getFieldValue("id");
}