Java Code Examples for org.apache.nifi.processor.ProcessSession#remove()

The following examples show how to use org.apache.nifi.processor.ProcessSession#remove(). Each example is taken from an open-source project; the source file and license are noted above the code.
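Before diving into the project code, here is a minimal sketch of the usual pattern: pull a FlowFile from the session, decide it should be dropped, and call remove() so the framework deletes it when the session commits. The processor context is hypothetical; only the ProcessSession calls reflect the NiFi API. Note that an overload taking a Collection of FlowFiles also exists for dropping several at once.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // Minimal sketch (hypothetical processor): drop empty FlowFiles, pass the rest to success.
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    if (flowFile.getSize() == 0L) {
        // remove() marks the FlowFile for deletion; the drop takes effect when the session is committed
        session.remove(flowFile);
        return;
    }

    // REL_SUCCESS is assumed to be a Relationship defined by the processor
    session.transfer(flowFile, REL_SUCCESS);
}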
Example 1
Source File: DataGeneratorTestProcessor.java    From nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile toRemove = session.get();
    if (toRemove != null) {
        LOG.warn("Removing flow file");
        session.remove(toRemove);
    }

    FlowFile flowFile = session.create();
    final Random random = new Random();
    final byte[] data = new byte[4096];
    random.nextBytes(data);

    flowFile = session.write(flowFile, new OutputStreamCallback() {
        @Override
        public void process(final OutputStream out) throws IOException {
            out.write(data);
        }
    });

    LOG.info("{} transferring {} to success", new Object[]{this, flowFile});
    session.transfer(flowFile, REL_SUCCESS);
    session.commit();
}
 
Example 2
Source File: HandleHttpRequest.java    From nifi with Apache License 2.0
protected void handleFlowContentStreamingError(final ProcessSession session, HttpRequestContainer container,
    final HttpServletRequest request, Optional<FlowFile> flowFile, final Exception e) {
  // Many conditions can produce an IOException on the HTTP stream and, in some of them (e.g.
  // bad requests), the connection to the client is not closed. To cover those cases as well, we try
  // to answer with a BAD_REQUEST, which lets the client know that the request was not processed
  // correctly and that the connection can be closed.
  getLogger().error("Failed to receive content from HTTP Request from {} due to {}",
          new Object[]{request.getRemoteAddr(), e});
  if (flowFile.isPresent()) {
      session.remove(flowFile.get());
  }

  try {
      HttpServletResponse response = container.getResponse();
      response.sendError(HttpServletResponse.SC_BAD_REQUEST);
      container.getContext().complete();
  } catch (final IOException ioe) {
      getLogger().warn("Failed to send HTTP response to {} due to {}",
              new Object[]{request.getRemoteAddr(), ioe});
  }
}
 
Example 3
Source File: SplitText.java    From nifi with Apache License 2.0
/**
 * Will concatenate the contents of the provided array of {@link FlowFile}s
 * into a single {@link FlowFile}. While this operation is as general as the
 * previous sentence suggests, in the context of this processor there are only
 * ever two {@link FlowFile}s: the first representing the header content of the
 * split and the second representing the split itself.
 */
private FlowFile concatenateContents(FlowFile sourceFlowFile, ProcessSession session, FlowFile... flowFiles) {
    FlowFile mergedFlowFile = session.create(sourceFlowFile);
    for (FlowFile flowFile : flowFiles) {
        mergedFlowFile = session.append(mergedFlowFile, new OutputStreamCallback() {
            @Override
            public void process(OutputStream out) throws IOException {
                try (InputStream is = session.read(flowFile)) {
                    IOUtils.copy(is, out);
                }
            }
        });
    }
    session.remove(flowFiles[1]); // in current usage we always have 2 files
    return mergedFlowFile;
}
 
Example 4
Source File: DataGeneratorTestProcessor.java    From localization_nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile toRemove = session.get();
    if (toRemove != null) {
        LOG.warn("Removing flow file");
        session.remove(toRemove);
    }

    FlowFile flowFile = session.create();
    final Random random = new Random();
    final byte[] data = new byte[4096];
    random.nextBytes(data);

    flowFile = session.write(flowFile, new OutputStreamCallback() {
        @Override
        public void process(final OutputStream out) throws IOException {
            out.write(data);
        }
    });

    LOG.info("{} transferring {} to success", new Object[]{this, flowFile});
    session.transfer(flowFile, REL_SUCCESS);
    session.commit();
}
 
Example 5
Source File: TailFile.java    From nifi with Apache License 2.0
/**
 * Creates a new FlowFile that contains the entire contents of the given
 * file and transfers that FlowFile to success. This method will commit the
 * given session and emit an appropriate Provenance Event.
 *
 * @param file the file to ingest
 * @param context the ProcessContext
 * @param session the ProcessSession
 * @param tfo the current state
 *
 * @return the new, updated state that reflects that the given file has been
 * ingested.
 */
private TailFileState consumeFileFully(final File file, final ProcessContext context, final ProcessSession session, TailFileObject tfo) {
    FlowFile flowFile = session.create();
    flowFile = session.importFrom(file.toPath(), true, flowFile);
    if (flowFile.getSize() == 0L) {
        session.remove(flowFile);
    } else {
        final Map<String, String> attributes = new HashMap<>(3);
        attributes.put(CoreAttributes.FILENAME.key(), file.getName());
        attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain");
        attributes.put("tailfile.original.path", tfo.getState().getFilename());
        flowFile = session.putAllAttributes(flowFile, attributes);
        session.getProvenanceReporter().receive(flowFile, file.toURI().toString());
        session.transfer(flowFile, REL_SUCCESS);
        getLogger().debug("Created {} from {} and routed to success", new Object[]{flowFile, file});

        // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
        cleanup();
        tfo.setState(new TailFileState(context.getProperty(FILENAME).evaluateAttributeExpressions().getValue(), null, null, 0L, file.lastModified() + 1L, file.length(), null,
                tfo.getState().getBuffer()));

        // must ensure that we do session.commit() before persisting state in order to avoid data loss.
        session.commit();
        persistState(tfo, context);
    }

    return tfo.getState();
}
 
Example 6
Source File: TerminateOnce.java    From nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile != null) {
        session.remove(flowFile);
    }
}
 
Example 7
Source File: AbstractListenEventBatchingProcessor.java    From nifi with Apache License 2.0
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final int maxBatchSize = context.getProperty(MAX_BATCH_SIZE).asInteger();
    final Map<String,FlowFileEventBatch> batches = getBatches(session, maxBatchSize, messageDemarcatorBytes);

    // if the size is 0 then there was nothing to process so return
    // we don't need to yield here because we have a long poll inside of getBatches
    if (batches.size() == 0) {
        return;
    }

    final List<E> allEvents = new ArrayList<>();

    for (Map.Entry<String,FlowFileEventBatch> entry : batches.entrySet()) {
        FlowFile flowFile = entry.getValue().getFlowFile();
        final List<E> events = entry.getValue().getEvents();

        if (flowFile.getSize() == 0L || events.size() == 0) {
            session.remove(flowFile);
            getLogger().debug("No data written to FlowFile from batch {}; removing FlowFile", new Object[] {entry.getKey()});
            continue;
        }

        final Map<String,String> attributes = getAttributes(entry.getValue());
        flowFile = session.putAllAttributes(flowFile, attributes);

        getLogger().debug("Transferring {} to success", new Object[] {flowFile});
        session.transfer(flowFile, REL_SUCCESS);
        session.adjustCounter("FlowFiles Transferred to Success", 1L, false);

        // the sender and command will be the same for all events based on the batch key
        final String transitUri = getTransitUri(entry.getValue());
        session.getProvenanceReporter().receive(flowFile, transitUri);

        allEvents.addAll(events);
    }

    // let sub-classes take any additional actions
    postProcess(context, session, allEvents);
}
 
Example 8
Source File: TerminateAll.java    From nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile;
    while ((flowFile = session.get()) != null) {
        session.remove(flowFile);
        session.adjustCounter("Removed", 1, false);
    }
}
 
Example 9
Source File: AbstractListenEventBatchingProcessor.java    From localization_nifi with Apache License 2.0
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final int maxBatchSize = context.getProperty(MAX_BATCH_SIZE).asInteger();
    final Map<String,FlowFileEventBatch> batches = getBatches(session, maxBatchSize, messageDemarcatorBytes);

    // if the size is 0 then there was nothing to process so return
    // we don't need to yield here because we have a long poll inside of getBatches
    if (batches.size() == 0) {
        return;
    }

    final List<E> allEvents = new ArrayList<>();

    for (Map.Entry<String,FlowFileEventBatch> entry : batches.entrySet()) {
        FlowFile flowFile = entry.getValue().getFlowFile();
        final List<E> events = entry.getValue().getEvents();

        if (flowFile.getSize() == 0L || events.size() == 0) {
            session.remove(flowFile);
            getLogger().debug("No data written to FlowFile from batch {}; removing FlowFile", new Object[] {entry.getKey()});
            continue;
        }

        final Map<String,String> attributes = getAttributes(entry.getValue());
        flowFile = session.putAllAttributes(flowFile, attributes);

        getLogger().debug("Transferring {} to success", new Object[] {flowFile});
        session.transfer(flowFile, REL_SUCCESS);
        session.adjustCounter("FlowFiles Transferred to Success", 1L, false);

        // the sender and command will be the same for all events based on the batch key
        final String transitUri = getTransitUri(entry.getValue());
        session.getProvenanceReporter().receive(flowFile, transitUri);

        allEvents.addAll(events);
    }

    // let sub-classes take any additional actions
    postProcess(context, session, allEvents);
}
 
Example 10
Source File: TerminateFlowFile.java    From nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    session.remove(flowFile);
}
 
Example 11
Source File: TailFile.java    From localization_nifi with Apache License 2.0
/**
 * Creates a new FlowFile that contains the entire contents of the given
 * file and transfers that FlowFile to success. This method will commit the
 * given session and emit an appropriate Provenance Event.
 *
 * @param file the file to ingest
 * @param context the ProcessContext
 * @param session the ProcessSession
 * @param tfo the current state
 *
 * @return the new, updated state that reflects that the given file has been
 * ingested.
 */
private TailFileState consumeFileFully(final File file, final ProcessContext context, final ProcessSession session, TailFileObject tfo) {
    FlowFile flowFile = session.create();
    flowFile = session.importFrom(file.toPath(), true, flowFile);
    if (flowFile.getSize() == 0L) {
        session.remove(flowFile);
    } else {
        final Map<String, String> attributes = new HashMap<>(3);
        attributes.put(CoreAttributes.FILENAME.key(), file.getName());
        attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain");
        attributes.put("tailfile.original.path", tfo.getState().getFilename());
        flowFile = session.putAllAttributes(flowFile, attributes);
        session.getProvenanceReporter().receive(flowFile, file.toURI().toString());
        session.transfer(flowFile, REL_SUCCESS);
        getLogger().debug("Created {} from {} and routed to success", new Object[]{flowFile, file});

        // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
        cleanup();
        tfo.setState(new TailFileState(context.getProperty(FILENAME).evaluateAttributeExpressions().getValue(), null, null, 0L, file.lastModified() + 1L, file.length(), null,
                tfo.getState().getBuffer()));

        // must ensure that we do session.commit() before persisting state in order to avoid data loss.
        session.commit();
        persistState(tfo, context);
    }

    return tfo.getState();
}
 
Example 12
Source File: PutElasticsearchRecord.java    From nifi with Apache License 2.0
private void removeBadRecordFlowFiles(List<FlowFile> bad, ProcessSession session) {
    for (FlowFile badFlowFile : bad) {
        session.remove(badFlowFile);
    }

    bad.clear();
}
 
Example 13
Source File: PutElasticsearchRecord.java    From nifi with Apache License 2.0
private FlowFile indexDocuments(BulkOperation bundle, ProcessSession session, FlowFile input) throws Exception {
    IndexOperationResponse response = clientService.bulk(bundle.getOperationList());
    if (response.hasErrors()) {
        if(logErrors || getLogger().isDebugEnabled()) {
            List<Map<String, Object>> errors = response.getItems();
            ObjectMapper mapper = new ObjectMapper();
            mapper.enable(SerializationFeature.INDENT_OUTPUT);
            String output = String.format("An error was encountered while processing bulk operations. Server response below:\n\n%s", mapper.writeValueAsString(errors));

            if (logErrors) {
                getLogger().error(output);
            } else {
                getLogger().debug(output);
            }
        }

        if (writerFactory != null) {
            FlowFile errorFF = session.create(input);
            try (OutputStream os = session.write(errorFF);
                 RecordSetWriter writer = writerFactory.createWriter(getLogger(), bundle.getSchema(), os )) {

                int added = 0;
                writer.beginRecordSet();
                for (int index = 0; index < response.getItems().size(); index++) {
                    Map<String, Object> current = response.getItems().get(index);
                    String key = current.keySet().stream().findFirst().get();
                    Map<String, Object> inner = (Map<String, Object>) current.get(key);
                    if (inner.containsKey("error")) {
                        writer.write(bundle.getOriginalRecords().get(index));
                        added++;
                    }
                }
                writer.finishRecordSet();
                writer.close();
                os.close();

                errorFF = session.putAttribute(errorFF, ATTR_RECORD_COUNT, String.valueOf(added));

                session.transfer(errorFF, REL_FAILED_RECORDS);

                return errorFF;
            } catch (Exception ex) {
                getLogger().error("", ex);
                session.remove(errorFF);
                throw ex;
            }
        }

        return null;
    } else {
        return null;
    }
}
 
Example 14
Source File: GeoEnrichIPRecord.java    From nifi with Apache License 2.0
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    FlowFile input = session.get();
    if (input == null) {
        return;
    }

    FlowFile output = session.create(input);
    FlowFile notFound = splitOutput ? session.create(input) : null;
    final DatabaseReader dbReader = databaseReaderRef.get();
    try (InputStream is = session.read(input);
         OutputStream os = session.write(output);
         OutputStream osNotFound = splitOutput ? session.write(notFound) : null) {
        RecordPathCache cache = new RecordPathCache(GEO_PROPERTIES.size() + 1);
        Map<PropertyDescriptor, RecordPath> paths = new HashMap<>();
        for (PropertyDescriptor descriptor : GEO_PROPERTIES) {
            if (!context.getProperty(descriptor).isSet()) {
                continue;
            }
            String rawPath = context.getProperty(descriptor).evaluateAttributeExpressions(input).getValue();
            RecordPath compiled = cache.getCompiled(rawPath);
            paths.put(descriptor, compiled);
        }

        String rawIpPath = context.getProperty(IP_RECORD_PATH).evaluateAttributeExpressions(input).getValue();
        RecordPath ipPath = cache.getCompiled(rawIpPath);

        RecordReader reader = readerFactory.createRecordReader(input, is, getLogger());
        RecordSchema schema = writerFactory.getSchema(input.getAttributes(), reader.getSchema());
        RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, os);
        RecordSetWriter notFoundWriter = splitOutput ? writerFactory.createWriter(getLogger(), schema, osNotFound) : null;
        Record record;
        Relationship targetRelationship = REL_NOT_FOUND;
        writer.beginRecordSet();

        if (notFoundWriter != null) {
            notFoundWriter.beginRecordSet();
        }

        int foundCount = 0;
        int notFoundCount = 0;
        while ((record = reader.nextRecord()) != null) {
            CityResponse response = geocode(ipPath, record, dbReader);
            boolean wasEnriched = enrichRecord(response, record, paths);
            if (wasEnriched) {
                targetRelationship = REL_FOUND;
            }
            if (!splitOutput || (splitOutput && wasEnriched)) {
                writer.write(record);
                foundCount++;
            } else {
                notFoundWriter.write(record);
                notFoundCount++;
            }
        }
        writer.finishRecordSet();
        writer.close();

        if (notFoundWriter != null) {
            notFoundWriter.finishRecordSet();
            notFoundWriter.close();
        }

        is.close();
        os.close();
        if (osNotFound != null) {
            osNotFound.close();
        }

        output = session.putAllAttributes(output, buildAttributes(foundCount, writer.getMimeType()));
        if (!splitOutput) {
            session.transfer(output, targetRelationship);
            session.remove(input);
        } else {
            if (notFoundCount > 0) {
                notFound = session.putAllAttributes(notFound, buildAttributes(notFoundCount, writer.getMimeType()));
                session.transfer(notFound, REL_NOT_FOUND);
            } else {
                session.remove(notFound);
            }
            session.transfer(output, REL_FOUND);
            session.transfer(input, REL_ORIGINAL);
            session.getProvenanceReporter().modifyContent(notFound);
        }
        session.getProvenanceReporter().modifyContent(output);
    } catch (Exception ex) {
        getLogger().error("Error enriching records.", ex);
        session.rollback();
        context.yield();
    }
}
 
Example 15
Source File: MergeContent.java    From nifi with Apache License 2.0
@Override
public FlowFile merge(final Bin bin, final ProcessContext context) {
    final ProcessSession session = bin.getSession();
    final List<FlowFile> contents = bin.getContents();

    FlowFile bundle = session.create(contents);

    try {
        bundle = session.write(bundle, new OutputStreamCallback() {
            @Override
            public void process(final OutputStream rawOut) throws IOException {
                try (final OutputStream bufferedOut = new BufferedOutputStream(rawOut)) {
                    // we don't want the packager closing the stream. V1 creates a TAR Output Stream, which then gets
                    // closed, which in turn closes the underlying OutputStream, and we want to protect ourselves against that.
                    final OutputStream out = new NonCloseableOutputStream(bufferedOut);

                    for (final FlowFile flowFile : contents) {
                        bin.getSession().read(flowFile, false, new InputStreamCallback() {
                            @Override
                            public void process(final InputStream rawIn) throws IOException {
                                try (final InputStream in = new BufferedInputStream(rawIn)) {
                                    final Map<String, String> attributes = new HashMap<>(flowFile.getAttributes());

                                    // for backward compatibility purposes, we add the "legacy" NiFi attributes
                                    attributes.put("nf.file.name", attributes.get(CoreAttributes.FILENAME.key()));
                                    attributes.put("nf.file.path", attributes.get(CoreAttributes.PATH.key()));
                                    if (attributes.containsKey(CoreAttributes.MIME_TYPE.key())) {
                                        attributes.put("content-type", attributes.get(CoreAttributes.MIME_TYPE.key()));
                                    }
                                    packager.packageFlowFile(in, out, attributes, flowFile.getSize());
                                }
                            }
                        });
                    }
                }
            }
        });
    } catch (final Exception e) {
        session.remove(bundle);
        throw e;
    }

    bundle = session.putAttribute(bundle, CoreAttributes.FILENAME.key(), createFilename(contents) + ".pkg");
    session.getProvenanceReporter().join(contents, bundle);
    return bundle;
}
 
Example 16
Source File: ScrollElasticsearchHttp.java    From nifi with Apache License 2.0
private void getPage(final Response getResponse, final URL url, final ProcessContext context,
        final ProcessSession session, FlowFile flowFile, final ComponentLog logger, final long startNanos, Charset charset)
        throws IOException {
    final int statusCode = getResponse.code();

    if (isSuccess(statusCode)) {
        ResponseBody body = getResponse.body();
        final byte[] bodyBytes = body.bytes();
        JsonNode responseJson = parseJsonResponse(new ByteArrayInputStream(bodyBytes));
        String scrollId = responseJson.get("_scroll_id").asText();

        StringBuilder builder = new StringBuilder();

        builder.append("{ \"hits\" : [");

        JsonNode hits = responseJson.get("hits").get("hits");
        if (hits.size() == 0) {
            finishQuery(context.getStateManager());
            session.remove(flowFile);
            return;
        }

        for(int i = 0; i < hits.size(); i++) {
            JsonNode hit = hits.get(i);
            String retrievedIndex = hit.get("_index").asText();
            String retrievedType = hit.get("_type").asText();

            JsonNode source = hit.get("_source");
            flowFile = session.putAttribute(flowFile, "es.index", retrievedIndex);
            flowFile = session.putAttribute(flowFile, "es.type", retrievedType);
            flowFile = session.putAttribute(flowFile, "mime.type", "application/json");

            builder.append(source.toString());
            if (i < hits.size() - 1) {
                builder.append(", ");
            }
        }
        builder.append("] }");
        logger.debug("Elasticsearch retrieved " + responseJson.size() + " documents, routing to success");

        flowFile = session.write(flowFile, out -> {
            out.write(builder.toString().getBytes(charset));
        });
        session.transfer(flowFile, REL_SUCCESS);

        saveScrollId(context.getStateManager(), scrollId);

        // emit provenance event
        final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
        session.getProvenanceReporter().receive(flowFile, url.toExternalForm(), millis);
    } else {
        // 5xx -> RETRY, but a server error might last a while, so yield
        if (statusCode / 100 == 5) {

            logger.warn("Elasticsearch returned code {} with message {}, removing the flow file. This is likely a server problem, yielding...",
                    new Object[]{statusCode, getResponse.message()});
            session.remove(flowFile);
            context.yield();
        }  else {
            logger.warn("Elasticsearch returned code {} with message {}", new Object[]{statusCode, getResponse.message()});
            session.remove(flowFile);
        }
    }
}
 
Example 17
Source File: InvokeGRPC.java    From nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile fileToProcess = null;
    if (context.hasIncomingConnection()) {
        fileToProcess = session.get();

        // If we have no FlowFile, and all incoming connections are self-loops then we can continue on.
        // However, if we have no FlowFile and we have connections coming from other Processors, then
        // we know that we should run only if we have a FlowFile.
        if (fileToProcess == null && context.hasNonLoopConnection()) {
            return;
        }
    }

    final ComponentLog logger = getLogger();
    final FlowFileServiceGrpc.FlowFileServiceBlockingStub blockingStub = blockingStubReference.get();
    final String host = context.getProperty(PROP_SERVICE_HOST).getValue();
    final String port = context.getProperty(PROP_SERVICE_PORT).getValue();
    fileToProcess = session.putAttribute(fileToProcess, SERVICE_HOST, host);
    fileToProcess = session.putAttribute(fileToProcess, SERVICE_PORT, port);
    FlowFile responseFlowFile = null;
    try {
        final FlowFileRequest.Builder requestBuilder = FlowFileRequest.newBuilder()
                .setId(fileToProcess.getId())
                .putAllAttributes(fileToProcess.getAttributes());

        // if the processor is configured to send the content, turn the content into bytes
        // and add it to the request.
        final boolean sendContent = context.getProperty(PROP_SEND_CONTENT).asBoolean();
        if (sendContent) {
            try (final InputStream contents = session.read(fileToProcess)) {
                requestBuilder.setContent(ByteString.readFrom(contents));
            }
            // emit provenance event
            session.getProvenanceReporter().send(fileToProcess, getRemote(host, port), true);
        }
        final FlowFileRequest flowFileRequest = requestBuilder.build();
        logRequest(logger, host, port, flowFileRequest);

        final FlowFileReply flowFileReply = blockingStub.send(flowFileRequest);
        logReply(logger, host, port, flowFileReply);

        final FlowFileReply.ResponseCode responseCode = flowFileReply.getResponseCode();
        final String body = flowFileReply.getBody();

        fileToProcess = session.putAttribute(fileToProcess, RESPONSE_CODE, String.valueOf(responseCode));
        fileToProcess = session.putAttribute(fileToProcess, RESPONSE_BODY, body);

        responseFlowFile = session.create(fileToProcess);
        route(fileToProcess, responseFlowFile, session, context, responseCode);

    } catch (final Exception e) {
        // penalize or yield
        if (fileToProcess != null) {
            logger.error("Routing to {} due to exception: {}", new Object[]{REL_FAILURE.getName(), e}, e);
            fileToProcess = session.penalize(fileToProcess);
            fileToProcess = session.putAttribute(fileToProcess, EXCEPTION_CLASS, e.getClass().getName());
            fileToProcess = session.putAttribute(fileToProcess, EXCEPTION_MESSAGE, e.getMessage());
            // transfer original to failure
            session.transfer(fileToProcess, REL_FAILURE);
        } else {
            logger.error("Yielding processor due to exception encountered as a source processor: {}", e);
            context.yield();
        }

        // cleanup
        try {
            if (responseFlowFile != null) {
                session.remove(responseFlowFile);
            }
        } catch (final Exception e1) {
            logger.error("Could not cleanup response flowfile due to exception: {}", new Object[]{e1}, e1);
        }
    }
}
 
Example 18
Source File: TailFile.java    From nifi with Apache License 2.0
/**
 * Finds any files that have rolled over and have not yet been ingested by
 * this Processor. Each of these files that is found will be ingested as its
 * own FlowFile. If a file is found that has been partially ingested, the
 * rest of the file will be ingested as a single FlowFile but the data that
 * already has been ingested will not be ingested again.
 *
 * @param context the ProcessContext to use in order to obtain Processor
 * configuration.
 * @param session the ProcessSession to use in order to interact with
 * FlowFile creation and content.
 * @param expectedChecksum the checksum value that is expected for the
 * oldest file from offset 0 through &lt;position&gt;.
 * @param timestamp the latest Last Modified Timestamp that has been
 * consumed. Any data that was written before this date will not be
 * ingested.
 * @param position the byte offset in the file being tailed, where tailing
 * last left off.
 *
 * @return <code>true</code> if the file being tailed has rolled over, false
 * otherwise
 */
private boolean recoverRolledFiles(final ProcessContext context, final ProcessSession session, final String tailFile, final List<File> rolledOffFiles, final Long expectedChecksum,
        final long timestamp, final long position) {
    try {
        getLogger().debug("Recovering Rolled Off Files; total number of files rolled off = {}", new Object[]{rolledOffFiles.size()});
        TailFileObject tfo = states.get(tailFile);

        // For first file that we find, it may or may not be the file that we were last reading from.
        // As a result, we have to read up to the position we stored, while calculating the checksum. If the checksums match,
        // then we know we've already processed this file. If the checksums do not match, then we have not
        // processed this file and we need to seek back to position 0 and ingest the entire file.
        // For all other files that have been rolled over, we need to just ingest the entire file.
        boolean rolloverOccurred = !rolledOffFiles.isEmpty();
        if (rolloverOccurred && expectedChecksum != null && rolledOffFiles.get(0).length() >= position) {
            final File firstFile = rolledOffFiles.get(0);

            final long startNanos = System.nanoTime();
            if (position > 0) {
                try (final InputStream fis = new FileInputStream(firstFile);
                        final CheckedInputStream in = new CheckedInputStream(fis, new CRC32())) {
                    StreamUtils.copy(in, new NullOutputStream(), position);

                    final long checksumResult = in.getChecksum().getValue();
                    if (checksumResult == expectedChecksum) {
                        getLogger().debug("Checksum for {} matched expected checksum. Will skip first {} bytes", new Object[]{firstFile, position});

                        // This is the same file that we were reading when we shutdown. Start reading from this point on.
                        rolledOffFiles.remove(0);
                        FlowFile flowFile = session.create();
                        flowFile = session.importFrom(in, flowFile);
                        if (flowFile.getSize() == 0L) {
                            session.remove(flowFile);
                            // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
                            cleanup();
                            tfo.setState(new TailFileState(tailFile, null, null, 0L, firstFile.lastModified() + 1L, firstFile.length(), null, tfo.getState().getBuffer()));
                        } else {
                            final Map<String, String> attributes = new HashMap<>(3);
                            attributes.put(CoreAttributes.FILENAME.key(), firstFile.getName());
                            attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain");
                            attributes.put("tailfile.original.path", tailFile);
                            flowFile = session.putAllAttributes(flowFile, attributes);

                            session.getProvenanceReporter().receive(flowFile, firstFile.toURI().toString(), "FlowFile contains bytes 0 through " + position + " of source file",
                                    TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos));
                            session.transfer(flowFile, REL_SUCCESS);
                            getLogger().debug("Created {} from rolled over file {} and routed to success", new Object[]{flowFile, firstFile});

                            // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
                            cleanup();
                            tfo.setState(new TailFileState(tailFile, null, null, 0L, firstFile.lastModified() + 1L, firstFile.length(), null, tfo.getState().getBuffer()));

                            // must ensure that we do session.commit() before persisting state in order to avoid data loss.
                            session.commit();
                            persistState(tfo, context);
                        }
                    } else {
                        getLogger().debug("Checksum for {} did not match expected checksum. Checksum for file was {} but expected {}. Will consume entire file",
                                new Object[]{firstFile, checksumResult, expectedChecksum});
                    }
                }
            }
        }

        // For each file that we found that matches our Rollover Pattern, and has a last modified date later than the timestamp
        // that we recovered from the state file, we need to consume the entire file. The only exception to this is the file that
        // we were reading when we last stopped, as it may already have been partially consumed. That is taken care of in the
        // above block of code.
        for (final File file : rolledOffFiles) {
            tfo.setState(consumeFileFully(file, context, session, tfo));
        }

        return rolloverOccurred;
    } catch (final IOException e) {
        getLogger().error("Failed to recover files that have rolled over due to {}", new Object[]{e});
        return false;
    }
}
 
Example 19
Source File: TailFile.java    From localization_nifi with Apache License 2.0
/**
 * Finds any files that have rolled over and have not yet been ingested by
 * this Processor. Each of these files that is found will be ingested as its
 * own FlowFile. If a file is found that has been partially ingested, the
 * rest of the file will be ingested as a single FlowFile but the data that
 * already has been ingested will not be ingested again.
 *
 * @param context the ProcessContext to use in order to obtain Processor
 * configuration.
 * @param session the ProcessSession to use in order to interact with
 * FlowFile creation and content.
 * @param expectedChecksum the checksum value that is expected for the
 * oldest file from offset 0 through &lt;position&gt;.
 * @param timestamp the latest Last Modified Timestamp that has been
 * consumed. Any data that was written before this date will not be
 * ingested.
 * @param position the byte offset in the file being tailed, where tailing
 * last left off.
 *
 * @return <code>true</code> if the file being tailed has rolled over, false
 * otherwise
 */
private boolean recoverRolledFiles(final ProcessContext context, final ProcessSession session, final String tailFile, final List<File> rolledOffFiles, final Long expectedChecksum,
        final long timestamp, final long position) {
    try {
        getLogger().debug("Recovering Rolled Off Files; total number of files rolled off = {}", new Object[]{rolledOffFiles.size()});
        TailFileObject tfo = states.get(tailFile);

        // For first file that we find, it may or may not be the file that we were last reading from.
        // As a result, we have to read up to the position we stored, while calculating the checksum. If the checksums match,
        // then we know we've already processed this file. If the checksums do not match, then we have not
        // processed this file and we need to seek back to position 0 and ingest the entire file.
        // For all other files that have been rolled over, we need to just ingest the entire file.
        boolean rolloverOccurred = !rolledOffFiles.isEmpty();
        if (rolloverOccurred && expectedChecksum != null && rolledOffFiles.get(0).length() >= position) {
            final File firstFile = rolledOffFiles.get(0);

            final long startNanos = System.nanoTime();
            if (position > 0) {
                try (final InputStream fis = new FileInputStream(firstFile);
                        final CheckedInputStream in = new CheckedInputStream(fis, new CRC32())) {
                    StreamUtils.copy(in, new NullOutputStream(), position);

                    final long checksumResult = in.getChecksum().getValue();
                    if (checksumResult == expectedChecksum) {
                        getLogger().debug("Checksum for {} matched expected checksum. Will skip first {} bytes", new Object[]{firstFile, position});

                        // This is the same file that we were reading when we shutdown. Start reading from this point on.
                        rolledOffFiles.remove(0);
                        FlowFile flowFile = session.create();
                        flowFile = session.importFrom(in, flowFile);
                        if (flowFile.getSize() == 0L) {
                            session.remove(flowFile);
                            // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
                            cleanup();
                            tfo.setState(new TailFileState(tailFile, null, null, 0L, firstFile.lastModified() + 1L, firstFile.length(), null, tfo.getState().getBuffer()));
                        } else {
                            final Map<String, String> attributes = new HashMap<>(3);
                            attributes.put(CoreAttributes.FILENAME.key(), firstFile.getName());
                            attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain");
                            attributes.put("tailfile.original.path", tailFile);
                            flowFile = session.putAllAttributes(flowFile, attributes);

                            session.getProvenanceReporter().receive(flowFile, firstFile.toURI().toString(), "FlowFile contains bytes 0 through " + position + " of source file",
                                    TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos));
                            session.transfer(flowFile, REL_SUCCESS);
                            getLogger().debug("Created {} from rolled over file {} and routed to success", new Object[]{flowFile, firstFile});

                            // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
                            cleanup();
                            tfo.setState(new TailFileState(tailFile, null, null, 0L, firstFile.lastModified() + 1L, firstFile.length(), null, tfo.getState().getBuffer()));

                            // must ensure that we do session.commit() before persisting state in order to avoid data loss.
                            session.commit();
                            persistState(tfo, context);
                        }
                    } else {
                        getLogger().debug("Checksum for {} did not match expected checksum. Checksum for file was {} but expected {}. Will consume entire file",
                                new Object[]{firstFile, checksumResult, expectedChecksum});
                    }
                }
            }
        }

        // For each file that we found that matches our Rollover Pattern, and has a last modified date later than the timestamp
        // that we recovered from the state file, we need to consume the entire file. The only exception to this is the file that
        // we were reading when we last stopped, as it may already have been partially consumed. That is taken care of in the
        // above block of code.
        for (final File file : rolledOffFiles) {
            tfo.setState(consumeFileFully(file, context, session, tfo));
        }

        return rolloverOccurred;
    } catch (final IOException e) {
        getLogger().error("Failed to recover files that have rolled over due to {}", new Object[]{e});
        return false;
    }
}
 
Example 20
Source File: MergeContent.java    From nifi with Apache License 2.0
@Override
public FlowFile merge(final Bin bin, final ProcessContext context) {
    final List<FlowFile> contents = bin.getContents();

    final ProcessSession session = bin.getSession();
    FlowFile bundle = session.create(bin.getContents());
    final AtomicReference<String> bundleMimeTypeRef = new AtomicReference<>(null);
    try {
        bundle = session.write(bundle, new OutputStreamCallback() {
            @Override
            public void process(final OutputStream out) throws IOException {
                final byte[] header = getDelimiterContent(context, contents, HEADER);
                if (header != null) {
                    out.write(header);
                }

                boolean isFirst = true;
                final Iterator<FlowFile> itr = contents.iterator();
                while (itr.hasNext()) {
                    final FlowFile flowFile = itr.next();
                    bin.getSession().read(flowFile, false, new InputStreamCallback() {
                        @Override
                        public void process(final InputStream in) throws IOException {
                            StreamUtils.copy(in, out);
                        }
                    });

                    if (itr.hasNext()) {
                        final byte[] demarcator = getDelimiterContent(context, contents, DEMARCATOR);
                        if (demarcator != null) {
                            out.write(demarcator);
                        }
                    }

                    final String flowFileMimeType = flowFile.getAttribute(CoreAttributes.MIME_TYPE.key());
                    if (isFirst) {
                        bundleMimeTypeRef.set(flowFileMimeType);
                        isFirst = false;
                    } else {
                        if (bundleMimeTypeRef.get() != null && !bundleMimeTypeRef.get().equals(flowFileMimeType)) {
                            bundleMimeTypeRef.set(null);
                        }
                    }
                }

                final byte[] footer = getDelimiterContent(context, contents, FOOTER);
                if (footer != null) {
                    out.write(footer);
                }
            }
        });
    } catch (final Exception e) {
        session.remove(bundle);
        throw e;
    }

    session.getProvenanceReporter().join(contents, bundle);
    bundle = session.putAttribute(bundle, CoreAttributes.FILENAME.key(), createFilename(contents));
    if (bundleMimeTypeRef.get() != null) {
        this.mimeType = bundleMimeTypeRef.get();
    }

    return bundle;
}