Java Code Examples for org.apache.nifi.processor.ProcessSession#commit()

The following examples show how to use org.apache.nifi.processor.ProcessSession#commit(). They are drawn from open source projects; the source file, project, and license are noted above each example.
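
Before the examples, here is a minimal sketch of the usual contract, with REL_SUCCESS and the payload as illustrative assumptions: a processor that creates its own session (for example from a ProcessSessionFactory) must call commit() to persist its changes, and should roll back if anything fails, as Examples 4 and 5 below do.

@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    final ProcessSession session = sessionFactory.createSession();
    try {
        FlowFile flowFile = session.create();
        flowFile = session.write(flowFile, out -> out.write("hello".getBytes(StandardCharsets.UTF_8)));
        session.transfer(flowFile, REL_SUCCESS);
        // commit() persists the new content, attributes and transfers as a single unit
        session.commit();
    } catch (final Throwable t) {
        // rollback() undoes everything done in this session
        session.rollback();
        throw t;
    }
}
 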
Example 1
Source File: GetTCP.java    From localization_nifi with Apache License 2.0
@Override
public void handle(InetSocketAddress sourceAddress, byte[] message, boolean partialMessage) {
    ProcessSession session = this.sessionFactory.createSession();
    FlowFile flowFile = session.create();
    flowFile = session.write(flowFile, new OutputStreamCallback() {
        @Override
        public void process(OutputStream out) throws IOException {
            out.write(message);
        }
    });
    flowFile = session.putAttribute(flowFile, SOURCE_ENDPOINT_ATTRIBUTE, sourceAddress.toString());
    if (!GetTCP.this.dynamicAttributes.isEmpty()) {
        flowFile = session.putAllAttributes(flowFile, GetTCP.this.dynamicAttributes);
    }
    if (partialMessage) {
        session.transfer(flowFile, REL_PARTIAL);
    } else {
        session.transfer(flowFile, REL_SUCCESS);
    }
    session.commit();
}
 
Example 2
Source File: DataGeneratorTestProcessor.java    From localization_nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile toRemove = session.get();
    if (toRemove != null) {
        LOG.warn("Removing flow file");
        session.remove(toRemove);
    }

    FlowFile flowFile = session.create();
    final Random random = new Random();
    final byte[] data = new byte[4096];
    random.nextBytes(data);

    flowFile = session.write(flowFile, new OutputStreamCallback() {
        @Override
        public void process(final OutputStream out) throws IOException {
            out.write(data);
        }
    });

    LOG.info("{} transferring {} to success", new Object[]{this, flowFile});
    session.transfer(flowFile, REL_SUCCESS);
    session.commit();
}
 
Example 3
Source File: AbstractPutEventProcessor.java    From localization_nifi with Apache License 2.0
/**
 * Helper method to acquire an available ChannelSender from the pool. If the pool is empty then a new sender is created.
 *
 * @param context
 *            - the current process context.
 *
 * @param session
 *            - the current process session.
 * @param flowFile
 *            - the FlowFile being processed in this session.
 *
 * @return ChannelSender - the sender that has been acquired or null if no sender is available and a new sender cannot be created.
 */
protected ChannelSender acquireSender(final ProcessContext context, final ProcessSession session, final FlowFile flowFile) {
    ChannelSender sender = senderPool.poll();
    if (sender == null) {
        try {
            getLogger().debug("No available connections, creating a new one...");
            sender = createSender(context);
        } catch (IOException e) {
            getLogger().error("No available connections, and unable to create a new one, transferring {} to failure",
                    new Object[]{flowFile}, e);
            session.transfer(flowFile, REL_FAILURE);
            session.commit();
            context.yield();
            sender = null;
        }
    }

    return sender;
}
 
Example 4
Source File: StandardFunnel.java    From nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    final ProcessSession session = sessionFactory.createSession();

    try {
        onTrigger(context, session);
        session.commit();
    } catch (final ProcessException e) {
        session.rollback();
        throw e;
    } catch (final Throwable t) {
        session.rollback();
        throw new RuntimeException(t);
    }
}
 
Example 5
Source File: AbstractPort.java    From nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    final ProcessSession session = sessionFactory.createSession();

    try {
        onTrigger(context, session);
        session.commit();
    } catch (final Throwable t) {
        session.rollback();
        throw t;
    }
}
 
Example 6
Source File: ConsumeMQTT.java    From nifi with Apache License 2.0
private void transferQueue(ProcessSession session){
    while (!mqttQueue.isEmpty()) {
        FlowFile messageFlowfile = session.create();
        final MQTTQueueMessage mqttMessage = mqttQueue.peek();

        Map<String, String> attrs = new HashMap<>();
        attrs.put(BROKER_ATTRIBUTE_KEY, broker);
        attrs.put(TOPIC_ATTRIBUTE_KEY, mqttMessage.getTopic());
        attrs.put(QOS_ATTRIBUTE_KEY, String.valueOf(mqttMessage.getQos()));
        attrs.put(IS_DUPLICATE_ATTRIBUTE_KEY, String.valueOf(mqttMessage.isDuplicate()));
        attrs.put(IS_RETAINED_ATTRIBUTE_KEY, String.valueOf(mqttMessage.isRetained()));

        messageFlowfile = session.putAllAttributes(messageFlowfile, attrs);

        messageFlowfile = session.write(messageFlowfile, new OutputStreamCallback() {
            @Override
            public void process(final OutputStream out) throws IOException {
                out.write(mqttMessage.getPayload());
            }
        });

        String transitUri = new StringBuilder(broker).append(mqttMessage.getTopic()).toString();
        session.getProvenanceReporter().receive(messageFlowfile, transitUri);
        session.transfer(messageFlowfile, REL_MESSAGE);
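        // commit before removing the message from the in-memory queue: if the processor
        // dies between commit() and remove(), the worst case is a duplicate FlowFile
        // rather than a lost message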
        session.commit();
        if (!mqttQueue.remove(mqttMessage) && logger.isWarnEnabled()) {
            logger.warn(new StringBuilder("FlowFile ")
                    .append(messageFlowfile.getAttribute(CoreAttributes.UUID.key()))
                    .append(" for Mqtt message ")
                    .append(mqttMessage)
                    .append(" had already been removed from queue, possible duplication of flow files")
                    .toString());
        }
    }
}
 
Example 7
Source File: ListS3.java    From localization_nifi with Apache License 2.0
private boolean commit(final ProcessContext context, final ProcessSession session, int listCount) {
    boolean willCommit = listCount > 0;
    if (willCommit) {
        getLogger().info("Successfully listed {} new files from S3; routing to success", new Object[] {listCount});
        session.commit();
        persistState(context);
    }
    return willCommit;
}
 
Example 8
Source File: ListenLumberjack.java    From localization_nifi with Apache License 2.0
protected void postProcess(final ProcessContext context, final ProcessSession session, final List<LumberjackEvent> events) {
    // first commit the session so we guarantee we have all the events successfully
    // written to FlowFiles and transferred to the success relationship
    session.commit();
    // respond to each event to acknowledge successful receipt
    for (final LumberjackEvent event : events) {
        respond(event, LumberjackResponse.ok(event.getSeqNumber()));
    }
}
 
Example 9
Source File: ConsumeAMQP.java    From nifi with Apache License 2.0
/**
 * Will construct a {@link FlowFile} containing the body of the consumed AMQP message (if {@link GetResponse} returned by {@link AMQPConsumer} is
 * not null) and AMQP properties that came with message which are added to a {@link FlowFile} as attributes, transferring {@link FlowFile} to
 * 'success' {@link Relationship}.
 */
@Override
protected void processResource(final Connection connection, final AMQPConsumer consumer, final ProcessContext context, final ProcessSession session) {
    GetResponse lastReceived = null;

    for (int i = 0; i < context.getProperty(BATCH_SIZE).asInteger(); i++) {
        final GetResponse response = consumer.consume();
        if (response == null) {
            if (lastReceived == null) {
                // If no messages received, then yield.
                context.yield();
            }

            break;
        }

        FlowFile flowFile = session.create();
        flowFile = session.write(flowFile, out -> out.write(response.getBody()));

        final BasicProperties amqpProperties = response.getProps();
        final Map<String, String> attributes = buildAttributes(amqpProperties);
        flowFile = session.putAllAttributes(flowFile, attributes);

        session.getProvenanceReporter().receive(flowFile, connection.toString() + "/" + context.getProperty(QUEUE).getValue());
        session.transfer(flowFile, REL_SUCCESS);
        lastReceived = response;
    }

    // commit the session before acknowledging the messages on the broker: if commit()
    // fails, the unacknowledged messages are redelivered rather than lost
    session.commit();

    if (lastReceived != null) {
        try {
            consumer.acknowledge(lastReceived);
        } catch (IOException e) {
            throw new ProcessException("Failed to acknowledge message", e);
        }
    }
}
 
Example 10
Source File: PutUDP.java    From nifi with Apache License 2.0
/**
 * Event handler method invoked when the framework forwards a FlowFile to the Processor. The FlowFile content is sent out as a UDP datagram using an acquired ChannelSender object. If the
 * content is sent successfully, the FlowFile is forwarded to the success relationship; if an error occurs, it is forwarded to the failure relationship.
 *
 * @param context
 *            - the current process context.
 *
 * @param sessionFactory
 *            - a factory object to obtain a process session.
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    final ProcessSession session = sessionFactory.createSession();
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        final PruneResult result = pruneIdleSenders(context.getProperty(IDLE_EXPIRATION).asTimePeriod(TimeUnit.MILLISECONDS).longValue());
        // yield if we closed an idle connection, or if there were no connections in the first place
        if (result.getNumClosed() > 0 || (result.getNumClosed() == 0 && result.getNumConsidered() == 0)) {
            context.yield();
        }
        return;
    }

    ChannelSender sender = acquireSender(context, session, flowFile);
    if (sender == null) {
        return;
    }

    try {
        byte[] content = readContent(session, flowFile);
        StopWatch stopWatch = new StopWatch(true);
        sender.send(content);
        session.getProvenanceReporter().send(flowFile, transitUri, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        session.transfer(flowFile, REL_SUCCESS);
        session.commit();
    } catch (Exception e) {
        getLogger().error("Exception while handling a process session, transferring {} to failure.", new Object[] { flowFile }, e);
        onFailure(context, session, flowFile);
    } finally {
        relinquishSender(sender);
    }
}
 
Example 11
Source File: StandardFunnel.java    From nifi with Apache License 2.0
private void onTrigger(final ProcessContext context, final ProcessSession session) {
    readLock.lock();
    try {
        Set<Relationship> available = context.getAvailableRelationships();
        int iterations = 0;
        while (!available.isEmpty()) {
            final List<FlowFile> flowFiles = session.get(1000);
            if (flowFiles.isEmpty()) {
                break;
            }

            session.transfer(flowFiles, Relationship.ANONYMOUS);
            session.commit();

            // If there are fewer than 1,000 FlowFiles available to transfer, or if we
            // have hit the configured FlowFile cap, we want to stop. This prevents us from
            // holding the Timer-Driven Thread for an excessive amount of time.
            if (flowFiles.size() < 1000 || ++iterations >= maxIterations) {
                break;
            }

            available = context.getAvailableRelationships();
        }
    } finally {
        readLock.unlock();
    }
}
 
Example 12
Source File: ListenBeats.java    From localization_nifi with Apache License 2.0
protected void postProcess(final ProcessContext context, final ProcessSession session, final List<BeatsEvent> events) {
    // first commit the session so we guarantee we have all the events successfully
    // written to FlowFiles and transferred to the success relationship
    session.commit();
    // respond to each event to acknowledge successful receipt
    for (final BeatsEvent event : events) {
        respond(event, BeatsResponse.ok(event.getSeqNumber()));
    }
}
 
Example 13
Source File: TestStandardProcessSession.java    From nifi with Apache License 2.0
@Test
public void testAppendToFlowFileWhereResourceClaimHasMultipleContentClaims() throws IOException {
    final Relationship relationship = new Relationship.Builder().name("A").build();

    FlowFile ffa = session.create();
    ffa = session.write(ffa, (out) -> out.write('A'));
    session.transfer(ffa, relationship);

    FlowFile ffb = session.create();
    ffb = session.write(ffb, (out) -> out.write('B'));
    session.transfer(ffb, relationship);
    session.commit();

    final ProcessSession newSession = new StandardProcessSession(context, () -> false);
    FlowFile toUpdate = newSession.get();
    newSession.append(toUpdate, out -> out.write('C'));

    // Read the content back and ensure that it is correct
    final byte[] buff;
    try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
        newSession.read(toUpdate, in -> StreamUtils.copy(in, baos));
        buff = baos.toByteArray();
    }

    final String output = new String(buff, StandardCharsets.UTF_8);
    assertEquals("AC", output);
    newSession.transfer(toUpdate);
    newSession.commit();
}
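
Outside of framework-level tests like the one above, processor tests rarely call commit() directly: with nifi-mock's TestRunner, an AbstractProcessor subclass has its session committed automatically after a successful onTrigger(), so the test just asserts on the transferred FlowFiles. A minimal sketch, assuming a hypothetical MyProcessor (an AbstractProcessor) that routes to REL_SUCCESS:

TestRunner runner = TestRunners.newTestRunner(new MyProcessor());
runner.enqueue("hello".getBytes(StandardCharsets.UTF_8)); // queue an input FlowFile
runner.run(); // invokes onTrigger(); the session is committed on success
runner.assertAllFlowFilesTransferred(MyProcessor.REL_SUCCESS, 1);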
 
Example 14
Source File: TailFile.java    From localization_nifi with Apache License 2.0
/**
 * Finds any files that have rolled over and have not yet been ingested by
 * this Processor. Each of these files that is found will be ingested as its
 * own FlowFile. If a file is found that has been partially ingested, the
 * rest of the file will be ingested as a single FlowFile but the data that
 * already has been ingested will not be ingested again.
 *
 * @param context the ProcessContext to use in order to obtain Processor
 * configuration.
 * @param session the ProcessSession to use in order to interact with
 * FlowFile creation and content.
 * @param expectedChecksum the checksum value that is expected for the
 * oldest file from offset 0 through &lt;position&gt;.
 * @param timestamp the latest Last Modified Timestamp that has been
 * consumed. Any data that was written before this timestamp will not be
 * ingested.
 * @param position the byte offset in the file being tailed, where tailing
 * last left off.
 *
 * @return <code>true</code> if the file being tailed has rolled over, false
 * otherwise
 */
private boolean recoverRolledFiles(final ProcessContext context, final ProcessSession session, final String tailFile, final List<File> rolledOffFiles, final Long expectedChecksum,
        final long timestamp, final long position) {
    try {
        getLogger().debug("Recovering Rolled Off Files; total number of files rolled off = {}", new Object[]{rolledOffFiles.size()});
        TailFileObject tfo = states.get(tailFile);

        // For first file that we find, it may or may not be the file that we were last reading from.
        // As a result, we have to read up to the position we stored, while calculating the checksum. If the checksums match,
        // then we know we've already processed this file. If the checksums do not match, then we have not
        // processed this file and we need to seek back to position 0 and ingest the entire file.
        // For all other files that have been rolled over, we need to just ingest the entire file.
        boolean rolloverOccurred = !rolledOffFiles.isEmpty();
        if (rolloverOccurred && expectedChecksum != null && rolledOffFiles.get(0).length() >= position) {
            final File firstFile = rolledOffFiles.get(0);

            final long startNanos = System.nanoTime();
            if (position > 0) {
                try (final InputStream fis = new FileInputStream(firstFile);
                        final CheckedInputStream in = new CheckedInputStream(fis, new CRC32())) {
                    StreamUtils.copy(in, new NullOutputStream(), position);

                    final long checksumResult = in.getChecksum().getValue();
                    if (checksumResult == expectedChecksum) {
                        getLogger().debug("Checksum for {} matched expected checksum. Will skip first {} bytes", new Object[]{firstFile, position});

                        // This is the same file that we were reading when we shutdown. Start reading from this point on.
                        rolledOffFiles.remove(0);
                        FlowFile flowFile = session.create();
                        flowFile = session.importFrom(in, flowFile);
                        if (flowFile.getSize() == 0L) {
                            session.remove(flowFile);
                            // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
                            cleanup();
                            tfo.setState(new TailFileState(tailFile, null, null, 0L, firstFile.lastModified() + 1L, firstFile.length(), null, tfo.getState().getBuffer()));
                        } else {
                            final Map<String, String> attributes = new HashMap<>(3);
                            attributes.put(CoreAttributes.FILENAME.key(), firstFile.getName());
                            attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain");
                            attributes.put("tailfile.original.path", tailFile);
                            flowFile = session.putAllAttributes(flowFile, attributes);

                            session.getProvenanceReporter().receive(flowFile, firstFile.toURI().toString(), "FlowFile contains bytes 0 through " + position + " of source file",
                                    TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos));
                            session.transfer(flowFile, REL_SUCCESS);
                            getLogger().debug("Created {} from rolled over file {} and routed to success", new Object[]{flowFile, firstFile});

                            // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
                            cleanup();
                            tfo.setState(new TailFileState(tailFile, null, null, 0L, firstFile.lastModified() + 1L, firstFile.length(), null, tfo.getState().getBuffer()));

                            // must ensure that we do session.commit() before persisting state in order to avoid data loss.
                            session.commit();
                            persistState(tfo, context);
                        }
                    } else {
                        getLogger().debug("Checksum for {} did not match expected checksum. Checksum for file was {} but expected {}. Will consume entire file",
                                new Object[]{firstFile, checksumResult, expectedChecksum});
                    }
                }
            }
        }

        // For each file that we found that matches our Rollover Pattern, and has a last modified date later than the timestamp
        // that we recovered from the state file, we need to consume the entire file. The only exception to this is the file that
        // we were reading when we last stopped, as it may already have been partially consumed. That is taken care of in the
        // above block of code.
        for (final File file : rolledOffFiles) {
            tfo.setState(consumeFileFully(file, context, session, tfo));
        }

        return rolloverOccurred;
    } catch (final IOException e) {
        getLogger().error("Failed to recover files that have rolled over due to {}", new Object[]{e});
        return false;
    }
}
 
Example 15
Source File: PutHBaseRecord.java    From nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final RecordReaderFactory recordParserFactory = context.getProperty(RECORD_READER_FACTORY)
            .asControllerService(RecordReaderFactory.class);
    List<PutFlowFile> flowFiles = new ArrayList<>();
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String rowFieldName = context.getProperty(ROW_FIELD_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String columnFamily = context.getProperty(COLUMN_FAMILY).evaluateAttributeExpressions(flowFile).getValue();
    final String timestampFieldName = context.getProperty(TIMESTAMP_FIELD_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String fieldEncodingStrategy = context.getProperty(FIELD_ENCODING_STRATEGY).getValue();
    final String complexFieldStrategy = context.getProperty(COMPLEX_FIELD_STRATEGY).getValue();
    final String rowEncodingStrategy = context.getProperty(ROW_ID_ENCODING_STRATEGY).getValue();
    final String recordPathText = context.getProperty(VISIBILITY_RECORD_PATH).getValue();

    RecordPath recordPath = null;
    if (recordPathCache != null && !StringUtils.isEmpty(recordPathText)) {
        recordPath = recordPathCache.getCompiled(recordPathText);
    }

    final long start = System.nanoTime();
    int index = 0;
    int columns = 0;
    boolean failed = false;
    String startIndexStr = flowFile.getAttribute("restart.index");
    int startIndex = -1;
    if (startIndexStr != null) {
        startIndex = Integer.parseInt(startIndexStr);
    }

    PutFlowFile last  = null;
    try (final InputStream in = session.read(flowFile);
         final RecordReader reader = recordParserFactory.createRecordReader(flowFile, in, getLogger())) {
        Record record;
        if (startIndex >= 0) {
            while ( index++ < startIndex && (reader.nextRecord()) != null) {}
        }

        while ((record = reader.nextRecord()) != null) {
            PutFlowFile putFlowFile = createPut(context, record, reader.getSchema(), recordPath, flowFile, rowFieldName, columnFamily,
                    timestampFieldName, fieldEncodingStrategy, rowEncodingStrategy, complexFieldStrategy);
            if (putFlowFile.getColumns().size() == 0) {
                continue;
            }
            flowFiles.add(putFlowFile);
            index++;

            if (flowFiles.size() == batchSize) {
                columns += addBatch(tableName, flowFiles);
                last = flowFiles.get(flowFiles.size() - 1);
                flowFiles = new ArrayList<>();
            }
        }
        if (flowFiles.size() > 0) {
            columns += addBatch(tableName, flowFiles);
            last = flowFiles.get(flowFiles.size() - 1);
        }
    } catch (Exception ex) {
        getLogger().error("Failed to put records to HBase.", ex);
        failed = true;
    }

    if (!failed) {
        if (columns > 0) {
            sendProvenance(session, flowFile, columns, System.nanoTime() - start, last);
        }
        flowFile = session.removeAttribute(flowFile, "restart.index");
        session.transfer(flowFile, REL_SUCCESS);
    } else {
        String restartIndex = Integer.toString(index - flowFiles.size());
        flowFile = session.putAttribute(flowFile, "restart.index", restartIndex);
        if (columns > 0) {
            sendProvenance(session, flowFile, columns, System.nanoTime() - start, last);
        }
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
    }

    session.commit();
}
 
Example 16
Source File: AbstractFlowFileServerProtocol.java    From nifi with Apache License 2.0
protected int commitReceiveTransaction(Peer peer, FlowFileTransaction transaction) throws IOException {
    CommunicationsSession commsSession = peer.getCommunicationsSession();
    ProcessSession session = transaction.getSession();
    final Response confirmTransactionResponse = readTransactionResponse(false, commsSession);
    logger.debug("{} Received {} from {}", this, confirmTransactionResponse, peer);

    switch (confirmTransactionResponse.getCode()) {
        case CONFIRM_TRANSACTION:
            break;
        case BAD_CHECKSUM:
            session.rollback();
            throw new IOException(this + " Received a BadChecksum response from peer " + peer);
        default:
            throw new ProtocolException(this + " Received unexpected Response Code from peer " + peer + " : " + confirmTransactionResponse + "; expected 'Confirm Transaction' Response Code");
    }

    // Commit the session so that we have persisted the data
    session.commit();

    if (transaction.getContext().getAvailableRelationships().isEmpty()) {
        // Confirm that we received the data and the peer can now discard it but that the peer should not
        // send any more data for a bit
        logger.debug("{} Sending TRANSACTION_FINISHED_BUT_DESTINATION_FULL to {}", this, peer);
        writeTransactionResponse(false, ResponseCode.TRANSACTION_FINISHED_BUT_DESTINATION_FULL, commsSession);
    } else {
        // Confirm that we received the data and the peer can now discard it
        logger.debug("{} Sending TRANSACTION_FINISHED to {}", this, peer);
        writeTransactionResponse(false, ResponseCode.TRANSACTION_FINISHED, commsSession);
    }

    Set<FlowFile> flowFilesReceived = transaction.getFlowFilesSent();
    long bytesReceived = transaction.getBytesSent();
    StopWatch stopWatch = transaction.getStopWatch();
    stopWatch.stop();
    final String flowFileDescription = flowFilesReceived.size() < 20 ? flowFilesReceived.toString() : flowFilesReceived.size() + " FlowFiles";
    final String uploadDataRate = stopWatch.calculateDataRate(bytesReceived);
    final long uploadMillis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
    final String dataSize = FormatUtils.formatDataSize(bytesReceived);
    logger.info("{} Successfully received {} ({}) from {} in {} milliseconds at a rate of {}", new Object[]{
        this, flowFileDescription, dataSize, peer, uploadMillis, uploadDataRate});

    return flowFilesReceived.size();
}
 
Example 17
Source File: GetMongo.java    From localization_nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLogger();

    final Document query = context.getProperty(QUERY).isSet() ? Document.parse(context.getProperty(QUERY).getValue()) : null;
    final Document projection = context.getProperty(PROJECTION).isSet() ? Document.parse(context.getProperty(PROJECTION).getValue()) : null;
    final Document sort = context.getProperty(SORT).isSet() ? Document.parse(context.getProperty(SORT).getValue()) : null;

    final MongoCollection<Document> collection = getCollection(context);

    try {
        final FindIterable<Document> it = query != null ? collection.find(query) : collection.find();
        if (projection != null) {
            it.projection(projection);
        }
        if (sort != null) {
            it.sort(sort);
        }
        if (context.getProperty(LIMIT).isSet()) {
            it.limit(context.getProperty(LIMIT).asInteger());
        }
        if (context.getProperty(BATCH_SIZE).isSet()) {
            it.batchSize(context.getProperty(BATCH_SIZE).asInteger());
        }

        final MongoCursor<Document> cursor = it.iterator();
        try {
            FlowFile flowFile = null;
            while (cursor.hasNext()) {
                flowFile = session.create();
                flowFile = session.write(flowFile, new OutputStreamCallback() {
                    @Override
                    public void process(OutputStream out) throws IOException {
                        IOUtils.write(cursor.next().toJson(), out);
                    }
                });

                session.getProvenanceReporter().receive(flowFile, context.getProperty(URI).getValue());
                session.transfer(flowFile, REL_SUCCESS);
            }

            session.commit();

        } finally {
            cursor.close();
        }

    } catch (final RuntimeException e) {
        context.yield();
        session.rollback();
        logger.error("Failed to execute query {} due to {}", new Object[] { query, e }, e);
    }
}
 
Example 18
Source File: ContentAcknowledgmentServlet.java    From nifi with Apache License 2.0
@Override
protected void doDelete(final HttpServletRequest request, final HttpServletResponse response) throws ServletException, IOException {
    final X509Certificate[] certs = (X509Certificate[]) request.getAttribute("javax.servlet.request.X509Certificate");
    String foundSubject = DEFAULT_FOUND_SUBJECT;
    if (certs != null && certs.length > 0) {
        for (final X509Certificate cert : certs) {
            foundSubject = cert.getSubjectDN().getName();
            if (authorizedPattern.matcher(foundSubject).matches()) {
                break;
            } else {
                logger.warn(processor + " rejecting transfer attempt from " + foundSubject + " because the DN is not authorized");
                response.sendError(HttpServletResponse.SC_FORBIDDEN, "not allowed based on dn");
                return;
            }
        }
    }

    final String uri = request.getRequestURI();
    final int slashIndex = uri.lastIndexOf("/");
    int questionIndex = uri.indexOf("?");
    if (questionIndex < 0) {
        questionIndex = uri.length();
    }

    final String uuid = uri.substring(slashIndex + 1, questionIndex);
    final FlowFileEntryTimeWrapper timeWrapper = flowFileMap.remove(uuid);
    if (timeWrapper == null) {
        logger.warn("received DELETE for HOLD with ID " + uuid + " from Remote Host: [" + request.getRemoteHost()
                + "] Port [" + request.getRemotePort() + "] SubjectDN [" + foundSubject + "], but no HOLD exists with that ID; sending response with Status Code 404");
        response.sendError(HttpServletResponse.SC_NOT_FOUND);
        return;
    }

    try {
        final Set<FlowFile> flowFiles = timeWrapper.getFlowFiles();

        final long transferTime = System.currentTimeMillis() - timeWrapper.getEntryTime();
        long totalFlowFileSize = 0;
        for (final FlowFile flowFile : flowFiles) {
            totalFlowFileSize += flowFile.getSize();
        }

        double seconds = (double) transferTime / 1000D;
        if (seconds <= 0D) {
            seconds = .00000001D;
        }
        final double bytesPerSecond = ((double) totalFlowFileSize / seconds);
        final String transferRate = FormatUtils.formatDataSize(bytesPerSecond) + "/sec";

        logger.info("received {} files/{} bytes from Remote Host: [{}] Port [{}] SubjectDN [{}] in {} milliseconds at a rate of {}; "
                + "transferring to 'success': {}",
                new Object[]{flowFiles.size(), totalFlowFileSize, request.getRemoteHost(), request.getRemotePort(), foundSubject, transferTime, transferRate, flowFiles});

        final ProcessSession session = timeWrapper.getSession();
        session.transfer(flowFiles, ListenHTTP.RELATIONSHIP_SUCCESS);
        session.commit();

        response.setStatus(HttpServletResponse.SC_OK);
        response.flushBuffer();
    } catch (final Throwable t) {
        timeWrapper.getSession().rollback();
        logger.error("received DELETE for HOLD with ID {} from Remote Host: [{}] Port [{}] SubjectDN [{}], but failed to process the request due to {}",
                new Object[]{uuid, request.getRemoteHost(), request.getRemotePort(), foundSubject, t.toString()});
        if (logger.isDebugEnabled()) {
            logger.error("", t);
        }

        response.sendError(HttpServletResponse.SC_NOT_FOUND);
    }
}
 
Example 19
Source File: PutUDP.java    From nifi with Apache License 2.0
/**
 * Event handler method that performs the required actions when a failure has occurred: the FlowFile is penalized, forwarded to the failure relationship, and the context is yielded.
 *
 * @param context
 *            - the current process context.
 *
 * @param session
 *            - the current process session.
 * @param flowFile
 *            - the FlowFile that failed to be processed.
 */
protected void onFailure(final ProcessContext context, final ProcessSession session, final FlowFile flowFile) {
    session.transfer(session.penalize(flowFile), REL_FAILURE);
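    // commit immediately so the failure routing is finalized before the processor yields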
    session.commit();
    context.yield();
}