Java Code Examples for org.apache.nifi.processor.ProcessSession#importFrom()

The following examples show how to use org.apache.nifi.processor.ProcessSession#importFrom(). Each example notes its original project and source file above the code.
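
ProcessSession#importFrom() replaces a FlowFile's content with bytes read from either a java.nio.file.Path or an InputStream and returns an updated FlowFile reference that callers must continue to use. Before the project examples, here is a minimal, hypothetical sketch of both overloads inside an onTrigger() method; the REL_SUCCESS relationship, the file path, and the payload are illustrative assumptions and are not taken from any of the quoted projects.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // Path overload: imports the file's bytes as the FlowFile content. The boolean
    // argument controls whether the source file is kept in place (true keeps it).
    FlowFile fromFile = session.create();
    fromFile = session.importFrom(java.nio.file.Paths.get("/tmp/example.txt"), true, fromFile); // assumed path
    session.transfer(fromFile, REL_SUCCESS); // assumed relationship

    // InputStream overload: imports in-memory bytes as the FlowFile content.
    final byte[] payload = "hello".getBytes(java.nio.charset.StandardCharsets.UTF_8); // assumed payload
    FlowFile fromStream = session.create();
    fromStream = session.importFrom(new java.io.ByteArrayInputStream(payload), fromStream);
    session.transfer(fromStream, REL_SUCCESS);
}
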
Example 1
Source File: TailFile.java    From localization_nifi with Apache License 2.0
/**
 * Creates a new FlowFile that contains the entire contents of the given
 * file and transfers that FlowFile to success. This method will commit the
 * given session and emit an appropriate Provenance Event.
 *
 * @param file the file to ingest
 * @param context the ProcessContext
 * @param session the ProcessSession
 * @param tfo the current state
 *
 * @return the new, updated state that reflects that the given file has been
 * ingested.
 */
private TailFileState consumeFileFully(final File file, final ProcessContext context, final ProcessSession session, TailFileObject tfo) {
    FlowFile flowFile = session.create();
    flowFile = session.importFrom(file.toPath(), true, flowFile);
    if (flowFile.getSize() == 0L) {
        session.remove(flowFile);
    } else {
        final Map<String, String> attributes = new HashMap<>(3);
        attributes.put(CoreAttributes.FILENAME.key(), file.getName());
        attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain");
        attributes.put("tailfile.original.path", tfo.getState().getFilename());
        flowFile = session.putAllAttributes(flowFile, attributes);
        session.getProvenanceReporter().receive(flowFile, file.toURI().toString());
        session.transfer(flowFile, REL_SUCCESS);
        getLogger().debug("Created {} from {} and routed to success", new Object[]{flowFile, file});

        // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
        cleanup();
        tfo.setState(new TailFileState(context.getProperty(FILENAME).evaluateAttributeExpressions().getValue(), null, null, 0L, file.lastModified() + 1L, file.length(), null,
                tfo.getState().getBuffer()));

        // must ensure that we do session.commit() before persisting state in order to avoid data loss.
        session.commit();
        persistState(tfo, context);
    }

    return tfo.getState();
}
 
Example 2
Source File: GetIgniteCache.java    From localization_nifi with Apache License 2.0
/**
 * Handles the incoming FlowFile and gets the entry from the cache based on the key attribute.
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();

    if (flowFile == null) {
        return;
    }

    String key = context.getProperty(IGNITE_CACHE_ENTRY_KEY).evaluateAttributeExpressions(flowFile).getValue();
    if ( StringUtils.isEmpty(key) ) {
        flowFile = session.putAttribute(flowFile, IGNITE_GET_FAILED_REASON_ATTRIBUTE_KEY, IGNITE_GET_FAILED_MISSING_KEY_MESSAGE);
        session.transfer(flowFile, REL_FAILURE);
    } else {
        try {
            byte [] value = getIgniteCache().get(key);
            if ( value == null || value.length == 0 ) {
                flowFile = session.putAttribute(flowFile, IGNITE_GET_FAILED_REASON_ATTRIBUTE_KEY,
                    IGNITE_GET_FAILED_MISSING_ENTRY_MESSAGE);
                session.transfer(flowFile, REL_FAILURE);
            } else {
                ByteArrayInputStream bais = new ByteArrayInputStream(value);
                flowFile = session.importFrom(bais, flowFile);
                session.transfer(flowFile, REL_SUCCESS);
            }
        } catch(Exception exception) {
            flowFile = session.putAttribute(flowFile, IGNITE_GET_FAILED_REASON_ATTRIBUTE_KEY,
                 IGNITE_GET_FAILED_MESSAGE_PREFIX + exception);
            getLogger().error("Failed to get value for key {} from IgniteDB due to {}", new Object[] { key, exception }, exception);
            session.transfer(flowFile, REL_FAILURE);
            context.yield();
        }
    }
}
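
In this example the cache entry is already an in-memory byte array, so importFrom(InputStream, FlowFile) replaces the FlowFile content in a single call. The fragment below is a rough equivalent using session.write(); it is only a sketch to show how the two calls relate, not code from the Ignite bundle, and it assumes the same flowFile and value variables as the success branch above.

// Sketch: roughly equivalent to flowFile = session.importFrom(new ByteArrayInputStream(value), flowFile);
flowFile = session.write(flowFile, out -> out.write(value));
session.transfer(flowFile, REL_SUCCESS);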
 
Example 3
Source File: TailFile.java    From nifi with Apache License 2.0
/**
 * Creates a new FlowFile that contains the entire contents of the given
 * file and transfers that FlowFile to success. This method will commit the
 * given session and emit an appropriate Provenance Event.
 *
 * @param file the file to ingest
 * @param context the ProcessContext
 * @param session the ProcessSession
 * @param tfo the current state
 *
 * @return the new, updated state that reflects that the given file has been
 * ingested.
 */
private TailFileState consumeFileFully(final File file, final ProcessContext context, final ProcessSession session, TailFileObject tfo) {
    FlowFile flowFile = session.create();
    flowFile = session.importFrom(file.toPath(), true, flowFile);
    if (flowFile.getSize() == 0L) {
        session.remove(flowFile);
    } else {
        final Map<String, String> attributes = new HashMap<>(3);
        attributes.put(CoreAttributes.FILENAME.key(), file.getName());
        attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain");
        attributes.put("tailfile.original.path", tfo.getState().getFilename());
        flowFile = session.putAllAttributes(flowFile, attributes);
        session.getProvenanceReporter().receive(flowFile, file.toURI().toString());
        session.transfer(flowFile, REL_SUCCESS);
        getLogger().debug("Created {} from {} and routed to success", new Object[]{flowFile, file});

        // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
        cleanup();
        tfo.setState(new TailFileState(context.getProperty(FILENAME).evaluateAttributeExpressions().getValue(), null, null, 0L, file.lastModified() + 1L, file.length(), null,
                tfo.getState().getBuffer()));

        // must ensure that we do session.commit() before persisting state in order to avoid data loss.
        session.commit();
        persistState(tfo, context);
    }

    return tfo.getState();
}
 
Example 4
Source File: GetIgniteCache.java    From nifi with Apache License 2.0
/**
 * Handles the incoming FlowFile and gets the entry from the cache based on the key attribute.
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();

    if (flowFile == null) {
        return;
    }

    String key = context.getProperty(IGNITE_CACHE_ENTRY_KEY).evaluateAttributeExpressions(flowFile).getValue();
    if ( StringUtils.isEmpty(key) ) {
        flowFile = session.putAttribute(flowFile, IGNITE_GET_FAILED_REASON_ATTRIBUTE_KEY, IGNITE_GET_FAILED_MISSING_KEY_MESSAGE);
        session.transfer(flowFile, REL_FAILURE);
    } else {
        try {
            byte [] value = getIgniteCache().get(key);
            if ( value == null || value.length == 0 ) {
                flowFile = session.putAttribute(flowFile, IGNITE_GET_FAILED_REASON_ATTRIBUTE_KEY,
                    IGNITE_GET_FAILED_MISSING_ENTRY_MESSAGE);
                session.transfer(flowFile, REL_FAILURE);
            } else {
                ByteArrayInputStream bais = new ByteArrayInputStream(value);
                flowFile = session.importFrom(bais, flowFile);
                session.transfer(flowFile, REL_SUCCESS);
            }
        } catch(Exception exception) {
            flowFile = session.putAttribute(flowFile, IGNITE_GET_FAILED_REASON_ATTRIBUTE_KEY,
                 IGNITE_GET_FAILED_MESSAGE_PREFIX + exception);
            getLogger().error("Failed to get value for key {} from IgniteDB due to {}", new Object[] { key, exception }, exception);
            session.transfer(flowFile, REL_FAILURE);
            context.yield();
        }
    }
}
 
Example 5
Source File: AbstractMongoProcessor.java    From nifi with Apache License 2.0
protected void writeBatch(String payload, FlowFile parent, ProcessContext context, ProcessSession session,
                          Map<String, String> extraAttributes, Relationship rel) throws UnsupportedEncodingException {
    String charset = context.getProperty(CHARSET).evaluateAttributeExpressions(parent).getValue();

    FlowFile flowFile = parent != null ? session.create(parent) : session.create();
    flowFile = session.importFrom(new ByteArrayInputStream(payload.getBytes(charset)), flowFile);
    flowFile = session.putAllAttributes(flowFile, extraAttributes);
    if (parent == null) {
        session.getProvenanceReporter().receive(flowFile, getURI(context));
    }
    session.transfer(flowFile, rel);
}
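
A hypothetical caller of writeBatch() could look like the fragment below. The JSON payload, the parentFlowFile variable, the MIME type attribute, and the REL_SUCCESS relationship are all assumptions made for illustration; they are not taken from the Mongo bundle.

// Hypothetical usage sketch: hand one query-result batch to writeBatch() and route it to success.
try {
    final Map<String, String> extraAttributes = new HashMap<>();
    extraAttributes.put(CoreAttributes.MIME_TYPE.key(), "application/json"); // assumed attribute
    writeBatch("[{\"_id\": 1}]", parentFlowFile, context, session, extraAttributes, REL_SUCCESS);
} catch (UnsupportedEncodingException e) {
    throw new ProcessException("The configured character set is not supported", e);
}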
 
Example 6
Source File: TailFile.java    From localization_nifi with Apache License 2.0
/**
 * Finds any files that have rolled over and have not yet been ingested by
 * this Processor. Each of these files that is found will be ingested as its
 * own FlowFile. If a file is found that has been partially ingested, the
 * rest of the file will be ingested as a single FlowFile but the data that
 * already has been ingested will not be ingested again.
 *
 * @param context the ProcessContext to use in order to obtain Processor
 * configuration.
 * @param session the ProcessSession to use in order to interact with
 * FlowFile creation and content.
 * @param expectedChecksum the checksum value that is expected for the
 * oldest file from offset 0 through &lt;position&gt;.
 * @param timestamp the latest Last Modified Timestamp that has been
 * consumed. Any data that was written before this timestamp will not be
 * ingested.
 * @param position the byte offset in the file being tailed, where tailing
 * last left off.
 *
 * @return <code>true</code> if the file being tailed has rolled over, false
 * otherwise
 */
private boolean recoverRolledFiles(final ProcessContext context, final ProcessSession session, final String tailFile, final List<File> rolledOffFiles, final Long expectedChecksum,
        final long timestamp, final long position) {
    try {
        getLogger().debug("Recovering Rolled Off Files; total number of files rolled off = {}", new Object[]{rolledOffFiles.size()});
        TailFileObject tfo = states.get(tailFile);

        // For first file that we find, it may or may not be the file that we were last reading from.
        // As a result, we have to read up to the position we stored, while calculating the checksum. If the checksums match,
        // then we know we've already processed this file. If the checksums do not match, then we have not
        // processed this file and we need to seek back to position 0 and ingest the entire file.
        // For all other files that have been rolled over, we need to just ingest the entire file.
        boolean rolloverOccurred = !rolledOffFiles.isEmpty();
        if (rolloverOccurred && expectedChecksum != null && rolledOffFiles.get(0).length() >= position) {
            final File firstFile = rolledOffFiles.get(0);

            final long startNanos = System.nanoTime();
            if (position > 0) {
                try (final InputStream fis = new FileInputStream(firstFile);
                        final CheckedInputStream in = new CheckedInputStream(fis, new CRC32())) {
                    StreamUtils.copy(in, new NullOutputStream(), position);

                    final long checksumResult = in.getChecksum().getValue();
                    if (checksumResult == expectedChecksum) {
                        getLogger().debug("Checksum for {} matched expected checksum. Will skip first {} bytes", new Object[]{firstFile, position});

                        // This is the same file that we were reading when we shutdown. Start reading from this point on.
                        rolledOffFiles.remove(0);
                        FlowFile flowFile = session.create();
                        flowFile = session.importFrom(in, flowFile);
                        if (flowFile.getSize() == 0L) {
                            session.remove(flowFile);
                            // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
                            cleanup();
                            tfo.setState(new TailFileState(tailFile, null, null, 0L, firstFile.lastModified() + 1L, firstFile.length(), null, tfo.getState().getBuffer()));
                        } else {
                            final Map<String, String> attributes = new HashMap<>(3);
                            attributes.put(CoreAttributes.FILENAME.key(), firstFile.getName());
                            attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain");
                            attributes.put("tailfile.original.path", tailFile);
                            flowFile = session.putAllAttributes(flowFile, attributes);

                            session.getProvenanceReporter().receive(flowFile, firstFile.toURI().toString(), "FlowFile contains bytes 0 through " + position + " of source file",
                                    TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos));
                            session.transfer(flowFile, REL_SUCCESS);
                            getLogger().debug("Created {} from rolled over file {} and routed to success", new Object[]{flowFile, firstFile});

                            // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
                            cleanup();
                            tfo.setState(new TailFileState(tailFile, null, null, 0L, firstFile.lastModified() + 1L, firstFile.length(), null, tfo.getState().getBuffer()));

                            // must ensure that we do session.commit() before persisting state in order to avoid data loss.
                            session.commit();
                            persistState(tfo, context);
                        }
                    } else {
                        getLogger().debug("Checksum for {} did not match expected checksum. Checksum for file was {} but expected {}. Will consume entire file",
                                new Object[]{firstFile, checksumResult, expectedChecksum});
                    }
                }
            }
        }

        // For each file that we found that matches our Rollover Pattern, and has a last modified date later than the timestamp
        // that we recovered from the state file, we need to consume the entire file. The only exception to this is the file that
        // we were reading when we last stopped, as it may already have been partially consumed. That is taken care of in the
        // above block of code.
        for (final File file : rolledOffFiles) {
            tfo.setState(consumeFileFully(file, context, session, tfo));
        }

        return rolloverOccurred;
    } catch (final IOException e) {
        getLogger().error("Failed to recover files that have rolled over due to {}", new Object[]{e});
        return false;
    }
}
 
Example 7
Source File: GetHDFS.java    From localization_nifi with Apache License 2.0
protected void processBatchOfFiles(final List<Path> files, final ProcessContext context, final ProcessSession session) {
    // process the batch of files
    InputStream stream = null;
    CompressionCodec codec = null;
    Configuration conf = getConfiguration();
    FileSystem hdfs = getFileSystem();
    final boolean keepSourceFiles = context.getProperty(KEEP_SOURCE_FILE).asBoolean();
    final Double bufferSizeProp = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B);
    int bufferSize = bufferSizeProp != null ? bufferSizeProp.intValue() : conf.getInt(BUFFER_SIZE_KEY,
            BUFFER_SIZE_DEFAULT);
    final Path rootDir = new Path(context.getProperty(DIRECTORY).evaluateAttributeExpressions().getValue());

    final CompressionType compressionType = CompressionType.valueOf(context.getProperty(COMPRESSION_CODEC).toString());
    final boolean inferCompressionCodec = compressionType == CompressionType.AUTOMATIC;
    if (inferCompressionCodec || compressionType != CompressionType.NONE) {
        codec = getCompressionCodec(context, getConfiguration());
    }
    final CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);
    for (final Path file : files) {
        try {
            if (!hdfs.exists(file)) {
                continue; // if file is no longer there then move on
            }
            final String originalFilename = file.getName();
            final String relativePath = getPathDifference(rootDir, file);

            stream = hdfs.open(file, bufferSize);

            final String outputFilename;
            // Check if we should infer compression codec
            if (inferCompressionCodec) {
                codec = compressionCodecFactory.getCodec(file);
            }
            // Check if compression codec is defined (inferred or otherwise)
            if (codec != null) {
                stream = codec.createInputStream(stream);
                outputFilename = StringUtils.removeEnd(originalFilename, codec.getDefaultExtension());
            } else {
                outputFilename = originalFilename;
            }

            FlowFile flowFile = session.create();

            final StopWatch stopWatch = new StopWatch(true);
            flowFile = session.importFrom(stream, flowFile);
            stopWatch.stop();
            final String dataRate = stopWatch.calculateDataRate(flowFile.getSize());
            final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);

            flowFile = session.putAttribute(flowFile, CoreAttributes.PATH.key(), relativePath);
            flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), outputFilename);

            if (!keepSourceFiles && !hdfs.delete(file, false)) {
                getLogger().warn("Could not remove {} from HDFS. Not ingesting this file ...",
                        new Object[]{file});
                session.remove(flowFile);
                continue;
            }

            final String transitUri = (originalFilename.startsWith("/")) ? "hdfs:/" + originalFilename : "hdfs://" + originalFilename;
            session.getProvenanceReporter().receive(flowFile, transitUri);
            session.transfer(flowFile, REL_SUCCESS);
            getLogger().info("retrieved {} from HDFS {} in {} milliseconds at a rate of {}",
                    new Object[]{flowFile, file, millis, dataRate});
            session.commit();
        } catch (final Throwable t) {
            getLogger().error("Error retrieving file {} from HDFS due to {}", new Object[]{file, t});
            session.rollback();
            context.yield();
        } finally {
            IOUtils.closeQuietly(stream);
            stream = null;
        }
    }
}
 
Example 8
Source File: AbstractFlowFileServerProtocol.java    From localization_nifi with Apache License 2.0
@Override
public int receiveFlowFiles(final Peer peer, final ProcessContext context, final ProcessSession session, final FlowFileCodec codec) throws IOException, ProtocolException {
    if (!handshakeCompleted) {
        throw new IllegalStateException("Handshake has not been completed");
    }
    if (shutdown) {
        throw new IllegalStateException("Protocol is shutdown");
    }

    logger.debug("{} receiving FlowFiles from {}", this, peer);

    final CommunicationsSession commsSession = peer.getCommunicationsSession();
    final DataInputStream dis = new DataInputStream(commsSession.getInput().getInputStream());
    String remoteDn = commsSession.getUserDn();
    if (remoteDn == null) {
        remoteDn = "none";
    }

    final StopWatch stopWatch = new StopWatch(true);
    final CRC32 crc = new CRC32();

    // Peer has data. Otherwise, we would not have been called, because they would not have sent
    // a SEND_FLOWFILES request to us. Just decode the bytes into FlowFiles until the peer indicates
    // that it is finished sending data.
    final Set<FlowFile> flowFilesReceived = new HashSet<>();
    long bytesReceived = 0L;
    boolean continueTransaction = true;
    while (continueTransaction) {
        final long startNanos = System.nanoTime();
        final InputStream flowFileInputStream = handshakeProperties.isUseGzip() ? new CompressionInputStream(dis) : dis;
        final CheckedInputStream checkedInputStream = new CheckedInputStream(flowFileInputStream, crc);

        final DataPacket dataPacket = codec.decode(checkedInputStream);
        if (dataPacket == null) {
            logger.debug("{} Received null dataPacket indicating the end of transaction from {}", this, peer);
            break;
        }
        FlowFile flowFile = session.create();
        flowFile = session.importFrom(dataPacket.getData(), flowFile);
        flowFile = session.putAllAttributes(flowFile, dataPacket.getAttributes());

        final long transferNanos = System.nanoTime() - startNanos;
        final long transferMillis = TimeUnit.MILLISECONDS.convert(transferNanos, TimeUnit.NANOSECONDS);
        final String sourceSystemFlowFileUuid = dataPacket.getAttributes().get(CoreAttributes.UUID.key());

        final String host = StringUtils.isEmpty(peer.getHost()) ? "unknown" : peer.getHost();
        final String port = peer.getPort() <= 0 ? "unknown" : String.valueOf(peer.getPort());

        final Map<String,String> attributes = new HashMap<>(4);
        attributes.put(CoreAttributes.UUID.key(), UUID.randomUUID().toString());
        attributes.put(SiteToSiteAttributes.S2S_HOST.key(), host);
        attributes.put(SiteToSiteAttributes.S2S_ADDRESS.key(), host + ":" + port);

        flowFile = session.putAllAttributes(flowFile, attributes);

        final String transitUri = createTransitUri(peer, sourceSystemFlowFileUuid);
        session.getProvenanceReporter().receive(flowFile, transitUri, sourceSystemFlowFileUuid == null
                ? null : "urn:nifi:" + sourceSystemFlowFileUuid, "Remote Host=" + peer.getHost() + ", Remote DN=" + remoteDn, transferMillis);
        session.transfer(flowFile, Relationship.ANONYMOUS);
        flowFilesReceived.add(flowFile);
        bytesReceived += flowFile.getSize();

        final Response transactionResponse = readTransactionResponse(false, commsSession);
        switch (transactionResponse.getCode()) {
            case CONTINUE_TRANSACTION:
                logger.debug("{} Received ContinueTransaction indicator from {}", this, peer);
                break;
            case FINISH_TRANSACTION:
                logger.debug("{} Received FinishTransaction indicator from {}", this, peer);
                continueTransaction = false;
                break;
            case CANCEL_TRANSACTION:
                logger.info("{} Received CancelTransaction indicator from {} with explanation {}", this, peer, transactionResponse.getMessage());
                session.rollback();
                return 0;
            default:
                throw new ProtocolException("Received unexpected response from peer: when expecting Continue Transaction or Finish Transaction, received " + transactionResponse);
        }
    }

    // we received a FINISH_TRANSACTION indicator. Send back a CONFIRM_TRANSACTION message
    // to peer so that we can verify that the connection is still open. This is a two-phase commit,
    // which helps to prevent the chances of data duplication. Without doing this, we may commit the
    // session and then when we send the response back to the peer, the peer may have timed out and may not
    // be listening. As a result, it will re-send the data. By doing this two-phase commit, we narrow the
    // Critical Section involved in this transaction so that rather than the Critical Section being the
    // time window involved in the entire transaction, it is reduced to a simple round-trip conversation.
    logger.debug("{} Sending CONFIRM_TRANSACTION Response Code to {}", this, peer);
    String calculatedCRC = String.valueOf(crc.getValue());
    writeTransactionResponse(false, ResponseCode.CONFIRM_TRANSACTION, commsSession, calculatedCRC);

    FlowFileTransaction transaction = new FlowFileTransaction(session, context, stopWatch, bytesReceived, flowFilesReceived, calculatedCRC);
    return commitReceiveTransaction(peer, transaction);
}
 
Example 9
Source File: StandardRemoteGroupPort.java    From localization_nifi with Apache License 2.0
private int receiveFlowFiles(final Transaction transaction, final ProcessContext context, final ProcessSession session) throws IOException, ProtocolException {
    final String userDn = transaction.getCommunicant().getDistinguishedName();

    final StopWatch stopWatch = new StopWatch(true);
    final Set<FlowFile> flowFilesReceived = new HashSet<>();
    long bytesReceived = 0L;

    while (true) {
        final long start = System.nanoTime();
        final DataPacket dataPacket = transaction.receive();
        if (dataPacket == null) {
            break;
        }

        FlowFile flowFile = session.create();
        flowFile = session.putAllAttributes(flowFile, dataPacket.getAttributes());

        final Communicant communicant = transaction.getCommunicant();
        final String host = StringUtils.isEmpty(communicant.getHost()) ? "unknown" : communicant.getHost();
        final String port = communicant.getPort() < 0 ? "unknown" : String.valueOf(communicant.getPort());

        final Map<String,String> attributes = new HashMap<>(2);
        attributes.put(SiteToSiteAttributes.S2S_HOST.key(), host);
        attributes.put(SiteToSiteAttributes.S2S_ADDRESS.key(), host + ":" + port);

        flowFile = session.putAllAttributes(flowFile, attributes);

        flowFile = session.importFrom(dataPacket.getData(), flowFile);
        final long receiveNanos = System.nanoTime() - start;
        flowFilesReceived.add(flowFile);

        String sourceFlowFileIdentifier = dataPacket.getAttributes().get(CoreAttributes.UUID.key());
        if (sourceFlowFileIdentifier == null) {
            sourceFlowFileIdentifier = "<Unknown Identifier>";
        }

        final String transitUri = transaction.getCommunicant().createTransitUri(sourceFlowFileIdentifier);
        session.getProvenanceReporter().receive(flowFile, transitUri, "urn:nifi:" + sourceFlowFileIdentifier,
                "Remote DN=" + userDn, TimeUnit.NANOSECONDS.toMillis(receiveNanos));

        session.transfer(flowFile, Relationship.ANONYMOUS);
        bytesReceived += dataPacket.getSize();
    }

    // Confirm that what we received was the correct data.
    transaction.confirm();

    // Commit the session so that we have persisted the data
    session.commit();

    transaction.complete();

    if (!flowFilesReceived.isEmpty()) {
        stopWatch.stop();
        final String flowFileDescription = flowFilesReceived.size() < 20 ? flowFilesReceived.toString() : flowFilesReceived.size() + " FlowFiles";
        final String uploadDataRate = stopWatch.calculateDataRate(bytesReceived);
        final long uploadMillis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
        final String dataSize = FormatUtils.formatDataSize(bytesReceived);
        logger.info("{} Successfully received {} ({}) from {} in {} milliseconds at a rate of {}", new Object[]{
            this, flowFileDescription, dataSize, transaction.getCommunicant().getUrl(), uploadMillis, uploadDataRate});
    }

    return flowFilesReceived.size();
}
 
Example 10
Source File: ExecuteInfluxDBQuery.java    From nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {

    String query = null;
    String database = null;
    TimeUnit queryResultTimeunit = null;
    Charset charset = null;
    FlowFile outgoingFlowFile = null;

    // If there are incoming connections, prepare query params from flow file
    if ( context.hasIncomingConnection() ) {
        FlowFile incomingFlowFile = session.get();

        if ( incomingFlowFile == null && context.hasNonLoopConnection() ) {
            return;
        }

        charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(incomingFlowFile).getValue());
        if ( incomingFlowFile.getSize() == 0 ) {
            if ( context.getProperty(INFLUX_DB_QUERY).isSet() ) {
                query = context.getProperty(INFLUX_DB_QUERY).evaluateAttributeExpressions(incomingFlowFile).getValue();
            } else {
                String message = "FlowFile query is empty and no scheduled query is set";
                getLogger().error(message);
                incomingFlowFile = session.putAttribute(incomingFlowFile, INFLUX_DB_ERROR_MESSAGE, message);
                session.transfer(incomingFlowFile, REL_FAILURE);
                return;
            }
        } else {

            try {
                query = getQuery(session, charset, incomingFlowFile);
            } catch(IOException ioe) {
                getLogger().error("Exception while reading from FlowFile " + ioe.getLocalizedMessage(), ioe);
                throw new ProcessException(ioe);
            }
        }
        outgoingFlowFile = incomingFlowFile;

    } else {
        outgoingFlowFile = session.create();
        charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(outgoingFlowFile).getValue());
        query = context.getProperty(INFLUX_DB_QUERY).evaluateAttributeExpressions(outgoingFlowFile).getValue();
    }

    database = context.getProperty(DB_NAME).evaluateAttributeExpressions(outgoingFlowFile).getValue();
    queryResultTimeunit = TimeUnit.valueOf(context.getProperty(INFLUX_DB_QUERY_RESULT_TIMEUNIT).evaluateAttributeExpressions(outgoingFlowFile).getValue());

    try {
        long startTimeMillis = System.currentTimeMillis();
        int chunkSize = context.getProperty(INFLUX_DB_QUERY_CHUNK_SIZE).evaluateAttributeExpressions(outgoingFlowFile).asInteger();
        List<QueryResult> result = executeQuery(context, database, query, queryResultTimeunit, chunkSize);

        String json = result.size() == 1 ? gson.toJson(result.get(0)) : gson.toJson(result);

        if ( getLogger().isDebugEnabled() ) {
            getLogger().debug("Query result {} ", new Object[] {result});
        }

        ByteArrayInputStream bais = new ByteArrayInputStream(json.getBytes(charset));
        session.importFrom(bais, outgoingFlowFile);
        bais.close();

        final long endTimeMillis = System.currentTimeMillis();

        if ( ! hasErrors(result) ) {
            outgoingFlowFile = session.putAttribute(outgoingFlowFile, INFLUX_DB_EXECUTED_QUERY, String.valueOf(query));
            session.getProvenanceReporter().send(outgoingFlowFile, makeProvenanceUrl(context, database),
                    (endTimeMillis - startTimeMillis));
            session.transfer(outgoingFlowFile, REL_SUCCESS);
        } else {
            outgoingFlowFile = populateErrorAttributes(session, outgoingFlowFile, query, queryErrors(result));
            session.transfer(outgoingFlowFile, REL_FAILURE);
        }

    } catch (Exception exception) {
        outgoingFlowFile = populateErrorAttributes(session, outgoingFlowFile, query, exception.getMessage());
        if ( exception.getCause() instanceof SocketTimeoutException ) {
            getLogger().error("Failed to read from InfluxDB due SocketTimeoutException to {} and retrying",
                    new Object[]{exception.getCause().getLocalizedMessage()}, exception.getCause());
            session.transfer(outgoingFlowFile, REL_RETRY);
        } else {
            getLogger().error("Failed to read from InfluxDB due to {}",
                    new Object[]{exception.getLocalizedMessage()}, exception);
            session.transfer(outgoingFlowFile, REL_FAILURE);
        }
        context.yield();
    }
}
 
Example 11
Source File: TailFile.java    From nifi with Apache License 2.0
/**
 * Finds any files that have rolled over and have not yet been ingested by
 * this Processor. Each of these files that is found will be ingested as its
 * own FlowFile. If a file is found that has been partially ingested, the
 * rest of the file will be ingested as a single FlowFile but the data that
 * already has been ingested will not be ingested again.
 *
 * @param context the ProcessContext to use in order to obtain Processor
 * configuration.
 * @param session the ProcessSession to use in order to interact with
 * FlowFile creation and content.
 * @param expectedChecksum the checksum value that is expected for the
 * oldest file from offset 0 through &lt;position&gt;.
 * @param timestamp the latest Last Modified Timestamp that has been
 * consumed. Any data that was written before this timestamp will not be
 * ingested.
 * @param position the byte offset in the file being tailed, where tailing
 * last left off.
 *
 * @return <code>true</code> if the file being tailed has rolled over, false
 * otherwise
 */
private boolean recoverRolledFiles(final ProcessContext context, final ProcessSession session, final String tailFile, final List<File> rolledOffFiles, final Long expectedChecksum,
        final long timestamp, final long position) {
    try {
        getLogger().debug("Recovering Rolled Off Files; total number of files rolled off = {}", new Object[]{rolledOffFiles.size()});
        TailFileObject tfo = states.get(tailFile);

        // For first file that we find, it may or may not be the file that we were last reading from.
        // As a result, we have to read up to the position we stored, while calculating the checksum. If the checksums match,
        // then we know we've already processed this file. If the checksums do not match, then we have not
        // processed this file and we need to seek back to position 0 and ingest the entire file.
        // For all other files that have been rolled over, we need to just ingest the entire file.
        boolean rolloverOccurred = !rolledOffFiles.isEmpty();
        if (rolloverOccurred && expectedChecksum != null && rolledOffFiles.get(0).length() >= position) {
            final File firstFile = rolledOffFiles.get(0);

            final long startNanos = System.nanoTime();
            if (position > 0) {
                try (final InputStream fis = new FileInputStream(firstFile);
                        final CheckedInputStream in = new CheckedInputStream(fis, new CRC32())) {
                    StreamUtils.copy(in, new NullOutputStream(), position);

                    final long checksumResult = in.getChecksum().getValue();
                    if (checksumResult == expectedChecksum) {
                        getLogger().debug("Checksum for {} matched expected checksum. Will skip first {} bytes", new Object[]{firstFile, position});

                        // This is the same file that we were reading when we shutdown. Start reading from this point on.
                        rolledOffFiles.remove(0);
                        FlowFile flowFile = session.create();
                        flowFile = session.importFrom(in, flowFile);
                        if (flowFile.getSize() == 0L) {
                            session.remove(flowFile);
                            // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
                            cleanup();
                            tfo.setState(new TailFileState(tailFile, null, null, 0L, firstFile.lastModified() + 1L, firstFile.length(), null, tfo.getState().getBuffer()));
                        } else {
                            final Map<String, String> attributes = new HashMap<>(3);
                            attributes.put(CoreAttributes.FILENAME.key(), firstFile.getName());
                            attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain");
                            attributes.put("tailfile.original.path", tailFile);
                            flowFile = session.putAllAttributes(flowFile, attributes);

                            session.getProvenanceReporter().receive(flowFile, firstFile.toURI().toString(), "FlowFile contains bytes 0 through " + position + " of source file",
                                    TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos));
                            session.transfer(flowFile, REL_SUCCESS);
                            getLogger().debug("Created {} from rolled over file {} and routed to success", new Object[]{flowFile, firstFile});

                            // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
                            cleanup();
                            tfo.setState(new TailFileState(tailFile, null, null, 0L, firstFile.lastModified() + 1L, firstFile.length(), null, tfo.getState().getBuffer()));

                            // must ensure that we do session.commit() before persisting state in order to avoid data loss.
                            session.commit();
                            persistState(tfo, context);
                        }
                    } else {
                        getLogger().debug("Checksum for {} did not match expected checksum. Checksum for file was {} but expected {}. Will consume entire file",
                                new Object[]{firstFile, checksumResult, expectedChecksum});
                    }
                }
            }
        }

        // For each file that we found that matches our Rollover Pattern, and has a last modified date later than the timestamp
        // that we recovered from the state file, we need to consume the entire file. The only exception to this is the file that
        // we were reading when we last stopped, as it may already have been partially consumed. That is taken care of in the
        // above block of code.
        for (final File file : rolledOffFiles) {
            tfo.setState(consumeFileFully(file, context, session, tfo));
        }

        return rolloverOccurred;
    } catch (final IOException e) {
        getLogger().error("Failed to recover files that have rolled over due to {}", new Object[]{e});
        return false;
    }
}
 
Example 12
Source File: FetchGCSObject.java    From nifi with Apache License 2.0
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final long startNanos = System.nanoTime();

    final String bucketName = context.getProperty(BUCKET).evaluateAttributeExpressions(flowFile).getValue();
    final String key = context.getProperty(KEY).evaluateAttributeExpressions(flowFile).getValue();
    final Long generation = context.getProperty(GENERATION).evaluateAttributeExpressions(flowFile).asLong();
    final String encryptionKey = context.getProperty(ENCRYPTION_KEY).evaluateAttributeExpressions(flowFile).getValue();

    final Storage storage = getCloudService();
    final BlobId blobId = BlobId.of(bucketName, key, generation);

    try {
        final List<Storage.BlobSourceOption> blobSourceOptions = new ArrayList<>(2);

        if (encryptionKey != null) {
            blobSourceOptions.add(Storage.BlobSourceOption.decryptionKey(encryptionKey));
        }

        if (generation != null) {
            blobSourceOptions.add(Storage.BlobSourceOption.generationMatch());
        }

        final Blob blob = storage.get(blobId);
        if (blob == null) {
            throw new StorageException(404, "Blob " + blobId + " not found");
        }

        final ReadChannel reader = storage.reader(blobId, blobSourceOptions.toArray(new Storage.BlobSourceOption[0]));
        flowFile = session.importFrom(Channels.newInputStream(reader), flowFile);

        final Map<String, String> attributes = StorageAttributes.createAttributes(blob);
        flowFile = session.putAllAttributes(flowFile, attributes);
    } catch (StorageException e) {
        getLogger().error("Failed to fetch GCS Object due to {}", new Object[] {e}, e);
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    session.transfer(flowFile, REL_SUCCESS);

    final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
    getLogger().info("Successfully retrieved GCS Object for {} in {} millis; routing to success", new Object[]{flowFile, millis});
    session.getProvenanceReporter().fetch(flowFile, "https://" + bucketName + ".storage.googleapis.com/" + key, millis);
}
 
Example 13
Source File: GetHDFS.java    From nifi with Apache License 2.0
protected void processBatchOfFiles(final List<Path> files, final ProcessContext context, final ProcessSession session) {
    // process the batch of files
    InputStream stream = null;
    CompressionCodec codec = null;
    Configuration conf = getConfiguration();
    FileSystem hdfs = getFileSystem();
    final boolean keepSourceFiles = context.getProperty(KEEP_SOURCE_FILE).asBoolean();
    final Double bufferSizeProp = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B);
    int bufferSize = bufferSizeProp != null ? bufferSizeProp.intValue() : conf.getInt(BUFFER_SIZE_KEY,
            BUFFER_SIZE_DEFAULT);
    final Path rootDir = new Path(context.getProperty(DIRECTORY).evaluateAttributeExpressions().getValue());

    final CompressionType compressionType = CompressionType.valueOf(context.getProperty(COMPRESSION_CODEC).toString());
    final boolean inferCompressionCodec = compressionType == CompressionType.AUTOMATIC;
    if (inferCompressionCodec || compressionType != CompressionType.NONE) {
        codec = getCompressionCodec(context, getConfiguration());
    }
    final CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);
    for (final Path file : files) {
        try {
            if (!getUserGroupInformation().doAs((PrivilegedExceptionAction<Boolean>) () -> hdfs.exists(file))) {
                continue; // if file is no longer there then move on
            }
            final String originalFilename = file.getName();
            final String relativePath = getPathDifference(rootDir, file);

            stream = getUserGroupInformation().doAs((PrivilegedExceptionAction<FSDataInputStream>) () -> hdfs.open(file, bufferSize));

            final String outputFilename;
            // Check if we should infer compression codec
            if (inferCompressionCodec) {
                codec = compressionCodecFactory.getCodec(file);
            }
            // Check if compression codec is defined (inferred or otherwise)
            if (codec != null) {
                stream = codec.createInputStream(stream);
                outputFilename = StringUtils.removeEnd(originalFilename, codec.getDefaultExtension());
            } else {
                outputFilename = originalFilename;
            }

            FlowFile flowFile = session.create();

            final StopWatch stopWatch = new StopWatch(true);
            flowFile = session.importFrom(stream, flowFile);
            stopWatch.stop();
            final String dataRate = stopWatch.calculateDataRate(flowFile.getSize());
            final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);

            flowFile = session.putAttribute(flowFile, CoreAttributes.PATH.key(), relativePath.isEmpty() ? "." : relativePath);
            flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), outputFilename);

            if (!keepSourceFiles && !getUserGroupInformation().doAs((PrivilegedExceptionAction<Boolean>) () -> hdfs.delete(file, false))) {
                getLogger().warn("Could not remove {} from HDFS. Not ingesting this file ...",
                        new Object[]{file});
                session.remove(flowFile);
                continue;
            }

            session.getProvenanceReporter().receive(flowFile, file.toString());
            session.transfer(flowFile, REL_SUCCESS);
            getLogger().info("retrieved {} from HDFS {} in {} milliseconds at a rate of {}",
                    new Object[]{flowFile, file, millis, dataRate});
            session.commit();
        } catch (final Throwable t) {
            getLogger().error("Error retrieving file {} from HDFS due to {}", new Object[]{file, t});
            session.rollback();
            context.yield();
        } finally {
            IOUtils.closeQuietly(stream);
            stream = null;
        }
    }
}
 
Example 14
Source File: GetRethinkDB.java    From nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    String id = context.getProperty(RETHINKDB_DOCUMENT_ID).evaluateAttributeExpressions(flowFile).getValue();
    String readMode = context.getProperty(READ_MODE).evaluateAttributeExpressions(flowFile).getValue();

    if ( StringUtils.isEmpty(id) ) {
        getLogger().error(DOCUMENT_ID_EMPTY_MESSAGE);
        flowFile = session.putAttribute(flowFile, RETHINKDB_ERROR_MESSAGE, DOCUMENT_ID_EMPTY_MESSAGE);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    try {
        long startTimeMillis = System.currentTimeMillis();
        Map<String,Object> document = getDocument(id, readMode);

        if ( document == null ) {
            getLogger().debug("Document with id '" + id + "' not found");
            flowFile = session.putAttribute(flowFile, RETHINKDB_ERROR_MESSAGE, "Document with id '" + id + "' not found");
            session.transfer(flowFile, REL_NOT_FOUND);
            return;
        }

        String json = gson.toJson(document);

        byte [] documentBytes = json.getBytes(charset);

        if ( documentBytes.length > maxDocumentsSize ) {
            getLogger().error("Document too big with size " + documentBytes.length + " and max limit is " + maxDocumentsSize );
            flowFile = session.putAttribute(flowFile, RETHINKDB_ERROR_MESSAGE, "Document too big size " + documentBytes.length + " bytes");
            session.transfer(flowFile, REL_FAILURE);
            return;
        }

        ByteArrayInputStream bais = new ByteArrayInputStream(documentBytes);
        session.importFrom(bais, flowFile);
        final long endTimeMillis = System.currentTimeMillis();

        getLogger().debug("Json document {} retrieved Result: {}", new Object[] {id, document});

        session.transfer(flowFile, REL_SUCCESS);
        session.getProvenanceReporter().fetch(flowFile,
            new StringBuilder("rethinkdb://").append(databaseName).append("/").append(tableName).append("/").append(id).toString(),
            (endTimeMillis - startTimeMillis));

    } catch (Exception exception) {
        getLogger().error("Failed to get document from RethinkDB due to error {}",
                new Object[]{exception.getLocalizedMessage()}, exception);
        flowFile = session.putAttribute(flowFile, RETHINKDB_ERROR_MESSAGE, exception.getMessage() + "");
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
    }
}
 
Example 15
Source File: DeleteRethinkDB.java    From nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    String id = context.getProperty(RETHINKDB_DOCUMENT_ID).evaluateAttributeExpressions(flowFile).getValue();
    String durablity = context.getProperty(DURABILITY).evaluateAttributeExpressions(flowFile).getValue();
    Boolean returnChanges = context.getProperty(RETURN_CHANGES).evaluateAttributeExpressions(flowFile).asBoolean();

    if ( StringUtils.isEmpty(id) ) {
        getLogger().error(DOCUMENT_ID_EMPTY_MESSAGE);
        flowFile = session.putAttribute(flowFile, RETHINKDB_ERROR_MESSAGE, DOCUMENT_ID_EMPTY_MESSAGE);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    try {
        long startTimeMillis = System.currentTimeMillis();
        Map<String,Object> result = deleteDocument(id, durablity, returnChanges);
        final long endTimeMillis = System.currentTimeMillis();

        getLogger().debug("Json document {} deleted Result: {}", new Object[] {id, result});

        flowFile = populateAttributes(session, flowFile, result);

        Long deletedCount = ((Long)result.get(RESULT_DELETED_KEY)).longValue();

        if ( deletedCount == 0L ) {
            getLogger().debug("Deleted count should be 1 but was " + deletedCount + " for document with id '" + id + "'");

            flowFile = populateAttributes(session, flowFile, result);

            flowFile = session.putAttribute(flowFile, RETHINKDB_ERROR_MESSAGE, "Deleted count should be 1 but was " + deletedCount + " for document with id '" + id + "'");
            session.transfer(flowFile, REL_NOT_FOUND);
            return;
        }

        if ( returnChanges ) {
            String json = gson.toJson(((List)result.get(RESULT_CHANGES_KEY)).get(0));

            byte [] documentBytes = json.getBytes(charset);

            ByteArrayInputStream bais = new ByteArrayInputStream(documentBytes);
            session.importFrom(bais, flowFile);

            session.getProvenanceReporter().modifyContent(flowFile,
                    new StringBuilder("rethinkdb://").append(databaseName).append("/").append(tableName).append("/").append(id).toString(),
                    (endTimeMillis - startTimeMillis));
        }

        session.transfer(flowFile, REL_SUCCESS);


    } catch (Exception exception) {
        getLogger().error("Failed to delete document from RethinkDB due to error {}",
                new Object[]{exception.getLocalizedMessage()}, exception);
        flowFile = session.putAttribute(flowFile, RETHINKDB_ERROR_MESSAGE, exception.getMessage());
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
    }
}
 
Example 16
Source File: StandardRemoteGroupPort.java    From nifi with Apache License 2.0
private int receiveFlowFiles(final Transaction transaction, final ProcessContext context, final ProcessSession session) throws IOException, ProtocolException {
    final String userDn = transaction.getCommunicant().getDistinguishedName();

    final StopWatch stopWatch = new StopWatch(true);
    final Set<FlowFile> flowFilesReceived = new HashSet<>();
    long bytesReceived = 0L;

    while (true) {
        final long start = System.nanoTime();
        final DataPacket dataPacket = transaction.receive();
        if (dataPacket == null) {
            break;
        }

        FlowFile flowFile = session.create();
        flowFile = session.putAllAttributes(flowFile, dataPacket.getAttributes());

        final Communicant communicant = transaction.getCommunicant();
        final String host = StringUtils.isEmpty(communicant.getHost()) ? "unknown" : communicant.getHost();
        final String port = communicant.getPort() < 0 ? "unknown" : String.valueOf(communicant.getPort());

        final Map<String,String> attributes = new HashMap<>(2);
        attributes.put(SiteToSiteAttributes.S2S_HOST.key(), host);
        attributes.put(SiteToSiteAttributes.S2S_ADDRESS.key(), host + ":" + port);
        attributes.put(SiteToSiteAttributes.S2S_PORT_ID.key(), getTargetIdentifier());

        flowFile = session.putAllAttributes(flowFile, attributes);

        flowFile = session.importFrom(dataPacket.getData(), flowFile);
        final long receiveNanos = System.nanoTime() - start;
        flowFilesReceived.add(flowFile);

        String sourceFlowFileIdentifier = dataPacket.getAttributes().get(CoreAttributes.UUID.key());
        if (sourceFlowFileIdentifier == null) {
            sourceFlowFileIdentifier = "<Unknown Identifier>";
        }

        final String transitUri = transaction.getCommunicant().createTransitUri(sourceFlowFileIdentifier);
        session.getProvenanceReporter().receive(flowFile, transitUri, "urn:nifi:" + sourceFlowFileIdentifier,
                "Remote DN=" + userDn, TimeUnit.NANOSECONDS.toMillis(receiveNanos));

        session.transfer(flowFile, Relationship.ANONYMOUS);
        bytesReceived += dataPacket.getSize();
    }

    // Confirm that what we received was the correct data.
    transaction.confirm();

    // Commit the session so that we have persisted the data
    session.commit();

    transaction.complete();

    if (!flowFilesReceived.isEmpty()) {
        stopWatch.stop();
        final String flowFileDescription = flowFilesReceived.size() < 20 ? flowFilesReceived.toString() : flowFilesReceived.size() + " FlowFiles";
        final String uploadDataRate = stopWatch.calculateDataRate(bytesReceived);
        final long uploadMillis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
        final String dataSize = FormatUtils.formatDataSize(bytesReceived);
        logger.info("{} Successfully received {} ({}) from {} in {} milliseconds at a rate of {}", new Object[]{
            this, flowFileDescription, dataSize, transaction.getCommunicant().getUrl(), uploadMillis, uploadDataRate});
    }

    return flowFilesReceived.size();
}