Java Code Examples for org.apache.nifi.flowfile.FlowFile#getSize()

The following examples show how to use org.apache.nifi.flowfile.FlowFile#getSize(). Each example is drawn from an open source project; the source file, project, and license are noted above the code.
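Before the project examples, here is a minimal, hypothetical sketch of the pattern most of them share: getSize() reports the length of a FlowFile's content in bytes without reading the content, so it is commonly used to short-circuit empty FlowFiles and to pre-size a buffer before reading. The enclosing processor class, its REL_SUCCESS and REL_FAILURE relationships, and the usual imports (org.apache.nifi.processor.*, org.apache.nifi.stream.io.StreamUtils) are assumed here and are not part of the getSize() API.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    // getSize() returns the content length in bytes without touching the content repository
    if (flowFile.getSize() == 0L) {
        // nothing to read; REL_FAILURE is an assumed relationship on this hypothetical processor
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    // for small payloads, the reported size can be used to pre-allocate an exact-length buffer
    final byte[] buffer = new byte[(int) flowFile.getSize()];
    session.read(flowFile, in -> StreamUtils.fillBuffer(in, buffer));

    getLogger().debug("Read {} bytes from {}", new Object[] {flowFile.getSize(), flowFile});
    session.transfer(flowFile, REL_SUCCESS);
}
Note that getSize() returns a long; the cast to int above, which several of the examples below also perform, is only safe when the content is known to be small enough to buffer in memory.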
Example 1
Source File: StandardProcessSession.java    From nifi with Apache License 2.0
@Override
public void remove(FlowFile flowFile) {
    verifyTaskActive();

    flowFile = validateRecordState(flowFile);
    final StandardRepositoryRecord record = getRecord(flowFile);
    record.markForDelete();
    removedFlowFiles.add(flowFile.getAttribute(CoreAttributes.UUID.key()));

    // if original connection is null, the FlowFile was created in this session, so we
    // do not want to count it toward the removed count.
    if (record.getOriginalQueue() == null) {
        // if we've generated any Fork events, remove them because the FlowFile was created
        // and then removed in this session.
        generatedProvenanceEvents.remove(flowFile);
        removeForkEvents(flowFile);
    } else {
        removedCount++;
        removedBytes += flowFile.getSize();
        provenanceReporter.drop(flowFile, flowFile.getAttribute(CoreAttributes.DISCARD_REASON.key()));
    }
}
 
Example 2
Source File: StandardProcessSession.java    From nifi with Apache License 2.0
@Override
public void remove(Collection<FlowFile> flowFiles) {
    verifyTaskActive();

    flowFiles = validateRecordState(flowFiles);
    for (final FlowFile flowFile : flowFiles) {
        final StandardRepositoryRecord record = getRecord(flowFile);
        record.markForDelete();
        removedFlowFiles.add(flowFile.getAttribute(CoreAttributes.UUID.key()));

        // if original connection is null, the FlowFile was created in this session, so we
        // do not want to count it toward the removed count.
        if (record.getOriginalQueue() == null) {
            generatedProvenanceEvents.remove(flowFile);
            removeForkEvents(flowFile);
        } else {
            removedCount++;
            removedBytes += flowFile.getSize();
            provenanceReporter.drop(flowFile, flowFile.getAttribute(CoreAttributes.DISCARD_REASON.key()));
        }
    }
}
 
Example 3
Source File: AbstractListenEventBatchingProcessor.java    From localization_nifi with Apache License 2.0
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final int maxBatchSize = context.getProperty(MAX_BATCH_SIZE).asInteger();
    final Map<String,FlowFileEventBatch> batches = getBatches(session, maxBatchSize, messageDemarcatorBytes);

    // if the size is 0 then there was nothing to process so return
    // we don't need to yield here because we have a long poll inside of getBatches
    if (batches.size() == 0) {
        return;
    }

    final List<E> allEvents = new ArrayList<>();

    for (Map.Entry<String,FlowFileEventBatch> entry : batches.entrySet()) {
        FlowFile flowFile = entry.getValue().getFlowFile();
        final List<E> events = entry.getValue().getEvents();

        if (flowFile.getSize() == 0L || events.size() == 0) {
            session.remove(flowFile);
            getLogger().debug("No data written to FlowFile from batch {}; removing FlowFile", new Object[] {entry.getKey()});
            continue;
        }

        final Map<String,String> attributes = getAttributes(entry.getValue());
        flowFile = session.putAllAttributes(flowFile, attributes);

        getLogger().debug("Transferring {} to success", new Object[] {flowFile});
        session.transfer(flowFile, REL_SUCCESS);
        session.adjustCounter("FlowFiles Transferred to Success", 1L, false);

        // the sender and command will be the same for all events based on the batch key
        final String transitUri = getTransitUri(entry.getValue());
        session.getProvenanceReporter().receive(flowFile, transitUri);

        allEvents.addAll(events);
    }

    // let sub-classes take any additional actions
    postProcess(context, session, allEvents);
}
 
Example 4
Source File: StandardProvenanceEventRecord.java    From localization_nifi with Apache License 2.0
@Override
public ProvenanceEventBuilder fromFlowFile(final FlowFile flowFile) {
    setFlowFileEntryDate(flowFile.getEntryDate());
    setLineageStartDate(flowFile.getLineageStartDate());
    setAttributes(Collections.<String, String>emptyMap(), flowFile.getAttributes());
    uuid = flowFile.getAttribute(CoreAttributes.UUID.key());
    this.contentSize = flowFile.getSize();
    return this;
}
 
Example 5
Source File: StandardProcessSession.java    From localization_nifi with Apache License 2.0
@Override
public void transfer(final Collection<FlowFile> flowFiles, final Relationship relationship) {
    validateRecordState(flowFiles);

    boolean autoTerminated = false;
    boolean selfRelationship = false;
    final int numDestinations = context.getConnections(relationship).size();
    if (numDestinations == 0 && context.getConnectable().isAutoTerminated(relationship)) {
        // auto terminated.
        autoTerminated = true;
    } else if (numDestinations == 0 && relationship == Relationship.SELF) {
        selfRelationship = true;
    } else if (numDestinations == 0) {
        // the relationship specified is not known in this session/context
        throw new IllegalArgumentException("Relationship '" + relationship.getName() + "' is not known");
    }

    final int multiplier = Math.max(1, numDestinations);

    long contentSize = 0L;
    for (final FlowFile flowFile : flowFiles) {
        final StandardRepositoryRecord record = records.get(flowFile);
        record.setTransferRelationship(relationship);
        updateLastQueuedDate(record);

        contentSize += flowFile.getSize(); // the fan-out multiplier is applied once, below
    }

    if (autoTerminated) {
        removedCount += multiplier * flowFiles.size();
        removedBytes += contentSize;
    } else if (!selfRelationship) {
        flowFilesOut += multiplier * flowFiles.size();
        contentSizeOut += multiplier * contentSize;
    }
}
 
Example 6
Source File: StandardProcessSession.java    From nifi with Apache License 2.0
@Override
public void transfer(FlowFile flowFile, final Relationship relationship) {
    verifyTaskActive();
    flowFile = validateRecordState(flowFile);
    final int numDestinations = context.getConnections(relationship).size();
    final int multiplier = Math.max(1, numDestinations);

    boolean autoTerminated = false;
    boolean selfRelationship = false;
    if (numDestinations == 0 && context.getConnectable().isAutoTerminated(relationship)) {
        // auto terminated.
        autoTerminated = true;
    } else if (numDestinations == 0 && relationship == Relationship.SELF) {
        selfRelationship = true;
    } else if (numDestinations == 0) {
        // the relationship specified is not known in this session/context
        throw new IllegalArgumentException("Relationship '" + relationship.getName() + "' is not known");
    }
    final StandardRepositoryRecord record = getRecord(flowFile);
    record.setTransferRelationship(relationship);
    updateLastQueuedDate(record);

    if (autoTerminated) {
        removedCount += multiplier;
        removedBytes += flowFile.getSize();
    } else if (!selfRelationship) {
        flowFilesOut += multiplier;
        contentSizeOut += flowFile.getSize() * multiplier;
    }
}
 
Example 7
Source File: AbstractHiveQLProcessor.java    From nifi with Apache License 2.0
/**
 * Determines the HiveQL statement that should be executed for the given FlowFile
 *
 * @param session  the session that can be used to access the given FlowFile
 * @param flowFile the FlowFile whose HiveQL statement should be executed
 * @return the HiveQL that is associated with the given FlowFile
 */
protected String getHiveQL(final ProcessSession session, final FlowFile flowFile, final Charset charset) {
    // Read the HiveQL from the FlowFile's content
    final byte[] buffer = new byte[(int) flowFile.getSize()];
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, buffer);
        }
    });

    // Interpret the buffered content as the HiveQL statement for this FlowFile.
    return new String(buffer, charset);
}
 
Example 8
Source File: PublisherLease.java    From nifi with Apache License 2.0
void publish(final FlowFile flowFile, final InputStream flowFileContent, final byte[] messageKey, final byte[] demarcatorBytes, final String topic) throws IOException {
    if (tracker == null) {
        tracker = new InFlightMessageTracker();
    }

    try {
        byte[] messageContent;
        if (demarcatorBytes == null || demarcatorBytes.length == 0) {
            if (flowFile.getSize() > maxMessageSize) {
                tracker.fail(flowFile, new TokenTooLargeException("A message in the stream exceeds the maximum allowed message size of " + maxMessageSize + " bytes."));
                return;
            }
            // Send FlowFile content as it is, to support sending 0 byte message.
            messageContent = new byte[(int) flowFile.getSize()];
            StreamUtils.fillBuffer(flowFileContent, messageContent);
            publish(flowFile, messageKey, messageContent, topic, tracker);
            return;
        }

        try (final StreamDemarcator demarcator = new StreamDemarcator(flowFileContent, demarcatorBytes, maxMessageSize)) {
            while ((messageContent = demarcator.nextToken()) != null) {
                publish(flowFile, messageKey, messageContent, topic, tracker);

                if (tracker.isFailed(flowFile)) {
                    // If we have a failure, don't try to send anything else.
                    return;
                }
            }
            tracker.trackEmpty(flowFile);
        } catch (final TokenTooLargeException ttle) {
            tracker.fail(flowFile, ttle);
        }
    } catch (final Exception e) {
        tracker.fail(flowFile, e);
        poison();
        throw e;
    }
}
 
Example 9
Source File: MockProcessSession.java    From nifi with Apache License 2.0
@Override
public MockFlowFile clone(FlowFile flowFile, final long offset, final long size) {
    flowFile = validateState(flowFile);
    if (offset + size > flowFile.getSize()) {
        throw new FlowFileHandlingException("Specified offset of " + offset + " and size " + size + " exceeds size of " + flowFile.toString());
    }

    final MockFlowFile newFlowFile = new MockFlowFile(sharedState.nextFlowFileId(), flowFile);
    final byte[] newContent = Arrays.copyOfRange(((MockFlowFile) flowFile).getData(), (int) offset, (int) (offset + size));
    newFlowFile.setData(newContent);

    currentVersions.put(newFlowFile.getId(), newFlowFile);
    beingProcessed.add(newFlowFile.getId());
    return newFlowFile;
}
 
Example 10
Source File: PublisherLease.java    From nifi with Apache License 2.0
void publish(final FlowFile flowFile, final InputStream flowFileContent, final byte[] messageKey, final byte[] demarcatorBytes, final String topic) throws IOException {
    if (tracker == null) {
        tracker = new InFlightMessageTracker(logger);
    }

    try {
        byte[] messageContent;
        if (demarcatorBytes == null || demarcatorBytes.length == 0) {
            if (flowFile.getSize() > maxMessageSize) {
                tracker.fail(flowFile, new TokenTooLargeException("A message in the stream exceeds the maximum allowed message size of " + maxMessageSize + " bytes."));
                return;
            }
            // Send FlowFile content as it is, to support sending 0 byte message.
            messageContent = new byte[(int) flowFile.getSize()];
            StreamUtils.fillBuffer(flowFileContent, messageContent);
            publish(flowFile, messageKey, messageContent, topic, tracker);
            return;
        }

        try (final StreamDemarcator demarcator = new StreamDemarcator(flowFileContent, demarcatorBytes, maxMessageSize)) {
            while ((messageContent = demarcator.nextToken()) != null) {
                publish(flowFile, messageKey, messageContent, topic, tracker);

                if (tracker.isFailed(flowFile)) {
                    // If we have a failure, don't try to send anything else.
                    return;
                }
                tracker.trackEmpty(flowFile);
            }
        } catch (final TokenTooLargeException ttle) {
            tracker.fail(flowFile, ttle);
        }
    } catch (final Exception e) {
        tracker.fail(flowFile, e);
        poison();
        throw e;
    }
}
 
Example 11
Source File: AbstractFlowFileServerProtocol.java    From localization_nifi with Apache License 2.0
@Override
public int receiveFlowFiles(final Peer peer, final ProcessContext context, final ProcessSession session, final FlowFileCodec codec) throws IOException, ProtocolException {
    if (!handshakeCompleted) {
        throw new IllegalStateException("Handshake has not been completed");
    }
    if (shutdown) {
        throw new IllegalStateException("Protocol is shutdown");
    }

    logger.debug("{} receiving FlowFiles from {}", this, peer);

    final CommunicationsSession commsSession = peer.getCommunicationsSession();
    final DataInputStream dis = new DataInputStream(commsSession.getInput().getInputStream());
    String remoteDn = commsSession.getUserDn();
    if (remoteDn == null) {
        remoteDn = "none";
    }

    final StopWatch stopWatch = new StopWatch(true);
    final CRC32 crc = new CRC32();

    // Peer has data. Otherwise, we would not have been called, because they would not have sent
    // a SEND_FLOWFILES request to us. Just decode the bytes into FlowFiles until the peer says it is
    // finished sending data.
    final Set<FlowFile> flowFilesReceived = new HashSet<>();
    long bytesReceived = 0L;
    boolean continueTransaction = true;
    while (continueTransaction) {
        final long startNanos = System.nanoTime();
        final InputStream flowFileInputStream = handshakeProperties.isUseGzip() ? new CompressionInputStream(dis) : dis;
        final CheckedInputStream checkedInputStream = new CheckedInputStream(flowFileInputStream, crc);

        final DataPacket dataPacket = codec.decode(checkedInputStream);
        if (dataPacket == null) {
            logger.debug("{} Received null dataPacket indicating the end of transaction from {}", this, peer);
            break;
        }
        FlowFile flowFile = session.create();
        flowFile = session.importFrom(dataPacket.getData(), flowFile);
        flowFile = session.putAllAttributes(flowFile, dataPacket.getAttributes());

        final long transferNanos = System.nanoTime() - startNanos;
        final long transferMillis = TimeUnit.MILLISECONDS.convert(transferNanos, TimeUnit.NANOSECONDS);
        final String sourceSystemFlowFileUuid = dataPacket.getAttributes().get(CoreAttributes.UUID.key());

        final String host = StringUtils.isEmpty(peer.getHost()) ? "unknown" : peer.getHost();
        final String port = peer.getPort() <= 0 ? "unknown" : String.valueOf(peer.getPort());

        final Map<String,String> attributes = new HashMap<>(4);
        attributes.put(CoreAttributes.UUID.key(), UUID.randomUUID().toString());
        attributes.put(SiteToSiteAttributes.S2S_HOST.key(), host);
        attributes.put(SiteToSiteAttributes.S2S_ADDRESS.key(), host + ":" + port);

        flowFile = session.putAllAttributes(flowFile, attributes);

        final String transitUri = createTransitUri(peer, sourceSystemFlowFileUuid);
        session.getProvenanceReporter().receive(flowFile, transitUri, sourceSystemFlowFileUuid == null
                ? null : "urn:nifi:" + sourceSystemFlowFileUuid, "Remote Host=" + peer.getHost() + ", Remote DN=" + remoteDn, transferMillis);
        session.transfer(flowFile, Relationship.ANONYMOUS);
        flowFilesReceived.add(flowFile);
        bytesReceived += flowFile.getSize();

        final Response transactionResponse = readTransactionResponse(false, commsSession);
        switch (transactionResponse.getCode()) {
            case CONTINUE_TRANSACTION:
                logger.debug("{} Received ContinueTransaction indicator from {}", this, peer);
                break;
            case FINISH_TRANSACTION:
                logger.debug("{} Received FinishTransaction indicator from {}", this, peer);
                continueTransaction = false;
                break;
            case CANCEL_TRANSACTION:
                logger.info("{} Received CancelTransaction indicator from {} with explanation {}", this, peer, transactionResponse.getMessage());
                session.rollback();
                return 0;
            default:
                throw new ProtocolException("Received unexpected response from peer: when expecting Continue Transaction or Finish Transaction, received " + transactionResponse);
        }
    }

    // we received a FINISH_TRANSACTION indicator. Send back a CONFIRM_TRANSACTION message
    // to peer so that we can verify that the connection is still open. This is a two-phase commit,
    // which helps to prevent the chances of data duplication. Without doing this, we may commit the
    // session and then when we send the response back to the peer, the peer may have timed out and may not
    // be listening. As a result, it will re-send the data. By doing this two-phase commit, we narrow the
    // Critical Section involved in this transaction so that rather than the Critical Section being the
    // time window involved in the entire transaction, it is reduced to a simple round-trip conversation.
    logger.debug("{} Sending CONFIRM_TRANSACTION Response Code to {}", this, peer);
    String calculatedCRC = String.valueOf(crc.getValue());
    writeTransactionResponse(false, ResponseCode.CONFIRM_TRANSACTION, commsSession, calculatedCRC);

    FlowFileTransaction transaction = new FlowFileTransaction(session, context, stopWatch, bytesReceived, flowFilesReceived, calculatedCRC);
    return commitReceiveTransaction(peer, transaction);
}
 
Example 12
Source File: TailFile.java    From localization_nifi with Apache License 2.0
/**
 * Finds any files that have rolled over and have not yet been ingested by
 * this Processor. Each of these files that is found will be ingested as its
 * own FlowFile. If a file is found that has been partially ingested, the
 * rest of the file will be ingested as a single FlowFile but the data that
 * already has been ingested will not be ingested again.
 *
 * @param context the ProcessContext to use in order to obtain Processor
 * configuration.
 * @param session the ProcessSession to use in order to interact with
 * FlowFile creation and content.
 * @param expectedChecksum the checksum value that is expected for the
 * oldest file from offset 0 through &lt;position&gt;.
 * @param timestamp the latest Last Modified Timestamp that has been
 * consumed. Any data that was written before this date will not be
 * ingested.
 * @param position the byte offset in the file being tailed, where tailing
 * last left off.
 *
 * @return <code>true</code> if the file being tailed has rolled over, false
 * otherwise
 */
private boolean recoverRolledFiles(final ProcessContext context, final ProcessSession session, final String tailFile, final List<File> rolledOffFiles, final Long expectedChecksum,
        final long timestamp, final long position) {
    try {
        getLogger().debug("Recovering Rolled Off Files; total number of files rolled off = {}", new Object[]{rolledOffFiles.size()});
        TailFileObject tfo = states.get(tailFile);

        // For first file that we find, it may or may not be the file that we were last reading from.
        // As a result, we have to read up to the position we stored, while calculating the checksum. If the checksums match,
        // then we know we've already processed this file. If the checksums do not match, then we have not
        // processed this file and we need to seek back to position 0 and ingest the entire file.
        // For all other files that have been rolled over, we need to just ingest the entire file.
        boolean rolloverOccurred = !rolledOffFiles.isEmpty();
        if (rolloverOccurred && expectedChecksum != null && rolledOffFiles.get(0).length() >= position) {
            final File firstFile = rolledOffFiles.get(0);

            final long startNanos = System.nanoTime();
            if (position > 0) {
                try (final InputStream fis = new FileInputStream(firstFile);
                        final CheckedInputStream in = new CheckedInputStream(fis, new CRC32())) {
                    StreamUtils.copy(in, new NullOutputStream(), position);

                    final long checksumResult = in.getChecksum().getValue();
                    if (checksumResult == expectedChecksum) {
                        getLogger().debug("Checksum for {} matched expected checksum. Will skip first {} bytes", new Object[]{firstFile, position});

                        // This is the same file that we were reading when we shutdown. Start reading from this point on.
                        rolledOffFiles.remove(0);
                        FlowFile flowFile = session.create();
                        flowFile = session.importFrom(in, flowFile);
                        if (flowFile.getSize() == 0L) {
                            session.remove(flowFile);
                            // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
                            cleanup();
                            tfo.setState(new TailFileState(tailFile, null, null, 0L, firstFile.lastModified() + 1L, firstFile.length(), null, tfo.getState().getBuffer()));
                        } else {
                            final Map<String, String> attributes = new HashMap<>(3);
                            attributes.put(CoreAttributes.FILENAME.key(), firstFile.getName());
                            attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain");
                            attributes.put("tailfile.original.path", tailFile);
                            flowFile = session.putAllAttributes(flowFile, attributes);

                            session.getProvenanceReporter().receive(flowFile, firstFile.toURI().toString(), "FlowFile contains bytes 0 through " + position + " of source file",
                                    TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos));
                            session.transfer(flowFile, REL_SUCCESS);
                            getLogger().debug("Created {} from rolled over file {} and routed to success", new Object[]{flowFile, firstFile});

                            // use a timestamp of lastModified() + 1 so that we do not ingest this file again.
                            cleanup();
                            tfo.setState(new TailFileState(tailFile, null, null, 0L, firstFile.lastModified() + 1L, firstFile.length(), null, tfo.getState().getBuffer()));

                            // must ensure that we do session.commit() before persisting state in order to avoid data loss.
                            session.commit();
                            persistState(tfo, context);
                        }
                    } else {
                        getLogger().debug("Checksum for {} did not match expected checksum. Checksum for file was {} but expected {}. Will consume entire file",
                                new Object[]{firstFile, checksumResult, expectedChecksum});
                    }
                }
            }
        }

        // For each file that we found that matches our Rollover Pattern, and has a last modified date later than the timestamp
        // that we recovered from the state file, we need to consume the entire file. The only exception to this is the file that
        // we were reading when we last stopped, as it may already have been partially consumed. That is taken care of in the
        // above block of code.
        for (final File file : rolledOffFiles) {
            tfo.setState(consumeFileFully(file, context, session, tfo));
        }

        return rolloverOccurred;
    } catch (final IOException e) {
        getLogger().error("Failed to recover files that have rolled over due to {}", new Object[]{e});
        return false;
    }
}
 
Example 13
Source File: StandardProcessSession.java    From nifi with Apache License 2.0
private InputStream getInputStream(final FlowFile flowFile, final ContentClaim claim, final long offset, final boolean allowCachingOfStream) throws ContentNotFoundException {
    // If there's no content, don't bother going to the Content Repository because it is generally expensive and we know
    // that there is no actual content.
    if (flowFile.getSize() == 0L) {
        return new ByteArrayInputStream(new byte[0]);
    }

    try {
        // If the recursion set is empty, we can use the same input stream that we already have open. However, if
        // the recursion set is NOT empty, we can't do this because we may be reading the input of FlowFile 1 while in the
        // callback for reading FlowFile 1 and if we used the same stream we'd be destroying the ability to read from FlowFile 1.
        if (allowCachingOfStream && readRecursionSet.isEmpty() && writeRecursionSet.isEmpty()) {
            if (currentReadClaim == claim) {
                if (currentReadClaimStream != null && currentReadClaimStream.getCurrentOffset() <= offset) {
                    final long bytesToSkip = offset - currentReadClaimStream.getCurrentOffset();
                    if (bytesToSkip > 0) {
                        StreamUtils.skip(currentReadClaimStream, bytesToSkip);
                    }

                    return new DisableOnCloseInputStream(currentReadClaimStream);
                }
            }

            claimCache.flush(claim);

            if (currentReadClaimStream != null) {
                currentReadClaimStream.close();
            }

            currentReadClaim = claim;
            currentReadClaimStream = new ContentClaimInputStream(context.getContentRepository(), claim, offset);

            // Use a non-closeable stream because we want to keep it open after the callback has finished so that we can
            // reuse the same InputStream for the next FlowFile
            final InputStream disableOnClose = new DisableOnCloseInputStream(currentReadClaimStream);
            return disableOnClose;
        } else {
            claimCache.flush(claim);

            final InputStream rawInStream = new ContentClaimInputStream(context.getContentRepository(), claim, offset);
            return rawInStream;
        }
    } catch (final ContentNotFoundException cnfe) {
        throw cnfe;
    } catch (final EOFException eof) {
        throw new ContentNotFoundException(claim, eof);
    } catch (final IOException ioe) {
        throw new FlowFileAccessException("Failed to read content of " + flowFile, ioe);
    }
}
 
Example 14
Source File: SegmentContent.java    From localization_nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final String segmentId = UUID.randomUUID().toString();
    final long segmentSize = context.getProperty(SIZE).asDataSize(DataUnit.B).longValue();

    final String originalFileName = flowFile.getAttribute(CoreAttributes.FILENAME.key());

    if (flowFile.getSize() <= segmentSize) {
        flowFile = session.putAttribute(flowFile, SEGMENT_ID, segmentId);
        flowFile = session.putAttribute(flowFile, SEGMENT_INDEX, "1");
        flowFile = session.putAttribute(flowFile, SEGMENT_COUNT, "1");
        flowFile = session.putAttribute(flowFile, SEGMENT_ORIGINAL_FILENAME, originalFileName);

        flowFile = session.putAttribute(flowFile, FRAGMENT_ID, segmentId);
        flowFile = session.putAttribute(flowFile, FRAGMENT_INDEX, "1");
        flowFile = session.putAttribute(flowFile, FRAGMENT_COUNT, "1");

        FlowFile clone = session.clone(flowFile);
        session.transfer(flowFile, REL_ORIGINAL);
        session.transfer(clone, REL_SEGMENTS);
        return;
    }

    int totalSegments = (int) (flowFile.getSize() / segmentSize);
    if (totalSegments * segmentSize < flowFile.getSize()) {
        totalSegments++;
    }

    final Map<String, String> segmentAttributes = new HashMap<>();
    segmentAttributes.put(SEGMENT_ID, segmentId);
    segmentAttributes.put(SEGMENT_COUNT, String.valueOf(totalSegments));
    segmentAttributes.put(SEGMENT_ORIGINAL_FILENAME, originalFileName);

    segmentAttributes.put(FRAGMENT_ID, segmentId);
    segmentAttributes.put(FRAGMENT_COUNT, String.valueOf(totalSegments));

    final Set<FlowFile> segmentSet = new HashSet<>();
    for (int i = 1; i <= totalSegments; i++) {
        final long segmentOffset = segmentSize * (i - 1);
        FlowFile segment = session.clone(flowFile, segmentOffset, Math.min(segmentSize, flowFile.getSize() - segmentOffset));
        segmentAttributes.put(SEGMENT_INDEX, String.valueOf(i));
        segmentAttributes.put(FRAGMENT_INDEX, String.valueOf(i));
        segment = session.putAllAttributes(segment, segmentAttributes);
        segmentSet.add(segment);
    }

    session.transfer(segmentSet, REL_SEGMENTS);
    flowFile = FragmentAttributes.copyAttributesToOriginal(session, flowFile, segmentId, totalSegments);
    session.transfer(flowFile, REL_ORIGINAL);

    if (totalSegments <= 10) {
        getLogger().info("Segmented {} into {} segments: {}", new Object[]{flowFile, totalSegments, segmentSet});
    } else {
        getLogger().info("Segmented {} into {} segments", new Object[]{flowFile, totalSegments});
    }
}
 
Example 15
Source File: StandardRemoteGroupPort.java    From localization_nifi with Apache License 2.0
private int transferFlowFiles(final Transaction transaction, final ProcessContext context, final ProcessSession session, final FlowFile firstFlowFile) throws IOException, ProtocolException {
    FlowFile flowFile = firstFlowFile;

    try {
        final String userDn = transaction.getCommunicant().getDistinguishedName();
        final long startSendingNanos = System.nanoTime();
        final StopWatch stopWatch = new StopWatch(true);
        long bytesSent = 0L;

        final Set<FlowFile> flowFilesSent = new HashSet<>();
        boolean continueTransaction = true;
        while (continueTransaction) {
            final long startNanos = System.nanoTime();
            // call codec.encode within a session callback so that we have the InputStream to read the FlowFile
            final FlowFile toWrap = flowFile;
            session.read(flowFile, new InputStreamCallback() {
                @Override
                public void process(final InputStream in) throws IOException {
                    final DataPacket dataPacket = new StandardDataPacket(toWrap.getAttributes(), in, toWrap.getSize());
                    transaction.send(dataPacket);
                }
            });

            final long transferNanos = System.nanoTime() - startNanos;
            final long transferMillis = TimeUnit.MILLISECONDS.convert(transferNanos, TimeUnit.NANOSECONDS);

            flowFilesSent.add(flowFile);
            bytesSent += flowFile.getSize();
            logger.debug("{} Sent {} to {}", this, flowFile, transaction.getCommunicant().getUrl());

            final String transitUri = transaction.getCommunicant().createTransitUri(flowFile.getAttribute(CoreAttributes.UUID.key()));
            session.getProvenanceReporter().send(flowFile, transitUri, "Remote DN=" + userDn, transferMillis, false);
            session.remove(flowFile);

            final long sendingNanos = System.nanoTime() - startSendingNanos;
            if (sendingNanos < BATCH_SEND_NANOS) {
                flowFile = session.get();
            } else {
                flowFile = null;
            }

            continueTransaction = (flowFile != null);
        }

        transaction.confirm();

        // consume input stream entirely, ignoring its contents. If we
        // don't do this, the Connection will not be returned to the pool
        stopWatch.stop();
        final String uploadDataRate = stopWatch.calculateDataRate(bytesSent);
        final long uploadMillis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
        final String dataSize = FormatUtils.formatDataSize(bytesSent);

        transaction.complete();
        session.commit();

        final String flowFileDescription = (flowFilesSent.size() < 20) ? flowFilesSent.toString() : flowFilesSent.size() + " FlowFiles";
        logger.info("{} Successfully sent {} ({}) to {} in {} milliseconds at a rate of {}", new Object[]{
            this, flowFileDescription, dataSize, transaction.getCommunicant().getUrl(), uploadMillis, uploadDataRate});

        return flowFilesSent.size();
    } catch (final Exception e) {
        session.rollback();
        throw e;
    }

}
 
Example 16
Source File: PutWebSocket.java    From nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession processSession) throws ProcessException {
    final FlowFile flowfile = processSession.get();
    if (flowfile == null) {
        return;
    }

    final String sessionId = context.getProperty(PROP_WS_SESSION_ID)
            .evaluateAttributeExpressions(flowfile).getValue();
    final String webSocketServiceId = context.getProperty(PROP_WS_CONTROLLER_SERVICE_ID)
            .evaluateAttributeExpressions(flowfile).getValue();
    final String webSocketServiceEndpoint = context.getProperty(PROP_WS_CONTROLLER_SERVICE_ENDPOINT)
            .evaluateAttributeExpressions(flowfile).getValue();
    final String messageTypeStr = context.getProperty(PROP_WS_MESSAGE_TYPE)
            .evaluateAttributeExpressions(flowfile).getValue();
    final WebSocketMessage.Type messageType = WebSocketMessage.Type.valueOf(messageTypeStr);

    if (StringUtils.isEmpty(sessionId)) {
        getLogger().debug("Specific SessionID not specified. Message will be broadcast to all connected clients.");
    }

    if (StringUtils.isEmpty(webSocketServiceId)
            || StringUtils.isEmpty(webSocketServiceEndpoint)) {
        transferToFailure(processSession, flowfile, "Required WebSocket attribute was not found.");
        return;
    }

    final ControllerService controllerService = context.getControllerServiceLookup().getControllerService(webSocketServiceId);
    if (controllerService == null) {
        transferToFailure(processSession, flowfile, "WebSocket ControllerService was not found.");
        return;
    } else if (!(controllerService instanceof WebSocketService)) {
        transferToFailure(processSession, flowfile, "The ControllerService found was not a WebSocket ControllerService but a "
                + controllerService.getClass().getName());
        return;
    }

    final WebSocketService webSocketService = (WebSocketService)controllerService;
    final byte[] messageContent = new byte[(int) flowfile.getSize()];
    final long startSending = System.currentTimeMillis();

    final AtomicReference<String> transitUri = new AtomicReference<>();
    final Map<String, String> attrs = new HashMap<>();
    attrs.put(ATTR_WS_CS_ID, webSocketService.getIdentifier());

    if (!StringUtils.isEmpty(sessionId)) {
        attrs.put(ATTR_WS_SESSION_ID, sessionId);
    }

    attrs.put(ATTR_WS_ENDPOINT_ID, webSocketServiceEndpoint);
    attrs.put(ATTR_WS_MESSAGE_TYPE, messageTypeStr);

    processSession.read(flowfile, in -> {
        StreamUtils.fillBuffer(in, messageContent, true);
    });

    try {

        webSocketService.sendMessage(webSocketServiceEndpoint, sessionId, sender -> {
            switch (messageType) {
                case TEXT:
                    sender.sendString(new String(messageContent, CHARSET_NAME));
                    break;
                case BINARY:
                    sender.sendBinary(ByteBuffer.wrap(messageContent));
                    break;
            }

            attrs.put(ATTR_WS_LOCAL_ADDRESS, sender.getLocalAddress().toString());
            attrs.put(ATTR_WS_REMOTE_ADDRESS, sender.getRemoteAddress().toString());
            transitUri.set(sender.getTransitUri());
        });

        final FlowFile updatedFlowFile = processSession.putAllAttributes(flowfile, attrs);
        final long transmissionMillis = System.currentTimeMillis() - startSending;
        processSession.getProvenanceReporter().send(updatedFlowFile, transitUri.get(), transmissionMillis);

        processSession.transfer(updatedFlowFile, REL_SUCCESS);

    } catch (WebSocketConfigurationException|IllegalStateException|IOException e) {
        // WebSocketConfigurationException: If the corresponding WebSocketGatewayProcessor has been stopped.
        // IllegalStateException: Session is already closed or not found.
        // IOException: other IO error.
        getLogger().error("Failed to send message via WebSocket due to " + e, e);
        transferToFailure(processSession, flowfile, e.toString());
    }

}
 
Example 17
Source File: PutRethinkDB.java    From nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    if ( flowFile.getSize() == 0) {
        getLogger().error("Empty message");
        flowFile = session.putAttribute(flowFile, RETHINKDB_ERROR_MESSAGE, "Empty message size " + flowFile.getSize());
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    if ( flowFile.getSize() > maxDocumentsSize) {
        getLogger().error("Message size exceeded {} max allowed is {}", new Object[] { flowFile.getSize(), maxDocumentsSize});
        flowFile = session.putAttribute(flowFile, RETHINKDB_ERROR_MESSAGE, "Max message size exceeded " + flowFile.getSize());
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    String conflictStrategy = context.getProperty(CONFLICT_STRATEGY).evaluateAttributeExpressions(flowFile).getValue();
    String durability = context.getProperty(DURABILITY).evaluateAttributeExpressions(flowFile).getValue();

    try {
        long startTimeMillis = System.currentTimeMillis();
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        session.exportTo(flowFile, baos);
        String documents = new String(baos.toByteArray(), charset);
        JSONParser parser = new JSONParser();
        Object jsonDocuments = parser.parse(documents);

        Insert insert = getRdbTable().insert(jsonDocuments)
            .optArg(CONFLICT_OPTION_KEY, conflictStrategy)
            .optArg(DURABILITY_OPTION_KEY, durability);

        HashMap<String,Object> result = runInsert(insert);
        final long endTimeMillis = System.currentTimeMillis();
        getLogger().debug("Json documents {} inserted Result: {}", new Object[] {documents, result});
        flowFile = populateAttributes(session, flowFile, result);

        if ( (Long)result.get(RESULT_ERROR_KEY) != 0 ) {
            getLogger().error("There were errors while inserting data documents {} result {}",
               new Object [] {documents, result});
            session.transfer(flowFile, REL_FAILURE);
        } else {
            session.transfer(flowFile, REL_SUCCESS);
            session.getProvenanceReporter().send(flowFile,
                new StringBuilder("rethinkdb://").append(databaseName).append("/").append(tableName).toString(),
                (endTimeMillis - startTimeMillis));
        }
    } catch (Exception exception) {
        getLogger().error("Failed to insert into RethinkDB due to {}",
                new Object[]{exception.getLocalizedMessage()}, exception);
        flowFile = session.putAttribute(flowFile, RETHINKDB_ERROR_MESSAGE, String.valueOf(exception.getMessage()));
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
    }
}
 
Example 18
Source File: PutDynamoDB.java    From nifi with Apache License 2.0
private boolean isDataValid(FlowFile flowFile, String jsonDocument) {
    return (flowFile.getSize() + jsonDocument.length()) < DYNAMODB_MAX_ITEM_SIZE;
}
 
Example 19
Source File: StandardProcessSession.java    From localization_nifi with Apache License 2.0
@Override
public void exportTo(final FlowFile source, final OutputStream destination) {
    validateRecordState(source);
    final StandardRepositoryRecord record = records.get(source);

    if(record.getCurrentClaim() == null) {
        return;
    }

    try {
        ensureNotAppending(record.getCurrentClaim());
        claimCache.flush(record.getCurrentClaim());
    } catch (final IOException e) {
        throw new FlowFileAccessException("Failed to access ContentClaim for " + source.toString(), e);
    }

    try (final InputStream rawIn = getInputStream(source, record.getCurrentClaim(), record.getCurrentClaimOffset(), true);
            final InputStream limitedIn = new LimitedInputStream(rawIn, source.getSize());
            final InputStream disableOnCloseIn = new DisableOnCloseInputStream(limitedIn);
            final ByteCountingInputStream countingStream = new ByteCountingInputStream(disableOnCloseIn, this.bytesRead)) {

        // We want to differentiate between IOExceptions thrown by the repository and IOExceptions thrown from
        // Processor code. As a result, we have the FlowFileAccessInputStream, which catches IOException from the repository
        // and translates it into either FlowFileAccessException or ContentNotFoundException. We keep track of any
        // ContentNotFoundException because if it is thrown, the Processor code may catch it and do something else with it
        // but in reality, if it is thrown, we want to know about it and handle it, even if the Processor code catches it.
        final FlowFileAccessInputStream ffais = new FlowFileAccessInputStream(countingStream, source, record.getCurrentClaim());
        boolean cnfeThrown = false;

        try {
            recursionSet.add(source);
            StreamUtils.copy(ffais, destination, source.getSize());
        } catch (final ContentNotFoundException cnfe) {
            cnfeThrown = true;
            throw cnfe;
        } finally {
            recursionSet.remove(source);
            IOUtils.closeQuietly(ffais);
            // if cnfeThrown is true, we don't need to re-throw the Exception; it will propagate.
            if (!cnfeThrown && ffais.getContentNotFoundException() != null) {
                throw ffais.getContentNotFoundException();
            }
        }

    } catch (final ContentNotFoundException nfe) {
        handleContentNotFound(nfe, record);
    } catch (final IOException ex) {
        throw new ProcessException("IOException thrown from " + connectableDescription + ": " + ex.toString(), ex);
    }
}
 
Example 20
Source File: SequenceFileWriterImpl.java    From localization_nifi with Apache License 2.0
protected void processInputStream(InputStream stream, FlowFile flowFile, final Writer writer) throws IOException {
    int fileSize = (int) flowFile.getSize();
    final InputStreamWritable inStreamWritable = new InputStreamWritable(new BufferedInputStream(stream), fileSize);
    String key = flowFile.getAttribute(CoreAttributes.FILENAME.key());
    writer.append(new Text(key), inStreamWritable);
}