Java Code Examples for org.apache.nifi.processor.ProcessSession#write()

The following examples show how to use org.apache.nifi.processor.ProcessSession#write(). The originating project and source file are noted above each example.
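
ProcessSession#write() has three overloads, all of which appear in the examples below: write(FlowFile, OutputStreamCallback) replaces the FlowFile content, write(FlowFile, StreamCallback) reads the existing content while the new content is written, and write(FlowFile) returns an OutputStream directly. The minimal processor sketch below is illustrative only; the class name, relationship, and content are made up and it is not taken from any of the listed projects.

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Set;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.stream.io.StreamUtils;

public class WriteOverloadSketch extends AbstractProcessor {

    // Hypothetical relationship; any processor-defined relationship works the same way.
    static final Relationship REL_SUCCESS = new Relationship.Builder().name("success").build();

    @Override
    public Set<Relationship> getRelationships() {
        return Collections.singleton(REL_SUCCESS);
    }

    @Override
    public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
        FlowFile flowFile = session.get();
        if (flowFile == null) {
            return;
        }

        // 1. OutputStreamCallback (here as a lambda): replaces the FlowFile content.
        //    The returned FlowFile reference must be kept for all subsequent session calls.
        flowFile = session.write(flowFile, out -> out.write("example".getBytes(StandardCharsets.UTF_8)));

        // 2. StreamCallback: reads the current content while writing the new content.
        flowFile = session.write(flowFile, (in, out) -> StreamUtils.copy(in, out));

        // 3. Stream-returning form: write(FlowFile) hands back an OutputStream; closing it
        //    (here via try-with-resources) commits the new content, as in Example 2 below.
        try (OutputStream out = session.write(flowFile)) {
            out.write("example".getBytes(StandardCharsets.UTF_8));
        } catch (final IOException e) {
            throw new ProcessException(e);
        }

        session.transfer(flowFile, REL_SUCCESS);
    }
}
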
Example 1
Source File: DeleteRowsWriter.java    From nifi with Apache License 2.0
/**
 * Creates and transfers a new flow file whose contents are the JSON-serialized value of the specified event, and the sequence ID attribute set
 *
 * @param session   A reference to a ProcessSession from which the flow file(s) will be created and transferred
 * @param eventInfo An event whose value will become the contents of the flow file
 * @return The next available CDC sequence ID for use by the CDC processor
 */
@Override
public long writeEvent(final ProcessSession session, String transitUri, final DeleteRowsEventInfo eventInfo, final long currentSequenceId, Relationship relationship) {
    final AtomicLong seqId = new AtomicLong(currentSequenceId);
    for (Serializable[] row : eventInfo.getRows()) {

        FlowFile flowFile = session.create();
        flowFile = session.write(flowFile, outputStream -> {

            super.startJson(outputStream, eventInfo);
            super.writeJson(eventInfo);

            final BitSet bitSet = eventInfo.getIncludedColumns();
            writeRow(eventInfo, row, bitSet);

            super.endJson();
        });

        flowFile = session.putAllAttributes(flowFile, getCommonAttributes(seqId.get(), eventInfo));
        session.transfer(flowFile, relationship);
        session.getProvenanceReporter().receive(flowFile, transitUri);
        seqId.getAndIncrement();
    }
    return seqId.get();
}
 
Example 2
Source File: ListenHTTPServlet.java    From nifi with Apache License 2.0
private Set<FlowFile> handleMultipartRequest(HttpServletRequest request, ProcessSession session, String foundSubject) throws IOException, IllegalStateException, ServletException {
    Set<FlowFile> flowFileSet = new HashSet<>();
    String tempDir = System.getProperty("java.io.tmpdir");
    request.setAttribute(Request.MULTIPART_CONFIG_ELEMENT, new MultipartConfigElement(tempDir, multipartRequestMaxSize, multipartRequestMaxSize, multipartReadBufferSize));
    List<Part> requestParts = ImmutableList.copyOf(request.getParts());
    for (int i = 0; i < requestParts.size(); i++) {
        Part part = requestParts.get(i);
        FlowFile flowFile = session.create();
        try (OutputStream flowFileOutputStream = session.write(flowFile)) {
            StreamUtils.copy(part.getInputStream(), flowFileOutputStream);
        }
        flowFile = saveRequestDetailsAsAttributes(request, session, foundSubject, flowFile);
        flowFile = savePartDetailsAsAttributes(session, part, flowFile, i, requestParts.size());
        flowFileSet.add(flowFile);
    }
    return flowFileSet;
}
 
Example 3
Source File: UpdateRowsWriter.java    From nifi with Apache License 2.0
/**
 * Creates and transfers a new flow file whose contents are the JSON-serialized value of the specified event, and the sequence ID attribute set
 *
 * @param session   A reference to a ProcessSession from which the flow file(s) will be created and transferred
 * @param eventInfo An event whose value will become the contents of the flow file
 * @return The next available CDC sequence ID for use by the CDC processor
 */
@Override
public long writeEvent(final ProcessSession session, String transitUri, final UpdateRowsEventInfo eventInfo, final long currentSequenceId, Relationship relationship) {
    final AtomicLong seqId = new AtomicLong(currentSequenceId);
    for (Map.Entry<Serializable[], Serializable[]> row : eventInfo.getRows()) {

        FlowFile flowFile = session.create();
        flowFile = session.write(flowFile, outputStream -> {

            super.startJson(outputStream, eventInfo);
            super.writeJson(eventInfo);

            final BitSet bitSet = eventInfo.getIncludedColumns();
            writeRow(eventInfo, row, bitSet);

            super.endJson();
        });

        flowFile = session.putAllAttributes(flowFile, getCommonAttributes(seqId.get(), eventInfo));
        session.transfer(flowFile, relationship);
        session.getProvenanceReporter().receive(flowFile, transitUri);
        seqId.getAndIncrement();
    }
    return seqId.get();
}
 
Example 4
Source File: AbstractFlumeProcessor.java    From nifi with Apache License 2.0
protected static void transferEvent(final Event event, ProcessSession session,
    Relationship relationship) {
    FlowFile flowFile = session.create();
    flowFile = session.putAllAttributes(flowFile, event.getHeaders());

    flowFile = session.write(flowFile, new OutputStreamCallback() {
        @Override
        public void process(final OutputStream out) throws IOException {
            out.write(event.getBody());
        }
    });

    session.getProvenanceReporter()
        .create(flowFile);
    session.transfer(flowFile, relationship);
}
 
Example 5
Source File: JsonQueryElasticsearch.java    From nifi with Apache License 2.0
private FlowFile writeAggregationFlowFileContents(String name, String json, ProcessSession session, FlowFile aggFlowFile, Map<String, String> attributes) {
    aggFlowFile = session.write(aggFlowFile, out -> out.write(json.getBytes()));
    if (name != null) {
        aggFlowFile = session.putAttribute(aggFlowFile, "aggregation.name", name);
    }

    return session.putAllAttributes(aggFlowFile, attributes);
}
 
Example 6
Source File: TestFreeFormTextRecordSetWriterProcessor.java    From nifi with Apache License 2.0
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();

    final RecordSetWriterFactory writerFactory = context.getProperty(WRITER).asControllerService(RecordSetWriterFactory.class);
    final FlowFile flowFileRef = flowFile;
    flowFile = session.write(flowFile, out -> {
        try {
            // The "reader" RecordSchema must be passed in here as the controller service expects to inherit it from the record itself
            // See the InheritSchemaFromRecord class for more details
            final RecordSchema schema = writerFactory.getSchema(flowFileRef.getAttributes(), recordSchema);

            boolean multipleRecords = Boolean.parseBoolean(context.getProperty(MULTIPLE_RECORDS).getValue());
            RecordSet recordSet = getRecordSet(multipleRecords);

            final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out, flowFileRef);

            writer.write(recordSet);
            writer.flush();


        } catch (Exception e) {
            throw new ProcessException(e.getMessage());
        }

    });
    session.transfer(flowFile, SUCCESS);
}
 
Example 7
Source File: TestSplitJson.java    From localization_nifi with Apache License 2.0
@Test
public void testSplit_pathToArrayWithNulls_nullStringRepresentation() throws Exception {
    final TestRunner testRunner = TestRunners.newTestRunner(new SplitJson());
    testRunner.setProperty(SplitJson.ARRAY_JSON_PATH_EXPRESSION, "$.arrayOfNulls");
    testRunner.setProperty(SplitJson.NULL_VALUE_DEFAULT_REPRESENTATION,
            AbstractJsonPathProcessor.NULL_STRING_OPTION);

    ProcessSession session = testRunner.getProcessSessionFactory().createSession();
    FlowFile ff = session.create();

    ff = session.write(ff, new OutputStreamCallback() {
        @Override
        public void process(OutputStream out) throws IOException {
            try (OutputStream outputStream = new BufferedOutputStream(out)) {
                outputStream.write("{\"stringField\": \"String Value\", \"arrayOfNulls\": [null, null, null]}".getBytes(StandardCharsets.UTF_8));
            }
        }
    });

    testRunner.enqueue(ff);
    testRunner.run();

    /* assert that three files were transferred to split and each has the word null in it */
    int expectedFiles = 3;
    testRunner.assertTransferCount(SplitJson.REL_SPLIT, expectedFiles);
    for (int i = 0; i < expectedFiles; i++) {
        testRunner.getFlowFilesForRelationship(SplitJson.REL_SPLIT).get(i).assertContentEquals("null");
    }
}
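
For comparison, test code usually does not need a raw ProcessSession to stage input at all: TestRunner#enqueue(byte[]) creates and queues the incoming FlowFile internally. A trimmed sketch of the same test using that route (assuming the same test class and imports as Example 7; the assertion is shortened):

@Test
public void testSplit_pathToArrayWithNulls_enqueueDirectly() throws Exception {
    final TestRunner testRunner = TestRunners.newTestRunner(new SplitJson());
    testRunner.setProperty(SplitJson.ARRAY_JSON_PATH_EXPRESSION, "$.arrayOfNulls");
    testRunner.setProperty(SplitJson.NULL_VALUE_DEFAULT_REPRESENTATION,
            AbstractJsonPathProcessor.NULL_STRING_OPTION);

    // enqueue() creates the incoming FlowFile for us, so no explicit
    // ProcessSession#create()/write() calls are needed to stage test input.
    testRunner.enqueue("{\"stringField\": \"String Value\", \"arrayOfNulls\": [null, null, null]}"
            .getBytes(StandardCharsets.UTF_8));
    testRunner.run();

    testRunner.assertTransferCount(SplitJson.REL_SPLIT, 3);
}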
 
Example 8
Source File: ParseEvtx.java    From nifi with Apache License 2.0
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    ComponentLog logger = getLogger();
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    String basename = getBasename(flowFile, logger);
    String granularity = context.getProperty(GRANULARITY).getValue();
    if (FILE.equals(granularity)) {
        // File granularity will emit a FlowFile for each input
        FlowFile original = session.clone(flowFile);
        AtomicReference<Exception> exceptionReference = new AtomicReference<>(null);
        FlowFile updated = session.write(flowFile, (in, out) -> {
            processFileGranularity(session, logger, original, basename, exceptionReference, in, out);
        });
        session.transfer(original, REL_ORIGINAL);
        resultProcessor.process(session, logger, updated, exceptionReference.get(), getName(basename, null, null, XML_EXTENSION));
    } else {
        session.read(flowFile, in -> {
            if (RECORD.equals(granularity)) {
                // Record granularity will emit a FlowFile for every record (event)
                processRecordGranularity(session, logger, flowFile, basename, in);
            } else if (CHUNK.equals(granularity)) {
                // Chunk granularity will emit a FlowFile for each chunk of the file
                processChunkGranularity(session, logger, flowFile, basename, in);
            }
        });
        session.transfer(flowFile, REL_ORIGINAL);
    }
}
 
Example 9
Source File: SequenceFileWriterImpl.java    From localization_nifi with Apache License 2.0
@Override
public FlowFile writeSequenceFile(final FlowFile flowFile, final ProcessSession session,
        final Configuration configuration, final CompressionType compressionType, final CompressionCodec compressionCodec) {

    if (flowFile.getSize() > Integer.MAX_VALUE) {
        throw new IllegalArgumentException("Cannot write " + flowFile
                + " to Sequence File because its size is greater than the largest possible Integer");
    }
    final String sequenceFilename = flowFile.getAttribute(CoreAttributes.FILENAME.key()) + ".sf";

    // Analytics running on HDFS want data that is written with a BytesWritable. However, creating a
    // BytesWritable requires that we buffer the entire file into memory in a byte array.
    // We can create an FSFilterableOutputStream to wrap the FSDataOutputStream and use that to replace
    // the InputStreamWritable class name with the BytesWritable class name when we write the header.
    // This allows the Sequence File to say that the Values are of type BytesWritable (so they can be
    // read via the BytesWritable class) while allowing us to stream the data rather than buffering
    // entire files in memory.
    final byte[] toReplace, replaceWith;
    try {
        toReplace = InputStreamWritable.class.getCanonicalName().getBytes("UTF-8");
        replaceWith = BytesWritable.class.getCanonicalName().getBytes("UTF-8");
    } catch (final UnsupportedEncodingException e) {
        // This won't happen.
        throw new RuntimeException("UTF-8 is not a supported Character Format");
    }

    final StopWatch watch = new StopWatch(true);
    FlowFile sfFlowFile = session.write(flowFile, new StreamCallback() {

        @Override
        public void process(InputStream in, OutputStream out) throws IOException {
            // Use a FilterableOutputStream to change 'InputStreamWritable' to 'BytesWritable' - see comment
            // above for an explanation of why we want to do this.
            final ByteFilteringOutputStream bwos = new ByteFilteringOutputStream(out);

            // TODO: Adding this filter could be dangerous... A Sequence File's header contains 3 bytes: "SEQ",
            // followed by 1 byte that is the Sequence File version, followed by 2 "entries." These "entries"
            // contain the size of the Key/Value type and the Key/Value type. So, we will be writing the
            // value type as InputStreamWritable -- which we need to change to BytesWritable. This means that
            // we must also change the "size" that is written, but replacing this single byte could be
            // dangerous. However, we know exactly what will be written to the header, and we limit this at one
            // replacement, so we should be just fine.
            bwos.addFilter(toReplace, replaceWith, 1);
            bwos.addFilter((byte) InputStreamWritable.class.getCanonicalName().length(),
                    (byte) BytesWritable.class.getCanonicalName().length(), 1);

            try (final FSDataOutputStream fsDataOutputStream = new FSDataOutputStream(bwos, new Statistics(""));
                    final SequenceFile.Writer writer = SequenceFile.createWriter(configuration,
                            SequenceFile.Writer.stream(fsDataOutputStream),
                            SequenceFile.Writer.keyClass(Text.class),
                            SequenceFile.Writer.valueClass(InputStreamWritable.class),
                            SequenceFile.Writer.compression(compressionType, compressionCodec))) {

                processInputStream(in, flowFile, writer);

            } finally {
                watch.stop();
            }
        }
    });
    logger.debug("Wrote Sequence File {} ({}).",
            new Object[]{sequenceFilename, watch.calculateDataRate(flowFile.getSize())});
    return sfFlowFile;
}
 
Example 10
Source File: FetchElasticsearch5.java    From localization_nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {

    synchronized (esClient) {
        if(esClient.get() == null) {
            super.setup(context);
        }
    }

    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final String index = context.getProperty(INDEX).evaluateAttributeExpressions(flowFile).getValue();
    final String docId = context.getProperty(DOC_ID).evaluateAttributeExpressions(flowFile).getValue();
    final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(flowFile).getValue();
    final Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(flowFile).getValue());

    final ComponentLog logger = getLogger();
    try {

        logger.debug("Fetching {}/{}/{} from Elasticsearch", new Object[]{index, docType, docId});
        GetRequestBuilder getRequestBuilder = esClient.get().prepareGet(index, docType, docId);
        final GetResponse getResponse = getRequestBuilder.execute().actionGet();

        if (getResponse == null || !getResponse.isExists()) {
            logger.warn("Failed to read {}/{}/{} from Elasticsearch: Document not found",
                    new Object[]{index, docType, docId});

            // We couldn't find the document, so penalize it and send it to "not found"
            flowFile = session.penalize(flowFile);
            session.transfer(flowFile, REL_NOT_FOUND);
        } else {
            flowFile = session.putAllAttributes(flowFile, new HashMap<String, String>() {{
                put("filename", docId);
                put("es.index", index);
                put("es.type", docType);
            }});
            flowFile = session.write(flowFile, new OutputStreamCallback() {
                @Override
                public void process(OutputStream out) throws IOException {
                    out.write(getResponse.getSourceAsString().getBytes(charset));
                }
            });
            logger.debug("Elasticsearch document " + docId + " fetched, routing to success");
            // The document is JSON, so update the MIME type of the flow file
            flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
            session.getProvenanceReporter().fetch(flowFile, getResponse.remoteAddress().getAddress());
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (NoNodeAvailableException
            | ElasticsearchTimeoutException
            | ReceiveTimeoutTransportException
            | NodeClosedException exceptionToRetry) {
        logger.error("Failed to read into Elasticsearch due to {}, this may indicate an error in configuration "
                        + "(hosts, username/password, etc.), or this issue may be transient. Routing to retry",
                new Object[]{exceptionToRetry.getLocalizedMessage()}, exceptionToRetry);
        session.transfer(flowFile, REL_RETRY);
        context.yield();

    } catch (Exception e) {
        logger.error("Failed to read {} from Elasticsearch due to {}", new Object[]{flowFile, e.getLocalizedMessage()}, e);
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
    }
}
 
Example 11
Source File: YandexTranslate.java    From nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final StopWatch stopWatch = new StopWatch(true);
    final String key = context.getProperty(KEY).getValue();
    final String sourceLanguage = context.getProperty(SOURCE_LANGUAGE).evaluateAttributeExpressions(flowFile).getValue();
    final String targetLanguage = context.getProperty(TARGET_LANGUAGE).evaluateAttributeExpressions(flowFile).getValue();
    final String encoding = context.getProperty(CHARACTER_SET).evaluateAttributeExpressions(flowFile).getValue();

    final List<String> attributeNames = new ArrayList<>();
    final List<String> textValues = new ArrayList<>();
    for (final PropertyDescriptor descriptor : context.getProperties().keySet()) {
        if (descriptor.isDynamic()) {
            attributeNames.add(descriptor.getName()); // add to list so that we know the order when the translations come back.
            textValues.add(context.getProperty(descriptor).evaluateAttributeExpressions(flowFile).getValue());
        }
    }

    if (context.getProperty(TRANSLATE_CONTENT).asBoolean()) {
        final byte[] buff = new byte[(int) flowFile.getSize()];
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(final InputStream in) throws IOException {
                StreamUtils.fillBuffer(in, buff);
            }
        });
        final String content = new String(buff, Charset.forName(encoding));
        textValues.add(content);
    }

    final Invocation invocation = prepareResource(key, textValues, sourceLanguage, targetLanguage);

    final Response response;
    try {
        response = invocation.invoke();
    } catch (final Exception e) {
        getLogger().error("Failed to make request to Yandex to transate text for {} due to {}; routing to comms.failure", new Object[]{flowFile, e});
        session.transfer(flowFile, REL_COMMS_FAILURE);
        return;
    }

    if (response.getStatus() != Response.Status.OK.getStatusCode()) {
        getLogger().error("Failed to translate text using Yandex for {}; response was {}: {}; routing to {}", new Object[]{
                flowFile, response.getStatus(), response.getStatusInfo().getReasonPhrase(), REL_TRANSLATION_FAILED.getName()});
        flowFile = session.putAttribute(flowFile, "yandex.translate.failure.reason", response.getStatusInfo().getReasonPhrase());
        session.transfer(flowFile, REL_TRANSLATION_FAILED);
        return;
    }

    final Map<String, String> newAttributes = new HashMap<>();
    final Translation translation = response.readEntity(Translation.class);
    final List<String> texts = translation.getText();
    for (int i = 0; i < texts.size(); i++) {
        final String text = texts.get(i);
        if (i < attributeNames.size()) {
            final String attributeName = attributeNames.get(i);
            newAttributes.put(attributeName, text);
        } else {
            flowFile = session.write(flowFile, new OutputStreamCallback() {
                @Override
                public void process(final OutputStream out) throws IOException {
                    out.write(text.getBytes(encoding));
                }
            });

            newAttributes.put("language", targetLanguage);
        }
    }

    if (!newAttributes.isEmpty()) {
        flowFile = session.putAllAttributes(flowFile, newAttributes);
    }

    stopWatch.stop();
    session.transfer(flowFile, REL_SUCCESS);
    getLogger().info("Successfully translated {} items for {} from {} to {} in {}; routing to success",
            new Object[]{texts.size(), flowFile, sourceLanguage, targetLanguage, stopWatch.getDuration()});
}
 
Example 12
Source File: PutElasticsearchRecord.java    From nifi with Apache License 2.0
private FlowFile indexDocuments(BulkOperation bundle, ProcessSession session, FlowFile input) throws Exception {
    IndexOperationResponse response = clientService.bulk(bundle.getOperationList());
    if (response.hasErrors()) {
        if(logErrors || getLogger().isDebugEnabled()) {
            List<Map<String, Object>> errors = response.getItems();
            ObjectMapper mapper = new ObjectMapper();
            mapper.enable(SerializationFeature.INDENT_OUTPUT);
            String output = String.format("An error was encountered while processing bulk operations. Server response below:\n\n%s", mapper.writeValueAsString(errors));

            if (logErrors) {
                getLogger().error(output);
            } else {
                getLogger().debug(output);
            }
        }

        if (writerFactory != null) {
            FlowFile errorFF = session.create(input);
            try (OutputStream os = session.write(errorFF);
                 RecordSetWriter writer = writerFactory.createWriter(getLogger(), bundle.getSchema(), os )) {

                int added = 0;
                writer.beginRecordSet();
                for (int index = 0; index < response.getItems().size(); index++) {
                    Map<String, Object> current = response.getItems().get(index);
                    String key = current.keySet().stream().findFirst().get();
                    Map<String, Object> inner = (Map<String, Object>) current.get(key);
                    if (inner.containsKey("error")) {
                        writer.write(bundle.getOriginalRecords().get(index));
                        added++;
                    }
                }
                writer.finishRecordSet();
                writer.close();
                os.close();

                errorFF = session.putAttribute(errorFF, ATTR_RECORD_COUNT, String.valueOf(added));

                session.transfer(errorFF, REL_FAILED_RECORDS);

                return errorFF;
            } catch (Exception ex) {
                getLogger().error("", ex);
                session.remove(errorFF);
                throw ex;
            }
        }

        return null;
    } else {
        return null;
    }
}
 
Example 13
Source File: ConsumeGCPubSub.java    From nifi with Apache License 2.0
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    if (subscriber == null) {

        if (storedException.get() != null) {
            getLogger().error("Failed to create Google Cloud PubSub subscriber due to {}", new Object[]{storedException.get()});
        } else {
            getLogger().error("Google Cloud PubSub Subscriber was not properly created. Yielding the processor...");
        }

        context.yield();
        return;
    }

    final PullResponse pullResponse = subscriber.pullCallable().call(pullRequest);
    final List<String> ackIds = new ArrayList<>();

    for (ReceivedMessage message : pullResponse.getReceivedMessagesList()) {
        if (message.hasMessage()) {
            FlowFile flowFile = session.create();

            final Map<String, String> attributes = new HashMap<>();
            ackIds.add(message.getAckId());

            attributes.put(ACK_ID_ATTRIBUTE, message.getAckId());
            attributes.put(SERIALIZED_SIZE_ATTRIBUTE, String.valueOf(message.getSerializedSize()));
            attributes.put(MESSAGE_ID_ATTRIBUTE, message.getMessage().getMessageId());
            attributes.put(MSG_ATTRIBUTES_COUNT_ATTRIBUTE, String.valueOf(message.getMessage().getAttributesCount()));
            attributes.put(MSG_PUBLISH_TIME_ATTRIBUTE, String.valueOf(message.getMessage().getPublishTime().getSeconds()));
            attributes.putAll(message.getMessage().getAttributesMap());

            flowFile = session.putAllAttributes(flowFile, attributes);
            flowFile = session.write(flowFile, out -> out.write(message.getMessage().getData().toByteArray()));

            session.transfer(flowFile, REL_SUCCESS);
            session.getProvenanceReporter().receive(flowFile, getSubscriptionName(context));
        }
    }

    if (!ackIds.isEmpty()) {
        AcknowledgeRequest acknowledgeRequest = AcknowledgeRequest.newBuilder()
                .addAllAckIds(ackIds)
                .setSubscription(getSubscriptionName(context))
                .build();
        subscriber.acknowledgeCallable().call(acknowledgeRequest);
    }
}
 
Example 14
Source File: EncodeContent.java    From localization_nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final ComponentLog logger = getLogger();

    boolean encode = context.getProperty(MODE).getValue().equalsIgnoreCase(ENCODE_MODE);
    String encoding = context.getProperty(ENCODING).getValue();
    StreamCallback encoder = null;

    // Select the encoder/decoder to use
    if (encode) {
        if (encoding.equalsIgnoreCase(BASE64_ENCODING)) {
            encoder = new EncodeBase64();
        } else if (encoding.equalsIgnoreCase(BASE32_ENCODING)) {
            encoder = new EncodeBase32();
        } else if (encoding.equalsIgnoreCase(HEX_ENCODING)) {
            encoder = new EncodeHex();
        }
    } else {
        if (encoding.equalsIgnoreCase(BASE64_ENCODING)) {
            encoder = new DecodeBase64();
        } else if (encoding.equalsIgnoreCase(BASE32_ENCODING)) {
            encoder = new DecodeBase32();
        } else if (encoding.equalsIgnoreCase(HEX_ENCODING)) {
            encoder = new DecodeHex();
        }
    }

    if (encoder == null) {
        logger.warn("Unknown operation: {} {}", new Object[]{encode ? "encode" : "decode", encoding});
        return;
    }

    try {
        final StopWatch stopWatch = new StopWatch(true);
        flowFile = session.write(flowFile, encoder);

        logger.info("Successfully {} {}", new Object[]{encode ? "encoded" : "decoded", flowFile});
        session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        session.transfer(flowFile, REL_SUCCESS);
    } catch (Exception e) {
        logger.error("Failed to {} {} due to {}", new Object[]{encode ? "encode" : "decode", flowFile, e});
        session.transfer(flowFile, REL_FAILURE);
    }
}
 
Example 15
Source File: DafCSVCleansing.java    From daf-kylo with GNU Affero General Public License v3.0
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    ComponentLog logger = getLogger();

    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    String separatorString = context.getProperty(SEPARATOR_CHAR).evaluateAttributeExpressions(flowFile).getValue();
    String quoteString = context.getProperty(QUOTE_CHAR).evaluateAttributeExpressions(flowFile).getValue();
    String escapeString = context.getProperty(ESCAPE_CHAR).evaluateAttributeExpressions(flowFile).getValue();

    if (StringUtils.startsWith(separatorString, "\\")) {
        separatorString = StringEscapeUtils.unescapeJava(separatorString);
    }

    if (StringUtils.startsWith(quoteString, "\\")) {
        quoteString = StringEscapeUtils.unescapeJava(quoteString);
    }

    if (StringUtils.startsWith(escapeString, "\\")) {
        escapeString = StringEscapeUtils.unescapeJava(escapeString);
    }

    char separatorChar = separatorString.charAt(0);
    char quoteChar = quoteString.charAt(0);
    char escapeChar = escapeString.charAt(0);

    CSVParser parser = new CSVParserBuilder().withSeparator(separatorChar).withQuoteChar(quoteChar).withEscapeChar(escapeChar).build();
    StopWatch stopWatch = new StopWatch(true);

    try {
        flowFile = session.write(flowFile, (in, out) -> {
            CSVReader reader = new CSVReaderBuilder(new BufferedReader(new InputStreamReader(in))).withCSVParser(parser).build();
            CSVWriter writer = new CSVWriter(new BufferedWriter(new OutputStreamWriter(out)), separatorChar, quoteChar, escapeChar, CSVWriter.DEFAULT_LINE_END);

            reader.forEach(row -> {
                String[] cleanLine = new String[row.length];

                for (int i = 0; i < row.length; i++) {
                    cleanLine[i] = StringUtils.replaceAll(row[i], "\r\n|\n|\r", " ");
                }
                writer.writeNext(cleanLine);
            });

            writer.close();
        });
    } catch (Exception ex) {
        logger.error("Error CSV processing", ex);
        logger.info("Transferred {} to 'failure'", new Object[]{flowFile});
        session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    logger.info("Transferred {} to 'success'", new Object[]{flowFile});
    session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
    session.transfer(flowFile, REL_SUCCESS);
}
 
Example 16
Source File: MergeContent.java    From localization_nifi with Apache License 2.0
@Override
public FlowFile merge(final Bin bin, final ProcessContext context) {
    final List<FlowFile> contents = bin.getContents();

    final ProcessSession session = bin.getSession();
    FlowFile bundle = session.create(bin.getContents());
    final AtomicReference<String> bundleMimeTypeRef = new AtomicReference<>(null);
    bundle = session.write(bundle, new OutputStreamCallback() {
        @Override
        public void process(final OutputStream out) throws IOException {
            final byte[] header = getDelimiterContent(context, contents, HEADER);
            if (header != null) {
                out.write(header);
            }

            boolean isFirst = true;
            final Iterator<FlowFile> itr = contents.iterator();
            while (itr.hasNext()) {
                final FlowFile flowFile = itr.next();
                bin.getSession().read(flowFile, false, new InputStreamCallback() {
                    @Override
                    public void process(final InputStream in) throws IOException {
                        StreamUtils.copy(in, out);
                    }
                });

                if (itr.hasNext()) {
                    final byte[] demarcator = getDelimiterContent(context, contents, DEMARCATOR);
                    if (demarcator != null) {
                        out.write(demarcator);
                    }
                }

                final String flowFileMimeType = flowFile.getAttribute(CoreAttributes.MIME_TYPE.key());
                if (isFirst) {
                    bundleMimeTypeRef.set(flowFileMimeType);
                    isFirst = false;
                } else {
                    if (bundleMimeTypeRef.get() != null && !bundleMimeTypeRef.get().equals(flowFileMimeType)) {
                        bundleMimeTypeRef.set(null);
                    }
                }
            }

            final byte[] footer = getDelimiterContent(context, contents, FOOTER);
            if (footer != null) {
                out.write(footer);
            }
        }
    });

    session.getProvenanceReporter().join(contents, bundle);
    bundle = session.putAttribute(bundle, CoreAttributes.FILENAME.key(), createFilename(contents));
    if (bundleMimeTypeRef.get() != null) {
        this.mimeType = bundleMimeTypeRef.get();
    }

    return bundle;
}
 
Example 17
Source File: JsonQueryElasticsearch.java    From nifi with Apache License 2.0
private FlowFile writeHitFlowFile(String json, ProcessSession session, FlowFile hitFlowFile, Map<String, String> attributes) {
    hitFlowFile = session.write(hitFlowFile, out -> out.write(json.getBytes()));

    return session.putAllAttributes(hitFlowFile, attributes);
}
 
Example 18
Source File: InferAvroSchema.java    From localization_nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    try {

        final AtomicReference<String> avroSchema = new AtomicReference<>();
        switch (context.getProperty(INPUT_CONTENT_TYPE).getValue()) {
            case USE_MIME_TYPE:
                avroSchema.set(inferAvroSchemaFromMimeType(original, context, session));
                break;
            case JSON_CONTENT:
                avroSchema.set(inferAvroSchemaFromJSON(original, context, session));
                break;
            case CSV_CONTENT:
                avroSchema.set(inferAvroSchemaFromCSV(original, context, session));
                break;
            default:
                //Shouldn't be possible but just in case
                session.transfer(original, REL_UNSUPPORTED_CONTENT);
                break;
        }


        if (StringUtils.isNotEmpty(avroSchema.get())) {

            String destination = context.getProperty(SCHEMA_DESTINATION).getValue();
            FlowFile avroSchemaFF = null;

            switch (destination) {
                case DESTINATION_ATTRIBUTE:
                    avroSchemaFF = session.putAttribute(session.clone(original), AVRO_SCHEMA_ATTRIBUTE_NAME, avroSchema.get());
                    //Leaves the original CoreAttributes.MIME_TYPE in place.
                    break;
                case DESTINATION_CONTENT:
                    avroSchemaFF = session.write(session.create(), new OutputStreamCallback() {
                        @Override
                        public void process(OutputStream out) throws IOException {
                            out.write(avroSchema.get().getBytes());
                        }
                    });
                    avroSchemaFF = session.putAttribute(avroSchemaFF, CoreAttributes.MIME_TYPE.key(), AVRO_MIME_TYPE);
                    break;
                default:
                    break;
            }

            //Transfer the sessions.
            avroSchemaFF = session.putAttribute(avroSchemaFF, CoreAttributes.FILENAME.key(), (original.getAttribute(CoreAttributes.FILENAME.key()) + AVRO_FILE_EXTENSION));
            session.transfer(avroSchemaFF, REL_SUCCESS);
            session.transfer(original, REL_ORIGINAL);
        } else {
            //If the avroSchema is null then the content type is unknown and therefore unsupported
            session.transfer(original, REL_UNSUPPORTED_CONTENT);
        }

    } catch (Exception ex) {
        getLogger().error("Failed to infer Avro schema for {} due to {}", new Object[] {original, ex});
        session.transfer(original, REL_FAILURE);
    }
}
 
Example 19
Source File: QueryElasticsearchHttp.java    From localization_nifi with Apache License 2.0
private int getPage(final Response getResponse, final URL url, final ProcessContext context,
        final ProcessSession session, FlowFile flowFile, final ComponentLog logger,
        final long startNanos, boolean targetIsContent)
        throws IOException {
    List<FlowFile> page = new ArrayList<>();
    final int statusCode = getResponse.code();

    if (isSuccess(statusCode)) {
        ResponseBody body = getResponse.body();
        final byte[] bodyBytes = body.bytes();
        JsonNode responseJson = parseJsonResponse(new ByteArrayInputStream(bodyBytes));
        JsonNode hits = responseJson.get("hits").get("hits");

        for(int i = 0; i < hits.size(); i++) {
            JsonNode hit = hits.get(i);
            String retrievedId = hit.get("_id").asText();
            String retrievedIndex = hit.get("_index").asText();
            String retrievedType = hit.get("_type").asText();

            FlowFile documentFlowFile = null;
            if (flowFile != null) {
                documentFlowFile = targetIsContent ? session.create(flowFile) : session.clone(flowFile);
            } else {
                documentFlowFile = session.create();
            }

            JsonNode source = hit.get("_source");
            documentFlowFile = session.putAttribute(documentFlowFile, "es.id", retrievedId);
            documentFlowFile = session.putAttribute(documentFlowFile, "es.index", retrievedIndex);
            documentFlowFile = session.putAttribute(documentFlowFile, "es.type", retrievedType);

            if (targetIsContent) {
                documentFlowFile = session.putAttribute(documentFlowFile, "filename", retrievedId);
                documentFlowFile = session.putAttribute(documentFlowFile, "mime.type", "application/json");
                documentFlowFile = session.write(documentFlowFile, out -> {
                    out.write(source.toString().getBytes());
                });
            } else {
                Map<String, String> attributes = new HashMap<>();
                for(Iterator<Entry<String, JsonNode>> it = source.getFields(); it.hasNext(); ) {
                    Entry<String, JsonNode> entry = it.next();
                    attributes.put(ATTRIBUTE_PREFIX + entry.getKey(), entry.getValue().asText());
                }
                documentFlowFile = session.putAllAttributes(documentFlowFile, attributes);
            }
            page.add(documentFlowFile);
        }
        logger.debug("Elasticsearch retrieved " + responseJson.size() + " documents, routing to success");

        session.transfer(page, REL_SUCCESS);
    } else {
        try {
            // 5xx -> RETRY, but a server error might last a while, so yield
            if (statusCode / 100 == 5) {
                throw new RetryableException(String.format("Elasticsearch returned code %s with message %s, transferring flow file to retry. This is likely a server problem, yielding...",
                        statusCode, getResponse.message()));
            } else if (context.hasIncomingConnection()) {  // 1xx, 3xx, 4xx -> NO RETRY
                throw new UnretryableException(String.format("Elasticsearch returned code %s with message %s, transferring flow file to failure",
                        statusCode, getResponse.message()));
            } else {
                logger.warn("Elasticsearch returned code {} with message {}", new Object[]{statusCode, getResponse.message()});
            }
        } finally {
            if (!page.isEmpty()) {
                session.remove(page);
                page.clear();
            }
        }
    }

    // emit provenance event
    final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
    if (!page.isEmpty()) {
        if (context.hasNonLoopConnection()) {
            page.forEach(f -> session.getProvenanceReporter().fetch(f, url.toExternalForm(), millis));
        } else {
            page.forEach(f -> session.getProvenanceReporter().receive(f, url.toExternalForm(), millis));
        }
    }
    return page.size();
}