Java Code Examples for org.apache.nifi.processor.ProcessSession#clone()

The following examples show how to use org.apache.nifi.processor.ProcessSession#clone(). Each example is taken from an open-source project; the source file and its license are noted above it.
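Before the examples, here is a minimal, hypothetical sketch of the call in context. clone(FlowFile) creates a full copy of a FlowFile, content and attributes, within the current session; the copy must then be transferred or removed like any other FlowFile. The relationship names below are illustrative and not taken from the examples.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    // clone() copies both content and attributes and records a CLONE provenance event
    FlowFile copy = session.clone(flowFile);
    session.transfer(copy, REL_COPY);          // hypothetical relationship
    session.transfer(flowFile, REL_ORIGINAL);  // hypothetical relationship
}
 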
Example 1
Source File: DuplicateFlowFile.java    From nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    for (int i = 1; i <= context.getProperty(NUM_COPIES).evaluateAttributeExpressions(flowFile).asInteger(); i++) {
        FlowFile copy = session.clone(flowFile);
        copy = session.putAttribute(copy, COPY_INDEX_ATTRIBUTE, Integer.toString(i));
        session.transfer(copy, REL_SUCCESS);
    }

    flowFile = session.putAttribute(flowFile, COPY_INDEX_ATTRIBUTE, "0");
    session.transfer(flowFile, REL_SUCCESS);
}
 
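A quick way to exercise Example 1 is NiFi's mock framework. The sketch below assumes the standard nifi-mock TestRunner API and assumes the processor's NUM_COPIES property descriptor and REL_SUCCESS relationship are visible to the test.

TestRunner runner = TestRunners.newTestRunner(new DuplicateFlowFile());
runner.setProperty(DuplicateFlowFile.NUM_COPIES, "2");
runner.enqueue("some content".getBytes());
runner.run();

// the original (copy index "0") plus two clones
runner.assertTransferCount(DuplicateFlowFile.REL_SUCCESS, 3);
 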
Example 2
Source File: DuplicateFlowFile.java    From localization_nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    for (int i=0; i < context.getProperty(NUM_COPIES).asInteger(); i++) {
        final FlowFile copy = session.clone(flowFile);
        session.transfer(copy, REL_SUCCESS);
    }

    session.transfer(flowFile, REL_SUCCESS);
}
 
Example 3
Source File: UpdateAttribute.java    From localization_nifi with Apache License 2.0
private boolean evaluateCriteria(final ProcessSession session, final ProcessContext context, final Criteria criteria, final FlowFile flowfile,
        final Map<FlowFile, List<Rule>> matchedRules, final Map<String, String> statefulAttributes) {
    final ComponentLog logger = getLogger();
    final List<Rule> rules = criteria.getRules();

    // consider each rule and hold a copy of the flowfile for each matched rule
    for (final Rule rule : rules) {
        // evaluate the rule
        if (evaluateRule(context, rule, flowfile, statefulAttributes)) {
            final FlowFile flowfileToUse;

            // determine if we should use the original flow file or clone
            if (FlowFilePolicy.USE_ORIGINAL.equals(criteria.getFlowFilePolicy()) || matchedRules.isEmpty()) {
                flowfileToUse = flowfile;
            } else {
                // clone the original for this rule
                flowfileToUse = session.clone(flowfile);
            }

            // store the flow file to use when executing this rule
            List<Rule> rulesForFlowFile = matchedRules.get(flowfileToUse);
            if (rulesForFlowFile == null) {
                rulesForFlowFile = new ArrayList<>();
                matchedRules.put(flowfileToUse, rulesForFlowFile);
            }
            rulesForFlowFile.add(rule);

            // log if appropriate
            if (debugEnabled) {
                logger.debug(this + " all conditions met for rule '" + rule.getName() + "'. Using flow file - " + flowfileToUse);
            }
        }
    }

    return !matchedRules.isEmpty();
}
 
Example 4
Source File: ParseEvtx.java    From localization_nifi with Apache License 2.0
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    ComponentLog logger = getLogger();
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    String basename = getBasename(flowFile, logger);
    String granularity = context.getProperty(GRANULARITY).getValue();
    if (FILE.equals(granularity)) {
        // File granularity will emit a FlowFile for each input
        FlowFile original = session.clone(flowFile);
        AtomicReference<Exception> exceptionReference = new AtomicReference<>(null);
        FlowFile updated = session.write(flowFile, (in, out) -> {
            processFileGranularity(session, logger, original, basename, exceptionReference, in, out);
        });
        session.transfer(original, REL_ORIGINAL);
        resultProcessor.process(session, logger, updated, exceptionReference.get(), getName(basename, null, null, XML_EXTENSION));
    } else {
        session.read(flowFile, in -> {
            if (RECORD.equals(granularity)) {
                // Record granularity will emit a FlowFile for every record (event)
                processRecordGranularity(session, logger, flowFile, basename, in);
            } else if (CHUNK.equals(granularity)) {
                // Chunk granularity will emit a FlowFile for each chunk of the file
                processChunkGranularity(session, logger, flowFile, basename, in);
            }
        });
        session.transfer(flowFile, REL_ORIGINAL);
    }
}
 
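Example 4 clones before session.write(...) so that an untouched copy of the input can go to REL_ORIGINAL while the write replaces the original's content. The same clone-before-write pattern in isolation, with a hypothetical transform callback and REL_SUCCESS relationship:

FlowFile original = session.clone(flowFile);                          // keep the pristine content
flowFile = session.write(flowFile, (in, out) -> transform(in, out));  // hypothetical transform
session.transfer(original, REL_ORIGINAL);
session.transfer(flowFile, REL_SUCCESS);
 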
Example 5
Source File: ConversionProcessor.java    From scalable-ocr with Apache License 2.0
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
  final ProcessorLog log = this.getLogger();
  final AtomicReference<List<Map.Entry<File, Boolean>>> value = new AtomicReference<>();
  final File tempDir = new File(context.getProperty(TEMP_DIR).getValue());
  System.getProperties().setProperty("jna.library.path", context.getProperty(JNI_PATH).getValue());
  FlowFile flowfile = session.get();
  if (flowfile == null) {
    return;
  }
  session.read(flowfile, in -> {
    try {
      value.set(convert(in, tempDir));
    }
    catch(Exception e) {
      log.error("Unable to convert: " + e.getMessage(), e);
    }
  });
  if(value.get() != null) {
    for(Map.Entry<File, Boolean> kv : value.get()) {
      final File convertedFile = kv.getKey();
      try {
        final int pageNumber = getPageNumber(convertedFile.getName());
        if(kv.getValue()) {
          FlowFile ff = session.clone(flowfile);
          ff = session.putAttribute(ff, "pageNumber", "" + pageNumber);
          ff = session.write(ff, out -> {
            // close the temp-file stream once its bytes are copied into the FlowFile
            try (InputStream fileIn = new BufferedInputStream(new FileInputStream(convertedFile))) {
              IOUtils.copy(fileIn, out);
            }
          });
          session.transfer(ff, SUCCESS);
        }
      }
      finally {
        if(convertedFile != null && convertedFile.exists()) {
          convertedFile.delete();
        }
      }
    }
  }
  session.transfer(flowfile, RAW);
}
 
Example 6
Source File: UpdateAttribute.java    From nifi with Apache License 2.0
private boolean evaluateCriteria(final ProcessSession session, final ProcessContext context, final Criteria criteria, final FlowFile flowfile,
        final Map<FlowFile, List<Rule>> matchedRules, final Map<String, String> statefulAttributes) {
    final ComponentLog logger = getLogger();
    final List<Rule> rules = criteria.getRules();

    // consider each rule and hold a copy of the flowfile for each matched rule
    for (final Rule rule : rules) {
        // evaluate the rule
        if (evaluateRule(context, rule, flowfile, statefulAttributes)) {
            final FlowFile flowfileToUse;

            // determine if we should use the original flow file or clone
            if (FlowFilePolicy.USE_ORIGINAL.equals(criteria.getFlowFilePolicy()) || matchedRules.isEmpty()) {
                flowfileToUse = flowfile;
            } else {
                // clone the original for this rule
                flowfileToUse = session.clone(flowfile);
            }

            // store the flow file to use when executing this rule
            List<Rule> rulesForFlowFile = matchedRules.get(flowfileToUse);
            if (rulesForFlowFile == null) {
                rulesForFlowFile = new ArrayList<>();
                matchedRules.put(flowfileToUse, rulesForFlowFile);
            }
            rulesForFlowFile.add(rule);

            // log if appropriate
            if (debugEnabled) {
                logger.debug(this + " all conditions met for rule '" + rule.getName() + "'. Using flow file - " + flowfileToUse);
            }
        }
    }

    return !matchedRules.isEmpty();
}
 
Example 7
Source File: ParseEvtx.java    From nifi with Apache License 2.0
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    ComponentLog logger = getLogger();
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    String basename = getBasename(flowFile, logger);
    String granularity = context.getProperty(GRANULARITY).getValue();
    if (FILE.equals(granularity)) {
        // File granularity will emit a FlowFile for each input
        FlowFile original = session.clone(flowFile);
        AtomicReference<Exception> exceptionReference = new AtomicReference<>(null);
        FlowFile updated = session.write(flowFile, (in, out) -> {
            processFileGranularity(session, logger, original, basename, exceptionReference, in, out);
        });
        session.transfer(original, REL_ORIGINAL);
        resultProcessor.process(session, logger, updated, exceptionReference.get(), getName(basename, null, null, XML_EXTENSION));
    } else {
        session.read(flowFile, in -> {
            if (RECORD.equals(granularity)) {
                // Record granularity will emit a FlowFile for every record (event)
                processRecordGranularity(session, logger, flowFile, basename, in);
            } else if (CHUNK.equals(granularity)) {
                // Chunk granularity will emit a FlowFile for each chunk of the file
                processChunkGranularity(session, logger, flowFile, basename, in);
            }
        });
        session.transfer(flowFile, REL_ORIGINAL);
    }
}
 
Example 8
Source File: RouteHL7.java    From localization_nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final Charset charset = Charset.forName(context.getProperty(CHARACTER_SET).evaluateAttributeExpressions(flowFile).getValue());

    final byte[] buffer = new byte[(int) flowFile.getSize()];
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, buffer);
        }
    });

    @SuppressWarnings("resource")
    final HapiContext hapiContext = new DefaultHapiContext();
    hapiContext.setValidationContext((ca.uhn.hl7v2.validation.ValidationContext) ValidationContextFactory.noValidation());

    final PipeParser parser = hapiContext.getPipeParser();
    final String hl7Text = new String(buffer, charset);
    final HL7Message message;
    try {
        final Message hapiMessage = parser.parse(hl7Text);
        message = new HapiMessage(hapiMessage);
    } catch (final Exception e) {
        getLogger().error("Failed to parse {} as HL7 due to {}; routing to failure", new Object[]{flowFile, e});
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    final Set<String> matchingRels = new HashSet<>();
    final Map<Relationship, HL7Query> queryMap = queries;
    for (final Map.Entry<Relationship, HL7Query> entry : queryMap.entrySet()) {
        final Relationship relationship = entry.getKey();
        final HL7Query query = entry.getValue();

        final QueryResult result = query.evaluate(message);
        if (result.isMatch()) {
            FlowFile clone = session.clone(flowFile);
            clone = session.putAttribute(clone, "RouteHL7.Route", relationship.getName());
            session.transfer(clone, relationship);
            session.getProvenanceReporter().route(clone, relationship);
            matchingRels.add(relationship.getName());
        }
    }

    session.transfer(flowFile, REL_ORIGINAL);
    getLogger().info("Routed a copy of {} to {} relationships: {}", new Object[]{flowFile, matchingRels.size(), matchingRels});
}
 
Example 9
Source File: SegmentContent.java    From localization_nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final String segmentId = UUID.randomUUID().toString();
    final long segmentSize = context.getProperty(SIZE).asDataSize(DataUnit.B).longValue();

    final String originalFileName = flowFile.getAttribute(CoreAttributes.FILENAME.key());

    if (flowFile.getSize() <= segmentSize) {
        flowFile = session.putAttribute(flowFile, SEGMENT_ID, segmentId);
        flowFile = session.putAttribute(flowFile, SEGMENT_INDEX, "1");
        flowFile = session.putAttribute(flowFile, SEGMENT_COUNT, "1");
        flowFile = session.putAttribute(flowFile, SEGMENT_ORIGINAL_FILENAME, originalFileName);

        flowFile = session.putAttribute(flowFile, FRAGMENT_ID, segmentId);
        flowFile = session.putAttribute(flowFile, FRAGMENT_INDEX, "1");
        flowFile = session.putAttribute(flowFile, FRAGMENT_COUNT, "1");

        FlowFile clone = session.clone(flowFile);
        session.transfer(flowFile, REL_ORIGINAL);
        session.transfer(clone, REL_SEGMENTS);
        return;
    }

    int totalSegments = (int) (flowFile.getSize() / segmentSize);
    if (totalSegments * segmentSize < flowFile.getSize()) {
        totalSegments++;
    }

    final Map<String, String> segmentAttributes = new HashMap<>();
    segmentAttributes.put(SEGMENT_ID, segmentId);
    segmentAttributes.put(SEGMENT_COUNT, String.valueOf(totalSegments));
    segmentAttributes.put(SEGMENT_ORIGINAL_FILENAME, originalFileName);

    segmentAttributes.put(FRAGMENT_ID, segmentId);
    segmentAttributes.put(FRAGMENT_COUNT, String.valueOf(totalSegments));

    final Set<FlowFile> segmentSet = new HashSet<>();
    for (int i = 1; i <= totalSegments; i++) {
        final long segmentOffset = segmentSize * (i - 1);
        FlowFile segment = session.clone(flowFile, segmentOffset, Math.min(segmentSize, flowFile.getSize() - segmentOffset));
        segmentAttributes.put(SEGMENT_INDEX, String.valueOf(i));
        segmentAttributes.put(FRAGMENT_INDEX, String.valueOf(i));
        segment = session.putAllAttributes(segment, segmentAttributes);
        segmentSet.add(segment);
    }

    session.transfer(segmentSet, REL_SEGMENTS);
    flowFile = FragmentAttributes.copyAttributesToOriginal(session, flowFile, segmentId, totalSegments);
    session.transfer(flowFile, REL_ORIGINAL);

    if (totalSegments <= 10) {
        getLogger().info("Segmented {} into {} segments: {}", new Object[]{flowFile, totalSegments, segmentSet});
    } else {
        getLogger().info("Segmented {} into {} segments", new Object[]{flowFile, totalSegments});
    }
}
 
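Example 9 also uses the three-argument overload, clone(FlowFile, long offset, long size), which clones only a range of the parent's content while still copying its attributes. A minimal sketch of the same overload, with a hypothetical REL_PREVIEW relationship:

final long previewSize = Math.min(1024L, flowFile.getSize());
FlowFile preview = session.clone(flowFile, 0, previewSize);  // first 1 KB of content, all attributes
preview = session.putAttribute(preview, "preview.size", Long.toString(previewSize));
session.transfer(preview, REL_PREVIEW);  // hypothetical relationship
 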
Example 10
Source File: MergeContent.java    From localization_nifi with Apache License 2.0
@Override
protected boolean processBin(final Bin bin, final ProcessContext context) throws ProcessException {

    final String mergeFormat = context.getProperty(MERGE_FORMAT).getValue();
    MergeBin merger;
    switch (mergeFormat) {
        case MERGE_FORMAT_TAR_VALUE:
            merger = new TarMerge();
            break;
        case MERGE_FORMAT_ZIP_VALUE:
            merger = new ZipMerge(context.getProperty(COMPRESSION_LEVEL).asInteger());
            break;
        case MERGE_FORMAT_FLOWFILE_STREAM_V3_VALUE:
            merger = new FlowFileStreamMerger(new FlowFilePackagerV3(), "application/flowfile-v3");
            break;
        case MERGE_FORMAT_FLOWFILE_STREAM_V2_VALUE:
            merger = new FlowFileStreamMerger(new FlowFilePackagerV2(), "application/flowfile-v2");
            break;
        case MERGE_FORMAT_FLOWFILE_TAR_V1_VALUE:
            merger = new FlowFileStreamMerger(new FlowFilePackagerV1(), "application/flowfile-v1");
            break;
        case MERGE_FORMAT_CONCAT_VALUE:
            merger = new BinaryConcatenationMerge();
            break;
        case MERGE_FORMAT_AVRO_VALUE:
            merger = new AvroMerge();
            break;
        default:
            throw new AssertionError();
    }

    final AttributeStrategy attributeStrategy;
    switch (context.getProperty(ATTRIBUTE_STRATEGY).getValue()) {
        case ATTRIBUTE_STRATEGY_ALL_UNIQUE:
            attributeStrategy = new KeepUniqueAttributeStrategy();
            break;
        case ATTRIBUTE_STRATEGY_ALL_COMMON:
        default:
            attributeStrategy = new KeepCommonAttributeStrategy();
            break;
    }

    final List<FlowFile> contents = bin.getContents();
    final ProcessSession binSession = bin.getSession();

    if (MERGE_STRATEGY_DEFRAGMENT.equals(context.getProperty(MERGE_STRATEGY).getValue())) {
        final String error = getDefragmentValidationError(bin.getContents());

        // Fail the flow files and commit them
        if (error != null) {
            final String binDescription = contents.size() <= 10 ? contents.toString() : contents.size() + " FlowFiles";
            getLogger().error(error + "; routing {} to failure", new Object[]{binDescription});
            binSession.transfer(contents, REL_FAILURE);
            binSession.commit();

            return true;
        }

        Collections.sort(contents, new FragmentComparator());
    }

    FlowFile bundle = merger.merge(bin, context);

    // keep the filename, as it is added to the bundle.
    final String filename = bundle.getAttribute(CoreAttributes.FILENAME.key());

    // merge all of the attributes
    final Map<String, String> bundleAttributes = attributeStrategy.getMergedAttributes(contents);
    bundleAttributes.put(CoreAttributes.MIME_TYPE.key(), merger.getMergedContentType());
    // restore the filename of the bundle
    bundleAttributes.put(CoreAttributes.FILENAME.key(), filename);
    bundleAttributes.put(MERGE_COUNT_ATTRIBUTE, Integer.toString(contents.size()));
    bundleAttributes.put(MERGE_BIN_AGE_ATTRIBUTE, Long.toString(bin.getBinAge()));

    bundle = binSession.putAllAttributes(bundle, bundleAttributes);

    final String inputDescription = contents.size() < 10 ? contents.toString() : contents.size() + " FlowFiles";
    getLogger().info("Merged {} into {}", new Object[]{inputDescription, bundle});
    binSession.transfer(bundle, REL_MERGED);

    for (final FlowFile unmerged : merger.getUnmergedFlowFiles()) {
        final FlowFile unmergedCopy = binSession.clone(unmerged);
        binSession.transfer(unmergedCopy, REL_FAILURE);
    }

    // We haven't committed anything, parent will take care of it
    return false;
}
 
Example 11
Source File: QueryElasticsearchHttp.java    From localization_nifi with Apache License 2.0
private int getPage(final Response getResponse, final URL url, final ProcessContext context,
        final ProcessSession session, FlowFile flowFile, final ComponentLog logger,
        final long startNanos, boolean targetIsContent)
        throws IOException {
    List<FlowFile> page = new ArrayList<>();
    final int statusCode = getResponse.code();

    if (isSuccess(statusCode)) {
        ResponseBody body = getResponse.body();
        final byte[] bodyBytes = body.bytes();
        JsonNode responseJson = parseJsonResponse(new ByteArrayInputStream(bodyBytes));
        JsonNode hits = responseJson.get("hits").get("hits");

        for(int i = 0; i < hits.size(); i++) {
            JsonNode hit = hits.get(i);
            String retrievedId = hit.get("_id").asText();
            String retrievedIndex = hit.get("_index").asText();
            String retrievedType = hit.get("_type").asText();

            FlowFile documentFlowFile = null;
            if (flowFile != null) {
                documentFlowFile = targetIsContent ? session.create(flowFile) : session.clone(flowFile);
            } else {
                documentFlowFile = session.create();
            }

            JsonNode source = hit.get("_source");
            documentFlowFile = session.putAttribute(documentFlowFile, "es.id", retrievedId);
            documentFlowFile = session.putAttribute(documentFlowFile, "es.index", retrievedIndex);
            documentFlowFile = session.putAttribute(documentFlowFile, "es.type", retrievedType);

            if (targetIsContent) {
                documentFlowFile = session.putAttribute(documentFlowFile, "filename", retrievedId);
                documentFlowFile = session.putAttribute(documentFlowFile, "mime.type", "application/json");
                documentFlowFile = session.write(documentFlowFile, out -> {
                    out.write(source.toString().getBytes());
                });
            } else {
                Map<String, String> attributes = new HashMap<>();
                for(Iterator<Entry<String, JsonNode>> it = source.getFields(); it.hasNext(); ) {
                    Entry<String, JsonNode> entry = it.next();
                    attributes.put(ATTRIBUTE_PREFIX + entry.getKey(), entry.getValue().asText());
                }
                documentFlowFile = session.putAllAttributes(documentFlowFile, attributes);
            }
            page.add(documentFlowFile);
        }
        logger.debug("Elasticsearch retrieved " + responseJson.size() + " documents, routing to success");

        session.transfer(page, REL_SUCCESS);
    } else {
        try {
            // 5xx -> RETRY, but a server error might last a while, so yield
            if (statusCode / 100 == 5) {
                throw new RetryableException(String.format("Elasticsearch returned code %s with message %s, transferring flow file to retry. This is likely a server problem, yielding...",
                        statusCode, getResponse.message()));
            } else if (context.hasIncomingConnection()) {  // 1xx, 3xx, 4xx -> NO RETRY
                throw new UnretryableException(String.format("Elasticsearch returned code %s with message %s, transferring flow file to failure",
                        statusCode, getResponse.message()));
            } else {
                logger.warn("Elasticsearch returned code {} with message {}", new Object[]{statusCode, getResponse.message()});
            }
        } finally {
            if (!page.isEmpty()) {
                session.remove(page);
                page.clear();
            }
        }
    }

    // emit provenance event
    final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
    if (!page.isEmpty()) {
        if (context.hasNonLoopConnection()) {
            page.forEach(f -> session.getProvenanceReporter().fetch(f, url.toExternalForm(), millis));
        } else {
            page.forEach(f -> session.getProvenanceReporter().receive(f, url.toExternalForm(), millis));
        }
    }
    return page.size();
}
 
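Example 11 chooses between session.create(flowFile) and session.clone(flowFile) depending on whether the new FlowFile's content will be written afterward. The distinction, in a short sketch: create(parent) yields a child that inherits the parent's attributes but has no content, while clone(parent) copies the content as well, so create is the cheaper choice when the content is about to be replaced anyway.

FlowFile emptyChild = session.create(flowFile);  // parent's attributes, empty content
FlowFile fullCopy   = session.clone(flowFile);   // parent's attributes and content
 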
Example 12
Source File: RouteHL7.java    From nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final Charset charset = Charset.forName(context.getProperty(CHARACTER_SET).evaluateAttributeExpressions(flowFile).getValue());

    final byte[] buffer = new byte[(int) flowFile.getSize()];
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, buffer);
        }
    });

    @SuppressWarnings("resource")
    final HapiContext hapiContext = new DefaultHapiContext();
    hapiContext.setValidationContext((ca.uhn.hl7v2.validation.ValidationContext) ValidationContextFactory.noValidation());

    final PipeParser parser = hapiContext.getPipeParser();
    final String hl7Text = new String(buffer, charset);
    final HL7Message message;
    try {
        final Message hapiMessage = parser.parse(hl7Text);
        message = new HapiMessage(hapiMessage);
    } catch (final Exception e) {
        getLogger().error("Failed to parse {} as HL7 due to {}; routing to failure", new Object[]{flowFile, e});
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    final Set<String> matchingRels = new HashSet<>();
    final Map<Relationship, HL7Query> queryMap = queries;
    for (final Map.Entry<Relationship, HL7Query> entry : queryMap.entrySet()) {
        final Relationship relationship = entry.getKey();
        final HL7Query query = entry.getValue();

        final QueryResult result = query.evaluate(message);
        if (result.isMatch()) {
            FlowFile clone = session.clone(flowFile);
            clone = session.putAttribute(clone, "RouteHL7.Route", relationship.getName());
            session.transfer(clone, relationship);
            session.getProvenanceReporter().route(clone, relationship);
            matchingRels.add(relationship.getName());
        }
    }

    session.transfer(flowFile, REL_ORIGINAL);
    getLogger().info("Routed a copy of {} to {} relationships: {}", new Object[]{flowFile, matchingRels.size(), matchingRels});
}
 
Example 13
Source File: SegmentContent.java    From nifi with Apache License 2.0
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final String segmentId = UUID.randomUUID().toString();
    final long segmentSize = context.getProperty(SIZE).asDataSize(DataUnit.B).longValue();

    final String originalFileName = flowFile.getAttribute(CoreAttributes.FILENAME.key());

    if (flowFile.getSize() <= segmentSize) {
        flowFile = session.putAttribute(flowFile, SEGMENT_ID, segmentId);
        flowFile = session.putAttribute(flowFile, SEGMENT_INDEX, "1");
        flowFile = session.putAttribute(flowFile, SEGMENT_COUNT, "1");
        flowFile = session.putAttribute(flowFile, SEGMENT_ORIGINAL_FILENAME, originalFileName);

        flowFile = session.putAttribute(flowFile, FRAGMENT_ID, segmentId);
        flowFile = session.putAttribute(flowFile, FRAGMENT_INDEX, "1");
        flowFile = session.putAttribute(flowFile, FRAGMENT_COUNT, "1");

        FlowFile clone = session.clone(flowFile);
        session.transfer(flowFile, REL_ORIGINAL);
        session.transfer(clone, REL_SEGMENTS);
        return;
    }

    int totalSegments = (int) (flowFile.getSize() / segmentSize);
    if (totalSegments * segmentSize < flowFile.getSize()) {
        totalSegments++;
    }

    final Map<String, String> segmentAttributes = new HashMap<>();
    segmentAttributes.put(SEGMENT_ID, segmentId);
    segmentAttributes.put(SEGMENT_COUNT, String.valueOf(totalSegments));
    segmentAttributes.put(SEGMENT_ORIGINAL_FILENAME, originalFileName);

    segmentAttributes.put(FRAGMENT_ID, segmentId);
    segmentAttributes.put(FRAGMENT_COUNT, String.valueOf(totalSegments));

    final Set<FlowFile> segmentSet = new HashSet<>();
    for (int i = 1; i <= totalSegments; i++) {
        final long segmentOffset = segmentSize * (i - 1);
        FlowFile segment = session.clone(flowFile, segmentOffset, Math.min(segmentSize, flowFile.getSize() - segmentOffset));
        segmentAttributes.put(SEGMENT_INDEX, String.valueOf(i));
        segmentAttributes.put(FRAGMENT_INDEX, String.valueOf(i));
        segment = session.putAllAttributes(segment, segmentAttributes);
        segmentSet.add(segment);
    }

    session.transfer(segmentSet, REL_SEGMENTS);
    flowFile = FragmentAttributes.copyAttributesToOriginal(session, flowFile, segmentId, totalSegments);
    session.transfer(flowFile, REL_ORIGINAL);

    if (totalSegments <= 10) {
        getLogger().info("Segmented {} into {} segments: {}", new Object[]{flowFile, totalSegments, segmentSet});
    } else {
        getLogger().info("Segmented {} into {} segments", new Object[]{flowFile, totalSegments});
    }
}
 
Example 14
Source File: MergeContent.java    From nifi with Apache License 2.0
@Override
protected BinProcessingResult processBin(final Bin bin, final ProcessContext context) throws ProcessException {
    final BinProcessingResult binProcessingResult = new BinProcessingResult(true);
    final String mergeFormat = context.getProperty(MERGE_FORMAT).getValue();
    MergeBin merger;
    switch (mergeFormat) {
        case MERGE_FORMAT_TAR_VALUE:
            merger = new TarMerge();
            break;
        case MERGE_FORMAT_ZIP_VALUE:
            merger = new ZipMerge(context.getProperty(COMPRESSION_LEVEL).asInteger());
            break;
        case MERGE_FORMAT_FLOWFILE_STREAM_V3_VALUE:
            merger = new FlowFileStreamMerger(new FlowFilePackagerV3(), "application/flowfile-v3");
            break;
        case MERGE_FORMAT_FLOWFILE_STREAM_V2_VALUE:
            merger = new FlowFileStreamMerger(new FlowFilePackagerV2(), "application/flowfile-v2");
            break;
        case MERGE_FORMAT_FLOWFILE_TAR_V1_VALUE:
            merger = new FlowFileStreamMerger(new FlowFilePackagerV1(), "application/flowfile-v1");
            break;
        case MERGE_FORMAT_CONCAT_VALUE:
            merger = new BinaryConcatenationMerge();
            break;
        case MERGE_FORMAT_AVRO_VALUE:
            merger = new AvroMerge();
            break;
        default:
            throw new AssertionError();
    }

    final AttributeStrategy attributeStrategy = AttributeStrategyUtil.strategyFor(context);

    final List<FlowFile> contents = bin.getContents();
    final ProcessSession binSession = bin.getSession();

    if (MERGE_STRATEGY_DEFRAGMENT.equals(context.getProperty(MERGE_STRATEGY).getValue())) {
        final String error = getDefragmentValidationError(bin.getContents());

        // Fail the flow files and commit them
        if (error != null) {
            final String binDescription = contents.size() <= 10 ? contents.toString() : contents.size() + " FlowFiles";
            getLogger().error(error + "; routing {} to failure", new Object[]{binDescription});
            binSession.transfer(contents, REL_FAILURE);
            binSession.commit();

            return binProcessingResult;
        }

        Collections.sort(contents, new FragmentComparator());
    }

    FlowFile bundle = merger.merge(bin, context);

    // keep the filename, as it is added to the bundle.
    final String filename = bundle.getAttribute(CoreAttributes.FILENAME.key());

    // merge all of the attributes
    final Map<String, String> bundleAttributes = attributeStrategy.getMergedAttributes(contents);
    bundleAttributes.put(CoreAttributes.MIME_TYPE.key(), merger.getMergedContentType());
    // restore the filename of the bundle
    bundleAttributes.put(CoreAttributes.FILENAME.key(), filename);
    bundleAttributes.put(MERGE_COUNT_ATTRIBUTE, Integer.toString(contents.size()));
    bundleAttributes.put(MERGE_BIN_AGE_ATTRIBUTE, Long.toString(bin.getBinAge()));

    bundle = binSession.putAllAttributes(bundle, bundleAttributes);

    final String inputDescription = contents.size() < 10 ? contents.toString() : contents.size() + " FlowFiles";
    getLogger().info("Merged {} into {}", new Object[]{inputDescription, bundle});
    binSession.transfer(bundle, REL_MERGED);
    binProcessingResult.getAttributes().put(MERGE_UUID_ATTRIBUTE, bundle.getAttribute(CoreAttributes.UUID.key()));

    for (final FlowFile unmerged : merger.getUnmergedFlowFiles()) {
        final FlowFile unmergedCopy = binSession.clone(unmerged);
        binSession.transfer(unmergedCopy, REL_FAILURE);
    }

    // We haven't committed anything, parent will take care of it
    binProcessingResult.setCommitted(false);
    return binProcessingResult;
}