Java Code Examples for com.datatorrent.api.DAG#setInputPortAttribute()

The following examples show how to use com.datatorrent.api.DAG#setInputPortAttribute(), drawn from several open-source Apache Apex projects. Each example lists its original project and source file.
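All of the examples share the same call pattern: inside populateDAG(), an attribute from Context.PortContext (such as PARTITION_PARALLEL, TUPLE_CLASS, or STREAM_CODEC) is attached to a specific input port of an operator that has already been added to the DAG. The following is a minimal, self-contained sketch of that pattern; the LineReader and LineWriter operators are hypothetical stand-ins, not taken from any of the projects referenced below.

import org.apache.hadoop.conf.Configuration;

import com.datatorrent.api.Context.PortContext;
import com.datatorrent.api.DAG;
import com.datatorrent.api.DefaultInputPort;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.InputOperator;
import com.datatorrent.api.StreamingApplication;
import com.datatorrent.common.util.BaseOperator;

public class PortAttributeSketch implements StreamingApplication
{
  // Hypothetical input operator that emits one string per call.
  public static class LineReader extends BaseOperator implements InputOperator
  {
    public final transient DefaultOutputPort<String> output = new DefaultOutputPort<>();

    @Override
    public void emitTuples()
    {
      output.emit("hello");
    }
  }

  // Hypothetical sink operator with a single input port.
  public static class LineWriter extends BaseOperator
  {
    public final transient DefaultInputPort<String> input = new DefaultInputPort<String>()
    {
      @Override
      public void process(String line)
      {
        // consume the tuple
      }
    };
  }

  @Override
  public void populateDAG(DAG dag, Configuration conf)
  {
    LineReader reader = dag.addOperator("Reader", new LineReader());
    LineWriter writer = dag.addOperator("Writer", new LineWriter());

    dag.addStream("lines", reader.output, writer.input);

    // Partition the writer in parallel with the reader: each reader partition gets its
    // own writer instance and no shuffle happens on the "lines" stream.
    dag.setInputPortAttribute(writer.input, PortContext.PARTITION_PARALLEL, true);
  }
}

The attributes used in the examples below follow this same pattern: PARTITION_PARALLEL controls parallel partitioning with the upstream operator, TUPLE_CLASS declares the POJO class flowing into a port, and STREAM_CODEC installs a custom codec that governs serialization and partition routing for that port.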
Example 1
Source File: HDFSFileCopyModule.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{

  BlockWriter blockWriter = dag.addOperator("BlockWriter", new BlockWriter());
  Synchronizer synchronizer = dag.addOperator("BlockSynchronizer", new Synchronizer());

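  // PARTITION_PARALLEL = true partitions BlockWriter in parallel with its upstream operator,
  // so no shuffle occurs on these input streams.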
  dag.setInputPortAttribute(blockWriter.input, PortContext.PARTITION_PARALLEL, true);
  dag.setInputPortAttribute(blockWriter.blockMetadataInput, PortContext.PARTITION_PARALLEL, true);
  dag.addStream("CompletedBlockmetadata", blockWriter.blockMetadataOutput, synchronizer.blocksMetadataInput);

  HDFSFileMerger merger = new HDFSFileMerger();
  merger = dag.addOperator("FileMerger", merger);
  dag.addStream("MergeTrigger", synchronizer.trigger, merger.input);

  merger.setFilePath(outputDirectoryPath);
  merger.setOverwriteOnConflict(overwriteOnConflict);
  blockWriter.setBlocksDirectory(blocksDirectory);
  merger.setBlocksDirectory(blocksDirectory);

  filesMetadataInput.set(synchronizer.filesMetadataInput);
  blocksMetadataInput.set(blockWriter.blockMetadataInput);
  blockData.set(blockWriter.input);
}
 
Example 2
Source File: Application.java    From examples with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  // create operators
  FileReader reader = dag.addOperator("read",  FileReader.class);
  FileWriter writer = dag.addOperator("write", FileWriter.class);

  // using parallel partitioning ensures that lines from a single file are handled
  // by the same writer
  //
  dag.setInputPortAttribute(writer.input, PARTITION_PARALLEL, true);
  dag.setInputPortAttribute(writer.control, PARTITION_PARALLEL, true);

  dag.addStream("data", reader.output, writer.input);
  dag.addStream("ctrl", reader.control, writer.control);
}
 
Example 3
Source File: Application.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  JsonGenerator generator = dag.addOperator("JsonGenerator", JsonGenerator.class);
  JsonParser jsonParser = dag.addOperator("jsonParser", JsonParser.class);

  CsvFormatter formatter = dag.addOperator("formatter", CsvFormatter.class);
  formatter.setSchema(SchemaUtils.jarResourceFileToString(filename));
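  // TUPLE_CLASS supplies the concrete POJO class arriving on the formatter's input port.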
  dag.setInputPortAttribute(formatter.in, PortContext.TUPLE_CLASS, PojoEvent.class);

  HDFSOutputOperator<String> hdfsOutput = dag.addOperator("HDFSOutputOperator", HDFSOutputOperator.class);
  hdfsOutput.setLineDelimiter("");

  dag.addStream("parserStream", generator.out, jsonParser.in);
  dag.addStream("formatterStream", jsonParser.out, formatter.in);
  dag.addStream("outputStream", formatter.out, hdfsOutput.input);

}
 
Example 4
Source File: DeduperPartitioningTest.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  TestGenerator gen = dag.addOperator("Generator", new TestGenerator());

  dedup = dag.addOperator("Deduper", new TestDeduper());
  dedup.setKeyExpression("id");
  dedup.setTimeExpression("eventTime.getTime()");
  dedup.setBucketSpan(60);
  dedup.setExpireBefore(600);

  ConsoleOutputOperator console = dag.addOperator("Console", new ConsoleOutputOperator());
  dag.addStream("Generator to Dedup", gen.output, dedup.input);
  dag.addStream("Dedup to Console", dedup.unique, console.input);
  dag.setInputPortAttribute(dedup.input, Context.PortContext.TUPLE_CLASS, TestEvent.class);
  dag.setOutputPortAttribute(dedup.unique, Context.PortContext.TUPLE_CLASS, TestEvent.class);
  dag.setAttribute(dedup, Context.OperatorContext.PARTITIONER,
      new StatelessPartitioner<TimeBasedDedupOperator>(NUM_DEDUP_PARTITIONS));
}
 
Example 5
Source File: SimpleTransformApplication.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  pojoDataGenerator = dag.addOperator("Input", new POJOGenerator());
  TransformOperator transform = dag.addOperator("Process", new TransformOperator());
  // Set expression map
  Map<String, String> expMap = new HashMap<>();
  expMap.put("name", "{$.firstName}.concat(\" \").concat({$.lastName})");
  expMap.put("age", "(new java.util.Date()).getYear() - {$.dateOfBirth}.getYear()");
  expMap.put("address", "{$.address}.toLowerCase()");
  transform.setExpressionMap(expMap);
  FunctionOperator.MapFunctionOperator<Object, ?> output = dag.addOperator("out",
      new FunctionOperator.MapFunctionOperator<>(outputFn));

  dag.addStream("InputToTransform", pojoDataGenerator.output, transform.input);
  dag.addStream("TransformToOutput", transform.output, output.input);

  dag.setInputPortAttribute(transform.input, Context.PortContext.TUPLE_CLASS, CustomerEvent.class);
  dag.setOutputPortAttribute(transform.output, Context.PortContext.TUPLE_CLASS, CustomerInfo.class);
  setPartitioner(dag, conf, transform);
}
 
Example 6
Source File: DeduperOrderingTest.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  RandomDedupDataGenerator random = dag.addOperator("Input", RandomDedupDataGenerator.class);

  TimeBasedDedupOperator dedup = dag.addOperator("Dedup", TimeBasedDedupOperator.class);
  dedup.setKeyExpression("key");
  dedup.setTimeExpression("date.getTime()");
  dedup.setBucketSpan(10);
  dedup.setExpireBefore(60);
  dedup.setPreserveTupleOrder(true);
  FileAccessFSImpl fAccessImpl = new TFileImpl.DTFileImpl();
  fAccessImpl.setBasePath(dag.getAttributes().get(DAG.APPLICATION_PATH) + "/bucket_data");
  dedup.managedState.setFileAccess(fAccessImpl);
  dag.setInputPortAttribute(dedup.input, Context.PortContext.TUPLE_CLASS, TestPojo.class);

  verifier = dag.addOperator("Verifier", Verifier.class);

  dag.addStream("Input to Dedup", random.output, dedup.input);
  dag.addStream("Dedup to Unique", dedup.unique, verifier.unique);
  dag.addStream("Dedup to Duplicate", dedup.duplicate, verifier.duplicate);
  dag.addStream("Dedup to Expired", dedup.expired, verifier.expired);
}
 
Example 7
Source File: Application.java    From streaming-benchmarks with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  // Create operators for each step
  // settings are applied by the platform using the config file.
  KafkaSinglePortStringInputOperator kafkaInput = dag.addOperator("kafkaInput", new KafkaSinglePortStringInputOperator());
  DeserializeJSON deserializeJSON = dag.addOperator("deserialize", new DeserializeJSON());
  FilterTuples filterTuples = dag.addOperator("filterTuples", new FilterTuples());
  FilterFields filterFields = dag.addOperator("filterFields", new FilterFields());
  RedisJoin redisJoin = dag.addOperator("redisJoin", new RedisJoin());
  CampaignProcessor campaignProcessor = dag.addOperator("campaignProcessor", new CampaignProcessor());

  // Connect the Ports in the Operators
  dag.addStream("deserialize", kafkaInput.outputPort, deserializeJSON.input).setLocality(DAG.Locality.CONTAINER_LOCAL);
  dag.addStream("filterTuples", deserializeJSON.output, filterTuples.input).setLocality(DAG.Locality.CONTAINER_LOCAL);
  dag.addStream("filterFields", filterTuples.output, filterFields.input).setLocality(DAG.Locality.CONTAINER_LOCAL);
  dag.addStream("redisJoin", filterFields.output, redisJoin.input).setLocality(DAG.Locality.CONTAINER_LOCAL);
  dag.addStream("output", redisJoin.output, campaignProcessor.input);

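  // Parallel partitioning keeps the whole deserialize/filter/join chain partitioned
  // in lock-step with the Kafka input operator.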
  dag.setInputPortAttribute(deserializeJSON.input, Context.PortContext.PARTITION_PARALLEL, true);
  dag.setInputPortAttribute(filterTuples.input, Context.PortContext.PARTITION_PARALLEL, true);
  dag.setInputPortAttribute(filterFields.input, Context.PortContext.PARTITION_PARALLEL, true);
  dag.setInputPortAttribute(redisJoin.input, Context.PortContext.PARTITION_PARALLEL, true);
}
 
Example 8
Source File: Application.java    From examples with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  // create operators
  FileReader reader = dag.addOperator("read",  FileReader.class);
  FileWriter writer = dag.addOperator("write", FileWriter.class);

  reader.setScanner(new FileReaderMultiDir.SlicedDirectoryScanner());

  // using parallel partitioning ensures that lines from a single file are handled
  // by the same writer
  //
  dag.setInputPortAttribute(writer.input, PARTITION_PARALLEL, true);
  dag.setInputPortAttribute(writer.control, PARTITION_PARALLEL, true);

  dag.addStream("data", reader.output, writer.input);
  dag.addStream("ctrl", reader.control, writer.control);
}
 
Example 9
Source File: Application.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  // Test Data Generator Operator
  RandomDataGeneratorOperator gen = dag.addOperator("RandomGenerator", new RandomDataGeneratorOperator());

  // Dedup Operator. Configuration through resources/META-INF/properties.xml
  TimeBasedDedupOperator dedup = dag.addOperator("Deduper", new TimeBasedDedupOperator());

  // Console output operator for unique tuples
  ConsoleOutputOperator consoleUnique = dag.addOperator("ConsoleUnique", new ConsoleOutputOperator());

  // Console output operator for duplicate tuples
  ConsoleOutputOperator consoleDuplicate = dag.addOperator("ConsoleDuplicate", new ConsoleOutputOperator());

  // Console output operator for expired tuples
  ConsoleOutputOperator consoleExpired = dag.addOperator("ConsoleExpired", new ConsoleOutputOperator());

  // Streams
  dag.addStream("Generator to Dedup", gen.output, dedup.input);

  // Connect Dedup unique to Console
  dag.addStream("Dedup Unique to Console", dedup.unique, consoleUnique.input);
  // Connect Dedup duplicate to Console
  dag.addStream("Dedup Duplicate to Console", dedup.duplicate, consoleDuplicate.input);
  // Connect Dedup expired to Console
  dag.addStream("Dedup Expired to Console", dedup.expired, consoleExpired.input);

  // Set Attribute TUPLE_CLASS for supplying schema information to the port
  dag.setInputPortAttribute(dedup.input, Context.PortContext.TUPLE_CLASS, TestEvent.class);

  // Uncomment the following line to create multiple partitions for Dedup operator. In this case: 2
  // dag.setAttribute(dedup, Context.OperatorContext.PARTITIONER, new StatelessPartitioner<TimeBasedDedupOperator>(2));
}
 
Example 10
Source File: Application.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  RandomNumberGenerator random = dag.addOperator("randomInt",     RandomNumberGenerator.class);
  TestPartition testPartition  = dag.addOperator("testPartition", TestPartition.class);
  Codec3 codec = new Codec3();
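  // A custom StreamCodec on the input port controls how tuples are serialized and
  // which partition each tuple is routed to.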
  dag.setInputPortAttribute(testPartition.in, PortContext.STREAM_CODEC, codec);

  //Add locality if needed, e.g.: .setLocality(Locality.CONTAINER_LOCAL);
  dag.addStream("randomData", random.out, testPartition.in);
}
 
Example 11
Source File: KafkaInputBenchmark.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{

  dag.setAttribute(DAG.APPLICATION_NAME, "KafkaInputOperatorPartitionDemo");
  BenchmarkKafkaInputOperator bpkio = new BenchmarkKafkaInputOperator();

  String type = conf.get("kafka.consumertype", "simple");

  KafkaConsumer consumer = null;

  if (type.equals("highlevel")) {
    // Create template high-level consumer

    Properties props = new Properties();
    props.put("group.id", "main_group");
    props.put("auto.offset.reset", "smallest");
    consumer = new HighlevelKafkaConsumer(props);
  } else {
    // topic is set via property file
    consumer = new SimpleKafkaConsumer(null, 10000, 100000, "test_kafka_autop_client", null);
  }

  bpkio.setZookeeper(conf.get("dt.kafka.zookeeper"));
  bpkio.setInitialPartitionCount(1);
  //bpkio.setTuplesBlast(1024 * 1024);
  bpkio.setConsumer(consumer);

  bpkio = dag.addOperator("KafkaBenchmarkConsumer", bpkio);

  CollectorModule cm = dag.addOperator("DataBlackhole", CollectorModule.class);
  dag.addStream("end", bpkio.oport, cm.inputPort).setLocality(Locality.CONTAINER_LOCAL);
  dag.setInputPortAttribute(cm.inputPort, PortContext.PARTITION_PARALLEL, true);
  dag.setAttribute(bpkio, OperatorContext.COUNTERS_AGGREGATOR, new KafkaConsumer.KafkaMeterStatsAggregator());
  //dag.setAttribute(bpkio, OperatorContext.STATS_LISTENER, KafkaMeterStatsListener.class);
}
 
Example 12
Source File: UniqueValueCountBenchmarkApplication.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration entries)
{
  dag.setAttribute(DAG.APPLICATION_NAME, "UniqueValueCountDemo");
  dag.setAttribute(DAG.DEBUG, true);

  /* Generate random key-value pairs */
  RandomEventGenerator randGen = dag.addOperator("randomgen", new RandomEventGenerator());
  randGen.setMaxvalue(999999);
  randGen.setTuplesBlastIntervalMillis(50);
  dag.setAttribute(randGen, Context.OperatorContext.PARTITIONER, new StatelessPartitioner<RandomEventGenerator>(3));

  /* Initialize with three partition to start with */
  UniqueCounter<Integer> uniqCount = dag.addOperator("uniqevalue", new UniqueCounter<Integer>());
  MapToKeyHashValuePairConverter<Integer, Integer> converter =
      dag.addOperator("converter", new MapToKeyHashValuePairConverter());

  dag.setAttribute(uniqCount, Context.OperatorContext.PARTITIONER,
      new StatelessPartitioner<UniqueCounter<Integer>>(3));
  dag.setInputPortAttribute(uniqCount.data, Context.PortContext.PARTITION_PARALLEL, true);
  uniqCount.setCumulative(false);

  Counter counter = dag.addOperator("count", new Counter());
  ConsoleOutputOperator output = dag.addOperator("output", new ConsoleOutputOperator());

  dag.addStream("datain", randGen.integer_data, uniqCount.data);
  dag.addStream("convert", uniqCount.count, converter.input).setLocality(Locality.THREAD_LOCAL);
  dag.addStream("consoutput", converter.output, counter.input);
  dag.addStream("final", counter.output, output.input);
}
 
Example 13
Source File: ApplicationWithGenerator.java    From streaming-benchmarks with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration configuration)
{
  // Create operators for each step
  // settings are applied by the platform using the config file.
  JsonGenerator eventGenerator = dag.addOperator("eventGenerator", new JsonGenerator());
  FilterTuples filterTuples = dag.addOperator("filterTuples", new FilterTuples());
  FilterFields filterFields = dag.addOperator("filterFields", new FilterFields());
  RedisJoin redisJoin = dag.addOperator("redisJoin", new RedisJoin());
  CampaignProcessor campaignProcessor = dag.addOperator("campaignProcessor", new CampaignProcessor());

  eventGenerator.setNumAdsPerCampaign(Integer.parseInt(configuration.get("numberOfAds")));
  eventGenerator.setNumCampaigns(Integer.parseInt(configuration.get("numberOfCampaigns")));
  setupRedis(eventGenerator.getCampaigns(), configuration.get("redis"));

  // Connect the Ports in the Operators
  dag.addStream("filterTuples", eventGenerator.out, filterTuples.input).setLocality(DAG.Locality.CONTAINER_LOCAL);
  dag.addStream("filterFields", filterTuples.output, filterFields.input).setLocality(DAG.Locality.CONTAINER_LOCAL);
  dag.addStream("redisJoin", filterFields.output, redisJoin.input).setLocality(DAG.Locality.CONTAINER_LOCAL);
  dag.addStream("output", redisJoin.output, campaignProcessor.input);

  dag.setInputPortAttribute(filterTuples.input, Context.PortContext.PARTITION_PARALLEL, true);
  dag.setInputPortAttribute(filterFields.input, Context.PortContext.PARTITION_PARALLEL, true);
  dag.setInputPortAttribute(redisJoin.input, Context.PortContext.PARTITION_PARALLEL, true);

  dag.setAttribute(eventGenerator, Context.OperatorContext.PARTITIONER, new StatelessPartitioner<EventGenerator>(8));
  dag.setAttribute(campaignProcessor, Context.OperatorContext.PARTITIONER, new StatelessPartitioner<CampaignProcessor>(8));
}
 
Example 14
Source File: StramLocalClusterTest.java    From attic-apex-core with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  TestGeneratorInputOperator input = dag.addOperator("Input", new TestGeneratorInputOperator());
  test = dag.addOperator("Test", new DynamicLoader());

  dag.addStream("S1", input.outport, test.input);
  dag.setAttribute(Context.DAGContext.LIBRARY_JARS, generatedJar);
  dag.setInputPortAttribute(test.input, Context.PortContext.TUPLE_CLASS, pojo);
}
 
Example 15
Source File: ThroughputBasedApplication.java    From examples with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  ThroughputBasedReader reader = dag.addOperator("read", ThroughputBasedReader.class);
  BytesFileWriter writer = dag.addOperator("write", BytesFileWriter.class);

  dag.setInputPortAttribute(writer.input, PARTITION_PARALLEL, true);
  dag.setInputPortAttribute(writer.control, PARTITION_PARALLEL, true);

  dag.addStream("data", reader.output, writer.input);
  dag.addStream("ctrl", reader.control, writer.control);
}
 
Example 16
Source File: JdbcPollerApplication.java    From examples with Apache License 2.0
public void populateDAG(DAG dag, Configuration conf)
{
  JdbcPOJOPollInputOperator poller = dag.addOperator("JdbcPoller", new JdbcPOJOPollInputOperator());

  JdbcStore store = new JdbcStore();
  poller.setStore(store);

  poller.setFieldInfos(addFieldInfos());

  FileLineOutputOperator writer = dag.addOperator("Writer", new FileLineOutputOperator());
  dag.setInputPortAttribute(writer.input, PortContext.PARTITION_PARALLEL, true);
  writer.setRotationWindows(60);

  dag.addStream("dbrecords", poller.outputPort, writer.input);
}
 
Example 17
Source File: Application.java    From attic-apex-malhar with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public void populateDAG(DAG dag, Configuration conf)
{

  try {
    URI duri = PubSubHelper.getURIWithDefault(dag, "localhost:9090");

    PubSubWebSocketInputOperator userTxWsInput = getPubSubWebSocketInputOperator("userTxInput", dag, duri, "examples.app.frauddetect.submitTransaction");
    PubSubWebSocketOutputOperator ccUserAlertWsOutput = getPubSubWebSocketOutputOperator("ccUserAlertQueryOutput", dag, duri, "examples.app.frauddetect.fraudAlert");
    PubSubWebSocketOutputOperator avgUserAlertwsOutput = getPubSubWebSocketOutputOperator("avgUserAlertQueryOutput", dag, duri, "examples.app.frauddetect.fraudAlert");
    PubSubWebSocketOutputOperator binUserAlertwsOutput = getPubSubWebSocketOutputOperator("binUserAlertOutput", dag, duri, "examples.app.frauddetect.fraudAlert");
    PubSubWebSocketOutputOperator txSummaryWsOutput = getPubSubWebSocketOutputOperator("txSummaryWsOutput", dag, duri, "examples.app.frauddetect.txSummary");
    SlidingWindowSumKeyVal<KeyValPair<MerchantKey, String>, Integer> smsOperator = dag.addOperator("movingSum", SlidingWindowSumKeyVal.class);

    MerchantTransactionGenerator txReceiver = dag.addOperator("txReceiver", MerchantTransactionGenerator.class);
    MerchantTransactionInputHandler txInputHandler = dag.addOperator("txInputHandler", new MerchantTransactionInputHandler());
    BankIdNumberSamplerOperator binSampler = dag.addOperator("bankInfoFraudDetector", BankIdNumberSamplerOperator.class);

    MerchantTransactionBucketOperator txBucketOperator = dag.addOperator("txFilter", MerchantTransactionBucketOperator.class);
    RangeKeyVal rangeOperator = dag.addOperator("rangePerMerchant", new RangeKeyVal<MerchantKey, Long>());
    SimpleMovingAverage<MerchantKey, Long> smaOperator = dag.addOperator("smaPerMerchant", SimpleMovingAverage.class);
    TransactionStatsAggregator txStatsAggregator = dag.addOperator("txStatsAggregator", TransactionStatsAggregator.class);
    AverageAlertingOperator avgAlertingOperator = dag.addOperator("avgAlerter", AverageAlertingOperator.class);
    CreditCardAmountSamplerOperator ccSamplerOperator = dag.addOperator("amountFraudDetector", CreditCardAmountSamplerOperator.class);
    HdfsStringOutputOperator hdfsOutputOperator = getHdfsOutputOperator("hdfsOutput", dag, "fraud");

    MongoDBOutputOperator mongoTxStatsOperator = dag.addOperator("mongoTxStatsOutput", MongoDBOutputOperator.class);
    MongoDBOutputOperator mongoBinAlertsOperator = dag.addOperator("mongoBinAlertsOutput", MongoDBOutputOperator.class);
    MongoDBOutputOperator mongoCcAlertsOperator = dag.addOperator("mongoCcAlertsOutput", MongoDBOutputOperator.class);
    MongoDBOutputOperator mongoAvgAlertsOperator = dag.addOperator("mongoAvgAlertsOutput", MongoDBOutputOperator.class);

    dag.addStream("userTxStream", userTxWsInput.outputPort, txInputHandler.userTxInputPort);
    dag.addStream("transactions", txReceiver.txOutputPort, txBucketOperator.inputPort).setLocality(DAG.Locality.CONTAINER_LOCAL);
    dag.addStream("txData", txReceiver.txDataOutputPort, hdfsOutputOperator.input); // dump all tx into Hdfs
    dag.addStream("userTransactions", txInputHandler.txOutputPort, txBucketOperator.txUserInputPort);
    dag.addStream("bankInfoData", txBucketOperator.binCountOutputPort, smsOperator.data);
    dag.addStream("bankInfoCount", smsOperator.integerSum, binSampler.txCountInputPort);
    dag.addStream("filteredTransactions", txBucketOperator.txOutputPort, rangeOperator.data, smaOperator.data, avgAlertingOperator.txInputPort);

    KeyPartitionCodec<MerchantKey, Long> txCodec = new KeyPartitionCodec<MerchantKey, Long>();
    dag.setInputPortAttribute(rangeOperator.data, Context.PortContext.STREAM_CODEC, txCodec);
    dag.setInputPortAttribute(smaOperator.data, Context.PortContext.STREAM_CODEC, txCodec);
    dag.setInputPortAttribute(avgAlertingOperator.txInputPort, Context.PortContext.STREAM_CODEC, txCodec);

    dag.addStream("creditCardData", txBucketOperator.ccAlertOutputPort, ccSamplerOperator.inputPort);
    dag.addStream("txnSummaryData", txBucketOperator.summaryTxnOutputPort, txSummaryWsOutput.input);
    dag.addStream("smaAlerts", smaOperator.doubleSMA, avgAlertingOperator.smaInputPort);
    dag.addStream("binAlerts", binSampler.countAlertOutputPort, mongoBinAlertsOperator.inputPort);
    dag.addStream("binAlertsNotification", binSampler.countAlertNotificationPort, binUserAlertwsOutput.input);
    dag.addStream("rangeData", rangeOperator.range, txStatsAggregator.rangeInputPort);
    dag.addStream("smaData", smaOperator.longSMA, txStatsAggregator.smaInputPort);
    dag.addStream("txStatsOutput", txStatsAggregator.txDataOutputPort, mongoTxStatsOperator.inputPort);
    dag.addStream("avgAlerts", avgAlertingOperator.avgAlertOutputPort, mongoAvgAlertsOperator.inputPort);
    dag.addStream("avgAlertsNotification", avgAlertingOperator.avgAlertNotificationPort, avgUserAlertwsOutput.input);
    dag.addStream("ccAlerts", ccSamplerOperator.ccAlertOutputPort, mongoCcAlertsOperator.inputPort);
    dag.addStream("ccAlertsNotification", ccSamplerOperator.ccAlertNotificationPort, ccUserAlertWsOutput.input);

  } catch (Exception exc) {
    DTThrowable.rethrow(exc);
  }
}
 
Example 18
Source File: FSRecordReaderModule.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration configuration)
{
  FileSplitterInput fileSplitter = dag.addOperator("FileSplitter", createFileSplitter());
  FSRecordReader recordReader = dag.addOperator("BlockReader", createRecordReader());

  dag.addStream("BlockMetadata", fileSplitter.blocksMetadataOutput, recordReader.blocksMetadataInput);

  if (sequentialFileRead) {
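    // With sequentialFileRead enabled, this codec keeps all blocks of a file on the
    // same reader partition so the file's blocks are processed in order.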
    dag.setInputPortAttribute(recordReader.blocksMetadataInput, Context.PortContext.STREAM_CODEC,
        new SequentialFileBlockMetadataCodec());
  }

  FileSplitterInput.TimeBasedDirectoryScanner fileScanner = fileSplitter.getScanner();
  fileScanner.setFiles(files);
  if (scanIntervalMillis != 0) {
    fileScanner.setScanIntervalMillis(scanIntervalMillis);
  }
  fileScanner.setRecursive(recursive);
  if (filePatternRegularExp != null) {
    fileSplitter.getScanner().setFilePatternRegularExp(filePatternRegularExp);
  }
  recordReader.setBasePath(files);
  fileSplitter.setBlocksThreshold(blocksThreshold);

  if (minReaders != 0) {
    recordReader.setMinReaders(minReaders);
  }
  if (maxReaders != 0) {
    recordReader.setMaxReaders(maxReaders);
  }
  if (repartitionCheckInterval != 0) {
    recordReader.setIntervalMillis(repartitionCheckInterval);
  }

  /**
   * Override the split size or input blocks of a file. If not specified,
   * it would use default blockSize of the filesystem.
   */
  if (blockSize != 0) {
    fileSplitter.setBlockSize(blockSize);
  }
  records.set(recordReader.records);
}
 
Example 19
Source File: S3OutputModule.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  // DAG for S3 Output Module as follows:
  //   ---- S3InitiateFileUploadOperator -----|
  //             |                    S3FileMerger
  //   ----  S3BlockUploadOperator ------------|

  S3InitiateFileUploadOperator initiateUpload = dag.addOperator("InitiateUpload", createS3InitiateUpload());
  initiateUpload.setAccessKey(accessKey);
  initiateUpload.setSecretAccessKey(secretAccessKey);
  initiateUpload.setBucketName(bucketName);
  initiateUpload.setOutputDirectoryPath(outputDirectoryPath);

  S3BlockUploadOperator blockUploader = dag.addOperator("BlockUpload", createS3BlockUpload());
  blockUploader.setAccessKey(accessKey);
  blockUploader.setSecretAccessKey(secretAccessKey);
  blockUploader.setBucketName(bucketName);

  S3FileMerger fileMerger = dag.addOperator("FileMerger", createS3FileMerger());
  fileMerger.setAccessKey(accessKey);
  fileMerger.setSecretAccessKey(secretAccessKey);
  fileMerger.setBucketName(bucketName);

  if (endPoint != null) {
    initiateUpload.setEndPoint(endPoint);
    blockUploader.setEndPoint(endPoint);
    fileMerger.setEndPoint(endPoint);
  }

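  // Partition the block uploader in parallel with whatever upstream operator feeds these proxy ports.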
  dag.setInputPortAttribute(blockUploader.blockInput, Context.PortContext.PARTITION_PARALLEL, true);
  dag.setInputPortAttribute(blockUploader.blockMetadataInput, Context.PortContext.PARTITION_PARALLEL, true);

  dag.setAttribute(initiateUpload, TIMEOUT_WINDOW_COUNT, timeOutWindowCount);
  dag.setAttribute(blockUploader, TIMEOUT_WINDOW_COUNT, timeOutWindowCount);
  dag.setAttribute(fileMerger, TIMEOUT_WINDOW_COUNT, timeOutWindowCount);
  dag.setUnifierAttribute(blockUploader.output, TIMEOUT_WINDOW_COUNT, timeOutWindowCount);

  dag.setAttribute(fileMerger, Context.OperatorContext.PARTITIONER, new StatelessPartitioner<S3FileMerger>(mergerCount));
  // Add Streams
  dag.addStream("InitiateUploadIDToMerger", initiateUpload.fileMetadataOutput, fileMerger.filesMetadataInput);
  dag.addStream("InitiateUploadIDToWriter", initiateUpload.uploadMetadataOutput, blockUploader.uploadMetadataInput);
  dag.addStream("WriterToMerger", blockUploader.output, fileMerger.uploadMetadataInput);

  // Set the proxy ports
  filesMetadataInput.set(initiateUpload.filesMetadataInput);
  blocksMetadataInput.set(blockUploader.blockMetadataInput);
  blockData.set(blockUploader.blockInput);
}
 
Example 20
Source File: Application1.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration c)
{
  int topNtupleCount = 10;
  LogstreamPropertyRegistry registry = new LogstreamPropertyRegistry();
  // set app name
  dag.setAttribute(DAG.APPLICATION_NAME, "Logstream Application");
  dag.setAttribute(DAG.STREAMING_WINDOW_SIZE_MILLIS, 500);

  RabbitMQLogsInputOperator logInput = dag.addOperator("LogInput", new RabbitMQLogsInputOperator());
  logInput.setRegistry(registry);
  logInput.addPropertiesFromString(new String[] {"localhost:5672", "logsExchange", "direct", "apache:mysql:syslog:system"});

  JsonByteArrayOperator jsonToMap = dag.addOperator("JsonToMap", new JsonByteArrayOperator());
  jsonToMap.setConcatenationCharacter('_');

  FilterOperator filterOperator = dag.addOperator("FilterOperator", new FilterOperator());
  filterOperator.setRegistry(registry);
  filterOperator.addFilterCondition(new String[] {"type=apache", "response", "response.equals(\"404\")"});
  filterOperator.addFilterCondition(new String[] {"type=apache", "agentinfo_name", "agentinfo_name.equals(\"Firefox\")"});
  filterOperator.addFilterCondition(new String[] {"type=apache", "default=true"});
  filterOperator.addFilterCondition(new String[] {"type=mysql", "default=true"});
  filterOperator.addFilterCondition(new String[] {"type=syslog", "default=true"});
  filterOperator.addFilterCondition(new String[] {"type=system", "default=true"});

  DimensionOperator dimensionOperator = dag.addOperator("DimensionOperator", new DimensionOperator());
  dimensionOperator.setRegistry(registry);
  String[] dimensionInputString1 = new String[] {"type=apache", "timebucket=s", "dimensions=request", "dimensions=clientip", "dimensions=clientip:request", "values=bytes.sum:bytes.avg"};
  //String[] dimensionInputString1 = new String[] {"type=apache", "timebucket=s", "dimensions=request", "dimensions=clientip","values=bytes.sum"};
  String[] dimensionInputString2 = new String[] {"type=system", "timebucket=s", "dimensions=disk", "values=writes.avg"};
  String[] dimensionInputString3 = new String[] {"type=syslog", "timebucket=s", "dimensions=program", "values=pid.count"};
  dimensionOperator.addPropertiesFromString(dimensionInputString1);
  dimensionOperator.addPropertiesFromString(dimensionInputString2);
  dimensionOperator.addPropertiesFromString(dimensionInputString3);

  LogstreamTopN topN = dag.addOperator("TopN", new LogstreamTopN());
  topN.setN(topNtupleCount);
  topN.setRegistry(registry);

  LogstreamWidgetOutputOperator widgetOut = dag.addOperator("WidgetOut", new LogstreamWidgetOutputOperator());
  widgetOut.logstreamTopNInput.setN(topNtupleCount);
  widgetOut.setRegistry(registry);

  ConsoleOutputOperator consoleOut = dag.addOperator("ConsoleOut", new ConsoleOutputOperator());

  dag.addStream("inputJSonToMap", logInput.outputPort, jsonToMap.input);
  dag.addStream("toFilterOper", jsonToMap.outputFlatMap, filterOperator.input);
  dag.addStream("toDimensionOper", filterOperator.outputMap, dimensionOperator.in);
  dag.addStream("toTopN", dimensionOperator.aggregationsOutput, topN.data);
  dag.addStream("toWS", topN.top, widgetOut.logstreamTopNInput, consoleOut.input);

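  // Parallel-partition the JSON parser, filter, and console operators with their respective upstream operators.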
  dag.setInputPortAttribute(jsonToMap.input, PortContext.PARTITION_PARALLEL, true);
  dag.setInputPortAttribute(filterOperator.input, PortContext.PARTITION_PARALLEL, true);
  dag.setInputPortAttribute(consoleOut.input, PortContext.PARTITION_PARALLEL, true);
}