Java Code Examples for com.datatorrent.api.DAG#setAttribute()

The following examples show how to use com.datatorrent.api.DAG#setAttribute(). Each example is taken from an open-source project; the source file, project, and license are noted above the example.
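
As a quick orientation before the examples: dag.setAttribute() takes a Context attribute key and a value, and the same call exists at DAG scope and at operator scope, with setInputPortAttribute()/setOutputPortAttribute() covering port scope. The sketch below is a minimal illustration of those three scopes, not code from any of the projects listed; the operator classes MyInputOperator and MyOutputOperator and the chosen values are hypothetical placeholders, while the DAG methods and Context attribute keys are the real API.

public void populateDAG(DAG dag, Configuration conf)
{
  // Hypothetical operators, used only to give the attribute calls something to target.
  MyInputOperator in = dag.addOperator("Input", new MyInputOperator());
  MyOutputOperator out = dag.addOperator("Output", new MyOutputOperator());
  dag.addStream("data", in.output, out.input);

  // DAG scope: the attribute applies to the whole application.
  dag.setAttribute(Context.DAGContext.STREAMING_WINDOW_SIZE_MILLIS, 1000);

  // Operator scope: the attribute applies only to the given operator.
  dag.setAttribute(out, Context.OperatorContext.MEMORY_MB, 1024);

  // Port scope: handled by the companion setInputPortAttribute()/setOutputPortAttribute() methods.
  dag.setInputPortAttribute(out.input, Context.PortContext.QUEUE_CAPACITY, 4096);
}

The examples below follow this pattern, varying only the attribute key (PARTITIONER, STATS_LISTENERS, METRICS_AGGREGATOR, TIMEOUT_WINDOW_COUNT, and so on) and the scope it is applied to.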
Example 1
Source File: ApplicationCCP.java    From examples with Apache License 2.0
public void populateDAG(DAG dag, Configuration conf)
{
  POJOGenerator generator = dag.addOperator("POJOGenerator", POJOGenerator.class);
  CsvParser csvParser = dag.addOperator("csvParser", CsvParser.class);
  FilterOperator filterOperator = dag.addOperator("filter", new FilterOperator());
  TopNAccounts topN = dag.addOperator("topN", new TopNAccounts());
  CsvFormatter formatter = dag.addOperator("formatter", new CsvFormatter());
  ConsoleOutputOperator console = dag.addOperator("console", new ConsoleOutputOperator());

  dag.addStream("data", generator.out, csvParser.in).setLocality(DAG.Locality.THREAD_LOCAL);
  dag.addStream("pojo", csvParser.out, filterOperator.input, topN.in);
  dag.addStream("filtered", filterOperator.truePort, formatter.in);
  dag.addStream("string", formatter.out, console.input).setLocality(DAG.Locality.THREAD_LOCAL);

  dag.setAttribute(Context.DAGContext.METRICS_TRANSPORT, null);
  dag.setAttribute(topN, Context.OperatorContext.METRICS_AGGREGATOR, new TopNAggregator());
}
 
Example 2
Source File: ApplicationCPPAppMetrics.java    From examples with Apache License 2.0
public void populateDAG(DAG dag, Configuration conf)
{
  POJOGenerator generator = dag.addOperator("POJOGenerator", POJOGenerator.class);
  CsvParser csvParser = dag.addOperator("csvParser", CsvParser.class);
  FilterOperator filterOperator = dag.addOperator("filter", new FilterOperator());
  TopNAccounts topN = dag.addOperator("topN", new TopNAccounts());
  CsvFormatter formatter = dag.addOperator("formatter", new CsvFormatter());
  ConsoleOutputOperator console = dag.addOperator("console", new ConsoleOutputOperator());

  dag.addStream("data", generator.out, csvParser.in).setLocality(DAG.Locality.THREAD_LOCAL);
  dag.addStream("pojo", csvParser.out, filterOperator.input, topN.in);
  dag.addStream("filtered", filterOperator.truePort, formatter.in);
  dag.addStream("string", formatter.out, console.input).setLocality(DAG.Locality.THREAD_LOCAL);

  dag.setAttribute(Context.DAGContext.METRICS_TRANSPORT, null);
  dag.setAttribute(topN, Context.OperatorContext.METRICS_AGGREGATOR, new TopNAggregator());
  dag.setAttribute(AppMetricProcessor.APP_METRIC_PROCESSOR, new AppMetricsService());
}
 
Example 3
Source File: DeduperPartitioningTest.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  TestGenerator gen = dag.addOperator("Generator", new TestGenerator());

  dedup = dag.addOperator("Deduper", new TestDeduper());
  dedup.setKeyExpression("id");
  dedup.setTimeExpression("eventTime.getTime()");
  dedup.setBucketSpan(60);
  dedup.setExpireBefore(600);

  ConsoleOutputOperator console = dag.addOperator("Console", new ConsoleOutputOperator());
  dag.addStream("Generator to Dedup", gen.output, dedup.input);
  dag.addStream("Dedup to Console", dedup.unique, console.input);
  dag.setInputPortAttribute(dedup.input, Context.PortContext.TUPLE_CLASS, TestEvent.class);
  dag.setOutputPortAttribute(dedup.unique, Context.PortContext.TUPLE_CLASS, TestEvent.class);
  dag.setAttribute(dedup, Context.OperatorContext.PARTITIONER,
      new StatelessPartitioner<TimeBasedDedupOperator>(NUM_DEDUP_PARTITIONS));
}
 
Example 4
Source File: POJOPartitionJoinOperatorTest.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration configuration)
{
  TestGenerator gen1 = dag.addOperator("Generator1", new TestGenerator());
  TestGenerator gen2 = dag.addOperator("Generator2", new TestGenerator());

  joinOp = dag.addOperator("Join", new PartitionTestJoinOperator());
  joinOp.setLeftKeyExpression("id");
  joinOp.setRightKeyExpression("id");
  joinOp.setIncludeFieldStr("id,eventTime;id,eventTime");
  joinOp.setExpiryTime(10000L);

  ConsoleOutputOperator console = dag.addOperator("Console", new ConsoleOutputOperator());

  dag.addStream("Gen1ToJoin", gen1.output, joinOp.input1);
  dag.addStream("Gen2ToJoin", gen2.output, joinOp.input2);
  dag.addStream("JoinToConsole", joinOp.outputPort, console.input);
  dag.setInputPortAttribute(joinOp.input1, DAG.InputPortMeta.TUPLE_CLASS, TestEvent.class);
  dag.setInputPortAttribute(joinOp.input2, DAG.InputPortMeta.TUPLE_CLASS, TestEvent.class);
  dag.setOutputPortAttribute(joinOp.outputPort, DAG.InputPortMeta.TUPLE_CLASS, TestEvent.class);
  dag.setAttribute(joinOp, Context.OperatorContext.PARTITIONER,
      new StatelessPartitioner<PartitionTestJoinOperator>(NUM_OF_PARTITIONS));
}
 
Example 5
Source File: UniqueKeyValCountExample.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration entries)
{
  /* Generate random key-value pairs */
  RandomDataGenerator randGen = dag.addOperator("randomgen", new RandomDataGenerator());

  /* Initialize with three partitions to start with */
  UniqueCounter<KeyValPair<String, Object>> uniqCount =
      dag.addOperator("uniqevalue", new UniqueCounter<KeyValPair<String, Object>>());
  MapToKeyHashValuePairConverter<KeyValPair<String, Object>, Integer> converter = dag.addOperator("converter", new MapToKeyHashValuePairConverter());
  uniqCount.setCumulative(false);
  dag.setAttribute(uniqCount, Context.OperatorContext.PARTITIONER, new StatelessPartitioner<UniqueCounter<KeyValPair<String, Object>>>(3));

  ConsoleOutputOperator output = dag.addOperator("output", new ConsoleOutputOperator());

  dag.addStream("datain", randGen.outPort, uniqCount.data);
  dag.addStream("convert", uniqCount.count, converter.input).setLocality(Locality.THREAD_LOCAL);
  dag.addStream("consoutput", converter.output, output.input);
}
 
Example 6
Source File: Application.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  // Creating an example application with three operators
  // The last operator is slowing down the DAG
  // With the use of the stats listener the input operator is slowed when the window difference crosses a threshold

  RandomNumberGenerator randomGenerator = dag.addOperator("RandomGenerator", RandomNumberGenerator.class);
  PassThroughOperator<Double> passThrough = dag.addOperator("PassThrough", PassThroughOperator.class);
  SlowDevNullOperator<Double> devNull = dag.addOperator("SlowNull", SlowDevNullOperator.class);

  // Important to use the same stats listener object for all operators so that we can centrally collect stats and make
  // the decision
  StatsListener statsListener = new ThrottlingStatsListener();
  Collection<StatsListener> statsListeners = Lists.newArrayList(statsListener);
  dag.setAttribute(randomGenerator, Context.OperatorContext.STATS_LISTENERS, statsListeners);
  dag.setAttribute(passThrough, Context.OperatorContext.STATS_LISTENERS, statsListeners);
  dag.setAttribute(devNull, Context.OperatorContext.STATS_LISTENERS, statsListeners);

  // Increase timeout for the slow operator, this specifies the maximum timeout for an operator to process a window
  // It is specified in number of windows, since 1 window is 500ms, 30 minutes is 30 * 60 * 2 = 3600 windows
  dag.setAttribute(devNull, Context.OperatorContext.TIMEOUT_WINDOW_COUNT, 3600);

  // If there are unifiers that are slow then set timeout for them
  // dag.setUnifierAttribute(passThrough.output, Context.OperatorContext.TIMEOUT_WINDOW_COUNT, 3600);

  dag.addStream("randomData", randomGenerator.out, passThrough.input);
  dag.addStream("passData", passThrough.output, devNull.input);
}
 
Example 7
Source File: HiveMapInsertBenchmarkingApp.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  HiveStore store = new HiveStore();
  store.setDatabaseUrl(conf.get("dt.application.HiveMapInsertBenchmarkingApp.operator.HiveOperator.store.dbUrl"));
  store.setConnectionProperties(conf.get(
      "dt.application.HiveMapInsertBenchmarkingApp.operator.HiveOperator.store.connectionProperties"));
  store.setFilepath(conf.get("dt.application.HiveMapInsertBenchmarkingApp.operator.HiveOperator.store.filepath"));
  try {
    hiveInitializeMapDatabase(store, conf.get(
        "dt.application.HiveMapInsertBenchmarkingApp.operator.HiveOperator.tablename"), ":");
  } catch (SQLException ex) {
    LOG.debug(ex.getMessage());
  }
  dag.setAttribute(DAG.STREAMING_WINDOW_SIZE_MILLIS, 1000);
  RandomEventGenerator eventGenerator = dag.addOperator("EventGenerator", RandomEventGenerator.class);
  RandomMapOutput mapGenerator = dag.addOperator("MapGenerator", RandomMapOutput.class);
  dag.setAttribute(eventGenerator, PortContext.QUEUE_CAPACITY, 10000);
  dag.setAttribute(mapGenerator, PortContext.QUEUE_CAPACITY, 10000);
  HiveOperator hiveInsert = dag.addOperator("HiveOperator", new HiveOperator());
  hiveInsert.setStore(store);
  FSRollingMapTestImpl rollingMapFsWriter = dag.addOperator("RollingFsMapWriter", new FSRollingMapTestImpl());
  rollingMapFsWriter.setFilePath(store.filepath);
  ArrayList<String> hivePartitionColumns = new ArrayList<String>();
  hivePartitionColumns.add("dt");
  hiveInsert.setHivePartitionColumns(hivePartitionColumns);
  dag.addStream("EventGenerator2Map", eventGenerator.integer_data, mapGenerator.input);
  dag.addStream("MapGenerator2HdfsOutput", mapGenerator.map_data, rollingMapFsWriter.input);
  dag.addStream("FsWriter2Hive", rollingMapFsWriter.outputPort, hiveInsert.input);

}
 
Example 8
Source File: S3TupleOutputModule.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  FSRecordCompactionOperator<INPUT> s3compaction = dag.addOperator("S3Compaction", new FSRecordCompactionOperator<INPUT>());
  s3compaction.setConverter(getConverter());
  s3compaction.setMaxIdleWindows(maxIdleWindows);
  s3compaction.setMaxLength(maxLength);

  StatelessThroughputBasedPartitioner<FSRecordCompactionOperator<INPUT>> partitioner = new StatelessThroughputBasedPartitioner<FSRecordCompactionOperator<INPUT>>();
  partitioner.setMaximumEvents(maxTuplesPerSecPerPartition);
  partitioner.setMinimumEvents(minTuplesPerSecPerPartition);
  partitioner.setCooldownMillis(coolDownMillis);
  dag.setAttribute(s3compaction, OperatorContext.STATS_LISTENERS, Arrays.asList(new StatsListener[] {partitioner}));
  dag.setAttribute(s3compaction, OperatorContext.PARTITIONER, partitioner);

  S3Reconciler s3Reconciler = dag.addOperator("S3Reconciler", new S3Reconciler());
  s3Reconciler.setAccessKey(accessKey);
  s3Reconciler.setSecretKey(secretAccessKey);
  s3Reconciler.setBucketName(bucketName);
  if (region != null) {
    s3Reconciler.setRegion(region);
  }
  s3Reconciler.setDirectoryName(outputDirectoryPath);

  S3ReconcilerQueuePartitioner<S3Reconciler> reconcilerPartitioner = new S3ReconcilerQueuePartitioner<S3Reconciler>();
  reconcilerPartitioner.setCooldownMillis(coolDownMillis);
  reconcilerPartitioner.setMinPartitions(minS3UploadPartitions);
  reconcilerPartitioner.setMaxPartitions(maxS3UploadPartitions);
  reconcilerPartitioner.setMaxQueueSizePerPartition(maxQueueSizeS3Upload);

  dag.setAttribute(s3Reconciler, OperatorContext.STATS_LISTENERS,
      Arrays.asList(new StatsListener[] {reconcilerPartitioner}));
  dag.setAttribute(s3Reconciler, OperatorContext.PARTITIONER, reconcilerPartitioner);

  dag.addStream("write-to-s3", s3compaction.output, s3Reconciler.input);
  input.set(s3compaction.input);
  output.set(s3Reconciler.outputPort);
}
 
Example 9
Source File: ApplicationDimensionComputation.java    From streaming-benchmarks with Apache License 2.0
protected PubSubWebSocketAppDataResult createQueryResult(DAG dag, Configuration conf, AppDataSingleSchemaDimensionStoreHDHT store)
{
  PubSubWebSocketAppDataResult wsOut = new PubSubWebSocketAppDataResult();
  URI queryUri = getQueryUri(dag, conf);
  wsOut.setUri(queryUri);
  dag.addOperator("QueryResult", wsOut);
  // Set remaining dag options

  dag.setAttribute(store, Context.OperatorContext.COUNTERS_AGGREGATOR,
      new BasicCounters.LongAggregator<MutableLong>());
  
  return wsOut;
}
 
Example 10
Source File: Application.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration entries)
{
  /* Generate random key-value pairs */
  RandomKeysGenerator randGen = dag.addOperator("randomgen", new RandomKeysGenerator());


  /* Initialize with three partitions to start with */
  // UniqueCount1 uniqCount = dag.addOperator("uniqevalue", new UniqueCount1());
  UniqueCounter<Integer> uniqCount = dag.addOperator("uniqevalue", new UniqueCounter<Integer>());

  MapToKeyHashValuePairConverter<Integer, Integer> converter = dag.addOperator("converter", new MapToKeyHashValuePairConverter());

  uniqCount.setCumulative(false);
  dag.setAttribute(uniqCount, Context.OperatorContext.PARTITIONER, new StatelessPartitioner<UniqueCounter<Integer>>(3));

  CountVerifier<Integer> verifier = dag.addOperator("verifier", new CountVerifier<Integer>());
  StreamDuplicater<KeyHashValPair<Integer, Integer>> dup = dag.addOperator("dup", new StreamDuplicater<KeyHashValPair<Integer, Integer>>());
  ConsoleOutputOperator output = dag.addOperator("output", new ConsoleOutputOperator());

  ConsoleOutputOperator successOutput = dag.addOperator("successoutput", new ConsoleOutputOperator());
  successOutput.setStringFormat("Success %d");
  ConsoleOutputOperator failureOutput = dag.addOperator("failureoutput", new ConsoleOutputOperator());
  failureOutput.setStringFormat("Failure %d");

  // success and failure counters.
  Counter successcounter = dag.addOperator("successcounter", new Counter());
  Counter failurecounter = dag.addOperator("failurecounter", new Counter());

  dag.addStream("datain", randGen.outPort, uniqCount.data);
  dag.addStream("dataverification0", randGen.verificationPort, verifier.in1);
  dag.addStream("convert", uniqCount.count, converter.input).setLocality(Locality.THREAD_LOCAL);
  dag.addStream("split", converter.output, dup.data);
  dag.addStream("consoutput", dup.out1, output.input);
  dag.addStream("dataverification1", dup.out2, verifier.in2);
  dag.addStream("successc", verifier.successPort, successcounter.input);
  dag.addStream("failurec", verifier.failurePort, failurecounter.input);
  dag.addStream("succconsoutput", successcounter.output, successOutput.input);
  dag.addStream("failconsoutput", failurecounter.output, failureOutput.input);
}
 
Example 11
Source File: ApplicationDimensionComputation.java    From streaming-benchmarks with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration configuration) 
{
  DimensionTupleGenerateOperator generateOperator = new DimensionTupleGenerateOperator();
  dag.addOperator("Generator", generateOperator);
  dag.setAttribute(generateOperator, Context.OperatorContext.PARTITIONER, new StatelessPartitioner<EventGenerator>(PARTITION_NUM));
  
  populateDimensionsDAG(dag, configuration, generateOperator.outputPort);
}
 
Example 12
Source File: ApplicationWithGenerator.java    From streaming-benchmarks with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration configuration)
{
  // Create operators for each step
  // Settings are applied by the platform using the config file.
  JsonGenerator eventGenerator = dag.addOperator("eventGenerator", new JsonGenerator());
  FilterTuples filterTuples = dag.addOperator("filterTuples", new FilterTuples());
  FilterFields filterFields = dag.addOperator("filterFields", new FilterFields());
  RedisJoin redisJoin = dag.addOperator("redisJoin", new RedisJoin());
  CampaignProcessor campaignProcessor = dag.addOperator("campaignProcessor", new CampaignProcessor());

  eventGenerator.setNumAdsPerCampaign(Integer.parseInt(configuration.get("numberOfAds")));
  eventGenerator.setNumCampaigns(Integer.parseInt(configuration.get("numberOfCampaigns")));
  setupRedis(eventGenerator.getCampaigns(), configuration.get("redis"));

  // Connect the Ports in the Operators
  dag.addStream("filterTuples", eventGenerator.out, filterTuples.input).setLocality(DAG.Locality.CONTAINER_LOCAL);
  dag.addStream("filterFields", filterTuples.output, filterFields.input).setLocality(DAG.Locality.CONTAINER_LOCAL);
  dag.addStream("redisJoin", filterFields.output, redisJoin.input).setLocality(DAG.Locality.CONTAINER_LOCAL);
  dag.addStream("output", redisJoin.output, campaignProcessor.input);

  dag.setInputPortAttribute(filterTuples.input, Context.PortContext.PARTITION_PARALLEL, true);
  dag.setInputPortAttribute(filterFields.input, Context.PortContext.PARTITION_PARALLEL, true);
  dag.setInputPortAttribute(redisJoin.input, Context.PortContext.PARTITION_PARALLEL, true);

  dag.setAttribute(eventGenerator, Context.OperatorContext.PARTITIONER, new StatelessPartitioner<EventGenerator>(8));
  dag.setAttribute(campaignProcessor, Context.OperatorContext.PARTITIONER, new StatelessPartitioner<CampaignProcessor>(8));
}
 
Example 13
Source File: StramLocalClusterTest.java    From attic-apex-core with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  TestGeneratorInputOperator input = dag.addOperator("Input", new TestGeneratorInputOperator());
  test = dag.addOperator("Test", new DynamicLoader());

  dag.addStream("S1", input.outport, test.input);
  dag.setAttribute(Context.DAGContext.LIBRARY_JARS, generatedJar);
  dag.setInputPortAttribute(test.input, Context.PortContext.TUPLE_CLASS, pojo);
}
 
Example 14
Source File: KafkaInputBenchmark.java    From attic-apex-malhar with Apache License 2.0
@Override
  public void populateDAG(DAG dag, Configuration conf)
  {

    dag.setAttribute(DAG.APPLICATION_NAME, "KafkaInputOperatorPartitionDemo");
    BenchmarkKafkaInputOperator bpkio = new BenchmarkKafkaInputOperator();

    String type = conf.get("kafka.consumertype", "simple");

    KafkaConsumer consumer = null;

    if (type.equals("highlevel")) {
      // Create template high-level consumer

      Properties props = new Properties();
      props.put("group.id", "main_group");
      props.put("auto.offset.reset", "smallest");
      consumer = new HighlevelKafkaConsumer(props);
    } else {
      // topic is set via property file
      consumer = new SimpleKafkaConsumer(null, 10000, 100000, "test_kafka_autop_client", null);
    }

    bpkio.setZookeeper(conf.get("dt.kafka.zookeeper"));
    bpkio.setInitialPartitionCount(1);
    //bpkio.setTuplesBlast(1024 * 1024);
    bpkio.setConsumer(consumer);

    bpkio = dag.addOperator("KafkaBenchmarkConsumer", bpkio);

    CollectorModule cm = dag.addOperator("DataBlackhole", CollectorModule.class);
    dag.addStream("end", bpkio.oport, cm.inputPort).setLocality(Locality.CONTAINER_LOCAL);
    dag.setInputPortAttribute(cm.inputPort, PortContext.PARTITION_PARALLEL, true);
    dag.setAttribute(bpkio, OperatorContext.COUNTERS_AGGREGATOR, new KafkaConsumer.KafkaMeterStatsAggregator());
//    dag.setAttribute(bpkio, OperatorContext.STATS_LISTENER, KafkaMeterStatsListener.class);

  }
 
Example 15
Source File: KafkaInputOperatorTest.java    From attic-apex-malhar with Apache License 2.0
private void setupHasFailureTest(KafkaSinglePortInputOperator operator, DAG dag)
{
  operator.setHoldingBufferSize(5000);
  dag.setAttribute(Context.DAGContext.CHECKPOINT_WINDOW_COUNT, 1);
  //dag.setAttribute(Context.OperatorContext.STORAGE_AGENT, new FSStorageAgent(
  //  APPLICATION_PATH + "failureck", new Configuration()));
  operator.setMaxTuplesPerWindow(tuplesPerWindow);
}
 
Example 16
Source File: ApplicationTest.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  dag.setAttribute(com.datatorrent.api.Context.DAGContext.STREAMING_WINDOW_SIZE_MILLIS, 1000);
  FlumeInputOperator flume = dag.addOperator("FlumeOperator", new FlumeInputOperator());
  flume.setConnectAddresses(new String[]{"sink1:127.0.0.1:9098"});
  flume.setCodec(new EventCodec());
  Counter counter = dag.addOperator("Counter", new Counter());

  dag.addStream("Slices", flume.output, counter.input).setLocality(Locality.CONTAINER_LOCAL);
}
 
Example 17
Source File: TwitterTopCounterApplication.java    From attic-apex-malhar with Apache License 2.0
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  // Setup the operator to get the data from twitter sample stream injected into the system.
  TwitterSampleInput twitterFeed = new TwitterSampleInput();
  twitterFeed = dag.addOperator("TweetSampler", twitterFeed);

  //  Setup the operator to get the URLs extracted from the twitter statuses
  TwitterStatusURLExtractor urlExtractor = dag.addOperator("URLExtractor", TwitterStatusURLExtractor.class);

  // Setup a node to count the unique urls within a window.
  UniqueCounter<String> uniqueCounter = dag.addOperator("UniqueURLCounter", new UniqueCounter<String>());
  // Get the aggregated url counts and count them over last 5 mins.
  dag.setAttribute(uniqueCounter, Context.OperatorContext.APPLICATION_WINDOW_COUNT, 600);
  dag.setAttribute(uniqueCounter, Context.OperatorContext.SLIDE_BY_WINDOW_COUNT, 1);


  WindowedTopCounter<String> topCounts = dag.addOperator("TopCounter", new WindowedTopCounter<String>());
  topCounts.setTopCount(10);
  topCounts.setSlidingWindowWidth(1);
  topCounts.setDagWindowWidth(1);

  // Feed the statuses from feed into the input of the url extractor.
  dag.addStream("TweetStream", twitterFeed.status, urlExtractor.input).setLocality(Locality.CONTAINER_LOCAL);
  //  Start counting the urls coming out of URL extractor
  dag.addStream("TwittedURLs", urlExtractor.url, uniqueCounter.data).setLocality(locality);
  // Count unique urls
  dag.addStream("UniqueURLCounts", uniqueCounter.count, topCounts.input);

  consoleOutput(dag, "topURLs", topCounts.output, SNAPSHOT_SCHEMA, "url");
}
 
Example 18
Source File: KafkaInputOperatorTest.java    From attic-apex-malhar with Apache License 2.0
/**
  * Test AbstractKafkaSinglePortInputOperator (i.e. an input adapter for
  * Kafka, aka consumer). This module receives data from an outside test
  * generator through the Kafka message bus and feeds that data into the
  * Malhar streaming platform.
  *
  * [Generate a message and send it to the Kafka message bus] ==> [Receive that
  * message through the Kafka input adapter (i.e. consumer) and send it using the
  * emitTuples() interface on the output port during the onMessage call]
  *
  * @throws Exception
  */
 public void testKafkaInputOperator(int sleepTime, final int totalCount, KafkaConsumer consumer, boolean isValid, boolean idempotent) throws Exception
 {
   // Initialize the latch for this test
   latch = new CountDownLatch(1);

   // Start producer
   KafkaTestProducer p = new KafkaTestProducer(TEST_TOPIC);
   p.setSendCount(totalCount);
   new Thread(p).start();

   // Create DAG for testing.
   LocalMode lma = LocalMode.newInstance();
   DAG dag = lma.getDAG();



   // Create KafkaSinglePortStringInputOperator
   KafkaSinglePortStringInputOperator node = dag.addOperator("Kafka message consumer", KafkaSinglePortStringInputOperator.class);
   if (isSuicide) {
     // make some extreme assumptions to make it fail if checkpointing wrong offsets
     dag.setAttribute(Context.DAGContext.CHECKPOINT_WINDOW_COUNT, 1);
     dag.setAttribute(Context.OperatorContext.STORAGE_AGENT, new FSStorageAgent("target/ck", new Configuration()));
     node.setMaxTuplesPerWindow(500);
   }

   if (idempotent) {
     node.setWindowDataManager(new FSWindowDataManager());
   }
   consumer.setTopic(TEST_TOPIC);

   node.setConsumer(consumer);

   consumer.setCacheSize(5000);

   if (isValid) {
     node.setZookeeper("localhost:" + KafkaOperatorTestBase.TEST_ZOOKEEPER_PORT[0]);
   }

   // Create Test tuple collector
   CollectorModule<String> collector = dag.addOperator("TestMessageCollector", new CollectorModule<String>());

   // Connect ports
   dag.addStream("Kafka message", node.outputPort, collector.inputPort).setLocality(Locality.CONTAINER_LOCAL);

   // Create local cluster
   final LocalMode.Controller lc = lma.getController();
   lc.setHeartbeatMonitoringEnabled(false);

   lc.runAsync();

   // Wait up to 300 seconds for the consumer to finish consuming all the messages
   Assert.assertTrue("TIMEOUT: 300s ", latch.await(300000, TimeUnit.MILLISECONDS));

   // Check results
   Assert.assertTrue("Expected count >= " + totalCount + "; Actual count " + tupleCount.intValue(),
       totalCount <= tupleCount.intValue());
   logger.debug(String.format("Number of emitted tuples: %d", tupleCount.intValue()));

   p.close();
   lc.shutdown();
 }
 
Example 19
Source File: OffsetManagerTest.java    From attic-apex-malhar with Apache License 2.0
public void testPartitionableInputOperator(KafkaConsumer consumer, int expectedCount) throws Exception
{
  // Set to 3 because we want to make sure the END_TUPLEs from both partitions are received and the offsets have been updated to 102
  latch = new CountDownLatch(3);

  // Start producer
  KafkaTestProducer p = new KafkaTestProducer(TEST_TOPIC, true);
  p.setProducerType("sync");
  p.setSendCount(totalCount);
  // wait for the producer to send all messages
  p.run();
  p.close();

  // Create DAG for testing.
  LocalMode lma = LocalMode.newInstance();
  DAG dag = lma.getDAG();

  // Create KafkaSinglePortStringInputOperator
  KafkaSinglePortStringInputOperator node = dag.addOperator("Kafka message consumer", KafkaSinglePortStringInputOperator.class);


  TestOffsetManager tfm = new TestOffsetManager();

  tfm.setFilename(TEST_TOPIC + OFFSET_FILE);

  node.setInitialPartitionCount(1);
  node.setOffsetManager(tfm);
  node.setStrategy(PartitionStrategy.ONE_TO_MANY.toString());
  node.setRepartitionInterval(-1);

  // set topic
  consumer.setTopic(TEST_TOPIC);
  // set the zookeeper list used to initialize the partition
  SetMultimap<String, String> zookeeper = HashMultimap.create();
  String zks = "localhost:" + KafkaOperatorTestBase.TEST_ZOOKEEPER_PORT[0];
  consumer.setZookeeper(zks);
  consumer.setInitialOffset("earliest");

  node.setConsumer(consumer);

  // Create Test tuple collector
  CollectorModule collector = dag.addOperator("TestMessageCollector", new CollectorModule());

  // Connect ports
  dag.addStream("Kafka message", node.outputPort, collector.inputPort).setLocality(Locality.CONTAINER_LOCAL);

  dag.setAttribute(Context.DAGContext.CHECKPOINT_WINDOW_COUNT, 1);

  // Create local cluster
  final LocalMode.Controller lc = lma.getController();
  lc.setHeartbeatMonitoringEnabled(true);

  lc.runAsync();



  // Wait up to 30s for the consumer to finish consuming all the messages and for the offsets to be updated to 100
  boolean isNotTimeout = latch.await(30000, TimeUnit.MILLISECONDS);
  assertTrue("TIMEOUT: 30s, collected " + collectedTuples.size() + " tuples", isNotTimeout);

  // Check results
  assertEquals("Tuple count " + collectedTuples, expectedCount, collectedTuples.size());
  logger.debug(String.format("Number of emitted tuples: %d", collectedTuples.size()));

  p.close();
  lc.shutdown();
}
 
Example 20
Source File: CustomerServiceDemoV2.java    From examples with Apache License 2.0
protected void populateCsGeoDAG(DAG dag, Configuration conf,
    List<DefaultInputPort<? super EnrichedCustomerService>> customerServiceStreamSinks)
{
  // dimension
  DimensionsComputationFlexibleSingleSchemaPOJO dimensions = dag.addOperator("TagServiceGeoLocations",
      DimensionsComputationFlexibleSingleSchemaPOJO.class);
  dag.getMeta(dimensions).getAttributes().put(Context.OperatorContext.APPLICATION_WINDOW_COUNT, 4);
  dag.getMeta(dimensions).getAttributes().put(Context.OperatorContext.CHECKPOINT_WINDOW_COUNT, 4);

  customerServiceStreamSinks.add(dimensions.input);

  // Set operator properties
  // key expressions: zipcode, region, time
  {
    Map<String, String> keyToExpression = Maps.newHashMap();
    keyToExpression.put("zipcode", "getZipCode()");
    keyToExpression.put("region", "getRegionZip2()");
    keyToExpression.put("time", "getTime()");
    dimensions.setKeyToExpression(keyToExpression);
  }

  // aggregate expressions: wait, lat, lon
  {
    Map<String, String> aggregateToExpression = Maps.newHashMap();
    aggregateToExpression.put("wait", "getWait()");
    aggregateToExpression.put("lat", "getLat()");
    aggregateToExpression.put("lon", "getLon()");
    dimensions.setAggregateToExpression(aggregateToExpression);
  }

  // event schema
  String geoSchema = SchemaUtils.jarResourceFileToString(csGeoSchemaLocation);
  dimensions.setConfigurationSchemaJSON(geoSchema);

  dimensions.setUnifier(new DimensionsComputationUnifierImpl<InputEvent, Aggregate>());
  dag.getMeta(dimensions).getMeta(dimensions.output).getUnifierMeta().getAttributes().put(OperatorContext.MEMORY_MB,
      8092);

  // store
  //AppDataSingleSchemaDimensionStoreHDHT store = dag.addOperator("StoreTaggedServiceGeoLocations", AppDataSingleSchemaDimensionStoreHDHT.class);
  GeoDimensionStore store = dag.addOperator("StoreTaggedServiceGeoLocations", GeoDimensionStore.class);
  store.setUpdateEnumValues(true);
  String basePath = Preconditions.checkNotNull(conf.get(PROP_GEO_STORE_PATH),
      "GEO base path should be specified in the properties.xml");
  TFileImpl hdsFile = new TFileImpl.DTFileImpl();
  basePath += System.currentTimeMillis();
  hdsFile.setBasePath(basePath);

  store.setFileStore(hdsFile);
  store.setConfigurationSchemaJSON(geoSchema);
  dag.setAttribute(store, Context.OperatorContext.COUNTERS_AGGREGATOR,
      new BasicCounters.LongAggregator<MutableLong>());

  PubSubWebSocketAppDataQuery query = createAppDataQuery();
  URI queryUri = ConfigUtil.getAppDataQueryPubSubURI(dag, conf);
  query.setUri(queryUri);
  store.setEmbeddableQueryInfoProvider(query);
  if (csGeoStorePartitionCount > 1) {
    store.setPartitionCount(csGeoStorePartitionCount);
    store.setQueryResultUnifier(new DimensionStoreHDHTNonEmptyQueryResultUnifier());
  }

  // wsOut
  PubSubWebSocketAppDataResult wsOut = createAppDataResult();
  wsOut.setUri(queryUri);
  dag.addOperator("CSGeoQueryResult", wsOut);
  // Set remaining dag options

  dag.setAttribute(store, Context.OperatorContext.COUNTERS_AGGREGATOR,
      new BasicCounters.LongAggregator<MutableLong>());

  dag.addStream("CSGeoStream", dimensions.output, store.input);
  dag.addStream("CSGeoQueryResult", store.queryResult, wsOut.input);
}