com.hazelcast.jet.pipeline.Sinks Java Examples

The following examples show how to use com.hazelcast.jet.pipeline.Sinks. They are taken from open-source projects; the source file, originating project, and license are noted above each example.
Example #1
Source File: InfluxDbSourceTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test_stream_influxDbSource_withPojoResultMapper() {
    InfluxDB db = influxdbContainer.getNewInfluxDB();
    fillCpuData(db);

    Pipeline p = Pipeline.create();

    p.readFrom(
            InfluxDbSources.influxDb("SELECT * FROM test_db..cpu",
                    DATABASE_NAME,
                    influxdbContainer.getUrl(),
                    USERNAME,
                    PASSWORD,
                    Cpu.class))
     .addTimestamps(cpu -> cpu.time.toEpochMilli(), 0)
     .writeTo(Sinks.list("results"));

    jet.newJob(p).join();

    assertEquals(VALUE_COUNT, jet.getList("results").size());
}
 
Example #2
Source File: HazelcastJetInterpreterUtilsTest.java    From zeppelin with Apache License 2.0
@Test
public void testDisplayNetworkFromDAGUtil() {

  Pipeline p = Pipeline.create();
  p.drawFrom(Sources.<String>list("text"))
    .flatMap(word ->
      traverseArray(word.toLowerCase().split("\\W+"))).setName("flat traversing")
    .filter(word -> !word.isEmpty())
    .groupingKey(wholeItem())
    .aggregate(counting())
    .drainTo(Sinks.map("counts"));

  assertEquals(
              NETWORK_RESULT_1,
              HazelcastJetInterpreterUtils.displayNetworkFromDAG(p.toDag())
  );

}
 
Example #3
Source File: Solution6.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource(1))
      .withNativeTimestamps(0)
      .mapStateful(
             LongAccumulator::new,
             (previousPrice, currentTrade) -> {
                 Long difference = previousPrice.get() - currentTrade.getPrice();
                 previousPrice.set(currentTrade.getPrice());

                 return (difference > PRICE_DROP_TRESHOLD) ? difference : null;
             })
      .writeTo(Sinks.logger(m -> "Price drop: " + m));

    return p;
}
 
Example #4
Source File: Solution5.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    SinkStage sinkStage = p.readFrom(TradeSource.tradeSource(1000))
            .withNativeTimestamps(0)
            // Step 1 solution
            // .window(WindowDefinition.tumbling(3000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))
            //
            // Step 2 solution
            // .window(WindowDefinition.tumbling(3000).setEarlyResultsPeriod(1000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))
            //
            // Step 3 solution
            // .window(WindowDefinition.sliding(3000,1000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))
            //
            // Step 4 solution
            // .groupingKey(Trade::getSymbol)
            // .window(WindowDefinition.sliding(3000,1000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))

            .writeTo(Sinks.logger());

    return p;
}
 
Example #5
Source File: InProcessClassification.java    From hazelcast-jet-demos with Apache License 2.0
private static Pipeline buildPipeline(IMap<Long, String> reviewsMap) {
    // Set up the mapping context that loads the model on each member, shared
    // by all parallel processors on that member.
    ServiceFactory<Tuple2<SavedModelBundle, WordIndex>, Tuple2<SavedModelBundle, WordIndex>> modelContext = ServiceFactory
            .withCreateContextFn(context -> {
                File data = context.attachedDirectory("data");
                SavedModelBundle bundle = SavedModelBundle.load(data.toPath().resolve("model/1").toString(), "serve");
                return tuple2(bundle, new WordIndex(data));
            })
            .withDestroyContextFn(t -> t.f0().close())
            .withCreateServiceFn((context, tuple2) -> tuple2);
    Pipeline p = Pipeline.create();
    p.readFrom(Sources.map(reviewsMap))
     .map(Map.Entry::getValue)
     .mapUsingService(modelContext, (tuple, review) -> classify(review, tuple.f0(), tuple.f1()))
     // TensorFlow executes models in parallel, so we'll use 2 local threads to maximize throughput.
     .setLocalParallelism(2)
     .writeTo(Sinks.logger(t -> String.format("Sentiment rating for review \"%s\" is %.2f", t.f0(), t.f1())));
    return p;
}
 
Example #6
Source File: DebeziumCDCWithKafkaAndJet.java    From hazelcast-jet-demos with Apache License 2.0
public static void main(String[] args) {
    JetInstance jet = JetBootstrap.getInstance();

    Properties properties = new Properties();
    properties.setProperty("group.id", "cdc-demo");
    properties.setProperty("bootstrap.servers", "kafka:9092");
    properties.setProperty("key.deserializer", JsonDeserializer.class.getCanonicalName());
    properties.setProperty("value.deserializer", JsonDeserializer.class.getCanonicalName());
    properties.setProperty("auto.offset.reset", "earliest");
    Pipeline p = Pipeline.create();

    p.readFrom(KafkaSources.kafka(properties, record -> {
        HazelcastJsonValue key = new HazelcastJsonValue(record.key().toString());
        HazelcastJsonValue value = new HazelcastJsonValue(record.value().toString());
        return Util.entry(key, value);
    }, "dbserver1.inventory.customers"))
     .withoutTimestamps()
     .peek()
     .writeTo(Sinks.map("customers"));

    jet.newJob(p).join();
}
 
Example #7
Source File: ReadKafkaIntoHazelcast.java    From hazelcast-jet-demos with Apache License 2.0
public static Pipeline build(String bootstrapServers) {
    Properties properties = new Properties();
    properties.put(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString());
    properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getCanonicalName());
    properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getCanonicalName());
    properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

    Pipeline pipeline = Pipeline.create();

    pipeline
            .readFrom(KafkaSources.kafka(properties, Constants.TOPIC_NAME_PRECIOUS))
            .withoutTimestamps()
            .writeTo(Sinks.map(Constants.IMAP_NAME_PRECIOUS));

    return pipeline;
}
 
Example #8
Source File: InfluxDbSourceTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test_stream_influxDbSource_withMeasurementMapper() {
    InfluxDB db = influxdbContainer.getNewInfluxDB();
    fillData(db);

    Pipeline p = Pipeline.create();

    p.readFrom(
            InfluxDbSources.influxDb("SELECT * FROM test_db..test",
                    DATABASE_NAME,
                    influxdbContainer.getUrl(),
                    USERNAME,
                    PASSWORD,
                    (name, tags, columns, row) -> tuple2(row.get(0), row.get(1))))
     .writeTo(Sinks.list("results"));

    jet.newJob(p).join();

    assertEquals(VALUE_COUNT, jet.getList("results").size());
}
 
Example #9
Source File: Lab4.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline(IMap<String, String> lookupTable) {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource())
     .withoutTimestamps()

    // Convert Trade stream to EnrichedTrade stream
    // - Trade (dto.Trade) has a symbol field
    // - Use LOOKUP_TABLE to look up full company name based on the symbol
    // - Create new Enriched Trade (dto.EnrichedTrade) using Trade and company name

    .writeTo(Sinks.logger());

    return p;
}
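
A sketch of one way to fill in the enrichment step, assuming the dto.Trade and dto.EnrichedTrade classes referenced in the comments are available (Example #15, Solution4.java, solves the same lab using native timestamps):

private static Pipeline buildPipeline(IMap<String, String> lookupTable) {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource())
     .withoutTimestamps()
     // Look up the company name for the trade's symbol in the IMap and wrap both in an EnrichedTrade
     .mapUsingIMap(lookupTable, Trade::getSymbol,
             (trade, companyName) -> new EnrichedTrade(trade, companyName))
     .writeTo(Sinks.logger());

    return p;
}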
 
Example #10
Source File: WordCounter.java    From tutorials with MIT License
private Pipeline createPipeLine() {
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.<String> list(LIST_NAME))
        .flatMap(word -> traverseArray(word.toLowerCase()
            .split("\\W+")))
        .filter(word -> !word.isEmpty())
        .groupingKey(wholeItem())
        .aggregate(counting())
        .drainTo(Sinks.map(MAP_NAME));
    return p;
}
 
Example #11
Source File: Lab1.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    StreamSource<Long> source = TestSources.itemStream(1, (ts, seq) -> seq);

    p.readFrom(source)
     .withoutTimestamps()
     .writeTo(Sinks.logger());

    // Run the code to see the results in the console
    // Stop it before leaving the lab

    return p;
}
 
Example #12
Source File: MongoDBSourceTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void testBatch() {

    IList<Document> list = jet.getList("list");

    List<Document> documents = new ArrayList<>();
    for (int i = 0; i < 100; i++) {
        documents.add(new Document("key", i).append("val", i));
    }
    collection().insertMany(documents);


    String connectionString = mongoContainer.connectionString();

    Pipeline p = Pipeline.create();
    p.readFrom(MongoDBSources.batch(SOURCE_NAME, connectionString, DB_NAME, COL_NAME,
            new Document("val", new Document("$gte", 10)),
            new Document("val", 1).append("_id", 0)))
     .writeTo(Sinks.list(list));

    jet.newJob(p).join();

    assertEquals(90, list.size());
    Document actual = list.get(0);
    assertNull(actual.get("key"));
    assertNull(actual.get("_id"));
    assertNotNull(actual.get("val"));
}
 
Example #13
Source File: Solution2.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    StreamSource<Long> source = TestSources.itemStream(1, (ts, seq) -> seq);
    // StreamSource<String> source = Sources.fileWatcher(DIRECTORY);

    p.readFrom(source)
     .withoutTimestamps()
     // .map( line-> Long.valueOf(line))
     .filter(item -> (item % 2) == 0)
     .writeTo(Sinks.logger());

    return p;
}
 
Example #14
Source File: Solution3.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource())
     .withNativeTimestamps(0)
     .map(trade -> Util.entry(trade.getSymbol(), trade))
     .writeTo(Sinks.map(LATEST_TRADES_PER_SYMBOL));

    return p;
}
 
Example #15
Source File: Solution4.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline(IMap<String, String> lookupTable) {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource())
            .withNativeTimestamps(0)
            .mapUsingIMap(lookupTable, Trade::getSymbol,
                    (trade, companyName) -> new EnrichedTrade(trade, companyName) )
            .writeTo(Sinks.logger());

    return p;
}
 
Example #16
Source File: Lab5.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource(1000))
     .withNativeTimestamps(0)

     // STEP 1 - Compute sum of trades for 3-second intervals
     // - Use 3 sec tumbling windows (WindowDefinition.tumbling with size 3000)
     // - Sum trade prices
     // Run the job and inspect the results. Stop the Job before moving to STEP 2.

     // STEP 2 - Compute sum of trades for 3-second intervals with speculative results every second
     // - Use early results when defining the window
     // - Watch the early result flag in the console output
     // Run the job and inspect the results. Stop the Job before moving to STEP 3.

     // STEP 3 - Compute sum of trades over the last 3 seconds, updated every second
     // - Use 3 sec sliding windows with 1 sec step
     // Run the job and inspect the results. Stop the Job before moving to STEP 4.

     // STEP 4 - Compute sum of trades over the last 3 seconds for each trading symbol
     // - Group the stream on the trading symbol
     // - Use 3 sec sliding windows with 1 sec step
     // Run the job and inspect the results. Stop the Job before leaving the lab.



     .writeTo(Sinks.logger());


    return p;
}
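
A sketch of STEP 4 (per-symbol sums over a sliding window), assuming the Trade class and TradeSource from the training repository; the commented-out blocks in Example #4 (Solution5.java) cover the earlier steps:

private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource(1000))
     .withNativeTimestamps(0)
     // Group by trading symbol, then sum prices over the last 3 seconds, refreshed every second
     .groupingKey(Trade::getSymbol)
     .window(WindowDefinition.sliding(3000, 1000))
     .aggregate(AggregateOperations.summingLong(Trade::getPrice))
     .writeTo(Sinks.logger());

    return p;
}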
 
Example #17
Source File: PreciousHistory.java    From hazelcast-jet-demos with Apache License 2.0
public static Pipeline build() {
    Pipeline p = Pipeline.create();

    // Palladium and Platinum only
    p.readFrom(Sources.<String, Object>mapJournal(
            Constants.IMAP_NAME_PRECIOUS, JournalInitialPosition.START_FROM_OLDEST)
    ).withoutTimestamps()
     .map(e -> e.getKey() + "==" + e.getValue())
     .filter(str -> str.toLowerCase().startsWith("p"))
     .writeTo(Sinks.logger());

    return p;
}
 
Example #18
Source File: FileWatcher.java    From hazelcast-jet-demos with Apache License 2.0
static Pipeline build() {
	Pipeline pipeline = Pipeline.create();
	
	StreamStage<String> source = pipeline
			.readFrom(FileWatcher.buildFileWatcherSource()).withoutTimestamps().setName("fileSource");

	source.writeTo(FileWatcher.buildTopicSink());
	source.writeTo(Sinks.logger()).setName("loggerSink");

	return pipeline;
}
 
Example #19
Source File: BreastCancerClassification.java    From hazelcast-jet-demos with Apache License 2.0
/**
 * Builds and returns the Pipeline which represents the actual computation.
 */
private static Pipeline buildPipeline(Path sourceFile) {
    Pipeline pipeline = Pipeline.create();

    BatchStage<BreastCancerDiagnostic> fileSource = pipeline.readFrom(filesBuilder(sourceFile.getParent().toString())
            .glob(sourceFile.getFileName().toString())
            .build(path -> Files.lines(path).skip(1).map(BreastCancerDiagnostic::new)))
                                                            .setName("Read from CSV input file");

    fileSource.apply(applyPredictionFromModelFile())
              .writeTo(Sinks.logger()).setName("Write to standard out");
    return pipeline;
}
 
Example #20
Source File: AnalysisJet.java    From hazelcast-jet-demos with Apache License 2.0
/**
 * Helper method to construct the pipeline for the job
 *
 * @return the pipeline for the job
 */
public static Pipeline buildPipeline() {
    final Pipeline p = Pipeline.create();

    // Compute map server side
    final BatchStage<Horse> c = p.readFrom(Sources.map(EVENTS_BY_NAME, t -> true, HORSE_FROM_EVENT));

    final BatchStage<Entry<Horse, Long>> c2 = c.groupingKey(wholeItem())
                                               .aggregate(counting())
                                               .filter(ent -> ent.getValue() > 1);

    c2.writeTo(Sinks.map(MULTIPLE));

    return p;
}
 
Example #21
Source File: JetBetMain.java    From hazelcast-jet-demos with Apache License 2.0
/**
 * Helper method to construct the pipeline for the job
 *
 * @return the pipeline for the real-time analysis
 */
public static Pipeline buildPipeline() {
    final Pipeline pipeline = Pipeline.create();

    // Draw users from the Hazelcast IMDG source
    BatchStage<User> users = pipeline.readFrom(Sources.<User, Long, User>map(USER_ID, e -> true, Entry::getValue));

    // All bet legs which are single
    BatchStage<Tuple3<Race, Horse, Bet>> bets = users.flatMap(user -> traverseStream(
            user.getKnownBets().stream()
                .filter(Bet::single)
                .flatMap(bet -> bet.getLegs().stream().map(leg -> tuple3(leg.getRace(), leg.getBacking(), bet)))
            )
    );

    // Find for each race the projected loss if each horse was to win
    BatchStage<Entry<Race, Map<Horse, Double>>> betsByRace = bets.groupingKey(Tuple3::f0).aggregate(
            AggregateOperations.toMap(
                    Tuple3::f1,
                    t -> t.f2().projectedPayout(t.f1()), // payout if backed horse was to win
                    (l, r) -> l + r
            )
    );

    // Write out: (r : (h : losses))
    betsByRace.writeTo(Sinks.map(WORST_ID));

    return pipeline;
}
 
Example #22
Source File: MarkovChainGenerator.java    From hazelcast-jet-demos with Apache License 2.0
/**
 * Builds and returns the Pipeline which represents the actual computation.
 * To compute the probability of finding word B after A, one has to know
 * how many pairs contain word A as a first entry and how many of them
 * contain B as a second entry. The pipeline creates pairs from consecutive
 * words and computes the probabilities of A->B.
 */
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();
    // Reads files line-by-line
    BatchStage<String> lines = p.readFrom(Sources.<String>files(INPUT_FILE));
    Pattern twoWords = Pattern.compile("(\\.|\\w+)\\s(\\.|\\w+)");
    // Calculates probabilities by flatmapping lines into two-word consecutive pairs using regular expressions
    // and aggregates them into an IMap.
    lines.flatMap(e -> traverseMatcher(twoWords.matcher(e.toLowerCase()), m -> tuple2(m.group(1), m.group(2))))
         .groupingKey(Tuple2::f0)
         .aggregate(buildAggregateOp())
         .writeTo(Sinks.map("stateTransitions"));
    return p;
}
 
Example #23
Source File: DebeziumCDCWithJet.java    From hazelcast-jet-demos with Apache License 2.0
public static void main(String[] args) {
    JetInstance jet = JetBootstrap.getInstance();

    Configuration configuration = Configuration
            .create()
            .with("name", "mysql-demo-connector")
            .with("connector.class", "io.debezium.connector.mysql.MySqlConnector")
            /* begin connector properties */
            .with("database.hostname", "mysql")
            .with("database.port", "3306")
            .with("database.user", "debezium")
            .with("database.password", "dbz")
            .with("database.server.id", "184054")
            .with("database.server.name", "dbserver1")
            .with("database.whitelist", "inventory")
            .with("database.history.hazelcast.list.name", "test")
            .with("snapshot.mode", "schema_only")
            .build();

    Pipeline p = Pipeline.create();

    p.readFrom(DebeziumSources.cdc(configuration))
     .withoutTimestamps()
     .map(sourceRecord -> {
         String keyString = Values.convertToString(sourceRecord.keySchema(), sourceRecord.key());
         String valueString = Values.convertToString(sourceRecord.valueSchema(), sourceRecord.value());
         return Tuple2.tuple2(keyString, valueString);
     })
     .writeTo(Sinks.logger());

    jet.newJob(p).join();
}
 
Example #24
Source File: Lab6.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource(1))
     .withNativeTimestamps(0)

     // Detect if price between two consecutive trades drops by more than 200

     // Use the mapStateful to keep price of previous Trade
     // - Consider using com.hazelcast.jet.accumulator.LongAccumulator as a mutable container for long values
     // - Return the price difference if drop is detected, nothing otherwise

     .writeTo(Sinks.logger(m -> "Price drop: " + m));

    return p;
}
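
A sketch of the stateful price-drop detection the comments describe, reusing the PRICE_DROP_TRESHOLD constant and LongAccumulator approach from Example #3 (Solution6.java):

private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource(1))
     .withNativeTimestamps(0)
     // Keep the previous price in a LongAccumulator; emit the difference only when the drop exceeds the threshold
     .mapStateful(
             LongAccumulator::new,
             (previousPrice, trade) -> {
                 long difference = previousPrice.get() - trade.getPrice();
                 previousPrice.set(trade.getPrice());
                 return difference > PRICE_DROP_TRESHOLD ? difference : null;
             })
     .writeTo(Sinks.logger(m -> "Price drop: " + m));

    return p;
}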
 
Example #25
Source File: ModelServerClassification.java    From hazelcast-jet-demos with Apache License 2.0
private static Pipeline buildPipeline(String serverAddress, IMap<Long, String> reviewsMap) {
    ServiceFactory<Tuple2<PredictionServiceFutureStub, WordIndex>, Tuple2<PredictionServiceFutureStub, WordIndex>>
            tfServingContext = ServiceFactory
            .withCreateContextFn(context -> {
                WordIndex wordIndex = new WordIndex(context.attachedDirectory("data"));
                ManagedChannel channel = ManagedChannelBuilder.forTarget(serverAddress)
                                                              .usePlaintext().build();
                return Tuple2.tuple2(PredictionServiceGrpc.newFutureStub(channel), wordIndex);
            })
            .withDestroyContextFn(t -> ((ManagedChannel) t.f0().getChannel()).shutdownNow())
            .withCreateServiceFn((context, tuple2) -> tuple2);

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.map(reviewsMap))
     .map(Map.Entry::getValue)
     .mapUsingServiceAsync(tfServingContext, 16, true, (t, review) -> {
         float[][] featuresTensorData = t.f1().createTensorInput(review);
         TensorProto.Builder featuresTensorBuilder = TensorProto.newBuilder();
         for (float[] featuresTensorDatum : featuresTensorData) {
             for (float v : featuresTensorDatum) {
                 featuresTensorBuilder.addFloatVal(v);
             }
         }
         TensorShapeProto.Dim featuresDim1 =
                 TensorShapeProto.Dim.newBuilder().setSize(featuresTensorData.length).build();
         TensorShapeProto.Dim featuresDim2 =
                 TensorShapeProto.Dim.newBuilder().setSize(featuresTensorData[0].length).build();
         TensorShapeProto featuresShape =
                 TensorShapeProto.newBuilder().addDim(featuresDim1).addDim(featuresDim2).build();
         featuresTensorBuilder.setDtype(org.tensorflow.framework.DataType.DT_FLOAT)
                              .setTensorShape(featuresShape);
         TensorProto featuresTensorProto = featuresTensorBuilder.build();

         // Generate gRPC request
         Int64Value version = Int64Value.newBuilder().setValue(1).build();
         Model.ModelSpec modelSpec =
                 Model.ModelSpec.newBuilder().setName("reviewSentiment").setVersion(version).build();
         Predict.PredictRequest request = Predict.PredictRequest.newBuilder()
                                                                .setModelSpec(modelSpec)
                                                                .putInputs("input_review", featuresTensorProto)
                                                                .build();

         return toCompletableFuture(t.f0().predict(request))
                 .thenApply(response -> {
                     float classification = response
                             .getOutputsOrThrow("dense_1/Sigmoid:0")
                             .getFloatVal(0);
                     // emit the review along with the classification
                     return tuple2(review, classification);
                 });
     })
     .setLocalParallelism(1) // one worker is enough to drive the async calls
     .writeTo(Sinks.logger());
    return p;
}
 
Example #26
Source File: TrafficPredictor.java    From hazelcast-jet-demos with Apache License 2.0
/**
 * Builds and returns the Pipeline which represents the actual computation.
 */
private static Pipeline buildPipeline(Path sourceFile, String targetDirectory) {
    Pipeline pipeline = Pipeline.create();

    // Calculate car counts from the file.
    StreamStage<CarCount> carCounts = pipeline.readFrom(
            filesBuilder(sourceFile.getParent().toString())
                    .glob(sourceFile.getFileName().toString())
                    .build((filename, line) -> {
                        String[] split = line.split(",");
                        long time = LocalDateTime.parse(split[0])
                                                 .atZone(systemDefault())
                                                 .toInstant()
                                                 .toEpochMilli();
                        return new CarCount(split[1], time, parseInt(split[2]));
                    }
            )
    ).addTimestamps(CarCount::getTime, MINUTES.toMillis(300));

    // Calculate linear trends of car counts and writes them into an IMap
    // in 2 hour windows sliding by 15 minutes.
    carCounts
            .groupingKey(CarCount::getLocation)
            .window(sliding(MINUTES.toMillis(120), MINUTES.toMillis(15)))
            .aggregate(linearTrend(CarCount::getTime, CarCount::getCount))
            .map((KeyedWindowResult<String, Double> e) ->
                    entry(new TrendKey(e.getKey(), e.end()), e.getValue()))
            .writeTo(Sinks.map("trends"));

    // Makes predictions using the trends calculated above from an IMap and writes them to a file
    carCounts
            .mapUsingService(ServiceFactories.<TrendKey, Double>iMapService("trends"),
                    (trendMap, cc) -> {
                        int[] counts = new int[NUM_PREDICTIONS];
                        double trend = 0.0;
                        for (int i = 0; i < NUM_PREDICTIONS; i++) {
                            Double newTrend = trendMap.get(new TrendKey(cc.location, cc.time - DAYS.toMillis(7)));
                            if (newTrend != null) {
                                trend = newTrend;
                            }
                            double prediction = cc.count + i * GRANULARITY_STEP_MS * trend;
                            counts[i] = (int) Math.round(prediction);
                        }
                        return new Prediction(cc.location, cc.time + GRANULARITY_STEP_MS, counts);
                    })
            .writeTo(Sinks.files(targetDirectory));
    return pipeline;
}
 
Example #27
Source File: Lab2.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    StreamSource<Long> source = TestSources.itemStream(1, (ts, seq) -> seq);

    p.readFrom(source)
     .withoutTimestamps()
     .writeTo(Sinks.logger());

    // STEP 1: Filter out odd numbers from the stream

    // Add filter() to  your pipeline
    // - Use lambda to define the predicate

    // Stop the job before continuing to Step 2



    // STEP 2: Process data from a file instead of generated data

    // Create a directory somewhere in your computer and create an empty input.txt file in it

    // Replace itemStream with fileWatcher source from com.hazelcast.jet.pipeline.Sources
    // - (fileWatcher streams lines added to files in a directory.)
    // - Adjust source type - the generator was producing Longs, fileWatcher produces Strings

    // Add a mapping step before the filter to convert the stream from Strings to Longs

    // Run this pipeline to test it!
    // - Add text lines to the file.
    // - Use echo -- some text editors create a new file for every save. That results in replaying the file.
    //
    // echo "0" >> input.txt
    // echo "1" >> input.txt

    // Stop the job


    return p;
}
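
A sketch of the finished Lab 2 pipeline after both steps, using a placeholder directory path (Example #13, Solution2.java, shows the same shape with the fileWatcher source commented out):

private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    // STEP 2: read lines appended to files in a watched directory ("/path/to/dir" is a placeholder)
    StreamSource<String> source = Sources.fileWatcher("/path/to/dir");

    p.readFrom(source)
     .withoutTimestamps()
     // Convert each text line to a number, then STEP 1: keep only the even ones
     .map(Long::valueOf)
     .filter(item -> item % 2 == 0)
     .writeTo(Sinks.logger());

    return p;
}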
 
Example #28
Source File: MongoDBSourceTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void testStream_whenWatchAll() {
    IList<Document> list = jet.getList("list");

    String connectionString = mongoContainer.connectionString();
    long value = startAtOperationTime.getValue();

    StreamSource<? extends Document> source = MongoDBSourceBuilder
            .streamAll(SOURCE_NAME, () -> MongoClients.create(connectionString))
            .destroyFn(MongoClient::close)
            .searchFn(client -> {
                List<Bson> aggregates = new ArrayList<>();
                aggregates.add(Aggregates.match(new Document("fullDocument.val", new Document("$gt", 10))
                        .append("operationType", "insert")));

                aggregates.add(Aggregates.project(new Document("fullDocument.val", 1).append("_id", 1)));
                return client.watch(aggregates);
            })
            .mapFn(ChangeStreamDocument::getFullDocument)
            .startAtOperationTimeFn(client -> new BsonTimestamp(value))
            .build();

    Pipeline p = Pipeline.create();
    p.readFrom(source)
     .withNativeTimestamps(0)
     .writeTo(Sinks.list(list));

    Job job = jet.newJob(p);

    MongoCollection<Document> col1 = collection("db1", "col1");
    MongoCollection<Document> col2 = collection("db1", "col2");
    MongoCollection<Document> col3 = collection("db2", "col3");

    col1.insertOne(new Document("val", 1));
    col1.insertOne(new Document("val", 11).append("foo", "bar"));
    col2.insertOne(new Document("val", 2));
    col2.insertOne(new Document("val", 12).append("foo", "bar"));
    col3.insertOne(new Document("val", 3));
    col3.insertOne(new Document("val", 13).append("foo", "bar"));

    assertTrueEventually(() -> {
        assertEquals(3, list.size());
        list.forEach(document -> assertNull(document.get("foo")));

        assertEquals(11, list.get(0).get("val"));
        assertEquals(12, list.get(1).get("val"));
        assertEquals(13, list.get(2).get("val"));
    });

    col1.insertOne(new Document("val", 4));
    col1.insertOne(new Document("val", 14).append("foo", "bar"));
    col2.insertOne(new Document("val", 5));
    col2.insertOne(new Document("val", 15).append("foo", "bar"));
    col2.insertOne(new Document("val", 6));
    col2.insertOne(new Document("val", 16).append("foo", "bar"));

    assertTrueEventually(() -> {
        assertEquals(6, list.size());
        list.forEach(document -> assertNull(document.get("foo")));

        assertEquals(14, list.get(3).get("val"));
        assertEquals(15, list.get(4).get("val"));
        assertEquals(16, list.get(5).get("val"));
    });

    job.cancel();

}
 
Example #29
Source File: MongoDBSourceTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void testStream_whenWatchDatabase() {
    IList<Document> list = jet.getList("list");

    String connectionString = mongoContainer.connectionString();
    long value = startAtOperationTime.getValue();

    StreamSource<? extends Document> source = MongoDBSourceBuilder
            .streamDatabase(SOURCE_NAME, () -> MongoClients.create(connectionString))
            .databaseFn(client -> client.getDatabase(DB_NAME))
            .destroyFn(MongoClient::close)
            .searchFn(db -> {
                List<Bson> aggregates = new ArrayList<>();
                aggregates.add(Aggregates.match(new Document("fullDocument.val", new Document("$gte", 10))
                        .append("operationType", "insert")));

                aggregates.add(Aggregates.project(new Document("fullDocument.val", 1).append("_id", 1)));
                return db.watch(aggregates);
            })
            .mapFn(ChangeStreamDocument::getFullDocument)
            .startAtOperationTimeFn(client -> new BsonTimestamp(value))
            .build();


    Pipeline p = Pipeline.create();
    p.readFrom(source)
     .withNativeTimestamps(0)
     .writeTo(Sinks.list(list));

    Job job = jet.newJob(p);

    MongoCollection<Document> col1 = collection("col1");
    MongoCollection<Document> col2 = collection("col2");

    col1.insertOne(new Document("val", 1));
    col1.insertOne(new Document("val", 10).append("foo", "bar"));

    col2.insertOne(new Document("val", 2));
    col2.insertOne(new Document("val", 11).append("foo", "bar"));

    assertTrueEventually(() -> {
        assertEquals(2, list.size());
        list.forEach(document -> assertNull(document.get("foo")));

        assertEquals(10, list.get(0).get("val"));
        assertEquals(11, list.get(1).get("val"));

    });

    col1.insertOne(new Document("val", 3));
    col1.insertOne(new Document("val", 12).append("foo", "bar"));

    col2.insertOne(new Document("val", 4));
    col2.insertOne(new Document("val", 13).append("foo", "bar"));

    assertTrueEventually(() -> {
        assertEquals(4, list.size());
        list.forEach(document -> assertNull(document.get("foo")));

        assertEquals(12, list.get(2).get("val"));
        assertEquals(13, list.get(3).get("val"));
    });

    job.cancel();

}