com.hazelcast.jet.pipeline.Sources Java Examples

The following examples show how to use com.hazelcast.jet.pipeline.Sources. They are taken from open-source projects; each example lists its source file, originating project and license.
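Before the project examples, here is a minimal, self-contained sketch of the typical pattern behind them: build a Pipeline, read from a batch source created by Sources, and write to a sink. It assumes Hazelcast Jet 4.x (where readFrom/writeTo replaced the older drawFrom/drainTo still visible in a couple of the examples below); the class name SourcesListExample and the list name "items" are only illustrative.

import com.hazelcast.collection.IList;
import com.hazelcast.jet.Jet;
import com.hazelcast.jet.JetInstance;
import com.hazelcast.jet.pipeline.Pipeline;
import com.hazelcast.jet.pipeline.Sinks;
import com.hazelcast.jet.pipeline.Sources;

public class SourcesListExample {
    public static void main(String[] args) {
        JetInstance jet = Jet.newJetInstance();
        try {
            // Populate the IList that backs the batch source
            IList<String> items = jet.getList("items");
            items.add("apple");
            items.add("banana");

            Pipeline p = Pipeline.create();
            p.readFrom(Sources.list(items))   // batch source over the IList
             .writeTo(Sinks.logger());        // log each item on the member that processes it

            jet.newJob(p).join();
        } finally {
            Jet.shutdownAll();
        }
    }
}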
Example #1
Source File: InfluxDbSinkTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test_influxDbSink_nonExistingDb() {
    IList<Integer> measurements = jet.getList("mem_usage");
    IntStream.range(0, VALUE_COUNT).forEach(measurements::add);
    influxdbContainer.getNewInfluxDB();

    Pipeline p = Pipeline.create();
    int startTime = 0;
    p.readFrom(Sources.list(measurements))
     .map(index -> Point.measurement("mem_usage")
                        .time(startTime + index, TimeUnit.MILLISECONDS)
                        .addField("value", index)
                        .build())
     .writeTo(InfluxDbSinks.influxDb(influxdbContainer.getUrl(), "non-existing", USERNAME, PASSWORD));

    expected.expectMessage("database not found: \"non-existing\"");
    jet.newJob(p).join();
}
 
Example #2
Source File: RedisSinkTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void stream() {
    IList<String> list = instance.getList("list");
    for (int i = 0; i < 10; i++) {
        list.add("key-" + i);
    }

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.list(list))
            .writeTo(RedisSinks.stream("source", uri, "stream"));

    instance.newJob(p).join();

    RedisCommands<String, String> sync = connection.sync();
    List<StreamMessage<String, String>> messages = sync.xread(XReadArgs.StreamOffset.from("stream", "0"));
    assertEquals(list.size(), messages.size());
}
 
Example #3
Source File: MongoDBSinkTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test() {
    IList<Integer> list = jet.getList("list");
    for (int i = 0; i < 100; i++) {
        list.add(i);
    }

    String connectionString = mongoContainer.connectionString();

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.list(list))
     .map(i -> new Document("key", i))
     .writeTo(MongoDBSinks.mongodb(SINK_NAME, connectionString, DB_NAME, COL_NAME));

    jet.newJob(p).join();

    MongoCollection<Document> collection = collection();
    assertEquals(100, collection.countDocuments());
}
 
Example #4
Source File: InProcessClassification.java    From hazelcast-jet-demos with Apache License 2.0
private static Pipeline buildPipeline(IMap<Long, String> reviewsMap) {
    // Set up the mapping context that loads the model on each member, shared
    // by all parallel processors on that member.
    ServiceFactory<Tuple2<SavedModelBundle, WordIndex>, Tuple2<SavedModelBundle, WordIndex>> modelContext = ServiceFactory
            .withCreateContextFn(context -> {
                File data = context.attachedDirectory("data");
                SavedModelBundle bundle = SavedModelBundle.load(data.toPath().resolve("model/1").toString(), "serve");
                return tuple2(bundle, new WordIndex(data));
            })
            .withDestroyContextFn(t -> t.f0().close())
            .withCreateServiceFn((context, tuple2) -> tuple2);
    Pipeline p = Pipeline.create();
    p.readFrom(Sources.map(reviewsMap))
     .map(Map.Entry::getValue)
     .mapUsingService(modelContext, (tuple, review) -> classify(review, tuple.f0(), tuple.f1()))
     // TensorFlow executes models in parallel, so we use 2 local threads to maximize throughput.
     .setLocalParallelism(2)
     .writeTo(Sinks.logger(t -> String.format("Sentiment rating for review \"%s\" is %.2f", t.f0(), t.f1())));
    return p;
}
 
Example #5
Source File: HazelcastJetInterpreterUtilsTest.java    From zeppelin with Apache License 2.0
@Test
public void testDisplayNetworkFromDAGUtil() {

  Pipeline p = Pipeline.create();
  p.drawFrom(Sources.<String>list("text"))
    .flatMap(word ->
      traverseArray(word.toLowerCase().split("\\W+"))).setName("flat traversing")
    .filter(word -> !word.isEmpty())
    .groupingKey(wholeItem())
    .aggregate(counting())
    .drainTo(Sinks.map("counts"));

  assertEquals(
              NETWORK_RESULT_1,
              HazelcastJetInterpreterUtils.displayNetworkFromDAG(p.toDag())
  );

}
 
Example #6
Source File: InfluxDbSinkTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test_influxDbSink() {
    IList<Integer> measurements = jet.getList("mem_usage");
    for (int i = 0; i < VALUE_COUNT; i++) {
        measurements.add(i);
    }

    InfluxDB db = influxdbContainer.getNewInfluxDB();
    db.query(new Query("DROP SERIES FROM mem_usage"));

    Pipeline p = Pipeline.create();

    int startTime = 0;
    p.readFrom(Sources.list(measurements))
     .map(index -> Point.measurement("mem_usage")
                        .time(startTime + index, TimeUnit.MILLISECONDS)
                        .addField("value", index)
                        .build())
     .writeTo(InfluxDbSinks.influxDb(influxdbContainer.getUrl(), DATABASE_NAME, USERNAME, PASSWORD));

    jet.newJob(p).join();

    List<Result> results = db.query(new Query("SELECT * FROM mem_usage")).getResults();
    assertEquals(1, results.size());
    List<Series> seriesList = results.get(0).getSeries();
    assertEquals(1, seriesList.size());
    Series series = seriesList.get(0);
    assertEquals(SERIES, series.getName());
    assertEquals(VALUE_COUNT, series.getValues().size());
}
 
Example #7
Source File: MongoDBSinkTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test_whenServerNotAvailable() {
    String connectionString = mongoContainer.connectionString();
    mongoContainer.close();

    IList<Integer> list = jet.getList("list");
    for (int i = 0; i < 100; i++) {
        list.add(i);
    }

    Sink<Document> sink = MongoDBSinks
            .<Document>builder(SINK_NAME, () -> mongoClient(connectionString, 3))
            .databaseFn(client -> client.getDatabase(DB_NAME))
            .collectionFn(db -> db.getCollection(COL_NAME))
            .destroyFn(MongoClient::close)
            .build();


    Pipeline p = Pipeline.create();
    p.readFrom(Sources.list(list))
     .map(i -> new Document("key", i))
     .writeTo(sink);

    try {
        jet.newJob(p).join();
        fail();
    } catch (CompletionException e) {
        assertTrue(e.getCause() instanceof JetException);
    }
}
 
Example #8
Source File: PreciousHistory.java    From hazelcast-jet-demos with Apache License 2.0
public static Pipeline build() {
    Pipeline p = Pipeline.create();

    // Palladium and Platinum only
    p.readFrom(Sources.<String, Object>mapJournal(
            Constants.IMAP_NAME_PRECIOUS, JournalInitialPosition.START_FROM_OLDEST)
    ).withoutTimestamps()
     .map(e -> e.getKey() + "==" + e.getValue())
     .filter(str -> str.toLowerCase().startsWith("p"))
     .writeTo(Sinks.logger());

    return p;
}
 
Example #9
Source File: MarkovChainGenerator.java    From hazelcast-jet-demos with Apache License 2.0
/**
 * Builds and returns the Pipeline which represents the actual computation.
 * To compute the probability of finding word B after A, one has to know
 * how many pairs contain word A as a first entry and how many of them
 * contain B as a second entry. The pipeline creates pairs from consecutive
 * words and computes the probabilities of A->B.
 */
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();
    // Reads files line-by-line
    BatchStage<String> lines = p.readFrom(Sources.<String>files(INPUT_FILE));
    Pattern twoWords = Pattern.compile("(\\.|\\w+)\\s(\\.|\\w+)");
    // Calculates probabilities by flat-mapping lines into consecutive two-word pairs using a regular expression
    // and aggregating them into an IMap.
    lines.flatMap(e -> traverseMatcher(twoWords.matcher(e.toLowerCase()), m -> tuple2(m.group(1), m.group(2))))
         .groupingKey(Tuple2::f0)
         .aggregate(buildAggregateOp())
         .writeTo(Sinks.map("stateTransitions"));
    return p;
}
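The example elides buildAggregateOp(). As a hypothetical sketch only (not the demo's actual implementation; it assumes additional imports for AggregateOperations, Util, Map and HashMap), the same grouping-and-aggregation step could be written with the built-in operations: count how often each second word follows the grouping key, then normalize the counts into probabilities in a separate mapping stage.

    // Hypothetical replacement for .aggregate(buildAggregateOp()) in the pipeline above
    lines.flatMap(e -> traverseMatcher(twoWords.matcher(e.toLowerCase()), m -> tuple2(m.group(1), m.group(2))))
         .groupingKey(Tuple2::f0)
         // count, per first word, how many times each second word follows it
         .aggregate(AggregateOperations.groupingBy(Tuple2::f1, AggregateOperations.counting()))
         // convert the counts into transition probabilities
         .map(entry -> {
             double total = entry.getValue().values().stream().mapToLong(Long::longValue).sum();
             Map<String, Double> probabilities = new HashMap<>();
             entry.getValue().forEach((word, count) -> probabilities.put(word, count / total));
             return Util.entry(entry.getKey(), probabilities);
         })
         .writeTo(Sinks.map("stateTransitions"));

Keeping the normalization in a plain map stage avoids a custom AggregateOperation; the demo's buildAggregateOp() may well combine both steps differently.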
 
Example #10
Source File: JetBetMain.java    From hazelcast-jet-demos with Apache License 2.0
/**
 * Helper method to construct the pipeline for the job
 *
 * @return the pipeline for the real-time analysis
 */
public static Pipeline buildPipeline() {
    final Pipeline pipeline = Pipeline.create();

    // Draw users from the Hazelcast IMDG source
    BatchStage<User> users = pipeline.readFrom(Sources.<User, Long, User>map(USER_ID, e -> true, Entry::getValue));

    // All bet legs which are single
    BatchStage<Tuple3<Race, Horse, Bet>> bets = users.flatMap(user -> traverseStream(
            user.getKnownBets().stream()
                .filter(Bet::single)
                .flatMap(bet -> bet.getLegs().stream().map(leg -> tuple3(leg.getRace(), leg.getBacking(), bet)))
            )
    );

    // Find for each race the projected loss if each horse was to win
    BatchStage<Entry<Race, Map<Horse, Double>>> betsByRace = bets.groupingKey(Tuple3::f0).aggregate(
            AggregateOperations.toMap(
                    Tuple3::f1,
                    t -> t.f2().projectedPayout(t.f1()), // payout if backed horse was to win
                    (l, r) -> l + r
            )
    );

    // Write out: (r : (h : losses))
    betsByRace.writeTo(Sinks.map(WORST_ID));

    return pipeline;
}
 
Example #11
Source File: AnalysisJet.java    From hazelcast-jet-demos with Apache License 2.0
/**
 * Helper method to construct the pipeline for the job
 *
 * @return the pipeline for the job
 */
public static Pipeline buildPipeline() {
    final Pipeline p = Pipeline.create();

    // Compute map server side
    final BatchStage<Horse> c = p.readFrom(Sources.map(EVENTS_BY_NAME, t -> true, HORSE_FROM_EVENT));

    final BatchStage<Entry<Horse, Long>> c2 = c.groupingKey(wholeItem())
                                               .aggregate(counting())
                                               .filter(ent -> ent.getValue() > 1);

    c2.writeTo(Sinks.map(MULTIPLE));

    return p;
}
 
Example #12
Source File: WordCounter.java    From tutorials with MIT License
private Pipeline createPipeLine() {
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.<String> list(LIST_NAME))
        .flatMap(word -> traverseArray(word.toLowerCase()
            .split("\\W+")))
        .filter(word -> !word.isEmpty())
        .groupingKey(wholeItem())
        .aggregate(counting())
        .drainTo(Sinks.map(MAP_NAME));
    return p;
}
 
Example #13
Source File: ModelServerClassification.java    From hazelcast-jet-demos with Apache License 2.0
private static Pipeline buildPipeline(String serverAddress, IMap<Long, String> reviewsMap) {
    ServiceFactory<Tuple2<PredictionServiceFutureStub, WordIndex>, Tuple2<PredictionServiceFutureStub, WordIndex>>
            tfServingContext = ServiceFactory
            .withCreateContextFn(context -> {
                WordIndex wordIndex = new WordIndex(context.attachedDirectory("data"));
                ManagedChannel channel = ManagedChannelBuilder.forTarget(serverAddress)
                                                              .usePlaintext().build();
                return Tuple2.tuple2(PredictionServiceGrpc.newFutureStub(channel), wordIndex);
            })
            .withDestroyContextFn(t -> ((ManagedChannel) t.f0().getChannel()).shutdownNow())
            .withCreateServiceFn((context, tuple2) -> tuple2);

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.map(reviewsMap))
     .map(Map.Entry::getValue)
     .mapUsingServiceAsync(tfServingContext, 16, true, (t, review) -> {
         float[][] featuresTensorData = t.f1().createTensorInput(review);
         TensorProto.Builder featuresTensorBuilder = TensorProto.newBuilder();
         for (float[] featuresTensorDatum : featuresTensorData) {
             for (float v : featuresTensorDatum) {
                 featuresTensorBuilder.addFloatVal(v);
             }
         }
         TensorShapeProto.Dim featuresDim1 =
                 TensorShapeProto.Dim.newBuilder().setSize(featuresTensorData.length).build();
         TensorShapeProto.Dim featuresDim2 =
                 TensorShapeProto.Dim.newBuilder().setSize(featuresTensorData[0].length).build();
         TensorShapeProto featuresShape =
                 TensorShapeProto.newBuilder().addDim(featuresDim1).addDim(featuresDim2).build();
         featuresTensorBuilder.setDtype(org.tensorflow.framework.DataType.DT_FLOAT)
                              .setTensorShape(featuresShape);
         TensorProto featuresTensorProto = featuresTensorBuilder.build();

         // Generate gRPC request
         Int64Value version = Int64Value.newBuilder().setValue(1).build();
         Model.ModelSpec modelSpec =
                 Model.ModelSpec.newBuilder().setName("reviewSentiment").setVersion(version).build();
         Predict.PredictRequest request = Predict.PredictRequest.newBuilder()
                                                                .setModelSpec(modelSpec)
                                                                .putInputs("input_review", featuresTensorProto)
                                                                .build();

         return toCompletableFuture(t.f0().predict(request))
                 .thenApply(response -> {
                     float classification = response
                             .getOutputsOrThrow("dense_1/Sigmoid:0")
                             .getFloatVal(0);
                     // emit the review along with the classification
                     return tuple2(review, classification);
                 });
     })
     .setLocalParallelism(1) // one worker is enough to drive the async calls
     .writeTo(Sinks.logger());
    return p;
}
 
Example #14
Source File: MovingAverage.java    From hazelcast-jet-demos with Apache License 2.0
/**
 * <p>{@link com.hazelcast.jet.demos.bitcoin.Task4PriceFeed Task4PriceFeed} writes
 * the current price of Bitcoin into an
 * {@link com.hazelcast.map.IMap IMap}. This
 * {@link com.hazelcast.map.IMap IMap} is defined with a
 * {@link com.hazelcast.map.impl.journal.MapEventJournal MapEventJournal}
 * that allows Jet to track the history of changes. Use this as a
 * source to stream in.
 * <p>Don't bother yet with timestamps; they are added later
 * in the pipeline.
 * </p>
 * <p>Group (route) all events based on the key, which will be
 * "{@code BTCUSD}". However many Jet nodes are running, only
 * one will handle "{@code BTCUSD}". 
 * </p>
 *
 * @param pipeline Will be empty
 * @return The first stage of the pipeline
 */
protected static StreamStageWithKey<Entry<String, Price>, String> 
	buildPriceFeed(Pipeline pipeline) {

	return pipeline.readFrom(
			Sources.<String,Price>mapJournal(
				MyConstants.IMAP_NAME_PRICES_IN,
				JournalInitialPosition.START_FROM_OLDEST)
			)
			.withoutTimestamps()
			.setName("priceFeed")
			.groupingKey(Functions.entryKey());
}
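Jet can only stream from an IMap whose event journal is enabled. Here is a minimal sketch of how that journal could be configured on an embedded member (assuming Jet 4.x; the map name and capacity are illustrative, and the demo itself refers to the map via MyConstants.IMAP_NAME_PRICES_IN):

    // A minimal sketch: enable the event journal so Sources.mapJournal(...) can stream from the map.
    static JetInstance startJetWithJournal() {
        JetConfig jetConfig = new JetConfig();
        jetConfig.getHazelcastConfig()
                 .getMapConfig("prices-in")        // illustrative map name
                 .getEventJournalConfig()
                 .setEnabled(true)                 // required for mapJournal sources
                 .setCapacity(10_000);             // ring buffer capacity for journal events
        return Jet.newJetInstance(jetConfig);
    }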
 
Example #15
Source File: FileWatcher.java    From hazelcast-jet-demos with Apache License 2.0
/**
 * <p>A custom source: a file watcher that produces a continuous stream
 * of lines appended to files matching "{@code beam-output-*}" in the
 * working directory.
 * </p>
 * <p>As the Beam job writes the lines, this job reads the lines.
 * </p>
 *
 * @return a streaming source of lines appended to the matched files
 */
protected static StreamSource<String> buildFileWatcherSource() {
	return Sources.filesBuilder(".").glob("beam-output-*").buildWatcher();
}