org.apache.samza.table.Table Java Examples
The following examples show how to use
org.apache.samza.table.Table.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestRemoteTableEndToEnd.java From samza with Apache License 2.0 | 6 votes |
/**
 * Wraps the given table descriptor in a {@link CachingTableDescriptor} and obtains the
 * corresponding table from the application descriptor.
 *
 * @param actualTableDesc descriptor of the table placed behind the caching table
 * @param defaultCache when true, the caching descriptor is built from the actual table only and
 *                     given 5-minute read and write TTLs; when false, a
 *                     {@link GuavaCacheTableDescriptor} backed by a Guava cache expiring entries
 *                     5 minutes after access is passed as the cache table
 * @param appDesc application descriptor the caching table is requested from
 * @return the table returned by {@code appDesc.getTable} for the caching descriptor
 */
private <K, V> Table<KV<K, V>> getCachingTable(TableDescriptor<K, V, ?> actualTableDesc, boolean defaultCache,
    StreamApplicationDescriptor appDesc) {
  final String id = actualTableDesc.getTableId();
  final String cachingTableId = "caching-table-" + id;
  final CachingTableDescriptor<K, V> cachingDesc;

  if (!defaultCache) {
    // Explicit cache: a Guava-backed table is handed to the caching descriptor.
    GuavaCacheTableDescriptor<K, V> guavaDesc = new GuavaCacheTableDescriptor<>("guava-table-" + id);
    guavaDesc.withCache(CacheBuilder.newBuilder().expireAfterAccess(5, TimeUnit.MINUTES).build());
    cachingDesc = new CachingTableDescriptor<>(cachingTableId, actualTableDesc, guavaDesc);
  } else {
    cachingDesc = new CachingTableDescriptor<>(cachingTableId, actualTableDesc);
    cachingDesc.withReadTtl(Duration.ofMinutes(5));
    cachingDesc.withWriteTtl(Duration.ofMinutes(5));
  }

  return appDesc.getTable(cachingDesc);
}
Example #2
Source File: TestLocalTableEndToEnd.java From samza with Apache License 2.0 | 6 votes |
/**
 * Describes an application with two pipelines sharing table "t1": the "Profile" input is mapped
 * to (memberId, profile) pairs and sent to the table, while the "PageView" input is recorded in
 * {@code received}, re-partitioned by member id, joined against the table and the join results
 * are added to {@code joined}.
 *
 * @param appDesc descriptor on which the table, input streams and operators are declared
 */
@Override
public void describe(StreamApplicationDescriptor appDesc) {
  Table<KV<Integer, Profile>> profileTable = appDesc.getTable(
      new InMemoryTableDescriptor("t1", KVSerde.of(new IntegerSerde(), new ProfileJsonSerde())));
  DelegatingSystemDescriptor testSystem = new DelegatingSystemDescriptor("test");

  // Profile stream -> table
  GenericInputDescriptor<Profile> profileInput = testSystem.getInputDescriptor("Profile", new NoOpSerde<>());
  appDesc.getInputStream(profileInput)
      .map(profile -> new KV(profile.getMemberId(), profile))
      .sendTo(profileTable);

  // PageView stream -> partition by member -> join with table -> collect
  GenericInputDescriptor<PageView> pageViewInput = testSystem.getInputDescriptor("PageView", new NoOpSerde<>());
  appDesc.getInputStream(pageViewInput)
      .map(pageView -> {
        received.add(pageView);
        return pageView;
      })
      .partitionBy(PageView::getMemberId, v -> v, KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()), "p1")
      .join(profileTable, new PageViewToProfileJoinFunction())
      .sink((message, collector, coordinator) -> joined.add(message));
}
Example #3
Source File: TestExecutionPlanner.java From samza with Apache License 2.0 | 6 votes |
/**
 * A special example of stream-table join where a stream is joined with a table, and the result is
 * sent to the same table. This example is necessary to ensure {@link ExecutionPlanner} does not
 * get stuck traversing the virtual cycle between stream-table-join and send-to-table operator specs
 * indefinitely.
 *
 * The reason such virtual cycle is present is to support computing partitions of intermediate
 * streams participating in stream-table joins. Please, refer to SAMZA SEP-16 for more details.
 *
 * @return an application descriptor in which input1 is joined with table "table-id" and the join
 *         output is sent back to that same table
 */
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoinAndSendToSameTable() {
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> inputStream = appDesc.getInputStream(input1Descriptor);

    TableDescriptor sharedTableDesc = new TestLocalTableDescriptor.MockLocalTableDescriptor(
        "table-id", new KVSerde(new StringSerde(), new StringSerde()));
    Table sharedTable = appDesc.getTable(sharedTableDesc);

    // The same table is both the join side and the sink.
    inputStream
        .join(sharedTable, mock(StreamTableJoinFunction.class))
        .sendTo(sharedTable);
  }, config);
}
Example #4
Source File: TestExecutionPlanner.java From samza with Apache License 2.0 | 6 votes |
/**
 * Example stream-table join that is invalid due to disagreement in partition count between the
 * stream behind table t and another joined stream. Table t is configured with input2 (16) as
 * side-input stream.
 *
 *                   join-table t -> output1 (8)
 *                         |
 *    input1 (64) —————————
 *
 * @return an application descriptor joining input1 with a table whose side input is "input2"
 *         and sending the result to output1
 */
private StreamApplicationDescriptorImpl createStreamGraphWithInvalidStreamTableJoinWithSideInputs() {
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> inputStream = appDesc.getInputStream(input1Descriptor);
    OutputStream<KV<Object, Object>> outputStream = appDesc.getOutputStream(output1Descriptor);

    TableDescriptor sideInputTableDesc = new TestLocalTableDescriptor.MockLocalTableDescriptor(
        "table-id", new KVSerde(new StringSerde(), new StringSerde()))
        .withSideInputs(Arrays.asList("input2"))
        .withSideInputsProcessor(mock(SideInputsProcessor.class));
    Table sideInputTable = appDesc.getTable(sideInputTableDesc);

    inputStream
        .join(sideInputTable, mock(StreamTableJoinFunction.class))
        .sendTo(outputStream);
  }, config);
}
Example #5
Source File: TestExecutionPlanner.java From samza with Apache License 2.0 | 6 votes |
/**
 * Example stream-table join where table t is configured with input1 (64) as a side-input stream.
 *
 *                                   join-table t -> output1 (8)
 *                                        |
 *    input2 (16) -> partitionBy ("64") __|
 *
 * @return an application descriptor in which input2 is re-partitioned ("p1"), joined with a table
 *         whose side input is "input1", and sent to output1
 */
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoinWithSideInputs() {
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> inputStream = appDesc.getInputStream(input2Descriptor);
    OutputStream<KV<Object, Object>> outputStream = appDesc.getOutputStream(output1Descriptor);

    TableDescriptor sideInputTableDesc = new TestLocalTableDescriptor.MockLocalTableDescriptor(
        "table-id", new KVSerde(new StringSerde(), new StringSerde()))
        .withSideInputs(Arrays.asList("input1"))
        .withSideInputsProcessor(mock(SideInputsProcessor.class));
    Table sideInputTable = appDesc.getTable(sideInputTableDesc);

    inputStream
        .partitionBy(kv -> kv.key, kv -> kv.value, mock(KVSerde.class), "p1")
        .join(sideInputTable, mock(StreamTableJoinFunction.class))
        .sendTo(outputStream);
  }, config);
}
Example #6
Source File: TableMetrics.java From samza with Apache License 2.0 | 6 votes |
/**
 * Constructor based on container and task container context.
 *
 * Creates every read and write metric of this class through a single {@link TableMetricsUtil}
 * built from the supplied arguments; counters are named {@code num-*} and timers {@code *-ns}.
 *
 * @param context {@link Context} for this task
 * @param table underlying table
 * @param tableId table Id
 */
public TableMetrics(Context context, Table table, String tableId) {
  TableMetricsUtil tableMetricsUtil = new TableMetricsUtil(context, table, tableId);

  // Read metrics
  numGets = tableMetricsUtil.newCounter("num-gets");
  getNs = tableMetricsUtil.newTimer("get-ns");
  numGetAlls = tableMetricsUtil.newCounter("num-getAlls");
  getAllNs = tableMetricsUtil.newTimer("getAll-ns");
  numReads = tableMetricsUtil.newCounter("num-reads");
  readNs = tableMetricsUtil.newTimer("read-ns");
  numMissedLookups = tableMetricsUtil.newCounter("num-missed-lookups");

  // Write metrics
  numPuts = tableMetricsUtil.newCounter("num-puts");
  putNs = tableMetricsUtil.newTimer("put-ns");
  numPutAlls = tableMetricsUtil.newCounter("num-putAlls");
  putAllNs = tableMetricsUtil.newTimer("putAll-ns");
  numDeletes = tableMetricsUtil.newCounter("num-deletes");
  deleteNs = tableMetricsUtil.newTimer("delete-ns");
  numDeleteAlls = tableMetricsUtil.newCounter("num-deleteAlls");
  deleteAllNs = tableMetricsUtil.newTimer("deleteAll-ns");
  numWrites = tableMetricsUtil.newCounter("num-writes");
  writeNs = tableMetricsUtil.newTimer("write-ns");
  numFlushes = tableMetricsUtil.newCounter("num-flushes");
  flushNs = tableMetricsUtil.newTimer("flush-ns");
}
Example #7
Source File: TestExecutionPlanner.java From samza with Apache License 2.0 | 5 votes |
/**
 * Example stream-table join app. Expected partition counts of intermediate streams introduced
 * by partitionBy operations are enclosed in quotes.
 *
 *    input2 (16) -> partitionBy ("32") -> send-to-table t
 *
 *                                      join-table t —————
 *                                       |                |
 *    input1 (64) -> partitionBy ("32") _|                |
 *                                                       join -> output1 (8)
 *                                                        |
 *                                      input3 (32) ——————
 *
 * @return an application descriptor wiring the three inputs, the table and output1 as drawn above
 */
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoin() {
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> joinInput = appDesc.getInputStream(input1Descriptor);
    MessageStream<KV<Object, Object>> tableInput = appDesc.getInputStream(input2Descriptor);
    MessageStream<KV<Object, Object>> secondJoinInput = appDesc.getInputStream(input3Descriptor);
    OutputStream<KV<Object, Object>> outputStream = appDesc.getOutputStream(output1Descriptor);

    TableDescriptor localTableDesc = new TestLocalTableDescriptor.MockLocalTableDescriptor(
        "table-id", new KVSerde(new StringSerde(), new StringSerde()));
    Table localTable = appDesc.getTable(localTableDesc);

    // input2 populates the table after re-partitioning.
    tableInput
        .partitionBy(kv -> kv.key, kv -> kv.value, mock(KVSerde.class), "p1")
        .sendTo(localTable);

    // input1 is re-partitioned, joined with the table, then stream-stream joined with input3.
    joinInput
        .partitionBy(kv -> kv.key, kv -> kv.value, mock(KVSerde.class), "p2")
        .join(localTable, mock(StreamTableJoinFunction.class))
        .join(secondJoinInput,
            mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class),
            Duration.ofHours(1), "j2")
        .sendTo(outputStream);
  }, config);
}
Example #8
Source File: TestLocalTableEndToEnd.java From samza with Apache License 2.0 | 5 votes |
/**
 * Runs an application that maps the "Profile" input through a {@code MyMapFunction} and sends the
 * result to table "t1", then checks for each of the 4 partitions that the per-task map function
 * received {@code count} profiles and that the table returns a non-null value for every one.
 */
@Test
public void testSendTo() throws Exception {
  final int count = 10;
  final int partitionCount = 4;
  Profile[] generatedProfiles = TestTableData.generateProfiles(count);

  Map<String, String> jobConfigs = getBaseJobConfig(bootstrapUrl(), zkConnect());
  jobConfigs.put("streams.Profile.samza.system", "test");
  jobConfigs.put("streams.Profile.source", Base64Serializer.serialize(generatedProfiles));
  jobConfigs.put("streams.Profile.partitionCount", String.valueOf(partitionCount));

  MyMapFunction mapFn = new MyMapFunction();

  final StreamApplication app = appDesc -> {
    Table<KV<Integer, Profile>> profileTable = appDesc.getTable(
        new InMemoryTableDescriptor("t1", KVSerde.of(new IntegerSerde(), new ProfileJsonSerde())));
    DelegatingSystemDescriptor testSystem = new DelegatingSystemDescriptor("test");
    GenericInputDescriptor<Profile> profileInput = testSystem.getInputDescriptor("Profile", new NoOpSerde<>());
    appDesc.getInputStream(profileInput)
        .map(mapFn)
        .sendTo(profileTable);
  };

  Config config = new MapConfig(jobConfigs);
  final LocalApplicationRunner runner = new LocalApplicationRunner(app, config);
  executeRun(runner, config);
  runner.waitForFinish();

  // Each task keeps its own copy of the map function, looked up by task name.
  for (int partition = 0; partition < partitionCount; partition++) {
    String taskName = String.format("Partition %d", partition);
    MyMapFunction taskMapFn = MyMapFunction.getMapFunctionByTask(taskName);
    assertEquals(count, taskMapFn.received.size());
    taskMapFn.received.forEach(
        profile -> Assert.assertTrue(taskMapFn.table.get(profile.getMemberId()) != null));
  }
}
Example #9
Source File: QueryTranslator.java From samza with Apache License 2.0 | 5 votes |
private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream, StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) { SqlIOConfig sinkConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream); MessageStream<SamzaSqlRelMessage> stream = translatorContext.getMessageStream(node.getId()); MessageStream<KV<Object, Object>> outputStream = stream.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId)); Optional<TableDescriptor> tableDescriptor = sinkConfig.getTableDescriptor(); if (!tableDescriptor.isPresent()) { KVSerde<Object, Object> noOpKVSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()); String systemName = sinkConfig.getSystemName(); DelegatingSystemDescriptor sd = systemDescriptors.computeIfAbsent(systemName, DelegatingSystemDescriptor::new); GenericOutputDescriptor<KV<Object, Object>> osd = sd.getOutputDescriptor(sinkConfig.getStreamId(), noOpKVSerde); OutputStream stm = outputMsgStreams.computeIfAbsent(sinkConfig.getSource(), v -> appDesc.getOutputStream(osd)); outputStream.sendTo(stm); // Process system events only if the output is a stream. if (sqlConfig.isProcessSystemEvents()) { for (MessageStream<SamzaSqlInputMessage> inputStream : inputMsgStreams.values()) { MessageStream<KV<Object, Object>> systemEventStream = inputStream.filter(message -> message.getMetadata().isSystemMessage()) .map(SamzaSqlInputMessage::getKeyAndMessageKV); systemEventStream.sendTo(stm); } } } else { Table outputTable = appDesc.getTable(tableDescriptor.get()); if (outputTable == null) { String msg = "Failed to obtain table descriptor of " + sinkConfig.getSource(); throw new SamzaException(msg); } outputStream.sendTo(outputTable); } }
Example #10
Source File: TestExecutionPlanner.java From samza with Apache License 2.0 | 5 votes |
/**
 * Example stream-table join that is invalid due to disagreement in partition count
 * between the 2 input streams.
 *
 *    input1 (64) -> send-to-table t
 *
 *                   join-table t -> output1 (8)
 *                         |
 *    input2 (16) —————————
 *
 * @return an application descriptor built from the operator calls in the body
 */
private StreamApplicationDescriptorImpl createStreamGraphWithInvalidStreamTableJoin() {
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> firstInput = appDesc.getInputStream(input1Descriptor);
    MessageStream<KV<Object, Object>> secondInput = appDesc.getInputStream(input2Descriptor);
    OutputStream<KV<Object, Object>> outputStream = appDesc.getOutputStream(output1Descriptor);

    TableDescriptor localTableDesc = new TestLocalTableDescriptor.MockLocalTableDescriptor(
        "table-id", new KVSerde(new StringSerde(), new StringSerde()));
    Table localTable = appDesc.getTable(localTableDesc);

    // input1 populates the table ...
    firstInput.sendTo(localTable);

    // ... and is also joined with it, then stream-stream joined with input2.
    firstInput
        .join(localTable, mock(StreamTableJoinFunction.class))
        .join(secondInput,
            mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class),
            Duration.ofHours(1), "j2")
        .sendTo(outputStream);
  }, config);
}
Example #11
Source File: TableMetricsUtil.java From samza with Apache License 2.0 | 5 votes |
/**
 * Constructor based on container context.
 *
 * All three arguments are null-checked up front. The metrics registry is taken from the
 * container context, and the metrics group name is the simple class name of the table.
 *
 * @param context {@link Context} for this task
 * @param table underlying table
 * @param tableId table Id
 */
public TableMetricsUtil(Context context, Table table, String tableId) {
  // Validate everything before touching any argument.
  Preconditions.checkNotNull(context);
  Preconditions.checkNotNull(table);
  Preconditions.checkNotNull(tableId);

  this.metricsRegistry = context.getContainerContext().getContainerMetricsRegistry();
  this.groupName = table.getClass().getSimpleName();
  this.tableId = tableId;
}
Example #12
Source File: MessageStreamImpl.java From samza with Apache License 2.0 | 5 votes |
/**
 * Appends a send-to-table operator to this stream.
 *
 * @param table destination table; it is cast to {@link TableImpl} to read its table id
 * @param args additional arguments forwarded to the operator spec
 * @return a new message stream backed by the created send-to-table operator spec
 */
@Override
public <K, V> MessageStream<KV<K, V>> sendTo(Table<KV<K, V>> table, Object ... args) {
  final String opId = this.streamAppDesc.getNextOpId(OpCode.SEND_TO);
  final String tableId = ((TableImpl) table).getTableId();
  final SendToTableOperatorSpec<K, V> sendToSpec =
      OperatorSpecs.createSendToTableOperatorSpec(tableId, opId, args);
  this.operatorSpec.registerNextOperatorSpec(sendToSpec);
  return new MessageStreamImpl<>(this.streamAppDesc, sendToSpec);
}
Example #13
Source File: MessageStreamImpl.java From samza with Apache License 2.0 | 5 votes |
/**
 * Appends a stream-table join operator to this stream.
 *
 * @param table table to join with; it is cast to {@link TableImpl} to read its table id
 * @param joinFn join function, cast (unchecked) to the exact type parameters of the operator spec
 * @param args additional arguments forwarded to the operator spec
 * @return a new message stream backed by the created stream-table join operator spec
 */
@Override
public <K, R extends KV, JM> MessageStream<JM> join(Table<R> table,
    StreamTableJoinFunction<? extends K, ? super M, ? super R, ? extends JM> joinFn, Object ... args) {
  final String opId = this.streamAppDesc.getNextOpId(OpCode.JOIN);
  final String tableId = ((TableImpl) table).getTableId();
  final StreamTableJoinFunction<K, M, R, JM> typedJoinFn = (StreamTableJoinFunction<K, M, R, JM>) joinFn;
  final StreamTableJoinOperatorSpec<K, M, R, JM> spec =
      OperatorSpecs.createStreamTableJoinOperatorSpec(tableId, typedJoinFn, opId, args);
  this.operatorSpec.registerNextOperatorSpec(spec);
  return new MessageStreamImpl<>(this.streamAppDesc, spec);
}
Example #14
Source File: StreamApplicationDescriptorImpl.java From samza with Apache License 2.0 | 5 votes |
/**
 * Registers the table descriptor with this application and returns a table handle for it.
 * For a {@link LocalTableDescriptor}, the table's serde is additionally passed to
 * {@code getOrCreateTableSerdes} under the table id.
 *
 * @param tableDescriptor descriptor of the requested table
 * @return a new {@link TableImpl} wrapping the descriptor
 */
@Override
public <K, V> Table<KV<K, V>> getTable(TableDescriptor<K, V, ?> tableDescriptor) {
  addTableDescriptor(tableDescriptor);

  final boolean isLocalTable = tableDescriptor instanceof LocalTableDescriptor;
  if (isLocalTable) {
    LocalTableDescriptor localDesc = (LocalTableDescriptor) tableDescriptor;
    getOrCreateTableSerdes(localDesc.getTableId(), localDesc.getSerde());
  }

  return new TableImpl(tableDescriptor);
}
Example #15
Source File: TestLocalTableWithSideInputsEndToEnd.java From samza with Apache License 2.0 | 5 votes |
/**
 * Describes an application that re-partitions the page view stream by member id, joins it with
 * the table from {@code getTableDescriptor()}, and sends the join output to the enriched page
 * view stream.
 *
 * @param appDescriptor descriptor on which the table, streams and operators are declared
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  Table<KV<Integer, TestTableData.Profile>> profileTable = appDescriptor.getTable(getTableDescriptor());
  KafkaSystemDescriptor kafkaSystem = new KafkaSystemDescriptor("test");

  appDescriptor
      .getInputStream(
          kafkaSystem.getInputDescriptor(PAGEVIEW_STREAM, new NoOpSerde<TestTableData.PageView>()))
      .partitionBy(
          TestTableData.PageView::getMemberId,
          v -> v,
          KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()),
          "partition-page-view")
      .join(profileTable, new PageViewToProfileJoinFunction())
      .sendTo(
          appDescriptor.getOutputStream(
              kafkaSystem.getOutputDescriptor(ENRICHED_PAGEVIEW_STREAM, new NoOpSerde<>())));
}
Example #16
Source File: StreamApplicationIntegrationTest.java From samza with Apache License 2.0 | 5 votes |
/**
 * Describes an application that stores profiles from the "Profile" stream in the RocksDB table
 * "profile-view-store" (logging each insert), and joins the "PageView" stream, re-partitioned by
 * member id, with that table. Join results go to the "EnrichedPageView" output; their page keys
 * are additionally sent to the "JoinPageKeys" stream of system "test".
 *
 * @param appDescriptor descriptor on which the table, streams and operators are declared
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  Table<KV<Integer, TestTableData.Profile>> profileTable = appDescriptor.getTable(
      new RocksDbTableDescriptor<Integer, TestTableData.Profile>("profile-view-store",
          KVSerde.of(new IntegerSerde(), new TestTableData.ProfileJsonSerde())));

  KafkaSystemDescriptor kafkaSystem = new KafkaSystemDescriptor("test");
  KafkaInputDescriptor<KV<String, TestTableData.Profile>> profileInput =
      kafkaSystem.getInputDescriptor("Profile", KVSerde.of(new StringSerde(), new JsonSerdeV2<>()));
  KafkaInputDescriptor<KV<String, TestTableData.PageView>> pageViewInput =
      kafkaSystem.getInputDescriptor("PageView", KVSerde.of(new StringSerde(), new JsonSerdeV2<>()));
  KafkaOutputDescriptor<TestTableData.EnrichedPageView> enrichedPageViewOutput =
      kafkaSystem.getOutputDescriptor("EnrichedPageView", new JsonSerdeV2<>());

  // Profiles: re-key by member id, store in the table, log every insert.
  appDescriptor.getInputStream(profileInput)
      .map(envelope -> new KV(envelope.getValue().getMemberId(), envelope.getValue()))
      .sendTo(profileTable)
      .sink((kv, collector, coordinator) -> {
        LOG.info("Inserted Profile with Key: {} in profile-view-store", kv.getKey());
      });

  OutputStream<TestTableData.EnrichedPageView> enrichedStream =
      appDescriptor.getOutputStream(enrichedPageViewOutput);

  // Page views: partition by member id, join with profiles, emit results and their page keys.
  appDescriptor.getInputStream(pageViewInput)
      .partitionBy(
          pv -> pv.getValue().getMemberId(),
          pv -> pv.getValue(),
          KVSerde.of(new IntegerSerde(), new JsonSerdeV2<>(TestTableData.PageView.class)),
          "p1")
      .join(profileTable, new PageViewToProfileJoinFunction())
      .sendTo(enrichedStream)
      .map(TestTableData.EnrichedPageView::getPageKey)
      .sink((joinPageKey, collector, coordinator) -> {
        collector.send(
            new OutgoingMessageEnvelope(new SystemStream("test", "JoinPageKeys"), null, null, joinPageKey));
      });
}
Example #17
Source File: RemoteTableJoinExample.java From samza-hello-samza with Apache License 2.0 | 5 votes |
/**
 * Describes an application that joins a stream of stock symbols with a remote table fronted by a
 * caching table (read TTL of 5 seconds) and sends the join output to the stock price stream.
 * The remote table is configured with a read rate limit of 10 and a
 * {@code StockPriceReadFunction}.
 *
 * @param appDescriptor descriptor on which the system, table, streams and operators are declared
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KafkaInputDescriptor<String> symbolInput =
      kafka.getInputDescriptor(INPUT_STREAM_ID, new StringSerde());
  KafkaOutputDescriptor<StockPrice> priceOutput =
      kafka.getOutputDescriptor(OUTPUT_STREAM_ID, new JsonSerdeV2<>(StockPrice.class));

  appDescriptor.withDefaultSystem(kafka);
  MessageStream<String> symbols = appDescriptor.getInputStream(symbolInput);
  OutputStream<StockPrice> prices = appDescriptor.getOutputStream(priceOutput);

  RemoteTableDescriptor<String, Double> remoteDesc = new RemoteTableDescriptor("remote-table")
      .withReadRateLimit(10)
      .withReadFunction(new StockPriceReadFunction());
  CachingTableDescriptor<String, Double> cachedDesc =
      new CachingTableDescriptor<>("cached-remote-table", remoteDesc)
          .withReadTtl(Duration.ofSeconds(5));
  Table<KV<String, Double>> cachedTable = appDescriptor.getTable(cachedDesc);

  // The symbol is the join key; the value is unused and left null.
  symbols
      .map(symbol -> new KV<String, Void>(symbol, null))
      .join(cachedTable, new JoinFn())
      .sendTo(prices);
}
Example #18
Source File: StreamTableJoinExample.java From samza-hello-samza with Apache License 2.0 | 5 votes |
/**
 * Describes an application that writes profiles, keyed by user id, into the RocksDB table
 * "profile-table", and joins page views (re-partitioned by user id) with that table, sending the
 * join output to the output topic.
 *
 * @param appDescriptor descriptor on which the system, table, streams and operators are declared
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  Serde<Profile> profileSerde = new JsonSerdeV2<>(Profile.class);
  Serde<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  Serde<EnrichedPageView> enrichedSerde = new JsonSerdeV2<>(EnrichedPageView.class);

  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KafkaInputDescriptor<Profile> profileInput = kafka.getInputDescriptor(PROFILE_STREAM_ID, profileSerde);
  KafkaInputDescriptor<PageView> pageViewInput = kafka.getInputDescriptor(PAGEVIEW_STREAM_ID, pageViewSerde);
  KafkaOutputDescriptor<EnrichedPageView> enrichedOutput = kafka.getOutputDescriptor(OUTPUT_TOPIC, enrichedSerde);

  RocksDbTableDescriptor<String, Profile> profileTableDesc =
      new RocksDbTableDescriptor<String, Profile>("profile-table", KVSerde.of(new StringSerde(), profileSerde));

  appDescriptor.withDefaultSystem(kafka);

  MessageStream<Profile> profiles = appDescriptor.getInputStream(profileInput);
  MessageStream<PageView> pageViews = appDescriptor.getInputStream(pageViewInput);
  OutputStream<EnrichedPageView> enrichedPageViews = appDescriptor.getOutputStream(enrichedOutput);
  Table<KV<String, Profile>> profileTable = appDescriptor.getTable(profileTableDesc);

  // Populate the table.
  profiles
      .map(profile -> KV.of(profile.userId, profile))
      .sendTo(profileTable);

  // Join page views against it.
  pageViews
      .partitionBy(pv -> pv.userId, pv -> pv, KVSerde.of(new StringSerde(), pageViewSerde), "join")
      .join(profileTable, new JoinFn())
      .sendTo(enrichedPageViews);
}
Example #19
Source File: TestRemoteTableDescriptor.java From samza with Apache License 2.0 | 4 votes |
private void doTestDeserializeReadFunctionAndLimiter(boolean rateOnly, boolean rlGets, boolean rlPuts) { int numRateLimitOps = (rlGets ? 1 : 0) + (rlPuts ? 1 : 0); RemoteTableDescriptor<String, String> desc = new RemoteTableDescriptor("1") .withReadFunction(createMockTableReadFunction()) .withReadRetryPolicy(new TableRetryPolicy().withRetryPredicate((ex) -> false)) .withWriteFunction(createMockTableWriteFunction()) .withAsyncCallbackExecutorPoolSize(10); if (rateOnly) { if (rlGets) { desc.withReadRateLimit(1000); } else { desc.withReadRateLimiterDisabled(); } if (rlPuts) { desc.withWriteRateLimit(2000); } else { desc.withWriteRateLimiterDisabled(); } } else { if (numRateLimitOps > 0) { Map<String, Integer> tagCredits = new HashMap<>(); if (rlGets) { tagCredits.put(RemoteTableDescriptor.RL_READ_TAG, 1000); } else { desc.withReadRateLimiterDisabled(); } if (rlPuts) { tagCredits.put(RemoteTableDescriptor.RL_WRITE_TAG, 2000); } else { desc.withWriteRateLimiterDisabled(); } // Spy the rate limiter to verify call count RateLimiter rateLimiter = spy(new EmbeddedTaggedRateLimiter(tagCredits)); desc.withRateLimiter(rateLimiter, new CountingCreditFunction(), new CountingCreditFunction()); } else { desc.withRateLimiterDisabled(); } } RemoteTableProvider provider = new RemoteTableProvider(desc.getTableId()); provider.init(createMockContext(desc)); Table table = provider.getTable(); Assert.assertTrue(table instanceof RemoteTable); RemoteTable rwTable = (RemoteTable) table; AsyncReadWriteTable delegate = TestUtils.getFieldValue(rwTable, "asyncTable"); Assert.assertTrue(delegate instanceof AsyncRetriableTable); if (rlGets || rlPuts) { delegate = TestUtils.getFieldValue(delegate, "table"); Assert.assertTrue(delegate instanceof AsyncRateLimitedTable); } delegate = TestUtils.getFieldValue(delegate, "table"); Assert.assertTrue(delegate instanceof AsyncRemoteTable); if (numRateLimitOps > 0) { TableRateLimiter readRateLimiter = TestUtils.getFieldValue(rwTable, "readRateLimiter"); 
TableRateLimiter writeRateLimiter = TestUtils.getFieldValue(rwTable, "writeRateLimiter"); Assert.assertTrue(!rlGets || readRateLimiter != null); Assert.assertTrue(!rlPuts || writeRateLimiter != null); } ThreadPoolExecutor callbackExecutor = TestUtils.getFieldValue(rwTable, "callbackExecutor"); Assert.assertEquals(10, callbackExecutor.getCorePoolSize()); }
Example #20
Source File: TestCouchbaseRemoteTableEndToEnd.java From samza with Apache License 2.0 | 4 votes |
/**
 * End-to-end test: seeds the input bucket with four user/age documents, runs an application that
 * joins the "User" stream with a remote table reading from the input bucket and sends the result
 * to a remote table writing to the output bucket, then asserts the JSON content stored in the
 * output bucket for each user.
 */
@Test
public void testEndToEnd() throws Exception {
  // Seed the input bucket.
  Bucket inputBucket = cluster.openBucket(inputBucketName);
  inputBucket.upsert(ByteArrayDocument.create("Alice", "20".getBytes()));
  inputBucket.upsert(ByteArrayDocument.create("Bob", "30".getBytes()));
  inputBucket.upsert(ByteArrayDocument.create("Chris", "40".getBytes()));
  inputBucket.upsert(ByteArrayDocument.create("David", "50".getBytes()));
  inputBucket.close();

  String[] users = new String[]{"Alice", "Bob", "Chris", "David"};
  int partitionCount = 1;

  Map<String, String> jobConfigs = TestLocalTableEndToEnd.getBaseJobConfig(bootstrapUrl(), zkConnect());
  jobConfigs.put("streams.User.samza.system", "test");
  jobConfigs.put("streams.User.source", Base64Serializer.serialize(users));
  jobConfigs.put("streams.User.partitionCount", String.valueOf(partitionCount));
  Config config = new MapConfig(jobConfigs);

  final StreamApplication app = appDesc -> {
    DelegatingSystemDescriptor testSystem = new DelegatingSystemDescriptor("test");
    GenericInputDescriptor<String> userInput = testSystem.getInputDescriptor("User", new NoOpSerde<>());

    CouchbaseTableReadFunction<String> readFn =
        new CouchbaseTableReadFunction<>(inputBucketName, String.class, "couchbase://127.0.0.1")
            .withBootstrapCarrierDirectPort(couchbaseMock.getCarrierPort(inputBucketName))
            .withBootstrapHttpDirectPort(couchbaseMock.getHttpPort())
            .withSerde(new StringSerde());

    CouchbaseTableWriteFunction<JsonObject> writeFn =
        new CouchbaseTableWriteFunction<>(outputBucketName, JsonObject.class, "couchbase://127.0.0.1")
            .withBootstrapCarrierDirectPort(couchbaseMock.getCarrierPort(outputBucketName))
            .withBootstrapHttpDirectPort(couchbaseMock.getHttpPort());

    RemoteTableDescriptor inputTableDesc = new RemoteTableDescriptor<String, String>("input-table")
        .withReadFunction(readFn)
        .withRateLimiterDisabled();
    Table<KV<String, String>> inputTable = appDesc.getTable(inputTableDesc);

    RemoteTableDescriptor outputTableDesc = new RemoteTableDescriptor<String, JsonObject>("output-table")
        .withReadFunction(new NoOpTableReadFunction<>())
        .withWriteFunction(writeFn)
        .withRateLimiterDisabled();
    Table<KV<String, JsonObject>> outputTable = appDesc.getTable(outputTableDesc);

    appDesc.getInputStream(userInput)
        .map(user -> KV.of(user, user))
        .join(inputTable, new JoinFunction())
        .sendTo(outputTable);
  };

  final LocalApplicationRunner runner = new LocalApplicationRunner(app, config);
  executeRun(runner, config);
  runner.waitForFinish();

  // Verify what the job wrote.
  Bucket outputBucket = cluster.openBucket(outputBucketName);
  Assert.assertEquals("{\"name\":\"Alice\",\"age\":\"20\"}", outputBucket.get("Alice").content().toString());
  Assert.assertEquals("{\"name\":\"Bob\",\"age\":\"30\"}", outputBucket.get("Bob").content().toString());
  Assert.assertEquals("{\"name\":\"Chris\",\"age\":\"40\"}", outputBucket.get("Chris").content().toString());
  Assert.assertEquals("{\"name\":\"David\",\"age\":\"50\"}", outputBucket.get("David").content().toString());
  outputBucket.close();
}
Example #21
Source File: TestLocalTableEndToEnd.java From samza with Apache License 2.0 | 4 votes |
/**
 * Describes an application in which two profile streams ("Profile1", "Profile2") both write into
 * table "t1", and two page view streams ("PageView1", "PageView2") are each re-partitioned by
 * member id and joined with that table. Profiles sent and join results are recorded in the
 * corresponding {@code sentToProfileTable*} and {@code joinedPageViews*} collections.
 *
 * @param appDesc descriptor on which the table, streams and operators are declared
 */
@Override
public void describe(StreamApplicationDescriptor appDesc) {
  KVSerde<Integer, Profile> profileKVSerde = KVSerde.of(new IntegerSerde(), new ProfileJsonSerde());
  KVSerde<Integer, PageView> pageViewKVSerde = KVSerde.of(new IntegerSerde(), new PageViewJsonSerde());

  PageViewToProfileJoinFunction firstJoinFn = new PageViewToProfileJoinFunction();
  PageViewToProfileJoinFunction secondJoinFn = new PageViewToProfileJoinFunction();

  Table<KV<Integer, Profile>> profileTable = appDesc.getTable(new InMemoryTableDescriptor("t1", profileKVSerde));

  DelegatingSystemDescriptor testSystem = new DelegatingSystemDescriptor("test");

  // Two profile streams feed the same table.
  GenericInputDescriptor<Profile> profileInput1 = testSystem.getInputDescriptor("Profile1", new NoOpSerde<>());
  GenericInputDescriptor<Profile> profileInput2 = testSystem.getInputDescriptor("Profile2", new NoOpSerde<>());
  MessageStream<Profile> profiles1 = appDesc.getInputStream(profileInput1);
  MessageStream<Profile> profiles2 = appDesc.getInputStream(profileInput2);

  profiles1
      .map(profile -> {
        sentToProfileTable1.add(profile);
        return new KV(profile.getMemberId(), profile);
      })
      .sendTo(profileTable);

  profiles2
      .map(profile -> {
        sentToProfileTable2.add(profile);
        return new KV(profile.getMemberId(), profile);
      })
      .sendTo(profileTable);

  // Two page view streams are joined against that table independently.
  GenericInputDescriptor<PageView> pageViewInput1 =
      testSystem.getInputDescriptor("PageView1", new NoOpSerde<PageView>());
  GenericInputDescriptor<PageView> pageViewInput2 =
      testSystem.getInputDescriptor("PageView2", new NoOpSerde<PageView>());
  MessageStream<PageView> pageViews1 = appDesc.getInputStream(pageViewInput1);
  MessageStream<PageView> pageViews2 = appDesc.getInputStream(pageViewInput2);

  pageViews1
      .partitionBy(PageView::getMemberId, v -> v, pageViewKVSerde, "p1")
      .join(profileTable, firstJoinFn)
      .sink((message, collector, coordinator) -> joinedPageViews1.add(message));

  pageViews2
      .partitionBy(PageView::getMemberId, v -> v, pageViewKVSerde, "p2")
      .join(profileTable, secondJoinFn)
      .sink((message, collector, coordinator) -> joinedPageViews2.add(message));
}
Example #22
Source File: TestRemoteTableEndToEnd.java From samza with Apache License 2.0 | 4 votes |
/**
 * Runs a job that joins the "PageView" input stream with a remote profile table and writes the
 * results to a remote enriched-page-view table, then checks what was written.
 *
 * @param withCache    when {@code true}, every table is obtained through {@code getCachingTable}
 *                     instead of directly from the application descriptor
 * @param defaultCache forwarded to {@code getCachingTable}; only relevant when {@code withCache} is set
 * @param withArgs     when {@code true}, the stream additionally passes through a
 *                     {@code TestReadWriteMapFunction} bound to a counter table, and {@code true} is
 *                     passed as the extra argument to both {@code join} and {@code sendTo}
 * @param testName     key under which this run's written records and counter are stored
 * @throws Exception if building or running the job fails
 */
private void doTestStreamTableJoinRemoteTable(boolean withCache, boolean defaultCache, boolean withArgs,
    String testName) throws Exception {

  writtenRecords.put(testName, new ArrayList<>());

  // Test data: `count` page views and profiles; the page views are supplied to every partition's source config.
  final int count = 10;
  final int partitionCount = 4;
  final PageView[] pageViews = generatePageViews(count);
  final String profiles = Base64Serializer.serialize(generateProfiles(count));

  final Map<String, String> configs = TestLocalTableEndToEnd.getBaseJobConfig(bootstrapUrl(), zkConnect());
  configs.put("streams.PageView.samza.system", "test");
  configs.put("streams.PageView.source", Base64Serializer.serialize(pageViews));
  configs.put("streams.PageView.partitionCount", String.valueOf(partitionCount));

  final RateLimiter readRateLimiter = mock(RateLimiter.class, withSettings().serializable());
  final TableRateLimiter.CreditFunction creditFunction = (key, value, args) -> 1;

  final StreamApplication app = appDesc -> {
    // Lookup side of the join: read-only remote table with a rate limiter on reads.
    final RemoteTableDescriptor joinTableDesc =
        new RemoteTableDescriptor<Integer, TestTableData.Profile>("profile-table-1")
            .withReadFunction(InMemoryProfileReadFunction.getInMemoryReadFunction(testName, profiles))
            .withRateLimiter(readRateLimiter, creditFunction, null);

    // Sink of the pipeline: remote table whose writes land in the in-memory write function.
    final RemoteTableDescriptor outputTableDesc =
        new RemoteTableDescriptor<Integer, EnrichedPageView>("enriched-page-view-table-1")
            .withReadFunction(new NoOpTableReadFunction<>())
            .withReadRateLimiterDisabled()
            .withWriteFunction(new InMemoryEnrichedPageViewWriteFunction(testName))
            .withWriteRateLimit(1000);

    final Table<KV<Integer, EnrichedPageView>> outputTable = withCache
        ? getCachingTable(outputTableDesc, defaultCache, appDesc)
        : appDesc.getTable(outputTableDesc);

    final Table<KV<Integer, Profile>> joinTable = withCache
        ? getCachingTable(joinTableDesc, defaultCache, appDesc)
        : appDesc.getTable(joinTableDesc);

    final DelegatingSystemDescriptor testSystem = new DelegatingSystemDescriptor("test");
    final GenericInputDescriptor<PageView> pageViewInput =
        testSystem.getInputDescriptor("PageView", new NoOpSerde<>());

    if (withArgs) {
      // Variant with extra arguments: a counter table is registered and referenced by id from the map function.
      counters.put(testName, new AtomicInteger());

      final RemoteTableDescriptor counterTableDesc = new RemoteTableDescriptor("counter-table-1")
          .withReadFunction(new InMemoryCounterReadFunction(testName))
          .withWriteFunction(new InMemoryCounterWriteFunction(testName))
          .withRateLimiterDisabled();

      final Table counterTable = withCache
          ? getCachingTable(counterTableDesc, defaultCache, appDesc)
          : appDesc.getTable(counterTableDesc);

      final String counterTableName = ((TableImpl) counterTable).getTableId();

      appDesc.getInputStream(pageViewInput)
          .map(new TestReadWriteMapFunction(counterTableName))
          .map(pageView -> new KV<>(pageView.getMemberId(), pageView))
          .join(joinTable, new PageViewToProfileJoinFunction(), true)
          .map(enriched -> new KV(enriched.getMemberId(), enriched))
          .sendTo(outputTable, true);
    } else {
      // Plain variant: key by member id, join, re-key the result and write it out.
      appDesc.getInputStream(pageViewInput)
          .map(pageView -> new KV<>(pageView.getMemberId(), pageView))
          .join(joinTable, new PageViewToProfileJoinFunction())
          .map(enriched -> new KV(enriched.getMemberId(), enriched))
          .sendTo(outputTable);
    }
  };

  final Config config = new MapConfig(configs);
  final LocalApplicationRunner runner = new LocalApplicationRunner(app, config);
  executeRun(runner, config);
  runner.waitForFinish();

  // One written record is expected per page view per partition.
  final int numExpected = count * partitionCount;
  Assert.assertEquals(numExpected, writtenRecords.get(testName).size());
  Assert.assertTrue(writtenRecords.get(testName).get(0) instanceof EnrichedPageView);

  if (withArgs) {
    writtenRecords.get(testName).forEach(record -> Assert.assertTrue(record.company.endsWith("-r-w")));
    Assert.assertEquals(numExpected, counters.get(testName).get());
  } else {
    writtenRecords.get(testName).forEach(record -> Assert.assertFalse(record.company.contains("-")));
  }
}
Example #23
Source File: TestRemoteTableWithBatchEndToEnd.java From samza with Apache License 2.0 | 4 votes |
private void doTestStreamTableJoinRemoteTable(String testName, boolean batchRead, boolean batchWrite) throws Exception { final InMemoryWriteFunction writer = new InMemoryWriteFunction(testName); batchReads.put(testName, new AtomicInteger()); batchWrites.put(testName, new AtomicInteger()); writtenRecords.put(testName, new CopyOnWriteArrayList<>()); final int count = 16; final int batchSize = 4; PageView[] pageViews = generatePageViewsWithDistinctKeys(count); String profiles = Base64Serializer.serialize(generateProfiles(count)); int partitionCount = 1; Map<String, String> configs = TestLocalTableEndToEnd.getBaseJobConfig(bootstrapUrl(), zkConnect()); configs.put("streams.PageView.samza.system", "test"); configs.put("streams.PageView.source", Base64Serializer.serialize(pageViews)); configs.put("streams.PageView.partitionCount", String.valueOf(partitionCount)); configs.put("task.max.concurrency", String.valueOf(count)); configs.put("task.async.commit", String.valueOf(true)); final RateLimiter readRateLimiter = mock(RateLimiter.class, withSettings().serializable()); final RateLimiter writeRateLimiter = mock(RateLimiter.class, withSettings().serializable()); final TableRateLimiter.CreditFunction creditFunction = (k, v, args)->1; final StreamApplication app = appDesc -> { RemoteTableDescriptor<Integer, Profile> inputTableDesc = new RemoteTableDescriptor<>("profile-table-1"); inputTableDesc .withReadFunction(InMemoryReadFunction.getInMemoryReadFunction(testName, profiles)) .withRateLimiter(readRateLimiter, creditFunction, null); if (batchRead) { inputTableDesc.withBatchProvider(new CompactBatchProvider().withMaxBatchSize(batchSize).withMaxBatchDelay(Duration.ofHours(1))); } // dummy reader TableReadFunction readFn = new MyReadFunction(); RemoteTableDescriptor<Integer, EnrichedPageView> outputTableDesc = new RemoteTableDescriptor<>("enriched-page-view-table-1"); outputTableDesc .withReadFunction(readFn) .withWriteFunction(writer) .withRateLimiter(writeRateLimiter, 
creditFunction, creditFunction); if (batchWrite) { outputTableDesc.withBatchProvider(new CompactBatchProvider().withMaxBatchSize(batchSize).withMaxBatchDelay(Duration.ofHours(1))); } Table<KV<Integer, EnrichedPageView>> outputTable = appDesc.getTable(outputTableDesc); Table<KV<Integer, Profile>> inputTable = appDesc.getTable(inputTableDesc); DelegatingSystemDescriptor ksd = new DelegatingSystemDescriptor("test"); GenericInputDescriptor<PageView> isd = ksd.getInputDescriptor("PageView", new NoOpSerde<>()); appDesc.getInputStream(isd) .map(pv -> new KV<>(pv.getMemberId(), pv)) .join(inputTable, new PageViewToProfileJoinFunction()) .map(m -> new KV(m.getMemberId(), m)) .sendTo(outputTable); }; Config config = new MapConfig(configs); final LocalApplicationRunner runner = new LocalApplicationRunner(app, config); executeRun(runner, config); runner.waitForFinish(); int numExpected = count * partitionCount; Assert.assertEquals(numExpected, writtenRecords.get(testName).size()); Assert.assertTrue(writtenRecords.get(testName).get(0) instanceof EnrichedPageView); if (batchRead) { Assert.assertEquals(numExpected / batchSize, batchReads.get(testName).get()); } if (batchWrite) { Assert.assertEquals(numExpected / batchSize, batchWrites.get(testName).get()); } }
Example #24
Source File: JoinTranslator.java From samza with Apache License 2.0 | 4 votes |
/**
 * Obtains the {@link Table} for the table side of a join and, when that table is local, wires the
 * table's input stream into it.
 *
 * @param tableNode the join input node describing the table side
 * @param context   the translator context providing the application descriptor and message streams
 * @return the table to join against
 * @throws SamzaException if no table descriptor can be resolved for {@code tableNode}
 */
private Table getTable(JoinInputNode tableNode, TranslatorContext context) {
  SqlIOConfig sourceTableConfig = resolveSQlIOForTable(tableNode.getRelNode(), context);

  boolean descriptorResolved =
      sourceTableConfig != null && sourceTableConfig.getTableDescriptor().isPresent();
  if (!descriptorResolved) {
    String errMsg = "Failed to resolve table source in join operation: node=" + tableNode.getRelNode();
    log.error(errMsg);
    throw new SamzaException(errMsg);
  }

  Table<KV<SamzaSqlRelRecord, SamzaSqlRelMessage>> table =
      context.getStreamAppDescriptor().getTable(sourceTableConfig.getTableDescriptor().get());

  if (!tableNode.isRemoteTable()) {
    // Local table: it has to be loaded. The fields of the join condition form the composite key and
    // the relational message is the value; the input stream denoted as 'table' is sent to the table store.
    MessageStream<SamzaSqlRelMessage> relOutputStream =
        context.getMessageStream(tableNode.getRelNode().getId());

    SamzaSqlRelRecordSerdeFactory.SamzaSqlRelRecordSerde keySerde =
        (SamzaSqlRelRecordSerdeFactory.SamzaSqlRelRecordSerde)
            new SamzaSqlRelRecordSerdeFactory().getSerde(null, null);
    SamzaSqlRelMessageSerdeFactory.SamzaSqlRelMessageSerde valueSerde =
        (SamzaSqlRelMessageSerdeFactory.SamzaSqlRelMessageSerde)
            new SamzaSqlRelMessageSerdeFactory().getSerde(null, null);

    List<Integer> tableKeyIds = tableNode.getKeyIds();

    // Always repartition by the join fields before writing key and value to the table, so that the
    // stream and the table being joined share the same partitioning scheme (same partition key and
    // partition count). Bootstrap semantics are not propagated to the intermediate streams (see
    // SAMZA-1613), so results are consistent only after the local table has caught up.
    relOutputStream
        .partitionBy(
            message -> createSamzaSqlCompositeKey(message, tableKeyIds),
            message -> message,
            KVSerde.of(keySerde, valueSerde),
            intermediateStreamPrefix + "table_" + logicalOpId)
        .sendTo(table);
  }

  // Remote tables are returned as-is; local tables are returned after the loading stream is wired.
  return table;
}
Example #25
Source File: JoinTranslator.java From samza with Apache License 2.0 | 4 votes |
void translate(final LogicalJoin join, final TranslatorContext translatorContext) { JoinInputNode.InputType inputTypeOnLeft = getInputType(join.getLeft(), translatorContext); JoinInputNode.InputType inputTypeOnRight = getInputType(join.getRight(), translatorContext); // Do the validation of join query validateJoinQuery(join, inputTypeOnLeft, inputTypeOnRight); // At this point, one of the sides is a table. Let's figure out if it is on left or right side. boolean isTablePosOnRight = inputTypeOnRight != JoinInputNode.InputType.STREAM; // stream and table keyIds are used to extract the join condition field (key) names and values out of the stream // and table records. List<Integer> streamKeyIds = new LinkedList<>(); List<Integer> tableKeyIds = new LinkedList<>(); // Fetch the stream and table indices corresponding to the fields given in the join condition. final int leftSideSize = join.getLeft().getRowType().getFieldCount(); final int tableStartIdx = isTablePosOnRight ? leftSideSize : 0; final int streamStartIdx = isTablePosOnRight ? 0 : leftSideSize; final int tableEndIdx = isTablePosOnRight ? join.getRowType().getFieldCount() : leftSideSize; join.getCondition().accept(new RexShuttle() { @Override public RexNode visitInputRef(RexInputRef inputRef) { validateJoinKeyType(inputRef); // Validate the type of the input ref. int index = inputRef.getIndex(); if (index >= tableStartIdx && index < tableEndIdx) { tableKeyIds.add(index - tableStartIdx); } else { streamKeyIds.add(index - streamStartIdx); } return inputRef; } }); Collections.sort(tableKeyIds); Collections.sort(streamKeyIds); // Get the two input nodes (stream and table nodes) for the join. JoinInputNode streamNode = new JoinInputNode(isTablePosOnRight ? join.getLeft() : join.getRight(), streamKeyIds, isTablePosOnRight ? inputTypeOnLeft : inputTypeOnRight, !isTablePosOnRight); JoinInputNode tableNode = new JoinInputNode(isTablePosOnRight ? join.getRight() : join.getLeft(), tableKeyIds, isTablePosOnRight ? 
inputTypeOnRight : inputTypeOnLeft, isTablePosOnRight); MessageStream<SamzaSqlRelMessage> inputStream = translatorContext.getMessageStream(streamNode.getRelNode().getId()); Table table = getTable(tableNode, translatorContext); MessageStream<SamzaSqlRelMessage> outputStream = joinStreamWithTable(inputStream, table, streamNode, tableNode, join, translatorContext); translatorContext.registerMessageStream(join.getId(), outputStream); outputStream.map(outputMetricsMF); }
Example #26
Source File: TranslationContext.java From beam with Apache License 2.0 | 4 votes |
/**
 * Returns the table registered under the descriptor's table id, asking {@code appDescriptor} for it
 * only when {@code registeredTables} has no entry for that id yet.
 *
 * @param tableDesc descriptor of the requested table
 * @param <K> the key type of the table
 * @param <V> the value type of the table
 * @return the table associated with {@code tableDesc.getTableId()}
 */
@SuppressWarnings("unchecked")
public <K, V> Table<KV<K, V>> getTable(TableDescriptor<K, V, ?> tableDesc) {
  final String tableId = tableDesc.getTableId();
  // The mapping function ignores its argument: the descriptor already carries everything needed.
  return registeredTables.computeIfAbsent(tableId, missingId -> appDescriptor.getTable(tableDesc));
}
Example #27
Source File: PortableTranslationContext.java From beam with Apache License 2.0 | 4 votes |
/**
 * Looks up the table for the given descriptor by its table id, creating it through
 * {@code appDescriptor} and storing it in {@code registeredTables} if no entry exists for that id.
 *
 * @param tableDesc descriptor of the requested table
 * @param <K> the key type of the table
 * @param <V> the value type of the table
 * @return the table associated with {@code tableDesc.getTableId()}
 */
@SuppressWarnings("unchecked")
public <K, V> Table<KV<K, V>> getTable(TableDescriptor<K, V, ?> tableDesc) {
  final String tableId = tableDesc.getTableId();
  // The lambda parameter is unused on purpose; the table is built from the descriptor itself.
  return registeredTables.computeIfAbsent(tableId, absentId -> appDescriptor.getTable(tableDesc));
}
Example #28
Source File: MessageStream.java From samza with Apache License 2.0 | 2 votes |
/**
 * Allows sending messages in this {@link MessageStream} to a {@link Table} and then propagates this
 * {@link MessageStream} to the next chained operator. The type of input message is expected to be {@link KV},
 * otherwise a {@link ClassCastException} will be thrown.
 * <p>
 * Note: The message will be written but may not be flushed to the underlying table before it is propagated to
 * the chained operators. Whether the message can be read back from the Table in the chained operator depends on
 * whether it was flushed and whether the Table offers read-after-write consistency. Messages retain the original
 * partitioning scheme when propagated to the next operator.
 *
 * @param table the table to write messages to
 * @param args additional arguments passed to the table
 * @param <K> the type of key in the table
 * @param <V> the type of record value in the table
 * @return this {@link MessageStream}
 */
<K, V> MessageStream<KV<K, V>> sendTo(Table<KV<K, V>> table, Object ... args);
Example #29
Source File: MessageStream.java From samza with Apache License 2.0 | 2 votes |
/**
 * Joins this {@link MessageStream} with another {@link Table} using the provided
 * pairwise {@link StreamTableJoinFunction}.
 * <p>
 * The type of input message is expected to be {@link KV}.
 * <p>
 * Records are looked up from the joined table using the join key, the join function
 * is applied and join results are emitted as matches are found.
 * <p>
 * The join function allows implementation of both inner and left outer join. A null will be
 * passed to the join function if no record matching the join key is found in the table.
 * The join function can choose to return an instance of JM (outer left join) or null
 * (inner join); if null is returned, it won't be processed further.
 * <p>
 * Both the input stream and the table being joined must have the same number of partitions,
 * and should be partitioned by the same join key.
 *
 * @param table the table being joined
 * @param joinFn the join function
 * @param args additional arguments passed to the table
 * @param <K> the type of join key
 * @param <R> the type of table record
 * @param <JM> the type of messages resulting from the {@code joinFn}
 * @return the joined {@link MessageStream}
 */
<K, R extends KV, JM> MessageStream<JM> join(Table<R> table, StreamTableJoinFunction<? extends K, ? super M, ? super R, ? extends JM> joinFn, Object ... args);
Example #30
Source File: StreamApplicationDescriptor.java From samza with Apache License 2.0 | 2 votes |
/**
 * Gets the {@link Table} corresponding to the {@link TableDescriptor}.
 * <p>
 * Multiple invocations of this method with the same {@link TableDescriptor} will throw an
 * {@link IllegalStateException}, so callers that need the table in several places should keep
 * the returned reference instead of calling this method again.
 *
 * @param tableDescriptor the {@link TableDescriptor}
 * @param <K> the type of the key
 * @param <V> the type of the value
 * @return the {@link Table} corresponding to the {@code tableDescriptor}
 * @throws IllegalStateException when invoked multiple times with the same {@link TableDescriptor}
 */
<K, V> Table<KV<K, V>> getTable(TableDescriptor<K, V, ?> tableDescriptor);