org.apache.beam.sdk.coders.KvCoder Java Examples
The following examples show how to use
org.apache.beam.sdk.coders.KvCoder.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ReduceFnTester.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds a {@link ReduceFnTester} that combines {@code Integer} inputs with the supplied
 * context-aware combine function, using the given trigger state machine, pipeline options and
 * side input reader.
 *
 * <p>The combine function is bound to a {@link KvCoder} of UTF-8 string keys and var-int values.
 */
public static <W extends BoundedWindow, AccumT, OutputT>
    ReduceFnTester<Integer, OutputT, W> combining(
        WindowingStrategy<?, W> strategy,
        TriggerStateMachine triggerStateMachine,
        CombineFnWithContext<Integer, AccumT, OutputT> combineFn,
        Coder<OutputT> outputCoder,
        PipelineOptions options,
        SideInputReader sideInputReader)
        throws Exception {
  CoderRegistry coderRegistry = CoderRegistry.createDefault();
  KvCoder<String, Integer> inputCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
  AppliedCombineFn<String, Integer, AccumT, OutputT> appliedFn =
      AppliedCombineFn.withInputCoder(combineFn, coderRegistry, inputCoder);

  return new ReduceFnTester<>(
      strategy,
      triggerStateMachine,
      SystemReduceFn.combining(StringUtf8Coder.of(), appliedFn),
      outputCoder,
      options,
      sideInputReader);
}
Example #2
Source File: BigQueryFeatureSink.java From feast with Apache License 2.0 | 6 votes |
/**
 * Generates a table schema for each incoming feature set spec, stores a multimap view of the
 * schemas (keyed through {@code ReferenceToString}) in {@code schemasView}, and returns the
 * feature set references.
 *
 * @param featureSetSpecs Feature set to be written
 */
@Override
public PCollection<FeatureSetReference> prepareWrite(
    PCollection<KV<FeatureSetReference, FeatureSetProto.FeatureSetSpec>> featureSetSpecs) {
  FeatureSetSpecToTableSchema specToSchemaFn =
      new FeatureSetSpecToTableSchema(
          DatasetId.of(getProjectId(), getDatasetId()), getBQClient());

  PCollection<KV<FeatureSetReference, TableSchema>> schemas =
      featureSetSpecs
          .apply("GenerateTableSchema", ParDo.of(specToSchemaFn))
          .setCoder(
              KvCoder.of(
                  AvroCoder.of(FeatureSetReference.class),
                  FeatureSetSpecToTableSchema.TableSchemaCoder.of()));

  // Keep the schemas available as a side-input view.
  schemasView =
      schemas
          .apply("ReferenceString", ParDo.of(new ReferenceToString()))
          .apply("View", View.asMultimap());

  return schemas.apply("Ready", Keys.create());
}
Example #3
Source File: StatefulTeamScoreTest.java From deployment-examples with MIT License | 6 votes |
/**
 * Verifies the output of {@link UpdateTeamScoreFn} (a {@link
 * org.apache.beam.sdk.transforms.DoFn}) when all events belong to a single team.
 */
@Test
public void testScoreUpdatesOneTeam() {
  String redTeam = TestUser.RED_ONE.getTeam();
  KvCoder<String, GameActionInfo> eventCoder =
      KvCoder.of(StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class));

  TestStream<KV<String, GameActionInfo>> createEvents =
      TestStream.create(eventCoder)
          .advanceWatermarkTo(baseTime)
          .addElements(
              event(TestUser.RED_TWO, 99, Duration.standardSeconds(10)),
              event(TestUser.RED_ONE, 1, Duration.standardSeconds(20)),
              event(TestUser.RED_ONE, 0, Duration.standardSeconds(30)),
              event(TestUser.RED_TWO, 100, Duration.standardSeconds(40)),
              event(TestUser.RED_TWO, 201, Duration.standardSeconds(50)))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, Integer>> teamScores =
      p.apply(createEvents).apply(ParDo.of(new UpdateTeamScoreFn(100)));

  PAssert.that(teamScores)
      .inWindow(GlobalWindow.INSTANCE)
      .containsInAnyOrder(KV.of(redTeam, 100), KV.of(redTeam, 200), KV.of(redTeam, 401));

  p.run().waitUntilFinish();
}
Example #4
Source File: StatefulParDoEvaluatorFactoryTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Feeds three elements with an empty key into a stateful concatenating {@code ParDo}; the third
 * element is added after the watermark has advanced past its timestamp, with an allowed lateness
 * of two milliseconds on the global window.
 */
@Test
public void testRequiresTimeSortedInputWithLateDataAndAllowedLateness() {
  Instant now = Instant.ofEpochMilli(0);

  TestStream<KV<String, Integer>> events =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()))
          .addElements(TimestampedValue.of(KV.of("", 1), now.plus(2)))
          .addElements(TimestampedValue.of(KV.of("", 2), now.plus(1)))
          .advanceWatermarkTo(now.plus(1))
          .addElements(TimestampedValue.of(KV.of("", 3), now))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, Integer>> input =
      pipeline
          .apply(events)
          .apply(
              Window.<KV<String, Integer>>into(new GlobalWindows())
                  .withAllowedLateness(Duration.millis(2)));

  PCollection<String> result = input.apply(ParDo.of(statefulConcat()));

  PAssert.that(result).containsInAnyOrder("3", "3:2", "3:2:1");
  pipeline.run();
}
Example #5
Source File: ToIsmRecordForMultimapDoFnFactory.java From beam with Apache License 2.0 | 6 votes |
/**
 * Creates a {@link ToIsmRecordForMultimapParDoFn} from the coder encoded in the user function
 * spec.
 *
 * <p>The coder is read from the {@link PropertyNames#ENCODING} property of {@code cloudUserFn}
 * and must be an {@link IsmRecordCoder}. Its first two coder arguments are paired into the
 * {@link KvCoder} handed to the resulting function.
 *
 * @throws IllegalStateException if the decoded coder is not an {@link IsmRecordCoder}
 */
@Override
public ParDoFn create(
    PipelineOptions options,
    CloudObject cloudUserFn,
    List<SideInputInfo> sideInputInfos,
    TupleTag<?> mainOutputTag,
    Map<TupleTag<?>, Integer> outputTupleTagsToReceiverIndices,
    DataflowExecutionContext<?> executionContext,
    DataflowOperationContext operationContext)
    throws Exception {
  Coder<?> coder =
      CloudObjects.coderFromCloudObject(
          CloudObject.fromSpec(Structs.getObject(cloudUserFn, PropertyNames.ENCODING)));
  // Fail fast with a readable message before the unchecked cast below.
  checkState(
      coder instanceof IsmRecordCoder,
      "Expected to receive an instance of %s but got %s",
      IsmRecordCoder.class.getSimpleName(),
      coder);
  IsmRecordCoder<?> ismRecordCoder = (IsmRecordCoder<?>) coder;

  return new ToIsmRecordForMultimapParDoFn(
      KvCoder.of(
          ismRecordCoder.getCoderArguments().get(0),
          ismRecordCoder.getCoderArguments().get(1)));
}
Example #6
Source File: CombineTranslation.java From beam with Apache License 2.0 | 6 votes |
/**
 * Asks {@code combineFn} for its accumulator coder, using the pipeline's coder registry and the
 * value coder of the transform's single non-additional input.
 *
 * @throws IOException if the combine function cannot provide an accumulator coder
 */
private static <K, InputT, AccumT> Coder<AccumT> extractAccumulatorCoder(
    GlobalCombineFn<InputT, AccumT, ?> combineFn,
    AppliedPTransform<PCollection<KV<K, InputT>>, ?, Combine.PerKey<K, InputT, ?>> transform)
    throws IOException {
  try {
    @SuppressWarnings("unchecked")
    PCollection<KV<K, InputT>> mainInput =
        (PCollection<KV<K, InputT>>)
            Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(transform));
    return combineFn.getAccumulatorCoder(
        transform.getPipeline().getCoderRegistry(),
        ((KvCoder<K, InputT>) mainInput.getCoder()).getValueCoder());
  } catch (CannotProvideCoderException cause) {
    // Surface the failure as an IOException, keeping the original cause attached.
    throw new IOException("Could not obtain a Coder for the accumulator", cause);
  }
}
Example #7
Source File: WriteToGCSTextTest.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Expects an {@link IllegalArgumentException} when {@link WriteToGCSText} is built with a null
 * output directory.
 */
@Test
public void testWriteWithoutOutputDirectory() {
  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("withOutputDirectory(outputDirectory) called with null input.");

  KvCoder<String, String> messageCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());

  pipeline
      .apply("CreateInput", Create.of(message).withCoder(messageCoder))
      .apply(
          "WriteTextFile(s)",
          WriteToGCSText.newBuilder()
              .withOutputDirectory(null)
              .withOutputFilenamePrefix(TEXT_FILENAME_PREFIX)
              .setNumShards(NUM_SHARDS)
              .withTempLocation(FAKE_TEMP_LOCATION)
              .build());

  pipeline.run();
}
Example #8
Source File: WriteToGCSTextTest.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Expects an {@link IllegalArgumentException} when {@link WriteToGCSText} is built with a null
 * temporary location.
 */
@Test
public void testWriteWithoutTempLocation() {
  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("withTempLocation(tempLocation) called with null input. ");

  KvCoder<String, String> messageCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());

  pipeline
      .apply("CreateInput", Create.of(message).withCoder(messageCoder))
      .apply(
          "WriteTextFile(s)",
          WriteToGCSText.newBuilder()
              .withOutputDirectory(FAKE_DIR)
              .withOutputFilenamePrefix(TEXT_FILENAME_PREFIX)
              .setNumShards(NUM_SHARDS)
              .withTempLocation(null)
              .build());

  pipeline.run();
}
Example #9
Source File: ReduceFnTester.java From beam with Apache License 2.0 | 6 votes |
/** * Creates a {@link ReduceFnTester} for the given {@link WindowingStrategy} and {@link CombineFn}, * creating a {@link TriggerStateMachine} from the {@link Trigger} in the {@link * WindowingStrategy}. */ public static <W extends BoundedWindow, AccumT, OutputT> ReduceFnTester<Integer, OutputT, W> combining( WindowingStrategy<?, W> strategy, CombineFn<Integer, AccumT, OutputT> combineFn, Coder<OutputT> outputCoder) throws Exception { CoderRegistry registry = CoderRegistry.createDefault(); // Ensure that the CombineFn can be converted into an AppliedCombineFn AppliedCombineFn.withInputCoder( combineFn, registry, KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())); return combining( strategy, TriggerStateMachines.stateMachineForTrigger( TriggerTranslation.toProto(strategy.getTrigger())), combineFn, outputCoder); }
Example #10
Source File: WriteToGCSAvroTest.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Expects an {@link IllegalArgumentException} when {@link WriteToGCSAvro} is built with a null
 * output directory.
 */
@Test
public void testWriteWithoutOutputDirectory() {
  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("withOutputDirectory(outputDirectory) called with null input.");

  KvCoder<String, String> messageCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());

  pipeline
      .apply("CreateInput", Create.of(message).withCoder(messageCoder))
      .apply(
          "WriteTextFile(s)",
          WriteToGCSAvro.newBuilder()
              .withOutputDirectory(null)
              .withOutputFilenamePrefix(AVRO_FILENAME_PREFIX)
              .setNumShards(NUM_SHARDS)
              .withTempLocation(FAKE_TEMP_LOCATION)
              .build());

  pipeline.run();
}
Example #11
Source File: GroupByKeyTranslator.java From beam with Apache License 2.0 | 6 votes |
/**
 * Picks the {@link SystemReduceFn} matching the transform: a buffering reduce function for a
 * {@link GroupByKey}, a combining one for a {@link Combine.PerKey}.
 *
 * @throws RuntimeException if the transform is neither of the two supported kinds
 */
@SuppressWarnings("unchecked")
private static <K, InputT, OutputT>
    SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> getSystemReduceFn(
        PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform,
        Pipeline pipeline,
        KvCoder<K, InputT> kvInputCoder) {
  if (transform instanceof GroupByKey) {
    return (SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow>)
        SystemReduceFn.buffering(kvInputCoder.getValueCoder());
  }

  if (transform instanceof Combine.PerKey) {
    final CombineFnBase.GlobalCombineFn<? super InputT, ?, OutputT> combineFn =
        ((Combine.PerKey) transform).getFn();
    return SystemReduceFn.combining(
        kvInputCoder.getKeyCoder(),
        AppliedCombineFn.withInputCoder(combineFn, pipeline.getCoderRegistry(), kvInputCoder));
  }

  throw new RuntimeException("Transform " + transform + " cannot be translated as GroupByKey.");
}
Example #12
Source File: CombineValuesFnFactory.java From beam with Apache License 2.0 | 6 votes |
private static <K, InputT, AccumT, OutputT> DoFnInfo<?, ?> createDoFnInfo( AppliedCombineFn<K, InputT, AccumT, OutputT> combineFn, SideInputReader sideInputReader) { GlobalCombineFnRunner<InputT, AccumT, OutputT> combineFnRunner = GlobalCombineFnRunners.create(combineFn.getFn()); DoFn<KV<K, AccumT>, KV<K, OutputT>> doFn = new ExtractOutputDoFn<>(combineFnRunner, sideInputReader); KvCoder<K, AccumT> inputCoder = null; if (combineFn.getKvCoder() != null) { inputCoder = KvCoder.of(combineFn.getKvCoder().getKeyCoder(), combineFn.getAccumulatorCoder()); } return DoFnInfo.forFn( doFn, combineFn.getWindowingStrategy(), combineFn.getSideInputViews(), inputCoder, Collections.emptyMap(), // Not needed here. new TupleTag<>(PropertyNames.OUTPUT), DoFnSchemaInformation.create(), Collections.emptyMap()); }
Example #13
Source File: SortValues.java From beam with Apache License 2.0 | 6 votes |
/**
 * Unwraps the {@link Coder} of the secondary key-value pairs from the input coder.
 *
 * <p>The input coder must be a {@link KvCoder} whose value coder is an {@link IterableCoder}
 * whose element coder is itself a {@link KvCoder}.
 *
 * @throws IllegalStateException if any of the three layers has an unexpected coder type
 */
@SuppressWarnings("unchecked")
private static <PrimaryKeyT, SecondaryKeyT, ValueT>
    KvCoder<SecondaryKeyT, ValueT> getSecondaryKeyValueCoder(
        Coder<KV<PrimaryKeyT, Iterable<KV<SecondaryKeyT, ValueT>>>> inputCoder) {
  // Outer layer: primary key -> iterable of secondary pairs.
  if (!(inputCoder instanceof KvCoder)) {
    throw new IllegalStateException("SortValues requires its input to use KvCoder");
  }
  KvCoder<PrimaryKeyT, Iterable<KV<SecondaryKeyT, ValueT>>> outerCoder =
      (KvCoder<PrimaryKeyT, Iterable<KV<SecondaryKeyT, ValueT>>>) inputCoder;

  // Middle layer: the iterable of secondary pairs.
  Coder<Iterable<KV<SecondaryKeyT, ValueT>>> valuesCoder = outerCoder.getValueCoder();
  if (!(valuesCoder instanceof IterableCoder)) {
    throw new IllegalStateException(
        "SortValues requires the values be encoded with IterableCoder");
  }
  IterableCoder<KV<SecondaryKeyT, ValueT>> iterableCoder =
      (IterableCoder<KV<SecondaryKeyT, ValueT>>) valuesCoder;

  // Inner layer: a single secondary key-value pair.
  Coder<KV<SecondaryKeyT, ValueT>> elementCoder = iterableCoder.getElemCoder();
  if (!(elementCoder instanceof KvCoder)) {
    throw new IllegalStateException(
        "SortValues requires the secondary key-value pairs to use KvCoder");
  }
  return (KvCoder<SecondaryKeyT, ValueT>) elementCoder;
}
Example #14
Source File: GroupNonMergingWindowsFunctions.java From beam with Apache License 2.0 | 6 votes |
/** * Creates composite key of K and W and group all values for that composite key with Spark's * repartitionAndSortWithinPartitions. Stream of sorted by composite key's is transformed to key * with iterator of all values for that key (via {@link GroupByKeyIterator}). * * <p>repartitionAndSortWithinPartitions is used because all values are not collected into memory * at once, but streamed with iterator unlike GroupByKey (it minimizes memory pressure). */ static <K, V, W extends BoundedWindow> JavaRDD<WindowedValue<KV<K, Iterable<V>>>> groupByKeyAndWindow( JavaRDD<WindowedValue<KV<K, V>>> rdd, Coder<K> keyCoder, Coder<V> valueCoder, WindowingStrategy<?, W> windowingStrategy, Partitioner partitioner) { final Coder<W> windowCoder = windowingStrategy.getWindowFn().windowCoder(); FullWindowedValueCoder<KV<K, V>> windowedKvCoder = WindowedValue.FullWindowedValueCoder.of(KvCoder.of(keyCoder, valueCoder), windowCoder); JavaPairRDD<ByteArray, byte[]> windowInKey = bringWindowToKey( rdd, keyCoder, windowCoder, wv -> CoderHelpers.toByteArray(wv, windowedKvCoder)); return windowInKey .repartitionAndSortWithinPartitions(getPartitioner(partitioner, rdd)) .mapPartitions( it -> new GroupByKeyIterator<>(it, keyCoder, windowingStrategy, windowedKvCoder)) .filter(Objects::nonNull); // filter last null element from GroupByKeyIterator }
Example #15
Source File: DataflowSideInputHandlerFactoryTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that a multimap side input handler returns an empty iterable when asked for the key
 * {@code "foo2"} in the global window.
 */
@Test
public void emptyResultForEmptyCollection() {
  ImmutableMap<String, SideInputReader> sideInputReadersMap =
      ImmutableMap.of(TRANSFORM_ID, fakeSideInputReader);
  ImmutableMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>>
      sideInputIdToPCollectionViewMap = ImmutableMap.of(sideInputId, view);

  DataflowSideInputHandlerFactory factory =
      DataflowSideInputHandlerFactory.of(sideInputReadersMap, sideInputIdToPCollectionViewMap);
  MultimapSideInputHandler<String, Integer, GlobalWindow> handler =
      factory.forMultimapSideInput(
          TRANSFORM_ID,
          SIDE_INPUT_NAME,
          KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()),
          GlobalWindow.Coder.INSTANCE);

  Iterable<Integer> values = handler.get("foo2", GlobalWindow.INSTANCE);

  assertThat(values, emptyIterable());
}
Example #16
Source File: StreamingDataflowWorker.java From beam with Apache License 2.0 | 6 votes |
/** * Extracts the userland key coder, if any, from the coder used in the initial read step of a * stage. This encodes many assumptions about how the streaming execution context works. */ @Nullable private Coder<?> extractKeyCoder(Coder<?> readCoder) { if (!(readCoder instanceof WindowedValueCoder)) { throw new RuntimeException( String.format( "Expected coder for streaming read to be %s, but received %s", WindowedValueCoder.class.getSimpleName(), readCoder)); } // Note that TimerOrElementCoder is a backwards-compatibility class // that is really a FakeKeyedWorkItemCoder Coder<?> valueCoder = ((WindowedValueCoder<?>) readCoder).getValueCoder(); if (valueCoder instanceof KvCoder<?, ?>) { return ((KvCoder<?, ?>) valueCoder).getKeyCoder(); } if (!(valueCoder instanceof WindmillKeyedWorkItem.FakeKeyedWorkItemCoder<?, ?>)) { return null; } return ((WindmillKeyedWorkItem.FakeKeyedWorkItemCoder<?, ?>) valueCoder).getKeyCoder(); }
Example #17
Source File: CloningBundleFactoryTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Adds one element to a keyed bundle and checks that the committed bundle holds an equal element
 * whose value is a different instance, and that it reports the expected PCollection and key.
 */
@Test
public void keyedBundleWorkingCoderSucceedsClonesOutput() {
  PCollection<Integer> created = p.apply(Create.of(1, 3).withCoder(VarIntCoder.of()));
  PCollection<KV<String, Iterable<Integer>>> keyed =
      created
          .apply(WithKeys.of("foo"))
          .setCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()))
          .apply(GroupByKey.create());

  KV<String, Iterable<Integer>> fooValue = KV.of("foo", ImmutableList.of(1, 3));
  WindowedValue<KV<String, Iterable<Integer>>> foos =
      WindowedValue.valueInGlobalWindow(fooValue);

  CommittedBundle<KV<String, Iterable<Integer>>> keyedBundle =
      factory
          .createKeyedBundle(StructuralKey.of("foo", StringUtf8Coder.of()), keyed)
          .add(foos)
          .commit(Instant.now());

  assertThat(keyedBundle.getElements(), containsInAnyOrder(foos));
  // The stored value must be equal to the input but not the same object.
  assertThat(
      Iterables.getOnlyElement(keyedBundle.getElements()).getValue(),
      not(theInstance(foos.getValue())));
  assertThat(keyedBundle.getPCollection(), equalTo(keyed));
  assertThat(keyedBundle.getKey(), equalTo(StructuralKey.of("foo", StringUtf8Coder.of())));
}
Example #18
Source File: SdkComponentsTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that registering the same coder, or an equal one, yields the same non-empty id, that a
 * different coder gets a different id, and that both ids resolve in the generated components.
 */
@Test
public void registerCoder() throws IOException {
  Coder<?> coder =
      KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(SetCoder.of(ByteArrayCoder.of())));
  String id = components.registerCoder(coder);

  // Same instance again.
  String repeatedId = components.registerCoder(coder);
  assertThat(repeatedId, equalTo(id));
  assertThat(id, not(isEmptyOrNullString()));

  // A separately constructed but equal coder.
  Coder<?> equalCoder =
      KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(SetCoder.of(ByteArrayCoder.of())));
  String equalCoderId = components.registerCoder(equalCoder);
  assertThat(equalCoderId, equalTo(id));

  // An unrelated coder.
  Coder<?> otherCoder = VarLongCoder.of();
  String otherCoderId = components.registerCoder(otherCoder);
  assertThat(otherCoderId, not(equalTo(id)));

  components.toComponents().getCodersOrThrow(id);
  components.toComponents().getCodersOrThrow(components.registerCoder(otherCoder));
}
Example #19
Source File: JacksonTransformsTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Runs {@code AsJsons} with its default failure handler over valid and invalid POJOs, then
 * checks the successful output and delegates the failure checks to
 * {@code assertWritingWithErrorMapHandler}.
 */
@Test
public void testWritingInvalidJsonsWithFailuresDefaultHandler() {
  WithFailures.Result<PCollection<String>, KV<MyPojo, Map<String, String>>> result =
      pipeline
          .apply(
              Create.of(Iterables.concat(POJOS, INVALID_POJOS))
                  .withCoder(SerializableCoder.of(MyPojo.class)))
          .apply(AsJsons.of(MyPojo.class).exceptionsVia());

  result.output().setCoder(StringUtf8Coder.of());

  KvCoder<MyPojo, Map<String, String>> failureCoder =
      KvCoder.of(
          SerializableCoder.of(MyPojo.class),
          MapCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()));
  result.failures().setCoder(failureCoder);

  PAssert.that(result.output()).containsInAnyOrder(VALID_JSONS);
  assertWritingWithErrorMapHandler(result);

  pipeline.run();
}
Example #20
Source File: GroupNonMergingWindowsFunctionsTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds a {@link GroupByKeyIterator} over five items in the given window: two for key
 * {@code "k1"} followed by three for key {@code "k2"}.
 */
private <W extends BoundedWindow> GroupByKeyIterator<String, Integer, W> createGbkIterator(
    W window, Coder<W> winCoder, WindowingStrategy<Object, W> winStrategy)
    throws Coder.NonDeterministicException {
  StringUtf8Coder keyCoder = StringUtf8Coder.of();
  KvCoder<String, Integer> kvCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
  final WindowedValue.FullWindowedValueCoder<KV<String, Integer>> winValCoder =
      WindowedValue.getFullCoder(kvCoder, winStrategy.getWindowFn().windowCoder());

  ItemFactory<String, Integer, W> itemFactory =
      ItemFactory.forWindow(keyCoder, winValCoder, winCoder, window);
  List<Tuple2<ByteArray, byte[]>> encodedItems =
      Arrays.asList(
          itemFactory.create("k1", 1),
          itemFactory.create("k1", 2),
          itemFactory.create("k2", 3),
          itemFactory.create("k2", 4),
          itemFactory.create("k2", 5));

  return new GroupByKeyIterator<>(encodedItems.iterator(), keyCoder, winStrategy, winValCoder);
}
Example #21
Source File: Reify.java From beam with Apache License 2.0 | 6 votes |
/**
 * Unwraps each {@code KV<K, TimestampedValue<V>>} into a {@code KV<K, V>} emitted at the
 * timestamp carried by the wrapped value. The output coder is assembled from the input's key
 * coder and the value coder nested inside its {@link TimestampedValueCoder}.
 */
@Override
public PCollection<KV<K, V>> expand(PCollection<KV<K, TimestampedValue<V>>> input) {
  KvCoder<K, TimestampedValue<V>> inputCoder =
      (KvCoder<K, TimestampedValue<V>>) input.getCoder();
  TimestampedValueCoder<V> timestampedCoder =
      (TimestampedValueCoder<V>) inputCoder.getValueCoder();
  KvCoder<K, V> outputCoder =
      KvCoder.of(inputCoder.getKeyCoder(), timestampedCoder.getValueCoder());

  return input
      .apply(
          ParDo.of(
              new DoFn<KV<K, TimestampedValue<V>>, KV<K, V>>() {
                @Override
                public Duration getAllowedTimestampSkew() {
                  return Duration.millis(Long.MAX_VALUE);
                }

                @ProcessElement
                public void processElement(
                    @Element KV<K, TimestampedValue<V>> kv, OutputReceiver<KV<K, V>> r) {
                  TimestampedValue<V> timestamped = kv.getValue();
                  r.outputWithTimestamp(
                      KV.of(kv.getKey(), timestamped.getValue()), timestamped.getTimestamp());
                }
              }))
      .setCoder(outputCoder);
}
Example #22
Source File: WriteFiles.java From beam with Apache License 2.0 | 6 votes |
@Override public WriteFilesResult<DestinationT> expand( PCollection<List<FileResult<DestinationT>>> input) { List<PCollectionView<?>> finalizeSideInputs = Lists.newArrayList(getSideInputs()); if (numShardsView != null) { finalizeSideInputs.add(numShardsView); } PCollection<KV<DestinationT, String>> outputFilenames = input .apply("Finalize", ParDo.of(new FinalizeFn()).withSideInputs(finalizeSideInputs)) .setCoder(KvCoder.of(destinationCoder, StringUtf8Coder.of())) // Reshuffle the filenames to make sure they are observable downstream // only after each one is done finalizing. .apply(Reshuffle.viaRandomKey()); TupleTag<KV<DestinationT, String>> perDestinationOutputFilenamesTag = new TupleTag<>("perDestinationOutputFilenames"); return WriteFilesResult.in( input.getPipeline(), perDestinationOutputFilenamesTag, outputFilenames); }
Example #23
Source File: SplittableProcessFnFactory.java From beam with Apache License 2.0 | 5 votes |
/**
 * Deserializes the user's {@link DoFnInfo} and rewraps its function in a {@code ProcessFn}.
 *
 * <p>The restriction coder property must decode to a {@link KvCoder}; its key coder is used as
 * the restriction coder and its value coder as the watermark estimator state coder.
 */
@Override
public DoFnInfo<?, ?> getDoFnInfo(CloudObject cloudUserFn) throws Exception {
  DoFnInfo<?, ?> doFnInfo =
      (DoFnInfo<?, ?>)
          deserializeFromByteArray(
              getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN), "Serialized DoFnInfo");

  Coder restrictionAndStateCoder =
      coderFromCloudObject(
          fromSpec(getObject(cloudUserFn, WorkerPropertyNames.RESTRICTION_CODER)));
  checkState(
      restrictionAndStateCoder instanceof KvCoder,
      "Expected pair coder with restriction as key coder and watermark estimator state as value coder, but received %s.",
      restrictionAndStateCoder);
  KvCoder pairCoder = (KvCoder) restrictionAndStateCoder;
  Coder restrictionCoder = pairCoder.getKeyCoder();
  Coder watermarkEstimatorStateCoder = pairCoder.getValueCoder();

  ProcessFn processFn =
      new ProcessFn(
          doFnInfo.getDoFn(),
          doFnInfo.getInputCoder(),
          restrictionCoder,
          watermarkEstimatorStateCoder,
          doFnInfo.getWindowingStrategy());

  return DoFnInfo.forFn(
      processFn,
      doFnInfo.getWindowingStrategy(),
      doFnInfo.getSideInputViews(),
      KeyedWorkItemCoder.of(
          ByteArrayCoder.of(),
          KvCoder.of(doFnInfo.getInputCoder(), restrictionCoder),
          doFnInfo.getWindowingStrategy().getWindowFn().windowCoder()),
      doFnInfo.getOutputCoders(),
      doFnInfo.getMainOutput(),
      doFnInfo.getDoFnSchemaInformation(),
      doFnInfo.getSideInputMapping());
}
Example #24
Source File: StreamingDataflowWorkerTest.java From beam with Apache License 2.0 | 5 votes |
@Test public void testKeyTokenInvalidException() throws Exception { if (streamingEngine) { // TODO: This test needs to be adapted to work with streamingEngine=true. return; } KvCoder<String, String> kvCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()); List<ParallelInstruction> instructions = Arrays.asList( makeSourceInstruction(kvCoder), makeDoFnInstruction(new KeyTokenInvalidFn(), 0, kvCoder), makeSinkInstruction(kvCoder, 1)); FakeWindmillServer server = new FakeWindmillServer(errorCollector); server.addWorkToOffer(makeInput(0, 0, "key")); StreamingDataflowWorker worker = makeWorker(instructions, createTestingPipelineOptions(server), true /* publishCounters */); worker.start(); server.waitForEmptyWorkQueue(); server.addWorkToOffer(makeInput(1, 0, "key")); Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(1); assertEquals(makeExpectedOutput(1, 0, "key", "key").build(), result.get(1L)); assertEquals(1, result.size()); }
Example #25
Source File: CoderTranslators.java From beam with Apache License 2.0 | 5 votes |
/**
 * Returns a translator for {@link KvCoder}: the key and value coders, in that order, are its
 * components, and a {@link KvCoder} is rebuilt from the first two entries of a component list.
 */
static CoderTranslator<KvCoder<?, ?>> kv() {
  return new SimpleStructuredCoderTranslator<KvCoder<?, ?>>() {
    @Override
    public List<? extends Coder<?>> getComponents(KvCoder<?, ?> from) {
      Coder<?> keyCoder = from.getKeyCoder();
      Coder<?> valueCoder = from.getValueCoder();
      return ImmutableList.of(keyCoder, valueCoder);
    }

    @Override
    public KvCoder<?, ?> fromComponents(List<Coder<?>> components) {
      Coder<?> keyCoder = components.get(0);
      Coder<?> valueCoder = components.get(1);
      return KvCoder.of(keyCoder, valueCoder);
    }
  };
}
Example #26
Source File: ViewTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Builds a multimap side input whose key coder is a {@code NonDeterministicStringCoder} and, for
 * each main-input word, outputs the side-input values stored under the word's first letter.
 */
@Test
@Category({ValidatesRunner.class, DataflowPortabilityApiUnsupported.class})
public void testMultimapSideInputWithNonDeterministicKeyCoder() {
  final PCollectionView<Map<String, Iterable<Integer>>> view =
      pipeline
          .apply(
              "CreateSideInput",
              Create.of(KV.of("a", 1), KV.of("a", 1), KV.of("a", 2), KV.of("b", 3))
                  .withCoder(KvCoder.of(new NonDeterministicStringCoder(), VarIntCoder.of())))
          .apply(View.asMultimap());

  DoFn<String, KV<String, Integer>> lookupByFirstLetterFn =
      new DoFn<String, KV<String, Integer>>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          String firstLetter = c.element().substring(0, 1);
          for (Integer v : c.sideInput(view).get(firstLetter)) {
            c.output(KV.of(c.element(), v));
          }
        }
      };

  PCollection<KV<String, Integer>> output =
      pipeline
          .apply("CreateMainInput", Create.of("apple", "banana", "blackberry"))
          .apply("OutputSideInputs", ParDo.of(lookupByFirstLetterFn).withSideInputs(view));

  PAssert.that(output)
      .containsInAnyOrder(
          KV.of("apple", 1),
          KV.of("apple", 1),
          KV.of("apple", 2),
          KV.of("banana", 3),
          KV.of("blackberry", 3));

  pipeline.run();
}
Example #27
Source File: CombiningGroupAlsoByWindowsViaOutputBufferDoFnTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Runs the shared sliding-window combining check from {@code GroupAlsoByWindowProperties}
 * against a {@code CombiningGABWViaOutputBufferDoFnFactory} built from {@code Sum.ofLongs()}.
 */
@Test
public void testCombinesElementsInSlidingWindows() throws Exception {
  CombineFn<Long, ?, Long> combineFn = Sum.ofLongs();
  KvCoder<String, Long> inputCoder = KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of());
  AppliedCombineFn<String, Long, ?, Long> appliedFn =
      AppliedCombineFn.withInputCoder(combineFn, CoderRegistry.createDefault(), inputCoder);

  GroupAlsoByWindowProperties.combinesElementsInSlidingWindows(
      new CombiningGABWViaOutputBufferDoFnFactory<>(StringUtf8Coder.of(), appliedFn), combineFn);
}
Example #28
Source File: KafkaIO.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Pairs every input value with a {@code null} key, sets a {@link KvCoder} made of a
 * {@code NullOnlyCoder} and the input's own coder, and hands the result to
 * {@code kvWriteTransform}.
 */
@Override
public PDone expand(PCollection<V> input) {
  SimpleFunction<V, KV<K, V>> attachNullKey =
      new SimpleFunction<V, KV<K, V>>() {
        @Override
        public KV<K, V> apply(V element) {
          return KV.of(null, element);
        }
      };

  return input
      .apply("Kafka values with default key", MapElements.via(attachNullKey))
      .setCoder(KvCoder.of(new NullOnlyCoder<>(), input.getCoder()))
      .apply(kvWriteTransform);
}
Example #29
Source File: GroupingTablesTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Exercises a buffering grouping table capped at a max size of 1000: nothing is output for the
 * first four puts, two later puts each make a grouped entry appear in the receiver, and a final
 * flush leaves all four keys with their grouped values in the receiver.
 */
@Test
public void testBufferingGroupingTable() throws Exception {
  GroupingTableBase<String, String, List<String>> table =
      (GroupingTableBase<String, String, List<String>>)
          GroupingTables.buffering(
              new IdentityGroupingKeyCreator(),
              new KvPairInfo(),
              new StringPowerSizeEstimator(),
              new StringPowerSizeEstimator());
  table.setMaxSize(1000);

  TestOutputReceiver receiver =
      new TestOutputReceiver(
          KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(StringUtf8Coder.of())),
          NameContextsForTests.nameContextForTest());

  // No output is expected after these four puts.
  table.put("A", "a", receiver);
  table.put("B", "b1", receiver);
  table.put("B", "b2", receiver);
  table.put("C", "c", receiver);
  assertThat(receiver.outputElems, empty());

  // After this put the "C" group is expected in the output.
  table.put("C", "cccc", receiver);
  assertThat(receiver.outputElems, hasItem((Object) KV.of("C", Arrays.asList("c", "cccc"))));

  // After this put the "DDDD" group is expected in the output.
  table.put("DDDD", "d", receiver);
  assertThat(receiver.outputElems, hasItem((Object) KV.of("DDDD", Arrays.asList("d"))));

  // After the flush every group must be present.
  table.flush(receiver);
  assertThat(
      receiver.outputElems,
      IsIterableContainingInAnyOrder.<Object>containsInAnyOrder(
          KV.of("A", Arrays.asList("a")),
          KV.of("B", Arrays.asList("b1", "b2")),
          KV.of("C", Arrays.asList("c", "cccc")),
          KV.of("DDDD", Arrays.asList("d"))));
}
Example #30
Source File: GroupByKeyTranslatorBatch.java From beam with Apache License 2.0 | 5 votes |
@Override public void translateTransform( PTransform<PCollection<KV<K, V>>, PCollection<KV<K, Iterable<V>>>> transform, TranslationContext context) { @SuppressWarnings("unchecked") final PCollection<KV<K, V>> inputPCollection = (PCollection<KV<K, V>>) context.getInput(); Dataset<WindowedValue<KV<K, V>>> input = context.getDataset(inputPCollection); WindowingStrategy<?, ?> windowingStrategy = inputPCollection.getWindowingStrategy(); KvCoder<K, V> kvCoder = (KvCoder<K, V>) inputPCollection.getCoder(); Coder<V> valueCoder = kvCoder.getValueCoder(); // group by key only Coder<K> keyCoder = kvCoder.getKeyCoder(); KeyValueGroupedDataset<K, WindowedValue<KV<K, V>>> groupByKeyOnly = input.groupByKey(KVHelpers.extractKey(), EncoderHelpers.fromBeamCoder(keyCoder)); // group also by windows WindowedValue.FullWindowedValueCoder<KV<K, Iterable<V>>> outputCoder = WindowedValue.FullWindowedValueCoder.of( KvCoder.of(keyCoder, IterableCoder.of(valueCoder)), windowingStrategy.getWindowFn().windowCoder()); Dataset<WindowedValue<KV<K, Iterable<V>>>> output = groupByKeyOnly.flatMapGroups( new GroupAlsoByWindowViaOutputBufferFn<>( windowingStrategy, new InMemoryStateInternalsFactory<>(), SystemReduceFn.buffering(valueCoder), context.getSerializableOptions()), EncoderHelpers.fromBeamCoder(outputCoder)); context.putDataset(context.getOutput(), output); }