org.apache.beam.sdk.coders.KvCoder Java Examples
The following examples show how to use
org.apache.beam.sdk.coders.KvCoder.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CombineValuesFnFactory.java From beam with Apache License 2.0 | 6 votes |
private static <K, InputT, AccumT, OutputT> DoFnInfo<?, ?> createDoFnInfo( AppliedCombineFn<K, InputT, AccumT, OutputT> combineFn, SideInputReader sideInputReader) { GlobalCombineFnRunner<InputT, AccumT, OutputT> combineFnRunner = GlobalCombineFnRunners.create(combineFn.getFn()); DoFn<KV<K, AccumT>, KV<K, OutputT>> doFn = new ExtractOutputDoFn<>(combineFnRunner, sideInputReader); KvCoder<K, AccumT> inputCoder = null; if (combineFn.getKvCoder() != null) { inputCoder = KvCoder.of(combineFn.getKvCoder().getKeyCoder(), combineFn.getAccumulatorCoder()); } return DoFnInfo.forFn( doFn, combineFn.getWindowingStrategy(), combineFn.getSideInputViews(), inputCoder, Collections.emptyMap(), // Not needed here. new TupleTag<>(PropertyNames.OUTPUT), DoFnSchemaInformation.create(), Collections.emptyMap()); }
Example #2
Source File: JacksonTransformsTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Applies {@code AsJsons} with the default {@code exceptionsVia()} handler to a mix of valid and
 * invalid POJOs, asserts the successful output equals {@code VALID_JSONS}, and delegates the
 * failure checks to {@code assertWritingWithErrorMapHandler}.
 */
@Test
public void testWritingInvalidJsonsWithFailuresDefaultHandler() {
  SerializableCoder<MyPojo> pojoCoder = SerializableCoder.of(MyPojo.class);
  KvCoder<MyPojo, Map<String, String>> failureCoder =
      KvCoder.of(pojoCoder, MapCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()));

  WithFailures.Result<PCollection<String>, KV<MyPojo, Map<String, String>>> result =
      pipeline
          .apply(Create.of(Iterables.concat(POJOS, INVALID_POJOS)).withCoder(pojoCoder))
          .apply(AsJsons.of(MyPojo.class).exceptionsVia());

  // Coders are set explicitly on both branches of the result.
  result.output().setCoder(StringUtf8Coder.of());
  result.failures().setCoder(failureCoder);

  PAssert.that(result.output()).containsInAnyOrder(VALID_JSONS);
  assertWritingWithErrorMapHandler(result);

  pipeline.run();
}
Example #3
Source File: GroupNonMergingWindowsFunctionsTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Creates a {@link GroupByKeyIterator} over five fixed items: values 1 and 2 under key
 * {@code "k1"} and values 3, 4 and 5 under key {@code "k2"}, all created for {@code window}.
 *
 * @param window window passed to the item factory
 * @param winCoder coder for {@code window}, passed to the item factory
 * @param winStrategy windowing strategy; its window fn's coder is used for the windowed values
 * @return an iterator over the fixed items
 */
private <W extends BoundedWindow> GroupByKeyIterator<String, Integer, W> createGbkIterator(
    W window, Coder<W> winCoder, WindowingStrategy<Object, W> winStrategy)
    throws Coder.NonDeterministicException {
  StringUtf8Coder keyCoder = StringUtf8Coder.of();
  KvCoder<String, Integer> kvCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
  final WindowedValue.FullWindowedValueCoder<KV<String, Integer>> windowedKvCoder =
      WindowedValue.getFullCoder(kvCoder, winStrategy.getWindowFn().windowCoder());

  ItemFactory<String, Integer, W> itemFactory =
      ItemFactory.forWindow(keyCoder, windowedKvCoder, winCoder, window);
  List<Tuple2<ByteArray, byte[]>> encodedItems =
      Arrays.asList(
          itemFactory.create("k1", 1),
          itemFactory.create("k1", 2),
          itemFactory.create("k2", 3),
          itemFactory.create("k2", 4),
          itemFactory.create("k2", 5));

  return new GroupByKeyIterator<>(
      encodedItems.iterator(), keyCoder, winStrategy, windowedKvCoder);
}
Example #4
Source File: SdkComponentsTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that registering the same coder, or an equal coder, yields the same non-empty id, that
 * a different coder yields a different id, and that both ids can be looked up in the components.
 */
@Test
public void registerCoder() throws IOException {
  Coder<?> coder =
      KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(SetCoder.of(ByteArrayCoder.of())));
  String id = components.registerCoder(coder);

  // Registering the identical instance again must return the original id.
  String idOnReRegistration = components.registerCoder(coder);
  assertThat(idOnReRegistration, equalTo(id));
  assertThat(id, not(isEmptyOrNullString()));

  // A separately constructed but equal coder must also map to the original id.
  Coder<?> equalCoder =
      KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(SetCoder.of(ByteArrayCoder.of())));
  String idForEqualCoder = components.registerCoder(equalCoder);
  assertThat(idForEqualCoder, equalTo(id));

  Coder<?> otherCoder = VarLongCoder.of();
  String idForOtherCoder = components.registerCoder(otherCoder);
  assertThat(idForOtherCoder, not(equalTo(id)));

  components.toComponents().getCodersOrThrow(id);
  components.toComponents().getCodersOrThrow(components.registerCoder(otherCoder));
}
Example #5
Source File: CloningBundleFactoryTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Adds one grouped element to a keyed bundle and asserts that the committed bundle contains an
 * equal element whose value is not the same instance as the one added, and that the bundle
 * reports the expected PCollection and key.
 */
@Test
public void keyedBundleWorkingCoderSucceedsClonesOutput() {
  PCollection<Integer> ints = p.apply(Create.of(1, 3).withCoder(VarIntCoder.of()));
  KvCoder<String, Integer> keyedCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
  PCollection<KV<String, Iterable<Integer>>> keyed =
      ints.apply(WithKeys.of("foo")).setCoder(keyedCoder).apply(GroupByKey.create());

  WindowedValue<KV<String, Iterable<Integer>>> foos =
      WindowedValue.valueInGlobalWindow(
          KV.<String, Iterable<Integer>>of("foo", ImmutableList.of(1, 3)));

  CommittedBundle<KV<String, Iterable<Integer>>> keyedBundle =
      factory
          .createKeyedBundle(StructuralKey.of("foo", StringUtf8Coder.of()), keyed)
          .add(foos)
          .commit(Instant.now());

  assertThat(keyedBundle.getElements(), containsInAnyOrder(foos));
  // Equal element, but the value must not be the very instance that was added.
  KV<String, Iterable<Integer>> committedValue =
      Iterables.getOnlyElement(keyedBundle.getElements()).getValue();
  assertThat(committedValue, not(theInstance(foos.getValue())));
  assertThat(keyedBundle.getPCollection(), equalTo(keyed));
  assertThat(keyedBundle.getKey(), equalTo(StructuralKey.of("foo", StringUtf8Coder.of())));
}
Example #6
Source File: Reify.java From beam with Apache License 2.0 | 6 votes |
/**
 * Unwraps each {@code KV<K, TimestampedValue<V>>} into a {@code KV<K, V>} that is output with the
 * timestamp carried by the {@link TimestampedValue}.
 *
 * <p>The input coder is cast to a {@link KvCoder} whose value coder is a
 * {@link TimestampedValueCoder}; the output coder is built from their key and value coders.
 */
@Override
public PCollection<KV<K, V>> expand(PCollection<KV<K, TimestampedValue<V>>> input) {
  KvCoder<K, TimestampedValue<V>> inputKvCoder =
      (KvCoder<K, TimestampedValue<V>>) input.getCoder();
  TimestampedValueCoder<V> timestampedCoder =
      (TimestampedValueCoder<V>) inputKvCoder.getValueCoder();

  return input
      .apply(
          ParDo.of(
              new DoFn<KV<K, TimestampedValue<V>>, KV<K, V>>() {
                @Override
                public Duration getAllowedTimestampSkew() {
                  return Duration.millis(Long.MAX_VALUE);
                }

                @ProcessElement
                public void processElement(
                    @Element KV<K, TimestampedValue<V>> kv, OutputReceiver<KV<K, V>> r) {
                  TimestampedValue<V> timestamped = kv.getValue();
                  r.outputWithTimestamp(
                      KV.of(kv.getKey(), timestamped.getValue()),
                      kv.getValue().getTimestamp());
                }
              }))
      .setCoder(KvCoder.of(inputKvCoder.getKeyCoder(), timestampedCoder.getValueCoder()));
}
Example #7
Source File: StreamingDataflowWorker.java From beam with Apache License 2.0 | 6 votes |
/** * Extracts the userland key coder, if any, from the coder used in the initial read step of a * stage. This encodes many assumptions about how the streaming execution context works. */ @Nullable private Coder<?> extractKeyCoder(Coder<?> readCoder) { if (!(readCoder instanceof WindowedValueCoder)) { throw new RuntimeException( String.format( "Expected coder for streaming read to be %s, but received %s", WindowedValueCoder.class.getSimpleName(), readCoder)); } // Note that TimerOrElementCoder is a backwards-compatibility class // that is really a FakeKeyedWorkItemCoder Coder<?> valueCoder = ((WindowedValueCoder<?>) readCoder).getValueCoder(); if (valueCoder instanceof KvCoder<?, ?>) { return ((KvCoder<?, ?>) valueCoder).getKeyCoder(); } if (!(valueCoder instanceof WindmillKeyedWorkItem.FakeKeyedWorkItemCoder<?, ?>)) { return null; } return ((WindmillKeyedWorkItem.FakeKeyedWorkItemCoder<?, ?>) valueCoder).getKeyCoder(); }
Example #8
Source File: GroupByKeyTranslator.java From beam with Apache License 2.0 | 6 votes |
/**
 * Chooses the {@link SystemReduceFn} for a transform: a buffering fn for {@link GroupByKey}, or
 * a combining fn for {@link Combine.PerKey}.
 *
 * @param transform transform being translated
 * @param pipeline pipeline whose coder registry is used for the combining case
 * @param kvInputCoder coder of the transform's KV input
 * @return the reduce fn matching the transform
 * @throws RuntimeException if the transform is neither of the two supported kinds
 */
@SuppressWarnings("unchecked")
private static <K, InputT, OutputT>
    SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> getSystemReduceFn(
        PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform,
        Pipeline pipeline,
        KvCoder<K, InputT> kvInputCoder) {
  if (transform instanceof GroupByKey) {
    return (SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow>)
        SystemReduceFn.buffering(kvInputCoder.getValueCoder());
  }

  if (transform instanceof Combine.PerKey) {
    final CombineFnBase.GlobalCombineFn<? super InputT, ?, OutputT> combineFn =
        ((Combine.PerKey) transform).getFn();
    return SystemReduceFn.combining(
        kvInputCoder.getKeyCoder(),
        AppliedCombineFn.withInputCoder(combineFn, pipeline.getCoderRegistry(), kvInputCoder));
  }

  throw new RuntimeException("Transform " + transform + " cannot be translated as GroupByKey.");
}
Example #9
Source File: WriteToGCSAvroTest.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Expects an {@link IllegalArgumentException} with the "called with null input" message when
 * {@link WriteToGCSAvro} is configured with a {@code null} output directory.
 */
@Test
public void testWriteWithoutOutputDirectory() {
  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("withOutputDirectory(outputDirectory) called with null input.");

  KvCoder<String, String> messageCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
  pipeline
      .apply("CreateInput", Create.of(message).withCoder(messageCoder))
      .apply(
          "WriteTextFile(s)",
          WriteToGCSAvro.newBuilder()
              .withOutputDirectory(null)
              .withOutputFilenamePrefix(AVRO_FILENAME_PREFIX)
              .setNumShards(NUM_SHARDS)
              .withTempLocation(FAKE_TEMP_LOCATION)
              .build());
  pipeline.run();
}
Example #10
Source File: ReduceFnTester.java From beam with Apache License 2.0 | 6 votes |
/** * Creates a {@link ReduceFnTester} for the given {@link WindowingStrategy} and {@link CombineFn}, * creating a {@link TriggerStateMachine} from the {@link Trigger} in the {@link * WindowingStrategy}. */ public static <W extends BoundedWindow, AccumT, OutputT> ReduceFnTester<Integer, OutputT, W> combining( WindowingStrategy<?, W> strategy, CombineFn<Integer, AccumT, OutputT> combineFn, Coder<OutputT> outputCoder) throws Exception { CoderRegistry registry = CoderRegistry.createDefault(); // Ensure that the CombineFn can be converted into an AppliedCombineFn AppliedCombineFn.withInputCoder( combineFn, registry, KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())); return combining( strategy, TriggerStateMachines.stateMachineForTrigger( TriggerTranslation.toProto(strategy.getTrigger())), combineFn, outputCoder); }
Example #11
Source File: DataflowSideInputHandlerFactoryTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds a multimap side input handler from the factory and asserts that looking up key
 * {@code "foo2"} in the global window yields an empty iterable.
 */
@Test
public void emptyResultForEmptyCollection() {
  ImmutableMap<String, SideInputReader> readersByTransform =
      ImmutableMap.<String, SideInputReader>builder()
          .put(TRANSFORM_ID, fakeSideInputReader)
          .build();

  ImmutableMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> viewsById =
      ImmutableMap.<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>>builder()
          .put(sideInputId, view)
          .build();

  DataflowSideInputHandlerFactory factory =
      DataflowSideInputHandlerFactory.of(readersByTransform, viewsById);

  KvCoder<String, Integer> elementCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
  MultimapSideInputHandler<String, Integer, GlobalWindow> handler =
      factory.forMultimapSideInput(
          TRANSFORM_ID, SIDE_INPUT_NAME, elementCoder, GlobalWindow.Coder.INSTANCE);

  Iterable<Integer> values = handler.get("foo2", GlobalWindow.INSTANCE);
  assertThat(values, emptyIterable());
}
Example #12
Source File: WriteToGCSTextTest.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Expects an {@link IllegalArgumentException} with the "called with null input" message when
 * {@link WriteToGCSText} is configured with a {@code null} temp location.
 */
@Test
public void testWriteWithoutTempLocation() {
  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("withTempLocation(tempLocation) called with null input. ");

  KvCoder<String, String> messageCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
  pipeline
      .apply("CreateInput", Create.of(message).withCoder(messageCoder))
      .apply(
          "WriteTextFile(s)",
          WriteToGCSText.newBuilder()
              .withOutputDirectory(FAKE_DIR)
              .withOutputFilenamePrefix(TEXT_FILENAME_PREFIX)
              .setNumShards(NUM_SHARDS)
              .withTempLocation(null)
              .build());
  pipeline.run();
}
Example #13
Source File: CombineTranslation.java From beam with Apache License 2.0 | 6 votes |
/**
 * Asks the combine fn for its accumulator coder, using the pipeline's coder registry and the
 * value coder of the transform's single non-additional input.
 *
 * @param combineFn combine function whose accumulator coder is requested
 * @param transform applied per-key combine whose main input supplies the value coder
 * @return the accumulator coder
 * @throws IOException if the combine fn cannot provide a coder
 */
private static <K, InputT, AccumT> Coder<AccumT> extractAccumulatorCoder(
    GlobalCombineFn<InputT, AccumT, ?> combineFn,
    AppliedPTransform<PCollection<KV<K, InputT>>, ?, Combine.PerKey<K, InputT, ?>> transform)
    throws IOException {
  try {
    @SuppressWarnings("unchecked")
    PCollection<KV<K, InputT>> mainInput =
        (PCollection<KV<K, InputT>>)
            Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(transform));

    CoderRegistry coderRegistry = transform.getPipeline().getCoderRegistry();
    Coder<InputT> inputValueCoder = ((KvCoder<K, InputT>) mainInput.getCoder()).getValueCoder();
    return combineFn.getAccumulatorCoder(coderRegistry, inputValueCoder);
  } catch (CannotProvideCoderException e) {
    throw new IOException("Could not obtain a Coder for the accumulator", e);
  }
}
Example #14
Source File: ToIsmRecordForMultimapDoFnFactory.java From beam with Apache License 2.0 | 6 votes |
/**
 * Creates a {@code ToIsmRecordForMultimapParDoFn} from the coder stored under
 * {@code PropertyNames.ENCODING} in {@code cloudUserFn}.
 *
 * <p>The decoded coder must be an {@link IsmRecordCoder}; its first two coder arguments become
 * the key and value coders of the {@link KvCoder} passed to the ParDoFn. The remaining
 * parameters are not used by this method.
 *
 * @throws IllegalStateException via {@code checkState} if the decoded coder is not an
 *     {@link IsmRecordCoder}
 */
@Override
public ParDoFn create(
    PipelineOptions options,
    CloudObject cloudUserFn,
    List<SideInputInfo> sideInputInfos,
    TupleTag<?> mainOutputTag,
    Map<TupleTag<?>, Integer> outputTupleTagsToReceiverIndices,
    DataflowExecutionContext<?> executionContext,
    DataflowOperationContext operationContext)
    throws Exception {
  CloudObject encodingSpec =
      CloudObject.fromSpec(Structs.getObject(cloudUserFn, PropertyNames.ENCODING));
  Coder<?> decodedCoder = CloudObjects.coderFromCloudObject(encodingSpec);

  checkState(
      decodedCoder instanceof IsmRecordCoder,
      "Expected to received an instanceof an %s but got %s",
      IsmRecordCoder.class.getSimpleName(),
      decodedCoder);
  IsmRecordCoder<?> ismRecordCoder = (IsmRecordCoder<?>) decodedCoder;

  return new ToIsmRecordForMultimapParDoFn(
      KvCoder.of(
          ismRecordCoder.getCoderArguments().get(0),
          ismRecordCoder.getCoderArguments().get(1)));
}
Example #15
Source File: GroupNonMergingWindowsFunctions.java From beam with Apache License 2.0 | 6 votes |
/** * Creates composite key of K and W and group all values for that composite key with Spark's * repartitionAndSortWithinPartitions. Stream of sorted by composite key's is transformed to key * with iterator of all values for that key (via {@link GroupByKeyIterator}). * * <p>repartitionAndSortWithinPartitions is used because all values are not collected into memory * at once, but streamed with iterator unlike GroupByKey (it minimizes memory pressure). */ static <K, V, W extends BoundedWindow> JavaRDD<WindowedValue<KV<K, Iterable<V>>>> groupByKeyAndWindow( JavaRDD<WindowedValue<KV<K, V>>> rdd, Coder<K> keyCoder, Coder<V> valueCoder, WindowingStrategy<?, W> windowingStrategy, Partitioner partitioner) { final Coder<W> windowCoder = windowingStrategy.getWindowFn().windowCoder(); FullWindowedValueCoder<KV<K, V>> windowedKvCoder = WindowedValue.FullWindowedValueCoder.of(KvCoder.of(keyCoder, valueCoder), windowCoder); JavaPairRDD<ByteArray, byte[]> windowInKey = bringWindowToKey( rdd, keyCoder, windowCoder, wv -> CoderHelpers.toByteArray(wv, windowedKvCoder)); return windowInKey .repartitionAndSortWithinPartitions(getPartitioner(partitioner, rdd)) .mapPartitions( it -> new GroupByKeyIterator<>(it, keyCoder, windowingStrategy, windowedKvCoder)) .filter(Objects::nonNull); // filter last null element from GroupByKeyIterator }
Example #16
Source File: StatefulParDoEvaluatorFactoryTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Feeds three elements with decreasing timestamps through a {@link TestStream}, the last one
 * added after the watermark has advanced, with an allowed lateness of 2 ms on the global window.
 * Asserts the output of {@code statefulConcat()} is {@code "3"}, {@code "3:2"} and
 * {@code "3:2:1"}.
 */
@Test
public void testRequiresTimeSortedInputWithLateDataAndAllowedLateness() {
  Instant now = Instant.ofEpochMilli(0);

  TestStream<KV<String, Integer>> stream =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()))
          .addElements(TimestampedValue.of(KV.of("", 1), now.plus(2)))
          .addElements(TimestampedValue.of(KV.of("", 2), now.plus(1)))
          .advanceWatermarkTo(now.plus(1))
          .addElements(TimestampedValue.of(KV.of("", 3), now))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, Integer>> input =
      pipeline
          .apply(stream)
          .apply(
              Window.<KV<String, Integer>>into(new GlobalWindows())
                  .withAllowedLateness(Duration.millis(2)));

  PCollection<String> concatenated = input.apply(ParDo.of(statefulConcat()));
  PAssert.that(concatenated).containsInAnyOrder("3", "3:2", "3:2:1");
  pipeline.run();
}
Example #17
Source File: StatefulTeamScoreTest.java From deployment-examples with MIT License | 6 votes |
/**
 * Tests that {@link UpdateTeamScoreFn} emits the expected outputs for a single team: five events
 * from two users of the red team are expected to yield 100, 200 and 401 in the global window.
 */
@Test
public void testScoreUpdatesOneTeam() {
  KvCoder<String, GameActionInfo> eventCoder =
      KvCoder.of(StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class));

  TestStream<KV<String, GameActionInfo>> createEvents =
      TestStream.create(eventCoder)
          .advanceWatermarkTo(baseTime)
          .addElements(
              event(TestUser.RED_TWO, 99, Duration.standardSeconds(10)),
              event(TestUser.RED_ONE, 1, Duration.standardSeconds(20)),
              event(TestUser.RED_ONE, 0, Duration.standardSeconds(30)),
              event(TestUser.RED_TWO, 100, Duration.standardSeconds(40)),
              event(TestUser.RED_TWO, 201, Duration.standardSeconds(50)))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, GameActionInfo>> events = p.apply(createEvents);
  PCollection<KV<String, Integer>> teamScores =
      events.apply(ParDo.of(new UpdateTeamScoreFn(100)));

  String redTeam = TestUser.RED_ONE.getTeam();
  PAssert.that(teamScores)
      .inWindow(GlobalWindow.INSTANCE)
      .containsInAnyOrder(KV.of(redTeam, 100), KV.of(redTeam, 200), KV.of(redTeam, 401));

  p.run().waitUntilFinish();
}
Example #18
Source File: WriteToGCSTextTest.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Expects an {@link IllegalArgumentException} with the "called with null input" message when
 * {@link WriteToGCSText} is configured with a {@code null} output directory.
 */
@Test
public void testWriteWithoutOutputDirectory() {
  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("withOutputDirectory(outputDirectory) called with null input.");

  KvCoder<String, String> messageCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
  pipeline
      .apply("CreateInput", Create.of(message).withCoder(messageCoder))
      .apply(
          "WriteTextFile(s)",
          WriteToGCSText.newBuilder()
              .withOutputDirectory(null)
              .withOutputFilenamePrefix(TEXT_FILENAME_PREFIX)
              .setNumShards(NUM_SHARDS)
              .withTempLocation(FAKE_TEMP_LOCATION)
              .build());
  pipeline.run();
}
Example #19
Source File: WriteFiles.java From beam with Apache License 2.0 | 6 votes |
@Override public WriteFilesResult<DestinationT> expand( PCollection<List<FileResult<DestinationT>>> input) { List<PCollectionView<?>> finalizeSideInputs = Lists.newArrayList(getSideInputs()); if (numShardsView != null) { finalizeSideInputs.add(numShardsView); } PCollection<KV<DestinationT, String>> outputFilenames = input .apply("Finalize", ParDo.of(new FinalizeFn()).withSideInputs(finalizeSideInputs)) .setCoder(KvCoder.of(destinationCoder, StringUtf8Coder.of())) // Reshuffle the filenames to make sure they are observable downstream // only after each one is done finalizing. .apply(Reshuffle.viaRandomKey()); TupleTag<KV<DestinationT, String>> perDestinationOutputFilenamesTag = new TupleTag<>("perDestinationOutputFilenames"); return WriteFilesResult.in( input.getPipeline(), perDestinationOutputFilenamesTag, outputFilenames); }
Example #20
Source File: BigQueryFeatureSink.java From feast with Apache License 2.0 | 6 votes |
/**
 * Generates a table schema per feature set, stores a multimap view of the schemas in
 * {@code schemasView}, and returns the feature set references (the keys of the schemas).
 *
 * @param featureSetSpecs Feature set to be written
 * @return the references of the feature sets for which a schema was generated
 */
@Override
public PCollection<FeatureSetReference> prepareWrite(
    PCollection<KV<FeatureSetReference, FeatureSetProto.FeatureSetSpec>> featureSetSpecs) {
  FeatureSetSpecToTableSchema specToSchemaFn =
      new FeatureSetSpecToTableSchema(
          DatasetId.of(getProjectId(), getDatasetId()), getBQClient());

  PCollection<KV<FeatureSetReference, TableSchema>> schemas =
      featureSetSpecs
          .apply("GenerateTableSchema", ParDo.of(specToSchemaFn))
          .setCoder(
              KvCoder.of(
                  AvroCoder.of(FeatureSetReference.class),
                  FeatureSetSpecToTableSchema.TableSchemaCoder.of()));

  // Side effect: the view is kept on the instance.
  schemasView =
      schemas
          .apply("ReferenceString", ParDo.of(new ReferenceToString()))
          .apply("View", View.asMultimap());

  return schemas.apply("Ready", Keys.create());
}
Example #21
Source File: SortValues.java From beam with Apache License 2.0 | 6 votes |
/**
 * Retrieves the {@link Coder} for the secondary key-value pairs.
 *
 * <p>The input coder must be a {@link KvCoder} whose value coder is an {@link IterableCoder}
 * whose element coder is itself a {@link KvCoder}.
 *
 * @param inputCoder coder of the grouped input
 * @return the element coder of the nested iterable, as a {@link KvCoder}
 * @throws IllegalStateException if any of the three coder-shape requirements is not met
 */
@SuppressWarnings("unchecked")
private static <PrimaryKeyT, SecondaryKeyT, ValueT>
    KvCoder<SecondaryKeyT, ValueT> getSecondaryKeyValueCoder(
        Coder<KV<PrimaryKeyT, Iterable<KV<SecondaryKeyT, ValueT>>>> inputCoder) {
  if (!(inputCoder instanceof KvCoder)) {
    throw new IllegalStateException("SortValues requires its input to use KvCoder");
  }
  KvCoder<PrimaryKeyT, Iterable<KV<SecondaryKeyT, ValueT>>> outerCoder =
      (KvCoder<PrimaryKeyT, Iterable<KV<SecondaryKeyT, ValueT>>>) inputCoder;

  Coder<Iterable<KV<SecondaryKeyT, ValueT>>> groupedValuesCoder = outerCoder.getValueCoder();
  if (!(groupedValuesCoder instanceof IterableCoder)) {
    throw new IllegalStateException(
        "SortValues requires the values be encoded with IterableCoder");
  }

  Coder<KV<SecondaryKeyT, ValueT>> pairCoder =
      ((IterableCoder<KV<SecondaryKeyT, ValueT>>) groupedValuesCoder).getElemCoder();
  if (!(pairCoder instanceof KvCoder)) {
    throw new IllegalStateException(
        "SortValues requires the secondary key-value pairs to use KvCoder");
  }
  return (KvCoder<SecondaryKeyT, ValueT>) pairCoder;
}
Example #22
Source File: ReduceFnTester.java From beam with Apache License 2.0 | 6 votes |
/**
 * Creates a {@link ReduceFnTester} around a combining {@link SystemReduceFn} built from a
 * context-aware combine fn, using a String key coder and a varint input coder.
 *
 * @param strategy windowing strategy under test
 * @param triggerStateMachine trigger state machine passed to the tester
 * @param combineFn context-aware combine function
 * @param outputCoder coder for the combined output
 * @param options pipeline options passed to the tester
 * @param sideInputReader side input reader passed to the tester
 * @return the configured tester
 */
public static <W extends BoundedWindow, AccumT, OutputT>
    ReduceFnTester<Integer, OutputT, W> combining(
        WindowingStrategy<?, W> strategy,
        TriggerStateMachine triggerStateMachine,
        CombineFnWithContext<Integer, AccumT, OutputT> combineFn,
        Coder<OutputT> outputCoder,
        PipelineOptions options,
        SideInputReader sideInputReader)
        throws Exception {
  CoderRegistry registry = CoderRegistry.createDefault();
  KvCoder<String, Integer> kvInputCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
  AppliedCombineFn<String, Integer, AccumT, OutputT> appliedFn =
      AppliedCombineFn.withInputCoder(combineFn, registry, kvInputCoder);

  return new ReduceFnTester<>(
      strategy,
      triggerStateMachine,
      SystemReduceFn.combining(StringUtf8Coder.of(), appliedFn),
      outputCoder,
      options,
      sideInputReader);
}
Example #23
Source File: DeduplicateTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Sends the same three key/value pairs twice, separated by a one-minute processing-time advance,
 * and asserts that {@link Deduplicate} keyed on the representative value emits each pair once.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testTriggeredRepresentativeValuesWithType() {
  Instant base = new Instant(0);
  KvCoder<Integer, String> elementCoder = KvCoder.of(VarIntCoder.of(), StringUtf8Coder.of());

  TestStream<KV<Integer, String>> values =
      TestStream.create(elementCoder)
          .advanceWatermarkTo(base)
          .addElements(
              TimestampedValue.of(KV.of(1, "k1"), base),
              TimestampedValue.of(KV.of(2, "k2"), base.plus(Duration.standardSeconds(10))),
              TimestampedValue.of(KV.of(3, "k3"), base.plus(Duration.standardSeconds(20))))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              TimestampedValue.of(KV.of(1, "k1"), base.plus(Duration.standardSeconds(30))),
              TimestampedValue.of(KV.of(2, "k2"), base.plus(Duration.standardSeconds(40))),
              TimestampedValue.of(KV.of(3, "k3"), base.plus(Duration.standardSeconds(50))))
          .advanceWatermarkToInfinity();

  PCollection<KV<Integer, String>> streamed = p.apply(values);
  PCollection<KV<Integer, String>> distinctValues =
      streamed.apply(
          Deduplicate.withRepresentativeValueFn(new Keys<Integer>())
              .withRepresentativeCoder(VarIntCoder.of()));

  PAssert.that(distinctValues).containsInAnyOrder(KV.of(1, "k1"), KV.of(2, "k2"), KV.of(3, "k3"));
  p.run();
}
Example #24
Source File: OuterLeftJoinTest.java From beam with Apache License 2.0 | 5 votes |
/** Expects a {@link NullPointerException} when the right collection of a left outer join is null. */
@Test(expected = NullPointerException.class)
public void testJoinRightCollectionNull() {
  p.enableAbandonedNodeEnforcement(false);

  KvCoder<String, Long> leftCoder = KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of());
  Join.leftOuterJoin(p.apply(Create.of(leftListOfKv).withCoder(leftCoder)), null, "");
}
Example #25
Source File: OuterLeftJoinTest.java From beam with Apache License 2.0 | 5 votes |
/** Expects a {@link NullPointerException} when the left collection of a left outer join is null. */
@Test(expected = NullPointerException.class)
public void testJoinLeftCollectionNull() {
  p.enableAbandonedNodeEnforcement(false);

  KvCoder<String, String> rightCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
  Join.leftOuterJoin(null, p.apply(Create.of(rightListOfKv).withCoder(rightCoder)), "");
}
Example #26
Source File: WindowDoFnOperatorTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Builds a {@link WindowDoFnOperator} that sums longs per key over one-minute fixed windows,
 * using varlong coders for keys and values and a single output tagged {@code "main-output"}.
 *
 * @return the configured operator
 */
private WindowDoFnOperator<Long, Long, Long> getWindowDoFnOperator() {
  WindowingStrategy<Object, IntervalWindow> windowingStrategy =
      WindowingStrategy.of(FixedWindows.of(standardMinutes(1)));
  TupleTag<KV<Long, Long>> outputTag = new TupleTag<>("main-output");

  SystemReduceFn<Long, Long, long[], Long, BoundedWindow> sumReduceFn =
      SystemReduceFn.combining(
          VarLongCoder.of(),
          AppliedCombineFn.withInputCoder(
              Sum.ofLongs(),
              CoderRegistry.createDefault(),
              KvCoder.of(VarLongCoder.of(), VarLongCoder.of())));

  Coder<IntervalWindow> windowCoder = windowingStrategy.getWindowFn().windowCoder();
  SingletonKeyedWorkItemCoder<Long, Long> workItemCoder =
      SingletonKeyedWorkItemCoder.of(VarLongCoder.of(), VarLongCoder.of(), windowCoder);
  FullWindowedValueCoder<SingletonKeyedWorkItem<Long, Long>> inputCoder =
      WindowedValue.getFullCoder(workItemCoder, windowCoder);

  KvCoder<Long, Long> outputKvCoder = KvCoder.of(VarLongCoder.of(), VarLongCoder.of());
  FullWindowedValueCoder<KV<Long, Long>> outputCoder =
      WindowedValue.getFullCoder(outputKvCoder, windowCoder);

  return new WindowDoFnOperator<Long, Long, Long>(
      sumReduceFn,
      "stepName",
      (Coder) inputCoder,
      outputTag,
      emptyList(),
      new MultiOutputOutputManagerFactory<>(outputTag, outputCoder),
      windowingStrategy,
      emptyMap(),
      emptyList(),
      PipelineOptionsFactory.as(FlinkPipelineOptions.class),
      VarLongCoder.of(),
      new WorkItemKeySelector(VarLongCoder.of()));
}
Example #27
Source File: StreamingDataflowWorkerTest.java From beam with Apache License 2.0 | 5 votes |
private List<ParallelInstruction> makeUnboundedSourcePipeline( int numMessagesPerShard, // Total number of messages in each split of the unbounded source. DoFn<ValueWithRecordId<KV<Integer, Integer>>, String> doFn) throws Exception { DataflowPipelineOptions options = PipelineOptionsFactory.create().as(DataflowPipelineOptions.class); options.setNumWorkers(1); CloudObject codec = CloudObjects.asCloudObject( WindowedValue.getFullCoder( ValueWithRecordId.ValueWithRecordIdCoder.of( KvCoder.of(VarIntCoder.of(), VarIntCoder.of())), GlobalWindow.Coder.INSTANCE), /*sdkComponents=*/ null); return Arrays.asList( new ParallelInstruction() .setSystemName("Read") .setOriginalName("OriginalReadName") .setRead( new ReadInstruction() .setSource( CustomSources.serializeToCloudSource( new TestCountingSource(numMessagesPerShard), options) .setCodec(codec))) .setOutputs( Arrays.asList( new InstructionOutput() .setName("read_output") .setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME) .setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME) .setCodec(codec))), makeDoFnInstruction(doFn, 0, StringUtf8Coder.of(), WindowingStrategy.globalDefault()), makeSinkInstruction(StringUtf8Coder.of(), 1, GlobalWindow.Coder.INSTANCE)); }
Example #28
Source File: OuterFullJoinTest.java From beam with Apache License 2.0 | 5 votes |
/** Expects a {@link NullPointerException} when the left collection of a full outer join is null. */
@Test(expected = NullPointerException.class)
public void testJoinLeftCollectionNull() {
  p.enableAbandonedNodeEnforcement(false);

  KvCoder<String, String> rightCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
  Join.fullOuterJoin(null, p.apply(Create.of(rightListOfKv).withCoder(rightCoder)), "", "");
}
Example #29
Source File: OuterRightJoinTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Expects a {@link NullPointerException} when a right outer join over two empty collections is
 * given {@code null} as its null-value argument.
 */
@Test(expected = NullPointerException.class)
public void testJoinNullValueIsNull() {
  p.enableAbandonedNodeEnforcement(false);

  KvCoder<String, Long> leftCoder = KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of());
  KvCoder<String, String> rightCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
  Join.rightOuterJoin(
      p.apply("CreateLeft", Create.empty(leftCoder)),
      p.apply("CreateRight", Create.empty(rightCoder)),
      null);
}
Example #30
Source File: GroupIntoBatches.java From beam with Apache License 2.0 | 5 votes |
/**
 * Applies {@code GroupIntoBatchesDoFn} to the keyed input, passing it the batch size, the
 * input's allowed lateness, and the key and value coders taken from the input's {@link KvCoder}.
 *
 * @param input keyed input; its coder must be a {@link KvCoder}
 * @return the result of applying the batching {@link ParDo} to {@code input}
 * @throws IllegalArgumentException via {@code checkArgument} if the input coder is not a
 *     {@link KvCoder}
 */
@Override
public PCollection<KV<K, Iterable<InputT>>> expand(PCollection<KV<K, InputT>> input) {
  Duration allowedLateness = input.getWindowingStrategy().getAllowedLateness();

  checkArgument(
      input.getCoder() instanceof KvCoder,
      "coder specified in the input PCollection is not a KvCoder");
  // Parameterized cast plus typed accessors instead of a raw KvCoder and unchecked casts of
  // getCoderArguments() entries.
  KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) input.getCoder();
  Coder<K> keyCoder = inputCoder.getKeyCoder();
  Coder<InputT> valueCoder = inputCoder.getValueCoder();

  return input.apply(
      ParDo.of(new GroupIntoBatchesDoFn<>(batchSize, allowedLateness, keyCoder, valueCoder)));
}