Java Code Examples for org.apache.beam.sdk.coders.KvCoder#of()

The following examples show how to use org.apache.beam.sdk.coders.KvCoder#of(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CombineValuesFnFactory.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the {@link DoFnInfo} for a {@code CombineValuesDoFn} wrapping the given combine fn.
 *
 * <p>The input coder is derived from the combine fn's KV coder when one is available; otherwise
 * {@code null} is passed through as the input coder.
 *
 * @param combineFn the applied combine fn supplying the fn, coders, windowing and side input views
 * @param sideInputReader reader handed to the created {@code CombineValuesDoFn}
 * @return the assembled {@link DoFnInfo}
 */
private static <K, InputT, AccumT, OutputT> DoFnInfo<?, ?> createDoFnInfo(
    AppliedCombineFn<K, InputT, AccumT, OutputT> combineFn, SideInputReader sideInputReader) {
  GlobalCombineFnRunner<InputT, AccumT, OutputT> runner =
      GlobalCombineFnRunners.create(combineFn.getFn());
  DoFn<KV<K, Iterable<InputT>>, KV<K, OutputT>> combineValuesFn =
      new CombineValuesDoFn<>(runner, sideInputReader);

  // The DoFn consumes KV<K, Iterable<InputT>>, so the value coder is wrapped in an
  // IterableCoder. Without a KV coder on the combine fn there is nothing to derive from.
  Coder<KV<K, Iterable<InputT>>> groupedInputCoder =
      combineFn.getKvCoder() == null
          ? null
          : KvCoder.of(
              combineFn.getKvCoder().getKeyCoder(),
              IterableCoder.of(combineFn.getKvCoder().getValueCoder()));

  return DoFnInfo.forFn(
      combineValuesFn,
      combineFn.getWindowingStrategy(),
      combineFn.getSideInputViews(),
      groupedInputCoder,
      Collections.emptyMap(), // Not needed here.
      new TupleTag<>(PropertyNames.OUTPUT),
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example 2
Source File: GroupByKeyViaGroupByKeyOnly.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Declares the output collection for this transform.
 *
 * <p>The output coder is derived from the input coder by unwrapping the {@link WindowedValue}
 * layer around each grouped element: {@code KV<K, Iterable<WindowedValue<V>>>} becomes
 * {@code KV<K, Iterable<V>>}.
 */
@Override
public PCollection<KV<K, Iterable<V>>> expand(
    PCollection<KV<K, Iterable<WindowedValue<V>>>> input) {
  @SuppressWarnings("unchecked")
  KvCoder<K, Iterable<WindowedValue<V>>> groupedCoder =
      (KvCoder<K, Iterable<WindowedValue<V>>>) input.getCoder();

  Coder<K> keyCoder = groupedCoder.getKeyCoder();

  // Peel the coder layers one at a time: iterable -> windowed value -> plain value.
  IterableCoder<WindowedValue<V>> windowedIterableCoder =
      (IterableCoder<WindowedValue<V>>) groupedCoder.getValueCoder();
  WindowedValueCoder<V> windowedElementCoder =
      (WindowedValueCoder<V>) windowedIterableCoder.getElemCoder();
  Coder<V> elementCoder = windowedElementCoder.getValueCoder();

  // Re-wrap the plain value coder in an iterable and pair it with the original key coder.
  Coder<KV<K, Iterable<V>>> outputCoder = KvCoder.of(keyCoder, IterableCoder.of(elementCoder));

  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(), windowingStrategy, input.isBounded(), outputCoder);
}
 
Example 3
Source File: SdkComponentsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that registering the same or an equal coder yields one stable, non-empty id, that a
 * different coder gets a different id, and that both ids can be looked up in the components.
 */
@Test
public void registerCoder() throws IOException {
  Coder<?> firstCoder =
      KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(SetCoder.of(ByteArrayCoder.of())));
  String firstId = components.registerCoder(firstCoder);

  // Registering the very same instance again must hand back the same id.
  assertThat(components.registerCoder(firstCoder), equalTo(firstId));
  assertThat(firstId, not(isEmptyOrNullString()));

  // A separately constructed coder of the same shape must map to the same id as well.
  Coder<?> sameShapeCoder =
      KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(SetCoder.of(ByteArrayCoder.of())));
  assertThat(components.registerCoder(sameShapeCoder), equalTo(firstId));

  // A different coder must receive a different id.
  Coder<?> differentCoder = VarLongCoder.of();
  assertThat(components.registerCoder(differentCoder), not(equalTo(firstId)));

  // Both ids must resolve in the resulting components; getCodersOrThrow throws otherwise.
  components.toComponents().getCodersOrThrow(firstId);
  components.toComponents().getCodersOrThrow(components.registerCoder(differentCoder));
}
 
Example 4
Source File: FakeBigQueryServices.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes a Base64 string into a table and its rows.
 *
 * <p>The decoded bytes are read as a {@code KV<String, List<TableRow>>}; the key is parsed as a
 * JSON {@link Table}, and {@code convertNumbers} is applied to every row before returning.
 *
 * @param queryResult Base64 text holding the encoded key/value pair
 * @return the parsed table paired with its rows
 * @throws IOException if decoding fails
 */
public static KV<Table, List<TableRow>> decodeQueryResult(String queryResult) throws IOException {
  KvCoder<String, List<TableRow>> resultCoder =
      KvCoder.of(StringUtf8Coder.of(), ListCoder.of(TableRowJsonCoder.of()));
  byte[] encodedResult = Base64.decodeBase64(queryResult);
  KV<String, List<TableRow>> decoded =
      resultCoder.decode(new ByteArrayInputStream(encodedResult));

  Table table = BigQueryHelpers.fromJsonString(decoded.getKey(), Table.class);
  List<TableRow> rows = decoded.getValue();
  for (TableRow row : rows) {
    FakeBigQueryServices.convertNumbers(row);
  }
  return KV.of(table, rows);
}
 
Example 5
Source File: AutoKVWrapper.java    From component-runtime with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a transform that turns each {@code Record} into a {@code KV<String, Record>}.
 *
 * <p>The output coder pairs a {@code StringUtf8Coder} with a {@code SchemaRegistryCoder}. Note
 * that {@code plugin} is not used in this method body.
 *
 * @param plugin not read by this factory
 * @param idGenerator passed to the {@code AutoKVWrapper} constructor
 * @param component passed to the {@code AutoKVWrapper} constructor
 * @param branch passed to the {@code AutoKVWrapper} constructor
 * @return the wrapping transform
 */
public static PTransform<PCollection<Record>, PCollection<KV<String, Record>>> of(final String plugin,
        final Function<GroupKeyProvider.GroupContext, String> idGenerator, final String component,
        final String branch) {

    final KvCoder<String, Record> outputCoder = KvCoder.of(StringUtf8Coder.of(), SchemaRegistryCoder.of());
    final AutoKVWrapper keyingFn = new AutoKVWrapper(idGenerator, component, branch);
    return new RecordParDoTransformCoderProvider<>(outputCoder, keyingFn);
}
 
Example 6
Source File: StreamingDataflowWorkerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a source -> {@code KeyTokenInvalidFn} -> sink pipeline against a fake Windmill server and
 * checks that, after two work items are offered for the same key, exactly one commit is
 * collected and it matches the expected output for the second work item.
 */
@Test
public void testKeyTokenInvalidException() throws Exception {
  if (streamingEngine) {
    // TODO: This test needs to be adapted to work with streamingEngine=true.
    return;
  }
  KvCoder<String, String> stringPairCoder =
      KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());

  List<ParallelInstruction> pipelineInstructions =
      Arrays.asList(
          makeSourceInstruction(stringPairCoder),
          makeDoFnInstruction(new KeyTokenInvalidFn(), 0, stringPairCoder),
          makeSinkInstruction(stringPairCoder, 1));

  FakeWindmillServer windmill = new FakeWindmillServer(errorCollector);
  windmill.addWorkToOffer(makeInput(0, 0, "key"));

  StreamingDataflowWorker dataflowWorker =
      makeWorker(
          pipelineInstructions,
          createTestingPipelineOptions(windmill),
          true /* publishCounters */);
  dataflowWorker.start();

  // Let the first work item drain before offering the second one.
  windmill.waitForEmptyWorkQueue();

  windmill.addWorkToOffer(makeInput(1, 0, "key"));

  Map<Long, Windmill.WorkItemCommitRequest> commits = windmill.waitForAndGetCommits(1);

  assertEquals(makeExpectedOutput(1, 0, "key", "key").build(), commits.get(1L));
  assertEquals(1, commits.size());
}
 
Example 7
Source File: KryoCoderTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips a {@code KV<ClassToBeEncoded, List<Void>>} through a {@link KvCoder} whose key
 * coder is a {@code KryoCoder}, and checks that key and value survive unchanged.
 */
@Test
public void testCodingWithKvCoderClassToBeEncoded() throws IOException {
  final KryoRegistrar kryoRegistrar =
      kryo -> {
        kryo.register(TestClass.class);
        kryo.register(ClassToBeEncoded.class);
      };

  final ListCoder<Void> voidListCoder = ListCoder.of(VoidCoder.of());
  final KvCoder<ClassToBeEncoded, List<Void>> pairCoder =
      KvCoder.of(KryoCoder.of(OPTIONS, kryoRegistrar), voidListCoder);

  // The value is a list holding two null entries.
  final List<Void> originalValue = new ArrayList<>();
  originalValue.add(null);
  originalValue.add(null);
  final ClassToBeEncoded originalKey = new ClassToBeEncoded("something", 1, 0.2);

  final ByteArrayOutputStream sink = new ByteArrayOutputStream();
  pairCoder.encode(KV.of(originalKey, originalValue), sink);
  final byte[] encodedPair = sink.toByteArray();

  final KV<ClassToBeEncoded, List<Void>> roundTripped =
      pairCoder.decode(new ByteArrayInputStream(encodedPair));

  assertNotNull(roundTripped);
  assertNotNull(roundTripped.getKey());
  assertEquals(originalKey, roundTripped.getKey());

  assertNotNull(roundTripped.getValue());
  assertEquals(originalValue, roundTripped.getValue());
}
 
Example 8
Source File: WindmillKeyedWorkItem.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes {@code kvCoder} from the given element coder.
 *
 * <p>A {@code KeyedWorkItemCoder} contributes its key and element coders to a new
 * {@link KvCoder}; a {@link KvCoder} is used as is.
 *
 * @param elemCoder the coder to adapt
 * @throws IllegalArgumentException if {@code elemCoder} is neither of the two supported kinds
 */
protected FakeKeyedWorkItemCoder(Coder<?> elemCoder) {
  if (elemCoder instanceof KeyedWorkItemCoder) {
    KeyedWorkItemCoder workItemCoder = (KeyedWorkItemCoder) elemCoder;
    this.kvCoder = KvCoder.of(workItemCoder.getKeyCoder(), workItemCoder.getElementCoder());
    return;
  }
  if (elemCoder instanceof KvCoder) {
    this.kvCoder = ((KvCoder) elemCoder);
    return;
  }
  throw new IllegalArgumentException(
      "FakeKeyedWorkItemCoder only works with KeyedWorkItemCoder or KvCoder; was: "
          + elemCoder.getClass());
}
 
Example 9
Source File: UnboundedSourceWrapper.java    From beam with Apache License 2.0 5 votes vote down vote up
@SuppressWarnings("unchecked")
public UnboundedSourceWrapper(
    String stepName,
    PipelineOptions pipelineOptions,
    UnboundedSource<OutputT, CheckpointMarkT> source,
    int parallelism)
    throws Exception {
  this.stepName = stepName;
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions);
  this.isConvertedBoundedSource =
      source instanceof UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter;

  if (source.requiresDeduping()) {
    LOG.warn("Source {} requires deduping but Flink runner doesn't support this yet.", source);
  }

  Coder<CheckpointMarkT> checkpointMarkCoder = source.getCheckpointMarkCoder();
  if (checkpointMarkCoder == null) {
    LOG.info("No CheckpointMarkCoder specified for this source. Won't create snapshots.");
    checkpointCoder = null;
  } else {

    Coder<? extends UnboundedSource<OutputT, CheckpointMarkT>> sourceCoder =
        (Coder) SerializableCoder.of(new TypeDescriptor<UnboundedSource>() {});

    checkpointCoder = KvCoder.of(sourceCoder, checkpointMarkCoder);
  }

  // get the splits early. we assume that the generated splits are stable,
  // this is necessary so that the mapping of state to source is correct
  // when restoring
  splitSources = source.split(parallelism, pipelineOptions);

  FlinkPipelineOptions options = pipelineOptions.as(FlinkPipelineOptions.class);
  idleTimeoutMs = options.getShutdownSourcesAfterIdleMs();
}
 
Example 10
Source File: Watch.java    From beam with Apache License 2.0 4 votes vote down vote up
/** Returns a coder for the state: an {@code Instant} paired with a {@code ReadableDuration}. */
@Override
public Coder<KV<Instant, ReadableDuration>> getStateCoder() {
  Coder<Instant> instantCoder = InstantCoder.of();
  Coder<ReadableDuration> durationCoder = DurationCoder.of();
  return KvCoder.of(instantCoder, durationCoder);
}
 
Example 11
Source File: BatchLoads.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Reshuffles the partitioned input and applies the "MultiPartitionsWriteTables" step.
 *
 * @param input partitions keyed by sharded destination, each carrying a list of strings
 * @param jobIdTokenView side input view passed to {@code WriteTables} and added to its side
 *     inputs
 * @return the output of the write step, with a {@code KvCoder} of table destination and string
 */
private PCollection<KV<TableDestination, String>> writeTempTables(
    PCollection<KV<ShardedKey<DestinationT>, List<String>>> input,
    PCollectionView<String> jobIdTokenView) {
  // The job id token view comes first, followed by whatever the dynamic destinations need.
  List<PCollectionView<?>> allSideInputs = Lists.newArrayList(jobIdTokenView);
  allSideInputs.addAll(dynamicDestinations.getSideInputs());

  Coder<KV<ShardedKey<DestinationT>, List<String>>> partitionsCoder =
      KvCoder.of(
          ShardedKeyCoder.of(NullableCoder.of(destinationCoder)),
          ListCoder.of(StringUtf8Coder.of()));

  // If the final destination table exists already (and we're appending to it), then the temp
  // tables must exactly match schema, partitioning, etc. Wrap the DynamicDestinations object
  // with one that makes this happen.
  boolean wrapWithMatchTable =
      createDisposition.equals(CreateDisposition.CREATE_IF_NEEDED)
          || createDisposition.equals(CreateDisposition.CREATE_NEVER);
  @SuppressWarnings("unchecked")
  DynamicDestinations<?, DestinationT> effectiveDestinations = dynamicDestinations;
  if (wrapWithMatchTable) {
    effectiveDestinations =
        DynamicDestinationsHelpers.matchTableDynamicDestinations(
            effectiveDestinations, bigQueryServices);
  }

  // The destination coder version depends on whether clustering is enabled.
  Coder<TableDestination> tempTableDestinationCoder;
  if (clusteringEnabled) {
    tempTableDestinationCoder = TableDestinationCoderV3.of();
  } else {
    tempTableDestinationCoder = TableDestinationCoderV2.of();
  }

  // Reshuffle will distribute this among multiple workers, and also guard against
  // reexecution of the WritePartitions step once WriteTables has begun.
  PCollection<KV<ShardedKey<DestinationT>, List<String>>> reshuffledPartitions =
      input.setCoder(partitionsCoder).apply("MultiPartitionsReshuffle", Reshuffle.of());

  // If WriteBundlesToFiles produced more than DEFAULT_MAX_FILES_PER_PARTITION files or
  // DEFAULT_MAX_BYTES_PER_PARTITION bytes, then the import needs to be split into multiple
  // partitions, and those partitions will be specified in multiPartitionsTag.
  return reshuffledPartitions
      .apply(
          "MultiPartitionsWriteTables",
          new WriteTables<>(
              true,
              bigQueryServices,
              jobIdTokenView,
              WriteDisposition.WRITE_EMPTY,
              CreateDisposition.CREATE_IF_NEEDED,
              allSideInputs,
              effectiveDestinations,
              loadJobProjectId,
              maxRetryJobs,
              ignoreUnknownValues,
              kmsKey,
              rowWriterFactory.getSourceFormat(),
              useAvroLogicalTypes,
              schemaUpdateOptions))
      .setCoder(KvCoder.of(tempTableDestinationCoder, StringUtf8Coder.of()));
}
 
Example 12
Source File: Watch.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a coder for the state: an {@code Instant}, encoded through a {@code NullableCoder},
 * paired with a {@code ReadableDuration}.
 */
@Override
public Coder<KV<Instant, ReadableDuration>> getStateCoder() {
  Coder<Instant> nullableInstantCoder = NullableCoder.of(InstantCoder.of());
  Coder<ReadableDuration> durationCoder = DurationCoder.of();
  return KvCoder.of(nullableInstantCoder, durationCoder);
}
 
Example 13
Source File: KafkaRecordCoder.java    From DataflowTemplates with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a coder whose {@code kvCoder} combines the given key and value coders.
 *
 * @param keyCoder coder for keys
 * @param valueCoder coder for values
 */
public KafkaRecordCoder(Coder<K> keyCoder, Coder<V> valueCoder) {
  KvCoder<K, V> pairCoder = KvCoder.of(keyCoder, valueCoder);
  this.kvCoder = pairCoder;
}
 
Example 14
Source File: StreamingDataflowWorkerTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Drives a windowing source -> MergeWindows ParDo -> sink pipeline on a fake Windmill server.
 *
 * <p>Each action's response is offered as work in turn. After each one, a single commit is
 * awaited, and its timers and holds are verified against the action's expectations.
 *
 * @param actions the work items to offer, each with its expected timers and holds
 */
private void runMergeSessionsActions(List<Action> actions) throws Exception {
  Coder<KV<String, String>> inputKvCoder =
      KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
  Coder<WindowedValue<KV<String, String>>> windowedInputCoder =
      FullWindowedValueCoder.of(inputKvCoder, IntervalWindow.getCoder());
  KvCoder<String, List<String>> outputKvCoder =
      KvCoder.of(StringUtf8Coder.of(), ListCoder.of(StringUtf8Coder.of()));
  Coder<WindowedValue<KV<String, List<String>>>> windowedOutputCoder =
      FullWindowedValueCoder.of(outputKvCoder, IntervalWindow.getCoder());

  CloudObject mergeFnSpec = CloudObject.forClassName("MergeWindowsDoFn");
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.JAVA_SDK_HARNESS_ENVIRONMENT);

  // Session windows with a 10ms gap, discarding fired panes, firing at the watermark and then
  // on every late element, with 60 minutes of allowed lateness.
  WindowingStrategy<?, ?> sessionStrategy =
      WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)))
          .withMode(AccumulationMode.DISCARDING_FIRED_PANES)
          .withTrigger(
              Repeatedly.forever(
                  AfterWatermark.pastEndOfWindow()
                      .withLateFirings(AfterPane.elementCountAtLeast(1))))
          .withAllowedLateness(Duration.standardMinutes(60));
  addString(
      mergeFnSpec,
      PropertyNames.SERIALIZED_FN,
      StringUtils.byteArrayToJsonString(
          WindowingStrategyTranslation.toMessageProto(sessionStrategy, components)
              .toByteArray()));
  addObject(
      mergeFnSpec,
      WorkerPropertyNames.INPUT_CODER,
      CloudObjects.asCloudObject(windowedInputCoder, /*sdkComponents=*/ null));

  ParDoInstruction mergeParDo =
      new ParDoInstruction()
          .setInput(new InstructionInput().setProducerInstructionIndex(0).setOutputNum(0))
          .setNumOutputs(1)
          .setUserFn(mergeFnSpec);
  InstructionOutput mergeOutput =
      new InstructionOutput()
          .setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME)
          .setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME)
          .setName("output")
          .setCodec(CloudObjects.asCloudObject(windowedOutputCoder, /*sdkComponents=*/ null));
  ParallelInstruction mergeStep =
      new ParallelInstruction()
          .setSystemName("MergeWindows-System")
          .setName("MergeWindowsStep")
          .setOriginalName("MergeWindowsOriginal")
          .setParDo(mergeParDo)
          .setOutputs(Arrays.asList(mergeOutput));

  List<ParallelInstruction> pipelineInstructions =
      Arrays.asList(
          makeWindowingSourceInstruction(inputKvCoder),
          mergeStep,
          makeSinkInstruction(outputKvCoder, 1));

  FakeWindmillServer windmill = new FakeWindmillServer(errorCollector);

  StreamingDataflowWorker dataflowWorker =
      makeWorker(
          pipelineInstructions,
          createTestingPipelineOptions(windmill),
          false /* publishCounters */);
  Map<String, String> stateNames = new HashMap<>();
  stateNames.put("MergeWindowsStep", "MergeWindows");
  dataflowWorker.addStateNameMappings(stateNames);
  dataflowWorker.start();

  // Respond to any GetData requests with empty state.
  for (int remaining = 1000; remaining > 0; --remaining) {
    windmill.addDataFnToOffer(EMPTY_DATA_RESPONDER);
  }

  // Commits are looked up under 1-based keys, hence the +1 when fetching each result.
  for (int index = 0; index < actions.size(); ++index) {
    Action current = actions.get(index);
    windmill.addWorkToOffer(current.response);
    Map<Long, Windmill.WorkItemCommitRequest> commits = windmill.waitForAndGetCommits(1);
    WorkItemCommitRequest commit = commits.get(index + 1L);
    assertThat(commit, Matchers.not(Matchers.nullValue()));
    verifyTimers(commit, current.expectedTimers);
    verifyHolds(commit, current.expectedHolds);
  }
}
 
Example 15
Source File: ConfigurableHDFSFileSource.java    From components with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a {@link KvCoder} built from the default coders of {@code keyClass} and
 * {@code valueClass}.
 */
@Override
public Coder<KV<K, V>> getDefaultOutputCoder() {
    final Coder<K> defaultKeyCoder = getDefaultCoder(keyClass);
    final Coder<V> defaultValueCoder = getDefaultCoder(valueClass);
    return KvCoder.of(defaultKeyCoder, defaultValueCoder);
}
 
Example 16
Source File: DataflowPipelineTranslator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a single-output ParDo into a "ParallelDo" step.
 *
 * <p>Adds the step, its inputs and main output, and translates the fn. When the context uses
 * the Fn API and the fn's process element method is splittable, a restriction encoding input
 * is added as well.
 *
 * @param transform the ParDo to translate
 * @param context the translation context the step is added to
 */
private <InputT, OutputT> void translateSingleHelper(
    ParDoSingle<InputT, OutputT> transform, TranslationContext context) {

  DoFnSchemaInformation schemaInformation =
      ParDoTranslation.getSchemaInformation(context.getCurrentTransform());
  Map<String, PCollectionView<?>> sideInputsByName =
      ParDoTranslation.getSideInputMapping(context.getCurrentTransform());
  StepTranslationContext step = context.addStep(transform, "ParallelDo");
  Map<TupleTag<?>, Coder<?>> codersByOutputTag =
      context.getOutputs(transform).entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey, entry -> ((PCollection) entry.getValue()).getCoder()));

  translateInputs(
      step, context.getInput(transform), transform.getSideInputs().values(), context);
  step.addOutput(transform.getMainOutputTag().getId(), context.getOutput(transform));
  String transformId =
      context.getSdkComponents().getPTransformIdOrThrow(context.getCurrentTransform());
  translateFn(
      step,
      transformId,
      transform.getFn(),
      context.getInput(transform).getWindowingStrategy(),
      transform.getSideInputs().values(),
      context.getInput(transform).getCoder(),
      context,
      transform.getMainOutputTag(),
      codersByOutputTag,
      schemaInformation,
      sideInputsByName);

  // TODO: Move this logic into translateFn once the legacy ProcessKeyedElements is
  // removed.
  if (!context.isFnApi()) {
    return;
  }
  DoFnSignature fnSignature = DoFnSignatures.signatureForDoFn(transform.getFn());
  if (!fnSignature.processElement().isSplittable()) {
    return;
  }

  // The restriction encoding pairs the restriction coder with the watermark estimator state
  // coder, both obtained from the fn through its invoker.
  DoFnInvoker<?, ?> invoker = DoFnInvokers.invokerFor(transform.getFn());
  Coder<?> restrictionEncodingCoder =
      KvCoder.of(
          invoker.invokeGetRestrictionCoder(
              context.getInput(transform).getPipeline().getCoderRegistry()),
          invoker.invokeGetWatermarkEstimatorStateCoder(
              context.getInput(transform).getPipeline().getCoderRegistry()));
  step.addInput(
      PropertyNames.RESTRICTION_ENCODING, translateCoder(restrictionEncodingCoder, context));
}
 
Example 17
Source File: SyntheticUnboundedSource.java    From beam with Apache License 2.0 4 votes vote down vote up
/** Returns a coder for key/value pairs in which both key and value are byte arrays. */
@Override
public Coder<KV<byte[], byte[]>> getOutputCoder() {
  Coder<byte[]> keyBytesCoder = ByteArrayCoder.of();
  Coder<byte[]> valueBytesCoder = ByteArrayCoder.of();
  return KvCoder.of(keyBytesCoder, valueBytesCoder);
}
 
Example 18
Source File: HadoopFormatIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/** Returns a {@link KvCoder} combining this object's {@code keyCoder} and {@code valueCoder}. */
@Override
public Coder<KV<K, V>> getOutputCoder() {
  Coder<KV<K, V>> outputCoder = KvCoder.of(keyCoder, valueCoder);
  return outputCoder;
}
 
Example 19
Source File: TestCountingSource.java    From beam with Apache License 2.0 4 votes vote down vote up
/** Returns a coder for integer key/value pairs, using {@code VarIntCoder} on both sides. */
@Override
public Coder<KV<Integer, Integer>> getDefaultOutputCoder() {
  Coder<Integer> keyCoder = VarIntCoder.of();
  Coder<Integer> valueCoder = VarIntCoder.of();
  return KvCoder.of(keyCoder, valueCoder);
}
 
Example 20
Source File: DataGeneratorSource.java    From scotty-window-processor with Apache License 2.0 4 votes vote down vote up
/** Returns a coder for integer key/value pairs, using {@code VarIntCoder} on both sides. */
@Override
public Coder<KV<Integer, Integer>> getOutputCoder() {
    final Coder<Integer> keyCoder = VarIntCoder.of();
    final Coder<Integer> valueCoder = VarIntCoder.of();
    return KvCoder.of(keyCoder, valueCoder);
}