org.apache.beam.model.pipeline.v1.RunnerApi Java Examples

The following examples show how to use org.apache.beam.model.pipeline.v1.RunnerApi. All of them are drawn from the Apache Beam and Apache Flink open-source projects; the source file and license for each are noted above the example.
Example #1
Source File: CoderTranslation.java    From beam with Apache License 2.0
private static Coder<?> fromKnownCoder(
    RunnerApi.Coder coder, RehydratedComponents components, TranslationContext context)
    throws IOException {
  String coderUrn = coder.getSpec().getUrn();
  List<Coder<?>> coderComponents = new ArrayList<>();
  for (String componentId : coder.getComponentCoderIdsList()) {
    // Only store coders in RehydratedComponents as long as we are not using a custom
    // translation context.
    Coder<?> innerCoder =
        context == TranslationContext.DEFAULT
            ? components.getCoder(componentId)
            : fromProto(
                components.getComponents().getCodersOrThrow(componentId), components, context);
    coderComponents.add(innerCoder);
  }
  Class<? extends Coder> coderType = KNOWN_CODER_URNS.inverse().get(coderUrn);
  CoderTranslator<?> translator = KNOWN_TRANSLATORS.get(coderType);
  checkArgument(
      translator != null,
      "Unknown Coder URN %s. Known URNs: %s",
      coderUrn,
      KNOWN_CODER_URNS.values());
  return translator.fromComponents(
      coderComponents, coder.getSpec().getPayload().toByteArray(), context);
}
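
For orientation, here is a hedged round-trip sketch built only from the APIs visible in this example and Example #25: register a coder in SdkComponents, convert the components to their proto form, and rehydrate the coder. KvCoder, StringUtf8Coder, and VarLongCoder are standard SDK coders; the method name roundTripCoder is a stand-in.

static Coder<?> roundTripCoder() throws IOException {
  SdkComponents components = SdkComponents.create();
  Coder<KV<String, Long>> original = KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of());

  // Registering the coder also registers its component coders and returns its ID.
  String coderId = components.registerCoder(original);

  // Rehydrate the coder from the proto components; internally this goes through
  // fromKnownCoder for model coders such as KvCoder.
  return RehydratedComponents.forComponents(components.toComponents()).getCoder(coderId);
}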
 
Example #2
Source File: WorkerCustomSourcesTest.java    From beam with Apache License 2.0
static com.google.api.services.dataflow.model.Source translateIOToCloudSource(
    BoundedSource<?> io, DataflowPipelineOptions options) throws Exception {
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline p = Pipeline.create(options);
  p.begin().apply(Read.from(io));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  SdkComponents sdkComponents = SdkComponents.create();
  RunnerApi.Environment defaultEnvironmentForDataflow =
      Environments.createDockerEnvironment("dummy-image-url");
  sdkComponents.registerEnvironment(defaultEnvironmentForDataflow);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);

  Job workflow =
      translator
          .translate(p, pipelineProto, sdkComponents, runner, new ArrayList<DataflowPackage>())
          .getJob();
  Step step = workflow.getSteps().get(0);

  return stepToCloudSource(step);
}
 
Example #3
Source File: SamzaPipelineRunner.java    From beam with Apache License 2.0
@Override
public PortablePipelineResult run(final Pipeline pipeline, JobInfo jobInfo) {
  // Expand any splittable DoFns within the graph to enable sizing and splitting of bundles.
  Pipeline pipelineWithSdfExpanded =
      ProtoOverrides.updateTransform(
          PTransformTranslation.PAR_DO_TRANSFORM_URN,
          pipeline,
          SplittableParDoExpander.createSizedReplacement());

  // Fused pipeline proto.
  final RunnerApi.Pipeline fusedPipeline =
      GreedyPipelineFuser.fuse(pipelineWithSdfExpanded).toPipeline();
  LOG.info("Portable pipeline to run:");
  LOG.info(PipelineDotRenderer.toDotString(fusedPipeline));
  // The pipeline options coming from the SDK set an SDK-specific runner, which
  // would break serialization, so reset the runner here to a valid Java runner.
  options.setRunner(SamzaRunner.class);
  try {
    final SamzaRunner runner = SamzaRunner.fromOptions(options);
    return runner.runPortablePipeline(fusedPipeline);
  } catch (Exception e) {
    throw new RuntimeException("Failed to invoke samza job", e);
  }
}
 
Example #4
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
@Test
public void testSubnetworkConfigMissing() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p,
              pipelineProto,
              sdkComponents,
              DataflowRunner.fromOptions(options),
              Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertNull(job.getEnvironment().getWorkerPools().get(0).getSubnetwork());
}
 
Example #5
Source File: DataflowPipelineTranslator.java    From beam with Apache License 2.0
private static byte[] serializeWindowingStrategy(
    WindowingStrategy<?, ?> windowingStrategy, PipelineOptions options) {
  try {
    SdkComponents sdkComponents = SdkComponents.create();

    String workerHarnessContainerImageURL =
        DataflowRunner.getContainerImageForJob(options.as(DataflowPipelineOptions.class));
    RunnerApi.Environment defaultEnvironmentForDataflow =
        Environments.createDockerEnvironment(workerHarnessContainerImageURL);
    sdkComponents.registerEnvironment(defaultEnvironmentForDataflow);

    return WindowingStrategyTranslation.toMessageProto(windowingStrategy, sdkComponents)
        .toByteArray();
  } catch (Exception e) {
    throw new RuntimeException(
        String.format("Unable to format windowing strategy %s as bytes", windowingStrategy), e);
  }
}
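
The inverse of this serialization appears in Example #11 below. As a hedged sketch, decoding the bytes produced here amounts to parsing the RunnerApi.MessageWithComponents and rehydrating the strategy (deserializeWindowingStrategy is a stand-in name):

static WindowingStrategy<?, ?> deserializeWindowingStrategy(byte[] bytes) throws Exception {
  RunnerApi.MessageWithComponents proto = RunnerApi.MessageWithComponents.parseFrom(bytes);
  // Rehydrate the strategy against the components bundled into the message.
  return WindowingStrategyTranslation.fromProto(
      proto.getWindowingStrategy(),
      RehydratedComponents.forComponents(proto.getComponents()));
}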
 
Example #6
Source File: BoundedSourceRunnerTest.java    From beam with Apache License 2.0
@Test
public void testStart() throws Exception {
  List<WindowedValue<Long>> outValues = new ArrayList<>();
  Collection<FnDataReceiver<WindowedValue<Long>>> consumers = ImmutableList.of(outValues::add);

  ByteString encodedSource =
      ByteString.copyFrom(SerializableUtils.serializeToByteArray(CountingSource.upTo(3)));

  BoundedSourceRunner<BoundedSource<Long>, Long> runner =
      new BoundedSourceRunner<>(
          PipelineOptionsFactory.create(),
          RunnerApi.FunctionSpec.newBuilder()
              .setUrn(ProcessBundleHandler.JAVA_SOURCE_URN)
              .setPayload(encodedSource)
              .build(),
          consumers);

  runner.start();

  assertThat(
      outValues,
      contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L), valueInGlobalWindow(2L)));
}
 
Example #7
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
@Test
public void testSubnetworkConfig() throws IOException {
  final String testSubnetwork = "regions/REGION/subnetworks/SUBNETWORK";

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setSubnetwork(testSubnetwork);

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p,
              pipelineProto,
              sdkComponents,
              DataflowRunner.fromOptions(options),
              Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertEquals(testSubnetwork, job.getEnvironment().getWorkerPools().get(0).getSubnetwork());
}
 
Example #8
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
@Test
public void testNetworkConfig() throws IOException {
  final String testNetwork = "test-network";

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setNetwork(testNetwork);

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p,
              pipelineProto,
              sdkComponents,
              DataflowRunner.fromOptions(options),
              Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertEquals(testNetwork, job.getEnvironment().getWorkerPools().get(0).getNetwork());
}
 
Example #9
Source File: PCollectionTranslation.java    From beam with Apache License 2.0
static IsBounded fromProto(RunnerApi.IsBounded.Enum isBounded) {
  switch (isBounded) {
    case BOUNDED:
      return IsBounded.BOUNDED;
    case UNBOUNDED:
      return IsBounded.UNBOUNDED;
    case UNRECOGNIZED:
    default:
      // Whether this enum is unrecognized by the proto (due to the version of the
      // generated code we link to) or the switch hasn't been updated to handle it,
      // the situation is the same: we don't know what this IsBounded means
      throw new IllegalArgumentException(
          String.format(
              "Cannot convert unknown %s to %s: %s",
              RunnerApi.IsBounded.class.getCanonicalName(),
              IsBounded.class.getCanonicalName(),
              isBounded));
  }
}
 
Example #10
Source File: BatchSideInputHandlerFactoryTest.java    From beam with Apache License 2.0
private static ExecutableStage createExecutableStage(Collection<SideInputReference> sideInputs) {
  Components components = Components.getDefaultInstance();
  Environment environment = Environment.getDefaultInstance();
  PCollectionNode inputCollection =
      PipelineNode.pCollection("collection-id", RunnerApi.PCollection.getDefaultInstance());
  return ImmutableExecutableStage.of(
      components,
      environment,
      inputCollection,
      sideInputs,
      Collections.emptyList(),
      Collections.emptyList(),
      Collections.emptyList(),
      Collections.emptyList(),
      DEFAULT_WIRE_CODER_SETTINGS);
}
 
Example #11
Source File: GroupByWindowFunction.java    From beam with Apache License 2.0
/**
 * Method used to initialize the transient variables that were sent over as byte arrays or proto
 * buffers.
 */
private void initTransient() {
  if (isInitialized) {
    return;
  }

  SdkComponents components = SdkComponents.create();
  options = new SerializablePipelineOptions(serializedOptions).get();

  try {
    windowStrategyProto = RunnerApi.MessageWithComponents.parseFrom(windowBytes);
    windowingStrategy =
        (WindowingStrategy<?, W>)
            WindowingStrategyTranslation.fromProto(
                windowStrategyProto.getWindowingStrategy(),
                RehydratedComponents.forComponents(components.toComponents()));
  } catch (InvalidProtocolBufferException e) {
    LOG.info(e.getMessage());
  }
  this.isInitialized = true;
}
 
Example #12
Source File: AssignWindowsFunction.java    From beam with Apache License 2.0
/**
 * Method used to initialize the transient variables that were sent over as byte arrays or proto
 * buffers.
 */
private void initTransient() {
  if (isInitialized) {
    return;
  }
  options = new SerializablePipelineOptions(serializedOptions).get();

  try {
    RunnerApi.FunctionSpec windowFnProto = RunnerApi.FunctionSpec.parseFrom(windowFnBytes);

    windowFn =
        (WindowFn<T, BoundedWindow>)
            WindowingStrategyTranslation.windowFnFromProto(windowFnProto);
  } catch (InvalidProtocolBufferException e) {
    LOG.info(e.getMessage());
  }
  this.isInitialized = true;
}
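
For the producer side, here is a hedged sketch of how windowFnBytes might be created before shipping, using the WindowFn overload of WindowingStrategyTranslation.toProto that Example #16 calls internally (serializeWindowFn is a stand-in name):

static byte[] serializeWindowFn(WindowFn<?, ?> windowFn, PipelineOptions options)
    throws IOException {
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(
      Environments.createOrGetDefaultEnvironment(options.as(PortablePipelineOptions.class)));
  // The resulting RunnerApi.FunctionSpec is what initTransient() parses back above.
  return WindowingStrategyTranslation.toProto(windowFn, components).toByteArray();
}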
 
Example #13
Source File: DisplayDataTranslation.java    From beam with Apache License 2.0
public static List<RunnerApi.DisplayData> toProto(DisplayData displayData) {
  ImmutableList.Builder<RunnerApi.DisplayData> builder = ImmutableList.builder();
  for (DisplayData.Item item : displayData.items()) {
    Function<DisplayData.Item, ByteString> translator =
        WELL_KNOWN_URN_TRANSLATORS.get(item.getKey());
    String urn;
    if (translator != null) {
      urn = item.getKey();
    } else {
      urn = LABELLED_STRING;
      translator = DisplayDataTranslation::translateStringUtf8;
    }
    builder.add(
        RunnerApi.DisplayData.newBuilder()
            .setUrn(urn)
            .setPayload(translator.apply(item))
            .build());
  }
  return builder.build();
}
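
A hedged usage sketch: DisplayData.from(HasDisplayData) is the standard SDK entry point for collecting display data, and any PTransform qualifies. MyDoFn here is a hypothetical stand-in, and printDisplayDataUrns is a stand-in name.

static void printDisplayDataUrns() {
  // Collect display data from a transform and convert it to its proto form.
  DisplayData displayData = DisplayData.from(ParDo.of(new MyDoFn())); // MyDoFn: hypothetical DoFn
  for (RunnerApi.DisplayData item : DisplayDataTranslation.toProto(displayData)) {
    System.out.println(item.getUrn());
  }
}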
 
Example #14
Source File: ProcessPythonEnvironmentManager.java    From flink with Apache License 2.0
@Override
public RunnerApi.Environment createEnvironment() throws IOException {
	Map<String, String> env = constructEnvironmentVariables();

	if (dependencyInfo.getRequirementsFilePath().isPresent()) {
		LOG.info("Trying to pip install the Python requirements...");
		PythonEnvironmentManagerUtils.pipInstallRequirements(
			dependencyInfo.getRequirementsFilePath().get(),
			dependencyInfo.getRequirementsCacheDir().orElse(null),
			requirementsDirectory,
			dependencyInfo.getPythonExec(),
			env);
	}
	String runnerScript = PythonEnvironmentManagerUtils.getPythonUdfRunnerScript(dependencyInfo.getPythonExec(), env);

	return Environments.createProcessEnvironment(
		"",
		"",
		runnerScript,
		env);
}
 
Example #15
Source File: ProcessBundleDescriptors.java    From beam with Apache License 2.0
/**
 * Patches the input coder of a stateful transform to ensure that the byte representation of a key
 * used to partition the input element at the Runner matches the key byte representation received
 * for state requests and timers from the SDK Harness. Stateful transforms always have a KvCoder
 * as input.
 */
private static void lengthPrefixKeyCoder(
    String inputColId, Components.Builder componentsBuilder) {
  RunnerApi.PCollection pcollection = componentsBuilder.getPcollectionsOrThrow(inputColId);
  RunnerApi.Coder kvCoder = componentsBuilder.getCodersOrThrow(pcollection.getCoderId());
  Preconditions.checkState(
      ModelCoders.KV_CODER_URN.equals(kvCoder.getSpec().getUrn()),
      "Stateful executable stages must use a KV coder, but is: %s",
      kvCoder.getSpec().getUrn());
  String keyCoderId = ModelCoders.getKvCoderComponents(kvCoder).keyCoderId();
  // Retain the original coder, but wrap in LengthPrefixCoder
  String newKeyCoderId =
      LengthPrefixUnknownCoders.addLengthPrefixedCoder(keyCoderId, componentsBuilder, false);
  // Replace old key coder with LengthPrefixCoder<old_key_coder>
  kvCoder = kvCoder.toBuilder().setComponentCoderIds(0, newKeyCoderId).build();
  componentsBuilder.putCoders(pcollection.getCoderId(), kvCoder);
}
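
Conceptually, this proto rewrite mirrors what the Java SDK's LengthPrefixCoder does in-process: it writes the element's byte length before the element bytes, so runner and SDK harness agree on key boundaries. A hedged sketch (lengthPrefixedEncoding is a stand-in name):

static byte[] lengthPrefixedEncoding() throws CoderException {
  Coder<String> prefixed = LengthPrefixCoder.of(StringUtf8Coder.of());
  // The encoded form carries an explicit length prefix, so the runner can
  // partition on key bytes without understanding the wrapped coder.
  return CoderUtils.encodeToByteArray(prefixed, "some-key");
}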
 
Example #16
Source File: WindowingStrategyTranslation.java    From beam with Apache License 2.0
/**
 * Converts a {@link WindowingStrategy} into a {@link RunnerApi.WindowingStrategy}, registering
 * any components in the provided {@link SdkComponents}.
 */
public static RunnerApi.WindowingStrategy toProto(
    WindowingStrategy<?, ?> windowingStrategy, SdkComponents components) throws IOException {
  FunctionSpec windowFnSpec = toProto(windowingStrategy.getWindowFn(), components);

  RunnerApi.WindowingStrategy.Builder windowingStrategyProto =
      RunnerApi.WindowingStrategy.newBuilder()
          .setOutputTime(toProto(windowingStrategy.getTimestampCombiner()))
          .setAccumulationMode(toProto(windowingStrategy.getMode()))
          .setClosingBehavior(toProto(windowingStrategy.getClosingBehavior()))
          .setAllowedLateness(windowingStrategy.getAllowedLateness().getMillis())
          .setTrigger(TriggerTranslation.toProto(windowingStrategy.getTrigger()))
          .setWindowFn(windowFnSpec)
          .setAssignsToOneWindow(windowingStrategy.getWindowFn().assignsToOneWindow())
          .setOnTimeBehavior(toProto(windowingStrategy.getOnTimeBehavior()))
          .setWindowCoderId(
              components.registerCoder(windowingStrategy.getWindowFn().windowCoder()))
          .setEnvironmentId(components.getOnlyEnvironmentId());

  return windowingStrategyProto.build();
}
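
A hedged round-trip sketch combining this method with the fromProto call from Example #11 (roundTripStrategy is a stand-in name; an environment must be registered because toProto records an environment ID):

static WindowingStrategy<?, ?> roundTripStrategy() throws Exception {
  WindowingStrategy<?, ?> strategy =
      WindowingStrategy.of(FixedWindows.of(Duration.standardMinutes(5)));

  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("dummy-image-url"));
  RunnerApi.WindowingStrategy proto =
      WindowingStrategyTranslation.toProto(strategy, components);

  // Rehydrate the strategy from its proto form.
  return WindowingStrategyTranslation.fromProto(
      proto, RehydratedComponents.forComponents(components.toComponents()));
}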
 
Example #17
Source File: ParDoTranslation.java    From beam with Apache License 2.0
private static RunnerApi.TimeDomain.Enum translateTimeDomain(TimeDomain timeDomain) {
  switch (timeDomain) {
    case EVENT_TIME:
      return RunnerApi.TimeDomain.Enum.EVENT_TIME;
    case PROCESSING_TIME:
      return RunnerApi.TimeDomain.Enum.PROCESSING_TIME;
    case SYNCHRONIZED_PROCESSING_TIME:
      return RunnerApi.TimeDomain.Enum.SYNCHRONIZED_PROCESSING_TIME;
    default:
      throw new IllegalArgumentException("Unknown time domain");
  }
}
 
Example #18
Source File: QueryablePipelineTest.java    From beam with Apache License 2.0
@Test
public void forTransformsWithSubgraph() {
  Components components =
      Components.newBuilder()
          .putTransforms(
              "root", PTransform.newBuilder().putOutputs("output", "output.out").build())
          .putPcollections(
              "output.out",
              RunnerApi.PCollection.newBuilder().setUniqueName("output.out").build())
          .putTransforms(
              "consumer", PTransform.newBuilder().putInputs("input", "output.out").build())
          .putTransforms(
              "ignored", PTransform.newBuilder().putInputs("input", "output.out").build())
          .build();

  QueryablePipeline pipeline =
      QueryablePipeline.forTransforms(ImmutableSet.of("root", "consumer"), components);

  assertThat(
      pipeline.getRootTransforms(),
      contains(PipelineNode.pTransform("root", components.getTransformsOrThrow("root"))));

  Set<PTransformNode> consumers =
      pipeline.getPerElementConsumers(
          PipelineNode.pCollection(
              "output.out", components.getPcollectionsOrThrow("output.out")));

  assertThat(
      consumers,
      contains(PipelineNode.pTransform("consumer", components.getTransformsOrThrow("consumer"))));
}
 
Example #19
Source File: RegisterNodeFunction.java    From beam with Apache License 2.0
/**
 * Returns an artificial PCollectionView that can be used to fulfill API requirements of a {@link
 * SideInputReader} when used inside the Dataflow runner harness.
 *
 * <p>Generates length prefixed coder variants suitable to be used within the Dataflow Runner
 * harness so that encoding and decoding values matches the length prefixing that occurred when
 * materializing the side input.
 */
public static final PCollectionView<?> transformSideInputForRunner(
    RunnerApi.Pipeline pipeline,
    RunnerApi.PTransform parDoPTransform,
    String sideInputTag,
    RunnerApi.SideInput sideInput) {
  checkArgument(
      Materializations.MULTIMAP_MATERIALIZATION_URN.equals(sideInput.getAccessPattern().getUrn()),
      "This handler is only capable of dealing with %s materializations "
          + "but was asked to handle %s for PCollectionView with tag %s.",
      Materializations.MULTIMAP_MATERIALIZATION_URN,
      sideInput.getAccessPattern().getUrn(),
      sideInputTag);
  String sideInputPCollectionId = parDoPTransform.getInputsOrThrow(sideInputTag);
  RunnerApi.PCollection sideInputPCollection =
      pipeline.getComponents().getPcollectionsOrThrow(sideInputPCollectionId);
  try {
    FullWindowedValueCoder<KV<Object, Object>> runnerSideInputCoder =
        (FullWindowedValueCoder)
            WireCoders.instantiateRunnerWireCoder(
                PipelineNode.pCollection(sideInputPCollectionId, sideInputPCollection),
                pipeline.getComponents());

    return DataflowPortabilityPCollectionView.with(
        new TupleTag<>(sideInputTag), runnerSideInputCoder);
  } catch (IOException e) {
    throw new IllegalStateException("Unable to translate proto to coder", e);
  }
}
 
Example #20
Source File: PortablePipelineJarCreator.java    From beam with Apache License 2.0
/**
 * Stages all dependencies in environment into the jar file at outputStream, returning a new
 * environment that references these artifacts as classpath artifacts.
 */
private RunnerApi.Environment writeArtifacts(RunnerApi.Environment environment, String jobName)
    throws IOException {
  RunnerApi.Environment.Builder result = environment.toBuilder();
  result.clearDependencies();
  for (RunnerApi.ArtifactInformation artifact : environment.getDependenciesList()) {
    result.addDependencies(writeArtifact(artifact, jobName));
  }
  return result.build();
}
 
Example #21
Source File: ProcessBundleDescriptorsTest.java    From beam with Apache License 2.0
private static void ensureLengthPrefixed(
    RunnerApi.Coder coder,
    RunnerApi.Coder originalCoder,
    Map<String, RunnerApi.Coder> pbsCoderMap) {
  assertThat(coder.getSpec().getUrn(), is(ModelCoders.LENGTH_PREFIX_CODER_URN));
  // Check that the wrapped coder is unchanged
  String lengthPrefixedWrappedCoderId = coder.getComponentCoderIds(0);
  assertThat(pbsCoderMap.get(lengthPrefixedWrappedCoderId), is(originalCoder));
}
 
Example #22
Source File: TriggerTranslation.java    From beam with Apache License 2.0
private RunnerApi.Trigger convertSpecific(OrFinallyTrigger v) {
  return RunnerApi.Trigger.newBuilder()
      .setOrFinally(
          RunnerApi.Trigger.OrFinally.newBuilder()
              .setMain(toProto(v.getMainTrigger()))
              .setFinally(toProto(v.getUntilTrigger())))
      .build();
}
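
A hedged sketch of building a trigger whose tree contains an OrFinallyTrigger, then converting it with the static TriggerTranslation.toProto entry point used in Example #16 (orFinallyTriggerProto is a stand-in name):

static RunnerApi.Trigger orFinallyTriggerProto() {
  Trigger trigger =
      Repeatedly.forever(AfterPane.elementCountAtLeast(10))
          .orFinally(AfterWatermark.pastEndOfWindow());
  // The OrFinally node in the resulting proto is produced by convertSpecific above.
  return TriggerTranslation.toProto(trigger);
}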
 
Example #23
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
@Test
public void testToSingletonTranslationWithFnApiSideInput() throws Exception {
  // A "change detector" test that makes sure the translation
  // of getting a PCollectionView<T> does not change
  // in bad ways during refactor

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(Arrays.asList("beam_fn_api"));
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1)).apply(View.asSingleton());
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();
  assertAllStepOutputsHaveUniqueIds(job);

  List<Step> steps = job.getSteps();
  assertEquals(9, steps.size());

  Step collectionToSingletonStep = steps.get(steps.size() - 1);
  assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind());

  @SuppressWarnings("unchecked")
  List<Map<String, Object>> ctsOutputs =
      (List<Map<String, Object>>)
          steps.get(steps.size() - 1).getProperties().get(PropertyNames.OUTPUT_INFO);
  assertTrue(Structs.getBoolean(Iterables.getOnlyElement(ctsOutputs), "use_indexed_format"));
}
 
Example #24
Source File: FlinkStreamingPortablePipelineTranslator.java    From beam with Apache License 2.0
private <K, V> void translateReshuffle(
    String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
  RunnerApi.PTransform transform = pipeline.getComponents().getTransformsOrThrow(id);
  DataStream<WindowedValue<KV<K, V>>> inputDataStream =
      context.getDataStreamOrThrow(Iterables.getOnlyElement(transform.getInputsMap().values()));
  context.addDataStream(
      Iterables.getOnlyElement(transform.getOutputsMap().values()), inputDataStream.rebalance());
}
 
Example #25
Source File: SdkComponents.java    From beam with Apache License 2.0
/**
 * Registers the provided {@link Coder} into this {@link SdkComponents}, returning a unique ID for
 * the {@link Coder}. Multiple registrations of the same {@link Coder} will return the same unique
 * ID.
 *
 * <p>Coders are stored by identity to ensure that coders with implementations of {@link
 * #equals(Object)} and {@link #hashCode()} but incompatible binary formats are not considered the
 * same coder.
 */
public String registerCoder(Coder<?> coder) throws IOException {
  String existing = coderIds.get(coder);
  if (existing != null) {
    return existing;
  }
  String baseName = NameUtils.approximateSimpleName(coder);
  String name = uniqify(baseName, coderIds.values());
  coderIds.put(coder, name);
  RunnerApi.Coder coderProto = CoderTranslation.toProto(coder, this);
  componentsBuilder.putCoders(name, coderProto);
  return name;
}
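
A hedged sketch of the identity semantics described in the Javadoc: registering the same instance twice yields one ID, while an equal-but-distinct instance may be assigned a fresh ID (demonstrateIdentityRegistration is a stand-in name):

static void demonstrateIdentityRegistration() throws IOException {
  SdkComponents components = SdkComponents.create();

  Coder<KV<String, Long>> coder = KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of());
  String id1 = components.registerCoder(coder);
  String id2 = components.registerCoder(coder); // same instance: id2.equals(id1)

  // Equal via equals()/hashCode() but a distinct object, so it is stored by
  // identity and may be assigned a fresh ID.
  Coder<KV<String, Long>> equalCoder = KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of());
  String id3 = components.registerCoder(equalCoder); // need not equal id1
  System.out.println(id1 + " " + id2 + " " + id3);
}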
 
Example #26
Source File: Twister2BoundedSource.java    From beam with Apache License 2.0
public Twister2BoundedSource(
    BoundedSource<T> boundedSource, Twister2TranslationContext context, PipelineOptions options) {
  source = boundedSource;
  this.options = options;
  this.serializedOptions = new SerializablePipelineOptions(options).toString();
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(
      Environments.createOrGetDefaultEnvironment(options.as(PortablePipelineOptions.class)));
  RunnerApi.FunctionSpec sourceProto = ReadTranslation.toProto(source, components);
  sourceBytes = sourceProto.getPayload().toByteArray();
}
 
Example #27
Source File: FlinkBatchPortablePipelineTranslator.java    From beam with Apache License 2.0
private static <T> void translateFlatten(
    PTransformNode transform, RunnerApi.Pipeline pipeline, BatchTranslationContext context) {
  Map<String, String> allInputs = transform.getTransform().getInputsMap();
  DataSet<WindowedValue<T>> result = null;

  if (allInputs.isEmpty()) {

    // Create an empty dummy source to satisfy downstream operations. We cannot create an empty
    // source in Flink, so we send the DataSet to a flatMap that never forwards its element.
    DataSource<String> dummySource = context.getExecutionEnvironment().fromElements("dummy");
    result =
        dummySource
            .<WindowedValue<T>>flatMap(
                (s, collector) -> {
                  // never return anything
                })
            .returns(
                new CoderTypeInformation<>(
                    WindowedValue.getFullCoder(
                        (Coder<T>) VoidCoder.of(), GlobalWindow.Coder.INSTANCE)));
  } else {
    for (String pCollectionId : allInputs.values()) {
      DataSet<WindowedValue<T>> current = context.getDataSetOrThrow(pCollectionId);
      if (result == null) {
        result = current;
      } else {
        result = result.union(current);
      }
    }
  }

  // Insert a dummy filter. Flink produces duplicate elements after the union in some cases if we
  // don't do so.
  result = result.filter(tWindowedValue -> true).name("UnionFixFilter");
  context.addDataSet(
      Iterables.getOnlyElement(transform.getTransform().getOutputsMap().values()), result);
}
 
Example #28
Source File: TestStreamTranslation.java    From beam with Apache License 2.0
static <T> TestStream.Event<T> eventFromProto(
    RunnerApi.TestStreamPayload.Event protoEvent, Coder<T> coder) throws IOException {
  switch (protoEvent.getEventCase()) {
    case WATERMARK_EVENT:
      return TestStream.WatermarkEvent.advanceTo(
          new Instant(protoEvent.getWatermarkEvent().getNewWatermark()));
    case PROCESSING_TIME_EVENT:
      return TestStream.ProcessingTimeEvent.advanceBy(
          Duration.millis(protoEvent.getProcessingTimeEvent().getAdvanceDuration()));
    case ELEMENT_EVENT:
      List<TimestampedValue<T>> decodedElements = new ArrayList<>();
      for (RunnerApi.TestStreamPayload.TimestampedElement element :
          protoEvent.getElementEvent().getElementsList()) {
        decodedElements.add(
            TimestampedValue.of(
                CoderUtils.decodeFromByteArray(coder, element.getEncodedElement().toByteArray()),
                new Instant(element.getTimestamp())));
      }
      return TestStream.ElementEvent.add(decodedElements);
    case EVENT_NOT_SET:
    default:
      throw new IllegalArgumentException(
          String.format(
              "Unsupported type of %s: %s",
              RunnerApi.TestStreamPayload.Event.class.getCanonicalName(),
              protoEvent.getEventCase()));
  }
}
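
A hedged sketch of driving the decode direction, assuming package-private access to eventFromProto and the builder counterparts of the getters above (the AdvanceWatermark message name is assumed from the Beam model protos; decodeWatermarkEvent is a stand-in name):

static TestStream.Event<Long> decodeWatermarkEvent() throws IOException {
  RunnerApi.TestStreamPayload.Event protoEvent =
      RunnerApi.TestStreamPayload.Event.newBuilder()
          .setWatermarkEvent(
              RunnerApi.TestStreamPayload.Event.AdvanceWatermark.newBuilder()
                  .setNewWatermark(1000L))
          .build();
  // Decodes to a TestStream.WatermarkEvent advancing the watermark to new Instant(1000).
  return TestStreamTranslation.eventFromProto(protoEvent, VarLongCoder.of());
}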
 
Example #29
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
/**
 * Tests that when {@link DataflowPipelineOptions#setWorkerHarnessContainerImage(String)} pipeline
 * option is set, {@link DataflowRunner} sets that value as the {@link
 * DockerPayload#getContainerImage()} of the default {@link Environment} used when generating the
 * model pipeline proto.
 */
@Test
public void testSetWorkerHarnessContainerImageInPipelineProto() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  String containerImage = "gcr.io/IMAGE/foo";
  options.as(DataflowPipelineOptions.class).setWorkerHarnessContainerImage(containerImage);

  Pipeline p = Pipeline.create(options);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline proto = PipelineTranslation.toProto(p, sdkComponents, true);
  JobSpecification specification =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p,
              proto,
              sdkComponents,
              DataflowRunner.fromOptions(options),
              Collections.emptyList());
  RunnerApi.Pipeline pipelineProto = specification.getPipelineProto();

  assertEquals(1, pipelineProto.getComponents().getEnvironmentsCount());
  Environment defaultEnvironment =
      Iterables.getOnlyElement(pipelineProto.getComponents().getEnvironmentsMap().values());

  DockerPayload payload = DockerPayload.parseFrom(defaultEnvironment.getPayload());
  assertEquals(DataflowRunner.getContainerImageForJob(options), payload.getContainerImage());
}
 
Example #30
Source File: SparkPortableExecutionTest.java    From beam with Apache License 2.0
@Test(timeout = 120_000)
public void testExecStageWithMultipleConsumers() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(CrashingRunner.class);
  options
      .as(PortablePipelineOptions.class)
      .setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);
  Pipeline pipeline = Pipeline.create(options);
  PCollection<KV<String, Iterable<String>>> f =
      pipeline
          .apply("impulse", Impulse.create())
          .apply("F", ParDo.of(new DoFnWithSideEffect<>("F")))
          // use GBK to prevent fusion of F, G, and H
          .apply(GroupByKey.create());
  f.apply("G", ParDo.of(new DoFnWithSideEffect<>("G")));
  f.apply("H", ParDo.of(new DoFnWithSideEffect<>("H")));
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);
  JobInvocation jobInvocation =
      SparkJobInvoker.createJobInvocation(
          "testExecStageWithMultipleConsumers",
          "testExecStageWithMultipleConsumersRetrievalToken",
          sparkJobExecutor,
          pipelineProto,
          options.as(SparkPipelineOptions.class));
  jobInvocation.start();
  Assert.assertEquals(Enum.DONE, jobInvocation.getState());
}