Java Code Examples for org.apache.beam.sdk.options.PipelineOptions

The following examples show how to use org.apache.beam.sdk.options.PipelineOptions. These examples are extracted from open source projects; the originating project and source file are noted above each example.
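
Before the examples, a minimal sketch of the idioms most of them rely on: defining a custom options interface, parsing it from command-line arguments, and viewing one options object as another interface via as(). The MyOptions name is illustrative, not part of the Beam API.

import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class OptionsSketch {
  /** Hypothetical options interface; each getter/setter pair becomes a --flag. */
  public interface MyOptions extends PipelineOptions {
    @Description("An example numeric option.")
    @Default.Integer(1)
    int getCount();
    void setCount(int count);
  }

  public static void main(String[] args) {
    // Parses flags such as --count=5 into the custom interface.
    MyOptions options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(MyOptions.class);
    // Any PipelineOptions instance can be re-viewed as another interface.
    PipelineOptions base = options;
    System.out.println("count = " + base.as(MyOptions.class).getCount());
  }
}
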
Example 1
Source Project: beam   Source File: PipelineOptionsTranslationTest.java    License: Apache License 2.0
@Parameters(name = "{index}: {0}")
public static Iterable<? extends PipelineOptions> options() {
  PipelineOptionsFactory.register(TestUnserializableOptions.class);
  PipelineOptionsFactory.register(TestDefaultOptions.class);
  PipelineOptionsFactory.register(TestOptions.class);
  PipelineOptions emptyOptions = PipelineOptionsFactory.create();

  TestUnserializableOptions withNonSerializable =
      PipelineOptionsFactory.as(TestUnserializableOptions.class);
  withNonSerializable.setUnserializable(new Object());

  TestOptions withCustomField = PipelineOptionsFactory.as(TestOptions.class);
  withCustomField.setExample(99);

  PipelineOptions withSettings = PipelineOptionsFactory.create();
  withSettings.as(ApplicationNameOptions.class).setAppName("my_app");
  withSettings.setJobName("my_job");

  PipelineOptions withParsedSettings =
      PipelineOptionsFactory.fromArgs("--jobName=my_job", "--appName=my_app").create();

  return ImmutableList.of(
      emptyOptions, withNonSerializable, withCustomField, withSettings, withParsedSettings);
}
 
Example 2
Source Project: incubator-nemo   Source File: AbstractDoFnTransform.java    License: Apache License 2.0
/**
 * AbstractDoFnTransform constructor.
 *
 * @param doFn                 doFn
 * @param inputCoder           input coder
 * @param outputCoders         output coders
 * @param mainOutputTag        main output tag
 * @param additionalOutputTags additional output tags
 * @param windowingStrategy    windowing strategy
 * @param sideInputs           side inputs
 * @param options              pipeline options
 * @param displayData          display data.
 * @param doFnSchemaInformation doFn schema information.
 * @param sideInputMapping     side input mapping.
 */
public AbstractDoFnTransform(final DoFn<InterT, OutputT> doFn,
                             final Coder<InputT> inputCoder,
                             final Map<TupleTag<?>, Coder<?>> outputCoders,
                             final TupleTag<OutputT> mainOutputTag,
                             final List<TupleTag<?>> additionalOutputTags,
                             final WindowingStrategy<?, ?> windowingStrategy,
                             final Map<Integer, PCollectionView<?>> sideInputs,
                             final PipelineOptions options,
                             final DisplayData displayData,
                             final DoFnSchemaInformation doFnSchemaInformation,
                             final Map<String, PCollectionView<?>> sideInputMapping) {
  this.doFn = doFn;
  this.inputCoder = inputCoder;
  this.outputCoders = outputCoders;
  this.mainOutputTag = mainOutputTag;
  this.additionalOutputTags = additionalOutputTags;
  this.sideInputs = sideInputs;
  this.serializedOptions = new SerializablePipelineOptions(options);
  this.windowingStrategy = windowingStrategy;
  this.displayData = displayData;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideInputMapping = sideInputMapping;
}
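
A note on the serializedOptions field above: PipelineOptions is not java.io.Serializable, so runners wrap it before shipping it with user code. A minimal sketch of the round trip, assuming SerializablePipelineOptions from Beam's runner construction core (its package has moved between Beam releases):

import org.apache.beam.runners.core.construction.SerializablePipelineOptions;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class SerializedOptionsSketch {
  public static void main(String[] args) {
    PipelineOptions options = PipelineOptionsFactory.create();
    // The wrapper stores the options as JSON, making it Serializable.
    SerializablePipelineOptions serialized = new SerializablePipelineOptions(options);
    // On the worker side, the options are rebuilt lazily from that JSON.
    PipelineOptions recovered = serialized.get();
    System.out.println(recovered.getJobName());
  }
}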
 
Example 3
Source Project: beam   Source File: DataflowPipelineTranslator.java    License: Apache License 2.0
private static byte[] serializeWindowingStrategy(
    WindowingStrategy<?, ?> windowingStrategy, PipelineOptions options) {
  try {
    SdkComponents sdkComponents = SdkComponents.create();

    String workerHarnessContainerImageURL =
        DataflowRunner.getContainerImageForJob(options.as(DataflowPipelineOptions.class));
    RunnerApi.Environment defaultEnvironmentForDataflow =
        Environments.createDockerEnvironment(workerHarnessContainerImageURL);
    sdkComponents.registerEnvironment(defaultEnvironmentForDataflow);

    return WindowingStrategyTranslation.toMessageProto(windowingStrategy, sdkComponents)
        .toByteArray();
  } catch (Exception e) {
    throw new RuntimeException(
        String.format("Unable to format windowing strategy %s as bytes", windowingStrategy), e);
  }
}
 
Example 4
Source Project: beam   Source File: ShuffleSinkFactoryTest.java    License: Apache License 2.0
private ShuffleSink runTestCreateShuffleSinkHelper(
    byte[] shuffleWriterConfig,
    String shuffleKind,
    Coder<?> deserializedCoder,
    FullWindowedValueCoder<?> coder)
    throws Exception {
  CloudObject spec = CloudObject.forClassName("ShuffleSink");
  addString(spec, "shuffle_writer_config", encodeBase64String(shuffleWriterConfig));
  addString(spec, "shuffle_kind", shuffleKind);

  PipelineOptions options = PipelineOptionsFactory.create();

  ShuffleSinkFactory factory = new ShuffleSinkFactory();
  Sink<?> sink =
      factory.create(
          spec,
          deserializedCoder,
          options,
          BatchModeExecutionContext.forTesting(options, "testStage"),
          TestOperationContext.create());
  Assert.assertThat(sink, new IsInstanceOf(ShuffleSink.class));
  ShuffleSink shuffleSink = (ShuffleSink) sink;
  Assert.assertArrayEquals(shuffleWriterConfig, shuffleSink.shuffleWriterConfig);
  Assert.assertEquals(coder, shuffleSink.windowedElemCoder);
  return shuffleSink;
}
 
Example 5
Source Project: beam   Source File: Task.java    License: Apache License 2.0
public static void main(String[] args) {
  PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
  Pipeline pipeline = Pipeline.create(options);

  PCollection<String> fruits =
      pipeline.apply("Fruits",
          Create.of("apple", "banana", "cherry")
      );

  PCollection<String> countries =
      pipeline.apply("Countries",
          Create.of("australia", "brazil", "canada")
      );

  PCollection<String> output = applyTransform(fruits, countries);

  output.apply(Log.ofElements());

  pipeline.run();
}
 
Example 6
Source Project: beam   Source File: CountingSource.java    License: Apache License 2.0
/**
 * Splits an unbounded source {@code desiredNumSplits} ways by giving each split every {@code
 * desiredNumSplits}th element that this {@link UnboundedCountingSource} produces.
 *
 * <p>E.g., if a source produces all even numbers {@code [0, 2, 4, 6, 8, ...)} and we want to
 * split into 3 new sources, then the new sources will produce numbers that are 6 apart and are
 * offset at the start by the original stride: {@code [0, 6, 12, ...)}, {@code [2, 8, 14, ...)},
 * and {@code [4, 10, 16, ...)}.
 */
@Override
public List<? extends UnboundedSource<Long, CountingSource.CounterMark>> split(
    int desiredNumSplits, PipelineOptions options) throws Exception {
  // Using Javadoc example, stride 2 with 3 splits becomes stride 6.
  long newStride = stride * desiredNumSplits;

  ImmutableList.Builder<UnboundedCountingSource> splits = ImmutableList.builder();
  for (int i = 0; i < desiredNumSplits; ++i) {
    // Starts offset by the original stride. Using Javadoc example, this generates starts of
    // 0, 2, and 4.
    splits.add(
        new UnboundedCountingSource(
            start + i * stride, newStride, elementsPerPeriod, period, timestampFn));
  }
  return splits.build();
}
 
Example 7
Source Project: beam   Source File: GroupingShuffleReaderTest.java    License: Apache License 2.0
private void runTestBytesReadCounter(
    List<KV<Integer, List<KV<Integer, Integer>>>> input,
    boolean useSecondaryKey,
    ValuesToRead valuesToRead,
    long expectedReadBytes)
    throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  runTestBytesReadCounterForOptions(
      options, input, useSecondaryKey, valuesToRead, expectedReadBytes);

  // TODO: Remove experimental worker code once inter-transform IO has shipped.
  options
      .as(DataflowPipelineDebugOptions.class)
      .setExperiments(Lists.newArrayList(Experiment.IntertransformIO.getName()));
  runTestBytesReadCounterForOptions(
      options, input, useSecondaryKey, valuesToRead, expectedReadBytes);
}
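
Example 7 toggles an experiment by replacing the whole experiments list. A minimal sketch of the additive helpers on ExperimentalOptions, assuming the static addExperiment/getExperimentValue methods available in recent Beam releases:

import org.apache.beam.sdk.options.ExperimentalOptions;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class ExperimentsSketch {
  public static void main(String[] args) {
    PipelineOptions options = PipelineOptionsFactory.create();
    // Appends to the experiments list instead of overwriting it.
    ExperimentalOptions.addExperiment(
        options.as(ExperimentalOptions.class), "state_cache_size=16");
    // key=value experiments can be read back by key.
    String size = ExperimentalOptions.getExperimentValue(options, "state_cache_size");
    System.out.println("state_cache_size = " + size);
  }
}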
 
Example 8
Source Project: beam   Source File: BoundedReadFromUnboundedSource.java    License: Apache License 2.0
@ProcessElement
public void process(
    @Element Shard<T> shard, OutputReceiver<Shard<T>> out, PipelineOptions options)
    throws Exception {
  int numInitialSplits = numInitialSplits(shard.getMaxNumRecords());
  List<? extends UnboundedSource<T, ?>> splits =
      shard.getSource().split(numInitialSplits, options);
  int numSplits = splits.size();
  long[] numRecords = splitNumRecords(shard.getMaxNumRecords(), numSplits);
  for (int i = 0; i < numSplits; i++) {
    out.output(
        shard
            .toBuilder()
            .setSource(splits.get(i))
            .setMaxNumRecords(numRecords[i])
            .setMaxReadTime(shard.getMaxReadTime())
            .build());
  }
}
 
Example 9
Source Project: beam   Source File: GcpOptions.java    License: Apache License 2.0
@Override
@Nullable
public String create(PipelineOptions options) {
  String tempLocation = options.getTempLocation();
  if (isNullOrEmpty(tempLocation)) {
    tempLocation =
        tryCreateDefaultBucket(
            options,
            newCloudResourceManagerClient(options.as(CloudResourceManagerOptions.class))
                .build());
    options.setTempLocation(tempLocation);
  } else {
    try {
      PathValidator validator = options.as(GcsOptions.class).getPathValidator();
      validator.validateOutputFilePrefixSupported(tempLocation);
    } catch (Exception e) {
      throw new IllegalArgumentException(
          String.format(
              "Error constructing default value for gcpTempLocation: tempLocation is not"
                  + " a valid GCS path, %s. ",
              tempLocation),
          e);
    }
  }
  return tempLocation;
}
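
Example 9 implements Beam's DefaultValueFactory, which computes a default the first time an unset option is read. A minimal sketch of wiring a factory to an option (MyOptions and TempDirFactory are illustrative names, not Beam API):

import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.DefaultValueFactory;
import org.apache.beam.sdk.options.PipelineOptions;

public interface MyOptions extends PipelineOptions {
  /** Runs only when getScratchDir() is called and no value was set explicitly. */
  class TempDirFactory implements DefaultValueFactory<String> {
    @Override
    public String create(PipelineOptions options) {
      return System.getProperty("java.io.tmpdir");
    }
  }

  @Default.InstanceFactory(TempDirFactory.class)
  String getScratchDir();
  void setScratchDir(String value);
}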
 
Example 10
Source Project: beam   Source File: DefaultJobBundleFactory.java    License: Apache License 2.0
private static boolean shouldLoadBalanceBundles(JobInfo jobInfo) {
  PipelineOptions pipelineOptions =
      PipelineOptionsTranslation.fromProto(jobInfo.pipelineOptions());
  boolean loadBalanceBundles =
      pipelineOptions.as(PortablePipelineOptions.class).getLoadBalanceBundles();
  if (loadBalanceBundles) {
    int stateCacheSize =
        Integer.parseInt(
            MoreObjects.firstNonNull(
                ExperimentalOptions.getExperimentValue(
                    pipelineOptions, ExperimentalOptions.STATE_CACHE_SIZE),
                "0"));
    Preconditions.checkArgument(
        stateCacheSize == 0,
        "%s must be 0 when using bundle load balancing",
        ExperimentalOptions.STATE_CACHE_SIZE);
  }
  return loadBalanceBundles;
}
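
Example 10 rehydrates PipelineOptions from a portable job's proto payload. Under the hood this is JSON-based: the PipelineOptions interface declares its own Jackson (de)serializers, so a plain ObjectMapper can round-trip the built-in fields, as this sketch assumes (Beam's own wrappers additionally register Jackson modules for custom field types):

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class OptionsJsonSketch {
  public static void main(String[] args) throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    options.setJobName("my_job");
    ObjectMapper mapper = new ObjectMapper();
    // Serialization is handled by (de)serializers declared on PipelineOptions.
    String json = mapper.writeValueAsString(options);
    PipelineOptions back = mapper.readValue(json, PipelineOptions.class);
    System.out.println(back.getJobName()); // prints my_job
  }
}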
 
Example 11
Source Project: dbeam   Source File: JdbcExportArgsFactory.java    License: Apache License 2.0
public static JdbcExportArgs fromPipelineOptions(final PipelineOptions options)
    throws ClassNotFoundException, IOException {
  final JdbcExportPipelineOptions exportOptions = options.as(JdbcExportPipelineOptions.class);
  final JdbcAvroArgs jdbcAvroArgs =
      JdbcAvroArgs.create(
          JdbcConnectionArgs.create(exportOptions.getConnectionUrl())
              .withUsername(exportOptions.getUsername())
              .withPassword(PasswordReader.INSTANCE.readPassword(exportOptions).orElse(null)),
          exportOptions.getFetchSize(),
          exportOptions.getAvroCodec(),
          Optional.ofNullable(exportOptions.getPreCommand()).orElse(Collections.emptyList()));

  return JdbcExportArgs.create(
      jdbcAvroArgs,
      createQueryArgs(exportOptions),
      exportOptions.getAvroSchemaNamespace(),
      Optional.ofNullable(exportOptions.getAvroDoc()),
      exportOptions.isUseAvroLogicalTypes(),
      Duration.parse(exportOptions.getExportTimeout()),
      BeamJdbcAvroSchema.parseOptionalInputAvroSchemaFile(exportOptions.getAvroSchemaFilePath()));
}
 
Example 12
Source Project: beam   Source File: FileBasedSourceTest.java    License: Apache License 2.0
@Test
public void testSplitAtFraction() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  File file = createFileWithData("file", createStringDataset(3, 100));

  Metadata metadata = FileSystems.matchSingleFileSpec(file.getPath());
  TestFileBasedSource source = new TestFileBasedSource(metadata, 1, 0, file.length(), null);
  // Shouldn't be able to split while unstarted.
  assertSplitAtFractionFails(source, 0, 0.7, options);
  assertSplitAtFractionSucceedsAndConsistent(source, 1, 0.7, options);
  assertSplitAtFractionSucceedsAndConsistent(source, 30, 0.7, options);
  assertSplitAtFractionFails(source, 0, 0.0, options);
  assertSplitAtFractionFails(source, 70, 0.3, options);
  assertSplitAtFractionFails(source, 100, 1.0, options);
  assertSplitAtFractionFails(source, 100, 0.99, options);
  assertSplitAtFractionSucceedsAndConsistent(source, 100, 0.995, options);
}
 
Example 13
Source Project: beam   Source File: SourceTestUtils.java    License: Apache License 2.0
/**
 * Given a reference {@code Source} and a list of {@code Source}s, assert that the union of the
 * records read from the list of sources is equal to the records read from the reference source.
 */
public static <T> void assertSourcesEqualReferenceSource(
    BoundedSource<T> referenceSource,
    List<? extends BoundedSource<T>> sources,
    PipelineOptions options)
    throws Exception {
  Coder<T> coder = referenceSource.getOutputCoder();
  List<T> referenceRecords = readFromSource(referenceSource, options);
  List<T> bundleRecords = new ArrayList<>();
  for (BoundedSource<T> source : sources) {
    assertThat(
        "Coder type for source "
            + source
            + " is not compatible with Coder type for referenceSource "
            + referenceSource,
        source.getOutputCoder(),
        equalTo(coder));
    List<T> elems = readFromSource(source, options);
    bundleRecords.addAll(elems);
  }
  List<ReadableStructuralValue<T>> bundleValues = createStructuralValues(coder, bundleRecords);
  List<ReadableStructuralValue<T>> referenceValues =
      createStructuralValues(coder, referenceRecords);
  assertThat(bundleValues, containsInAnyOrder(referenceValues.toArray()));
}
 
Example 14
Source Project: beam   Source File: KinesisSource.java    License: Apache License 2.0
/**
 * Creates reader based on given {@link KinesisReaderCheckpoint}. If {@link
 * KinesisReaderCheckpoint} is not given, then we use {@code initialCheckpointGenerator} to
 * generate a new checkpoint.
 */
@Override
public UnboundedReader<KinesisRecord> createReader(
    PipelineOptions options, KinesisReaderCheckpoint checkpointMark) {

  CheckpointGenerator checkpointGenerator = initialCheckpointGenerator;

  if (checkpointMark != null) {
    checkpointGenerator = new StaticCheckpointGenerator(checkpointMark);
  }

  LOG.info("Creating new reader using {}", checkpointGenerator);

  return new KinesisReader(
      SimplifiedKinesisClient.from(awsClientsProvider, limit),
      checkpointGenerator,
      this,
      watermarkPolicyFactory,
      rateLimitPolicyFactory,
      upToDateThreshold,
      maxCapacityPerShard);
}
 
Example 15
Source Project: beam   Source File: BatchGroupAlsoByWindowAndCombineFn.java    License: Apache License 2.0
private CombineWithContext.Context createFromComponents(
    final PipelineOptions pipelineOptions,
    final SideInputReader sideInputReader,
    final BoundedWindow mainInputWindow) {
  return new CombineWithContext.Context() {
    @Override
    public PipelineOptions getPipelineOptions() {
      return pipelineOptions;
    }

    @Override
    public <T> T sideInput(PCollectionView<T> view) {
      return sideInputReader.get(
          view, view.getWindowMappingFn().getSideInputWindow(mainInputWindow));
    }
  };
}
 
Example 16
Source Project: beam   Source File: FileBasedSourceTest.java    License: Apache License 2.0
@Test
public void testCloseUnstartedFilePatternReader() throws IOException {
  PipelineOptions options = PipelineOptionsFactory.create();
  List<String> data1 = createStringDataset(3, 50);
  File file1 = createFileWithData("file1", data1);

  List<String> data2 = createStringDataset(3, 50);
  createFileWithData("file2", data2);

  List<String> data3 = createStringDataset(3, 50);
  createFileWithData("file3", data3);

  List<String> data4 = createStringDataset(3, 50);
  createFileWithData("otherfile", data4);

  TestFileBasedSource source =
      new TestFileBasedSource(new File(file1.getParent(), "file*").getPath(), 64, null);
  Reader<String> reader = source.createReader(options);
  // Closing an unstarted FilePatternReader should not throw an exception.
  try {
    reader.close();
  } catch (Exception e) {
    throw new AssertionError(
        "Closing an unstarted FilePatternReader should not throw an exception", e);
  }
}
 
Example 17
Source Project: beam   Source File: DatastoreV1.java    License: Apache License 2.0
/** Returns the number of entities available for reading. */
public long getNumEntities(
    PipelineOptions options, String ourKind, @Nullable String namespace) {
  try {
    V1Options v1Options = V1Options.from(getProjectId(), getNamespace(), getLocalhost());
    V1DatastoreFactory datastoreFactory = new V1DatastoreFactory();
    Datastore datastore =
        datastoreFactory.getDatastore(
            options, v1Options.getProjectId(), v1Options.getLocalhost());

    Entity entity = getLatestTableStats(ourKind, namespace, datastore);
    return entity.getProperties().get("count").getIntegerValue();
  } catch (Exception e) {
    return -1;
  }
}
 
Example 18
public GroupingShuffleReaderWithFaultyBytesReadCounter(
    PipelineOptions options,
    byte[] shuffleReaderConfig,
    @Nullable String startShufflePosition,
    @Nullable String stopShufflePosition,
    Coder<WindowedValue<KV<K, Iterable<V>>>> coder,
    BatchModeExecutionContext executionContext,
    DataflowOperationContext operationContext,
    boolean sortValues)
    throws Exception {
  super(
      options,
      shuffleReaderConfig,
      startShufflePosition,
      stopShufflePosition,
      coder,
      executionContext,
      operationContext,
      ShuffleReadCounterFactory.INSTANCE,
      sortValues);
}
 
Example 19
Source Project: beam   Source File: HBaseIO.java    License: Apache License 2.0
@Override
public long getEstimatedSizeBytes(PipelineOptions pipelineOptions) throws Exception {
  if (estimatedSizeBytes == null) {
    try (Connection connection = ConnectionFactory.createConnection(read.configuration)) {
      estimatedSizeBytes =
          HBaseUtils.estimateSizeBytes(
              connection, read.tableId, HBaseUtils.getByteKeyRange(read.scan));
    }
    LOG.debug(
        "Estimated size {} bytes for table {} and scan {}",
        estimatedSizeBytes,
        read.tableId,
        read.scan);
  }
  return estimatedSizeBytes;
}
 
Example 20
Source Project: beam   Source File: Environments.java    License: Apache License 2.0
public static List<ArtifactInformation> getDeferredArtifacts(PipelineOptions options) {
  List<String> stagingFiles = options.as(PortablePipelineOptions.class).getFilesToStage();
  if (stagingFiles == null || stagingFiles.isEmpty()) {
    return ImmutableList.of();
  }

  String key = UUID.randomUUID().toString();
  DefaultArtifactResolver.INSTANCE.register(
      (info) -> {
        if (BeamUrns.getUrn(StandardArtifacts.Types.DEFERRED).equals(info.getTypeUrn())) {
          RunnerApi.DeferredArtifactPayload deferredArtifactPayload;
          try {
            deferredArtifactPayload =
                RunnerApi.DeferredArtifactPayload.parseFrom(info.getTypePayload());
          } catch (InvalidProtocolBufferException e) {
            throw new RuntimeException("Error parsing deferred artifact payload.", e);
          }
          if (key.equals(deferredArtifactPayload.getKey())) {
            return Optional.of(getArtifacts(stagingFiles));
          } else {
            return Optional.empty();
          }
        } else {
          return Optional.empty();
        }
      });

  return ImmutableList.of(
      ArtifactInformation.newBuilder()
          .setTypeUrn(BeamUrns.getUrn(StandardArtifacts.Types.DEFERRED))
          .setTypePayload(
              RunnerApi.DeferredArtifactPayload.newBuilder().setKey(key).build().toByteString())
          .build());
}
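
Example 20 reads the staged-file list that its deferred artifacts are later resolved from. A minimal sketch of populating that list up front (the jar path is illustrative):

import java.util.Arrays;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.PortablePipelineOptions;

public class StagingSketch {
  public static void main(String[] args) {
    PipelineOptions options = PipelineOptionsFactory.create();
    // Hypothetical local artifact to ship to workers alongside the job.
    options.as(PortablePipelineOptions.class)
        .setFilesToStage(Arrays.asList("/tmp/my-pipeline-bundle.jar"));
  }
}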
 
Example 21
Source Project: beam   Source File: GlobalCombineFnRunners.java    License: Apache License 2.0
@Override
public OutputT extractOutput(
    AccumT accumulator,
    PipelineOptions options,
    SideInputReader sideInputReader,
    Collection<? extends BoundedWindow> windows) {
  return combineFnWithContext.extractOutput(
      accumulator,
      createFromComponents(options, sideInputReader, Iterables.getOnlyElement(windows)));
}
 
Example 22
Source Project: hop   Source File: PipelineTestBase.java    License: Apache License 2.0
@Ignore
public void createRunPipeline( PipelineMeta pipelineMeta ) throws Exception {

  /*
  FileOutputStream fos = new FileOutputStream( "/tmp/"+pipelineMeta.getName()+".ktr" );
  fos.write( pipelineMeta.getXML().getBytes() );
  fos.close();
  */

  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  pipelineOptions.setJobName( pipelineMeta.getName() );
  pipelineOptions.setUserAgent( BeamConst.STRING_HOP_BEAM );

  BeamDirectPipelineRunConfiguration beamRunConfig = new BeamDirectPipelineRunConfiguration();
  beamRunConfig.setTempLocation( System.getProperty( "java.io.tmpdir" ) );

  // No extra plugins to load : null option
  HopPipelineMetaToBeamPipelineConverter converter = new HopPipelineMetaToBeamPipelineConverter( pipelineMeta, metadataProvider, beamRunConfig );
  Pipeline pipeline = converter.createPipeline();

  PipelineResult pipelineResult = pipeline.run();
  pipelineResult.waitUntilFinish();

  MetricResults metricResults = pipelineResult.metrics();

  MetricQueryResults allResults = metricResults.queryMetrics( MetricsFilter.builder().build() );
  for ( MetricResult<Long> result : allResults.getCounters() ) {
    System.out.println( "Name: " + result.getName() + " Attempted: " + result.getAttempted() );
  }
}
 
Example 23
Source Project: beam   Source File: FlinkRequiresStableInputTest.java    License: Apache License 2.0
private static Pipeline createPipeline(
    PipelineOptions options, String singleOutputPrefix, String multiOutputPrefix) {
  Pipeline p = Pipeline.create(options);

  SerializableFunction<Void, Void> firstTime =
      (SerializableFunction<Void, Void>)
          value -> {
            latch.countDown();
            return null;
          };

  PCollection<String> impulse = p.apply("CreatePCollectionOfOneValue", Create.of(VALUE));
  impulse
      .apply(
          "Single-PairWithRandomKey",
          MapElements.via(new RequiresStableInputIT.PairWithRandomKeyFn()))
      .apply(
          "Single-MakeSideEffectAndThenFail",
          ParDo.of(
              new RequiresStableInputIT.MakeSideEffectAndThenFailFn(
                  singleOutputPrefix, firstTime)));
  impulse
      .apply(
          "Multi-PairWithRandomKey",
          MapElements.via(new RequiresStableInputIT.PairWithRandomKeyFn()))
      .apply(
          "Multi-MakeSideEffectAndThenFail",
          ParDo.of(
                  new RequiresStableInputIT.MakeSideEffectAndThenFailFn(
                      multiOutputPrefix, firstTime))
              .withOutputTags(new TupleTag<>(), TupleTagList.empty()));

  return p;
}
 
Example 24
Source Project: beam   Source File: SparkPortableExecutionTest.java    License: Apache License 2.0
@Test(timeout = 120_000)
public void testExecStageWithMultipleConsumers() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(CrashingRunner.class);
  options
      .as(PortablePipelineOptions.class)
      .setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);
  Pipeline pipeline = Pipeline.create(options);
  PCollection<KV<String, Iterable<String>>> f =
      pipeline
          .apply("impulse", Impulse.create())
          .apply("F", ParDo.of(new DoFnWithSideEffect<>("F")))
          // use GBK to prevent fusion of F, G, and H
          .apply(GroupByKey.create());
  f.apply("G", ParDo.of(new DoFnWithSideEffect<>("G")));
  f.apply("H", ParDo.of(new DoFnWithSideEffect<>("H")));
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);
  JobInvocation jobInvocation =
      SparkJobInvoker.createJobInvocation(
          "testExecStageWithMultipleConsumers",
          "testExecStageWithMultipleConsumersRetrievalToken",
          sparkJobExecutor,
          pipelineProto,
          options.as(SparkPipelineOptions.class));
  jobInvocation.start();
  Assert.assertEquals(Enum.DONE, jobInvocation.getState());
}
 
Example 25
Source Project: beam   Source File: SqsUnboundedSource.java    License: Apache License 2.0
@Override
public List<SqsUnboundedSource> split(int desiredNumSplits, PipelineOptions options) {
  List<SqsUnboundedSource> sources = new ArrayList<>();
  for (int i = 0; i < Math.max(1, desiredNumSplits); ++i) {
    sources.add(new SqsUnboundedSource(read));
  }
  return sources;
}
 
Example 26
Source Project: beam   Source File: DefaultJobBundleFactory.java    License: Apache License 2.0
public static DefaultJobBundleFactory create(JobInfo jobInfo) {
  PipelineOptions pipelineOptions =
      PipelineOptionsTranslation.fromProto(jobInfo.pipelineOptions());
  Map<String, EnvironmentFactory.Provider> environmentFactoryProviderMap =
      ImmutableMap.of(
          BeamUrns.getUrn(StandardEnvironments.Environments.DOCKER),
          new DockerEnvironmentFactory.Provider(pipelineOptions),
          BeamUrns.getUrn(StandardEnvironments.Environments.PROCESS),
          new ProcessEnvironmentFactory.Provider(pipelineOptions),
          BeamUrns.getUrn(StandardEnvironments.Environments.EXTERNAL),
          new ExternalEnvironmentFactory.Provider(),
          Environments.ENVIRONMENT_EMBEDDED, // Non Public urn for testing.
          new EmbeddedEnvironmentFactory.Provider(pipelineOptions));
  return new DefaultJobBundleFactory(jobInfo, environmentFactoryProviderMap);
}
 
Example 27
Source Project: incubator-nemo   Source File: WindowedBroadcast.java    License: Apache License 2.0
/**
 * Main function for the MR BEAM program.
 *
 * @param args arguments.
 */
public static void main(final String[] args) {
  final String outputFilePath = args[0];

  final Window<Long> windowFn = Window
    .<Long>into(SlidingWindows.of(Duration.standardSeconds(2))
      .every(Duration.standardSeconds(1)));

  final PipelineOptions options = NemoPipelineOptionsFactory.create();
  options.setJobName("WindowedBroadcast");

  final Pipeline p = Pipeline.create(options);

  final PCollection<Long> windowedElements = getSource(p).apply(windowFn);
  final PCollectionView<List<Long>> windowedView = windowedElements.apply(View.asList());

  windowedElements.apply(ParDo.of(new DoFn<Long, String>() {
      @ProcessElement
      public void processElement(final ProcessContext c) {
        final Long anElementInTheWindow = c.element();
        final List<Long> allElementsInTheWindow = c.sideInput(windowedView);
        System.out.println(anElementInTheWindow + " / " + allElementsInTheWindow);
        if (!allElementsInTheWindow.contains(anElementInTheWindow)) {
          throw new RuntimeException(anElementInTheWindow + " not in " + allElementsInTheWindow.toString());
        } else {
          c.output(anElementInTheWindow + " is in " + allElementsInTheWindow);
        }
      }
    }).withSideInputs(windowedView)
  ).apply(new WriteOneFilePerWindow(outputFilePath, 1));

  p.run().waitUntilFinish();
}
 
Example 28
Source Project: beam   Source File: GlobalCombineFnRunners.java    License: Apache License 2.0
@Override
public AccumT createAccumulator(
    PipelineOptions options,
    SideInputReader sideInputReader,
    Collection<? extends BoundedWindow> windows) {
  return combineFn.createAccumulator();
}
 
Example 29
Source Project: components   Source File: WindowRuntimeTest.java    License: Apache License 2.0
@Test
public void testFixedWindow() {

    PipelineOptions options = PipelineOptionsFactory.create();
    options.setRunner(DirectRunner.class);
    final Pipeline p = Pipeline.create(options);

    // creation of PCollection with different timestamp PCollection<IndexedRecord>

    List<TimestampedValue<IndexedRecord>> data = Arrays.asList(TimestampedValue.of(irA, new Instant(1L)),
            TimestampedValue.of(irB, new Instant(2L)), TimestampedValue.of(irC, new Instant(3L)));

    PCollection<IndexedRecord> input = (PCollection<IndexedRecord>) p
            .apply(Create.timestamped(data).withCoder(LazyAvroCoder.of()));

    WindowProperties windowProperties = new WindowProperties("window");
    windowProperties.windowLength.setValue(2);
    windowProperties.windowSlideLength.setValue(-1);
    windowProperties.windowSession.setValue(false);

    windowProperties.setValue("windowLength", 2);
    windowProperties.setValue("windowSlideLength", -1);
    windowProperties.setValue("windowSession", false);

    WindowRuntime windowRun = new WindowRuntime();
    windowRun.initialize(null, windowProperties);

    PCollection<IndexedRecord> test = windowRun.expand(input);

    PCollection<KV<IndexedRecord, Long>> windowed_counts = test.apply(Count.<IndexedRecord> perElement());

    /////////
    // Fixed duration: 2

    PAssert.that(windowed_counts).containsInAnyOrder(KV.of(irA, 1L), KV.of(irB, 1L), KV.of(irC, 1L));

    p.run();
}
 
Example 30
Source Project: beam   Source File: FnApiStateAccessor.java    License: Apache License 2.0
@Override
public <ElementT, AccumT, ResultT>
    CombiningState<ElementT, AccumT, ResultT> bindCombiningWithContext(
        String id,
        StateSpec<CombiningState<ElementT, AccumT, ResultT>> spec,
        Coder<AccumT> accumCoder,
        CombineFnWithContext<ElementT, AccumT, ResultT> combineFn) {
  return (CombiningState<ElementT, AccumT, ResultT>)
      stateKeyObjectCache.computeIfAbsent(
          createBagUserStateKey(id),
          key ->
              bindCombining(
                  id,
                  spec,
                  accumCoder,
                  CombineFnUtil.bindContext(
                      combineFn,
                      new StateContext<BoundedWindow>() {
                        @Override
                        public PipelineOptions getPipelineOptions() {
                          return pipelineOptions;
                        }

                        @Override
                        public <T> T sideInput(PCollectionView<T> view) {
                          return get(view, currentWindowSupplier.get());
                        }

                        @Override
                        public BoundedWindow window() {
                          return currentWindowSupplier.get();
                        }
                      })));
}