org.apache.beam.sdk.coders.CoderProviders Java Examples

The following examples show how to use org.apache.beam.sdk.coders.CoderProviders. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DoFnInvokersTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the invoker's defaults for a {@code DoFn} that declares only a process-element method
 * and an initial-restriction method: the watermark estimator state coder is expected to equal
 * {@code VoidCoder.of()} and the initial watermark estimator state is expected to be null.
 */
@Test
public void testDefaultWatermarkEstimatorStateAndCoder() throws Exception {
  class MockFn extends DoFn<String, String> {
    @ProcessElement
    public void processElement(
        ProcessContext c, RestrictionTracker<RestrictionWithDefaultTracker, Void> tracker) {}

    @GetInitialRestriction
    public RestrictionWithDefaultTracker getInitialRestriction(@Element String element) {
      return null;
    }
  }

  // Registry that knows how to code the restriction type via the static-method provider.
  CoderRegistry registry = CoderRegistry.createDefault();
  registry.registerCoderProvider(
      CoderProviders.fromStaticMethods(
          RestrictionWithDefaultTracker.class, CoderForDefaultTracker.class));

  // Invoker built around a mocked instance of the local DoFn.
  DoFnInvoker<String, String> fnInvoker = DoFnInvokers.invokerFor(mock(MockFn.class));

  assertEquals(VoidCoder.of(), fnInvoker.invokeGetWatermarkEstimatorStateCoder(registry));
  assertNull(fnInvoker.invokeGetInitialWatermarkEstimatorState(new FakeArgumentProvider<>()));
}
 
Example #2
Source File: PubsubCoderProviderRegistrar.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Lists the coder providers offered for {@code PubsubMessage}, in this order: the
 * with-attributes coder, the with-message-id coder, and the with-attributes-and-message-id
 * coder.
 *
 * @return an immutable list of the three providers
 */
@Override
public List<CoderProvider> getCoderProviders() {
  final TypeDescriptor<PubsubMessage> messageType = TypeDescriptor.of(PubsubMessage.class);

  final CoderProvider withAttributes =
      CoderProviders.forCoder(messageType, PubsubMessageWithAttributesCoder.of());
  final CoderProvider withMessageId =
      CoderProviders.forCoder(messageType, PubsubMessageWithMessageIdCoder.of());
  final CoderProvider withAttributesAndMessageId =
      CoderProviders.forCoder(messageType, PubsubMessageWithAttributesAndMessageIdCoder.of());

  return ImmutableList.of(withAttributes, withMessageId, withAttributesAndMessageId);
}
 
Example #3
Source File: JavaBinCodecCoder.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Lists the coder providers for the two Solr document types, each backed by a
 * {@code JavaBinCodecCoder} created for that class.
 *
 * @return the provider for {@code SolrDocument} followed by the one for
 *     {@code SolrInputDocument}
 */
@Override
public List<CoderProvider> getCoderProviders() {
  final CoderProvider solrDocumentProvider =
      CoderProviders.forCoder(
          TypeDescriptor.of(SolrDocument.class), JavaBinCodecCoder.of(SolrDocument.class));
  final CoderProvider solrInputDocumentProvider =
      CoderProviders.forCoder(
          TypeDescriptor.of(SolrInputDocument.class),
          JavaBinCodecCoder.of(SolrInputDocument.class));

  return Arrays.asList(solrDocumentProvider, solrInputDocumentProvider);
}
 
Example #4
Source File: GroupByKeyTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Verify that runners correctly hash/group on the encoded value and not the value itself.
 *
 * <p>Builds ten copies of each of five {@code BadEqualityKey} keys, reshuffles them, groups and
 * counts per key, and asserts on both the per-key counts and the set of keys.
 */
@Test
@Category({ValidatesRunner.class, DataflowPortabilityApiUnsupported.class})
public void testGroupByKeyWithBadEqualsHashCode() throws Exception {
  final int valuesPerKey = 10;
  final int keyCount = 5;

  // Input data: for every round, one (key, 1L) pair per key id.
  List<KV<BadEqualityKey, Long>> keyedOnes = new ArrayList<>();
  for (int round = 0; round < valuesPerKey; round++) {
    for (int keyId = 0; keyId < keyCount; keyId++) {
      keyedOnes.add(KV.of(new BadEqualityKey(keyId), 1L));
    }
  }

  p.getCoderRegistry()
      .registerCoderProvider(
          CoderProviders.fromStaticMethods(BadEqualityKey.class, DeterministicKeyCoder.class));

  // Randomly partition the values up front; some runners might otherwise keep
  // all values on the machine where they were initially created.
  PCollection<KV<BadEqualityKey, Long>> shuffled =
      p.apply(Create.of(keyedOnes))
          .apply(ParDo.of(new AssignRandomKey()))
          .apply(Reshuffle.of())
          .apply(Values.create());

  // Explicit GroupByKey followed by a combine; in real-world code
  // this would be a Count.perKey()
  PCollection<KV<BadEqualityKey, Long>> countsPerKey =
      shuffled.apply(GroupByKey.create()).apply(Combine.groupedValues(new CountFn()));

  PAssert.that(countsPerKey).satisfies(new AssertThatCountPerKeyCorrect(valuesPerKey));

  PCollection<BadEqualityKey> distinctKeys = countsPerKey.apply(Keys.create());
  PAssert.that(distinctKeys).satisfies(new AssertThatAllKeysExist(keyCount));

  p.run();
}
 
Example #5
Source File: ProtobufCoderProviderRegistrar.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Lists the coder providers contributed by this registrar: one binding {@code ByteString} to
 * {@code ByteStringCoder}, followed by the provider exposed by {@code ProtoCoder}.
 *
 * @return an immutable list of the two providers
 */
@Override
public List<CoderProvider> getCoderProviders() {
  final CoderProvider byteStringProvider =
      CoderProviders.forCoder(TypeDescriptor.of(ByteString.class), ByteStringCoder.of());
  final CoderProvider protoProvider = ProtoCoder.getCoderProvider();

  return ImmutableList.of(byteStringProvider, protoProvider);
}
 
Example #6
Source File: AlternatingLeastSquareInefficient.java    From nemo with Apache License 2.0 4 votes vote down vote up
/**
 * Main function for the ALS BEAM program.
 *
 * <p>Arguments, in order: the input file path, the number of features, the number of
 * iterations, and an optional lambda value (0.05 is used when it is absent).
 *
 * @param args arguments.
 */
public static void main(final String[] args) {
  final long start = System.currentTimeMillis();
  LOG.info(Arrays.toString(args));
  final String inputFilePath = args[0];
  final Integer numFeatures = Integer.parseInt(args[1]);
  final Integer numItr = Integer.parseInt(args[2]);
  // args[3] is present as soon as four arguments are given. The earlier "> 4" test ignored a
  // supplied lambda unless a fifth argument happened to be passed as well.
  final Double lambda;
  if (args.length > 3) {
    lambda = Double.parseDouble(args[3]);
  } else {
    lambda = 0.05;
  }

  final PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(NemoPipelineRunner.class);
  options.setJobName("ALS");
  options.setStableUniqueNames(PipelineOptions.CheckEnabled.OFF);

  final Pipeline p = Pipeline.create(options);
  p.getCoderRegistry().registerCoderProvider(CoderProviders.fromStaticMethods(Pair.class, PairCoder.class));

  // Read raw data
  final PCollection<String> rawData = GenericSourceSink.read(p, inputFilePath);

  // Parse data for item
  final PCollection<KV<Integer, Pair<List<Integer>, List<Double>>>> parsedItemData = rawData
      .apply(ParDo.of(new AlternatingLeastSquare.ParseLine(false)))
      .apply(Combine.perKey(new AlternatingLeastSquare.TrainingDataCombiner()));

  // Create Initial Item Matrix
  PCollection<KV<Integer, List<Double>>> itemMatrix = parsedItemData
      .apply(ParDo.of(new DoFn<KV<Integer, Pair<List<Integer>, List<Double>>>, KV<Integer, List<Double>>>() {
        @ProcessElement
        public void processElement(final ProcessContext c) throws Exception {
          final KV<Integer, Pair<List<Integer>, List<Double>>> element = c.element();
          final List<Double> ratings = element.getValue().right();

          // The first entry of the vector is the average of this element's ratings.
          double ratingSum = 0.0;
          for (final Double rating : ratings) {
            ratingSum += rating;
          }

          final List<Double> result = new ArrayList<>(numFeatures);
          result.add(ratingSum / ratings.size());

          // Fill the remaining entries with small random values. The bound must be numFeatures:
          // bounding by result.size() (which is 1 here) meant the loop never ran and every
          // vector ended up with a single element.
          for (int i = 1; i < numFeatures; i++) {
            result.add(Math.random() * 0.01);
          }
          c.output(KV.of(element.getKey(), result));
        }
      }));

  // Iterations to update Item Matrix.
  for (int i = 0; i < numItr; i++) {
    // NOTE: a single composite transform for the iteration.
    itemMatrix = itemMatrix.apply(new UpdateUserAndItemMatrix(numFeatures, lambda, rawData, parsedItemData));
  }

  p.run();
  LOG.info("JCT " + (System.currentTimeMillis() - start));
}
 
Example #7
Source File: BigQueryCoderProviderRegistrar.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Lists the single coder provider of this registrar, which binds {@code TableRow} to
 * {@code TableRowJsonCoder}.
 *
 * @return an immutable one-element list
 */
@Override
public List<CoderProvider> getCoderProviders() {
  final CoderProvider tableRowProvider =
      CoderProviders.forCoder(TypeDescriptor.of(TableRow.class), TableRowJsonCoder.of());
  return ImmutableList.of(tableRowProvider);
}
 
Example #8
Source File: BigtableWriteResultCoder.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a coder provider that binds {@code BigtableWriteResult} to
 * {@code BigtableWriteResultCoder}.
 *
 * @return the provider for {@code BigtableWriteResult}
 */
public static CoderProvider getCoderProvider() {
  final TypeDescriptor<BigtableWriteResult> resultType =
      TypeDescriptor.of(BigtableWriteResult.class);
  final CoderProvider resultProvider =
      CoderProviders.forCoder(resultType, BigtableWriteResultCoder.of());
  return resultProvider;
}
 
Example #9
Source File: AlternatingLeastSquare.java    From incubator-nemo with Apache License 2.0 4 votes vote down vote up
/**
 * Entry point of the ALS BEAM program.
 *
 * <p>Arguments, in order: the input file path, the number of features, the number of
 * iterations, an optional lambda value (0.05 when absent) and an optional output file path.
 * When the output path is given, the final item matrix is written there as one
 * comma-separated line per key.
 *
 * @param args arguments.
 */
public static void main(final String[] args) {
  final long startTimeMs = System.currentTimeMillis();
  LOG.info(Arrays.toString(args));

  // Mandatory arguments.
  final String inputFilePath = args[0];
  final Integer numFeatures = Integer.parseInt(args[1]);
  final Integer numItr = Integer.parseInt(args[2]);

  // Optional arguments.
  final Double lambda = args.length > 3 ? Double.parseDouble(args[3]) : 0.05;
  final boolean checkOutput = args.length > 4;
  final String outputFilePath = checkOutput ? args[4] : "";

  final PipelineOptions pipelineOptions = NemoPipelineOptionsFactory.create();
  pipelineOptions.setJobName("ALS");
  pipelineOptions.setStableUniqueNames(PipelineOptions.CheckEnabled.OFF);

  final Pipeline pipeline = Pipeline.create(pipelineOptions);
  pipeline.getCoderRegistry()
    .registerCoderProvider(CoderProviders.fromStaticMethods(int[].class, IntArrayCoder.class));
  pipeline.getCoderRegistry()
    .registerCoderProvider(CoderProviders.fromStaticMethods(float[].class, FloatArrayCoder.class));

  // Raw input lines.
  final PCollection<String> rawData = GenericSourceSink.read(pipeline, inputFilePath);

  // Training data parsed for items.
  final PCollection<KV<Integer, KV<int[], float[]>>> parsedItemData = rawData
    .apply(ParDo.of(new ParseLine(false)))
    .apply(Combine.perKey(new TrainingDataCombiner()));

  // Training data parsed for users.
  final PCollection<KV<Integer, KV<int[], float[]>>> parsedUserData = rawData
    .apply(ParDo.of(new ParseLine(true)))
    .apply(Combine.perKey(new TrainingDataCombiner()));

  // Initial item matrix.
  PCollection<KV<Integer, float[]>> itemMatrix =
    parsedItemData.apply(ParDo.of(new CreateInitialMatrix(numFeatures, checkOutput)));

  // Each iteration applies a single composite transform that updates the item matrix.
  for (int iteration = 0; iteration < numItr; iteration++) {
    itemMatrix = itemMatrix.apply(new UpdateUserAndItemMatrix(numFeatures, lambda, parsedUserData, parsedItemData));
  }

  if (checkOutput) {
    // Render each entry as "key,v0,v1,..." and write the lines to the output path.
    final PCollection<String> formattedRows = itemMatrix.apply(MapElements.<KV<Integer, float[]>, String>via(
      new SimpleFunction<KV<Integer, float[]>, String>() {
        @Override
        public String apply(final KV<Integer, float[]> elem) {
          final List<String> renderedValues = Stream.of(ArrayUtils.toObject(elem.getValue()))
            .map(String::valueOf)
            .collect(Collectors.toList());
          final String joinedValues = String.join(",", renderedValues);
          return elem.getKey() + "," + joinedValues;
        }
      }));

    GenericSourceSink.write(formattedRows, outputFilePath);
  }

  pipeline.run().waitUntilFinish();
  LOG.info("JCT " + (System.currentTimeMillis() - startTimeMs));
}
 
Example #10
Source File: SnsCoderProviderRegistrar.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Lists the single coder provider of this registrar, which binds {@code PublishResult} to the
 * coder returned by {@code PublishResultCoders.defaultPublishResult()}.
 *
 * @return an immutable one-element list
 */
@Override
public List<CoderProvider> getCoderProviders() {
  final CoderProvider publishResultProvider =
      CoderProviders.forCoder(
          TypeDescriptor.of(PublishResult.class), PublishResultCoders.defaultPublishResult());
  return ImmutableList.of(publishResultProvider);
}
 
Example #11
Source File: AttributeValueCoderProviderRegistrar.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Lists the single coder provider of this registrar, which binds {@code AttributeValue} to
 * {@code AttributeValueCoder}.
 *
 * @return an immutable one-element list
 */
@Override
public List<CoderProvider> getCoderProviders() {
  final CoderProvider attributeValueProvider =
      CoderProviders.forCoder(TypeDescriptor.of(AttributeValue.class), AttributeValueCoder.of());
  return ImmutableList.of(attributeValueProvider);
}
 
Example #12
Source File: HBaseCoderProviderRegistrar.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Lists the coder providers of this registrar: the provider exposed by
 * {@code HBaseMutationCoder}, followed by one binding {@code Result} to
 * {@code HBaseResultCoder}.
 *
 * @return an immutable list of the two providers
 */
@Override
public List<CoderProvider> getCoderProviders() {
  final CoderProvider mutationProvider = HBaseMutationCoder.getCoderProvider();
  final CoderProvider resultProvider =
      CoderProviders.forCoder(TypeDescriptor.of(Result.class), HBaseResultCoder.of());

  return ImmutableList.of(mutationProvider, resultProvider);
}
 
Example #13
Source File: MessageCoderRegistrar.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Lists the single coder provider of this registrar, which binds {@code Message} to
 * {@code MessageCoder}.
 *
 * @return an immutable one-element list
 */
@Override
public List<CoderProvider> getCoderProviders() {
  final CoderProvider messageProvider =
      CoderProviders.forCoder(TypeDescriptor.of(Message.class), MessageCoder.of());
  return ImmutableList.of(messageProvider);
}
 
Example #14
Source File: SendMessageRequestCoderRegistrar.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Lists the single coder provider of this registrar, which binds {@code SendMessageRequest} to
 * {@code SendMessageRequestCoder}.
 *
 * @return an immutable one-element list
 */
@Override
public List<CoderProvider> getCoderProviders() {
  final CoderProvider sendMessageRequestProvider =
      CoderProviders.forCoder(
          TypeDescriptor.of(SendMessageRequest.class), SendMessageRequestCoder.of());
  return ImmutableList.of(sendMessageRequestProvider);
}
 
Example #15
Source File: SnsCoderProviderRegistrar.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Lists the single coder provider of this registrar, which binds {@code PublishResponse} to
 * {@code PublishResponseCoder}.
 *
 * @return an immutable one-element list
 */
@Override
public List<CoderProvider> getCoderProviders() {
  final CoderProvider publishResponseProvider =
      CoderProviders.forCoder(
          TypeDescriptor.of(PublishResponse.class), PublishResponseCoder.of());
  return ImmutableList.of(publishResponseProvider);
}
 
Example #16
Source File: AmqpMessageCoderProviderRegistrar.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Lists the single coder provider of this registrar, which binds {@code Message} to
 * {@code AmqpMessageCoder}.
 *
 * @return an immutable one-element list
 */
@Override
public List<CoderProvider> getCoderProviders() {
  final CoderProvider amqpMessageProvider =
      CoderProviders.forCoder(TypeDescriptor.of(Message.class), AmqpMessageCoder.of());
  return ImmutableList.of(amqpMessageProvider);
}
 
Example #17
Source File: AlternatingLeastSquareInefficient.java    From incubator-nemo with Apache License 2.0 4 votes vote down vote up
/**
 * Main function for the ALS BEAM program.
 *
 * <p>Arguments, in order: the input file path, the number of features, the number of
 * iterations, and an optional lambda value (0.05 is used when it is absent).
 *
 * @param args arguments.
 */
public static void main(final String[] args) {
  final long start = System.currentTimeMillis();
  LOG.info(Arrays.toString(args));
  final String inputFilePath = args[0];
  final Integer numFeatures = Integer.parseInt(args[1]);
  final Integer numItr = Integer.parseInt(args[2]);
  // args[3] is present as soon as four arguments are given. The earlier "> 4" test ignored a
  // supplied lambda unless a fifth argument happened to be passed as well.
  final Double lambda;
  if (args.length > 3) {
    lambda = Double.parseDouble(args[3]);
  } else {
    lambda = 0.05;
  }

  final PipelineOptions options = NemoPipelineOptionsFactory.create();
  options.setJobName("ALS");
  options.setStableUniqueNames(PipelineOptions.CheckEnabled.OFF);

  final Pipeline p = Pipeline.create(options);
  p.getCoderRegistry().registerCoderProvider(CoderProviders.fromStaticMethods(int[].class, IntArrayCoder.class));
  p.getCoderRegistry().registerCoderProvider(CoderProviders.fromStaticMethods(float[].class, FloatArrayCoder.class));

  // Read raw data
  final PCollection<String> rawData = GenericSourceSink.read(p, inputFilePath);

  // Parse data for item
  final PCollection<KV<Integer, KV<int[], float[]>>> parsedItemData = rawData
    .apply(ParDo.of(new AlternatingLeastSquare.ParseLine(false)))
    .apply(Combine.perKey(new AlternatingLeastSquare.TrainingDataCombiner()));

  // Create Initial Item Matrix
  PCollection<KV<Integer, float[]>> itemMatrix = parsedItemData
    .apply(ParDo.of(new DoFn<KV<Integer, KV<int[], float[]>>, KV<Integer, float[]>>() {
      @ProcessElement
      public void processElement(final ProcessContext c) throws Exception {
        final float[] result = new float[numFeatures];

        final KV<Integer, KV<int[], float[]>> element = c.element();
        final float[] ratings = element.getValue().getValue();

        // The first entry of the vector is the average of this element's ratings.
        for (int i = 0; i < ratings.length; i++) {
          result[0] += ratings[i];
        }
        result[0] /= ratings.length;

        // The remaining entries start as small random values.
        for (int i = 1; i < result.length; i++) {
          result[i] = (float) (Math.random() * 0.01);
        }
        c.output(KV.of(element.getKey(), result));
      }
    }));

  // Iterations to update Item Matrix.
  for (int i = 0; i < numItr; i++) {
    // NOTE: a single composite transform for the iteration.
    itemMatrix = itemMatrix.apply(new UpdateUserAndItemMatrix(numFeatures, lambda, rawData, parsedItemData));
  }

  p.run().waitUntilFinish();
  LOG.info("JCT " + (System.currentTimeMillis() - start));
}