com.google.cloud.dataflow.sdk.values.KV Java Examples

The following examples show how to use com.google.cloud.dataflow.sdk.values.KV. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: GroupAlsoByWindowTest.java    From flink-dataflow with Apache License 2.0 6 votes vote down vote up
/**
 * Compares the harness output for the fixed-window / after-watermark strategy with the
 * expected sequence: three ON_TIME panes and two watermarks.
 * NOTE(review): the input elements are presumably fed by createTestingOperatorAndState;
 * that is not visible in this method.
 */
@Test
public void testAfterWatermarkProgram() throws Exception {
	final long initialTime = 0L;
	final WindowingStrategy strategy = fixedWindowWithAfterWatermarkTriggerStrategy;
	OneInputStreamOperatorTestHarness<WindowedValue<KV<String, Integer>>, WindowedValue<KV<String, Integer>>> testHarness =
			createTestingOperatorAndState(strategy, initialTime);

	final long firstPaneTime = initialTime + 1;
	final long secondPaneTime = initialTime + 10000;
	final long thirdPaneTime = initialTime + 19500;

	ConcurrentLinkedQueue<Object> expected = new ConcurrentLinkedQueue<>();

	// Pane for "key1", followed by the first watermark.
	expected.add(new StreamRecord<>(
			makeWindowedValue(strategy, KV.of("key1", 6), new Instant(firstPaneTime), null,
					PaneInfo.createPane(true, true, PaneInfo.Timing.ON_TIME)),
			firstPaneTime));
	expected.add(new Watermark(initialTime + 10000));

	// Panes for "key1" and "key2", followed by the second watermark.
	expected.add(new StreamRecord<>(
			makeWindowedValue(strategy, KV.of("key1", 11), new Instant(secondPaneTime), null,
					PaneInfo.createPane(true, true, PaneInfo.Timing.ON_TIME)),
			secondPaneTime));
	expected.add(new StreamRecord<>(
			makeWindowedValue(strategy, KV.of("key2", 1), new Instant(thirdPaneTime), null,
					PaneInfo.createPane(true, true, PaneInfo.Timing.ON_TIME)),
			thirdPaneTime));
	expected.add(new Watermark(initialTime + 20000));

	TestHarnessUtil.assertOutputEqualsSorted(
			"Output was not correct.", expected, testHarness.getOutput(), new ResultSortComparator());
	testHarness.close();
}
 
Example #2
Source File: JoinKnownGoodAndLiveStates.java    From policyscanner with Apache License 2.0 6 votes vote down vote up
/**
 * Pairs the main-input state of a resource with the side-input state of the same resource.
 *
 * <p>The element has the type {@code KV<GCPResource, KV<StateSource, GCPResourceState>>}.
 * The GCPResource is the resource being described (here, the GCP project) and the
 * GCPResourceState is the attribute describing it, i.e. the project policies.
 * StateSource tells where the GCPResourceState came from: either it was checked in as a
 * known-good state, or it is the live state of the resource. States tagged with one
 * StateSource (say, DESIRED) arrive through the side input and those tagged with the other
 * arrive through the main input.
 *
 * <p>Nothing is output when the side input has no entry for the element's resource.
 *
 * @param context The ProcessContext object that contains context-specific methods and objects.
 */
@Override
public void processElement(ProcessContext context) {
  // The project this element describes.
  GCPResource resource = context.element().getKey();

  // Skip projects that have no counterpart in the side input.
  if (!context.sideInput(this.view).containsKey(resource)) {
    return;
  }

  // The project's policies from the main input and from the side input.
  KV<StateSource, GCPResourceState> mainValue = context.element().getValue();
  KV<StateSource, GCPResourceState> sideValue = context.sideInput(this.view).get(resource);

  // Two entries are expected: one for the DESIRED state and one for the LIVE state.
  Map<StateSource, GCPResourceState> statesBySource = new HashMap<>(2);
  statesBySource.put(mainValue.getKey(), mainValue.getValue());
  statesBySource.put(sideValue.getKey(), sideValue.getValue());
  context.output(KV.of(resource, statesBySource));
}
 
Example #3
Source File: FilePathToLiveState.java    From policyscanner with Apache License 2.0 6 votes vote down vote up
/**
 * Turn a file path into the GCP project it refers to and emit the project with its policy.
 * Only paths of the form org_id/project_id/POLICY_FILE are accepted. If the policy cannot
 * be fetched, the failure is logged and nothing is emitted for this element.
 * @param processContext The ProcessContext object that contains context-specific
 * methods and objects.
 * @throws IOException Thrown when there's an error reading from the API.
 * @throws GeneralSecurityException Thrown when there's an error reading from the API.
 * @throws IllegalArgumentException Thrown when the path is not a three-part project
 * policy path.
 */
@Override
public void processElement(ProcessContext processContext)
    throws IOException, GeneralSecurityException {
  List<String> pathParts = processContext.element();

  // only project policies are supported for now.
  boolean isProjectPolicyPath =
      pathParts.size() == 3 && pathParts.get(2).equals(GCPResourcePolicy.getPolicyFile());
  if (!isProjectPolicyPath) {
    throw new IllegalArgumentException("Malformed input to FilePathToLiveState.");
  }

  // pathParts is org_id/project_id/POLICY_FILE; the project id is passed first.
  GCPProject project = new GCPProject(pathParts.get(1), pathParts.get(0));
  GCPResourceState policy;
  try {
    policy = project.getPolicy();
  } catch (Exception e) {
    logger.log(Level.WARNING, "Error getting policy", e);
    return;
  }
  if (policy != null) {
    processContext.output(KV.of((GCPResource) project, policy));
  }
}
 
Example #4
Source File: ExtractState.java    From policyscanner with Apache License 2.0 6 votes vote down vote up
/**
 * Emit the incoming GCPProject together with its policy as a Key-Value pair.
 * Projects with a null id, and projects whose policy cannot be obtained, are routed to
 * the side output with an explanatory message instead.
 * @param processContext The ProcessContext object that contains processContext-specific
 * methods and objects.
 */
@Override
public void processElement(ProcessContext processContext) {
  GCPProject project = processContext.element();
  if (project.getId() == null) {
    this.addToSideOutput(processContext, project, "Null project id");
    return;
  }

  String failureReason = null;
  GCPResourceState policy = null;
  try {
    policy = project.getPolicy();
  } catch (Exception e) {
    // Remember the message for the side output; the lookup failure itself is only logged.
    failureReason = e.getMessage();
    logger.log(Level.FINE, "Error getting policy", e);
  }

  if (policy != null) {
    processContext.output(KV.of((GCPResource) project, policy));
    return;
  }
  this.addToSideOutput(processContext, project, String.format("Policy error %s", failureReason));
}
 
Example #5
Source File: FilterOutMatchingState.java    From policyscanner with Apache License 2.0 6 votes vote down vote up
/**
 * Emits the states of a resource only when the main-input state and the side-input state
 * differ.
 *
 * <p>The element has the type {@code KV<GCPResource, KV<StateSource, GCPResourceState>>}.
 * The GCPResource is the resource being described (here, the GCP project) and the
 * GCPResourceState is the attribute describing it, i.e. the project policies.
 * StateSource tells where the GCPResourceState came from: either it was checked in as a
 * known-good state, or it is the live state of the resource. States tagged with one
 * StateSource (say, DESIRED) arrive through the side input and those tagged with the other
 * arrive through the main input.
 *
 * @param context The ProcessContext object that contains context-specific methods and objects.
 */
@Override
public void processElement(ProcessContext context) {
  // The project and its policies from the main input.
  GCPResource resource = context.element().getKey();
  KV<StateSource, GCPResourceState> mainValue = context.element().getValue();

  // Nothing to compare against when the side input does not know this project.
  if (!context.sideInput(this.view).containsKey(resource)) {
    return;
  }

  KV<StateSource, GCPResourceState> sideValue = context.sideInput(this.view).get(resource);

  // Matching states are filtered out.
  if (mainValue.getValue().equals(sideValue.getValue())) {
    return;
  }

  // Two entries are expected: one for the DESIRED state and one for the LIVE state.
  Map<StateSource, GCPResourceState> differingStates = new HashMap<>(2);
  differingStates.put(mainValue.getKey(), mainValue.getValue());
  differingStates.put(sideValue.getKey(), sideValue.getValue());
  context.output(KV.of(resource, differingStates));
}
 
Example #6
Source File: GenerateSampleData.java    From data-timeseries-java with Apache License 2.0 6 votes vote down vote up
/**
 * Appends ten live data points for {@code key} to {@code ts}, one minute apart, starting at
 * {@code time}. The price (used for both ask and bid) goes up by {@code change} over the
 * first five points, is stepped back once, and then goes down by {@code change} over the
 * remaining five points.
 *
 * @param ts list that receives the generated key/value pairs
 * @param time timestamp of the first generated point
 * @param key key used both for the KV pair and inside the TSProto
 * @param value price of the first generated point
 * @param change amount added to (then subtracted from) the price at each step
 */
public static void generateSequentialList(List<KV<String, TSProto>> ts, Instant time, String key,
    double value, double change) {

  Instant cursor = new Instant(time);
  double price = value;

  // Rising half: points 0..4.
  for (int step = 0; step < 5; step++) {
    TSProto point = TSProto.newBuilder().setAskPrice(price).setBidPrice(price).setKey(key)
        .setIsLive(true).setTime(cursor.getMillis()).build();
    ts.add(KV.of(key, point));
    cursor = cursor.plus(Duration.standardMinutes(1));
    price += change;
  }

  // Undo the last increment so the falling half starts from the last emitted price.
  price -= change;

  // Falling half: points 5..9.
  for (int step = 5; step < 10; step++) {
    TSProto point = TSProto.newBuilder().setAskPrice(price).setBidPrice(price).setKey(key)
        .setIsLive(true).setTime(cursor.getMillis()).build();
    ts.add(KV.of(key, point));
    cursor = cursor.plus(Duration.standardMinutes(1));
    price -= change;
  }
}
 
Example #7
Source File: LoadBooksTest.java    From cloud-bigtable-examples with Apache License 2.0 6 votes vote down vote up
/**
 * Applies the book-parsing transforms to {@code testFile} and checks the n-gram counts
 * they produce, in any order.
 */
@Test
public void parseBooks_returnsNgramsCounts() {
  // Arrange: a test pipeline whose only input is testFile.
  Pipeline pipeline = TestPipeline.create();
  PCollection<String> lines = pipeline.apply(Create.of(testFile));

  // Act
  PCollection<KV<String, Integer>> ngramCounts = LoadBooks.applyPipelineToParseBooks(lines);

  // Assert
  DataflowAssert.that(ngramCounts).containsInAnyOrder(
      KV.of("despatch when art", 10),
      KV.of("despatch when came", 10),
      KV.of("despatch when published", 12),
      KV.of("despatch where was", 10),
      KV.of("despatch which made", 45),
      // "despatch which addressed" occurs twice in the input, once per part of speech
      // of "addressed", so its expected count is the sum of both entries.
      KV.of("despatch which addressed", 12 + 46),
      KV.of("despatch which admitted", 13),
      KV.of("despatch which allow", 14),
      KV.of("despatch which announced", 50),
      KV.of("despatch which answer", 32));
}
 
Example #8
Source File: KvCoderComperator.java    From flink-dataflow with Apache License 2.0 6 votes vote down vote up
/**
 * Tells whether the candidate's key encodes to the same bytes as the reference.
 * The key is encoded into {@code buffer2} and compared byte by byte with the contents
 * of {@code referenceBuffer}.
 *
 * @param candidate the pair whose key is compared with the reference
 * @return {@code true} when both encodings have the same length and content
 */
@Override
public boolean equalToReference(KV<K, V> candidate) {
	try {
		buffer2.reset();
		keyCoder.encode(candidate.getKey(), buffer2, Coder.Context.OUTER);

		final byte[] referenceBytes = referenceBuffer.getBuffer();
		final byte[] candidateBytes = buffer2.getBuffer();

		// Only the first size() bytes of each backing array are compared.
		final int encodedLength = buffer2.size();
		if (referenceBuffer.size() != encodedLength) {
			return false;
		}

		int pos = 0;
		while (pos < encodedLength) {
			if (referenceBytes[pos] != candidateBytes[pos]) {
				return false;
			}
			pos++;
		}
		return true;
	} catch (IOException e) {
		throw new RuntimeException("Could not compare reference.", e);
	}
}
 
Example #9
Source File: TfIdfITCase.java    From flink-dataflow with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a batch pipeline that computes TF-IDF over three small documents and writes the
 * distinct words to {@code resultPath}.
 */
@Override
protected void testProgram() throws Exception {

	Pipeline pipeline = FlinkTestPipeline.createForBatch();

	// URI keys are encoded through their string form.
	pipeline.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class));

	PCollection<KV<String, KV<URI, Double>>> tfIdfByWord = pipeline
			.apply(Create.of(
					KV.of(new URI("x"), "a b c d"),
					KV.of(new URI("y"), "a b c"),
					KV.of(new URI("z"), "a m n")))
			.apply(new TfIdf.ComputeTfIdf());

	// Keep only the words, each one once, and write them out.
	tfIdfByWord
			.apply(Keys.<String>create())
			.apply(RemoveDuplicates.<String>create())
			.apply(TextIO.Write.to(resultPath));

	pipeline.run();
}
 
Example #10
Source File: GCSFilesSource.java    From policyscanner with Apache License 2.0 6 votes vote down vote up
/**
 * Get the next file in queue.
 * @return A Key-Value pair where the key is a list of strings representing the path of
 * the file and the value is a string representing the content of the file.
 * @throws NoSuchElementException If there is no file queued, or if the file can't be read
 * from the GCS API. In the latter case the underlying exception is attached as the cause.
 */
@Override
public KV<List<String>, String> getCurrent() throws NoSuchElementException {
  // Report "no current element" as documented instead of an IndexOutOfBoundsException.
  if (this.currentFiles.isEmpty()) {
    throw new NoSuchElementException("No current file available in the reader");
  }
  String filePath = this.currentFiles.get(0);
  String fileContent;
  try {
    fileContent = this.source.getFileContent(filePath);
  } catch (IOException ioe) {
    // NoSuchElementException has no cause-taking constructor, so attach it explicitly.
    NoSuchElementException notFound = new NoSuchElementException(
        "Object " + filePath + " not found in bucket " + this.source.bucket);
    notFound.initCause(ioe);
    throw notFound;
  } catch (GeneralSecurityException gse) {
    NoSuchElementException denied = new NoSuchElementException(
        "Cannot access object "
            + filePath
            + " in bucket "
            + this.source.bucket
            + " due to security reasons");
    denied.initCause(gse);
    throw denied;
  }
  // NOTE(review): String.split treats the delimiter as a regular expression; this assumes
  // getDirDelimiter() returns a value with no regex metacharacters (e.g. "/") - confirm.
  List<String> splitPath = Arrays.asList(filePath.split(this.source.getDirDelimiter()));
  return KV.of(splitPath, fileContent);
}
 
Example #11
Source File: FlinkPartialReduceFunction.java    From flink-dataflow with Apache License 2.0 6 votes vote down vote up
/**
 * Folds all values of the group into a single accumulator and emits it under the key of
 * the first element.
 * NOTE(review): assumes {@code elements} is never empty; the first {@code next()} call is
 * not guarded.
 *
 * @param elements the key/value pairs to combine
 * @param out collector that receives the key together with the resulting accumulator
 */
@Override
public void combine(Iterable<KV<K, VI>> elements, Collector<KV<K, VA>> out) throws Exception {

	final Iterator<KV<K, VI>> remaining = elements.iterator();

	// The first element supplies the key used for the whole group.
	final KV<K, VI> head = remaining.next();
	final K key = head.getKey();

	VA accumulator = keyedCombineFn.addInput(
			key, keyedCombineFn.createAccumulator(key), head.getValue());

	while (remaining.hasNext()) {
		accumulator = keyedCombineFn.addInput(key, accumulator, remaining.next().getValue());
	}

	out.collect(KV.of(key, accumulator));
}
 
Example #12
Source File: FileToStateTest.java    From policyscanner with Apache License 2.0 6 votes vote down vote up
/**
 * Processes several identical (file path, file content) pairs and checks that each one
 * yields the sample project paired with the sample policy.
 */
@Test
public void testMultipleElements() {
  int elementCount = 5;
  GCPProject project = getSampleProject();
  List<String> filePath = getSampleProjectFilePath(project);
  String fileContent = getSamplePolicyBindingsString(1);
  GCPResourceState policy = getSampleGCPResourcePolicy(project, 1);
  List<KV<List<String>, String>> inputs = new ArrayList<>(elementCount);

  for (int i = 0; i < elementCount; ++i) {
    inputs.add(KV.of(filePath, fileContent));
  }

  List<KV<GCPResource, GCPResourceState>> results = this.tester.processBatch(inputs);

  // assertEquals takes (expected, actual); keeping that order makes failure messages
  // report the right value as "expected".
  assertEquals(elementCount, results.size());
  for (int i = 0; i < elementCount; ++i) {
    assertEquals(project, results.get(i).getKey());
    assertEquals(policy, results.get(i).getValue());
  }
}
 
Example #13
Source File: FlinkGroupByKeyWrapper.java    From flink-dataflow with Apache License 2.0 6 votes vote down vote up
/**
 * Keys a stream of windowed key/value pairs by the key of each pair.
 * When the key coder is a {@link VoidCoder}, every element is keyed by the shared
 * {@code VoidValue.INSTANCE} instead of its own key.
 *
 * @param inputDataStream the stream to key
 * @param inputKvCoder coder of the pairs; its key coder supplies the produced key type
 * @return the stream keyed by {@code K}
 */
public static <K, V> KeyedStream<WindowedValue<KV<K, V>>, K> groupStreamByKey(DataStream<WindowedValue<KV<K, V>>> inputDataStream, KvCoder<K, V> inputKvCoder) {
	final Coder<K> coderForKey = inputKvCoder.getKeyCoder();
	final TypeInformation<K> producedKeyType = new CoderTypeInformation<>(coderForKey);
	final boolean voidKey = coderForKey instanceof VoidCoder;

	KeySelectorWithQueryableResultType<K, V> selector =
			new KeySelectorWithQueryableResultType<K, V>() {

				@Override
				public K getKey(WindowedValue<KV<K, V>> value) throws Exception {
					if (voidKey) {
						return (K) VoidCoderTypeSerializer.VoidValue.INSTANCE;
					}
					return value.getValue().getKey();
				}

				@Override
				public TypeInformation<K> getProducedType() {
					return producedKeyType;
				}
			};

	return inputDataStream.keyBy(selector);
}
 
Example #14
Source File: FXTimeSeriesPipelineSRGTests.java    From data-timeseries-java with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a PCollection from {@code data} and re-emits every element with a timestamp
 * taken from the time stored in its TSProto value.
 *
 * @param pipeline pipeline the input is added to
 * @param data key/value pairs to load
 * @return the timestamped collection, named "Assign TimeStamps"
 */
public PCollection<KV<String, TSProto>> setupDataInput(Pipeline pipeline,
    List<KV<String, TSProto>> data) {

  DoFn<KV<String, TSProto>, KV<String, TSProto>> stampWithValueTime =
      new DoFn<KV<String, TSProto>, KV<String, TSProto>>() {

        @Override
        public void processElement(ProcessContext c) throws Exception {
          KV<String, TSProto> element = c.element();
          c.outputWithTimestamp(element,
              new DateTime(element.getValue().getTime()).toInstant());
        }
      };

  return pipeline.apply("ReadData", Create.of(data))
      .apply(ParDo.of(stampWithValueTime))
      .setName("Assign TimeStamps");
}
 
Example #15
Source File: KvCoderComperator.java    From flink-dataflow with Apache License 2.0 6 votes vote down vote up
/**
 * Orders two pairs by the encoded form of their keys: encodings of different length are
 * ordered by length, encodings of equal length by their first differing (signed) byte.
 *
 * @param first the left-hand pair
 * @param second the right-hand pair
 * @return a negative, zero or positive value following the usual comparator convention
 */
@Override
public int compare(KV<K, V> first, KV<K, V> second) {
	try {
		buffer1.reset();
		buffer2.reset();
		keyCoder.encode(first.getKey(), buffer1, Coder.Context.OUTER);
		keyCoder.encode(second.getKey(), buffer2, Coder.Context.OUTER);

		final byte[] firstBytes = buffer1.getBuffer();
		final byte[] secondBytes = buffer2.getBuffer();

		// Sizes are compared before content.
		final int sizeDelta = buffer1.size() - buffer2.size();
		if (sizeDelta != 0) {
			return sizeDelta;
		}

		final int encodedLength = buffer1.size();
		int pos = 0;
		while (pos < encodedLength) {
			final int byteDelta = firstBytes[pos] - secondBytes[pos];
			if (byteDelta != 0) {
				return byteDelta;
			}
			pos++;
		}
		return 0;
	} catch (IOException e) {
		throw new RuntimeException("Could not compare reference.", e);
	}
}
 
Example #16
Source File: LoadBooksTest.java    From cloud-bigtable-examples with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that ENCODE_NGRAM turns an (n-gram, count) pair into a Put whose row is the UTF-8
 * n-gram and whose count cell holds the count as four big-endian bytes.
 */
@Test
public void doMutation_encodesKeysAndCounts() {
  // Arrange
  KV<String, Integer> ngram = KV.of("this is a test", 513);
  DoFnTester<KV<String, Integer>, Mutation> fnTester = DoFnTester.of(LoadBooks.ENCODE_NGRAM);

  // Act
  List<Mutation> mutations = fnTester.processBatch(ngram);

  // Assert
  Put put = (Put) mutations.get(0);
  assertThat(put.getRow()).isEqualTo("this is a test".getBytes(StandardCharsets.UTF_8));

  // The cell's value is a slice of its backing array.
  Cell countCell = put.get(LoadBooks.FAMILY, LoadBooks.COUNT_QUALIFIER).get(0);
  byte[] backingArray = countCell.getValueArray();
  int from = countCell.getValueOffset();
  int to = from + countCell.getValueLength();
  byte[] encodedCount = Arrays.copyOfRange(backingArray, from, to);

  // 513 == 0x00000201
  assertThat(encodedCount).isEqualTo(new byte[] {0, 0, 2, 1});
}
 
Example #17
Source File: DockerDo.java    From dockerflow with Apache License 2.0 6 votes vote down vote up
/**
 * Merges all WorkflowArgs held by the element's Wrapper into one WorkflowArgs and emits it
 * under the element's key. The first entry is copied and every later entry is gathered
 * into that copy. If the wrapper's map is empty, the emitted value is {@code null}.
 */
@Override
public void processElement(
    DoFn<KV<String, Wrapper>, KV<String, WorkflowArgs>>.ProcessContext c) throws Exception {

  LOG.info("Combining args");

  Wrapper wrapper = c.element().getValue();
  WorkflowArgs merged = null;

  // Iteration follows the order of the wrapper's map.
  for (WorkflowArgs current : wrapper.map.values()) {
    if (merged != null) {
      // Find differences and merge
      merged.gatherArgs(current);
      continue;
    }
    // Work on a copy so the first entry itself is left untouched.
    merged = new WorkflowArgs(current);
  }
  c.output(KV.of(c.element().getKey(), merged));
}
 
Example #18
Source File: FileToStateTest.java    From policyscanner with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds a file path whose last component is not the policy file and checks that exactly
 * one error record describing the invalid path reaches the error side output.
 */
@Test
public void testInvalidFilePathCreatesSideOutput() {
  List<String> badPath = getSampleProjectFilePath(getSampleProject());
  badPath.set(2, "POLICY.txt");
  String fileContent = getSamplePolicyBindingsString(1);
  GCPProject project = getSampleProject();
  List<KV<List<String>, String>> inputs = Arrays.asList(KV.of(badPath, fileContent));

  sideOutputTester.processBatch(inputs);
  List<GCPResourceErrorInfo> actualErrors = sideOutputTester.takeSideOutputElements(errorTag);

  String expectedMessage = String.format("Invalid policy filepath %s/%s/%s",
      badPath.get(0), badPath.get(1), badPath.get(2));
  List<GCPResourceErrorInfo> expectedErrors = new ArrayList<>();
  expectedErrors.add(new GCPResourceErrorInfo(project, expectedMessage));

  Assert.assertEquals(expectedErrors, actualErrors);
}
 
Example #19
Source File: TagStateWithSourceTest.java    From policyscanner with Apache License 2.0 6 votes vote down vote up
/**
 * Sends several identical (project, policy) pairs through the live tagger and checks that
 * each one comes back with its policy tagged as StateSource.LIVE.
 */
@Test
public void testLiveTaggerMultipleInput() {
  int elementCount = 5;
  GCPProject project = getSampleProject("");
  GCPResourceState policy = getSampleGCPResourcePolicy(project, 1);
  List<KV<GCPResource, GCPResourceState>> inputs = new ArrayList<>(elementCount);
  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> expectedOutputs =
      new ArrayList<>(elementCount);

  for (int i = 0; i < elementCount; ++i) {
    inputs.add(KV.of((GCPResource) project, policy));
    expectedOutputs.add(KV.of((GCPResource) project, KV.of(StateSource.LIVE, policy)));
  }

  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> results =
      this.liveTester.processBatch(inputs);

  // assertEquals takes (expected, actual).
  assertEquals(expectedOutputs, results);
}
 
Example #20
Source File: LoadBooks.java    From cloud-bigtable-examples with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the input file named in the options, parses it into n-gram counts, encodes each
 * count as a mutation and writes the mutations to the configured Cloud Bigtable table.
 *
 * @param args command-line arguments parsed into BigtableCsvOptions
 */
public static void main(String[] args) {
  // CloudBigtableOptions is one way to retrieve the options.  It's not required.
  // https://github.com/GoogleCloudPlatform/cloud-bigtable-examples/blob/master/java/dataflow-connector-examples/src/main/java/com/google/cloud/bigtable/dataflow/example/HelloWorldWrite.java
  BigtableCsvOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(BigtableCsvOptions.class);
  CloudBigtableTableConfiguration tableConfig =
      CloudBigtableTableConfiguration.fromCBTOptions(options);

  Pipeline pipeline = Pipeline.create(options);

  CloudBigtableIO.initializeForWrite(pipeline);

  // lines -> n-gram counts -> mutations -> Bigtable
  PCollection<String> lines = pipeline.apply(TextIO.Read.from(options.getInputFile()));
  applyPipelineToParseBooks(lines)
      .apply(ParDo.of(ENCODE_NGRAM))
      .apply(CloudBigtableIO.writeToTable(tableConfig));

  // Run the pipeline.
  pipeline.run();
}
 
Example #21
Source File: GCSFilesSourceTest.java    From policyscanner with Apache License 2.0 6 votes vote down vote up
/**
 * Checks start()/advance() of the reader against file pages set up with zero, one and two
 * entries: start() succeeds only when there is at least one entry, and advance() succeeds
 * once for each further entry.
 * NOTE(review): the second argument of setUpGetFilesPage is presumably the number of files
 * in the stubbed page - confirm against the helper.
 */
@Test
public void testReaderAdvance() {
  String objectName = REPOSITORY + this.source.getDirDelimiter() + "sampleProject";
  PipelineOptions options = PipelineOptionsFactory.create();
  BoundedReader<KV<List<String>, String>> reader;

  try {
    setUpGetFilesPage(objectName, 0);
    reader = this.source.createReader(options);
    assertFalse(reader.start());

    setUpGetFilesPage(objectName, 1);
    reader = this.source.createReader(options);
    assertTrue(reader.start());
    assertFalse(reader.advance());

    setUpGetFilesPage(objectName, 2);
    reader = this.source.createReader(options);
    assertTrue(reader.start());
    assertTrue(reader.advance());
    assertFalse(reader.advance());
  } catch (IOException e) {
    // Fail with the cause attached so the test report shows what went wrong.
    throw new AssertionError("Unexpected IOException while reading from the source", e);
  }
}
 
Example #22
Source File: KvCoderComperator.java    From flink-dataflow with Apache License 2.0 5 votes vote down vote up
/**
 * Hashes a pair by its key only.
 *
 * @param record the pair to hash
 * @return the key's {@code hashCode()}, or 0 when the key is {@code null}
 */
@Override
public int hash(KV<K, V> record) {
	final K key = record.getKey();
	return key == null ? 0 : key.hashCode();
}
 
Example #23
Source File: FlinkReduceFunction.java    From flink-dataflow with Apache License 2.0 5 votes vote down vote up
/**
 * Merges all partial accumulators of a group into one, extracts the final output from it
 * and emits that output under the key of the first element.
 * NOTE(review): assumes {@code values} is never empty; the first {@code next()} call is
 * not guarded.
 *
 * @param values the (key, accumulator) pairs to merge
 * @param out collector that receives the key together with the extracted output
 */
@Override
public void reduce(Iterable<KV<K, VA>> values, Collector<KV<K, VO>> out) throws Exception {
	Iterator<KV<K, VA>> it = values.iterator();

	KV<K, VA> current = it.next();
	K k = current.getKey();
	VA accumulator = current.getValue();

	while (it.hasNext()) {
		current = it.next();
		// Keep the merged result: mergeAccumulators returns the combined accumulator, and
		// discarding it would leave only the first accumulator in the output.
		accumulator = keyedCombineFn.mergeAccumulators(
				k, ImmutableList.of(accumulator, current.getValue()));
	}

	out.collect(KV.of(k, keyedCombineFn.extractOutput(k, accumulator)));
}
 
Example #24
Source File: FilePathFromPairTest.java    From policyscanner with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a (file path, file content) pair is reduced to exactly its file path.
 */
@Test
public void testNonEmptyFilePath() {
  List<String> filePath = Arrays.asList("sample", "file", "path");
  String fileContent = "";
  List<List<String>> result = tester.processBatch(KV.of(filePath, fileContent));

  // assertEquals takes (expected, actual).
  assertEquals(1, result.size());
  assertEquals(filePath, result.get(0));
}
 
Example #25
Source File: TagStateWithSourceTest.java    From policyscanner with Apache License 2.0 5 votes vote down vote up
/**
 * Sends a single (project, policy) pair through the live tagger and checks that it comes
 * back with its policy tagged as StateSource.LIVE.
 */
@Test
public void testLiveTaggerSingleInput() {
  GCPProject project = getSampleProject("");
  GCPResourceState policy = getSampleGCPResourcePolicy(project, 1);
  List<KV<GCPResource, GCPResourceState>> inputs = Arrays.asList(KV.of((GCPResource) project, policy));
  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> expectedOutputs =
      Arrays.asList(KV.of((GCPResource) project, KV.of(StateSource.LIVE, policy)));

  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> results =
      this.liveTester.processBatch(inputs);

  // assertEquals takes (expected, actual).
  assertEquals(expectedOutputs, results);
}
 
Example #26
Source File: WordCount.java    From flink-dataflow with Apache License 2.0 5 votes vote down vote up
/**
 * Splits lines of text into words and counts how often each word occurs.
 *
 * @param lines the lines of text to process
 * @return one (word, count) pair per distinct word
 */
@Override
public PCollection<KV<String, Long>> apply(PCollection<String> lines) {
	return lines
			// Convert lines of text into individual words.
			.apply(ParDo.of(new ExtractWordsFn()))
			// Count the number of times each word occurs.
			.apply(Count.<String>perElement());
}
 
Example #27
Source File: LoadBooks.java    From cloud-bigtable-examples with Apache License 2.0 5 votes vote down vote up
/**
 * Encodes an (n-gram, count) pair as a Put: the row key is the n-gram encoded with
 * STRING_ENCODING and the count is stored under FAMILY / COUNT_QUALIFIER.
 */
public void processElement(ProcessContext c) {
  KV<String, Integer> element = c.element();
  byte[] rowKey = element.getKey().getBytes(STRING_ENCODING);
  // Unbox explicitly so the int overload of Bytes.toBytes is used.
  int occurrences = element.getValue();
  byte[] encodedCount = Bytes.toBytes(occurrences);
  Put put = new Put(rowKey).addColumn(FAMILY, COUNT_QUALIFIER, encodedCount);
  c.output(put);
}
 
Example #28
Source File: CountRides.java    From cloud-dataflow-nyc-taxi-tycoon with Apache License 2.0 5 votes vote down vote up
/**
 * Keys a row by the centre of the grid cell that contains its "latitude"/"longitude".
 * Cells are 0.005 degrees wide (very approximately 500m, per the original author).
 *
 * @param t row with "latitude" and "longitude" fields whose string form parses as a float
 * @return the row keyed by the centre of its grid cell
 */
@Override
public KV<LatLon, TableRow> apply(TableRow t) {
  final float cellSize = 0.005f; // very approximately 500m
  final float halfCell = cellSize / 2;

  float latitude = Float.parseFloat(t.get("latitude").toString());
  float longitude = Float.parseFloat(t.get("longitude").toString());

  // Snap down to the cell's lower edge, then move to the middle of the cell.
  float cellLat = (float) Math.floor(latitude / cellSize) * cellSize + halfCell;
  float cellLon = (float) Math.floor(longitude / cellSize) * cellSize + halfCell;

  return KV.of(new LatLon(cellLat, cellLon), t);
}
 
Example #29
Source File: DockerDo.java    From dockerflow with Apache License 2.0 5 votes vote down vote up
/**
 * Chains one RunTask per retry onto the input: for a task whose arguments allow
 * {@code maxTries} attempts, RunTask transforms numbered 1 to {@code maxTries - 1} are
 * applied in sequence.
 *
 * @param input the collection to run the retries on
 * @return the collection after all retry transforms
 */
@Override
public PCollection<KV<String, WorkflowArgs>> apply(
    PCollection<KV<String, WorkflowArgs>> input) {
  PCollection<KV<String, WorkflowArgs>> chained = input;

  // Add retries
  int attempt = 1;
  while (attempt < ((WorkflowArgs) task.getArgs()).getMaxTries()) {
    chained = chained.apply(new RunTask(task, attempt));
    ++attempt;
  }
  return chained;
}
 
Example #30
Source File: FilterOutMatchingStateTest.java    From policyscanner with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that FilterOutMatchingState emits nothing when the main input and the side input
 * describe different projects.
 */
@Test
public void testFilterStateNoMatchingResources() {
  // Two different projects: one known-good (side input), one live (main input).
  GCPProject desiredProject = getSampleProject("_checked");
  GCPProject liveProject = getSampleProject("_live");
  GCPResourceState desiredPolicy = getSampleGCPResourcePolicy(desiredProject, 1);
  GCPResourceState livePolicy = getSampleGCPResourcePolicy(liveProject, 2);

  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> sideInputs =
      Arrays.asList(KV.of((GCPResource) desiredProject, KV.of(StateSource.DESIRED, desiredPolicy)));
  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> mainInputs =
      Arrays.asList(KV.of((GCPResource) liveProject, KV.of(StateSource.LIVE, livePolicy)));

  // Build the map view used as the side input.
  PCollection<KV<GCPResource, KV<StateSource, GCPResourceState>>> sideCollection =
      pipeline.apply(Create.of(sideInputs)).setCoder(
          KvCoder.of(SerializableCoder.of(GCPResource.class),
              KvCoder.of(SerializableCoder.of(StateSource.class),
                  SerializableCoder.of(GCPResourceState.class))));
  PCollectionView<Map<GCPResource, KV<StateSource, GCPResourceState>>> view =
      View.<GCPResource, KV<StateSource, GCPResourceState>>asMap().apply(sideCollection);

  DoFnTester<KV<GCPResource, KV<StateSource, GCPResourceState>>,
      KV<GCPResource, Map<StateSource, GCPResourceState>>> fnTester =
          DoFnTester.of(new FilterOutMatchingState(view));
  fnTester.setSideInputInGlobalWindow(view, sideInputs);

  List<KV<GCPResource, Map<StateSource, GCPResourceState>>> filtered =
      fnTester.processBatch(mainInputs);
  assertEquals(0, filtered.size());
}