com.google.cloud.dataflow.sdk.values.KV Java Examples
The following examples show how to use
com.google.cloud.dataflow.sdk.values.KV.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: KvCoderComperator.java From flink-dataflow with Apache License 2.0 | 6 votes |
/**
 * Checks whether the key of {@code candidate} encodes to the same bytes as the reference.
 *
 * <p>The candidate's key is serialized with {@code keyCoder} into {@code buffer2}; the result is
 * then compared with the contents of {@code referenceBuffer}, first by length and then byte by
 * byte.
 *
 * @param candidate the key/value pair whose key is compared against the reference
 * @return {@code true} if both encodings have the same length and the same bytes
 * @throws RuntimeException wrapping the {@link IOException} if encoding fails
 */
@Override
public boolean equalToReference(KV<K, V> candidate) {
  try {
    buffer2.reset();
    keyCoder.encode(candidate.getKey(), buffer2, Coder.Context.OUTER);

    final int candidateLength = buffer2.size();
    if (referenceBuffer.size() != candidateLength) {
      return false;
    }

    final byte[] referenceBytes = referenceBuffer.getBuffer();
    final byte[] candidateBytes = buffer2.getBuffer();
    int index = 0;
    while (index < candidateLength && referenceBytes[index] == candidateBytes[index]) {
      index++;
    }
    // Reaching the end without a mismatch means the encodings are identical.
    return index == candidateLength;
  } catch (IOException e) {
    throw new RuntimeException("Could not compare reference.", e);
  }
}
Example #2
Source File: FilePathToLiveState.java From policyscanner with Apache License 2.0 | 6 votes |
/** * Convert the file path into the GCP resource object that it corresponds to. * @param processContext The ProcessContext object that contains context-specific * methods and objects. * @throws IOException Thrown when there's an error reading from the API. * @throws GeneralSecurityException Thrown when there's an error reading from the API. */ @Override public void processElement(ProcessContext processContext) throws IOException, GeneralSecurityException { List<String> filePath = processContext.element(); if (filePath.size() == 3 && filePath.get(2).equals(GCPResourcePolicy.getPolicyFile())) { // only project policies are supported for now. // filePath.size() must be 3 and of the form org_id/project_id/POLICY_FILE. GCPProject project = new GCPProject(filePath.get(1), filePath.get(0)); GCPResourceState policy = null; try { policy = project.getPolicy(); } catch (Exception e) { logger.log(Level.WARNING, "Error getting policy", e); } if (policy != null) { processContext.output(KV.of((GCPResource) project, policy)); } } else { throw new IllegalArgumentException("Malformed input to FilePathToLiveState."); } }
Example #3
Source File: ExtractState.java From policyscanner with Apache License 2.0 | 6 votes |
/**
 * Pairs a GCPProject with its policy and emits the pair on the main output.
 *
 * <p>Projects with a {@code null} id, and projects whose policy lookup fails or returns
 * {@code null}, are routed to the side output with an explanatory message instead.
 *
 * @param processContext The ProcessContext object that supplies the input project and receives
 *     the outputs.
 */
@Override
public void processElement(ProcessContext processContext) {
  GCPProject project = processContext.element();
  if (project.getId() == null) {
    this.addToSideOutput(processContext, project, "Null project id");
    return;
  }

  String failure = null;
  GCPResourceState state = null;
  try {
    state = project.getPolicy();
  } catch (Exception e) {
    failure = e.getMessage();
    logger.log(Level.FINE, "Error getting policy", e);
  }

  if (state != null) {
    processContext.output(KV.of((GCPResource) project, state));
    return;
  }
  // No policy: either the lookup threw (failure holds its message) or it returned null.
  this.addToSideOutput(processContext, project, String.format("Policy error %s", failure));
}
Example #4
Source File: JoinKnownGoodAndLiveStates.java From policyscanner with Apache License 2.0 | 6 votes |
/** * Process an element of the type KV<GCPResource, KV<StateResource, GCPResourceState>>. * The GCPResource is the resource that is being described by the GCPResourceState. In * this case, it's the GCP project. * The GCPResourceState is the attribute describing the GCPResource, i.e. the project policies. * StateSource represents the source of the GCPResourceState: * - it was either checked in as a known-good, or * - it is the live state of the resource * GCPResourceStates tagged with one StateSource (say, DESIRED) will be input through * a side input, and those tagged with the other will be input through the main input. * @param context The ProcessContext object that contains context-specific methods and objects. */ @Override public void processElement(ProcessContext context) { // the project GCPResource resource = context.element().getKey(); // the project's policies KV<StateSource, GCPResourceState> mainValue = context.element().getValue(); // if the known-good policies' projects contain this project... if (context.sideInput(this.view).containsKey(resource)) { // make sure there's an element in the side input with the same GCPResource. KV<StateSource, GCPResourceState> sideValue = context.sideInput(this.view).get(resource); // the HashMap will contain two entries, one for // the DESIRED state and one for the LIVE state. Map<StateSource, GCPResourceState> mismatchedStates = new HashMap<>(2); mismatchedStates.put(mainValue.getKey(), mainValue.getValue()); mismatchedStates.put(sideValue.getKey(), sideValue.getValue()); context.output(KV.of(resource, mismatchedStates)); } }
Example #5
Source File: FilterOutMatchingState.java From policyscanner with Apache License 2.0 | 6 votes |
/** * Process an element of the type KV<GCPResource, KV<StateResource, GCPResourceState>> * and output only those states that do not match. * The GCPResource is the resource that is being described by the GCPResourceState. In * this case, it's the GCP project. * The GCPResourceState is the attribute describing the GCPResource, i.e. the project policies. * StateSource represents the source of the GCPResourceState: * - it was either checked in as a known-good, or * - it is the live state of the resource * GCPResourceStates tagged with one StateSource (say, DESIRED) will be inputted through * a side input, and those tagged with the other will be inputted through the main input. * @param context The ProcessContext object that contains context-specific methods and objects. */ @Override public void processElement(ProcessContext context) { // the project GCPResource resource = context.element().getKey(); // the project's policies KV<StateSource, GCPResourceState> mainValue = context.element().getValue(); // if the known-good policies' projects contain this project... if (context.sideInput(this.view).containsKey(resource)) { // make sure there's an element in the side input with the same GCPResource. KV<StateSource, GCPResourceState> sideValue = context.sideInput(this.view).get(resource); if (!mainValue.getValue().equals(sideValue.getValue())) { // make sure the GCPResourceStates are different. // the HashMap will contain two entries, one for // the DESIRED state and one for the LIVE state. Map<StateSource, GCPResourceState> mismatchedStates = new HashMap<>(2); mismatchedStates.put(mainValue.getKey(), mainValue.getValue()); mismatchedStates.put(sideValue.getKey(), sideValue.getValue()); context.output(KV.of(resource, mismatchedStates)); } } }
Example #6
Source File: LoadBooksTest.java From cloud-bigtable-examples with Apache License 2.0 | 6 votes |
@Test public void parseBooks_returnsNgramsCounts() { // Arrange Pipeline p = TestPipeline.create(); PCollection<String> input = p.apply(Create.of(testFile)); // Act PCollection<KV<String, Integer>> output = LoadBooks.applyPipelineToParseBooks(input); // Assert DataflowAssert.that(output) .containsInAnyOrder( KV.of("despatch when art", 10), KV.of("despatch when came", 10), KV.of("despatch when published", 12), KV.of("despatch where was", 10), KV.of("despatch which made", 45), // There are two entries for "despatch which addressed". // Each entry has a different part of speech for "addressed". KV.of("despatch which addressed", 12 + 46), KV.of("despatch which admitted", 13), KV.of("despatch which allow", 14), KV.of("despatch which announced", 50), KV.of("despatch which answer", 32)); }
Example #7
Source File: TfIdfITCase.java From flink-dataflow with Apache License 2.0 | 6 votes |
/**
 * Builds and runs a small TF-IDF batch pipeline over three in-memory documents and writes the
 * distinct words to {@code resultPath}.
 *
 * @throws Exception if building or running the pipeline fails
 */
@Override
protected void testProgram() throws Exception {
  Pipeline pipeline = FlinkTestPipeline.createForBatch();
  // URI keys are encoded through their String form.
  pipeline.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class));

  PCollection<KV<URI, String>> documents =
      pipeline.apply(
          Create.of(
              KV.of(new URI("x"), "a b c d"),
              KV.of(new URI("y"), "a b c"),
              KV.of(new URI("z"), "a m n")));

  PCollection<KV<String, KV<URI, Double>>> wordToUriAndTfIdf =
      documents.apply(new TfIdf.ComputeTfIdf());

  // Keep each word once and write the result out.
  wordToUriAndTfIdf
      .apply(Keys.<String>create())
      .apply(RemoveDuplicates.<String>create())
      .apply(TextIO.Write.to(resultPath));

  pipeline.run();
}
Example #8
Source File: GCSFilesSource.java From policyscanner with Apache License 2.0 | 6 votes |
/**
 * Returns the file at the head of {@code currentFiles} as a path/content pair.
 *
 * <p>The content is fetched with {@code source.getFileContent}. Failures are reported as
 * {@link NoSuchElementException}; the underlying exception is attached as the cause so the
 * original error is not lost.
 *
 * @return A Key-Value pair where the key is the file path split on the source's directory
 *     delimiter and the value is the content of the file.
 * @throws NoSuchElementException If the file can't be read from the GCS API.
 */
@Override
public KV<List<String>, String> getCurrent() throws NoSuchElementException {
  String filePath = this.currentFiles.get(0);
  String fileContent;
  try {
    fileContent = this.source.getFileContent(filePath);
  } catch (IOException ioe) {
    NoSuchElementException notFound = new NoSuchElementException(
        "Object " + filePath + " not found in bucket " + this.source.bucket);
    // NoSuchElementException has no cause-taking constructor before Java 15.
    notFound.initCause(ioe);
    throw notFound;
  } catch (GeneralSecurityException gse) {
    NoSuchElementException denied = new NoSuchElementException(
        "Cannot access object " + filePath + " in bucket "
            + this.source.bucket + " due to security reasons");
    denied.initCause(gse);
    throw denied;
  }
  List<String> splitPath = Arrays.asList(filePath.split(this.source.getDirDelimiter()));
  return KV.of(splitPath, fileContent);
}
Example #9
Source File: FXTimeSeriesPipelineSRGTests.java From data-timeseries-java with Apache License 2.0 | 6 votes |
public PCollection<KV<String, TSProto>> setupDataInput(Pipeline pipeline, List<KV<String, TSProto>> data) { // Assert that we have 44 Elements in the PCollection PCollection<KV<String, TSProto>> tsData = pipeline.apply("ReadData", Create.of(data)) .apply(ParDo.of(new DoFn<KV<String, TSProto>, KV<String, TSProto>>() { @Override public void processElement(ProcessContext c) throws Exception { c.outputWithTimestamp(c.element(), new DateTime(c.element().getValue().getTime()).toInstant()); } })).setName("Assign TimeStamps"); return tsData; }
Example #10
Source File: FileToStateTest.java From policyscanner with Apache License 2.0 | 6 votes |
/**
 * Feeds the same (file path, file content) pair through the tester several times and checks that
 * each input yields the sample project paired with the sample policy.
 */
@Test
public void testMultipleElements() {
  int elementCount = 5;
  GCPProject project = getSampleProject();
  List<String> filePath = getSampleProjectFilePath(project);
  String fileContent = getSamplePolicyBindingsString(1);
  GCPResourceState policy = getSampleGCPResourcePolicy(project, 1);

  List<KV<List<String>, String>> inputs = new ArrayList<>(elementCount);
  for (int i = 0; i < elementCount; ++i) {
    inputs.add(KV.of(filePath, fileContent));
  }

  List<KV<GCPResource, GCPResourceState>> results = this.tester.processBatch(inputs);

  // JUnit expects (expected, actual); the original order produced misleading failure messages.
  assertEquals(elementCount, results.size());
  for (KV<GCPResource, GCPResourceState> result : results) {
    assertEquals(project, result.getKey());
    assertEquals(policy, result.getValue());
  }
}
Example #11
Source File: FileToStateTest.java From policyscanner with Apache License 2.0 | 6 votes |
/**
 * Checks that a file path whose last segment is not the policy file name results in a single
 * "Invalid policy filepath" entry on the error side output.
 */
@Test
public void testInvalidFilePathCreatesSideOutput() {
  // Start from a valid sample path and break its file name segment.
  List<String> filePath = getSampleProjectFilePath(getSampleProject());
  filePath.set(2, "POLICY.txt");
  String fileContent = getSamplePolicyBindingsString(1);
  GCPProject project = getSampleProject();

  List<KV<List<String>, String>> inputs = Arrays.asList(KV.of(filePath, fileContent));
  sideOutputTester.processBatch(inputs);
  List<GCPResourceErrorInfo> sideOutputs = sideOutputTester.takeSideOutputElements(errorTag);

  String expectedMessage =
      String.format(
          "Invalid policy filepath %s/%s/%s",
          filePath.get(0), filePath.get(1), filePath.get(2));
  List<GCPResourceErrorInfo> expected = new ArrayList<>();
  expected.add(new GCPResourceErrorInfo(project, expectedMessage));

  Assert.assertEquals(expected, sideOutputs);
}
Example #12
Source File: FlinkGroupByKeyWrapper.java From flink-dataflow with Apache License 2.0 | 6 votes |
/**
 * Keys a stream of windowed key/value pairs by the key of each pair.
 *
 * <p>When the key coder is a {@code VoidCoder}, every element is keyed by the shared
 * {@code VoidCoderTypeSerializer.VoidValue.INSTANCE} placeholder instead of the element's own
 * key. The selector reports a {@code CoderTypeInformation} built from the key coder as its
 * produced type.
 *
 * @param inputDataStream the stream to key
 * @param inputKvCoder the coder of the key/value pairs; its key coder drives the key type
 * @return the keyed stream
 */
public static <K, V> KeyedStream<WindowedValue<KV<K, V>>, K> groupStreamByKey(
    DataStream<WindowedValue<KV<K, V>>> inputDataStream, KvCoder<K, V> inputKvCoder) {
  final Coder<K> keyCoder = inputKvCoder.getKeyCoder();
  final TypeInformation<K> keyTypeInfo = new CoderTypeInformation<>(keyCoder);
  final boolean isKeyVoid = keyCoder instanceof VoidCoder;

  KeySelectorWithQueryableResultType<K, V> keySelector =
      new KeySelectorWithQueryableResultType<K, V>() {

        @Override
        public K getKey(WindowedValue<KV<K, V>> value) throws Exception {
          if (isKeyVoid) {
            return (K) VoidCoderTypeSerializer.VoidValue.INSTANCE;
          }
          return value.getValue().getKey();
        }

        @Override
        public TypeInformation<K> getProducedType() {
          return keyTypeInfo;
        }
      };

  return inputDataStream.keyBy(keySelector);
}
Example #13
Source File: KvCoderComperator.java From flink-dataflow with Apache License 2.0 | 6 votes |
/**
 * Orders two key/value pairs by the encoded form of their keys.
 *
 * <p>Both keys are serialized with {@code keyCoder} into the scratch buffers. Encodings of
 * different length are ordered by length; encodings of equal length are ordered by the first
 * differing byte (signed byte difference).
 *
 * @param first the left-hand pair
 * @param second the right-hand pair
 * @return a negative value, zero, or a positive value as described above
 * @throws RuntimeException wrapping the {@link IOException} if encoding fails
 */
@Override
public int compare(KV<K, V> first, KV<K, V> second) {
  try {
    buffer1.reset();
    buffer2.reset();
    keyCoder.encode(first.getKey(), buffer1, Coder.Context.OUTER);
    keyCoder.encode(second.getKey(), buffer2, Coder.Context.OUTER);

    final int lengthDelta = buffer1.size() - buffer2.size();
    if (lengthDelta != 0) {
      return lengthDelta;
    }

    final byte[] firstBytes = buffer1.getBuffer();
    final byte[] secondBytes = buffer2.getBuffer();
    final int length = buffer1.size();
    for (int pos = 0; pos < length; pos++) {
      int byteDelta = firstBytes[pos] - secondBytes[pos];
      if (byteDelta != 0) {
        return byteDelta;
      }
    }
    return 0;
  } catch (IOException e) {
    throw new RuntimeException("Could not compare reference.", e);
  }
}
Example #14
Source File: LoadBooksTest.java From cloud-bigtable-examples with Apache License 2.0 | 6 votes |
@Test public void doMutation_encodesKeysAndCounts() { // Arrange DoFnTester<KV<String, Integer>, Mutation> tester = DoFnTester.of(LoadBooks.ENCODE_NGRAM); KV<String, Integer> input = KV.of("this is a test", 513); // Act List<Mutation> output = tester.processBatch(input); // Assert Put put = (Put) output.get(0); assertThat(put.getRow()).isEqualTo("this is a test".getBytes(StandardCharsets.UTF_8)); Cell valueCell = put.get(LoadBooks.FAMILY, LoadBooks.COUNT_QUALIFIER).get(0); byte[] valueArray = valueCell.getValueArray(); byte[] value = Arrays.copyOfRange( valueArray, valueCell.getValueOffset(), valueCell.getValueOffset() + valueCell.getValueLength()); assertThat(value).isEqualTo(new byte[] {0, 0, 2, 1}); }
Example #15
Source File: TagStateWithSourceTest.java From policyscanner with Apache License 2.0 | 6 votes |
/**
 * Checks that the live tagger wraps every input state in a {@code KV} keyed by
 * {@code StateSource.LIVE} when given several identical inputs.
 */
@Test
public void testLiveTaggerMultipleInput() {
  int elementCount = 5;
  GCPProject project = getSampleProject("");
  GCPResourceState policy = getSampleGCPResourcePolicy(project, 1);

  List<KV<GCPResource, GCPResourceState>> inputs = new ArrayList<>(elementCount);
  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> expectedOutputs =
      new ArrayList<>(elementCount);
  for (int i = 0; i < elementCount; ++i) {
    inputs.add(KV.of((GCPResource) project, policy));
    expectedOutputs.add(KV.of((GCPResource) project, KV.of(StateSource.LIVE, policy)));
  }

  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> results =
      this.liveTester.processBatch(inputs);

  // JUnit expects (expected, actual); the original order produced misleading failure messages.
  assertEquals(expectedOutputs, results);
}
Example #16
Source File: GCSFilesSourceTest.java From policyscanner with Apache License 2.0 | 6 votes |
/**
 * Exercises {@code start()} / {@code advance()} on readers created after
 * {@code setUpGetFilesPage} was called with 0, 1 and 2.
 *
 * <p>NOTE(review): the second argument of {@code setUpGetFilesPage} appears to be the number of
 * files the stubbed listing returns; confirm against the helper.
 */
@Test
public void testReaderAdvance() {
  String objectName = REPOSITORY + this.source.getDirDelimiter() + "sampleProject";
  PipelineOptions options = PipelineOptionsFactory.create();
  BoundedReader<KV<List<String>, String>> reader;

  try {
    setUpGetFilesPage(objectName, 0);
    reader = this.source.createReader(options);
    assertFalse(reader.start());

    setUpGetFilesPage(objectName, 1);
    reader = this.source.createReader(options);
    assertTrue(reader.start());
    assertFalse(reader.advance());

    setUpGetFilesPage(objectName, 2);
    reader = this.source.createReader(options);
    assertTrue(reader.start());
    assertTrue(reader.advance());
    assertFalse(reader.advance());
  } catch (IOException e) {
    // Report what went wrong instead of failing with an empty message.
    fail("Unexpected IOException while reading: " + e);
  }
}
Example #17
Source File: LoadBooks.java From cloud-bigtable-examples with Apache License 2.0 | 6 votes |
public static void main(String[] args) { // CloudBigtableOptions is one way to retrieve the options. It's not required. // https://github.com/GoogleCloudPlatform/cloud-bigtable-examples/blob/master/java/dataflow-connector-examples/src/main/java/com/google/cloud/bigtable/dataflow/example/HelloWorldWrite.java BigtableCsvOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(BigtableCsvOptions.class); CloudBigtableTableConfiguration config = CloudBigtableTableConfiguration.fromCBTOptions(options); Pipeline p = Pipeline.create(options); CloudBigtableIO.initializeForWrite(p); PCollection<KV<String, Integer>> ngrams = applyPipelineToParseBooks(p.apply(TextIO.Read.from(options.getInputFile()))); PCollection<Mutation> mutations = ngrams.apply(ParDo.of(ENCODE_NGRAM)); mutations.apply(CloudBigtableIO.writeToTable(config)); // Run the pipeline. p.run(); }
Example #18
Source File: GroupAlsoByWindowTest.java From flink-dataflow with Apache License 2.0 | 6 votes |
/**
 * Compares the output of the operator built by {@code createTestingOperatorAndState} for the
 * fixed-window / after-watermark strategy with the expected records and watermarks.
 *
 * @throws Exception if creating, running or closing the test harness fails
 */
@Test
public void testAfterWatermarkProgram() throws Exception {
  WindowingStrategy strategy = fixedWindowWithAfterWatermarkTriggerStrategy;
  long initialTime = 0L;

  OneInputStreamOperatorTestHarness<WindowedValue<KV<String, Integer>>,
      WindowedValue<KV<String, Integer>>> testHarness =
      createTestingOperatorAndState(strategy, initialTime);

  // Timestamps of the expected records and watermarks, relative to initialTime.
  long firstRecordTime = initialTime + 1;
  long firstWatermarkTime = initialTime + 10000;
  long lastRecordTime = initialTime + 19500;
  long lastWatermarkTime = initialTime + 20000;

  ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
  expectedOutput.add(
      new StreamRecord<>(
          makeWindowedValue(
              strategy,
              KV.of("key1", 6),
              new Instant(firstRecordTime),
              null,
              PaneInfo.createPane(true, true, PaneInfo.Timing.ON_TIME)),
          firstRecordTime));
  expectedOutput.add(new Watermark(firstWatermarkTime));
  expectedOutput.add(
      new StreamRecord<>(
          makeWindowedValue(
              strategy,
              KV.of("key1", 11),
              new Instant(firstWatermarkTime),
              null,
              PaneInfo.createPane(true, true, PaneInfo.Timing.ON_TIME)),
          firstWatermarkTime));
  expectedOutput.add(
      new StreamRecord<>(
          makeWindowedValue(
              strategy,
              KV.of("key2", 1),
              new Instant(lastRecordTime),
              null,
              PaneInfo.createPane(true, true, PaneInfo.Timing.ON_TIME)),
          lastRecordTime));
  expectedOutput.add(new Watermark(lastWatermarkTime));

  TestHarnessUtil.assertOutputEqualsSorted(
      "Output was not correct.",
      expectedOutput,
      testHarness.getOutput(),
      new ResultSortComparator());

  testHarness.close();
}
Example #19
Source File: DockerDo.java From dockerflow with Apache License 2.0 | 6 votes |
@Override public void processElement( DoFn<KV<String, Wrapper>, KV<String, WorkflowArgs>>.ProcessContext c) throws Exception { LOG.info("Combining args"); Wrapper value = c.element().getValue(); WorkflowArgs retval = null; // Iterate in order for (WorkflowArgs wa : value.map.values()) { // Modify a copy if (retval == null) { retval = new WorkflowArgs(wa); // Find differences and merge } else { retval.gatherArgs(wa); } } c.output(KV.of(c.element().getKey(), retval)); }
Example #20
Source File: FlinkPartialReduceFunction.java From flink-dataflow with Apache License 2.0 | 6 votes |
@Override public void combine(Iterable<KV<K, VI>> elements, Collector<KV<K, VA>> out) throws Exception { final Iterator<KV<K, VI>> iterator = elements.iterator(); // create accumulator using the first elements key KV<K, VI> first = iterator.next(); K key = first.getKey(); VI value = first.getValue(); VA accumulator = keyedCombineFn.createAccumulator(key); accumulator = keyedCombineFn.addInput(key, accumulator, value); while(iterator.hasNext()) { value = iterator.next().getValue(); accumulator = keyedCombineFn.addInput(key, accumulator, value); } out.collect(KV.of(key, accumulator)); }
Example #21
Source File: GenerateSampleData.java From data-timeseries-java with Apache License 2.0 | 6 votes |
/**
 * Appends ten one-minute-spaced TSProto records for {@code key} to {@code ts}.
 *
 * <p>The ask and bid price start at {@code value}, rise by {@code change} for the first five
 * records and then fall by {@code change} for the next five; the sixth record repeats the price
 * of the fifth.
 *
 * @param ts the list the records are appended to
 * @param time the timestamp of the first record
 * @param key the key of every record
 * @param value the starting price
 * @param change the price step between consecutive records
 */
public static void generateSequentialList(List<KV<String, TSProto>> ts, Instant time,
    String key, double value, double change) {

  Instant tsTime = new Instant(time);

  // Rising half: five records, price stepping up after each one.
  int emitted = 0;
  while (emitted < 5) {
    TSProto rising = TSProto.newBuilder().setAskPrice(value).setBidPrice(value).setKey(key)
        .setIsLive(true).setTime(tsTime.getMillis()).build();
    ts.add(KV.of(key, rising));
    tsTime = tsTime.plus(Duration.standardMinutes(1));
    value += change;
    emitted++;
  }

  // Undo the last increment so the falling half starts from the peak price.
  value -= change;

  // Falling half: five records, price stepping down after each one.
  while (emitted < 10) {
    TSProto falling = TSProto.newBuilder().setAskPrice(value).setBidPrice(value).setKey(key)
        .setIsLive(true).setTime(tsTime.getMillis()).build();
    ts.add(KV.of(key, falling));
    tsTime = tsTime.plus(Duration.standardMinutes(1));
    value -= change;
    emitted++;
  }
}
Example #22
Source File: CountRides.java From cloud-dataflow-nyc-taxi-tycoon with Apache License 2.0 | 5 votes |
@Override public KV<LatLon, TableRow> apply(TableRow t) { float lat = Float.parseFloat(t.get("latitude").toString()); float lon = Float.parseFloat(t.get("longitude").toString()); final float PRECISION = 0.005f; // very approximately 500m float roundedLat = (float) Math.floor(lat / PRECISION) * PRECISION + PRECISION / 2; float roundedLon = (float) Math.floor(lon / PRECISION) * PRECISION + PRECISION / 2; LatLon key = new LatLon(roundedLat, roundedLon); return KV.of(key, t); }
Example #23
Source File: KvCoderComperator.java From flink-dataflow with Apache License 2.0 | 5 votes |
/**
 * Hashes a record by its key.
 *
 * @param record the key/value pair to hash
 * @return the key's {@code hashCode()}, or 0 when the key is {@code null}
 */
@Override
public int hash(KV<K, V> record) {
  final K key = record.getKey();
  return key == null ? 0 : key.hashCode();
}
Example #24
Source File: FileToState.java From policyscanner with Apache License 2.0 | 5 votes |
/** * Converts a Key-Value pair of (FilePath, FileContent) to (GCPResource, GCPResourceState). * The FilePath is a list of Strings which represents the location of a file. * The FileContent is the content of the file described by the FilePath. * The path is used to obtain the resource, and the content describes the state of that resource. * @param processContext The ProcessContext object that contains processContext-specific * methods and objects. */ @Override public void processElement(ProcessContext processContext) { KV<List<String>, String> input = processContext.element(); List<String> filePath = input.getKey(); String fileContent = input.getValue(); String orgName = filePath.size() > 0 ? filePath.get(0) : null; String projectId = filePath.size() > 1 ? filePath.get(1) : null; String policyFileName = filePath.size() > 2 ? filePath.get(2) : null; GCPProject project = new GCPProject(projectId, orgName); if (filePath.size() == 3 && GCPResourcePolicy.getPolicyFile().equals(policyFileName)) { // only project policies are supported for now. // filePath.size() must be 3 and of the form org_id/project_id/POLICY_FILE. Gson gson = new Gson(); try { List<PolicyBinding> bindings = Arrays.asList( gson.fromJson(fileContent, PolicyBinding[].class)); GCPResourceState policy = new GCPResourcePolicy(project, bindings); processContext.output(KV.of((GCPResource) project, policy)); return; } catch (JsonSyntaxException jse) { addToSideOutput( processContext, project, String.format("Invalid policy json %s/%s/%s", orgName, projectId, policyFileName)); } } addToSideOutput( processContext, project, String.format("Invalid policy filepath %s/%s/%s", orgName, projectId, policyFileName)); }
Example #25
Source File: FilterOutPolicies.java From policyscanner with Apache License 2.0 | 5 votes |
/**
 * Re-emits an element with its states narrowed to {@code GCPResourcePolicy}.
 *
 * <p>If any state in the element's map is not a {@code GCPResourcePolicy}, the whole element is
 * dropped and nothing is emitted.
 *
 * @param context the process context that supplies the element and receives the output
 */
@Override
public void processElement(ProcessContext context) {
  Map<StateSource, GCPResourceState> states = context.element().getValue();
  Map<StateSource, GCPResourcePolicy> policies = new HashMap<>(states.size());

  for (Map.Entry<StateSource, GCPResourceState> stateEntry : states.entrySet()) {
    GCPResourceState state = stateEntry.getValue();
    if (state instanceof GCPResourcePolicy) {
      policies.put(stateEntry.getKey(), (GCPResourcePolicy) state);
    } else {
      // A single non-policy state disqualifies the whole element.
      return;
    }
  }

  context.output(KV.of(context.element().getKey(), policies));
}
Example #26
Source File: DesiredStateEnforcer.java From policyscanner with Apache License 2.0 | 5 votes |
/**
 * Construct a DesiredStateEnforcer: creates the pipeline from the given options, wires up its
 * stages through {@code constructPipeline} and resets the enforced-state counter to zero.
 *
 * @param options The options used to construct the pipeline.
 * @param knownGoodSource The source used to read the known-good.
 * @param org The organization the projects are to be read from.
 */
public DesiredStateEnforcer(PipelineOptions options,
    BoundedSource<KV<List<String>, String>> knownGoodSource,
    String org) {
  Pipeline createdPipeline = Pipeline.create(options);
  this.pipeline = createdPipeline;
  this.outputMessages = constructPipeline(createdPipeline, org, knownGoodSource);
  this.enforcedStates = 0L;
}
Example #27
Source File: DesiredStateEnforcer.java From policyscanner with Apache License 2.0 | 5 votes |
/**
 * Wires up the enforcement pipeline and returns the messages it produces.
 *
 * <p>Known-good states are read from {@code knownGoodSource} and tagged DESIRED; live states are
 * read for the projects of {@code org} and tagged LIVE. The known-good states are offered as a
 * map side input to the live states, non-matching pairs are kept, and
 * {@code discrepancyAutoFixMessenger} is applied to them.
 *
 * @param pipeline the pipeline the stages are added to
 * @param org the organization whose live projects are read
 * @param knownGoodSource the source of the checked-in known-good files
 * @return the output of the "Fix discrepancies" stage
 */
private PCollection<String> constructPipeline(Pipeline pipeline, String org,
    BoundedSource<KV<List<String>, String>> knownGoodSource) {

  // --- Known-good half -------------------------------------------------------------------
  // Read files from GCS.
  PCollection<KV<List<String>, String>> checkedInFiles =
      pipeline.apply("Read known-good data", Read.from(knownGoodSource));
  // Convert files to GCPResourceState objects.
  PCollection<KV<GCPResource, GCPResourceState>> checkedInStates =
      checkedInFiles.apply(
          ParDo.named("Convert file data to Java Objects").of(new FileToState()));
  // Tag the state objects to indicate they're from a checked-in repo and not live.
  PCollection<KV<GCPResource, KV<StateSource, GCPResourceState>>> desiredStates =
      checkedInStates.apply(
          ParDo.named("Mark states as being known-good")
              .of(new TagStateWithSource(StateSource.DESIRED)));

  // --- Live half -------------------------------------------------------------------------
  // Read projects from the CRM API.
  PCollection<GCPProject> liveProjects =
      pipeline.apply("Read live projects", Read.from(new LiveProjectSource(org)));
  // Extract project states.
  PCollection<KV<GCPResource, GCPResourceState>> liveProjectStates =
      liveProjects.apply(ParDo.named("Extract project policies").of(new ExtractState()));
  // Tag the states to indicate they're live and not from a checked-in source.
  PCollection<KV<GCPResource, KV<StateSource, GCPResourceState>>> liveStates =
      liveProjectStates.apply(
          ParDo.named("Mark states as being live")
              .of(new TagStateWithSource(StateSource.LIVE)));

  // --- Join ------------------------------------------------------------------------------
  // The known-good states become a map side input for the live states.
  PCollectionView<Map<GCPResource, KV<StateSource, GCPResourceState>>> desiredStatesView =
      desiredStates.apply(View.<GCPResource, KV<StateSource, GCPResourceState>>asMap());
  PCollection<KV<GCPResource, Map<StateSource, GCPResourceState>>> discrepancies =
      liveStates.apply(
          ParDo.named("Find states that don't match")
              .withSideInputs(desiredStatesView)
              .of(new FilterOutMatchingState(desiredStatesView)));

  // Construct an alert message for all the discrepancies found and fix the discrepancies.
  return discrepancies.apply(
      ParDo.named("Fix discrepancies").of(discrepancyAutoFixMessenger));
}
Example #28
Source File: OnDemandLiveStateChecker.java From policyscanner with Apache License 2.0 | 5 votes |
/**
 * Wires up the on-demand checking pipeline and returns the notification messages it produces.
 *
 * <p>Known-good states are read from {@code knownGoodSource} and tagged DESIRED. The file paths
 * of those same files drive the lookup of the live states, which are tagged LIVE and offered as
 * a map side input to the known-good states. Non-matching pairs are turned into messages by
 * {@code StateDiscrepancyMessenger}.
 *
 * @param pipeline the pipeline the stages are added to
 * @param knownGoodSource the source of the checked-in known-good files
 * @return the output of the "Generate notification messages" stage
 */
private PCollection<String> constructPipeline(Pipeline pipeline,
    BoundedSource<KV<List<String>, String>> knownGoodSource) {

  // --- Known-good half -------------------------------------------------------------------
  // Read files from GCS.
  PCollection<KV<List<String>, String>> checkedInFiles =
      pipeline.apply("Read known-good data", Read.from(knownGoodSource));
  // Convert files to GCPResourceState objects.
  PCollection<KV<GCPResource, GCPResourceState>> checkedInStates =
      checkedInFiles.apply(
          ParDo.named("Convert file data to Java objects").of(new FileToState()));
  // Tag the state objects to indicate they're from a checked-in repo and not live.
  PCollection<KV<GCPResource, KV<StateSource, GCPResourceState>>> desiredStates =
      checkedInStates.apply(
          ParDo.named("Mark states as being known-good")
              .of(new TagStateWithSource(StateSource.DESIRED)));

  // --- Live half -------------------------------------------------------------------------
  // Extract a list of checked-in projects from GCS.
  PCollection<List<String>> checkedInPaths =
      checkedInFiles.apply("Extract just the file paths", ParDo.of(new FilePathFromPair()));
  // Read the live version of the states of the checked-in projects.
  PCollection<KV<GCPResource, GCPResourceState>> liveProjectStates =
      checkedInPaths.apply(
          ParDo.named("Get live resource and states from file path")
              .of(new FilePathToLiveState()));
  // Tag the states to indicate they're live and not from a checked-in source.
  PCollection<KV<GCPResource, KV<StateSource, GCPResourceState>>> liveStates =
      liveProjectStates.apply(
          ParDo.named("Mark states as being live")
              .of(new TagStateWithSource(StateSource.LIVE)));

  // --- Join ------------------------------------------------------------------------------
  // The live states become a map side input for the known-good states.
  PCollectionView<Map<GCPResource, KV<StateSource, GCPResourceState>>> liveStatesView =
      liveStates.apply(View.<GCPResource, KV<StateSource, GCPResourceState>>asMap());
  PCollection<KV<GCPResource, Map<StateSource, GCPResourceState>>> discrepancies =
      desiredStates.apply(
          ParDo.named("Find states that don't match")
              .withSideInputs(liveStatesView)
              .of(new FilterOutMatchingState(liveStatesView)));

  // Construct an alert message for all the discrepancies found.
  return discrepancies.apply(
      ParDo.named("Generate notification messages").of(new StateDiscrepancyMessenger()));
}
Example #29
Source File: LiveStateChecker.java From policyscanner with Apache License 2.0 | 5 votes |
/**
 * Construct a LiveStateChecker to compare the live states of GCP resources
 * with their checked-in known-good counterparts.
 *
 * <p>The constructor only stores its arguments and creates the pipeline from the options.
 *
 * @param options The options used to construct the pipeline.
 * @param knownGoodSource The source used to read the known-good.
 * @param org The organization the projects are to be read from.
 */
public LiveStateChecker(PipelineOptions options,
    BoundedSource<KV<List<String>, String>> knownGoodSource,
    String org) {
  this.org = org;
  this.knownGoodSource = knownGoodSource;
  this.pipeline = Pipeline.create(options);
}
Example #30
Source File: ExtractStateTest.java From policyscanner with Apache License 2.0 | 5 votes |
/**
 * Checks that a single project is paired with the policy returned by the stubbed
 * {@code getIamPolicy} call.
 *
 * @throws IOException declared because the stubbed {@code execute()} call declares it
 */
@Test
public void testOneElement() throws IOException {
  GCPProject project = getSampleProject("");
  List<GCPProject> projects = new ArrayList<>(1);
  projects.add(project);

  when(this.getIamPolicy.execute()).thenReturn(getSamplePolicy(1));

  List<KV<GCPResource, GCPResourceState>> results = this.tester.processBatch(projects);

  // JUnit expects (expected, actual); the original order produced misleading failure messages.
  assertEquals(1, results.size());
  assertEquals(project, results.get(0).getKey());
  assertEquals(getSampleGCPResourcePolicy(project), results.get(0).getValue());
}