org.apache.beam.sdk.transforms.SimpleFunction Java Examples
The following examples show how to use
org.apache.beam.sdk.transforms.SimpleFunction.
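Nearly every snippet below passes a SimpleFunction to MapElements.via(...). Unlike a plain SerializableFunction, a SimpleFunction subclass exposes its input and output TypeDescriptors, which lets MapElements infer the output type (and usually the coder) without an explicit .into(...) call. As a warm-up, here is a minimal, self-contained sketch in the same style; the class name and input values are illustrative only, and running it assumes a runner such as the Direct Runner is on the classpath:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.SimpleFunction;
import org.apache.beam.sdk.values.PCollection;

public class SimpleFunctionExample {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    PCollection<Integer> lengths =
        p.apply(Create.of("a", "bb", "ccc"))
            // Because SimpleFunction carries type information,
            // MapElements needs no .into(...) here.
            .apply(
                MapElements.via(
                    new SimpleFunction<String, Integer>() {
                      @Override
                      public Integer apply(String word) {
                        return word.length();
                      }
                    }));

    p.run().waitUntilFinish();
  }
}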
Example #1
Source File: WordCount.java From incubator-nemo with Apache License 2.0
/**
 * Static method to generate the word count Beam pipeline.
 * @param options options for the pipeline.
 * @param inputFilePath the input file path.
 * @param outputFilePath the output file path.
 * @return the generated pipeline.
 */
static Pipeline generateWordCountPipeline(final PipelineOptions options,
                                          final String inputFilePath,
                                          final String outputFilePath) {
  final Pipeline p = Pipeline.create(options);
  final PCollection<String> result = GenericSourceSink.read(p, inputFilePath)
      .apply(MapElements.<String, KV<String, Long>>via(
          new SimpleFunction<String, KV<String, Long>>() {
            @Override
            public KV<String, Long> apply(final String line) {
              final String[] words = line.split(" +");
              final String documentId = words[0] + "#" + words[1];
              final Long count = Long.parseLong(words[2]);
              return KV.of(documentId, count);
            }
          }))
      .apply(Sum.longsPerKey())
      .apply(MapElements.<KV<String, Long>, String>via(
          new SimpleFunction<KV<String, Long>, String>() {
            @Override
            public String apply(final KV<String, Long> kv) {
              return kv.getKey() + ": " + kv.getValue();
            }
          }));
  GenericSourceSink.write(result, outputFilePath);
  return p;
}
Example #2
Source File: BigQueryMapper.java From DataflowTemplates with Apache License 2.0
@Override
public PCollection<OutputT> expand(PCollection<InputT> tableKVPCollection) {
  return tableKVPCollection.apply(
      "TableRowExtractDestination",
      MapElements.via(
          new SimpleFunction<InputT, OutputT>() {
            @Override
            public OutputT apply(InputT input) {
              /*
                We run validation against every event to ensure all columns
                exist in source.
                If a column is in the event and not in BigQuery,
                the column is added to the table before the event can continue.
              */
              TableId tableId = getTableId(input);
              TableRow row = getTableRow(input);
              Map<String, LegacySQLTypeName> inputSchema = getObjectSchema(input);
              // TODO the Dynamic converter needs to use the tableId object rather than a string
              updateTableIfRequired(tableId, row, inputSchema);

              return getOutputObject(input);
              // return KV.of(tableId, row);
            }
          }));
}
Example #3
Source File: TextToBigQueryStreaming.java From DataflowTemplates with Apache License 2.0
/**
 * Method to read a BigQuery schema file from GCS and return the file contents as a string.
 *
 * @param gcsPath Path string for the schema file in GCS.
 * @return File contents as a string.
 */
private static ValueProvider<String> getSchemaFromGCS(ValueProvider<String> gcsPath) {
  return NestedValueProvider.of(
      gcsPath,
      new SimpleFunction<String, String>() {
        @Override
        public String apply(String input) {
          ResourceId sourceResourceId = FileSystems.matchNewResource(input, false);

          String schema;
          try (ReadableByteChannel rbc = FileSystems.open(sourceResourceId)) {
            try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
              try (WritableByteChannel wbc = Channels.newChannel(baos)) {
                ByteStreams.copy(rbc, wbc);
                schema = baos.toString(Charsets.UTF_8.name());
                LOG.info("Extracted schema: " + schema);
              }
            }
          } catch (IOException e) {
            LOG.error("Error extracting schema: " + e.getMessage());
            throw new RuntimeException(e);
          }
          return schema;
        }
      });
}
Example #4
Source File: BigQueryMapper.java From DataflowTemplates with Apache License 2.0
@Override
public PCollection<OutputT> expand(PCollection<InputT> tableKVPCollection) {
  return tableKVPCollection.apply(
      "TableRowExtractDestination",
      MapElements.via(
          new SimpleFunction<InputT, OutputT>() {
            @Override
            public OutputT apply(InputT input) {
              /*
                We run validation against every event to ensure all columns
                exist in source.
                If a column is in the event and not in BigQuery,
                the column is added to the table before the event can continue.
              */
              setUp();
              TableId tableId = getTableId(input);
              TableRow row = getTableRow(input);
              Map<String, LegacySQLTypeName> inputSchema = getObjectSchema(input);
              int retries = getMapperRetries();

              applyMapperToTableRow(tableId, row, inputSchema, retries);
              return getOutputObject(input);
            }
          }));
}
Example #5
Source File: StarterPipeline.java From beam with Apache License 2.0
public static void main(String[] args) {
  Pipeline p =
      Pipeline.create(PipelineOptionsFactory.fromArgs(args).withValidation().create());

  p.apply(Create.of("Hello", "World"))
      .apply(
          MapElements.via(
              new SimpleFunction<String, String>() {
                @Override
                public String apply(String input) {
                  return input.toUpperCase();
                }
              }))
      .apply(
          ParDo.of(
              new DoFn<String, Void>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  LOG.info(c.element());
                }
              }));

  p.run();
}
Example #6
Source File: JacksonTransformsTest.java From beam with Apache License 2.0
@Test
public void testParsingInvalidJsonsWithFailuresSimpleFunction() {
  WithFailures.Result<PCollection<MyPojo>, KV<String, String>> result =
      pipeline
          .apply(Create.of(Iterables.concat(VALID_JSONS, INVALID_JSONS)))
          .apply(
              ParseJsons.of(MyPojo.class)
                  .exceptionsVia(
                      new SimpleFunction<
                          WithFailures.ExceptionElement<String>, KV<String, String>>() {
                        @Override
                        public KV<String, String> apply(
                            WithFailures.ExceptionElement<String> failure) {
                          return KV.of(
                              failure.element(),
                              failure.exception().getClass().getCanonicalName());
                        }
                      }));
  result.output().setCoder(SerializableCoder.of(MyPojo.class));

  PAssert.that(result.output()).containsInAnyOrder(POJOS);
  assertParsingWithErrorFunctionHandler(result);

  pipeline.run();
}
Example #7
Source File: PubsubReader.java From beam with Apache License 2.0
@Override
public NativeReader<?> create(
    CloudObject cloudSourceSpec,
    Coder<?> coder,
    @Nullable PipelineOptions options,
    @Nullable DataflowExecutionContext executionContext,
    DataflowOperationContext operationContext)
    throws Exception {
  checkArgument(coder != null, "coder must not be null");

  @SuppressWarnings("unchecked")
  Coder<WindowedValue<Object>> typedCoder = (Coder<WindowedValue<Object>>) coder;

  SimpleFunction<PubsubMessage, Object> parseFn = null;
  byte[] attributesFnBytes =
      getBytes(cloudSourceSpec, PropertyNames.PUBSUB_SERIALIZED_ATTRIBUTES_FN, null);

  // If attributesFnBytes is set, Pubsub data will be in PubsubMessage protobuf format. The
  // array should contain a serialized Java function that accepts a PubsubMessage object. The
  // special case of a zero-length array allows pass-through of the raw protobuf.
  if (attributesFnBytes != null && attributesFnBytes.length > 0) {
    parseFn =
        (SimpleFunction<PubsubMessage, Object>)
            SerializableUtils.deserializeFromByteArray(attributesFnBytes, "serialized fn info");
  }

  return new PubsubReader<>(
      typedCoder, (StreamingModeExecutionContext) executionContext, parseFn);
}
Example #8
Source File: Window.java From beam with Apache License 2.0
@Override
public PCollection<T> expand(PCollection<T> input) {
  WindowingStrategy<?, ?> outputWindowingStrategy =
      getOutputWindowing(input.getWindowingStrategy());

  return input
      // We first apply a (trivial) transform to the input PCollection to produce a new
      // PCollection. This ensures that we don't modify the windowing strategy of the input
      // which may be used elsewhere.
      .apply(
          "Identity",
          MapElements.via(
              new SimpleFunction<T, T>() {
                @Override
                public T apply(T element) {
                  return element;
                }
              }))
      // Then we modify the windowing strategy.
      .setWindowingStrategyInternal(outputWindowingStrategy);
}
Example #9
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testCreateNeverWithStreaming() throws Exception {
  p.enableAbandonedNodeEnforcement(false);

  TableReference tableRef = new TableReference();
  tableRef.setDatasetId("dataset");
  tableRef.setTableId("sometable");

  PCollection<TableRow> tableRows =
      p.apply(GenerateSequence.from(0))
          .apply(
              MapElements.via(
                  new SimpleFunction<Long, TableRow>() {
                    @Override
                    public TableRow apply(Long input) {
                      return null;
                    }
                  }))
          .setCoder(TableRowJsonCoder.of());
  tableRows.apply(
      BigQueryIO.writeTableRows()
          .to(tableRef)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
          .withoutValidation());
}
Example #10
Source File: PubsubSink.java From beam with Apache License 2.0
PubsubSink(
    String topic,
    String timestampLabel,
    String idLabel,
    Coder<WindowedValue<T>> coder,
    SimpleFunction<T, PubsubMessage> formatFn,
    boolean withAttributes,
    StreamingModeExecutionContext context) {
  this.topic = topic;
  this.timestampLabel = timestampLabel;
  this.idLabel = idLabel;
  @SuppressWarnings({"unchecked", "rawtypes"})
  WindowedValueCoder<T> windowedCoder = (WindowedValueCoder) coder;
  this.coder = windowedCoder.getValueCoder();
  this.withAttributes = withAttributes;
  this.formatFn = formatFn;
  this.context = context;
}
Example #11
Source File: HadoopFormatIOReadTest.java From beam with Apache License 2.0
@BeforeClass
public static void setUp() {
  serConf = loadTestConfiguration(EmployeeInputFormat.class, Text.class, Employee.class);
  myKeyTranslate =
      new SimpleFunction<Text, String>() {
        @Override
        public String apply(Text input) {
          return input.toString();
        }
      };
  myValueTranslate =
      new SimpleFunction<Employee, String>() {
        @Override
        public String apply(Employee input) {
          return input.getEmpName() + "_" + input.getEmpAddress();
        }
      };
}
Example #12
Source File: HadoopFormatIOReadTest.java From beam with Apache License 2.0
/**
 * This test validates functionality of {@link HadoopFormatIO.Read#validateTransform()
 * Read.validateTransform()} function when myKeyTranslate's (simple function provided by user for
 * key translation) input type is not same as Hadoop InputFormat's keyClass(Which is property set
 * in configuration as "key.class").
 */
@Test
public void testReadValidationFailsWithWrongInputTypeKeyTranslationFunction() {
  SimpleFunction<LongWritable, String> myKeyTranslateWithWrongInputType =
      new SimpleFunction<LongWritable, String>() {
        @Override
        public String apply(LongWritable input) {
          return input.toString();
        }
      };
  HadoopFormatIO.Read<String, Employee> read =
      HadoopFormatIO.<String, Employee>read()
          .withConfiguration(serConf.get())
          .withKeyTranslation(myKeyTranslateWithWrongInputType);
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(
      String.format(
          "Key translation's input type is not same as hadoop InputFormat : %s key "
              + "class : %s",
          serConf.get().getClass("mapreduce.job.inputformat.class", InputFormat.class),
          serConf.get().getClass("key.class", Object.class)));
  read.validateTransform();
}
Example #13
Source File: HadoopFormatIOReadTest.java From beam with Apache License 2.0
/**
 * This test validates functionality of {@link HadoopFormatIO.Read#validateTransform()
 * Read.validateTransform()} function when myValueTranslate's (simple function provided by user
 * for value translation) input type is not same as Hadoop InputFormat's valueClass(Which is
 * property set in configuration as "value.class").
 */
@Test
public void testReadValidationFailsWithWrongInputTypeValueTranslationFunction() {
  SimpleFunction<LongWritable, String> myValueTranslateWithWrongInputType =
      new SimpleFunction<LongWritable, String>() {
        @Override
        public String apply(LongWritable input) {
          return input.toString();
        }
      };
  HadoopFormatIO.Read<Text, String> read =
      HadoopFormatIO.<Text, String>read()
          .withConfiguration(serConf.get())
          .withValueTranslation(myValueTranslateWithWrongInputType);
  String expectedMessage =
      String.format(
          "Value translation's input type is not same as hadoop InputFormat : "
              + "%s value class : %s",
          serConf.get().getClass("mapreduce.job.inputformat.class", InputFormat.class),
          serConf.get().getClass("value.class", Object.class));
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(expectedMessage);
  read.validateTransform();
}
Example #14
Source File: PAssert.java From beam with Apache License 2.0
public PCollectionSingletonIterableAssert(
    PCollection<Iterable<T>> actual,
    AssertionWindows rewindowingStrategy,
    SimpleFunction<Iterable<ValueInSingleWindow<Iterable<T>>>, Iterable<Iterable<T>>>
        paneExtractor,
    PAssertionSite site) {
  this.actual = actual;

  @SuppressWarnings("unchecked")
  Coder<T> typedCoder = (Coder<T>) actual.getCoder().getCoderArguments().get(0);
  this.elementCoder = typedCoder;

  this.rewindowingStrategy = rewindowingStrategy;
  this.paneExtractor = paneExtractor;
  this.site = site;
}
Example #15
Source File: KafkaIO.java From beam with Apache License 2.0
@Override
public PDone expand(PCollection<KV<K, V>> input) {
  checkArgument(getTopic() != null, "withTopic() is required");

  KvCoder<K, V> kvCoder = (KvCoder<K, V>) input.getCoder();
  return input
      .apply(
          "Kafka ProducerRecord",
          MapElements.via(
              new SimpleFunction<KV<K, V>, ProducerRecord<K, V>>() {
                @Override
                public ProducerRecord<K, V> apply(KV<K, V> element) {
                  return new ProducerRecord<>(getTopic(), element.getKey(), element.getValue());
                }
              }))
      .setCoder(ProducerRecordCoder.of(kvCoder.getKeyCoder(), kvCoder.getValueCoder()))
      .apply(getWriteRecordsTransform());
}
Example #16
Source File: HadoopFormatIO.java From beam with Apache License 2.0
/** Returns the serialized output of transformed key or value object. */
@SuppressWarnings("unchecked")
private <T, T3> T3 transformKeyOrValue(
    T input, @Nullable SimpleFunction<T, T3> simpleFunction, Coder<T3> coder)
    throws CoderException, ClassCastException {
  T3 output;
  if (null != simpleFunction) {
    output = simpleFunction.apply(input);
  } else {
    output = (T3) input;
  }
  return cloneIfPossiblyMutable(output, coder);
}
Example #17
Source File: HadoopFormatIO.java From beam with Apache License 2.0
@SuppressWarnings("WeakerAccess") protected HadoopInputFormatBoundedSource( SerializableConfiguration conf, Coder<K> keyCoder, Coder<V> valueCoder, @Nullable SimpleFunction<?, K> keyTranslationFunction, @Nullable SimpleFunction<?, V> valueTranslationFunction, SerializableSplit inputSplit) { this.conf = conf; this.inputSplit = inputSplit; this.keyCoder = keyCoder; this.valueCoder = valueCoder; this.keyTranslationFunction = keyTranslationFunction; this.valueTranslationFunction = valueTranslationFunction; }
Example #18
Source File: CassandraIOTest.java From beam with Apache License 2.0
@Test
public void testRead() throws Exception {
  PCollection<Scientist> output =
      pipeline.apply(
          CassandraIO.<Scientist>read()
              .withHosts(Collections.singletonList(CASSANDRA_HOST))
              .withPort(cassandraPort)
              .withKeyspace(CASSANDRA_KEYSPACE)
              .withTable(CASSANDRA_TABLE)
              .withCoder(SerializableCoder.of(Scientist.class))
              .withEntity(Scientist.class));

  PAssert.thatSingleton(output.apply("Count", Count.globally())).isEqualTo(NUM_ROWS);

  PCollection<KV<String, Integer>> mapped =
      output.apply(
          MapElements.via(
              new SimpleFunction<Scientist, KV<String, Integer>>() {
                @Override
                public KV<String, Integer> apply(Scientist scientist) {
                  return KV.of(scientist.name, scientist.id);
                }
              }));
  PAssert.that(mapped.apply("Count occurrences per scientist", Count.perKey()))
      .satisfies(
          input -> {
            for (KV<String, Long> element : input) {
              assertEquals(element.getKey(), NUM_ROWS / 10, element.getValue().longValue());
            }
            return null;
          });

  pipeline.run();
}
Example #19
Source File: HadoopFormatIO.java From beam with Apache License 2.0
@SuppressWarnings("unchecked") private HadoopInputFormatReader( HadoopInputFormatBoundedSource<K, V> source, @Nullable SimpleFunction keyTranslationFunction, @Nullable SimpleFunction valueTranslationFunction, SerializableSplit split, InputFormat inputFormatObj, TaskAttemptContext taskAttemptContext) { this.source = source; this.keyTranslationFunction = keyTranslationFunction; this.valueTranslationFunction = valueTranslationFunction; this.split = split; this.inputFormatObj = inputFormatObj; this.taskAttemptContext = taskAttemptContext; }
Example #20
Source File: SnowflakeIO.java From beam with Apache License 2.0
private PCollection<String> writeFiles(PCollection<T> input, String stagingBucketDir) {

  PCollection<String> mappedUserData =
      input
          .apply(
              MapElements.via(
                  new SimpleFunction<T, Object[]>() {
                    @Override
                    public Object[] apply(T element) {
                      return getUserDataMapper().mapRow(element);
                    }
                  }))
          .apply("Map Objects array to CSV lines", ParDo.of(new MapObjectsArrayToCsvFn()))
          .setCoder(StringUtf8Coder.of());

  WriteFilesResult filesResult =
      mappedUserData.apply(
          "Write files to specified location",
          FileIO.<String>write()
              .via(TextIO.sink())
              .to(stagingBucketDir)
              .withPrefix(getFileNameTemplate())
              .withSuffix(".csv")
              .withCompression(Compression.GZIP));

  return (PCollection)
      filesResult
          .getPerDestinationOutputFilenames()
          .apply("Parse KV filenames to Strings", Values.<String>create());
}
Example #21
Source File: TestPipelineTest.java From beam with Apache License 2.0
private static PCollection<String> pCollection(final Pipeline pipeline) {
  return pipeline
      .apply("Create", Create.of(WORDS).withCoder(StringUtf8Coder.of()))
      .apply(
          "Map1",
          MapElements.via(
              new SimpleFunction<String, String>() {
                @Override
                public String apply(final String input) {
                  return WHATEVER;
                }
              }));
}
Example #22
Source File: HadoopFormatIO.java From beam with Apache License 2.0
/** Transforms the values read from the source using the given value translation function. */
public Read<K, V> withValueTranslation(SimpleFunction<?, V> function) {
  checkArgument(function != null, "function can not be null");
  // Sets value class to value translation function's output class type.
  return toBuilder()
      .setValueTranslationFunction(function)
      .setValueTypeDescriptor(function.getOutputTypeDescriptor())
      .build();
}
Example #23
Source File: TestPipelineTest.java From beam with Apache License 2.0
@SuppressWarnings("UnusedReturnValue") private static PCollection<String> addTransform(final PCollection<String> pCollection) { return pCollection.apply( "Map2", MapElements.via( new SimpleFunction<String, String>() { @Override public String apply(final String input) { return WHATEVER; } })); }
Example #24
Source File: PipelineTest.java From beam with Apache License 2.0
private static PTransform<PCollection<? extends String>, PCollection<String>> addSuffix(
    final String suffix) {
  return MapElements.via(
      new SimpleFunction<String, String>() {
        @Override
        public String apply(String input) {
          return input + suffix;
        }
      });
}
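For context, a hedged sketch of how a helper like addSuffix might be applied in a test; the pipeline p, the input values, and the assertion below are illustrative, not taken from PipelineTest.java:

// The returned PTransform composes like any other MapElements transform.
PCollection<String> result =
    p.apply(Create.of("red", "blue"))
        .apply("AddSuffix", addSuffix("-ish"));
PAssert.that(result).containsInAnyOrder("red-ish", "blue-ish");
p.run();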
Example #25
Source File: PCollectionTupleTest.java From beam with Apache License 2.0
@Test
public void testExpandHasMatchingTags() {
  TupleTag<Integer> intTag = new TupleTag<>();
  TupleTag<String> strTag = new TupleTag<>();
  TupleTag<Long> longTag = new TupleTag<>();

  Pipeline p = TestPipeline.create();
  PCollection<Long> longs = p.apply(GenerateSequence.from(0).to(100));
  PCollection<String> strs = p.apply(Create.of("foo", "bar", "baz"));
  PCollection<Integer> ints =
      longs.apply(
          MapElements.via(
              new SimpleFunction<Long, Integer>() {
                @Override
                public Integer apply(Long input) {
                  return input.intValue();
                }
              }));

  Map<TupleTag<?>, PCollection<?>> pcsByTag =
      ImmutableMap.<TupleTag<?>, PCollection<?>>builder()
          .put(strTag, strs)
          .put(intTag, ints)
          .put(longTag, longs)
          .build();
  PCollectionTuple tuple =
      PCollectionTuple.of(intTag, ints).and(longTag, longs).and(strTag, strs);
  assertThat(tuple.getAll(), equalTo(pcsByTag));
  PCollectionTuple reconstructed = PCollectionTuple.empty(p);
  for (Entry<TupleTag<?>, PValue> taggedValue : tuple.expand().entrySet()) {
    TupleTag<?> tag = taggedValue.getKey();
    PValue value = taggedValue.getValue();
    assertThat("The tag should map back to the value", tuple.get(tag), equalTo(value));
    assertThat(value, equalTo(pcsByTag.get(tag)));
    reconstructed = reconstructed.and(tag, (PCollection) value);
  }

  assertThat(reconstructed, equalTo(tuple));
}
Example #26
Source File: HadoopFormatIO.java From beam with Apache License 2.0
/** Validates translation function given for key/value translation. */
private void validateTranslationFunction(
    TypeDescriptor<?> inputType, SimpleFunction<?, ?> simpleFunction, String errorMsg) {
  if (simpleFunction != null && !simpleFunction.getInputTypeDescriptor().equals(inputType)) {
    throw new IllegalArgumentException(
        String.format(errorMsg, getinputFormatClass().getRawType(), inputType.getRawType()));
  }
}
Example #27
Source File: KafkaIO.java From beam with Apache License 2.0
@Override
public PDone expand(PCollection<V> input) {
  return input
      .apply(
          "Kafka values with default key",
          MapElements.via(
              new SimpleFunction<V, KV<K, V>>() {
                @Override
                public KV<K, V> apply(V element) {
                  return KV.of(null, element);
                }
              }))
      .setCoder(KvCoder.of(new NullOnlyCoder<>(), input.getCoder()))
      .apply(kvWriteTransform);
}
Example #28
Source File: PAssert.java From beam with Apache License 2.0
private GroupThenAssertForSingleton(
    SerializableFunction<T, Void> checkerFn,
    AssertionWindows rewindowingStrategy,
    SimpleFunction<Iterable<ValueInSingleWindow<T>>, Iterable<T>> paneExtractor,
    PAssertionSite site) {
  this.checkerFn = checkerFn;
  this.rewindowingStrategy = rewindowingStrategy;
  this.paneExtractor = paneExtractor;
  this.site = site;
}
Example #29
Source File: PAssert.java From beam with Apache License 2.0
private GroupThenAssert(
    SerializableFunction<Iterable<T>, Void> checkerFn,
    AssertionWindows rewindowingStrategy,
    SimpleFunction<Iterable<ValueInSingleWindow<T>>, Iterable<T>> paneExtractor,
    PAssertionSite site) {
  this.checkerFn = checkerFn;
  this.rewindowingStrategy = rewindowingStrategy;
  this.paneExtractor = paneExtractor;
  this.site = site;
}