org.apache.flink.api.common.functions.MapFunction Java Examples
The following examples show how to use
org.apache.flink.api.common.functions.MapFunction.
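For orientation, here is a minimal, self-contained sketch of the interface before the collected examples: a MapFunction<IN, OUT> transforms each element of a stream one-to-one. The element values, the doubling logic, and the job name are illustrative assumptions, not code taken from any of the projects below.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class MapFunctionSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // A MapFunction<IN, OUT> is applied to every element and produces exactly one output element.
        DataStream<Integer> doubled = env.fromElements(1, 2, 3)
                .map(new MapFunction<Integer, Integer>() {
                    @Override
                    public Integer map(Integer value) throws Exception {
                        return value * 2; // illustrative transformation
                    }
                });

        doubled.print();
        env.execute("MapFunction sketch");
    }
}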
Example #1
Source File: Main.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // set parallelism to 1
    env.setParallelism(1);
    // env.setParallelism(4);

    SingleOutputStreamOperator<Word> data = env.socketTextStream("localhost", 9001)
            .map(new MapFunction<String, Word>() {
                @Override
                public Word map(String value) throws Exception {
                    String[] split = value.split(",");
                    return new Word(split[0], Integer.valueOf(split[1]), Long.valueOf(split[2]));
                }
            });

    // Punctuated Watermark
    data.assignTimestampsAndWatermarks(new WordPunctuatedWatermark());

    data.print();
    env.execute("watermark demo");
}
Example #2
Source File: Main.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);
    DataStreamSource<String> data = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),   // this Kafka topic must match the topic used in the utility class above
            new SimpleStringSchema(),
            props));

    data.map(new MapFunction<String, Object>() {
        @Override
        public Object map(String string) throws Exception {
            writeEventToHbase(string, parameterTool);
            return string;
        }
    }).print();

    env.execute("flink learning connectors hbase");
}
Example #3
Source File: BaseComQueue.java From Alink with Apache License 2.0 | 6 votes |
private DataSet<byte[]> clearObjs(DataSet<byte[]> raw) {
    final int localSessionId = sessionId;
    DataSet<byte[]> clear = expandDataSet2MaxParallelism(
        BatchOperator
            .getExecutionEnvironmentFromDataSets(raw)
            .fromElements(0))
        .mapPartition(new MapPartitionFunction<Integer, byte[]>() {
            @Override
            public void mapPartition(Iterable<Integer> values, Collector<byte[]> out) {
                SessionSharedObjs.clear(localSessionId);
            }
        });

    return raw
        .map(new MapFunction<byte[], byte[]>() {
            @Override
            public byte[] map(byte[] value) {
                return value;
            }
        })
        .withBroadcastSet(clear, "barrier")
        .name("clearReturn");
}
Example #4
Source File: Graph.java From flink with Apache License 2.0 | 6 votes |
/**
 * Apply a function to the attribute of each edge in the graph.
 *
 * @param mapper the map function to apply.
 * @return a new graph
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public <NV> Graph<K, VV, NV> mapEdges(final MapFunction<Edge<K, EV>, NV> mapper) {
    TypeInformation<K> keyType = ((TupleTypeInfo<?>) edges.getType()).getTypeAt(0);

    TypeInformation<NV> valueType;
    if (mapper instanceof ResultTypeQueryable) {
        valueType = ((ResultTypeQueryable) mapper).getProducedType();
    } else {
        valueType = TypeExtractor.createTypeInfo(MapFunction.class, mapper.getClass(), 1, edges.getType(), null);
    }

    TypeInformation<Edge<K, NV>> returnType = (TypeInformation<Edge<K, NV>>) new TupleTypeInfo(
            Edge.class, keyType, keyType, valueType);

    return mapEdges(mapper, returnType);
}
Example #5
Source File: NiFiSourceMain.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    SiteToSiteClientConfig clientConfig = new SiteToSiteClient.Builder()
            .url("http://localhost:8080/nifi")
            .portName("Data for Flink")
            .requestBatchCount(5)
            .buildConfig();

    SourceFunction<NiFiDataPacket> nifiSource = new NiFiSource(clientConfig);
    DataStream<NiFiDataPacket> streamSource = env.addSource(nifiSource).setParallelism(2);

    DataStream<String> dataStream = streamSource.map(new MapFunction<NiFiDataPacket, String>() {
        @Override
        public String map(NiFiDataPacket value) throws Exception {
            return new String(value.getContent(), Charset.defaultCharset());
        }
    });

    dataStream.print();

    env.execute();
}
Example #6
Source File: NoRestartStrategyMain.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(ParameterTool.fromArgs(args));
    env.setRestartStrategy(RestartStrategies.noRestart());

    env.addSource(new SourceFunction<Long>() {
        @Override
        public void run(SourceContext<Long> sourceContext) throws Exception {
            while (true) {
                sourceContext.collect(null);
            }
        }

        @Override
        public void cancel() {
        }
    })
            .map((MapFunction<Long, Long>) aLong -> aLong / 1)
            .print();

    env.execute("zhisheng no Restart Strategy example");
}
Example #7
Source File: ClosureCleanerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Override
public MapFunction<Integer, Integer> getMap() {
    return new MapFunction<Integer, Integer>() {
        @Override
        public Integer map(Integer value) throws Exception {
            return value + add;
        }
    };
}
Example #8
Source File: ConnectedComponents.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private static DataSet<Long> getVertexDataSet(ExecutionEnvironment env, ParameterTool params) {
    if (params.has("vertices")) {
        return env.readCsvFile(params.get("vertices")).types(Long.class).map(
                new MapFunction<Tuple1<Long>, Long>() {
                    public Long map(Tuple1<Long> value) {
                        return value.f0;
                    }
                });
    } else {
        System.out.println("Executing Connected Components example with default vertices data set.");
        System.out.println("Use --vertices to specify file input.");
        return ConnectedComponentsData.getDefaultVertexDataSet(env);
    }
}
Example #9
Source File: TypeExtractorTest.java From flink with Apache License 2.0 | 5 votes |
@SuppressWarnings({ "unchecked", "rawtypes" })
@Test
public void testInputInference2() {
    EdgeMapper2<Boolean> em = new EdgeMapper2<Boolean>();
    TypeInformation<?> ti = TypeExtractor.getMapReturnTypes((MapFunction) em, Types.BOOLEAN);

    Assert.assertTrue(ti.isTupleType());
    Assert.assertEquals(3, ti.getArity());
    TupleTypeInfo<?> tti = (TupleTypeInfo<?>) ti;
    Assert.assertEquals(BasicTypeInfo.LONG_TYPE_INFO, tti.getTypeAt(0));
    Assert.assertEquals(BasicTypeInfo.LONG_TYPE_INFO, tti.getTypeAt(1));
    Assert.assertEquals(BasicTypeInfo.BOOLEAN_TYPE_INFO, tti.getTypeAt(2));
}
Example #10
Source File: AvroTypeExtractionTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test
public void testSerializeWithAvro() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableForceAvro();
    Path in = new Path(inFile.getAbsoluteFile().toURI());

    AvroInputFormat<User> users = new AvroInputFormat<>(in, User.class);
    DataSet<User> usersDS = env.createInput(users)
            .map((MapFunction<User, User>) value -> {
                Map<CharSequence, Long> ab = new HashMap<>(1);
                ab.put("hehe", 12L);
                value.setTypeMap(ab);
                return value;
            });

    usersDS.writeAsText(resultPath);

    env.execute("Simple Avro read job");

    expected = "{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null, \"type_long_test\": null, \"type_double_test\": 123.45, \"type_null_test\": null, " +
            "\"type_bool_test\": true, \"type_array_string\": [\"ELEMENT 1\", \"ELEMENT 2\"], \"type_array_boolean\": [true, false], \"type_nullable_array\": null, " +
            "\"type_enum\": \"GREEN\", \"type_map\": {\"hehe\": 12}, \"type_fixed\": null, \"type_union\": null, \"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", " +
            "\"city\": \"London\", \"state\": \"London\", \"zip\": \"NW1 6XE\"}, \"type_bytes\": {\"bytes\": \"\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\"}, " +
            "\"type_date\": 2014-03-01, \"type_time_millis\": 12:12:12.000, \"type_time_micros\": 123456, \"type_timestamp_millis\": 2014-03-01T12:12:12.321Z, \"type_timestamp_micros\": 123456, " +
            "\"type_decimal_bytes\": {\"bytes\": \"\\u0007Ð\"}, \"type_decimal_fixed\": [7, -48]}\n" +
            "{\"name\": \"Charlie\", \"favorite_number\": null, \"favorite_color\": \"blue\", \"type_long_test\": 1337, \"type_double_test\": 1.337, \"type_null_test\": null, " +
            "\"type_bool_test\": false, \"type_array_string\": [], \"type_array_boolean\": [], \"type_nullable_array\": null, \"type_enum\": \"RED\", \"type_map\": {\"hehe\": 12}, " +
            "\"type_fixed\": null, \"type_union\": null, \"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\", \"state\": \"London\", \"zip\": \"NW1 6XE\"}, " +
            "\"type_bytes\": {\"bytes\": \"\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\"}, \"type_date\": 2014-03-01, \"type_time_millis\": 12:12:12.000, " +
            "\"type_time_micros\": 123456, \"type_timestamp_millis\": 2014-03-01T12:12:12.321Z, \"type_timestamp_micros\": 123456, \"type_decimal_bytes\": {\"bytes\": \"\\u0007Ð\"}, " +
            "\"type_decimal_fixed\": [7, -48]}\n";
}
Example #11
Source File: KeyFunctions.java From flink with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked")
public static <T, K> org.apache.flink.api.common.operators.Operator<Tuple2<K, T>> appendKeyExtractor(
        org.apache.flink.api.common.operators.Operator<T> input,
        SelectorFunctionKeys<T, K> key) {

    if (input instanceof Union) {
        // if input is a union, we apply the key extractors recursively to all inputs
        org.apache.flink.api.common.operators.Operator<T> firstInput = ((Union) input).getFirstInput();
        org.apache.flink.api.common.operators.Operator<T> secondInput = ((Union) input).getSecondInput();

        org.apache.flink.api.common.operators.Operator<Tuple2<K, T>> firstInputWithKey =
                appendKeyExtractor(firstInput, key);
        org.apache.flink.api.common.operators.Operator<Tuple2<K, T>> secondInputWithKey =
                appendKeyExtractor(secondInput, key);

        return new Union(firstInputWithKey, secondInputWithKey, input.getName());
    }

    TypeInformation<T> inputType = key.getInputType();
    TypeInformation<Tuple2<K, T>> typeInfoWithKey = createTypeWithKey(key);
    KeyExtractingMapper<T, K> extractor = new KeyExtractingMapper(key.getKeyExtractor());

    MapOperatorBase<T, Tuple2<K, T>, MapFunction<T, Tuple2<K, T>>> mapper =
            new MapOperatorBase<T, Tuple2<K, T>, MapFunction<T, Tuple2<K, T>>>(
                    extractor,
                    new UnaryOperatorInformation(inputType, typeInfoWithKey),
                    "Key Extractor"
            );

    mapper.setInput(input);
    mapper.setParallelism(input.getParallelism());

    return mapper;
}
Example #12
Source File: TypeExtractorTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testSqlTimeTypes() {
    MapFunction<?, ?> function = new MapFunction<Tuple3<Date, Time, Timestamp>, Tuple3<Date, Time, Timestamp>>() {
        @Override
        public Tuple3<Date, Time, Timestamp> map(Tuple3<Date, Time, Timestamp> value) throws Exception {
            return null;
        }
    };

    TypeInformation<?> ti = TypeExtractor.getMapReturnTypes(
            function,
            (TypeInformation) TypeInformation.of(new TypeHint<Tuple3<Date, Time, Timestamp>>() {
            }));

    Assert.assertTrue(ti.isTupleType());
    TupleTypeInfo<?> tti = (TupleTypeInfo<?>) ti;
    Assert.assertEquals(SqlTimeTypeInfo.DATE, tti.getTypeAt(0));
    Assert.assertEquals(SqlTimeTypeInfo.TIME, tti.getTypeAt(1));
    Assert.assertEquals(SqlTimeTypeInfo.TIMESTAMP, tti.getTypeAt(2));

    // use getForClass()
    Assert.assertEquals(tti.getTypeAt(0), TypeExtractor.getForClass(Date.class));
    Assert.assertEquals(tti.getTypeAt(1), TypeExtractor.getForClass(Time.class));
    Assert.assertEquals(tti.getTypeAt(2), TypeExtractor.getForClass(Timestamp.class));

    // use getForObject()
    Assert.assertEquals(SqlTimeTypeInfo.DATE, TypeExtractor.getForObject(Date.valueOf("1998-12-12")));
    Assert.assertEquals(SqlTimeTypeInfo.TIME, TypeExtractor.getForObject(Time.valueOf("12:37:45")));
    Assert.assertEquals(SqlTimeTypeInfo.TIMESTAMP, TypeExtractor.getForObject(Timestamp.valueOf("1998-12-12 12:37:45")));
}
Example #13
Source File: MapCancelingITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
public void executeTask(MapFunction<Integer, Integer> mapper) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env
            .createInput(new InfiniteIntegerInputFormat(false))
            .map(mapper)
            .output(new DiscardingOutputFormat<Integer>());

    env.setParallelism(PARALLELISM);

    runAndCancelJob(env.createProgramPlan(), 5 * 1000, 10 * 1000);
}
Example #14
Source File: GraphCsvReader.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates a Graph from CSV input without edge values.
 * The vertex values are specified through a vertices input file or a user-defined map function.
 * If no vertices input file is provided, the vertex IDs are automatically created from the edges
 * input file.
 *
 * @param vertexKey the type of the vertex IDs
 * @param vertexValue the type of the vertex values
 * @return a Graph where the vertex IDs and vertex values have the specified types.
 */
@SuppressWarnings({ "serial", "unchecked" })
public <K, VV> Graph<K, VV, NullValue> vertexTypes(Class<K> vertexKey, Class<VV> vertexValue) {

    if (edgeReader == null) {
        throw new RuntimeException("The edge input file cannot be null!");
    }

    DataSet<Edge<K, NullValue>> edges = edgeReader
            .types(vertexKey, vertexKey)
            .name(GraphCsvReader.class.getName())
            .map(new Tuple2ToEdgeMap<>())
            .name("To Edge");

    // the vertex value can be provided by an input file or a user-defined mapper
    if (vertexReader != null) {
        DataSet<Vertex<K, VV>> vertices = vertexReader
                .types(vertexKey, vertexValue)
                .name(GraphCsvReader.class.getName())
                .map(new Tuple2ToVertexMap<>())
                .name("Type conversion");
        return Graph.fromDataSet(vertices, edges, executionContext);
    } else if (mapper != null) {
        return Graph.fromDataSet(edges, (MapFunction<K, VV>) mapper, executionContext);
    } else {
        throw new RuntimeException("Vertex values have to be specified through a vertices input file "
                + "or a user-defined map function.");
    }
}
Example #15
Source File: TypeExtractorTest.java From flink with Apache License 2.0 | 5 votes |
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testBigBasicTypes() {
    MapFunction<?, ?> function = new MapFunction<Tuple2<BigInteger, BigDecimal>, Tuple2<BigInteger, BigDecimal>>() {
        @Override
        public Tuple2<BigInteger, BigDecimal> map(Tuple2<BigInteger, BigDecimal> value) throws Exception {
            return null;
        }
    };

    TypeInformation<?> ti = TypeExtractor.getMapReturnTypes(
            function,
            (TypeInformation) TypeInformation.of(new TypeHint<Tuple2<BigInteger, BigDecimal>>() {
            }));

    Assert.assertTrue(ti.isTupleType());
    TupleTypeInfo<?> tti = (TupleTypeInfo<?>) ti;
    Assert.assertEquals(BasicTypeInfo.BIG_INT_TYPE_INFO, tti.getTypeAt(0));
    Assert.assertEquals(BasicTypeInfo.BIG_DEC_TYPE_INFO, tti.getTypeAt(1));

    // use getForClass()
    Assert.assertTrue(TypeExtractor.getForClass(BigInteger.class).isBasicType());
    Assert.assertTrue(TypeExtractor.getForClass(BigDecimal.class).isBasicType());
    Assert.assertEquals(tti.getTypeAt(0), TypeExtractor.getForClass(BigInteger.class));
    Assert.assertEquals(tti.getTypeAt(1), TypeExtractor.getForClass(BigDecimal.class));

    // use getForObject()
    Assert.assertEquals(BasicTypeInfo.BIG_INT_TYPE_INFO, TypeExtractor.getForObject(new BigInteger("42")));
    Assert.assertEquals(BasicTypeInfo.BIG_DEC_TYPE_INFO, TypeExtractor.getForObject(new BigDecimal("42.42")));
}
Example #16
Source File: DataStreamAllroundTestJobFactory.java From flink with Apache License 2.0 | 5 votes |
static <IN, OUT, STATE> ArtificialKeyedStateMapper<IN, OUT> createArtificialKeyedStateMapper(
        MapFunction<IN, OUT> mapFunction,
        JoinFunction<IN, STATE, STATE> inputAndOldStateToNewState,
        List<TypeSerializer<STATE>> stateSerializers,
        List<Class<STATE>> stateClasses) {

    List<ArtificialStateBuilder<IN>> artificialStateBuilders = new ArrayList<>(stateSerializers.size());
    for (TypeSerializer<STATE> typeSerializer : stateSerializers) {
        artificialStateBuilders.add(createValueStateBuilder(
                inputAndOldStateToNewState,
                new ValueStateDescriptor<>(
                        "valueState-" + typeSerializer.getClass().getSimpleName(),
                        typeSerializer)));

        artificialStateBuilders.add(createListStateBuilder(
                inputAndOldStateToNewState,
                new ListStateDescriptor<>(
                        "listState-" + typeSerializer.getClass().getSimpleName(),
                        typeSerializer)));
    }

    for (Class<STATE> stateClass : stateClasses) {
        artificialStateBuilders.add(createValueStateBuilder(
                inputAndOldStateToNewState,
                new ValueStateDescriptor<>(
                        "valueState-" + stateClass.getSimpleName(),
                        stateClass)));

        artificialStateBuilders.add(createListStateBuilder(
                inputAndOldStateToNewState,
                new ListStateDescriptor<>(
                        "listState-" + stateClass.getSimpleName(),
                        stateClass)));
    }

    return new ArtificialKeyedStateMapper<>(mapFunction, artificialStateBuilders);
}
Example #17
Source File: BaseComQueue.java From Alink with Apache License 2.0 | 5 votes |
private static DataSet<Row> serializeModel(DataSet<byte[]> model) {
    return model
            .map(new MapFunction<byte[], Row>() {
                @Override
                public Row map(byte[] value) {
                    return (Row) SerializationUtils.deserialize(value);
                }
            })
            .name("serializeModel");
}
Example #18
Source File: GroupCombineITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test
// check if all API methods are callable
public void testAPI() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple1<String>> ds = CollectionDataSets.getStringDataSet(env).map(new MapFunction<String, Tuple1<String>>() {
        @Override
        public Tuple1<String> map(String value) throws Exception {
            return new Tuple1<>(value);
        }
    });

    // all methods on DataSet
    ds.combineGroup(new GroupCombineFunctionExample())
            .output(new DiscardingOutputFormat<Tuple1<String>>());

    // all methods on UnsortedGrouping
    ds.groupBy(0).combineGroup(new GroupCombineFunctionExample())
            .output(new DiscardingOutputFormat<Tuple1<String>>());

    // all methods on SortedGrouping
    ds.groupBy(0).sortGroup(0, Order.ASCENDING).combineGroup(new GroupCombineFunctionExample())
            .output(new DiscardingOutputFormat<Tuple1<String>>());

    env.execute();
}
Example #19
Source File: PlanGeneratorTest.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testGenerate() {

    final String fileA = "fileA";
    final String fileB = "fileB";

    final Map<String, DistributedCache.DistributedCacheEntry> originalArtifacts = Stream.of(
            Tuple2.of(fileA, new DistributedCache.DistributedCacheEntry("test1", true)),
            Tuple2.of(fileB, new DistributedCache.DistributedCacheEntry("test2", false))
    ).collect(Collectors.toMap(x -> x.f0, x -> x.f1));

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(10);
    env.registerCachedFile("test1", fileA, true);
    env.registerCachedFile("test2", fileB, false);

    env.fromElements(1, 3, 5)
            .map((MapFunction<Integer, String>) value -> String.valueOf(value + 1))
            .writeAsText("/tmp/csv");

    final Plan generatedPlanUnderTest = env.createProgramPlan("test");

    final Map<String, DistributedCache.DistributedCacheEntry> retrievedArtifacts = generatedPlanUnderTest
            .getCachedFiles()
            .stream()
            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));

    assertEquals(1, generatedPlanUnderTest.getDataSinks().size());
    assertEquals(10, generatedPlanUnderTest.getDefaultParallelism());
    assertEquals(env.getConfig(), generatedPlanUnderTest.getExecutionConfig());
    assertEquals("test", generatedPlanUnderTest.getJobName());

    assertEquals(originalArtifacts.size(), retrievedArtifacts.size());
    assertEquals(originalArtifacts.get(fileA), retrievedArtifacts.get(fileA));
    assertEquals(originalArtifacts.get(fileB), retrievedArtifacts.get(fileB));
}
Example #20
Source File: TypeExtractorTest.java From flink with Apache License 2.0 | 5 votes |
@SuppressWarnings({ "unchecked", "rawtypes" })
@Test(expected = InvalidTypesException.class)
public void testGenericTypeWithSuperclassInput() {
    TypeInformation<?> inputType = TypeExtractor.createTypeInfo(Map.class);

    MapFunction<?, ?> function = new MapFunction<HashMap<String, Object>, Map<String, Object>>() {
        @Override
        public Map<String, Object> map(HashMap<String, Object> stringObjectMap) throws Exception {
            return stringObjectMap;
        }
    };

    TypeExtractor.getMapReturnTypes(function, (TypeInformation) inputType);
}
Example #21
Source File: ConnectedComponents.java From flink with Apache License 2.0 | 5 votes |
private static DataSet<Long> getVertexDataSet(ExecutionEnvironment env, ParameterTool params) {
    if (params.has("vertices")) {
        return env.readCsvFile(params.get("vertices")).types(Long.class).map(
                new MapFunction<Tuple1<Long>, Long>() {
                    public Long map(Tuple1<Long> value) {
                        return value.f0;
                    }
                });
    } else {
        System.out.println("Executing Connected Components example with default vertices data set.");
        System.out.println("Use --vertices to specify file input.");
        return ConnectedComponentsData.getDefaultVertexDataSet(env);
    }
}
Example #22
Source File: ClosureCleanerTest.java From flink with Apache License 2.0 | 5 votes |
@Override
public MapFunction<Integer, Integer> getMap() {
    return new MapFunction<Integer, Integer>() {
        @Override
        public Integer map(Integer value) throws Exception {
            return value + add;
        }
    };
}
Example #23
Source File: TypeExtractorTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@SuppressWarnings({ "unchecked", "rawtypes" })
@Test
public void testInputInference1() {
    EdgeMapper<String, Double> em = new EdgeMapper<String, Double>();
    TypeInformation<?> ti = TypeExtractor.getMapReturnTypes(
            (MapFunction) em,
            TypeInformation.of(new TypeHint<Tuple3<String, String, Double>>(){}));

    Assert.assertTrue(ti.isTupleType());
    Assert.assertEquals(3, ti.getArity());
    TupleTypeInfo<?> tti = (TupleTypeInfo<?>) ti;
    Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tti.getTypeAt(0));
    Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tti.getTypeAt(1));
    Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tti.getTypeAt(2));
}
Example #24
Source File: Main4.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // set parallelism to 1
    env.setParallelism(1);
    // env.setParallelism(4);

    OutputTag<Word> lateDataTag = new OutputTag<Word>("late") {
    };

    SingleOutputStreamOperator<Word> data = env.socketTextStream("localhost", 9001)
            .map(new MapFunction<String, Word>() {
                @Override
                public Word map(String value) throws Exception {
                    String[] split = value.split(",");
                    return new Word(split[0], Integer.valueOf(split[1]), Long.valueOf(split[2]));
                }
            }).assignTimestampsAndWatermarks(new WordPeriodicWatermark());

    SingleOutputStreamOperator<Word> sum = data.keyBy(0)
            .timeWindow(Time.seconds(10))
            // .allowedLateness(Time.milliseconds(2))
            .sideOutputLateData(lateDataTag)
            .sum(1);

    sum.print();

    sum.getSideOutput(lateDataTag)
            .print();

    env.execute("watermark demo");
}
Example #25
Source File: Main2.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // set parallelism to 1
    env.setParallelism(1);
    // env.setParallelism(4);

    SingleOutputStreamOperator<Word> data = env.socketTextStream("localhost", 9001)
            .map(new MapFunction<String, Word>() {
                @Override
                public Word map(String value) throws Exception {
                    String[] split = value.split(",");
                    return new Word(split[0], Integer.valueOf(split[1]), Long.valueOf(split[2]));
                }
            });

    // BoundedOutOfOrdernessTimestampExtractor
    data.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<Word>(Time.seconds(10)) {
        @Override
        public long extractTimestamp(Word element) {
            return element.getTimestamp();
        }
    });

    data.print();
    env.execute("watermark demo");
}
Example #26
Source File: Main4.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(2);

    DataStreamSource<String> data = env.socketTextStream("localhost", 9001);

    data.map(new MapFunction<String, Tuple2<String, Long>>() {
        @Override
        public Tuple2<String, Long> map(String s) throws Exception {
            String[] split = s.split(",");
            return new Tuple2<>(split[0], Long.valueOf(split[1]));
        }
    }).assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<Tuple2<String, Long>>() {
        private long currentTimestamp;

        @Nullable
        @Override
        public Watermark getCurrentWatermark() {
            return new Watermark(currentTimestamp);
        }

        @Override
        public long extractTimestamp(Tuple2<String, Long> tuple2, long l) {
            long timestamp = tuple2.f1;
            currentTimestamp = Math.max(timestamp, currentTimestamp);
            return timestamp;
        }
    }).keyBy(0)
            .window(EventTimeSessionWindows.withGap(Time.minutes(5)))
            .sum(1)
            .print("session ");

    System.out.println(env.getExecutionPlan());
    env.execute();
}
Example #27
Source File: SequenceStreamingFileSinkITCase.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testWriteSequenceFile() throws Exception {
    final File folder = TEMPORARY_FOLDER.newFolder();
    final Path testPath = Path.fromLocalFile(folder);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.enableCheckpointing(100);

    DataStream<Tuple2<Long, String>> stream = env.addSource(
            new FiniteTestSource<>(testData),
            TypeInformation.of(new TypeHint<Tuple2<Long, String>>() {
            })
    );

    stream.map(new MapFunction<Tuple2<Long, String>, Tuple2<LongWritable, Text>>() {
        @Override
        public Tuple2<LongWritable, Text> map(Tuple2<Long, String> value) throws Exception {
            return new Tuple2<>(new LongWritable(value.f0), new Text(value.f1));
        }
    }).addSink(
            StreamingFileSink.forBulkFormat(
                    testPath,
                    new SequenceFileWriterFactory<>(configuration, LongWritable.class, Text.class, "BZip2")
            ).build());

    env.execute();

    validateResults(folder, testData);
}
Example #28
Source File: GroupCombineITCase.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testPartialReduceWithDifferentInputOutputType() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // data
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple2<Long, Tuple3<Integer, Long, String>>> dsWrapped = ds
            // wrap values as Kv pairs with the grouping key as key
            .map(new Tuple3KvWrapper());

    List<Tuple2<Integer, Long>> result = dsWrapped
            .groupBy(0)
            // reduce partially
            .combineGroup(new Tuple3toTuple2GroupReduce())
            .groupBy(0)
            // reduce fully to check result
            .reduceGroup(new Tuple2toTuple2GroupReduce())
            // unwrap
            .map(new MapFunction<Tuple2<Long, Tuple2<Integer, Long>>, Tuple2<Integer, Long>>() {
                @Override
                public Tuple2<Integer, Long> map(Tuple2<Long, Tuple2<Integer, Long>> value) throws Exception {
                    return value.f1;
                }
            }).collect();

    String expected = "1,3\n" +
            "5,20\n" +
            "15,58\n" +
            "34,52\n" +
            "65,70\n" +
            "111,96\n";

    compareResultAsTuples(result, expected);
}
Example #29
Source File: DataStreamAllroundTestJobFactory.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
static <IN, OUT, STATE> ArtificialKeyedStateMapper<IN, OUT> createArtificialKeyedStateMapper(
        MapFunction<IN, OUT> mapFunction,
        JoinFunction<IN, STATE, STATE> inputAndOldStateToNewState,
        List<TypeSerializer<STATE>> stateSerializers,
        List<Class<STATE>> stateClasses) {

    List<ArtificialStateBuilder<IN>> artificialStateBuilders = new ArrayList<>(stateSerializers.size());
    for (TypeSerializer<STATE> typeSerializer : stateSerializers) {
        artificialStateBuilders.add(createValueStateBuilder(
                inputAndOldStateToNewState,
                new ValueStateDescriptor<>(
                        "valueState-" + typeSerializer.getClass().getSimpleName(),
                        typeSerializer)));

        artificialStateBuilders.add(createListStateBuilder(
                inputAndOldStateToNewState,
                new ListStateDescriptor<>(
                        "listState-" + typeSerializer.getClass().getSimpleName(),
                        typeSerializer)));
    }

    for (Class<STATE> stateClass : stateClasses) {
        artificialStateBuilders.add(createValueStateBuilder(
                inputAndOldStateToNewState,
                new ValueStateDescriptor<>(
                        "valueState-" + stateClass.getSimpleName(),
                        stateClass)));

        artificialStateBuilders.add(createListStateBuilder(
                inputAndOldStateToNewState,
                new ListStateDescriptor<>(
                        "listState-" + stateClass.getSimpleName(),
                        stateClass)));
    }

    return new ArtificialKeyedStateMapper<>(mapFunction, artificialStateBuilders);
}
Example #30
Source File: CoGroupConnectedComponentsITCase.java From flink with Apache License 2.0 | 5 votes |
@Override
protected void testProgram() throws Exception {

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple1<Long>> initialVertices = env.readCsvFile(verticesPath).fieldDelimiter(" ").types(Long.class).name("Vertices");

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class).name("Edges");

    DataSet<Tuple2<Long, Long>> verticesWithId = initialVertices.map(new MapFunction<Tuple1<Long>, Tuple2<Long, Long>>() {
        @Override
        public Tuple2<Long, Long> map(Tuple1<Long> value) throws Exception {
            return new Tuple2<>(value.f0, value.f0);
        }
    }).name("Assign Vertex Ids");

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithId.iterateDelta(verticesWithId, MAX_ITERATIONS, 0);

    JoinOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> joinWithNeighbors = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                @Override
                public Tuple2<Long, Long> join(Tuple2<Long, Long> first, Tuple2<Long, Long> second) throws Exception {
                    return new Tuple2<>(second.f1, first.f1);
                }
            })
            .name("Join Candidate Id With Neighbor");

    CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> minAndUpdate = joinWithNeighbors
            .coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new MinIdAndUpdate())
            .name("min Id and Update");

    iteration.closeWith(minAndUpdate, minAndUpdate).writeAsCsv(resultPath, "\n", " ").name("Result");

    env.execute("Workset Connected Components");
}