org.apache.flink.api.common.functions.MapFunction Java Examples

The following examples show how to use org.apache.flink.api.common.functions.MapFunction. Each example is drawn from an open-source project; the source file, project, and license are listed above the snippet.
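As a quick orientation before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) showing how a MapFunction<IN, OUT> is implemented and applied: its single map(IN value) method turns each input element into exactly one output element.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class MapFunctionIntroExample {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // map() applies the MapFunction to every element, producing exactly one output per input
        DataStream<Integer> lengths = env.fromElements("flink", "map", "function")
                .map(new MapFunction<String, Integer>() {
                    @Override
                    public Integer map(String value) {
                        return value.length();
                    }
                });

        lengths.print();
        env.execute("MapFunction intro");
    }
}

The same map call also accepts a lambda cast to MapFunction, as several of the examples below demonstrate.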
Example #1
Source File: Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        // set parallelism to 1
        env.setParallelism(1);
//        env.setParallelism(4);

        SingleOutputStreamOperator<Word> data = env.socketTextStream("localhost", 9001)
                .map(new MapFunction<String, Word>() {
                    @Override
                    public Word map(String value) throws Exception {
                        String[] split = value.split(",");
                        return new Word(split[0], Integer.valueOf(split[1]), Long.valueOf(split[2]));
                    }
                });

        // Punctuated watermark: reassign so the timestamp/watermark assignment is not discarded
        data = data.assignTimestampsAndWatermarks(new WordPunctuatedWatermark());

        data.print();
        env.execute("watermark demo");
    }
 
Example #2
Source File: Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    DataStreamSource<String> data = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),   // this Kafka topic must match the topic used by the utility class above
            new SimpleStringSchema(),
            props));

    data.map(new MapFunction<String, Object>() {
        @Override
        public Object map(String string) throws Exception {
            writeEventToHbase(string, parameterTool);
            return string;
        }
    }).print();

    env.execute("flink learning connectors hbase");
}
 
Example #3
Source File: BaseComQueue.java    From Alink with Apache License 2.0
private DataSet<byte[]> clearObjs(DataSet<byte[]> raw) {
	final int localSessionId = sessionId;
	DataSet<byte[]> clear = expandDataSet2MaxParallelism(
		BatchOperator
			.getExecutionEnvironmentFromDataSets(raw)
			.fromElements(0))
		.mapPartition(new MapPartitionFunction<Integer, byte[]>() {
			@Override
			public void mapPartition(Iterable<Integer> values, Collector<byte[]> out) {
				SessionSharedObjs.clear(localSessionId);
			}
		});
	return raw
		.map(new MapFunction<byte[], byte[]>() {
			@Override
			public byte[] map(byte[] value) {
				return value;
			}
		})
		.withBroadcastSet(clear, "barrier")
		.name("clearReturn");

}
 
Example #4
Source File: Graph.java    From flink with Apache License 2.0
/**
 * Apply a function to the attribute of each edge in the graph.
 *
 * @param mapper the map function to apply.
 * @return a new graph
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public <NV> Graph<K, VV, NV> mapEdges(final MapFunction<Edge<K, EV>, NV> mapper) {

	TypeInformation<K> keyType = ((TupleTypeInfo<?>) edges.getType()).getTypeAt(0);

	TypeInformation<NV> valueType;

	if (mapper instanceof ResultTypeQueryable) {
		valueType = ((ResultTypeQueryable) mapper).getProducedType();
	} else {
		valueType = TypeExtractor.createTypeInfo(MapFunction.class, mapper.getClass(), 1, edges.getType(), null);
	}

	TypeInformation<Edge<K, NV>> returnType = (TypeInformation<Edge<K, NV>>) new TupleTypeInfo(
			Edge.class, keyType, keyType, valueType);

	return mapEdges(mapper, returnType);
}
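For context, here is a small usage sketch for the mapEdges call shown above (a hypothetical example built on Gelly's public Graph API, not part of the original source file): doubling each edge value yields a new graph with the same structure.

import java.util.Arrays;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.graph.Edge;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.Vertex;

public class MapEdgesUsageExample {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        Graph<Long, String, Long> graph = Graph.fromCollection(
                Arrays.asList(new Vertex<>(1L, "a"), new Vertex<>(2L, "b")),
                Arrays.asList(new Edge<>(1L, 2L, 10L)),
                env);

        // mapEdges applies the MapFunction to each Edge; the graph structure is kept,
        // only the edge values (and possibly their type) change
        Graph<Long, String, Long> doubled = graph.mapEdges(
                new MapFunction<Edge<Long, Long>, Long>() {
                    @Override
                    public Long map(Edge<Long, Long> edge) {
                        return edge.getValue() * 2;
                    }
                });

        doubled.getEdges().print();
    }
}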
 
Example #5
Source File: NiFiSourceMain.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    SiteToSiteClientConfig clientConfig = new SiteToSiteClient.Builder()
            .url("http://localhost:8080/nifi")
            .portName("Data for Flink")
            .requestBatchCount(5)
            .buildConfig();

    SourceFunction<NiFiDataPacket> nifiSource = new NiFiSource(clientConfig);
    DataStream<NiFiDataPacket> streamSource = env.addSource(nifiSource).setParallelism(2);

    DataStream<String> dataStream = streamSource.map(new MapFunction<NiFiDataPacket, String>() {
        @Override
        public String map(NiFiDataPacket value) throws Exception {
            return new String(value.getContent(), Charset.defaultCharset());
        }
    });

    dataStream.print();
    env.execute();
}
 
Example #6
Source File: NoRestartStrategyMain.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(ParameterTool.fromArgs(args));
    env.setRestartStrategy(RestartStrategies.noRestart());

    env.addSource(new SourceFunction<Long>() {
        @Override
        public void run(SourceContext<Long> sourceContext) throws Exception {
            while (true) {
                // deliberately emit null: unboxing it in the map below throws a
                // NullPointerException, so the job fails and, with noRestart(), is not restarted
                sourceContext.collect(null);
            }
        }
        @Override
        public void cancel() {
        }
    })
            .map((MapFunction<Long, Long>) aLong -> aLong / 1)
            .print();

    env.execute("zhisheng no Restart Strategy example");
}
 
Example #7
Source File: ClosureCleanerTest.java    From Flink-CEPplus with Apache License 2.0
@Override
public MapFunction<Integer, Integer> getMap() {
	return new MapFunction<Integer, Integer>() {
		@Override
		public Integer map(Integer value) throws Exception {
			return value + add;
		}
	};
}
 
Example #8
Source File: ConnectedComponents.java    From Flink-CEPplus with Apache License 2.0
private static DataSet<Long> getVertexDataSet(ExecutionEnvironment env, ParameterTool params) {
	if (params.has("vertices")) {
		return env.readCsvFile(params.get("vertices")).types(Long.class).map(
			new MapFunction<Tuple1<Long>, Long>() {
				public Long map(Tuple1<Long> value) {
					return value.f0;
				}
			});
	} else {
		System.out.println("Executing Connected Components example with default vertices data set.");
		System.out.println("Use --vertices to specify file input.");
		return ConnectedComponentsData.getDefaultVertexDataSet(env);
	}
}
 
Example #9
Source File: TypeExtractorTest.java    From flink with Apache License 2.0
@SuppressWarnings({ "unchecked", "rawtypes" })
@Test
public void testInputInference2() {
	EdgeMapper2<Boolean> em = new EdgeMapper2<Boolean>();
	TypeInformation<?> ti = TypeExtractor.getMapReturnTypes((MapFunction) em, Types.BOOLEAN);
	Assert.assertTrue(ti.isTupleType());
	Assert.assertEquals(3, ti.getArity());
	TupleTypeInfo<?> tti = (TupleTypeInfo<?>) ti;
	Assert.assertEquals(BasicTypeInfo.LONG_TYPE_INFO, tti.getTypeAt(0));
	Assert.assertEquals(BasicTypeInfo.LONG_TYPE_INFO, tti.getTypeAt(1));
	Assert.assertEquals(BasicTypeInfo.BOOLEAN_TYPE_INFO, tti.getTypeAt(2));
}
 
Example #10
Source File: AvroTypeExtractionTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testSerializeWithAvro() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().enableForceAvro();
	Path in = new Path(inFile.getAbsoluteFile().toURI());

	AvroInputFormat<User> users = new AvroInputFormat<>(in, User.class);
	DataSet<User> usersDS = env.createInput(users)
			.map((MapFunction<User, User>) value -> {
				Map<CharSequence, Long> ab = new HashMap<>(1);
				ab.put("hehe", 12L);
				value.setTypeMap(ab);
				return value;
			});

	usersDS.writeAsText(resultPath);

	env.execute("Simple Avro read job");

	expected = "{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null, \"type_long_test\": null, \"type_double_test\": 123.45, \"type_null_test\": null, " +
		"\"type_bool_test\": true, \"type_array_string\": [\"ELEMENT 1\", \"ELEMENT 2\"], \"type_array_boolean\": [true, false], \"type_nullable_array\": null, " +
		"\"type_enum\": \"GREEN\", \"type_map\": {\"hehe\": 12}, \"type_fixed\": null, \"type_union\": null, \"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", " +
		"\"city\": \"London\", \"state\": \"London\", \"zip\": \"NW1 6XE\"}, \"type_bytes\": {\"bytes\": \"\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\"}, " +
		"\"type_date\": 2014-03-01, \"type_time_millis\": 12:12:12.000, \"type_time_micros\": 123456, \"type_timestamp_millis\": 2014-03-01T12:12:12.321Z, \"type_timestamp_micros\": 123456, " +
		"\"type_decimal_bytes\": {\"bytes\": \"\\u0007Ð\"}, \"type_decimal_fixed\": [7, -48]}\n" +
		"{\"name\": \"Charlie\", \"favorite_number\": null, \"favorite_color\": \"blue\", \"type_long_test\": 1337, \"type_double_test\": 1.337, \"type_null_test\": null, " +
		"\"type_bool_test\": false, \"type_array_string\": [], \"type_array_boolean\": [], \"type_nullable_array\": null, \"type_enum\": \"RED\", \"type_map\": {\"hehe\": 12}, " +
		"\"type_fixed\": null, \"type_union\": null, \"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\", \"state\": \"London\", \"zip\": \"NW1 6XE\"}, " +
		"\"type_bytes\": {\"bytes\": \"\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\"}, \"type_date\": 2014-03-01, \"type_time_millis\": 12:12:12.000, " +
		"\"type_time_micros\": 123456, \"type_timestamp_millis\": 2014-03-01T12:12:12.321Z, \"type_timestamp_micros\": 123456, \"type_decimal_bytes\": {\"bytes\": \"\\u0007Ð\"}, " +
		"\"type_decimal_fixed\": [7, -48]}\n";

}
 
Example #11
Source File: KeyFunctions.java    From flink with Apache License 2.0
@SuppressWarnings("unchecked")
public static <T, K> org.apache.flink.api.common.operators.Operator<Tuple2<K, T>> appendKeyExtractor(
		org.apache.flink.api.common.operators.Operator<T> input,
		SelectorFunctionKeys<T, K> key) {

	if (input instanceof Union) {
		// if input is a union, we apply the key extractors recursively to all inputs
		org.apache.flink.api.common.operators.Operator<T> firstInput = ((Union) input).getFirstInput();
		org.apache.flink.api.common.operators.Operator<T> secondInput = ((Union) input).getSecondInput();

		org.apache.flink.api.common.operators.Operator<Tuple2<K, T>> firstInputWithKey =
				appendKeyExtractor(firstInput, key);
		org.apache.flink.api.common.operators.Operator<Tuple2<K, T>> secondInputWithKey =
				appendKeyExtractor(secondInput, key);

		return new Union(firstInputWithKey, secondInputWithKey, input.getName());
	}

	TypeInformation<T> inputType = key.getInputType();
	TypeInformation<Tuple2<K, T>> typeInfoWithKey = createTypeWithKey(key);
	KeyExtractingMapper<T, K> extractor = new KeyExtractingMapper(key.getKeyExtractor());

	MapOperatorBase<T, Tuple2<K, T>, MapFunction<T, Tuple2<K, T>>> mapper =
			new MapOperatorBase<T, Tuple2<K, T>, MapFunction<T, Tuple2<K, T>>>(
					extractor,
					new UnaryOperatorInformation(inputType, typeInfoWithKey),
					"Key Extractor"
			);

	mapper.setInput(input);
	mapper.setParallelism(input.getParallelism());

	return mapper;
}
 
Example #12
Source File: TypeExtractorTest.java    From Flink-CEPplus with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testSqlTimeTypes() {
	MapFunction<?, ?> function = new MapFunction<Tuple3<Date, Time, Timestamp>, Tuple3<Date, Time, Timestamp>>() {
		@Override
		public Tuple3<Date, Time, Timestamp> map(Tuple3<Date, Time, Timestamp> value) throws Exception {
			return null;
		}
	};

	TypeInformation<?> ti = TypeExtractor.getMapReturnTypes(
		function,
		(TypeInformation) TypeInformation.of(new TypeHint<Tuple3<Date, Time, Timestamp>>() {
	}));

	Assert.assertTrue(ti.isTupleType());
	TupleTypeInfo<?> tti = (TupleTypeInfo<?>) ti;
	Assert.assertEquals(SqlTimeTypeInfo.DATE, tti.getTypeAt(0));
	Assert.assertEquals(SqlTimeTypeInfo.TIME, tti.getTypeAt(1));
	Assert.assertEquals(SqlTimeTypeInfo.TIMESTAMP, tti.getTypeAt(2));

	// use getForClass()
	Assert.assertEquals(tti.getTypeAt(0), TypeExtractor.getForClass(Date.class));
	Assert.assertEquals(tti.getTypeAt(1), TypeExtractor.getForClass(Time.class));
	Assert.assertEquals(tti.getTypeAt(2), TypeExtractor.getForClass(Timestamp.class));

	// use getForObject()
	Assert.assertEquals(SqlTimeTypeInfo.DATE, TypeExtractor.getForObject(Date.valueOf("1998-12-12")));
	Assert.assertEquals(SqlTimeTypeInfo.TIME, TypeExtractor.getForObject(Time.valueOf("12:37:45")));
	Assert.assertEquals(SqlTimeTypeInfo.TIMESTAMP, TypeExtractor.getForObject(Timestamp.valueOf("1998-12-12 12:37:45")));
}
 
Example #13
Source File: MapCancelingITCase.java    From Flink-CEPplus with Apache License 2.0
public void executeTask(MapFunction<Integer, Integer> mapper) throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	env
			.createInput(new InfiniteIntegerInputFormat(false))
			.map(mapper)
			.output(new DiscardingOutputFormat<Integer>());

	env.setParallelism(PARALLELISM);

	runAndCancelJob(env.createProgramPlan(), 5 * 1000, 10 * 1000);
}
 
Example #14
Source File: GraphCsvReader.java    From flink with Apache License 2.0
/**
 * Creates a Graph from CSV input without edge values.
 * The vertex values are specified through a vertices input file or a user-defined map function.
 * If no vertices input file is provided, the vertex IDs are automatically created from the edges
 * input file.
 * @param vertexKey the type of the vertex IDs
 * @param vertexValue the type of the vertex values
 * @return a Graph with vertex IDs and vertex values of the specified types.
 */
@SuppressWarnings({ "serial", "unchecked" })
public <K, VV> Graph<K, VV, NullValue> vertexTypes(Class<K> vertexKey, Class<VV> vertexValue) {

	if (edgeReader == null) {
		throw new RuntimeException("The edge input file cannot be null!");
	}

	DataSet<Edge<K, NullValue>> edges = edgeReader
		.types(vertexKey, vertexKey)
			.name(GraphCsvReader.class.getName())
		.map(new Tuple2ToEdgeMap<>())
			.name("To Edge");

	// the vertex value can be provided by an input file or a user-defined mapper
	if (vertexReader != null) {
		DataSet<Vertex<K, VV>> vertices = vertexReader
			.types(vertexKey, vertexValue)
				.name(GraphCsvReader.class.getName())
			.map(new Tuple2ToVertexMap<>())
				.name("Type conversion");

		return Graph.fromDataSet(vertices, edges, executionContext);
	}
	else if (mapper != null) {
		return Graph.fromDataSet(edges, (MapFunction<K, VV>) mapper, executionContext);
	}
	else {
		throw new RuntimeException("Vertex values have to be specified through a vertices input file"
				+ "or a user-defined map function.");
	}
}
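As the Javadoc above notes, vertex values can come either from a vertices file or from a user-defined mapper. The following hypothetical sketch exercises the mapper branch, assuming Gelly's Graph.fromCsvReader overload that takes a vertex-value initializer; the file path is only illustrative.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.graph.Graph;
import org.apache.flink.types.NullValue;

public class GraphFromCsvWithMapperExample {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Every vertex ID found in the edge file gets its initial value from the MapFunction.
        Graph<Long, Double, NullValue> graph = Graph
                .fromCsvReader("/tmp/edges.csv", new MapFunction<Long, Double>() {
                    @Override
                    public Double map(Long vertexId) {
                        return 0.0;
                    }
                }, env)
                .vertexTypes(Long.class, Double.class);

        graph.getVertices().print();
    }
}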
 
Example #15
Source File: TypeExtractorTest.java    From flink with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testBigBasicTypes() {
	MapFunction<?, ?> function = new MapFunction<Tuple2<BigInteger, BigDecimal>, Tuple2<BigInteger, BigDecimal>>() {
		@Override
		public Tuple2<BigInteger, BigDecimal> map(Tuple2<BigInteger, BigDecimal> value) throws Exception {
			return null;
		}
	};

	TypeInformation<?> ti = TypeExtractor.getMapReturnTypes(
		function,
		(TypeInformation) TypeInformation.of(new TypeHint<Tuple2<BigInteger, BigDecimal>>() {
	}));

	Assert.assertTrue(ti.isTupleType());
	TupleTypeInfo<?> tti = (TupleTypeInfo<?>) ti;
	Assert.assertEquals(BasicTypeInfo.BIG_INT_TYPE_INFO, tti.getTypeAt(0));
	Assert.assertEquals(BasicTypeInfo.BIG_DEC_TYPE_INFO, tti.getTypeAt(1));

	// use getForClass()
	Assert.assertTrue(TypeExtractor.getForClass(BigInteger.class).isBasicType());
	Assert.assertTrue(TypeExtractor.getForClass(BigDecimal.class).isBasicType());
	Assert.assertEquals(tti.getTypeAt(0), TypeExtractor.getForClass(BigInteger.class));
	Assert.assertEquals(tti.getTypeAt(1), TypeExtractor.getForClass(BigDecimal.class));

	// use getForObject()
	Assert.assertEquals(BasicTypeInfo.BIG_INT_TYPE_INFO, TypeExtractor.getForObject(new BigInteger("42")));
	Assert.assertEquals(BasicTypeInfo.BIG_DEC_TYPE_INFO, TypeExtractor.getForObject(new BigDecimal("42.42")));
}
 
Example #16
Source File: DataStreamAllroundTestJobFactory.java    From flink with Apache License 2.0
static <IN, OUT, STATE> ArtificialKeyedStateMapper<IN, OUT> createArtificialKeyedStateMapper(
	MapFunction<IN, OUT> mapFunction,
	JoinFunction<IN, STATE, STATE> inputAndOldStateToNewState,
	List<TypeSerializer<STATE>> stateSerializers,
	List<Class<STATE>> stateClasses) {

	List<ArtificialStateBuilder<IN>> artificialStateBuilders = new ArrayList<>(stateSerializers.size());
	for (TypeSerializer<STATE> typeSerializer : stateSerializers) {
		artificialStateBuilders.add(createValueStateBuilder(
			inputAndOldStateToNewState,
			new ValueStateDescriptor<>(
				"valueState-" + typeSerializer.getClass().getSimpleName(),
				typeSerializer)));

		artificialStateBuilders.add(createListStateBuilder(
			inputAndOldStateToNewState,
			new ListStateDescriptor<>(
				"listState-" + typeSerializer.getClass().getSimpleName(),
				typeSerializer)));
	}

	for (Class<STATE> stateClass : stateClasses) {
		artificialStateBuilders.add(createValueStateBuilder(
			inputAndOldStateToNewState,
			new ValueStateDescriptor<>(
				"valueState-" + stateClass.getSimpleName(),
				stateClass)));

		artificialStateBuilders.add(createListStateBuilder(
			inputAndOldStateToNewState,
			new ListStateDescriptor<>(
				"listState-" + stateClass.getSimpleName(),
				stateClass)));
	}

	return new ArtificialKeyedStateMapper<>(mapFunction, artificialStateBuilders);
}
 
Example #17
Source File: BaseComQueue.java    From Alink with Apache License 2.0
private static DataSet<Row> serializeModel(DataSet<byte[]> model) {
	return model
		.map(new MapFunction<byte[], Row>() {
			@Override
			public Row map(byte[] value) {
				return (Row) SerializationUtils.deserialize(value);
			}
		})
		.name("serializeModel");
}
 
Example #18
Source File: GroupCombineITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
// check if all API methods are callable
public void testAPI() throws Exception {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple1<String>> ds = CollectionDataSets.getStringDataSet(env).map(new MapFunction<String, Tuple1<String>>() {
		@Override
		public Tuple1<String> map(String value) throws Exception {
			return new Tuple1<>(value);
		}
	});

	// all methods on DataSet
	ds.combineGroup(new GroupCombineFunctionExample())
	.output(new DiscardingOutputFormat<Tuple1<String>>());

	// all methods on UnsortedGrouping
	ds.groupBy(0).combineGroup(new GroupCombineFunctionExample())
	.output(new DiscardingOutputFormat<Tuple1<String>>());

	// all methods on SortedGrouping
	ds.groupBy(0).sortGroup(0, Order.ASCENDING).combineGroup(new GroupCombineFunctionExample())
	.output(new DiscardingOutputFormat<Tuple1<String>>());

	env.execute();
}
 
Example #19
Source File: PlanGeneratorTest.java    From flink with Apache License 2.0
@Test
public void testGenerate() {

	final String fileA = "fileA";
	final String fileB = "fileB";

	final Map<String, DistributedCache.DistributedCacheEntry> originalArtifacts = Stream.of(
			Tuple2.of(fileA, new DistributedCache.DistributedCacheEntry("test1", true)),
			Tuple2.of(fileB, new DistributedCache.DistributedCacheEntry("test2", false))
	).collect(Collectors.toMap(x -> x.f0, x -> x.f1));

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(10);
	env.registerCachedFile("test1", fileA, true);
	env.registerCachedFile("test2", fileB, false);

	env.fromElements(1, 3, 5)
			.map((MapFunction<Integer, String>) value -> String.valueOf(value + 1))
			.writeAsText("/tmp/csv");

	final Plan generatedPlanUnderTest = env.createProgramPlan("test");

	final Map<String, DistributedCache.DistributedCacheEntry> retrievedArtifacts =
			generatedPlanUnderTest
					.getCachedFiles()
					.stream()
					.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));

	assertEquals(1, generatedPlanUnderTest.getDataSinks().size());
	assertEquals(10, generatedPlanUnderTest.getDefaultParallelism());
	assertEquals(env.getConfig(), generatedPlanUnderTest.getExecutionConfig());
	assertEquals("test", generatedPlanUnderTest.getJobName());

	assertEquals(originalArtifacts.size(), retrievedArtifacts.size());
	assertEquals(originalArtifacts.get(fileA), retrievedArtifacts.get(fileA));
	assertEquals(originalArtifacts.get(fileB), retrievedArtifacts.get(fileB));
}
 
Example #20
Source File: TypeExtractorTest.java    From flink with Apache License 2.0
@SuppressWarnings({ "unchecked", "rawtypes" })
@Test(expected=InvalidTypesException.class)
public void testGenericTypeWithSuperclassInput() {
	TypeInformation<?> inputType = TypeExtractor.createTypeInfo(Map.class);

	MapFunction<?, ?> function = new MapFunction<HashMap<String, Object>,Map<String, Object>>(){

		@Override
		public Map<String, Object> map(HashMap<String, Object> stringObjectMap) throws Exception {
			return stringObjectMap;
		}
	};

	TypeExtractor.getMapReturnTypes(function, (TypeInformation) inputType);
}
 
Example #21
Source File: ConnectedComponents.java    From flink with Apache License 2.0
private static DataSet<Long> getVertexDataSet(ExecutionEnvironment env, ParameterTool params) {
	if (params.has("vertices")) {
		return env.readCsvFile(params.get("vertices")).types(Long.class).map(
			new MapFunction<Tuple1<Long>, Long>() {
				public Long map(Tuple1<Long> value) {
					return value.f0;
				}
			});
	} else {
		System.out.println("Executing Connected Components example with default vertices data set.");
		System.out.println("Use --vertices to specify file input.");
		return ConnectedComponentsData.getDefaultVertexDataSet(env);
	}
}
 
Example #22
Source File: ClosureCleanerTest.java    From flink with Apache License 2.0
@Override
public MapFunction<Integer, Integer> getMap() {
	return new MapFunction<Integer, Integer>() {
		@Override
		public Integer map(Integer value) throws Exception {
			return value + add;
		}
	};
}
 
Example #23
Source File: TypeExtractorTest.java    From Flink-CEPplus with Apache License 2.0
@SuppressWarnings({ "unchecked", "rawtypes" })
@Test
public void testInputInference1() {
	EdgeMapper<String, Double> em = new EdgeMapper<String, Double>();
	TypeInformation<?> ti = TypeExtractor.getMapReturnTypes((MapFunction) em, TypeInformation.of(new TypeHint<Tuple3<String, String, Double>>(){}));
	Assert.assertTrue(ti.isTupleType());
	Assert.assertEquals(3, ti.getArity());
	TupleTypeInfo<?> tti = (TupleTypeInfo<?>) ti;
	Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tti.getTypeAt(0));
	Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tti.getTypeAt(1));
	Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tti.getTypeAt(2));
}
 
Example #24
Source File: Main4.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        // set parallelism to 1
        env.setParallelism(1);
//        env.setParallelism(4);

        OutputTag<Word> lateDataTag = new OutputTag<Word>("late") {
        };

        SingleOutputStreamOperator<Word> data = env.socketTextStream("localhost", 9001)
                .map(new MapFunction<String, Word>() {
                    @Override
                    public Word map(String value) throws Exception {
                        String[] split = value.split(",");
                        return new Word(split[0], Integer.valueOf(split[1]), Long.valueOf(split[2]));
                    }
                }).assignTimestampsAndWatermarks(new WordPeriodicWatermark());

        SingleOutputStreamOperator<Word> sum = data.keyBy(0)
                .timeWindow(Time.seconds(10))
//                .allowedLateness(Time.milliseconds(2))
                .sideOutputLateData(lateDataTag)
                .sum(1);

        sum.print();

        sum.getSideOutput(lateDataTag)
                .print();

        env.execute("watermark demo");
    }
 
Example #25
Source File: Main2.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        // set parallelism to 1
        env.setParallelism(1);
//        env.setParallelism(4);

        SingleOutputStreamOperator<Word> data = env.socketTextStream("localhost", 9001)
                .map(new MapFunction<String, Word>() {
                    @Override
                    public Word map(String value) throws Exception {
                        String[] split = value.split(",");
                        return new Word(split[0], Integer.valueOf(split[1]), Long.valueOf(split[2]));
                    }
                });

        // BoundedOutOfOrdernessTimestampExtractor: reassign so the watermark assignment takes effect
        data = data.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<Word>(Time.seconds(10)) {
            @Override
            public long extractTimestamp(Word element) {
                return element.getTimestamp();
            }
        });

        data.print();
        env.execute("watermark demo");
    }
 
Example #26
Source File: Main4.java    From flink-learning with Apache License 2.0 5 votes vote down vote up
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(2);
    DataStreamSource<String> data = env.socketTextStream("localhost", 9001);

    data.map(new MapFunction<String, Tuple2<String, Long>>() {
        @Override
        public Tuple2<String, Long> map(String s) throws Exception {
            String[] split = s.split(",");
            return new Tuple2<>(split[0], Long.valueOf(split[1]));
        }
    }).assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<Tuple2<String, Long>>() {
        private long currentTimestamp;

        @Nullable
        @Override
        public Watermark getCurrentWatermark() {
            return new Watermark(currentTimestamp);
        }

        @Override
        public long extractTimestamp(Tuple2<String, Long> tuple2, long l) {
            long timestamp = tuple2.f1;
            currentTimestamp = Math.max(timestamp, currentTimestamp);
            return timestamp;
        }
    }).keyBy(0)
            .window(EventTimeSessionWindows.withGap(Time.minutes(5)))
            .sum(1)
            .print("session ");
    System.out.println(env.getExecutionPlan());
    env.execute();
}
 
Example #27
Source File: SequenceStreamingFileSinkITCase.java    From flink with Apache License 2.0
@Test
public void testWriteSequenceFile() throws Exception {
	final File folder = TEMPORARY_FOLDER.newFolder();
	final Path testPath = Path.fromLocalFile(folder);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.enableCheckpointing(100);

	DataStream<Tuple2<Long, String>> stream = env.addSource(
			new FiniteTestSource<>(testData),
			TypeInformation.of(new TypeHint<Tuple2<Long, String>>() {

			})
	);

	stream.map(new MapFunction<Tuple2<Long, String>, Tuple2<LongWritable, Text>>() {
		@Override
		public Tuple2<LongWritable, Text> map(Tuple2<Long, String> value) throws Exception {
			return new Tuple2<>(new LongWritable(value.f0), new Text(value.f1));
		}
	}).addSink(
		StreamingFileSink.forBulkFormat(
			testPath,
			new SequenceFileWriterFactory<>(configuration, LongWritable.class, Text.class, "BZip2")
		).build());

	env.execute();

	validateResults(folder, testData);
}
 
Example #28
Source File: GroupCombineITCase.java    From flink with Apache License 2.0
@Test
public void testPartialReduceWithDifferentInputOutputType() throws Exception {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// data
	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);

	DataSet<Tuple2<Long, Tuple3<Integer, Long, String>>> dsWrapped = ds
			// wrap values as Kv pairs with the grouping key as key
			.map(new Tuple3KvWrapper());

	List<Tuple2<Integer, Long>> result = dsWrapped
			.groupBy(0)
			// reduce partially
			.combineGroup(new Tuple3toTuple2GroupReduce())
			.groupBy(0)
			// reduce fully to check result
			.reduceGroup(new Tuple2toTuple2GroupReduce())
			//unwrap
			.map(new MapFunction<Tuple2<Long, Tuple2<Integer, Long>>, Tuple2<Integer, Long>>() {
				@Override
				public Tuple2<Integer, Long> map(Tuple2<Long, Tuple2<Integer, Long>> value) throws Exception {
					return value.f1;
				}
			}).collect();

	String expected = "1,3\n" +
			"5,20\n" +
			"15,58\n" +
			"34,52\n" +
			"65,70\n" +
			"111,96\n";

	compareResultAsTuples(result, expected);
}
 
Example #29
Source File: DataStreamAllroundTestJobFactory.java    From Flink-CEPplus with Apache License 2.0
static <IN, OUT, STATE> ArtificialKeyedStateMapper<IN, OUT> createArtificialKeyedStateMapper(
	MapFunction<IN, OUT> mapFunction,
	JoinFunction<IN, STATE, STATE> inputAndOldStateToNewState,
	List<TypeSerializer<STATE>> stateSerializers,
	List<Class<STATE>> stateClasses) {

	List<ArtificialStateBuilder<IN>> artificialStateBuilders = new ArrayList<>(stateSerializers.size());
	for (TypeSerializer<STATE> typeSerializer : stateSerializers) {
		artificialStateBuilders.add(createValueStateBuilder(
			inputAndOldStateToNewState,
			new ValueStateDescriptor<>(
				"valueState-" + typeSerializer.getClass().getSimpleName(),
				typeSerializer)));

		artificialStateBuilders.add(createListStateBuilder(
			inputAndOldStateToNewState,
			new ListStateDescriptor<>(
				"listState-" + typeSerializer.getClass().getSimpleName(),
				typeSerializer)));
	}

	for (Class<STATE> stateClass : stateClasses) {
		artificialStateBuilders.add(createValueStateBuilder(
			inputAndOldStateToNewState,
			new ValueStateDescriptor<>(
				"valueState-" + stateClass.getSimpleName(),
				stateClass)));

		artificialStateBuilders.add(createListStateBuilder(
			inputAndOldStateToNewState,
			new ListStateDescriptor<>(
				"listState-" + stateClass.getSimpleName(),
				stateClass)));
	}

	return new ArtificialKeyedStateMapper<>(mapFunction, artificialStateBuilders);
}
 
Example #30
Source File: CoGroupConnectedComponentsITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple1<Long>> initialVertices = env.readCsvFile(verticesPath).fieldDelimiter(" ").types(Long.class).name("Vertices");

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class).name("Edges");

	DataSet<Tuple2<Long, Long>> verticesWithId = initialVertices.map(new MapFunction<Tuple1<Long>, Tuple2<Long, Long>>() {
		@Override
		public Tuple2<Long, Long> map(Tuple1<Long> value) throws Exception {
			return new Tuple2<>(value.f0, value.f0);
		}
	}).name("Assign Vertex Ids");

	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithId.iterateDelta(verticesWithId, MAX_ITERATIONS, 0);

	JoinOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> joinWithNeighbors = iteration.getWorkset()
			.join(edges).where(0).equalTo(0)
			.with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
				@Override
				public Tuple2<Long, Long> join(Tuple2<Long, Long> first, Tuple2<Long, Long> second) throws Exception {
					return new Tuple2<>(second.f1, first.f1);
				}
			})
			.name("Join Candidate Id With Neighbor");

	CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> minAndUpdate = joinWithNeighbors
			.coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new MinIdAndUpdate())
			.name("min Id and Update");

	iteration.closeWith(minAndUpdate, minAndUpdate).writeAsCsv(resultPath, "\n", " ").name("Result");

	env.execute("Workset Connected Components");
}