org.apache.flink.api.common.functions.MapFunction Java Examples

The following examples show how to use org.apache.flink.api.common.functions.MapFunction. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: NiFiSourceMain.java From flink-learning with Apache License 2.0

6 votes

/**
 * Streams data packets from a NiFi output port, converts each packet's content to text
 * and prints it.
 *
 * @param args command-line arguments (not read by this method)
 * @throws Exception if building or executing the job fails
 */
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    // Site-to-site settings for the NiFi instance and the output port to pull from.
    final SiteToSiteClientConfig siteToSiteConfig = new SiteToSiteClient.Builder()
            .url("http://localhost:8080/nifi")
            .portName("Data for Flink")
            .requestBatchCount(5)
            .buildConfig();

    final SourceFunction<NiFiDataPacket> packetSource = new NiFiSource(siteToSiteConfig);
    final DataStream<NiFiDataPacket> packets = environment
            .addSource(packetSource)
            .setParallelism(2);

    // Decodes the raw packet bytes using the JVM's default charset.
    final MapFunction<NiFiDataPacket, String> contentToText = new MapFunction<NiFiDataPacket, String>() {
        @Override
        public String map(NiFiDataPacket packet) throws Exception {
            byte[] content = packet.getContent();
            return new String(content, Charset.defaultCharset());
        }
    };

    final DataStream<String> texts = packets.map(contentToText);
    texts.print();

    environment.execute();
}

Example #2

Source File: BaseComQueue.java From Alink with Apache License 2.0

6 votes

/**
 * Returns {@code raw} passed through an identity map that receives, as broadcast set
 * "barrier", a data set whose partitions call {@code SessionSharedObjs.clear} for this
 * session.
 *
 * @param raw the data set to pass through unchanged
 * @return the identity-mapped data set, named "clearReturn"
 */
private DataSet<byte[]> clearObjs(DataSet<byte[]> raw) {
	// Local copy of the session id; this is the value captured by the anonymous function below.
	final int capturedSessionId = sessionId;

	// Clears the session's shared objects; it never emits an element.
	final MapPartitionFunction<Integer, byte[]> clearSession = new MapPartitionFunction<Integer, byte[]>() {
		@Override
		public void mapPartition(Iterable<Integer> ignoredValues, Collector<byte[]> ignoredOut) {
			SessionSharedObjs.clear(capturedSessionId);
		}
	};

	// Forwards every record untouched.
	final MapFunction<byte[], byte[]> passThrough = new MapFunction<byte[], byte[]>() {
		@Override
		public byte[] map(byte[] record) {
			return record;
		}
	};

	DataSet<byte[]> cleared = expandDataSet2MaxParallelism(
		BatchOperator
			.getExecutionEnvironmentFromDataSets(raw)
			.fromElements(0))
		.mapPartition(clearSession);

	return raw
		.map(passThrough)
		.withBroadcastSet(cleared, "barrier")
		.name("clearReturn");
}

Example #3

Source File: Main.java From flink-learning with Apache License 2.0

6 votes

/**
 * Watermark demo: reads comma-separated lines from a local socket, parses them into
 * {@code Word} records, registers a punctuated watermark assigner and prints the records.
 *
 * @param args command-line arguments (not read by this method)
 * @throws Exception if building or executing the job fails
 */
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
    environment.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // Set the parallelism to 1.
    environment.setParallelism(1);

    // Each line is split on ','; the first three fields are passed to the Word constructor.
    final MapFunction<String, Word> lineToWord = new MapFunction<String, Word>() {
        @Override
        public Word map(String line) throws Exception {
            String[] fields = line.split(",");
            return new Word(fields[0], Integer.valueOf(fields[1]), Long.valueOf(fields[2]));
        }
    };

    SingleOutputStreamOperator<Word> words = environment
            .socketTextStream("localhost", 9001)
            .map(lineToWord);

    // Punctuated watermark.
    // NOTE(review): the stream returned here is not used; print() below is attached to the
    // stream from before the assigner - confirm this is intended.
    words.assignTimestampsAndWatermarks(new WordPunctuatedWatermark());

    words.print();
    environment.execute("watermark demo");
}

Example #4

Source File: NoRestartStrategyMain.java From flink-learning with Apache License 2.0

6 votes

/**
 * Runs a job with the "no restart" strategy whose source emits {@code null} elements
 * into a map that divides each element by 1.
 *
 * <p>NOTE(review): the {@code null} elements presumably exist to make the map fail
 * (unboxing {@code null}) so that the no-restart behaviour can be observed - confirm.
 *
 * @param args command-line arguments, registered as global job parameters
 * @throws Exception if building or executing the job fails
 */
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(ParameterTool.fromArgs(args));
    env.setRestartStrategy(RestartStrategies.noRestart());

    env.addSource(new SourceFunction<Long>() {
        // Written by cancel() and read by run(); volatile so the emitting loop sees the update.
        private volatile boolean running = true;

        @Override
        public void run(SourceContext<Long> sourceContext) throws Exception {
            while (running) {
                sourceContext.collect(null);
            }
        }

        @Override
        public void cancel() {
            // Lets the loop in run() terminate instead of spinning forever.
            running = false;
        }
    })
            .map((MapFunction<Long, Long>) aLong -> aLong / 1)
            .print();

    env.execute("zhisheng no Restart Strategy example");
}

Example #5

Source File: Main.java From flink-learning with Apache License 2.0

6 votes

/**
 * Consumes strings from a Kafka topic, hands each one to {@code writeEventToHbase} and
 * prints it.
 *
 * @param args command-line arguments used to build the parameter tool
 * @throws Exception if building or executing the job fails
 */
public static void main(String[] args) throws Exception {
    final ParameterTool params = ExecutionEnvUtil.createParameterTool(args);
    final StreamExecutionEnvironment environment = ExecutionEnvUtil.prepare(params);
    final Properties kafkaProps = KafkaConfigUtil.buildKafkaProps(params);

    // This Kafka topic must match the topic used by the utility class above.
    final String topic = params.get(METRICS_TOPIC);
    DataStreamSource<String> events = environment.addSource(new FlinkKafkaConsumer011<>(
            topic,
            new SimpleStringSchema(),
            kafkaProps));

    // Writes the event as a side effect and forwards the unchanged string.
    final MapFunction<String, Object> writeAndForward = new MapFunction<String, Object>() {
        @Override
        public Object map(String event) throws Exception {
            writeEventToHbase(event, params);
            return event;
        }
    };

    events.map(writeAndForward).print();

    environment.execute("flink learning connectors hbase");
}

Example #6

Source File: Graph.java From flink with Apache License 2.0

6 votes

/**
 * Transforms the value of every edge in the graph with the given function.
 *
 * <p>The type of the new edge value is taken from the mapper itself when it implements
 * {@code ResultTypeQueryable}; otherwise it is extracted from the mapper's class.
 *
 * @param mapper the map function to apply.
 * @return a new graph
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public <NV> Graph<K, VV, NV> mapEdges(final MapFunction<Edge<K, EV>, NV> mapper) {

	final TypeInformation<K> vertexIdType = ((TupleTypeInfo<?>) edges.getType()).getTypeAt(0);

	final TypeInformation<NV> newValueType;
	if (!(mapper instanceof ResultTypeQueryable)) {
		newValueType = TypeExtractor.createTypeInfo(MapFunction.class, mapper.getClass(), 1, edges.getType(), null);
	} else {
		newValueType = ((ResultTypeQueryable) mapper).getProducedType();
	}

	// Edge type info: source id, target id (both of the key type) and the new value.
	final TypeInformation<Edge<K, NV>> mappedEdgeType = (TypeInformation<Edge<K, NV>>) new TupleTypeInfo(
			Edge.class, vertexIdType, vertexIdType, newValueType);

	return mapEdges(mapper, mappedEdgeType);
}

Example #7

Source File: StatisticsHelper.java From Alink with Apache License 2.0

5 votes

/**
 * Computes the correlation of a vector column. Each result is a tuple whose f0 is the
 * summary and whose f1 is the correlation, both obtained from the same summarizer.
 *
 * @param in              the input operator
 * @param selectedColName name of the selected column
 * @return data set of (summary, correlation) tuples
 */
public static DataSet<Tuple2<BaseVectorSummary, CorrelationResult>> vectorPearsonCorrelation(BatchOperator in, String selectedColName) {
    final MapFunction<BaseVectorSummarizer, Tuple2<BaseVectorSummary, CorrelationResult>> toSummaryAndCorrelation =
        new MapFunction<BaseVectorSummarizer, Tuple2<BaseVectorSummary, CorrelationResult>>() {
            @Override
            public Tuple2<BaseVectorSummary, CorrelationResult> map(BaseVectorSummarizer summarizer) {
                BaseVectorSummary summary = summarizer.toSummary();
                CorrelationResult correlation = summarizer.correlation();
                return new Tuple2<>(summary, correlation);
            }
        };

    return vectorSummarizer(in, selectedColName, true).map(toSummaryAndCorrelation);
}

Example #8

Source File: MapCancelingITCase.java From flink with Apache License 2.0

5 votes

/**
 * Builds a batch program that feeds an {@code InfiniteIntegerInputFormat} through the
 * given mapper into a discarding output, then hands its plan to {@code runAndCancelJob}.
 *
 * @param mapper the map function under test
 * @throws Exception if creating or running the program fails
 */
public void executeTask(MapFunction<Integer, Integer> mapper) throws Exception {
	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

	environment
			.createInput(new InfiniteIntegerInputFormat(false))
			.map(mapper)
			.output(new DiscardingOutputFormat<Integer>());

	environment.setParallelism(PARALLELISM);

	// The two timing arguments passed to runAndCancelJob, in their original order.
	final int firstTimingArg = 5 * 1000;
	final int secondTimingArg = 10 * 1000;
	runAndCancelJob(environment.createProgramPlan(), firstTimingArg, secondTimingArg);
}

Example #9

Source File: UdfAnalyzerTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Expects analysis of {@code PutStaticMapper} to throw a {@code CodeErrorException};
 * the test fails if the analysis completes normally.
 */
@Test
public void testPutStaticException() {
	try {
		new UdfAnalyzer(MapFunction.class, PutStaticMapper.class, "operator",
				BasicTypeInfo.STRING_TYPE_INFO, null, BasicTypeInfo.STRING_TYPE_INFO, null, null, true)
				.analyze();
		// Reaching this line means no exception was raised.
		Assert.fail();
	} catch (CodeErrorException expected) {
		// ok - this is the outcome the test is looking for
	}
}

Example #10

Source File: WindowTriangles.java From gelly-streaming with Apache License 2.0

5 votes

/**
 * Builds the edge stream for the example. When {@code fileOutput} is set, edges are parsed
 * from the text file at {@code edgeInputPath}; otherwise they are generated from the
 * sequence 1..10. In both cases the edge value carries a timestamp that is consumed by
 * {@code EdgeValueTimestampExtractor} and then removed by {@code RemoveEdgeValue}.
 *
 * @param env the streaming execution environment
 * @return the edge stream without edge values
 */
@SuppressWarnings("serial")
private static SimpleEdgeStream<Long, NullValue> getGraphStream(StreamExecutionEnvironment env) {

	if (!fileOutput) {
		// For every key emit two edges: key -> key+1 and key -> key+2.
		return new SimpleEdgeStream<>(env.generateSequence(1, 10).flatMap(
				new FlatMapFunction<Long, Edge<Long, Long>>() {
					@Override
					public void flatMap(Long key, Collector<Edge<Long, Long>> out) throws Exception {
						for (int offset = 0; offset < 2; offset++) {
							long target = key + offset + 1;
							long timestamp = key * 100 + offset * 50;
							out.collect(new Edge<>(key, target, timestamp));
						}
					}
				}), new EdgeValueTimestampExtractor(), env).mapEdges(new RemoveEdgeValue());
	}

	// Each line holds whitespace-separated fields: source, target, timestamp.
	return new SimpleEdgeStream<>(env.readTextFile(edgeInputPath)
		.map(new MapFunction<String, Edge<Long, Long>>() {
			@Override
			public Edge<Long, Long> map(String line) {
				String[] parts = line.split("\\s");
				long source = Long.parseLong(parts[0]);
				long target = Long.parseLong(parts[1]);
				long timestamp = Long.parseLong(parts[2]);
				return new Edge<>(source, target, timestamp);
			}
		}), new EdgeValueTimestampExtractor(), env).mapEdges(new RemoveEdgeValue());
}

Example #11

Source File: TypeExtractorTest.java From flink with Apache License 2.0

5 votes

/**
 * Passes a raw {@code MapWithResultTypeQueryable} together with an int input type to the
 * type extractor and expects the string type as the extracted return type.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Test
public void testInputMismatchWithRawFuntion() {
	final MapFunction<?, ?> mapper = new MapWithResultTypeQueryable();

	// The raw cast deliberately drops the generic parameters before extraction.
	final TypeInformation<?> extracted =
			TypeExtractor.getMapReturnTypes((MapFunction) mapper, BasicTypeInfo.INT_TYPE_INFO);

	Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, extracted);
}

Example #12

Source File: SimpleEdgeStream.java From gelly-streaming with Apache License 2.0

5 votes

/**
 * Transforms the value of every edge in the graph stream with the given function.
 *
 * @param mapper the map function to apply.
 * @return a new graph stream.
 */
public <NV> SimpleEdgeStream<K, NV> mapEdges(final MapFunction<Edge<K, EV>, NV> mapper) {
	// The vertex key type is read from position 0 of the edge tuple type.
	final TypeInformation<K> vertexKeyType = ((TupleTypeInfo<?>) edges.getType()).getTypeAt(0);

	final DataStream<Edge<K, NV>> remapped =
			edges.map(new ApplyMapperToEdgeWithType<>(mapper, vertexKeyType));

	return new SimpleEdgeStream<>(remapped, this.context);
}

Example #13

Source File: GroupCombineITCase.java From flink with Apache License 2.0

5 votes

/**
 * Runs a combine followed by a full group reduce where the combiner's output type differs
 * from its input type, then compares the unwrapped result with the expected tuples.
 */
@Test
public void testPartialReduceWithDifferentInputOutputType() throws Exception {

	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

	// Input data.
	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(environment);

	// Wrap values as key/value pairs with the grouping key as key.
	DataSet<Tuple2<Long, Tuple3<Integer, Long, String>>> keyed = input.map(new Tuple3KvWrapper());

	// Drops the key and keeps only the reduced value.
	final MapFunction<Tuple2<Long, Tuple2<Integer, Long>>, Tuple2<Integer, Long>> unwrap =
			new MapFunction<Tuple2<Long, Tuple2<Integer, Long>>, Tuple2<Integer, Long>>() {
				@Override
				public Tuple2<Integer, Long> map(Tuple2<Long, Tuple2<Integer, Long>> keyedValue) throws Exception {
					return keyedValue.f1;
				}
			};

	List<Tuple2<Integer, Long>> actual = keyed
			.groupBy(0)
			// partial reduce
			.combineGroup(new Tuple3toTuple2GroupReduce())
			.groupBy(0)
			// full reduce, to check the result
			.reduceGroup(new Tuple2toTuple2GroupReduce())
			.map(unwrap)
			.collect();

	String expectedTuples = "1,3\n"
			+ "5,20\n"
			+ "15,58\n"
			+ "34,52\n"
			+ "65,70\n"
			+ "111,96\n";

	compareResultAsTuples(actual, expectedTuples);
}

Example #14

Source File: ConsumePipelinedAndBlockingResultITCase.java From flink with Apache License 2.0

5 votes

/**
 * Joins a slow mapped source with itself and with a single-element source, discarding both
 * join results, and executes the program with parallelism 1.
 */
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(1);

	DataSet<Tuple1<Long>> pipelined = environment.fromElements(new Tuple1<Long>(1L));

	// Wraps each value in a Tuple1 after sleeping 200 ms per record.
	final MapFunction<Long, Tuple1<Long>> slowWrap = new MapFunction<Long, Tuple1<Long>>() {
		@Override
		public Tuple1<Long> map(Long number) throws Exception {
			Thread.sleep(200);

			return new Tuple1<Long>(number);
		}
	};
	DataSet<Tuple1<Long>> slowBlocking = environment.generateSequence(0, 10).map(slowWrap);

	slowBlocking.join(slowBlocking)
			.where(0).equalTo(0)
			.output(new DiscardingOutputFormat<Tuple2<Tuple1<Long>, Tuple1<Long>>>());

	// Join the slow blocking source with the pipelined source. This test should verify that
	// this works without problems and that the blocking result is not requested too early.
	pipelined.join(slowBlocking)
			.where(0).equalTo(0)
			.output(new DiscardingOutputFormat<Tuple2<Tuple1<Long>, Tuple1<Long>>>());

	environment.execute("Consume one pipelined and one blocking result test job");
}

Example #15

Source File: GroupCombineITCase.java From flink with Apache License 2.0

5 votes

/**
 * Runs a combine followed by a full group reduce that both use
 * {@code Tuple3toTuple3GroupReduce}, then compares the unwrapped result with the expected
 * tuples.
 */
@Test
public void testPartialReduceWithIdenticalInputOutputType() throws Exception {

	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

	// Input data.
	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(environment);

	// Wrap values as key/value pairs with the grouping key as key.
	DataSet<Tuple2<Long, Tuple3<Integer, Long, String>>> keyed = input.map(new Tuple3KvWrapper());

	// Drops the key and keeps only the reduced value.
	final MapFunction<Tuple2<Long, Tuple3<Integer, Long, String>>, Tuple3<Integer, Long, String>> unwrap =
			new MapFunction<Tuple2<Long, Tuple3<Integer, Long, String>>, Tuple3<Integer, Long, String>>() {
				@Override
				public Tuple3<Integer, Long, String> map(Tuple2<Long, Tuple3<Integer, Long, String>> keyedValue) throws Exception {
					return keyedValue.f1;
				}
			};

	List<Tuple3<Integer, Long, String>> actual = keyed
			.groupBy(0)
			// partial reduce
			.combineGroup(new Tuple3toTuple3GroupReduce())
			.groupBy(0)
			// full reduce, to check the result
			.reduceGroup(new Tuple3toTuple3GroupReduce())
			.map(unwrap)
			.collect();

	String expectedTuples = "1,1,combined\n"
			+ "5,4,combined\n"
			+ "15,9,combined\n"
			+ "34,16,combined\n"
			+ "65,25,combined\n"
			+ "111,36,combined\n";

	compareResultAsTuples(actual, expectedTuples);
}

Example #16

Source File: VertexDegree.java From flink with Apache License 2.0

5 votes

/**
 * Computes a degree per vertex by mapping each edge to one of its endpoint ids (target or
 * source, depending on {@code reduceOnTargetId}) and reducing per id. When
 * {@code includeZeroDegreeVertices} is set, the result is left-outer-joined onto the
 * graph's vertices.
 *
 * @param input the input graph
 * @return data set of vertices whose value is the degree
 * @throws Exception declared by the overridden method
 */
@Override
public DataSet<Vertex<K, LongValue>> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	final MapFunction<Edge<K, EV>, Vertex<K, LongValue>> edgeToEndpointId;
	if (reduceOnTargetId.get()) {
		edgeToEndpointId = new MapEdgeToTargetId<>();
	} else {
		edgeToEndpointId = new MapEdgeToSourceId<>();
	}

	// v
	final DataSet<Vertex<K, LongValue>> endpointIds = input
		.getEdges()
		.map(edgeToEndpointId)
			.setParallelism(parallelism)
			.name("Edge to vertex ID");

	// v, deg(v)
	final DataSet<Vertex<K, LongValue>> degreePerVertex = endpointIds
		.groupBy(0)
		.reduce(new DegreeCount<>())
		.setCombineHint(CombineHint.HASH)
			.setParallelism(parallelism)
			.name("Degree count");

	if (!includeZeroDegreeVertices.get()) {
		return degreePerVertex;
	}

	// Join onto all vertices so that vertices without a counted degree are still emitted.
	DataSet<Vertex<K, LongValue>> withZeroDegrees = input
		.getVertices()
		.leftOuterJoin(degreePerVertex)
		.where(0)
		.equalTo(0)
		.with(new JoinVertexWithVertexDegree<>())
			.setParallelism(parallelism)
			.name("Zero degree vertices");

	return withZeroDegrees;
}

Example #17

Source File: StreamingJobGraphGeneratorTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Checks the chain start/end flags of the source, map and print configurations for a
 * {@code fromElements -> map -> print} job.
 */
@Test
public void testChainStartEndSetting() throws Exception {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// fromElements -> CHAIN(Map -> Print)
	final MapFunction<Integer, Integer> identity = new MapFunction<Integer, Integer>() {
		@Override
		public Integer map(Integer element) throws Exception {
			return element;
		}
	};
	environment.fromElements(1, 2, 3)
		.map(identity)
		.print();

	final JobGraph graph = StreamingJobGraphGenerator.createJobGraph(environment.getStreamGraph());

	final List<JobVertex> sortedVertices = graph.getVerticesSortedTopologicallyFromSources();
	final JobVertex source = sortedVertices.get(0);
	final JobVertex mapAndPrint = sortedVertices.get(1);

	// Both ends of the connection between the two vertices report a pipelined bounded result.
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, source.getProducedDataSets().get(0).getResultType());
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, mapAndPrint.getInputs().get(0).getSource().getResultType());

	final StreamConfig sourceCfg = new StreamConfig(source.getConfiguration());
	final StreamConfig mapCfg = new StreamConfig(mapAndPrint.getConfiguration());
	final Map<Integer, StreamConfig> chained = mapCfg.getTransitiveChainedTaskConfigs(getClass().getClassLoader());
	final StreamConfig printCfg = chained.values().iterator().next();

	// The source is both start and end of its chain.
	assertTrue(sourceCfg.isChainStart());
	assertTrue(sourceCfg.isChainEnd());

	// The map starts the second chain ...
	assertTrue(mapCfg.isChainStart());
	assertFalse(mapCfg.isChainEnd());

	// ... and the print ends it.
	assertFalse(printCfg.isChainStart());
	assertTrue(printCfg.isChainEnd());
}

Example #18

Source File: TypeExtractorTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Extracts the return type of an {@code EdgeMapper<String, Double>} for a
 * (String, String, Double) tuple input and expects a 3-ary tuple type with the same
 * field types.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Test
public void testInputInference1() {
	final EdgeMapper<String, Double> mapper = new EdgeMapper<String, Double>();

	final TypeInformation<?> extracted = TypeExtractor.getMapReturnTypes(
			(MapFunction) mapper,
			TypeInformation.of(new TypeHint<Tuple3<String, String, Double>>(){}));

	Assert.assertTrue(extracted.isTupleType());
	Assert.assertEquals(3, extracted.getArity());

	final TupleTypeInfo<?> tupleType = (TupleTypeInfo<?>) extracted;
	Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tupleType.getTypeAt(0));
	Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tupleType.getTypeAt(1));
	Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tupleType.getTypeAt(2));
}

Example #19

Source File: ProjectOperator.java From flink with Apache License 2.0

5 votes

/**
 * Creates the {@code PlanProjectOperator} for this projection and wires it to the given
 * input, copying this operator's parallelism and setting projection semantic properties.
 *
 * @param input the input operator
 * @return the configured plan operator
 */
@Override
protected org.apache.flink.api.common.operators.base.MapOperatorBase<IN, OUT, MapFunction<IN, OUT>> translateToDataFlow(Operator<IN> input) {
	// Fall back to a name derived from the projected fields when no name is set.
	String operatorName = getName();
	if (operatorName == null) {
		operatorName = "Projection " + Arrays.toString(fields);
	}

	final PlanProjectOperator<IN, OUT> projection =
			new PlanProjectOperator<IN, OUT>(fields, operatorName, getInputType(), getResultType(), context.getConfig());

	projection.setInput(input);
	projection.setParallelism(this.getParallelism());
	projection.setSemanticProperties(
			SemanticPropUtil.createProjectionPropertiesSingle(fields, (CompositeType<?>) getInputType()));

	return projection;
}

Example #20

Source File: LambdaExtractionTest.java From flink with Apache License 2.0

5 votes

/**
 * Extracts the return type of a map lambda. When the extractor yields something other
 * than {@code MissingTypeInfo}, the result must be a 2-ary tuple whose first field is a
 * tuple and whose second field is a string.
 */
@Test
public void testMapLambda() {
	final MapFunction<Tuple2<Tuple1<Integer>, Boolean>, Tuple2<Tuple1<Integer>, String>> lambda = (i) -> null;

	final TypeInformation<?> extracted = TypeExtractor.getMapReturnTypes(lambda, NESTED_TUPLE_BOOLEAN_TYPE, null, true);
	if (extracted instanceof MissingTypeInfo) {
		// Nothing further is checked in this case.
		return;
	}

	assertTrue(extracted.isTupleType());
	assertEquals(2, extracted.getArity());

	final TupleTypeInfo<?> tupleType = (TupleTypeInfo<?>) extracted;
	assertTrue(tupleType.getTypeAt(0).isTupleType());
	assertEquals(tupleType.getTypeAt(1), BasicTypeInfo.STRING_TYPE_INFO);
}

Example #21

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Builds a job graph of a source followed by three maps, assigning one resource spec and,
 * optionally, one managed-memory weight to each of the four operators (in the order
 * source, map1, map2, map3).
 *
 * @param resourceSpecs        resource specs; indices 0..3 are read
 * @param managedMemoryWeights managed-memory weights; indices 0..3 are read when not null
 * @return the generated job graph
 * @throws Exception if the reflective resource assignment fails
 */
private JobGraph createJobGraphForManagedMemoryFractionTest(
	final List<ResourceSpec> resourceSpecs,
	@Nullable final List<Integer> managedMemoryWeights) throws Exception {

	// Resources are assigned reflectively through this method handle.
	final Method setResources = getSetResourcesMethodAndSetAccessible(SingleOutputStreamOperator.class);

	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	final DataStream<Integer> src = environment.addSource(new ParallelSourceFunction<Integer>() {
		@Override
		public void run(SourceContext<Integer> ctx) {
			// intentionally emits nothing
		}

		@Override
		public void cancel() {
			// nothing to stop
		}
	});
	setResources.invoke(src, resourceSpecs.get(0));

	// CHAIN(source -> map1) in default slot sharing group
	final DataStream<Integer> firstMap = src.map((MapFunction<Integer, Integer>) value -> value);
	setResources.invoke(firstMap, resourceSpecs.get(1));

	// CHAIN(map2) in default slot sharing group
	final DataStream<Integer> secondMap = firstMap.rebalance().map((MapFunction<Integer, Integer>) value -> value);
	setResources.invoke(secondMap, resourceSpecs.get(2));

	// CHAIN(map3) in test slot sharing group
	final DataStream<Integer> thirdMap = secondMap.rebalance().map(value -> value).slotSharingGroup("test");
	setResources.invoke(thirdMap, resourceSpecs.get(3));

	if (managedMemoryWeights != null) {
		src.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(0));
		firstMap.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(1));
		secondMap.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(2));
		thirdMap.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(3));
	}

	return StreamingJobGraphGenerator.createJobGraph(environment.getStreamGraph());
}

Example #22

Source File: LambdaExtractionTest.java From flink with Apache License 2.0

5 votes

/**
 * Expects {@code MissingTypeInfo} when extracting the return type of a lambda whose
 * declared output is the raw {@code Tuple1}.
 */
@SuppressWarnings("rawtypes")
@Test
public void testLambdaTypeErasure() {
	final MapFunction<Tuple1<Integer>, Tuple1> lambda = (i) -> null;

	final TypeInformation<?> extracted = TypeExtractor.getMapReturnTypes(
			lambda,
			new TypeHint<Tuple1<Integer>>(){}.getTypeInfo(),
			null,
			true);

	assertTrue(extracted instanceof MissingTypeInfo);
}

Example #23

Source File: CoGroupConnectedComponentsITCase.java From flink with Apache License 2.0

5 votes

/**
 * Runs a delta iteration over vertices and edges read from CSV: each round joins the
 * workset with the edges, co-groups the candidates with the solution set through
 * {@code MinIdAndUpdate}, and the final result is written as CSV.
 */
@Override
protected void testProgram() throws Exception {

	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple1<Long>> vertices = environment.readCsvFile(verticesPath)
			.fieldDelimiter(" ")
			.types(Long.class)
			.name("Vertices");

	DataSet<Tuple2<Long, Long>> edgePairs = environment.readCsvFile(edgesPath)
			.fieldDelimiter(" ")
			.types(Long.class, Long.class)
			.name("Edges");

	// Every vertex starts out with its own id in both tuple fields.
	final MapFunction<Tuple1<Long>, Tuple2<Long, Long>> duplicateId = new MapFunction<Tuple1<Long>, Tuple2<Long, Long>>() {
		@Override
		public Tuple2<Long, Long> map(Tuple1<Long> vertex) throws Exception {
			return new Tuple2<>(vertex.f0, vertex.f0);
		}
	};
	DataSet<Tuple2<Long, Long>> idAssigned = vertices.map(duplicateId).name("Assign Vertex Ids");

	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIteration =
			idAssigned.iterateDelta(idAssigned, MAX_ITERATIONS, 0);

	// Emits (second field of the edge, second field of the workset entry).
	final JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> candidateForNeighbor =
			new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
				@Override
				public Tuple2<Long, Long> join(Tuple2<Long, Long> worksetEntry, Tuple2<Long, Long> edge) throws Exception {
					return new Tuple2<>(edge.f1, worksetEntry.f1);
				}
			};

	JoinOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> candidates = deltaIteration.getWorkset()
			.join(edgePairs).where(0).equalTo(0)
			.with(candidateForNeighbor)
			.name("Join Candidate Id With Neighbor");

	CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> updates = candidates
			.coGroup(deltaIteration.getSolutionSet()).where(0).equalTo(0)
			.with(new MinIdAndUpdate())
			.name("min Id and Update");

	// The same data set closes both the solution-set delta and the next workset.
	deltaIteration.closeWith(updates, updates).writeAsCsv(resultPath, "\n", " ").name("Result");

	environment.execute("Workset Connected Components");
}

Example #24

Source File: DataStreamAllroundTestJobFactory.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Creates an {@code ArtificialKeyedStateMapper} with one value-state builder and one
 * list-state builder for every given serializer and for every given state class.
 *
 * @param mapFunction                the map function handed to the mapper
 * @param inputAndOldStateToNewState function passed to every state builder
 * @param stateSerializers           serializers to create state descriptors from
 * @param stateClasses               state classes to create state descriptors from
 * @return the mapper wrapping {@code mapFunction} and all created state builders
 */
static <IN, OUT, STATE> ArtificialKeyedStateMapper<IN, OUT> createArtificialKeyedStateMapper(
	MapFunction<IN, OUT> mapFunction,
	JoinFunction<IN, STATE, STATE> inputAndOldStateToNewState,
	List<TypeSerializer<STATE>> stateSerializers,
	List<Class<STATE>> stateClasses) {

	final List<ArtificialStateBuilder<IN>> builders = new ArrayList<>(stateSerializers.size());

	// Serializer-based descriptors, named after the serializer's simple class name.
	for (TypeSerializer<STATE> serializer : stateSerializers) {
		final String serializerName = serializer.getClass().getSimpleName();

		builders.add(createValueStateBuilder(
			inputAndOldStateToNewState,
			new ValueStateDescriptor<>("valueState-" + serializerName, serializer)));

		builders.add(createListStateBuilder(
			inputAndOldStateToNewState,
			new ListStateDescriptor<>("listState-" + serializerName, serializer)));
	}

	// Class-based descriptors, named after the state class's simple name.
	for (Class<STATE> stateType : stateClasses) {
		final String stateTypeName = stateType.getSimpleName();

		builders.add(createValueStateBuilder(
			inputAndOldStateToNewState,
			new ValueStateDescriptor<>("valueState-" + stateTypeName, stateType)));

		builders.add(createListStateBuilder(
			inputAndOldStateToNewState,
			new ListStateDescriptor<>("listState-" + stateTypeName, stateType)));
	}

	return new ArtificialKeyedStateMapper<>(mapFunction, builders);
}

Example #25

Source File: GroupCombineITCase.java From flink with Apache License 2.0

5 votes

/**
 * Combines and then fully reduces grouped key/value pairs where the combiner changes the
 * value type, and checks the unwrapped values against the expected tuples.
 */
@Test
public void testPartialReduceWithDifferentInputOutputType() throws Exception {

	final ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();

	// Source tuples.
	DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.get3TupleDataSet(batchEnv);

	// Wrap values as key/value pairs with the grouping key as key.
	DataSet<Tuple2<Long, Tuple3<Integer, Long, String>>> wrapped = tuples.map(new Tuple3KvWrapper());

	// Strips the grouping key again.
	final MapFunction<Tuple2<Long, Tuple2<Integer, Long>>, Tuple2<Integer, Long>> stripKey =
			new MapFunction<Tuple2<Long, Tuple2<Integer, Long>>, Tuple2<Integer, Long>>() {
				@Override
				public Tuple2<Integer, Long> map(Tuple2<Long, Tuple2<Integer, Long>> pair) throws Exception {
					return pair.f1;
				}
			};

	List<Tuple2<Integer, Long>> reduced = wrapped
			.groupBy(0)
			// partial reduce
			.combineGroup(new Tuple3toTuple2GroupReduce())
			.groupBy(0)
			// full reduce, to check the result
			.reduceGroup(new Tuple2toTuple2GroupReduce())
			.map(stripKey)
			.collect();

	String expectedRows = "1,3\n"
			+ "5,20\n"
			+ "15,58\n"
			+ "34,52\n"
			+ "65,70\n"
			+ "111,96\n";

	compareResultAsTuples(reduced, expectedRows);
}

Example #26

Source File: SequenceStreamingFileSinkITCase.java From flink with Apache License 2.0

5 votes

/**
 * Streams {@code testData} through a map into Hadoop writables, writes them with a
 * bulk-format {@code StreamingFileSink} into a temporary folder and validates the output.
 */
@Test
public void testWriteSequenceFile() throws Exception {
	final File outputFolder = TEMPORARY_FOLDER.newFolder();
	final Path outputPath = Path.fromLocalFile(outputFolder);

	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(1);
	environment.enableCheckpointing(100);

	DataStream<Tuple2<Long, String>> records = environment.addSource(
			new FiniteTestSource<>(testData),
			TypeInformation.of(new TypeHint<Tuple2<Long, String>>() {
			})
	);

	// Converts (Long, String) into the corresponding Hadoop (LongWritable, Text) pair.
	final MapFunction<Tuple2<Long, String>, Tuple2<LongWritable, Text>> toWritables =
			new MapFunction<Tuple2<Long, String>, Tuple2<LongWritable, Text>>() {
				@Override
				public Tuple2<LongWritable, Text> map(Tuple2<Long, String> record) throws Exception {
					return new Tuple2<>(new LongWritable(record.f0), new Text(record.f1));
				}
			};

	records.map(toWritables).addSink(
		StreamingFileSink.forBulkFormat(
			outputPath,
			new SequenceFileWriterFactory<>(configuration, LongWritable.class, Text.class, "BZip2")
		).build());

	environment.execute();

	validateResults(outputFolder, testData);
}

Example #27

Source File: Main4.java From flink-learning with Apache License 2.0

5 votes

/**
 * Session-window demo: parses "key,number" socket lines, uses the number as event
 * timestamp, and sums field 1 per key over event-time session windows with a 5 minute gap.
 * The execution plan is printed to standard output before the job runs.
 *
 * @param args command-line arguments (not read by this method)
 * @throws Exception if building or executing the job fails
 */
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
    environment.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    environment.setParallelism(2);
    DataStreamSource<String> lines = environment.socketTextStream("localhost", 9001);

    // Splits a line on ',' into (first field, second field parsed as Long).
    final MapFunction<String, Tuple2<String, Long>> parseLine = new MapFunction<String, Tuple2<String, Long>>() {
        @Override
        public Tuple2<String, Long> map(String line) throws Exception {
            String[] fields = line.split(",");
            return new Tuple2<>(fields[0], Long.valueOf(fields[1]));
        }
    };

    // Uses field f1 as the timestamp; the watermark is the largest timestamp seen so far.
    final AssignerWithPeriodicWatermarks<Tuple2<String, Long>> maxTimestampAssigner =
            new AssignerWithPeriodicWatermarks<Tuple2<String, Long>>() {
                private long maxSeenTimestamp;

                @Nullable
                @Override
                public Watermark getCurrentWatermark() {
                    return new Watermark(maxSeenTimestamp);
                }

                @Override
                public long extractTimestamp(Tuple2<String, Long> element, long previousTimestamp) {
                    long elementTimestamp = element.f1;
                    maxSeenTimestamp = Math.max(elementTimestamp, maxSeenTimestamp);
                    return elementTimestamp;
                }
            };

    lines.map(parseLine)
            .assignTimestampsAndWatermarks(maxTimestampAssigner)
            .keyBy(0)
            .window(EventTimeSessionWindows.withGap(Time.minutes(5)))
            .sum(1)
            .print("session ");

    System.out.println(environment.getExecutionPlan());
    environment.execute();
}

Example #28

Source File: Main2.java From flink-learning with Apache License 2.0

5 votes

/**
 * Watermark demo: reads comma-separated lines from a local socket, parses them into
 * {@code Word} records, registers a bounded-out-of-orderness timestamp extractor with a
 * 10 second bound and prints the records.
 *
 * @param args command-line arguments (not read by this method)
 * @throws Exception if building or executing the job fails
 */
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
    environment.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // Set the parallelism to 1.
    environment.setParallelism(1);

    // Each line is split on ','; the first three fields are passed to the Word constructor.
    final MapFunction<String, Word> lineToWord = new MapFunction<String, Word>() {
        @Override
        public Word map(String line) throws Exception {
            String[] fields = line.split(",");
            return new Word(fields[0], Integer.valueOf(fields[1]), Long.valueOf(fields[2]));
        }
    };

    SingleOutputStreamOperator<Word> words = environment
            .socketTextStream("localhost", 9001)
            .map(lineToWord);

    // BoundedOutOfOrdernessTimestampExtractor
    // NOTE(review): the stream returned here is not used; print() below is attached to the
    // stream from before the extractor - confirm this is intended.
    words.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<Word>(Time.seconds(10)) {
        @Override
        public long extractTimestamp(Word word) {
            return word.getTimestamp();
        }
    });

    words.print();
    environment.execute("watermark demo");
}

Example #29

Source File: Main4.java From flink-learning with Apache License 2.0

5 votes

/**
 * Watermark demo with late data: parses socket lines into {@code Word} records, assigns
 * periodic watermarks, sums field 1 per key over 10 second windows, and prints both the
 * window results and the records routed to the "late" side output.
 *
 * @param args command-line arguments (not read by this method)
 * @throws Exception if building or executing the job fails
 */
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
    environment.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // Set the parallelism to 1.
    environment.setParallelism(1);

    // Anonymous subclass, as in the original, for the tag that receives late records.
    final OutputTag<Word> lateTag = new OutputTag<Word>("late") {
    };

    // Each line is split on ','; the first three fields are passed to the Word constructor.
    final MapFunction<String, Word> lineToWord = new MapFunction<String, Word>() {
        @Override
        public Word map(String line) throws Exception {
            String[] fields = line.split(",");
            return new Word(fields[0], Integer.valueOf(fields[1]), Long.valueOf(fields[2]));
        }
    };

    SingleOutputStreamOperator<Word> words = environment
            .socketTextStream("localhost", 9001)
            .map(lineToWord)
            .assignTimestampsAndWatermarks(new WordPeriodicWatermark());

    SingleOutputStreamOperator<Word> windowedSum = words
            .keyBy(0)
            .timeWindow(Time.seconds(10))
            .sideOutputLateData(lateTag)
            .sum(1);

    windowedSum.print();
    windowedSum.getSideOutput(lateTag).print();

    environment.execute("watermark demo");
}

Example #30

Source File: ConnectedComponents.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Returns the vertex ids for the example: read from the CSV file given by the
 * {@code vertices} parameter when present, otherwise the default vertex data set.
 *
 * @param env    the batch execution environment
 * @param params program parameters; only {@code vertices} is consulted
 * @return data set of vertex ids
 */
private static DataSet<Long> getVertexDataSet(ExecutionEnvironment env, ParameterTool params) {
	if (!params.has("vertices")) {
		System.out.println("Executing Connected Components example with default vertices data set.");
		System.out.println("Use --vertices to specify file input.");
		return ConnectedComponentsData.getDefaultVertexDataSet(env);
	}

	// Unwrap the single-field tuples produced by the CSV reader.
	return env.readCsvFile(params.get("vertices")).types(Long.class).map(
		new MapFunction<Tuple1<Long>, Long>() {
			@Override
			public Long map(Tuple1<Long> value) {
				return value.f0;
			}
		});
}