Java Code Examples for org.apache.flink.streaming.api.datastream.DataStreamUtils

The following examples show how to use org.apache.flink.streaming.api.datastream.DataStreamUtils. These examples are extracted from open source projects; you can go to the original project or source file by following the links above each example, and check out related API usage in the sidebar.
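Most of the examples below exercise two methods on this class: DataStreamUtils.collect, which hands a stream's results back to the client as a local iterator, and DataStreamUtils.reinterpretAsKeyedStream, which treats an already-partitioned stream as a KeyedStream without introducing another shuffle. The snippet below is a minimal sketch of both calls, not code from any of the projects listed; the pipeline, the element values, and the even/odd key are illustrative assumptions.

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	DataStream<Long> numbers = env.generateSequence(1, 5);

	// Declare that 'numbers' is already partitioned the way the key selector implies
	// (trivially true here because the parallelism is 1), so keyed operations can be
	// applied without paying for another keyBy() shuffle.
	DataStream<Long> runningSums = DataStreamUtils.reinterpretAsKeyedStream(
			numbers,
			(KeySelector<Long, Long>) value -> value % 2,
			TypeInformation.of(Long.class))
		.reduce((a, b) -> a + b);

	// collect() registers a collecting sink, triggers execution, and returns a
	// client-side iterator over the results.
	Iterator<Long> it = DataStreamUtils.collect(runningSums);
	while (it.hasNext()) {
		System.out.println(it.next());
	}

Note that reinterpretAsKeyedStream is only safe when the stream really is partitioned exactly as the key selector implies; otherwise keyed state and timers can end up in the wrong subtasks. The integration tests further down show the intended use cases: partition files and Kafka partitions that were written by a keyBy.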
Example 1
Source Project: Flink-CEPplus   Source File: CollectITCase.java    License: Apache License 2.0
@Test
public void testCollect() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	final long n = 10;
	DataStream<Long> stream = env.generateSequence(1, n);

	long i = 1;
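	// DataStreamUtils.collect() starts the job and returns its results to the client as a local iterator.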
	for (Iterator<Long> it = DataStreamUtils.collect(stream); it.hasNext(); ) {
		long x = it.next();
		assertEquals("received wrong element", i, x);
		i++;
	}

	assertEquals("received wrong number of elements", n + 1, i);
}
 
Example 2
Source Project: flink   Source File: CollectITCase.java    License: Apache License 2.0
@Test
public void testCollect() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	final long n = 10;
	DataStream<Long> stream = env.generateSequence(1, n);

	long i = 1;
	for (Iterator<Long> it = DataStreamUtils.collect(stream); it.hasNext(); ) {
		long x = it.next();
		assertEquals("received wrong element", i, x);
		i++;
	}

	assertEquals("received wrong number of elements", n + 1, i);
}
 
Example 3
Source Project: flink   Source File: RegionFailoverITCase.java    License: Apache License 2.0
private JobGraph createJobGraph() {

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(NUM_OF_REGIONS);
		env.setMaxParallelism(MAX_PARALLELISM);
		env.enableCheckpointing(200, CheckpointingMode.EXACTLY_ONCE);
		env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
		env.disableOperatorChaining();
		env.getConfig().disableSysoutLogging();

		// Use DataStreamUtils#reinterpretAsKeyedStream to avoid merging regions, so that this stream graph consists of 'NUM_OF_REGIONS' individual regions.
		DataStreamUtils.reinterpretAsKeyedStream(
			env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
				.name(MULTI_REGION_SOURCE_NAME)
				.setParallelism(NUM_OF_REGIONS),
			(KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
			TypeInformation.of(Integer.class))
			.map(new FailingMapperFunction(NUM_OF_RESTARTS))
			.setParallelism(NUM_OF_REGIONS)
			.addSink(new ValidatingSink())
			.setParallelism(NUM_OF_REGIONS);

		// another stream graph, totally disconnected from the one above.
		env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
			.name(SINGLE_REGION_SOURCE_NAME).setParallelism(1)
			.map((MapFunction<Tuple2<Integer, Integer>, Object>) value -> value).setParallelism(1);

		return env.getStreamGraph().getJobGraph();
	}
 
Example 4
Source Project: stateful-functions   Source File: FlinkUniverse.java    License: Apache License 2.0
private SingleOutputStreamOperator<Message> functionOperator(
    DataStream<Message> input, Map<EgressIdentifier<?>, OutputTag<Object>> sideOutputs) {

  TypeInformation<Message> typeInfo = input.getType();

  FunctionGroupDispatchFactory operatorFactory = new FunctionGroupDispatchFactory(sideOutputs);

  return DataStreamUtils.reinterpretAsKeyedStream(input, new MessageKeySelector())
      .transform(StatefulFunctionsJobConstants.FUNCTION_OPERATOR_NAME, typeInfo, operatorFactory)
      .uid(StatefulFunctionsJobConstants.FUNCTION_OPERATOR_UID);
}
 
Example 5
Source Project: flink-statefun   Source File: FlinkUniverse.java    License: Apache License 2.0
private SingleOutputStreamOperator<Message> functionOperator(
    DataStream<Message> input, Map<EgressIdentifier<?>, OutputTag<Object>> sideOutputs) {

  TypeInformation<Message> typeInfo = input.getType();

  FunctionGroupDispatchFactory operatorFactory =
      new FunctionGroupDispatchFactory(configuration, sideOutputs);

  return DataStreamUtils.reinterpretAsKeyedStream(input, new MessageKeySelector())
      .transform(StatefulFunctionsJobConstants.FUNCTION_OPERATOR_NAME, typeInfo, operatorFactory)
      .uid(StatefulFunctionsJobConstants.FUNCTION_OPERATOR_UID);
}
 
Example 6
Source Project: incubator-iotdb   Source File: FlinkTsFileStreamSource.java    License: Apache License 2.0
public static void main(String[] args) throws IOException {
	String path = "test.tsfile";
	TsFileUtils.writeTsFile(path);
	new File(path).deleteOnExit();
	String[] fieldNames = {
		QueryConstant.RESERVED_TIME,
		"device_1.sensor_1",
		"device_1.sensor_2",
		"device_1.sensor_3",
		"device_2.sensor_1",
		"device_2.sensor_2",
		"device_2.sensor_3"
	};
	TypeInformation[] typeInformations = new TypeInformation[] {
		Types.LONG,
		Types.LONG,
		Types.LONG,
		Types.LONG,
		Types.LONG,
		Types.LONG,
		Types.LONG
	};
	List<Path> paths = Arrays.stream(fieldNames)
		.filter(s -> !s.equals(QueryConstant.RESERVED_TIME))
		.map(Path::new)
		.collect(Collectors.toList());
	RowTypeInfo rowTypeInfo = new RowTypeInfo(typeInformations, fieldNames);
	QueryExpression queryExpression = QueryExpression.create(paths, null);
	RowRowRecordParser parser = RowRowRecordParser.create(rowTypeInfo, queryExpression.getSelectedSeries());
	TsFileInputFormat<Row> inputFormat = new TsFileInputFormat<>(queryExpression, parser);
	StreamExecutionEnvironment senv = StreamExecutionEnvironment.getExecutionEnvironment();
	inputFormat.setFilePath(path);
	DataStream<Row> source = senv.createInput(inputFormat);
	DataStream<String> rowString = source.map(Row::toString);
	Iterator<String> result = DataStreamUtils.collect(rowString);
	while (result.hasNext()) {
		System.out.println(result.next());
	}
}
 
Example 7
@Test
public void testStreamExecution() throws Exception {
	// read files in a directory
	TsFileInputFormat<Row> inputFormat = prepareInputFormat(tmpDir);
	DataStream<Row> source = senv.createInput(inputFormat);
	Iterator<String> rowStringIterator = DataStreamUtils.collect(source.map(Row::toString));
	String[] result = StreamSupport.stream(
		Spliterators.spliteratorUnknownSize(rowStringIterator, 0),
		false).sorted().toArray(String[]::new);
	String[] expected = {
		"1,1.2,20,null,2.3,11,19",
		"10,null,20,50,25.4,10,21",
		"11,1.4,21,null,null,null,null",
		"12,1.2,20,51,null,null,null",
		"14,7.2,10,11,null,null,null",
		"15,6.2,20,21,null,null,null",
		"16,9.2,30,31,null,null,null",
		"2,null,20,50,25.4,10,21",
		"3,1.4,21,null,null,null,null",
		"4,1.2,20,51,null,null,null",
		"6,7.2,10,11,null,null,null",
		"7,6.2,20,21,null,null,null",
		"8,9.2,30,31,null,null,null",
		"9,1.2,20,null,2.3,11,19"
	};
	assertArrayEquals(expected, result);
}
 
Example 8
@Override
public void run() {

    JMetalLogger.logger.info("Run Flink method in the streaming data source invoked");
    JMetalLogger.logger.info("TOPIC: " + topic);

   // environment.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(1,0));
    //environment.enableCheckpointing(10);

    DataStream<String> data = environment.addSource(
            new FlinkKafkaConsumer010<String>(topic, new SimpleStringSchema(), kafkaParams));

    try {
        if (data != null) {
            Iterator<String> it = DataStreamUtils.collect(data);

            while (it.hasNext()) {
                byte[] bytes = it.next().getBytes();
                DataDeserializer<Counter> dataDeserializer = new DataDeserializer<>();
                Counter counter = dataDeserializer.deserialize(bytes, "avsc/Counter.avsc");
                Integer number = (Integer) counter.get(0);
                observable.setChanged();
                observable.notifyObservers(new ObservedValue<Integer>(number));
            }

        }
    } catch (Exception e) {
        e.printStackTrace();
    }


}
 
Example 9
Source Project: flink   Source File: RegionFailoverITCase.java    License: Apache License 2.0
private JobGraph createJobGraph() {

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(NUM_OF_REGIONS);
		env.setMaxParallelism(MAX_PARALLELISM);
		env.enableCheckpointing(200, CheckpointingMode.EXACTLY_ONCE);
		env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
		env.disableOperatorChaining();

		// Use DataStreamUtils#reinterpretAsKeyedStream to avoid merging regions, so that this stream graph consists of 'NUM_OF_REGIONS' individual regions.
		DataStreamUtils.reinterpretAsKeyedStream(
			env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
				.name(MULTI_REGION_SOURCE_NAME)
				.setParallelism(NUM_OF_REGIONS),
			(KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
			TypeInformation.of(Integer.class))
			.map(new FailingMapperFunction(NUM_OF_RESTARTS))
			.setParallelism(NUM_OF_REGIONS)
			.addSink(new ValidatingSink())
			.setParallelism(NUM_OF_REGIONS);

		// another stream graph, totally disconnected from the one above.
		env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
			.name(SINGLE_REGION_SOURCE_NAME).setParallelism(1)
			.map((MapFunction<Tuple2<Integer, Integer>, Object>) value -> value).setParallelism(1);

		return env.getStreamGraph().getJobGraph();
	}
 
Example 10
/**
 * This test checks that reinterpreting a data stream as a keyed stream works as expected. The test consists of
 * two jobs. The first job materializes a keyBy into files, one file per partition. The second job opens the
 * files created by the first job as sources (doing the correct assignment of files to partitions) and
 * reinterprets the sources as keyed, because we know they have been partitioned by a keyBy in the first job.
 */
@Test
public void testReinterpretAsKeyedStream() throws Exception {

	final int maxParallelism = 8;
	final int numEventsPerInstance = 100;
	final int parallelism = 3;
	final int numTotalEvents = numEventsPerInstance * parallelism;
	final int numUniqueKeys = 100;

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
	env.setMaxParallelism(maxParallelism);
	env.setParallelism(parallelism);
	env.enableCheckpointing(100);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0L));

	final List<File> partitionFiles = new ArrayList<>(parallelism);
	for (int i = 0; i < parallelism; ++i) {
		File partitionFile = temporaryFolder.newFile();
		partitionFiles.add(i, partitionFile);
	}

	env.addSource(new RandomTupleSource(numEventsPerInstance, numUniqueKeys))
		.keyBy(0)
		.addSink(new ToPartitionFileSink(partitionFiles));

	env.execute();

	DataStreamUtils.reinterpretAsKeyedStream(
		env.addSource(new FromPartitionFileSource(partitionFiles)),
		(KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
		TypeInformation.of(Integer.class))
		.timeWindow(Time.seconds(1)) // test that also timers and aggregated state work as expected
		.reduce((ReduceFunction<Tuple2<Integer, Integer>>) (value1, value2) ->
			new Tuple2<>(value1.f0, value1.f1 + value2.f1))
		.addSink(new ValidatingSink(numTotalEvents)).setParallelism(1);

	env.execute();
}
 
Example 11
Source Project: flink   Source File: DataStreamConversionUtilTest.java    License: Apache License 2.0
@Test
public void testBasicConvert() throws Exception {
	StreamExecutionEnvironment env = MLEnvironmentFactory.getDefault().getStreamExecutionEnvironment();
	DataStream<Row> input = env.fromElements(Row.of("a"));
	Table table1 = DataStreamConversionUtil.toTable(MLEnvironmentFactory.DEFAULT_ML_ENVIRONMENT_ID, input, new String[]{"word"});
	Assert.assertEquals(
		new TableSchema(new String[]{"word"}, new TypeInformation[]{TypeInformation.of(String.class)}),
		table1.getSchema()
	);
	DataStream<Row> rowDataStream = DataStreamConversionUtil.fromTable(MLEnvironmentFactory.DEFAULT_ML_ENVIRONMENT_ID, table1);
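	// Pull the converted rows back to the client so the Table -> DataStream round trip can be asserted on.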
	Iterator<Row> result = DataStreamUtils.collect(rowDataStream);
	Assert.assertEquals(Row.of("a"), result.next());
	Assert.assertFalse(result.hasNext());
}
 
Example 12
Source Project: yauaa   Source File: TestUserAgentAnalysisMapperClass.java    License: Apache License 2.0
@Test
public void testClassDefinitionDataStream() throws Exception {
    StreamExecutionEnvironment environment = LocalStreamEnvironment.getExecutionEnvironment();

    DataStream<TestRecord> resultDataStream = environment
        .fromElements(
            "Mozilla/5.0 (X11; Linux x86_64) " +
                "AppleWebKit/537.36 (KHTML, like Gecko) " +
                "Chrome/48.0.2564.82 Safari/537.36",

            "Mozilla/5.0 (Linux; Android 7.0; Nexus 6 Build/NBD90Z) " +
                "AppleWebKit/537.36 (KHTML, like Gecko) " +
                "Chrome/53.0.2785.124 Mobile Safari/537.36"
        )

        .map((MapFunction<String, TestRecord>) TestRecord::new)

        .map(new MyUserAgentAnalysisMapper());

    List<TestRecord> result = new ArrayList<>(5);
    DataStreamUtils
        .collect(resultDataStream)
        .forEachRemaining(result::add);

    assertEquals(2, result.size());

    assertThat(result, hasItems(
        new TestRecord(
            "Mozilla/5.0 (X11; Linux x86_64) " +
                "AppleWebKit/537.36 (KHTML, like Gecko) " +
                "Chrome/48.0.2564.82 Safari/537.36",
            "Desktop",
            "Chrome 48.0.2564.82",
            null),

        new TestRecord(
            "Mozilla/5.0 (Linux; Android 7.0; Nexus 6 Build/NBD90Z) " +
                "AppleWebKit/537.36 (KHTML, like Gecko) " +
                "Chrome/53.0.2785.124 Mobile Safari/537.36",
            "Phone",
            "Chrome 53.0.2785.124",
            null)
    ));
}
 
Example 13
Source Project: flink   Source File: FlinkKafkaShuffle.java    License: Apache License 2.0
/**
 * The read side of {@link FlinkKafkaShuffle#persistentKeyBy}.
 *
 * <p>Each consumer task should read the Kafka partitions that correspond to the key group indices it is assigned.
 * The number of Kafka partitions is the maximum parallelism of the consumer.
 * This version only supports numberOfPartitions = consumerParallelism.
 * When {@link TimeCharacteristic#EventTime} is used, a consumer task is responsible for emitting
 * watermarks. Watermarks are read from the corresponding Kafka partitions. Notice that a consumer task only starts
 * to emit a watermark after receiving at least one watermark from each producer task, to make sure watermarks
 * are monotonically increasing. Hence a consumer task needs to know `producerParallelism` as well.
 *
 * <p>Attention: make sure kafkaProperties include
 * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} and {@link FlinkKafkaShuffle#PARTITION_NUMBER} explicitly.
 * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} is the parallelism of the producer.
 * {@link FlinkKafkaShuffle#PARTITION_NUMBER} is the number of partitions.
 * They are not necessarily the same and allowed to be set independently.
 *
 * @see FlinkKafkaShuffle#persistentKeyBy
 * @see FlinkKafkaShuffle#writeKeyBy
 *
 * @param topic 			The topic of Kafka where data is persisted
 * @param env 				Execution environment. readKeyBy's environment can be different from writeKeyBy's
 * @param typeInformation 	Type information of the data persisted in Kafka
 * @param kafkaProperties 	kafka properties for Kafka Consumer
 * @param keySelector 		key selector to retrieve key
 * @param <T> 				Schema type
 * @param <K> 				Key type
 * @return Keyed data stream
 */
public static <T, K> KeyedStream<T, K> readKeyBy(
		String topic,
		StreamExecutionEnvironment env,
		TypeInformation<T> typeInformation,
		Properties kafkaProperties,
		KeySelector<T, K> keySelector) {

	TypeSerializer<T> typeSerializer = typeInformation.createSerializer(env.getConfig());
	TypeInformationSerializationSchema<T> schema =
		new TypeInformationSerializationSchema<>(typeInformation, typeSerializer);

	SourceFunction<T> kafkaConsumer  =
		new FlinkKafkaShuffleConsumer<>(topic, schema, typeSerializer, kafkaProperties);

	// TODO: consider situations where numberOfPartitions != consumerParallelism
	Preconditions.checkArgument(
		kafkaProperties.getProperty(PARTITION_NUMBER) != null,
		"Missing partition number for Kafka Shuffle");
	int numberOfPartitions = PropertiesUtil.getInt(kafkaProperties, PARTITION_NUMBER, Integer.MIN_VALUE);
	DataStream<T> outputDataStream = env.addSource(kafkaConsumer).setParallelism(numberOfPartitions);

	return DataStreamUtils.reinterpretAsKeyedStream(outputDataStream, keySelector);
}
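As the Javadoc above points out, the caller must put FlinkKafkaShuffle#PRODUCER_PARALLELISM and FlinkKafkaShuffle#PARTITION_NUMBER into kafkaProperties before calling readKeyBy. The following is a hedged caller-side sketch, not taken from the Flink sources: the broker address, topic name, parallelism values, and the Tuple2 record type are illustrative assumptions, and it presumes the two property-key constants are accessible on FlinkKafkaShuffle as the Javadoc suggests.

	Properties kafkaProperties = new Properties();
	kafkaProperties.setProperty("bootstrap.servers", "localhost:9092");        // assumed broker address
	kafkaProperties.setProperty(FlinkKafkaShuffle.PRODUCER_PARALLELISM, "4");  // parallelism of the writing job
	kafkaProperties.setProperty(FlinkKafkaShuffle.PARTITION_NUMBER, "8");      // number of Kafka partitions

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(8);  // this version expects consumerParallelism == numberOfPartitions

	KeyedStream<Tuple2<Integer, Integer>, Integer> keyed = FlinkKafkaShuffle.readKeyBy(
		"shuffle-topic",                                                       // assumed topic written by writeKeyBy
		env,
		TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {}),
		kafkaProperties,
		(KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0);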
 
Example 14
@Override
public void run() {

    JMetalLogger.logger.info("Run Flink method in the streaming data source invoked");
    JMetalLogger.logger.info("TOPIC: " + topic);

   // environment.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(1,0));
    //environment.enableCheckpointing(10);

    DataStream<String> data = environment.addSource(
            new FlinkKafkaConsumer010<String>(topic, new SimpleStringSchema(), kafkaParams));

    try {
        Iterator<String> it = DataStreamUtils.collect(data);
        while (it.hasNext()) {
            Integer number = Integer.parseInt(it.next());
            observable.setChanged();
            observable.notifyObservers(new ObservedValue<Integer>(number));
        }

    } catch (Exception e) {
        e.printStackTrace();
    }


}