Java Code Examples for org.apache.flink.streaming.api.datastream.DataStream#addSink()

The following examples show how to use org.apache.flink.streaming.api.datastream.DataStream#addSink(). Each example is taken from an open-source project; the source file and license are noted above the code.
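Before the project examples, here is a minimal, self-contained sketch of the basic pattern; it is not taken from any of the projects below, and the PrintingSink class and job name are illustrative assumptions. A source produces a DataStream, and addSink() attaches a SinkFunction that consumes every record and terminates that branch of the pipeline.

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;

public class AddSinkSketch {

    // A trivial custom sink, used only for illustration: it prints every record to stdout.
    private static class PrintingSink implements SinkFunction<String> {
        private static final long serialVersionUID = 1L;

        @Override
        public void invoke(String value, Context context) {
            System.out.println(value);
        }
    }

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<String> stream = env.fromElements("a", "b", "c");

        // addSink() terminates the pipeline and returns a DataStreamSink,
        // which can be used to name the sink or set its parallelism.
        stream.addSink(new PrintingSink()).name("printing-sink");

        env.execute("addSink sketch");
    }
}

The examples that follow use the same call with production sinks (Kafka, Pulsar, Kinesis, Elasticsearch, and file sinks) and with DiscardingSink in tests.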
Example 1
Source File: KafkaExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	DataStream<KafkaEvent> input = env
		.addSource(
			new FlinkKafkaConsumer<>(
				parameterTool.getRequired("input-topic"),
				new KafkaEventSchema(),
				parameterTool.getProperties())
				.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
		.keyBy("word")
		.map(new RollingAdditionMapper());

	input.addSink(
		new FlinkKafkaProducer<>(
			parameterTool.getRequired("output-topic"),
			new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()),
			parameterTool.getProperties(),
			FlinkKafkaProducer.Semantic.EXACTLY_ONCE));

	env.execute("Modern Kafka Example");
}
 
Example 2
Source File: SavepointITCase.java    From flink with Apache License 2.0
/**
 * Creates a streaming JobGraph from the StreamExecutionEnvironment.
 */
private JobGraph createJobGraph(
	int parallelism,
	int numberOfRetries,
	long restartDelay) {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.disableOperatorChaining();
	env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(numberOfRetries, restartDelay));

	DataStream<Integer> stream = env
		.addSource(new InfiniteTestSource())
		.shuffle()
		.map(new StatefulCounter());

	stream.addSink(new DiscardingSink<>());

	return env.getStreamGraph().getJobGraph();
}
 
Example 3
Source File: ParquetStreamingFileSinkITCase.java    From flink with Apache License 2.0
@Test
public void testWriteParquetAvroReflect() throws Exception {

	final File folder = TEMPORARY_FOLDER.newFolder();

	final List<Datum> data = Arrays.asList(
			new Datum("a", 1), new Datum("b", 2), new Datum("c", 3));

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.enableCheckpointing(100);

	DataStream<Datum> stream = env.addSource(
			new FiniteTestSource<>(data), TypeInformation.of(Datum.class));

	stream.addSink(
			StreamingFileSink.forBulkFormat(
					Path.fromLocalFile(folder),
					ParquetAvroWriters.forReflectRecord(Datum.class))
					.build());

	env.execute();

	validateResults(folder, ReflectData.get(), data);
}
 
Example 4
Source File: AvroStreamingFileSinkITCase.java    From flink with Apache License 2.0
@Test
public void testWriteAvroSpecific() throws Exception {
	File folder = TEMPORARY_FOLDER.newFolder();

	List<Address> data = Arrays.asList(
		new Address(1, "a", "b", "c", "12345"),
		new Address(2, "p", "q", "r", "12345"),
		new Address(3, "x", "y", "z", "12345"));

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.enableCheckpointing(100);

	AvroWriterFactory<Address> avroWriterFactory = AvroWriters.forSpecificRecord(Address.class);
	DataStream<Address> stream = env.addSource(
		new FiniteTestSource<>(data),
		TypeInformation.of(Address.class));
	stream.addSink(StreamingFileSink.forBulkFormat(
		Path.fromLocalFile(folder),
		avroWriterFactory).build());
	env.execute();

	validateResults(folder, new SpecificDatumReader<>(Address.class), data);
}
 
Example 5
Source File: FlinkPulsarITest.java    From pulsar-flink with Apache License 2.0
@Test
public void testClientCacheParameterPassedToTasks() throws Exception {
    int numTopic = 5;
    int numElements = 20;

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.getConfig().disableSysoutLogging();
    see.setParallelism(3);

    List<String> topics = new ArrayList<>();
    for (int i = 0; i < numTopic; i++) {
        topics.add(newTopic());
    }

    DataStream<Row> stream = see.addSource(new MultiTopicSource(topics, numElements));

    Properties sinkProp = sinkProperties();
    sinkProp.setProperty(FLUSH_ON_CHECKPOINT_OPTION_KEY, "true");
    sinkProp.setProperty(CLIENT_CACHE_SIZE_OPTION_KEY, "7");
    stream.addSink(new AssertSink(serviceUrl, adminUrl, 7, sinkProp, intRowWithTopicType()));
    see.execute("write with topics");
}
 
Example 6
Source File: BucketingSinkFaultToleranceITCase.java    From flink with Apache License 2.0
@Override
public void testProgram(StreamExecutionEnvironment env) {
	assertTrue("Broken test setup", NUM_STRINGS % 40 == 0);

	env.enableCheckpointing(20);
	env.setParallelism(12);
	env.disableOperatorChaining();

	DataStream<String> stream = env.addSource(new StringGeneratingSourceFunction(NUM_STRINGS)).startNewChain();

	DataStream<String> mapped = stream
			.map(new OnceFailingIdentityMapper(NUM_STRINGS));

	BucketingSink<String> sink = new BucketingSink<String>(outPath)
			.setBucketer(new BasePathBucketer<String>())
			.setBatchSize(10000)
			.setValidLengthPrefix("")
			.setPartPrefix(PART_PREFIX)
			.setPendingPrefix("")
			.setPendingSuffix(PENDING_SUFFIX)
			.setInProgressSuffix(IN_PROGRESS_SUFFIX);

	mapped.addSink(sink);

}
 
Example 7
Source File: KinesisProducerMain.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);

    DataStream<String> simpleStringStream = env.addSource(new EventsGenerator());


    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty("aws.region", parameterTool.getRequired("aws.region"));
    kinesisConsumerConfig.setProperty("aws.credentials.provider.basic.accesskeyid", parameterTool.getRequired("aws.accesskey"));
    kinesisConsumerConfig.setProperty("aws.credentials.provider.basic.secretkey", parameterTool.getRequired("aws.secretkey"));

    FlinkKinesisProducer<String> kinesis = new FlinkKinesisProducer<>(
            new SimpleStringSchema(), kinesisConsumerConfig);

    kinesis.setFailOnError(true);
    kinesis.setDefaultStream("zhisheng");
    kinesis.setDefaultPartition("0");

    simpleStringStream.addSink(kinesis);

    env.execute();
}
 
Example 8
Source File: WriteToKafka.java    From kafka-flink-101 with Apache License 2.0
public static void main(String[] args) throws Exception {
  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

  Properties properties = new Properties();
  properties.setProperty("bootstrap.servers", "localhost:9092");

  DataStream<String> stream = env.addSource(new SimpleStringGenerator());
  stream.addSink(new FlinkKafkaProducer09<>("flink-demo", new SimpleStringSchema(), properties));

  env.execute();
}
 
Example 9
Source File: KafkaTestEnvironmentImpl.java    From Flink-CEPplus with Apache License 2.0
@Override
public <T> DataStreamSink<T> writeToKafkaWithTimestamps(DataStream<T> stream, String topic, KeyedSerializationSchema<T> serSchema, Properties props) {
	FlinkKafkaProducer<T> prod = new FlinkKafkaProducer<T>(
		topic,
		serSchema,
		props,
		Optional.of(new FlinkFixedPartitioner<>()),
		producerSemantic,
		FlinkKafkaProducer.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE);

	prod.setWriteTimestampToKafka(true);

	return stream.addSink(prod);
}
 
Example 10
Source File: ParquetStreamingFileSinkITCase.java    From flink with Apache License 2.0
@Test
public void testWriteParquetAvroSpecific() throws Exception {

	final File folder = TEMPORARY_FOLDER.newFolder();

	final List<Address> data = Arrays.asList(
			new Address(1, "a", "b", "c", "12345"),
			new Address(2, "p", "q", "r", "12345"),
			new Address(3, "x", "y", "z", "12345")
	);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.enableCheckpointing(100);

	DataStream<Address> stream = env.addSource(
			new FiniteTestSource<>(data), TypeInformation.of(Address.class));

	stream.addSink(
			StreamingFileSink.forBulkFormat(
					Path.fromLocalFile(folder),
					ParquetAvroWriters.forSpecificRecord(Address.class))
			.build());

	env.execute();

	validateResults(folder, SpecificData.get(), data);
}
 
Example 11
Source File: StreamGraphGeneratorTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests that the max parallelism is automatically set to the parallelism if it has not been
 * specified.
 */
@Test
public void testAutoMaxParallelism() {
	int globalParallelism = 42;
	int mapParallelism = 17;
	int maxParallelism = 21;
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(globalParallelism);

	DataStream<Integer> source = env.fromElements(1, 2, 3);

	DataStream<Integer> keyedResult1 = source.keyBy(value -> value).map(new NoOpIntMap());

	DataStream<Integer> keyedResult2 = keyedResult1.keyBy(value -> value).map(new NoOpIntMap()).setParallelism(mapParallelism);

	DataStream<Integer> keyedResult3 = keyedResult2.keyBy(value -> value).map(new NoOpIntMap()).setMaxParallelism(maxParallelism);

	DataStream<Integer> keyedResult4 = keyedResult3.keyBy(value -> value).map(new NoOpIntMap()).setMaxParallelism(maxParallelism).setParallelism(mapParallelism);

	keyedResult4.addSink(new DiscardingSink<>());

	StreamGraph graph = env.getStreamGraph();

	StreamNode keyedResult3Node = graph.getStreamNode(keyedResult3.getId());
	StreamNode keyedResult4Node = graph.getStreamNode(keyedResult4.getId());

	assertEquals(maxParallelism, keyedResult3Node.getMaxParallelism());
	assertEquals(maxParallelism, keyedResult4Node.getMaxParallelism());
}
 
Example 12
Source File: StreamGraphGeneratorTest.java    From flink with Apache License 2.0
/**
 * Tests that the max parallelism is automatically set to the parallelism if it has not been
 * specified.
 */
@Test
public void testAutoMaxParallelism() {
	int globalParallelism = 42;
	int mapParallelism = 17;
	int maxParallelism = 21;
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(globalParallelism);

	DataStream<Integer> source = env.fromElements(1, 2, 3);

	DataStream<Integer> keyedResult1 = source.keyBy(value -> value).map(new NoOpIntMap());

	DataStream<Integer> keyedResult2 = keyedResult1.keyBy(value -> value).map(new NoOpIntMap()).setParallelism(mapParallelism);

	DataStream<Integer> keyedResult3 = keyedResult2.keyBy(value -> value).map(new NoOpIntMap()).setMaxParallelism(maxParallelism);

	DataStream<Integer> keyedResult4 = keyedResult3.keyBy(value -> value).map(new NoOpIntMap()).setMaxParallelism(maxParallelism).setParallelism(mapParallelism);

	keyedResult4.addSink(new DiscardingSink<>());

	StreamGraph graph = env.getStreamGraph();

	StreamNode keyedResult3Node = graph.getStreamNode(keyedResult3.getId());
	StreamNode keyedResult4Node = graph.getStreamNode(keyedResult4.getId());

	assertEquals(maxParallelism, keyedResult3Node.getMaxParallelism());
	assertEquals(maxParallelism, keyedResult4Node.getMaxParallelism());
}
 
Example 13
Source File: StreamGraphGeneratorTest.java    From flink with Apache License 2.0
/**
 * This tests whether virtual Transformations behave correctly.
 *
 * <p>Verifies that partitioning, output selectors, and selected names are correctly set in the
 * StreamGraph when they are intermixed.
 */
@Test
public void testVirtualTransformations() throws Exception {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> source = env.fromElements(1, 10);

	DataStream<Integer> rebalanceMap = source.rebalance().map(new NoOpIntMap());

	// verify that only the partitioning that was set last is used
	DataStream<Integer> broadcastMap = rebalanceMap
			.forward()
			.global()
			.broadcast()
			.map(new NoOpIntMap());

	broadcastMap.addSink(new DiscardingSink<>());

	// verify that partitioning is preserved across union and split/select
	EvenOddOutputSelector selector1 = new EvenOddOutputSelector();
	EvenOddOutputSelector selector2 = new EvenOddOutputSelector();
	EvenOddOutputSelector selector3 = new EvenOddOutputSelector();

	DataStream<Integer> map1Operator = rebalanceMap
			.map(new NoOpIntMap());

	DataStream<Integer> map1 = map1Operator
			.broadcast()
			.split(selector1)
			.select("even");

	DataStream<Integer> map2Operator = rebalanceMap
			.map(new NoOpIntMap());

	DataStream<Integer> map2 = map2Operator
			.split(selector2)
			.select("odd")
			.global();

	DataStream<Integer> map3Operator = rebalanceMap
			.map(new NoOpIntMap());

	DataStream<Integer> map3 = map3Operator
			.global()
			.split(selector3)
			.select("even")
			.shuffle();

	SingleOutputStreamOperator<Integer> unionedMap = map1.union(map2).union(map3)
			.map(new NoOpIntMap());

	unionedMap.addSink(new DiscardingSink<>());

	StreamGraph graph = env.getStreamGraph();

	// rebalanceMap
	assertTrue(graph.getStreamNode(rebalanceMap.getId()).getInEdges().get(0).getPartitioner() instanceof RebalancePartitioner);

	// verify that only last partitioning takes precedence
	assertTrue(graph.getStreamNode(broadcastMap.getId()).getInEdges().get(0).getPartitioner() instanceof BroadcastPartitioner);
	assertEquals(rebalanceMap.getId(), graph.getSourceVertex(graph.getStreamNode(broadcastMap.getId()).getInEdges().get(0)).getId());

	// verify that partitioning in unions is preserved and that it works across split/select
	assertTrue(graph.getStreamNode(map1Operator.getId()).getOutEdges().get(0).getPartitioner() instanceof BroadcastPartitioner);
	assertTrue(graph.getStreamNode(map1Operator.getId()).getOutEdges().get(0).getSelectedNames().get(0).equals("even"));
	assertTrue(graph.getStreamNode(map1Operator.getId()).getOutputSelectors().contains(selector1));

	assertTrue(graph.getStreamNode(map2Operator.getId()).getOutEdges().get(0).getPartitioner() instanceof GlobalPartitioner);
	assertTrue(graph.getStreamNode(map2Operator.getId()).getOutEdges().get(0).getSelectedNames().get(0).equals("odd"));
	assertTrue(graph.getStreamNode(map2Operator.getId()).getOutputSelectors().contains(selector2));

	assertTrue(graph.getStreamNode(map3Operator.getId()).getOutEdges().get(0).getPartitioner() instanceof ShufflePartitioner);
	assertTrue(graph.getStreamNode(map3Operator.getId()).getOutEdges().get(0).getSelectedNames().get(0).equals("even"));
	assertTrue(graph.getStreamNode(map3Operator.getId()).getOutputSelectors().contains(selector3));
}
 
Example 14
Source File: ProcessTaxiStream.java    From amazon-kinesis-analytics-taxi-consumer with Apache License 2.0
public static void main(String[] args) throws Exception {
  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();


  ParameterTool parameter;

  if (env instanceof LocalStreamEnvironment) {
    //read the parameters specified from the command line
    parameter = ParameterTool.fromArgs(args);
  } else {
    //read the parameters from the Kinesis Analytics environment
    Map<String, Properties> applicationProperties = KinesisAnalyticsRuntime.getApplicationProperties();

    Properties flinkProperties = applicationProperties.get("FlinkApplicationProperties");

    if (flinkProperties == null) {
      throw new RuntimeException("Unable to load FlinkApplicationProperties properties from the Kinesis Analytics Runtime.");
    }

    parameter = ParameterToolUtils.fromApplicationProperties(flinkProperties);
  }


  //enable event time processing
  if (parameter.get("EventTime", "true").equals("true")) {
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
  }


  //set Kinesis consumer properties
  Properties kinesisConsumerConfig = new Properties();
  //set the region the Kinesis stream is located in
  kinesisConsumerConfig.setProperty(AWSConfigConstants.AWS_REGION, parameter.get("Region", DEFAULT_REGION_NAME));
  //obtain credentials through the DefaultCredentialsProviderChain, which includes the instance metadata
  kinesisConsumerConfig.setProperty(AWSConfigConstants.AWS_CREDENTIALS_PROVIDER, "AUTO");
  //poll new events from the Kinesis stream once every second
  kinesisConsumerConfig.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, "1000");


  //create Kinesis source
  DataStream<Event> kinesisStream = env.addSource(new FlinkKinesisConsumer<>(
      //read events from the Kinesis stream passed in as a parameter
      parameter.get("InputStreamName", DEFAULT_STREAM_NAME),
      //deserialize events with EventSchema
      new EventDeserializationSchema(),
      //using the previously defined properties
      kinesisConsumerConfig
  ));


  DataStream<TripEvent> trips = kinesisStream
      //extract watermarks from watermark events
      .assignTimestampsAndWatermarks(new TimestampAssigner())
      //remove all events that aren't TripEvents
      .filter(event -> TripEvent.class.isAssignableFrom(event.getClass()))
      //cast Event to TripEvent
      .map(event -> (TripEvent) event)
      //remove all events with geo coordinates outside of NYC
      .filter(GeoUtils::hasValidCoordinates);


  DataStream<PickupCount> pickupCounts = trips
      //compute geo hash for every event
      .map(new TripToGeoHash())
      .keyBy("geoHash")
      //collect all events in a one hour window
      .timeWindow(Time.hours(1))
      //count events per geo hash in the one hour window
      .apply(new CountByGeoHash());


  DataStream<AverageTripDuration> tripDurations = trips
      .flatMap(new TripToTripDuration())
      .keyBy("pickupGeoHash", "airportCode")
      .timeWindow(Time.hours(1))
      .apply(new TripDurationToAverageTripDuration());


  if (parameter.has("ElasticsearchEndpoint")) {
    String elasticsearchEndpoint = parameter.get("ElasticsearchEndpoint");
    final String region = parameter.get("Region", DEFAULT_REGION_NAME);

    //remove trailing /
    if (elasticsearchEndpoint.endsWith("/")) {
      elasticsearchEndpoint = elasticsearchEndpoint.substring(0, elasticsearchEndpoint.length()-1);
    }

    pickupCounts.addSink(AmazonElasticsearchSink.buildElasticsearchSink(elasticsearchEndpoint, region, "pickup_count", "pickup_count"));
    tripDurations.addSink(AmazonElasticsearchSink.buildElasticsearchSink(elasticsearchEndpoint, region, "trip_duration", "trip_duration"));
  }


  LOG.info("Reading events from stream {}", parameter.get("InputStreamName", DEFAULT_STREAM_NAME));

  env.execute();
}
 
Example 15
Source File: RollingSinkITCase.java    From Flink-CEPplus with Apache License 2.0
/**
 * This tests {@link SequenceFileWriter}
 * with non-rolling output but with compression.
 */
@Test
public void testNonRollingSequenceFileWithCompressionWriter() throws Exception {
	final int numElements = 20;
	final String outPath = hdfsURI + "/seq-non-rolling-out";
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(2);

	DataStream<Tuple2<Integer, String>> source = env.addSource(new TestSourceFunction(numElements))
			.broadcast()
			.filter(new OddEvenFilter());

	DataStream<Tuple2<IntWritable, Text>> mapped =  source.map(new MapFunction<Tuple2<Integer, String>, Tuple2<IntWritable, Text>>() {
		private static final long serialVersionUID = 1L;

		@Override
		public Tuple2<IntWritable, Text> map(Tuple2<Integer, String> value) throws Exception {
			return Tuple2.of(new IntWritable(value.f0), new Text(value.f1));
		}
	});

	RollingSink<Tuple2<IntWritable, Text>> sink = new RollingSink<Tuple2<IntWritable, Text>>(outPath)
			.setWriter(new SequenceFileWriter<IntWritable, Text>("Default", SequenceFile.CompressionType.BLOCK))
			.setBucketer(new NonRollingBucketer())
			.setPartPrefix("part")
			.setPendingPrefix("")
			.setPendingSuffix("");

	mapped.addSink(sink);

	env.execute("RollingSink String Write Test");

	FSDataInputStream inStream = dfs.open(new Path(outPath + "/part-0-0"));

	SequenceFile.Reader reader = new SequenceFile.Reader(inStream,
			1000,
			0,
			100000,
			new Configuration());

	IntWritable intWritable = new IntWritable();
	Text txt = new Text();

	for (int i = 0; i < numElements; i += 2) {
		reader.next(intWritable, txt);
		Assert.assertEquals(i, intWritable.get());
		Assert.assertEquals("message #" + i, txt.toString());
	}

	reader.close();
	inStream.close();

	inStream = dfs.open(new Path(outPath + "/part-1-0"));

	reader = new SequenceFile.Reader(inStream,
			1000,
			0,
			100000,
			new Configuration());

	for (int i = 1; i < numElements; i += 2) {
		reader.next(intWritable, txt);
		Assert.assertEquals(i, intWritable.get());
		Assert.assertEquals("message #" + i, txt.toString());
	}

	reader.close();
	inStream.close();
}
 
Example 16
Source File: TransactionProcessorTest.java    From flink-tutorials with Apache License 2.0
@Override
protected void writeTransactionResults(ParameterTool params, DataStream<TransactionResult> transactionResults) {
	transactionResults.addSink(transactionResultSink);
}
 
Example 17
Source File: TaskManagerProcessFailureStreamingRecoveryITCase.java    From flink with Apache License 2.0
@Override
public void testTaskManagerFailure(Configuration configuration, final File coordinateDir) throws Exception {

	final File tempCheckpointDir = tempFolder.newFolder();

	StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment(
		"localhost",
		1337, // not needed since we use ZooKeeper
		configuration);
	env.setParallelism(PARALLELISM);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 1000));
	env.enableCheckpointing(200);

	env.setStateBackend(new FsStateBackend(tempCheckpointDir.getAbsoluteFile().toURI()));

	DataStream<Long> result = env.addSource(new SleepyDurableGenerateSequence(coordinateDir, DATA_COUNT))
			// add a non-chained no-op map to test the chain state restore logic
			.map(new MapFunction<Long, Long>() {
				@Override
				public Long map(Long value) throws Exception {
					return value;
				}
			}).startNewChain()
					// populate the coordinate directory so we can proceed to TaskManager failure
			.map(new Mapper(coordinateDir));

	//write result to temporary file
	result.addSink(new CheckpointedSink(DATA_COUNT));

	try {
		// blocking call until execution is done
		env.execute();

		// TODO: Figure out why this fails when ran with other tests
		// Check whether checkpoints have been cleaned up properly
		// assertDirectoryEmpty(tempCheckpointDir);
	}
	finally {
		// clean up
		if (tempCheckpointDir.exists()) {
			FileUtils.deleteDirectory(tempCheckpointDir);
		}
	}
}
 
Example 18
Source File: KafkaTestEnvironmentImpl.java    From flink with Apache License 2.0
@Override
public <T> DataStreamSink<T> produceIntoKafka(DataStream<T> stream, String topic, KeyedSerializationSchema<T> serSchema, Properties props, FlinkKafkaPartitioner<T> partitioner) {
	FlinkKafkaProducer09<T> prod = new FlinkKafkaProducer09<>(topic, serSchema, props, partitioner);
	prod.setFlushOnCheckpoint(true);
	return stream.addSink(prod);
}
 
Example 19
Source File: StreamGraphGeneratorTest.java    From flink with Apache License 2.0
/**
 * Tests that the KeyGroupStreamPartitioner is properly set up with the correct value of
 * maximum parallelism.
 */
@Test
public void testSetupOfKeyGroupPartitioner() {
	int maxParallelism = 42;
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().setMaxParallelism(maxParallelism);

	DataStream<Integer> source = env.fromElements(1, 2, 3);

	DataStream<Integer> keyedResult = source.keyBy(value -> value).map(new NoOpIntMap());

	keyedResult.addSink(new DiscardingSink<>());

	StreamGraph graph = env.getStreamGraph();

	StreamNode keyedResultNode = graph.getStreamNode(keyedResult.getId());

	// the keyed edge into this node is expected to carry a KeyGroupStreamPartitioner
	// configured with the maximum parallelism set above
	StreamPartitioner<?> streamPartitioner = keyedResultNode.getInEdges().get(0).getPartitioner();
}
 
Example 20
Source File: DataStreamTestBase.java    From flink-spector with Apache License 2.0
/**
 * Inspect a {@link DataStream} using a {@link OutputMatcherFactory}.
 *
 * @param stream  {@link DataStream} to test.
 * @param matcher {@link OutputMatcherFactory} to use.
 * @param trigger {@link VerifyFinishedTrigger}
 *                to finish the assertion early.
 * @param <T>     type of the stream.
 */
public <T> void assertStream(DataStream<T> stream,
                             Matcher<Iterable<T>> matcher,
                             VerifyFinishedTrigger trigger) {
    stream.addSink(createTestSink(matcher, trigger));
}