org.apache.flink.streaming.util.serialization.SimpleStringSchema Java Examples

The following examples show how to use org.apache.flink.streaming.util.serialization.SimpleStringSchema, the schema that converts between Kafka's raw byte[] messages and Java Strings. Each example notes the project and source file it was taken from.
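
Before the examples, a short round-trip sketch of the schema itself may help. SimpleStringSchema implements both DeserializationSchema<String> and SerializationSchema<String>, which is why the same class shows up in the Kafka consumers and producers below (the demo class here is hypothetical; the schema calls are its public API):

import org.apache.flink.streaming.util.serialization.SimpleStringSchema;

public class SimpleStringSchemaRoundTrip {
    public static void main(String[] args) throws Exception {
        SimpleStringSchema schema = new SimpleStringSchema();

        // serialize a String into the byte[] payload that a Kafka sink writes
        byte[] payload = schema.serialize("hello flink");

        // deserialize the byte[] payload that a Kafka source reads
        String message = schema.deserialize(payload);

        // the schema never signals end-of-stream for an unbounded source
        System.out.println(message + " / end of stream: " + schema.isEndOfStream(message));
    }
}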
Example #1
Source File: WriteIntoKafka.java    From kafka-example with Apache License 2.0
public static void main(String[] args) throws Exception {
	// create execution environment
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// parse user parameters
	ParameterTool parameterTool = ParameterTool.fromArgs(args);

	// add a simple source which is writing some strings
	DataStream<String> messageStream = env.addSource(new SimpleStringGenerator());

	// write stream to Kafka
	messageStream.addSink(new KafkaSink<>(parameterTool.getRequired("bootstrap.servers"),
			parameterTool.getRequired("topic"),
			new SimpleStringSchema()));

	env.execute();
}
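
The SimpleStringGenerator source used here (and again in Examples #13 and #14) is defined elsewhere in the project. A plausible sketch, assuming it simply emits counter strings until cancelled (only the class name comes from the example; the body is an assumption):

import org.apache.flink.streaming.api.functions.source.SourceFunction;

public static class SimpleStringGenerator implements SourceFunction<String> {
	private static final long serialVersionUID = 1L;
	private volatile boolean running = true;
	private long i = 0;

	@Override
	public void run(SourceContext<String> ctx) throws Exception {
		while (running) {
			ctx.collect("element-" + i++); // emit a simple string payload
			Thread.sleep(10);              // throttle the generator
		}
	}

	@Override
	public void cancel() {
		running = false;
	}
}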
 
Example #2
Source File: ReadFromKafka.java    From kafka-example with Apache License 2.0
public static void main(String[] args) throws Exception {
	// create execution environment
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// parse user parameters
	ParameterTool parameterTool = ParameterTool.fromArgs(args);

	DataStream<String> messageStream = env.addSource(new FlinkKafkaConsumer082<>(parameterTool.getRequired("topic"), new SimpleStringSchema(), parameterTool.getProperties()));

	// print() will write the contents of the stream to the TaskManager's standard out stream
	// the rebalance() call causes a repartitioning of the data so that all machines
	// see the messages (useful, for example, when "num kafka partitions" < "num flink operators")
	messageStream.rebalance().map(new MapFunction<String, String>() {
		private static final long serialVersionUID = -6867736771747690202L;

		@Override
		public String map(String value) throws Exception {
			return "Kafka and Flink says: " + value;
		}
	}).print();

	env.execute();
}
 
Example #3
Source File: ReadFromKafka.java    From kafka-flink-101 with Apache License 2.0
public static void main(String[] args) throws Exception {
  // create execution environment
  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

  Properties properties = new Properties();
  properties.setProperty("bootstrap.servers", "localhost:9092");
  properties.setProperty("group.id", "flink_consumer");


  DataStream<String> stream = env
          .addSource(new FlinkKafkaConsumer09<>("flink-demo", new SimpleStringSchema(), properties));

  stream.map(new MapFunction<String, String>() {
    private static final long serialVersionUID = -6867736771747690202L;

    @Override
    public String map(String value) throws Exception {
      return "Stream Value: " + value;
    }
  }).print();

  env.execute();
}
 
Example #4
Source File: AdvertisingTopologyFlinkState.java    From yahoo-streaming-benchmark with Apache License 2.0
/**
 * Create a Kafka source
 */
private static FlinkKafkaConsumer082<String> kafkaSource(BenchmarkConfig config) {
  return new FlinkKafkaConsumer082<>(
    config.kafkaTopic,
    new SimpleStringSchema(),
    config.getParameters().getProperties());
}
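
The benchmark wires this helper into a job elsewhere; a hypothetical call site (not part of the original file) is simply:

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<String> rawStream = env.addSource(kafkaSource(config));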
 
Example #5
Source File: KafkaWindowedWordCountExample.java    From flink-dataflow with Apache License 2.0
public static void main(String[] args) {
	PipelineOptionsFactory.register(KafkaStreamingWordCountOptions.class);
	KafkaStreamingWordCountOptions options = PipelineOptionsFactory.fromArgs(args).as(KafkaStreamingWordCountOptions.class);
	options.setJobName("KafkaExample - WindowSize: " + options.getWindowSize() + " seconds");
	options.setStreaming(true);
	options.setCheckpointingInterval(1000L);
	options.setNumberOfExecutionRetries(5);
	options.setExecutionRetryDelay(3000L);
	options.setRunner(FlinkPipelineRunner.class);

	System.out.println(options.getKafkaTopic() +" "+ options.getZookeeper() +" "+ options.getBroker() +" "+ options.getGroup() );
	Pipeline pipeline = Pipeline.create(options);

	Properties p = new Properties();
	p.setProperty("zookeeper.connect", options.getZookeeper());
	p.setProperty("bootstrap.servers", options.getBroker());
	p.setProperty("group.id", options.getGroup());

	// this is the Flink consumer that reads the input to
	// the program from a kafka topic.
	FlinkKafkaConsumer08<String> kafkaConsumer = new FlinkKafkaConsumer08<>(
			options.getKafkaTopic(),
			new SimpleStringSchema(), p);

	PCollection<String> words = pipeline
			.apply(Read.from(new UnboundedFlinkSource<>(kafkaConsumer)).named("StreamingWordCount"))
			.apply(ParDo.of(new ExtractWordsFn()))
			.apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(options.getWindowSize())))
					.triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO)
					.discardingFiredPanes());

	PCollection<KV<String, Long>> wordCounts =
			words.apply(Count.<String>perElement());

	wordCounts.apply(ParDo.of(new FormatAsStringFn()))
			.apply(TextIO.Write.to("./outputKafka.txt"));

	pipeline.run();
}
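
The Read.from(new UnboundedFlinkSource<>(kafkaConsumer)) line is the bridge piece here: it wraps the native Flink Kafka consumer so the (pre-Beam) Dataflow API can consume it as an unbounded source, while the windowing, counting, and formatting stages are ordinary Dataflow transforms.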
 
Example #6
Source File: AdvertisingTopologyNative.java    From yahoo-streaming-benchmark with Apache License 2.0
/**
 * Create a Kafka source
 */
private static FlinkKafkaConsumer082<String> kafkaSource(BenchmarkConfig config) {
  return new FlinkKafkaConsumer082<>(
    config.kafkaTopic,
    new SimpleStringSchema(),
    config.getParameters().getProperties());
}
 
Example #7
Source File: AdvertisingTopologyFlinkWindows.java    From yahoo-streaming-benchmark with Apache License 2.0
/**
 * Configure Kafka source
 */
private static FlinkKafkaConsumer082<String> kafkaSource(BenchmarkConfig config) {
  return new FlinkKafkaConsumer082<>(
    config.kafkaTopic,
    new SimpleStringSchema(),
    config.getParameters().getProperties());
}
 
Example #8
Source File: AdvertisingTopologyRedisDirect.java    From yahoo-streaming-benchmark with Apache License 2.0
/**
 * Choose either Kafka or data generator as source
 */
private static DataStream<String> sourceStream(BenchmarkConfig config, StreamExecutionEnvironment env) {
  RichParallelSourceFunction<String> source;
  String sourceName;
  if (config.useLocalEventGenerator) {
    HighKeyCardinalityGeneratorSource eventGenerator = new HighKeyCardinalityGeneratorSource(config);
    source = eventGenerator;
    sourceName = "EventGenerator";
  } else {
    source = new FlinkKafkaConsumer082<>(config.kafkaTopic, new SimpleStringSchema(), config.getParameters().getProperties());
    sourceName = "Kafka";
  }

  return env.addSource(source, sourceName);
}
 
Example #9
Source File: FlinkKafkaSourceExample.java    From huaweicloud-cs-sdk with Apache License 2.0
public void readKafka() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    DataStream<String> messageStream = env.addSource(new FlinkKafkaConsumer010<String>(topic,
            new SimpleStringSchema(),
            properties));
    messageStream.rebalance().print();
    try {
        env.execute();
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}
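
The topic and properties used above are instance fields of the SDK example class. A minimal setup, assuming a local broker (the values below are placeholders, not the SDK's defaults), could look like:

private String topic = "flink-demo"; // hypothetical topic name
private Properties properties = new Properties();
{
    // instance initializer with the presumed connection settings
    properties.setProperty("bootstrap.servers", "localhost:9092");
    properties.setProperty("group.id", "flink_consumer");
}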
 
Example #10
Source File: AdvertisingTopologyFlinkStateHighKeyCard.java    From yahoo-streaming-benchmark with Apache License 2.0
/**
 * Set up the Kafka source
 */
private static FlinkKafkaConsumer08<String> kafkaSource(BenchmarkConfig config) {
  return new FlinkKafkaConsumer08<>(
    config.kafkaTopic,
    new SimpleStringSchema(),
    config.getParameters().getProperties());
}
 
Example #11
Source File: KafkaTopicValidator.java    From yahoo-streaming-benchmark with Apache License 2.0
public static void main(String[] args) throws Exception {
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().setGlobalJobParameters(parameterTool);
	DataStream<String> rawMessageStream = env.addSource(new FlinkKafkaConsumer082<>(
		parameterTool.getRequired("kafka.topic"),
		new SimpleStringSchema(),
		parameterTool.getProperties()));

	rawMessageStream.print();

	env.execute();
}
 
Example #12
Source File: TwitterIntoKafka.java    From flink-streaming-etl with Apache License 2.0
public static void main(String[] args) throws Exception {
	// set up the streaming execution environment
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	ParameterTool params = ParameterTool.fromPropertiesFile(args[0]);
	DataStream<String> twitterStreamString = env.addSource(new TwitterSource(params.getProperties()));
	DataStream<String> filteredStream = twitterStreamString.flatMap(new ParseJson());
	filteredStream.flatMap(new ThroughputLogger(5000L)).setParallelism(1);

	filteredStream.addSink(new FlinkKafkaProducer09<>("twitter", new SimpleStringSchema(), params.getProperties()));

	// execute program
	env.execute("Ingest data from Twitter to Kafka");
}
 
Example #13
Source File: WriteToKafka.java    From kafka-flink-101 with Apache License 2.0
public static void main(String[] args) throws Exception {
  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

  Properties properties = new Properties();
  properties.setProperty("bootstrap.servers", "localhost:9092");

  DataStream<String> stream = env.addSource(new SimpleStringGenerator());
  stream.addSink(new FlinkKafkaProducer09<>("flink-demo", new SimpleStringSchema(), properties));

  env.execute();
}
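
Run together with Example #3 (ReadFromKafka from the same kafka-flink-101 project), this forms a complete round trip: this job writes generated strings to the local flink-demo topic, and the reader prints them back out.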
 
Example #14
Source File: WriteIntoKafka.java    From flinkDemo with Apache License 2.0
public static void main(String[] args) throws Exception {
    // create execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    Map<String, String> properties = new HashMap<>();
    properties.put("bootstrap.servers", "192.168.10.63:6667,192.168.10.64:6667,192.168.10.65:6667");
    properties.put("group.id", "t10");
    properties.put("enable.auto.commit", "false");
    properties.put("auto.commit.interval.ms", "1000");
    properties.put("auto.offset.reset", "earliest");
    properties.put("session.timeout.ms", "30000");
    properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    properties.put("topic", "kks-topic-FFT");
    //KafkaConsumer<String,String> kafkaConsumer = new KafkaConsumer<String, String>(properties);
    // parse user parameters
    //ParameterTool parameterTool = ParameterTool.fromArgs(args);
    ParameterTool parameterTool = ParameterTool.fromMap(properties);

    // add a simple source which is writing some strings
    DataStream<String> messageStream = env.addSource(new SimpleStringGenerator());

    // write stream to Kafka
    messageStream.addSink(new FlinkKafkaProducer010<>(parameterTool.getRequired("bootstrap.servers"),
            parameterTool.getRequired("topic"),
            new SimpleStringSchema()));

    messageStream.rebalance().map(new MapFunction<String, String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public String map(String value) throws Exception {
            return value;
        }
    });

    messageStream.print();

    env.execute();
}
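
The HashMap here does double duty: it carries the Kafka client settings, and ParameterTool.fromMap exposes the same entries so that getRequired("bootstrap.servers") and getRequired("topic") can configure the producer.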
 
Example #15
Source File: FlinkKafkaSinkExample.java    From huaweicloud-cs-sdk with Apache License 2.0
public void writeKafka() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    DataStream<String> messageStream = env.addSource(new KafkaSourceGenerator());
    messageStream.addSink(new FlinkKafkaProducer010<String>(topic,
            new SimpleStringSchema(),
            properties));
    try {
        env.execute();
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}
 
Example #16
Source File: StreamingJob.java    From Mastering-Apache-Flink with MIT License
public static void main(String[] args) throws Exception {
	// set up the streaming execution environment
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// env.enableCheckpointing(5000);
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	Properties properties = new Properties();
	properties.setProperty("bootstrap.servers", "localhost:9092");

	properties.setProperty("zookeeper.connect", "localhost:2181");
	properties.setProperty("group.id", "test");

	FlinkKafkaConsumer09<String> myConsumer = new FlinkKafkaConsumer09<>("temp", new SimpleStringSchema(),
			properties);
	myConsumer.assignTimestampsAndWatermarks(new CustomWatermarkEmitter());


	DataStream<Tuple2<String, Double>> keyedStream = env.addSource(myConsumer).flatMap(new Splitter()).keyBy(0)
			.timeWindow(Time.seconds(300))
			.apply(new WindowFunction<Tuple2<String, Double>, Tuple2<String, Double>, Tuple, TimeWindow>() {

				@Override
				public void apply(Tuple key, TimeWindow window, Iterable<Tuple2<String, Double>> input,
						Collector<Tuple2<String, Double>> out) throws Exception {
					double sum = 0.0;
					int count = 0;
					for (Tuple2<String, Double> record : input) {
						sum += record.f1;
						count++;
					}

					// reuse the first record's key, replacing its value with the window average
					Tuple2<String, Double> result = input.iterator().next();
					result.f1 = sum / count;
					out.collect(result);

				}
			});

	keyedStream.print();

	// execute program
	env.execute("Flink Streaming Java API Skeleton");
}
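
The Splitter and CustomWatermarkEmitter classes come from the book's accompanying code and are not shown in this snippet. A sketch of what Splitter plausibly does, assuming each Kafka record is a comma-separated "sensorId,temperature" pair (an assumption; the book's version may differ):

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public static class Splitter implements FlatMapFunction<String, Tuple2<String, Double>> {
	@Override
	public void flatMap(String value, Collector<Tuple2<String, Double>> out) {
		String[] parts = value.split(",");
		if (parts.length == 2) {
			// key by sensor id, carry the temperature reading as the value
			out.collect(new Tuple2<>(parts[0], Double.parseDouble(parts[1])));
		}
	}
}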
 
Example #17
Source File: UnitTestSuiteFlink.java    From df_data_service with Apache License 2.0
public static void testFlinkSQL() {

        LOG.info("Only Unit Testing Function is enabled");
        String resultFile = "/home/vagrant/test.txt";

        try {

            String jarPath = DFInitService.class.getProtectionDomain().getCodeSource().getLocation().getPath();
            StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", 6123, jarPath)
                    .setParallelism(1);
            String kafkaTopic = "finance";
            String kafkaTopic_stage = "df_trans_stage_finance";
            String kafkaTopic_out = "df_trans_out_finance";



            StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
            Properties properties = new Properties();
            properties.setProperty("bootstrap.servers", "localhost:9092");
            properties.setProperty("group.id", "consumer3");

            // Internally convert escaped JSON strings to plain JSON - Begin
            DataStream<String> stream = env
                    .addSource(new FlinkKafkaConsumer09<>(kafkaTopic, new SimpleStringSchema(), properties));

            stream.map(new MapFunction<String, String>() {
                @Override
                public String map(String jsonString) throws Exception {
                    return jsonString.replaceAll("\\\\", "").replace("\"{", "{").replace("}\"","}");
                }
            }).addSink(new FlinkKafkaProducer09<String>("localhost:9092", kafkaTopic_stage, new SimpleStringSchema()));
            // Internally convert escaped JSON strings to plain JSON - End

            String[] fieldNames =  new String[] {"name"};
            Class<?>[] fieldTypes = new Class<?>[] {String.class};

            Kafka09AvroTableSource kafkaTableSource = new Kafka09AvroTableSource(
                    kafkaTopic_stage,
                    properties,
                    fieldNames,
                    fieldTypes);

            //kafkaTableSource.setFailOnMissingField(true);

            tableEnv.registerTableSource("Orders", kafkaTableSource);

            //Table result = tableEnv.sql("SELECT STREAM name FROM Orders");
            Table result = tableEnv.sql("SELECT name FROM Orders");

            Files.deleteIfExists(Paths.get(resultFile));

            // create a TableSink
            TableSink sink = new CsvTableSink(resultFile, "|");
            // write the result Table to the TableSink
            result.writeToSink(sink);

            env.execute("FlinkConsumer");

        } catch (Exception e) {
            e.printStackTrace();
        }
    }
 
Example #18
Source File: AdvertisingTopologyNative.java    From streaming-benchmarks with Apache License 2.0
public static void main(final String[] args) throws Exception {

        ParameterTool parameterTool = ParameterTool.fromArgs(args);

        Map conf = Utils.findAndReadConfigFile(parameterTool.getRequired("confPath"), true);
        int kafkaPartitions = ((Number)conf.get("kafka.partitions")).intValue();
        int hosts = ((Number)conf.get("process.hosts")).intValue();
        int cores = ((Number)conf.get("process.cores")).intValue();

        ParameterTool flinkBenchmarkParams = ParameterTool.fromMap(getFlinkConfs(conf));

        LOG.info("conf: {}", conf);
        LOG.info("Parameters used: {}", flinkBenchmarkParams.toMap());

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setGlobalJobParameters(flinkBenchmarkParams);

        // Set the buffer timeout (default 100)
        // Lowering the timeout will lead to lower latencies, but will eventually reduce throughput.
        env.setBufferTimeout(flinkBenchmarkParams.getLong("flink.buffer-timeout", 100));

        if(flinkBenchmarkParams.has("flink.checkpoint-interval")) {
            // enable checkpointing for fault tolerance
            env.enableCheckpointing(flinkBenchmarkParams.getLong("flink.checkpoint-interval", 1000));
        }
        // set default parallelism for all operators (recommended value: number of available worker CPU cores in the cluster (hosts * cores))
        env.setParallelism(hosts * cores);

        DataStream<String> messageStream = env
                .addSource(new FlinkKafkaConsumer082<String>(
                        flinkBenchmarkParams.getRequired("topic"),
                        new SimpleStringSchema(),
                        flinkBenchmarkParams.getProperties())).setParallelism(Math.min(hosts * cores, kafkaPartitions));

        messageStream
                .rebalance()
                // Parse the String as JSON
                .flatMap(new DeserializeBolt())

                //Filter the records if event type is "view"
                .filter(new EventFilterBolt())

                // project the event
                .<Tuple2<String, String>>project(2, 5)

                // perform join with redis data
                .flatMap(new RedisJoinBolt())

                // process campaign
                .keyBy(0)
                .flatMap(new CampaignProcessor());


        env.execute();
    }
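
The Math.min(hosts * cores, kafkaPartitions) cap on the source parallelism is deliberate: Kafka assigns each partition to at most one consumer in a group, so source subtasks beyond the partition count would sit idle.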
 
Example #19
Source File: ReadFromKafka.java    From flinkDemo with Apache License 2.0
public static void main(String[] args) throws Exception {
    // create execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    Map<String, String> properties = new HashMap<>();
    properties.put("bootstrap.servers", "192.168.10.63:6667,192.168.10.64:6667,192.168.10.65:6667");
    properties.put("group.id", "dec-esc-group-vib-calc");
    properties.put("enable.auto.commit", "true");
    properties.put("auto.commit.interval.ms", "1000");
    properties.put("auto.offset.reset", "earliest");
    properties.put("session.timeout.ms", "30000");
    properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    properties.put("topic", "dec-vibration-test");
    //KafkaConsumer<String,String> kafkaConsumer = new KafkaConsumer<String, String>(properties);
    // parse user parameters
    //ParameterTool parameterTool = ParameterTool.fromArgs(args);
    ParameterTool parameterTool = ParameterTool.fromMap(properties);

    FlinkKafkaConsumer010<String> consumer010 = new FlinkKafkaConsumer010<>(
                     parameterTool.getRequired("topic"), new SimpleStringSchema(), parameterTool.getProperties());

  //  consumer010.setStartFromEarliest();

    DataStream<String> messageStream = env
            .addSource(consumer010);

    // print() will write the contents of the stream to the TaskManager's standard out stream
    // the rebalance() call causes a repartitioning of the data so that all machines
    // see the messages (useful, for example, when "num kafka partitions" < "num flink operators")
    messageStream.rebalance().map(new MapFunction<String, String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public String map(String value) throws Exception {
            return value;

        }
    });


    messageStream.print();

    env.execute();
}
 
Example #20
Source File: WriteIntoKafka.java    From kafka-example with Apache License 2.0
public SimpleStringSchema() {
	// the schema's no-argument constructor: no configuration is required
}