Java Code Examples for org.apache.flink.api.java.utils.ParameterTool

The following examples show how to use org.apache.flink.api.java.utils.ParameterTool. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: flink-learning   Source File: FlinkKafkaConsumerTest1.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Consumes {@code MetricEvent} records from the Kafka topics configured under
 * {@code metrics.topic} and {@code logs.topic} and prints every record.
 *
 * @param args command-line arguments forwarded to {@code ExecutionEnvUtil.createParameterTool}
 * @throws Exception if setting up or running the job fails
 */
public static void main(String[] args) throws Exception {
        final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
        StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
        Properties kafkaProps = buildKafkaProps(parameterTool);

        // Subscribe to an explicit list of topics.
        String metricsTopic = parameterTool.get("metrics.topic");
        String logsTopic = parameterTool.get("logs.topic");
        List<String> topicNames = Arrays.asList(metricsTopic, logsTopic);
        FlinkKafkaConsumer011<MetricEvent> kafkaSource =
                new FlinkKafkaConsumer011<>(topicNames, new MetricSchema(), kafkaProps);

        // Alternative: subscribe by topic pattern instead of by list, e.g.
        //   new FlinkKafkaConsumer011<>(java.util.regex.Pattern.compile("test-topic-[0-9]"), new MetricSchema(), kafkaProps);
        // The start position can be overridden with kafkaSource.setStartFromLatest()
        // or kafkaSource.setStartFromEarliest().

        DataStreamSource<MetricEvent> events = env.addSource(kafkaSource);
        events.print();

        env.execute("flink kafka connector test");
    }
 
Example 2
Source Project: jstorm   Source File: StreamWordCount.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a streaming word-count topology and prints its execution plan.
 *
 * <p>Required arguments: {@code --s} (source parallelism) and {@code --p}
 * (operator parallelism). The job itself is not executed; only the plan is printed.
 *
 * @param args command-line arguments parsed with {@link ParameterTool#fromArgs(String[])}
 * @throws Exception if building the plan fails
 */
public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        ParameterTool tool = ParameterTool.fromArgs(args);
        // getInt(key) reports the missing key by name instead of failing inside
        // Integer.parseInt(null) with an uninformative NumberFormatException.
        int sourceParallel = tool.getInt("s");
        int operatorParallel = tool.getInt("p");

        System.out.println("sourceParallel: " + sourceParallel + ", operatorParallel: " + operatorParallel);

        env.setParallelism(operatorParallel);

        // get input data
        DataStream<String> text = env.addSource(new WordSource()).setParallelism(sourceParallel);

        DataStream<Tuple2<String, Integer>> counts =
                // split up the lines in pairs (2-tuples) containing: (word,1)
                text.flatMap(new LineSplitter())
                        // group by the tuple field "0" and sum up tuple field "1"
                        .keyBy(0)
                        .sum(1);

        // Only print the plan; env.execute("StreamWordCount") is intentionally not called.
        System.out.println(env.getExecutionPlan());
    }
 
Example 3
Source Project: Flink-CEPplus   Source File: IterationConvergence.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads the {@code iterations} and {@code convergence_threshold} parameters.
 *
 * <p>When neither is present the default iteration count is used with an
 * unbounded threshold; otherwise any unset value falls back to its maximum
 * and both values must be strictly positive.
 *
 * @param parameterTool parsed program parameters
 */
@Override
public void configure(ParameterTool parameterTool) {
	boolean anyConfigured =
		parameterTool.has("iterations") || parameterTool.has("convergence_threshold");

	if (!anyConfigured) {
		// nothing configured: default iterations, threshold effectively disabled
		value.iterations = defaultIterations;
		value.convergenceThreshold = Double.MAX_VALUE;
		return;
	}

	// at least one value configured: the other one defaults to its maximum
	value.iterations = parameterTool.getInt("iterations", Integer.MAX_VALUE);
	Util.checkParameter(value.iterations > 0, "iterations must be greater than zero");

	value.convergenceThreshold = parameterTool.getDouble("convergence_threshold", Double.MAX_VALUE);
	Util.checkParameter(value.convergenceThreshold > 0, "convergence threshold must be greater than zero");
}
 
Example 4
Source Project: Flink-CEPplus   Source File: DoubleParameter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads this parameter's value (falling back to the default when one is set)
 * and validates it against the optional minimum and maximum bounds.
 *
 * @param parameterTool parsed program parameters
 */
@Override
public void configure(ParameterTool parameterTool) {
	if (hasDefaultValue) {
		value = parameterTool.getDouble(name, defaultValue);
	} else {
		value = parameterTool.getDouble(name);
	}

	if (hasMinimumValue) {
		// inclusive bounds accept the bound itself, exclusive bounds do not
		boolean aboveMinimum = minimumValueInclusive ? value >= minimumValue : value > minimumValue;
		String relation = minimumValueInclusive
			? " must be greater than or equal to "
			: " must be greater than ";
		Util.checkParameter(aboveMinimum, name + relation + minimumValue);
	}

	if (hasMaximumValue) {
		boolean belowMaximum = maximumValueInclusive ? value <= maximumValue : value < maximumValue;
		String relation = maximumValueInclusive
			? " must be less than or equal to "
			: " must be less than ";
		Util.checkParameter(belowMaximum, name + relation + maximumValue);
	}
}
 
Example 5
Source Project: Flink-CEPplus   Source File: GridGraph.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Collects every parameter whose key starts with {@code PREFIX}, orders them
 * by the integer dimension ID that follows the prefix, and appends one
 * {@code Dimension} per entry in that order.
 *
 * @param parameterTool parsed program parameters
 * @throws ProgramParametrizationException if a key starting with {@code PREFIX}
 *         is not followed by an integer dimension ID
 */
@Override
public void configure(ParameterTool parameterTool) throws ProgramParametrizationException {
	super.configure(parameterTool);

	// sorted by dimension ID (dim0, dim1, dim2, ...)
	Map<Integer, String> dimensionMap = new TreeMap<>();

	// first parse all dimensions into the sorted map
	for (String key : parameterTool.toMap().keySet()) {
		if (!key.startsWith(PREFIX)) {
			continue;
		}

		int dimensionId;
		try {
			dimensionId = Integer.parseInt(key.substring(PREFIX.length()));
		} catch (NumberFormatException e) {
			// surface malformed keys as a parametrization error rather than a raw NumberFormatException
			ProgramParametrizationException invalidKey = new ProgramParametrizationException(
				"Parameter '" + key + "' must be '" + PREFIX + "' followed by an integer dimension ID");
			invalidKey.initCause(e);
			throw invalidKey;
		}
		dimensionMap.put(dimensionId, parameterTool.get(key));
	}

	// then store dimensions in order
	for (String field : dimensionMap.values()) {
		dimensions.add(new Dimension(field));
	}
}
 
Example 6
Source Project: flink-tutorials   Source File: AvroDataGeneratorJob.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs a job that writes generated {@code Message} records to the Kafka topic
 * named by the {@code K_KAFKA_TOPIC} parameter and also prints them.
 *
 * @param args command-line arguments parsed by {@code Utils.parseArgs}
 * @throws Exception if the job fails
 */
public static void main(String[] args) throws Exception {
	ParameterTool params = Utils.parseArgs(args);
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// serialization schema keyed by message id
	String topic = params.getRequired(K_KAFKA_TOPIC);
	KafkaSerializationSchema<Message> schema = ClouderaRegistryKafkaSerializationSchema
			.<Message>builder(topic)
			.setConfig(Utils.readSchemaRegistryProperties(params))
			.setKey(Message::getId)
			.build();

	FlinkKafkaProducer<Message> kafkaSink = new FlinkKafkaProducer<>(
			"default",
			schema,
			Utils.readKafkaProperties(params),
			FlinkKafkaProducer.Semantic.AT_LEAST_ONCE);

	DataStream<Message> generated = env
			.addSource(new DataGeneratorSource())
			.name("Data Generator Source");

	generated.addSink(kafkaSink).name("Kafka Sink").uid("Kafka Sink");
	generated.print();

	env.execute("Data Generator Job");
}
 
Example 7
Source Project: flink   Source File: DoubleParameter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads the parameter value, using the default when one has been set, then
 * enforces the optional lower and upper bounds.
 *
 * @param parameterTool parsed program parameters
 */
@Override
public void configure(ParameterTool parameterTool) {
	if (hasDefaultValue) {
		value = parameterTool.getDouble(name, defaultValue);
	} else {
		value = parameterTool.getDouble(name);
	}

	if (hasMinimumValue) {
		final boolean satisfiesMinimum;
		final String minimumMessage;
		if (minimumValueInclusive) {
			satisfiesMinimum = value >= minimumValue;
			minimumMessage = name + " must be greater than or equal to " + minimumValue;
		} else {
			satisfiesMinimum = value > minimumValue;
			minimumMessage = name + " must be greater than " + minimumValue;
		}
		Util.checkParameter(satisfiesMinimum, minimumMessage);
	}

	if (hasMaximumValue) {
		final boolean satisfiesMaximum;
		final String maximumMessage;
		if (maximumValueInclusive) {
			satisfiesMaximum = value <= maximumValue;
			maximumMessage = name + " must be less than or equal to " + maximumValue;
		} else {
			satisfiesMaximum = value < maximumValue;
			maximumMessage = name + " must be less than " + maximumValue;
		}
		Util.checkParameter(satisfiesMaximum, maximumMessage);
	}
}
 
Example 8
Source Project: Flink-CEPplus   Source File: KafkaExampleUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the streaming environment used by the Kafka examples.
 *
 * <p>Fewer than five parameters is treated as a usage error: the usage text
 * is printed and then thrown.
 *
 * @param parameterTool parsed program parameters; also registered as global job parameters
 * @return the configured execution environment
 * @throws Exception if fewer than five parameters were supplied
 */
public static StreamExecutionEnvironment prepareExecutionEnv(ParameterTool parameterTool)
	throws Exception {

	if (parameterTool.getNumberOfParameters() < 5) {
		final String usage = "Missing parameters!\n" +
			"Usage: Kafka --input-topic <topic> --output-topic <topic> " +
			"--bootstrap.servers <kafka brokers> " +
			"--zookeeper.connect <zk quorum> --group.id <some id>";
		System.out.println(usage);
		throw new Exception(usage);
	}

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	ExecutionConfig config = env.getConfig();
	config.disableSysoutLogging();
	config.setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000));

	// create a checkpoint every 5 seconds
	env.enableCheckpointing(5000);

	// make parameters available in the web interface
	env.getConfig().setGlobalJobParameters(parameterTool);
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	return env;
}
 
Example 9
/**
 * Demonstrates the failure-rate restart strategy with a source that emits only
 * {@code null} elements.
 *
 * <p>The map step unboxes each element, so every record fails with a
 * {@link NullPointerException} — presumably deliberate, to exercise the restart strategy.
 *
 * @param args command-line arguments, registered as global job parameters
 * @throws Exception if the job ultimately fails
 */
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(ParameterTool.fromArgs(args));
    // Restart after a 10s delay; stop the job once it has restarted three times within two minutes.
    env.setRestartStrategy(RestartStrategies.failureRateRestart(3, Time.minutes(2), Time.seconds(10)));

    env.addSource(new SourceFunction<Long>() {
        // Cleared by cancel() so that run() can terminate when the job is cancelled.
        private volatile boolean running = true;

        @Override
        public void run(SourceContext<Long> sourceContext) throws Exception {
            while (running) {
                sourceContext.collect(null);
            }
        }

        @Override
        public void cancel() {
            running = false;
        }
    })
            .map((MapFunction<Long, Long>) aLong -> aLong / 1)
            .print();

    env.execute("zhisheng failureRate Restart Strategy example");
}
 
Example 10
/**
 * Runs a batch job over the sequence 0..1000 through a
 * {@code BlockingIncrementingMapFunction} and writes the result as text.
 *
 * @param args must contain {@code --latchFilePath} and {@code --outputPath}
 * @throws Exception if the job fails
 */
public static void main(String[] args) throws Exception {
	final ParameterTool arguments = ParameterTool.fromArgs(args);
	final String latchFile = arguments.getRequired("latchFilePath");
	final String resultPath = arguments.getRequired("outputPath");

	final ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();
	batchEnv.getConfig().setExecutionMode(ExecutionMode.BATCH_FORCED);
	batchEnv.setParallelism(4);

	// the sink runs with parallelism 1, the rest with the environment default of 4
	batchEnv
		.generateSequence(0, 1000)
		.map(new BlockingIncrementingMapFunction(latchFile))
		.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE)
		.setParallelism(1);

	batchEnv.execute();
}
 
Example 11
Source Project: flink-learning   Source File: Main.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes {@code string} into the {@code HBASE_TABLE_NAME} table, creating the
 * table with the {@code INFO_STREAM} column family if it does not exist yet.
 *
 * <p>The row key is the current time in milliseconds and the value is stored
 * under qualifier {@code "test"}.
 *
 * @param string        the value to store
 * @param parameterTool supplies the HBase connection settings
 * @throws IOException if connecting to or writing to HBase fails
 */
private static void writeEventToHbase(String string, ParameterTool parameterTool) throws IOException {
    Configuration configuration = HBaseConfiguration.create();
    configuration.set(HBASE_ZOOKEEPER_QUORUM, parameterTool.get(HBASE_ZOOKEEPER_QUORUM));
    configuration.set(HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT, parameterTool.get(HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT));
    configuration.set(HBASE_RPC_TIMEOUT, parameterTool.get(HBASE_RPC_TIMEOUT));
    configuration.set(HBASE_CLIENT_OPERATION_TIMEOUT, parameterTool.get(HBASE_CLIENT_OPERATION_TIMEOUT));
    configuration.set(HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, parameterTool.get(HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD));

    // try-with-resources closes table, admin and connection (in that order) even when a call throws
    try (Connection connect = ConnectionFactory.createConnection(configuration);
         Admin admin = connect.getAdmin()) {
        // create the table if it does not exist yet
        if (!admin.tableExists(HBASE_TABLE_NAME)) {
            admin.createTable(new HTableDescriptor(HBASE_TABLE_NAME).addFamily(new HColumnDescriptor(INFO_STREAM)));
        }

        try (Table table = connect.getTable(HBASE_TABLE_NAME)) {
            TimeStamp ts = new TimeStamp(new Date());
            Date date = ts.getDate();
            Put put = new Put(Bytes.toBytes(date.getTime()));
            put.addColumn(Bytes.toBytes(INFO_STREAM), Bytes.toBytes("test"), Bytes.toBytes(string));
            table.put(put);
        }
    }
}
 
Example 12
Source Project: flink-learning   Source File: DataSetBrocastMain.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Demonstrates DataSet broadcast variables: the elements {@code "a"} and
 * {@code "b"} are each prefixed with the second element of the broadcast
 * data set and the result is printed.
 *
 * @param args command-line arguments parsed with {@link ParameterTool#fromArgs(String[])}
 * @throws Exception if the job fails
 */
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // 1. the data to broadcast
    DataSet<Integer> toBroadcast = env.fromElements(1, 2, 3);

    RichMapFunction<String, String> prefixWithBroadcast = new RichMapFunction<String, String>() {
        List<Integer> broadcastValues;

        @Override
        public void open(Configuration parameters) throws Exception {
            // 3. fetch the broadcast DataSet as a collection
            broadcastValues = getRuntimeContext().getBroadcastVariable("zhisheng");
        }

        @Override
        public String map(String value) throws Exception {
            return broadcastValues.get(1) + value;
        }
    };

    env.fromElements("a", "b")
            .map(prefixWithBroadcast)
            // 2. broadcast the DataSet
            .withBroadcastSet(toBroadcast, "zhisheng")
            .print();
}
 
Example 13
Source Project: flink-learning   Source File: Sink2ES6Main.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a source via {@code KafkaConfigUtil.buildSource} and indexes each
 * {@code MetricEvent} as JSON through {@code ESSinkUtil.addSink}.
 *
 * @param args command-line arguments forwarded to {@code ExecutionEnvUtil.createParameterTool}
 * @throws Exception if the job fails
 */
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    DataStreamSource<MetricEvent> metrics = KafkaConfigUtil.buildSource(env);

    String configuredHosts = parameterTool.get(ELASTICSEARCH_HOSTS);
    List<HttpHost> esAddresses = ESSinkUtil.getEsAddresses(configuredHosts);
    // bulk flush size defaults to 40 actions, sink parallelism to 5
    int bulkSize = parameterTool.getInt(ELASTICSEARCH_BULK_FLUSH_MAX_ACTIONS, 40);
    int sinkParallelism = parameterTool.getInt(STREAM_SINK_PARALLELISM, 5);

    log.info("-----esAddresses = {}, parameterTool = {}, ", esAddresses, parameterTool);

    // one index per metric name: <ZHISHENG>_<metric name>
    ESSinkUtil.addSink(esAddresses, bulkSize, sinkParallelism, metrics,
            (MetricEvent event, RuntimeContext ctx, RequestIndexer indexer) ->
                    indexer.add(Requests.indexRequest()
                            .index(ZHISHENG + "_" + event.getName())
                            .type(ZHISHENG)
                            .source(GsonUtil.toJSONBytes(event), XContentType.JSON)),
            parameterTool);
    env.execute("flink learning connectors es6");
}
 
Example 14
Source Project: flink   Source File: KafkaExample.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads {@code KafkaEvent}s from {@code --input-topic}, keys them by word,
 * applies a {@code RollingAdditionMapper}, and writes the result to
 * {@code --output-topic} with exactly-once semantics.
 *
 * @param args command-line arguments; also used as the Kafka client properties
 * @throws Exception if the job fails
 */
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	// source: Kafka consumer with timestamps and watermarks assigned at the source
	FlinkKafkaConsumer<KafkaEvent> consumer = new FlinkKafkaConsumer<>(
		parameterTool.getRequired("input-topic"),
		new KafkaEventSchema(),
		parameterTool.getProperties());

	DataStream<KafkaEvent> input = env
		.addSource(consumer.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
		.keyBy("word")
		.map(new RollingAdditionMapper());

	// sink: Kafka producer
	FlinkKafkaProducer<KafkaEvent> producer = new FlinkKafkaProducer<>(
		parameterTool.getRequired("output-topic"),
		new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()),
		parameterTool.getProperties(),
		FlinkKafkaProducer.Semantic.EXACTLY_ONCE);
	input.addSink(producer);

	env.execute("Modern Kafka Example");
}
 
Example 15
/**
 * Streams taxi rides from the {@code --input} file (default {@code pathToRideData}),
 * applies {@code NYCFilter}, and prints the remaining rides.
 *
 * @param args command-line arguments parsed with {@link ParameterTool#fromArgs(String[])}
 * @throws Exception if the job fails
 */
public static void main(String[] args) throws Exception {

		ParameterTool params = ParameterTool.fromArgs(args);
		final String ridesFile = params.get("input", pathToRideData);

		// events are out of order by max 60 seconds
		final int maxDelaySeconds = 60;
		// events of 10 minutes are served in 1 second
		final int speedFactor = 600;

		// set up streaming execution environment
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(ExerciseBase.parallelism);

		// start the data generator
		TaxiRideSource rideSource = new TaxiRideSource(ridesFile, maxDelaySeconds, speedFactor);
		DataStream<TaxiRide> allRides = env.addSource(rideSourceOrTest(rideSource));

		// apply the NYC filter to the ride stream
		DataStream<TaxiRide> nycRides = allRides.filter(new NYCFilter());

		// print the filtered stream
		printOrTest(nycRides);

		// run the cleansing pipeline
		env.execute("Taxi Ride Cleansing");
	}
 
Example 16
/**
 * Creates a {@code SequenceGeneratorSource} configured from the
 * {@code SEQUENCE_GENERATOR_SRC_*} options, each falling back to its default value.
 *
 * @param pt parsed program parameters
 * @return the configured event source
 */
static SourceFunction<Event> createEventSource(ParameterTool pt) {
	final int keyspace = pt.getInt(
		SEQUENCE_GENERATOR_SRC_KEYSPACE.key(),
		SEQUENCE_GENERATOR_SRC_KEYSPACE.defaultValue());
	final int payloadSize = pt.getInt(
		SEQUENCE_GENERATOR_SRC_PAYLOAD_SIZE.key(),
		SEQUENCE_GENERATOR_SRC_PAYLOAD_SIZE.defaultValue());
	final long maxOutOfOrderness = pt.getLong(
		SEQUENCE_GENERATOR_SRC_EVENT_TIME_MAX_OUT_OF_ORDERNESS.key(),
		SEQUENCE_GENERATOR_SRC_EVENT_TIME_MAX_OUT_OF_ORDERNESS.defaultValue());
	final long clockProgress = pt.getLong(
		SEQUENCE_GENERATOR_SRC_EVENT_TIME_CLOCK_PROGRESS.key(),
		SEQUENCE_GENERATOR_SRC_EVENT_TIME_CLOCK_PROGRESS.defaultValue());
	final long sleepTime = pt.getLong(
		SEQUENCE_GENERATOR_SRC_SLEEP_TIME.key(),
		SEQUENCE_GENERATOR_SRC_SLEEP_TIME.defaultValue());
	final long sleepAfterElements = pt.getLong(
		SEQUENCE_GENERATOR_SRC_SLEEP_AFTER_ELEMENTS.key(),
		SEQUENCE_GENERATOR_SRC_SLEEP_AFTER_ELEMENTS.defaultValue());

	return new SequenceGeneratorSource(
		keyspace,
		payloadSize,
		maxOutOfOrderness,
		clockProgress,
		sleepTime,
		sleepAfterElements);
}
 
Example 17
Source Project: flink   Source File: MatrixVectorMul.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Feeds vectors from a {@code RandomVectorSource} through a {@code Multiplier}
 * and emits the result to {@code --output} when given, otherwise to stdout.
 *
 * @param args command-line arguments parsed with {@link ParameterTool#fromArgs(String[])}
 * @throws Exception if the job fails
 */
public static void main(String[] args) throws Exception {

		// Checking input parameters
		final ParameterTool params = ParameterTool.fromArgs(args);
		System.out.println("Usage: MatrixVectorMul [--output <path>] [--dimension <dimension> --data-size <data_size>] [--resource-name <resource_name>]");

		// Set up the execution environment
		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// Make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		final int vectorDimension = params.getInt("dimension", DEFAULT_DIM);
		final int vectorCount = params.getInt("data-size", DEFAULT_DATA_SIZE);
		final String resource = params.get("resource-name", DEFAULT_RESOURCE_NAME);

		DataStream<List<Float>> products = env
				.addSource(new RandomVectorSource(vectorDimension, vectorCount))
				.map(new Multiplier(vectorDimension, resource));

		// Emit result
		if (!params.has("output")) {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			products.print();
		} else {
			products.addSink(
					StreamingFileSink
							.forRowFormat(new Path(params.get("output")), new SimpleStringEncoder<List<Float>>())
							.build());
		}

		// Execute program
		env.execute("Matrix-Vector Multiplication");
	}
 
Example 18
Source Project: pravega-samples   Source File: PipelineRunner.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses the command-line arguments and loads the JSON application
 * configuration from {@code <configDir>/<configFile>}.
 *
 * <p>Exits the JVM with status 1 when {@code --mode} is missing or the
 * configuration file cannot be read.
 *
 * @param args raw command-line arguments
 */
private void parseConfigurations(String[] args) {

		log.info("ApplicationMain Main.. Arguments: {}", Arrays.asList(args));

		ParameterTool parameterTool = ParameterTool.fromArgs(args);
		log.info("Parameter Tool: {}", parameterTool.toMap());

		if(!parameterTool.has("mode")) {
			printUsage();
			System.exit(1);
		}

		// configuration directory defaults to "conf"
		String configDirPath = parameterTool.get("configDir", "conf");
		try {
			byte[] configurationData = Files.readAllBytes(Paths.get(configDirPath + File.separator + configFile));
			// Decode explicitly as UTF-8 so the result does not depend on the platform default charset.
			String jsonData = new String(configurationData, java.nio.charset.StandardCharsets.UTF_8);
			log.info("App Configurations raw data: {}", jsonData);
			Gson gson = new Gson();
			appConfiguration = gson.fromJson(jsonData, AppConfiguration.class);
		} catch (IOException e) {
			log.error("Could not read {}",configFile, e);
			System.exit(1);
		}

		runMode = parameterTool.getInt("mode");
		pravegaConfig = PravegaConfig.fromParams(parameterTool).withDefaultScope(DEFAULT_SCOPE);
		stream = pravegaConfig.resolve(parameterTool.get(STREAM_PARAMETER, DEFAULT_STREAM));
	}
 
Example 19
Source Project: flink-learning   Source File: SplitEvent.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Splits the {@code MetricEvent} stream by the {@code "type"} tag into
 * machine, docker, application and middleware streams.
 *
 * <p>This method only builds the streams; it neither attaches sinks nor
 * executes the job.
 *
 * @param args command-line arguments, registered as global job parameters
 * @throws Exception if building the source fails
 */
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    DataStreamSource<MetricEvent> data = KafkaConfigUtil.buildSource(env);  // all events read from Kafka
    SplitStream<MetricEvent> splitData = data.split(new OutputSelector<MetricEvent>() {
        @Override
        public Iterable<String> select(MetricEvent metricEvent) {
            List<String> tags = new ArrayList<>();
            String type = metricEvent.getTags().get("type");
            if (type == null) {
                // switch on a null String throws NullPointerException; untyped events match no output
                return tags;
            }
            switch (type) {
                case "machine":
                case "docker":
                case "application":
                case "middleware":
                    // the output name is the type itself
                    tags.add(type);
                    break;
                default:
                    break;
            }
            return tags;
        }
    });

    DataStream<MetricEvent> machine = splitData.select("machine");
    DataStream<MetricEvent> docker = splitData.select("docker");
    DataStream<MetricEvent> application = splitData.select("application");
    DataStream<MetricEvent> middleware = splitData.select("middleware");

}
 
Example 20
Source Project: flink   Source File: DoubleParameterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * A value above an exclusive maximum of 0 must be rejected with a
 * {@code ProgramParametrizationException}.
 */
@Test
public void testMaxOutOfRange() {
	parameter.setMaximumValue(0, false);

	expectedException.expect(ProgramParametrizationException.class);
	expectedException.expectMessage("test must be less than 0.0");

	// 1 lies above the exclusive maximum
	String[] arguments = {"--test", "1"};
	parameter.configure(ParameterTool.fromArgs(arguments));
}
 
Example 21
Source Project: flink-connectors   Source File: FlinkPravegaReaderTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks how the reader group scope is resolved: building fails without any
 * scope, then the default scope is used, then an explicit scope.
 */
@Test
public void testRgScope() {
    PravegaConfig config = new PravegaConfig(
            new Properties(),
            Collections.emptyMap(),
            ParameterTool.fromMap(Collections.emptyMap()));

    TestableStreamingReaderBuilder builder = new TestableStreamingReaderBuilder()
            .forStream(SAMPLE_STREAM, SAMPLE_CUT)
            .withPravegaConfig(config);

    // no scope: building must fail
    try {
        builder.buildSourceFunction();
        fail();
    } catch (IllegalStateException expected) {
        // "missing reader group scope"
    }

    // default scope
    config.withDefaultScope(SAMPLE_SCOPE);
    FlinkPravegaReader<Integer> defaultScoped = builder.buildSourceFunction();
    assertEquals(SAMPLE_SCOPE, defaultScoped.readerGroupScope);

    // explicit scope
    builder.withReaderGroupScope("myscope");
    FlinkPravegaReader<Integer> explicitlyScoped = builder.buildSourceFunction();
    assertEquals("myscope", explicitlyScoped.readerGroupScope);
}
 
Example 22
/**
 * Counts ride events per grid cell and event type over a sliding window and
 * prints the cells whose count reaches {@code --threshold} (default 20).
 *
 * @param args command-line arguments parsed with {@link ParameterTool#fromArgs(String[])}
 * @throws Exception if the job fails
 */
public static void main(String[] args) throws Exception {

		ParameterTool params = ParameterTool.fromArgs(args);
		final String ridesFile = params.get("input", ExerciseBase.pathToRideData);
		final int minCount = params.getInt("threshold", 20);

		// events are out of order by max 60 seconds
		final int maxDelaySeconds = 60;
		// events of 10 minutes are served in 1 second
		final int speedFactor = 600;

		// set up streaming execution environment
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.setParallelism(ExerciseBase.parallelism);

		// start the data generator
		TaxiRideSource rideSource = new TaxiRideSource(ridesFile, maxDelaySeconds, speedFactor);
		DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(rideSource));

		// find popular places
		DataStream<Tuple5<Float, Float, Long, Boolean, Integer>> hotSpots = rides
				// remove all rides which are not within NYC
				.filter(new NYCFilter())
				// match ride to grid cell and event type (start or end)
				.map(new GridCellMatcher())
				// partition by cell id and event type
				.<KeyedStream<Tuple2<Integer, Boolean>, Tuple2<Integer, Boolean>>>keyBy(0, 1)
				// 15-minute window sliding every 5 minutes
				.timeWindow(Time.minutes(15), Time.minutes(5))
				// count ride events in window
				.apply(new RideCounter())
				// filter by popularity threshold
				.filter((Tuple4<Integer, Long, Boolean, Integer> cellCount) -> (cellCount.f3 >= minCount))
				// map grid cell to coordinates
				.map(new GridToCoordinates());

		// print result on stdout
		printOrTest(hotSpots);

		// execute the transformation pipeline
		env.execute("Popular Places");
	}
 
Example 23
Source Project: flink   Source File: PeriodicStreamingJob.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs a windowed pass-through job over a {@code PeriodicSourceGenerator}
 * and writes the sums to {@code <outputPath>/result.txt}.
 *
 * @param args requires {@code --outputPath}; {@code --recordsPerSecond},
 *             {@code --durationInSecond} and {@code --offsetInSecond} are optional
 * @throws Exception if the job fails
 */
public static void main(String[] args) throws Exception {
	ParameterTool params = ParameterTool.fromArgs(args);
	String resultDir = params.getRequired("outputPath");
	int rate = params.getInt("recordsPerSecond", 10);
	int durationSeconds = params.getInt("durationInSecond", 60);
	int offsetSeconds = params.getInt("offsetInSecond", 0);

	StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	sEnv.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
	sEnv.enableCheckpointing(4000);
	sEnv.getConfig().setAutoWatermarkInterval(1000);

	// execute a simple pass through program.
	PeriodicSourceGenerator source = new PeriodicSourceGenerator(rate, durationSeconds, offsetSeconds);
	DataStream<Tuple> generated = sEnv.addSource(source);

	// key by field 1 and sum field 0 over 5-second windows
	DataStream<Tuple> summed = generated
		.keyBy(1)
		.timeWindow(Time.seconds(5))
		.sum(0);

	summed
		.writeAsText(resultDir + "/result.txt", FileSystem.WriteMode.OVERWRITE)
		.setParallelism(1);

	sEnv.execute();
}
 
Example 24
Source Project: flink-tutorials   Source File: KafkaToHDFSAvroJob.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads {@code Message} records from the Kafka topic named by
 * {@code K_KAFKA_TOPIC}, formats each as a comma-separated line, and writes
 * the lines to the path named by {@code K_HDFS_OUTPUT} as well as to stdout.
 *
 * @param args command-line arguments parsed by {@code Utils.parseArgs}
 * @throws Exception if the job fails
 */
public static void main(String[] args) throws Exception {

        ParameterTool params = Utils.parseArgs(args);

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        KafkaDeserializationSchema<Message> schema = ClouderaRegistryKafkaDeserializationSchema
                .builder(Message.class)
                .setConfig(Utils.readSchemaRegistryProperties(params))
                .build();

        FlinkKafkaConsumer<Message> kafkaSource = new FlinkKafkaConsumer<Message>(
                params.getRequired(K_KAFKA_TOPIC),
                schema,
                Utils.readKafkaProperties(params));

        // one "id,name,description" line per record
        DataStream<String> lines = env.addSource(kafkaSource)
                .name("Kafka Source")
                .uid("Kafka Source")
                .map(message -> message.getId() + "," + message.getName() + "," + message.getDescription())
                .name("ToOutputString");

        StreamingFileSink<String> fileSink = StreamingFileSink
                .forRowFormat(new Path(params.getRequired(K_HDFS_OUTPUT)), new SimpleStringEncoder<String>("UTF-8"))
                .build();

        lines.addSink(fileSink).name("FS Sink").uid("FS Sink");
        lines.print();

        env.execute("Flink Streaming Secured Job Sample");
    }
 
Example 25
Source Project: flink-learning   Source File: FlinkKafkaSchemaTest1.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Consumes {@code MetricEvent} records from the topic configured under
 * {@code metrics.topic}, using a wrapped {@code MetricSchema}, and prints them.
 *
 * @param args command-line arguments forwarded to {@code ExecutionEnvUtil.createParameterTool}
 * @throws Exception if the job fails
 */
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties kafkaProps = buildKafkaProps(parameterTool);

    // single-topic subscription
    String metricsTopic = parameterTool.get("metrics.topic");
    List<String> topicNames = Arrays.asList(metricsTopic);
    FlinkKafkaConsumer011<MetricEvent> kafkaSource = new FlinkKafkaConsumer011<>(
            topicNames,
            new KafkaDeserializationSchemaWrapper<>(new MetricSchema()),
            kafkaProps);

    DataStreamSource<MetricEvent> events = env.addSource(kafkaSource);
    events.print();

    env.execute("flink kafka connector test");
}
 
Example 26
Source Project: flink   Source File: DoubleParameterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * A value strictly between an exclusive minimum of -1 and an exclusive
 * maximum of 1 must be accepted and returned unchanged.
 */
@Test
public void testMinAndMaxInRange() {
	parameter.setMinimumValue(-1, false);
	parameter.setMaximumValue(1, false);
	parameter.configure(ParameterTool.fromArgs(new String[]{"--test", "0"}));
	// Double.valueOf replaces the deprecated Double(double) constructor
	Assert.assertEquals(Double.valueOf(0.0), parameter.getValue());
}
 
Example 27
/**
 * Connects the keyed taxi ride stream with a broadcast query stream read from
 * a socket on {@code localhost:9999} and prints the {@code QueryProcessor} output.
 *
 * @param args command-line arguments parsed with {@link ParameterTool#fromArgs(String[])}
 * @throws Exception if the job fails
 */
public static void main(String[] args) throws Exception {

		ParameterTool params = ParameterTool.fromArgs(args);
		final String ridesFile = params.get("input", ExerciseBase.pathToRideData);

		// events are out of order by at most 60 seconds
		final int maxDelaySeconds = 60;
		// 30 minutes worth of events are served every second
		final int speedFactor = 1800;

		// set up streaming execution environment
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.setParallelism(ExerciseBase.parallelism);

		// setup a stream of taxi rides
		TaxiRideSource rideSource = new TaxiRideSource(ridesFile, maxDelaySeconds, speedFactor);
		DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(rideSource));

		// add a socket source for the query stream
		BroadcastStream<String> queries = env
				.addSource(stringSourceOrTest(new SocketTextStreamFunction("localhost", 9999, "\n", -1)))
				.assignTimestampsAndWatermarks(new QueryStreamAssigner())
				.broadcast(queryDescriptor);

		// connect the two streams and process queries
		DataStream<Tuple2<String, String>> answers = rides
				.keyBy((TaxiRide taxiRide) -> taxiRide.taxiId)
				.connect(queries)
				.process(new QueryProcessor());

		printOrTest(answers);

		env.execute("Taxi Query");
	}
 
Example 28
Source Project: flink   Source File: ChoiceParameterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * With a default and additional choices configured, an explicitly supplied
 * choice must override the default.
 */
@Test
public void testWithDefaultWithParameter() {
	parameter
		.setDefaultValue("default")
		.addChoices("c0", "c1", "c2");
	Assert.assertEquals("[--choice <default | c0 | c1 | c2>]", parameter.getUsage());

	String[] arguments = {"--choice", "c1"};
	parameter.configure(ParameterTool.fromArgs(arguments));
	Assert.assertEquals("c1", parameter.getValue());
}
 
Example 29
Source Project: flink   Source File: DoubleParameterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * With a default value configured, an explicitly supplied value must
 * override the default and the usage string must mark the parameter optional.
 */
@Test
public void testWithDefaultWithParameter() {
	parameter.setDefaultValue(43.21);
	Assert.assertEquals("[--test TEST]", parameter.getUsage());

	parameter.configure(ParameterTool.fromArgs(new String[]{"--test", "12.34"}));
	// Double.valueOf replaces the deprecated Double(double) constructor
	Assert.assertEquals(Double.valueOf(12.34), parameter.getValue());
}
 
Example 30
Source Project: flink   Source File: Elasticsearch5SinkExample.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds an index request whose document is {@code {"data": element}} and
 * whose id is the element itself.
 *
 * @param element       the value to index; also used as the document id
 * @param parameterTool must provide the {@code index} and {@code type} parameters
 * @return the index request for {@code element}
 */
private static IndexRequest createIndexRequest(String element, ParameterTool parameterTool) {
	Map<String, Object> document = new HashMap<>();
	document.put("data", element);

	String indexName = parameterTool.getRequired("index");
	String typeName = parameterTool.getRequired("type");

	return Requests.indexRequest()
		.index(indexName)
		.type(typeName)
		.id(element)
		.source(document);
}