Java Code Examples for org.apache.flink.api.java.utils.ParameterTool
The following examples show how to use org.apache.flink.api.java.utils.ParameterTool. They are extracted from open source projects; the source project, author, file, and license are listed above each example.
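Before the project examples, here is a minimal, self-contained sketch of the typical ParameterTool workflow: parse --key value arguments, read typed values with or without defaults, and register the parameters as global job parameters so they are visible in the Flink web interface. The argument names (host, port, topic) are illustrative only and not taken from any project below.

import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class ParameterToolSketch {

    public static void main(String[] args) throws Exception {
        // parse command-line arguments of the form: --host localhost --port 9092 --topic metrics
        ParameterTool params = ParameterTool.fromArgs(args);

        String host = params.get("host", "localhost"); // optional, with a default value
        int port = params.getInt("port", 9092);        // typed accessor, with a default value
        String topic = params.getRequired("topic");    // throws a RuntimeException when missing

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // make the parameters visible in the web interface and to rich functions
        env.getConfig().setGlobalJobParameters(params);

        System.out.println("host=" + host + ", port=" + port + ", topic=" + topic);
    }
}

Parameters can also be loaded with ParameterTool.fromPropertiesFile(...) or ParameterTool.fromSystemProperties(), and combined with mergeWith(...).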
Example #1
Source Project: flink-learning Author: zhisheng17 File: FlinkKafkaConsumerTest1.java License: Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = buildKafkaProps(parameterTool);

    // kafka topic list
    List<String> topics = Arrays.asList(parameterTool.get("metrics.topic"), parameterTool.get("logs.topic"));
    FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(topics, new MetricSchema(), props);

    // kafka topic pattern
    // FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(java.util.regex.Pattern.compile("test-topic-[0-9]"), new MetricSchema(), props);

    // consumer.setStartFromLatest();
    // consumer.setStartFromEarliest();

    DataStreamSource<MetricEvent> data = env.addSource(consumer);
    data.print();

    env.execute("flink kafka connector test");
}
Example #2
Source Project: jstorm Author: alibaba File: StreamWordCount.java License: Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    ParameterTool tool = ParameterTool.fromArgs(args);
    int sourceParallel = Integer.parseInt(tool.get("s"));
    int operatorParallel = Integer.parseInt(tool.get("p"));
    System.out.println("sourceParallel: " + sourceParallel + ", operatorParallel: " + operatorParallel);

    env.setParallelism(operatorParallel);

    // get input data
    DataStream<String> text = env.addSource(new WordSource()).setParallelism(sourceParallel);

    DataStream<Tuple2<String, Integer>> counts =
            // split up the lines in pairs (2-tuples) containing: (word, 1)
            text.flatMap(new LineSplitter())
                    // group by the tuple field "0" and sum up tuple field "1"
                    .keyBy(0)
                    .sum(1);

    // execute program
    // env.execute("StreamWordCount");
    System.out.println(env.getExecutionPlan());
}
Example #3
Source Project: Flink-CEPplus Author: ljygz File: IterationConvergence.java License: Apache License 2.0
@Override
public void configure(ParameterTool parameterTool) {
    if (!parameterTool.has("iterations") && !parameterTool.has("convergence_threshold")) {
        // no configuration so use default iterations and maximum threshold
        value.iterations = defaultIterations;
        value.convergenceThreshold = Double.MAX_VALUE;
    } else {
        // use configured values and maximum default for unset values
        value.iterations = parameterTool.getInt("iterations", Integer.MAX_VALUE);
        Util.checkParameter(value.iterations > 0,
            "iterations must be greater than zero");

        value.convergenceThreshold = parameterTool.getDouble("convergence_threshold", Double.MAX_VALUE);
        Util.checkParameter(value.convergenceThreshold > 0,
            "convergence threshold must be greater than zero");
    }
}
Example #4
Source Project: Flink-CEPplus Author: ljygz File: DoubleParameter.java License: Apache License 2.0
@Override
public void configure(ParameterTool parameterTool) {
    value = hasDefaultValue ? parameterTool.getDouble(name, defaultValue) : parameterTool.getDouble(name);

    if (hasMinimumValue) {
        if (minimumValueInclusive) {
            Util.checkParameter(value >= minimumValue,
                name + " must be greater than or equal to " + minimumValue);
        } else {
            Util.checkParameter(value > minimumValue,
                name + " must be greater than " + minimumValue);
        }
    }

    if (hasMaximumValue) {
        if (maximumValueInclusive) {
            Util.checkParameter(value <= maximumValue,
                name + " must be less than or equal to " + maximumValue);
        } else {
            Util.checkParameter(value < maximumValue,
                name + " must be less than " + maximumValue);
        }
    }
}
Example #5
Source Project: Flink-CEPplus Author: ljygz File: GridGraph.java License: Apache License 2.0
@Override
public void configure(ParameterTool parameterTool) throws ProgramParametrizationException {
    super.configure(parameterTool);

    // add dimensions as ordered by dimension ID (dim0, dim1, dim2, ...)
    Map<Integer, String> dimensionMap = new TreeMap<>();

    // first parse all dimensions into a sorted map
    for (String key : parameterTool.toMap().keySet()) {
        if (key.startsWith(PREFIX)) {
            int dimensionId = Integer.parseInt(key.substring(PREFIX.length()));
            dimensionMap.put(dimensionId, parameterTool.get(key));
        }
    }

    // then store dimensions in order
    for (String field : dimensionMap.values()) {
        dimensions.add(new Dimension(field));
    }
}
Example #6
Source Project: flink-learning Author: zhisheng17 File: FlinkKafkaSchemaTest1.java License: Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = buildKafkaProps(parameterTool);

    // kafka topic list
    List<String> topics = Arrays.asList(parameterTool.get("metrics.topic"));
    FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(topics,
            new KafkaDeserializationSchemaWrapper<>(new MetricSchema()), props);

    DataStreamSource<MetricEvent> data = env.addSource(consumer);
    data.print();

    env.execute("flink kafka connector test");
}
Example #7
Source Project: flink-tutorials Author: cloudera File: AvroDataGeneratorJob.java License: Apache License 2.0
public static void main(String[] args) throws Exception {
    ParameterTool params = Utils.parseArgs(args);
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    KafkaSerializationSchema<Message> schema = ClouderaRegistryKafkaSerializationSchema
            .<Message>builder(params.getRequired(K_KAFKA_TOPIC))
            .setConfig(Utils.readSchemaRegistryProperties(params))
            .setKey(Message::getId)
            .build();

    FlinkKafkaProducer<Message> kafkaSink = new FlinkKafkaProducer<>(
            "default", schema,
            Utils.readKafkaProperties(params),
            FlinkKafkaProducer.Semantic.AT_LEAST_ONCE);

    DataStream<Message> input = env.addSource(new DataGeneratorSource()).name("Data Generator Source");

    input.addSink(kafkaSink)
            .name("Kafka Sink")
            .uid("Kafka Sink");
    input.print();

    env.execute("Data Generator Job");
}
Example #8
Source Project: flink Author: flink-tpc-ds File: DoubleParameter.java License: Apache License 2.0
@Override
public void configure(ParameterTool parameterTool) {
    value = hasDefaultValue ? parameterTool.getDouble(name, defaultValue) : parameterTool.getDouble(name);

    if (hasMinimumValue) {
        if (minimumValueInclusive) {
            Util.checkParameter(value >= minimumValue,
                name + " must be greater than or equal to " + minimumValue);
        } else {
            Util.checkParameter(value > minimumValue,
                name + " must be greater than " + minimumValue);
        }
    }

    if (hasMaximumValue) {
        if (maximumValueInclusive) {
            Util.checkParameter(value <= maximumValue,
                name + " must be less than or equal to " + maximumValue);
        } else {
            Util.checkParameter(value < maximumValue,
                name + " must be less than " + maximumValue);
        }
    }
}
Example #9
Source Project: Flink-CEPplus Author: ljygz File: KafkaExampleUtil.java License: Apache License 2.0
public static StreamExecutionEnvironment prepareExecutionEnv(ParameterTool parameterTool) throws Exception {

    if (parameterTool.getNumberOfParameters() < 5) {
        System.out.println("Missing parameters!\n" +
            "Usage: Kafka --input-topic <topic> --output-topic <topic> " +
            "--bootstrap.servers <kafka brokers> " +
            "--zookeeper.connect <zk quorum> --group.id <some id>");
        throw new Exception("Missing parameters!\n" +
            "Usage: Kafka --input-topic <topic> --output-topic <topic> " +
            "--bootstrap.servers <kafka brokers> " +
            "--zookeeper.connect <zk quorum> --group.id <some id>");
    }

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();
    env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000));
    env.enableCheckpointing(5000); // create a checkpoint every 5 seconds
    env.getConfig().setGlobalJobParameters(parameterTool); // make parameters available in the web interface
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    return env;
}
Example #10
Source Project: flink-learning Author: zhisheng17 File: FailureRateRestartStrategyMain.java License: Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(ParameterTool.fromArgs(args));

    // restart every 10 seconds; if the job has restarted three times within two minutes, stop it
    env.setRestartStrategy(RestartStrategies.failureRateRestart(3, Time.minutes(2), Time.seconds(10)));

    env.addSource(new SourceFunction<Long>() {
        @Override
        public void run(SourceContext<Long> sourceContext) throws Exception {
            while (true) {
                sourceContext.collect(null);
            }
        }

        @Override
        public void cancel() {
        }
    })
            .map((MapFunction<Long, Long>) aLong -> aLong / 1)
            .print();

    env.execute("zhisheng failureRate Restart Strategy example");
}
Example #11
Source Project: flink Author: flink-tpc-ds File: DataSetFineGrainedRecoveryTestProgram.java License: Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final String latchFilePath = params.getRequired("latchFilePath");
    final String outputPath = params.getRequired("outputPath");

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setExecutionMode(ExecutionMode.BATCH_FORCED);
    env.setParallelism(4);

    env.generateSequence(0, 1000)
        .map(new BlockingIncrementingMapFunction(latchFilePath))
        .writeAsText(outputPath, FileSystem.WriteMode.OVERWRITE)
        .setParallelism(1);

    env.execute();
}
Example #12
Source Project: flink-learning Author: zhisheng17 File: Main.java License: Apache License 2.0
private static void writeEventToHbase(String string, ParameterTool parameterTool) throws IOException {
    Configuration configuration = HBaseConfiguration.create();
    configuration.set(HBASE_ZOOKEEPER_QUORUM, parameterTool.get(HBASE_ZOOKEEPER_QUORUM));
    configuration.set(HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT, parameterTool.get(HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT));
    configuration.set(HBASE_RPC_TIMEOUT, parameterTool.get(HBASE_RPC_TIMEOUT));
    configuration.set(HBASE_CLIENT_OPERATION_TIMEOUT, parameterTool.get(HBASE_CLIENT_OPERATION_TIMEOUT));
    configuration.set(HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, parameterTool.get(HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD));

    Connection connect = ConnectionFactory.createConnection(configuration);
    Admin admin = connect.getAdmin();
    if (!admin.tableExists(HBASE_TABLE_NAME)) { // check whether the table exists; create it if not
        admin.createTable(new HTableDescriptor(HBASE_TABLE_NAME).addFamily(new HColumnDescriptor(INFO_STREAM)));
    }
    Table table = connect.getTable(HBASE_TABLE_NAME);

    TimeStamp ts = new TimeStamp(new Date());
    Date date = ts.getDate();

    Put put = new Put(Bytes.toBytes(date.getTime()));
    put.addColumn(Bytes.toBytes(INFO_STREAM), Bytes.toBytes("test"), Bytes.toBytes(string));
    table.put(put);

    table.close();
    connect.close();
}
Example #13
Source Project: flink-learning Author: zhisheng17 File: DataSetBrocastMain.java License: Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // 1. the data set to broadcast
    DataSet<Integer> toBroadcast = env.fromElements(1, 2, 3);

    env.fromElements("a", "b")
            .map(new RichMapFunction<String, String>() {
                List<Integer> broadcastData;

                @Override
                public void open(Configuration parameters) throws Exception {
                    // 3. access the broadcast DataSet as a Collection
                    broadcastData = getRuntimeContext().getBroadcastVariable("zhisheng");
                }

                @Override
                public String map(String value) throws Exception {
                    return broadcastData.get(1) + value;
                }
            }).withBroadcastSet(toBroadcast, "zhisheng") // 2. broadcast the DataSet
            .print();
}
Example #14
Source Project: flink-learning Author: zhisheng17 File: Sink2ES6Main.java License: Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    DataStreamSource<MetricEvent> data = KafkaConfigUtil.buildSource(env);

    List<HttpHost> esAddresses = ESSinkUtil.getEsAddresses(parameterTool.get(ELASTICSEARCH_HOSTS));
    int bulkSize = parameterTool.getInt(ELASTICSEARCH_BULK_FLUSH_MAX_ACTIONS, 40);
    int sinkParallelism = parameterTool.getInt(STREAM_SINK_PARALLELISM, 5);

    log.info("-----esAddresses = {}, parameterTool = {}, ", esAddresses, parameterTool);

    ESSinkUtil.addSink(esAddresses, bulkSize, sinkParallelism, data,
            (MetricEvent metric, RuntimeContext runtimeContext, RequestIndexer requestIndexer) -> {
                requestIndexer.add(Requests.indexRequest()
                        .index(ZHISHENG + "_" + metric.getName())
                        .type(ZHISHENG)
                        .source(GsonUtil.toJSONBytes(metric), XContentType.JSON));
            },
            parameterTool);

    env.execute("flink learning connectors es6");
}
Example #15
Source Project: flink Author: flink-tpc-ds File: KafkaExample.java License: Apache License 2.0
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    DataStream<KafkaEvent> input = env
            .addSource(
                new FlinkKafkaConsumer<>(
                    parameterTool.getRequired("input-topic"),
                    new KafkaEventSchema(),
                    parameterTool.getProperties())
                .assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
            .keyBy("word")
            .map(new RollingAdditionMapper());

    input.addSink(
        new FlinkKafkaProducer<>(
            parameterTool.getRequired("output-topic"),
            new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()),
            parameterTool.getProperties(),
            FlinkKafkaProducer.Semantic.EXACTLY_ONCE));

    env.execute("Modern Kafka Example");
}
Example #16
Source Project: flink-training-exercises Author: ververica File: RideCleansingSolution.java License: Apache License 2.0
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    final String input = params.get("input", pathToRideData);

    final int maxEventDelay = 60;       // events are out of order by max 60 seconds
    final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second

    // set up streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(ExerciseBase.parallelism);

    // start the data generator
    DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor)));

    DataStream<TaxiRide> filteredRides = rides
            // keep only those rides that both start and end in NYC
            .filter(new NYCFilter());

    // print the filtered stream
    printOrTest(filteredRides);

    // run the cleansing pipeline
    env.execute("Taxi Ride Cleansing");
}
Example #17
Source Project: Flink-CEPplus Author: ljygz File: DataStreamAllroundTestJobFactory.java License: Apache License 2.0
static SourceFunction<Event> createEventSource(ParameterTool pt) {
    return new SequenceGeneratorSource(
        pt.getInt(
            SEQUENCE_GENERATOR_SRC_KEYSPACE.key(),
            SEQUENCE_GENERATOR_SRC_KEYSPACE.defaultValue()),
        pt.getInt(
            SEQUENCE_GENERATOR_SRC_PAYLOAD_SIZE.key(),
            SEQUENCE_GENERATOR_SRC_PAYLOAD_SIZE.defaultValue()),
        pt.getLong(
            SEQUENCE_GENERATOR_SRC_EVENT_TIME_MAX_OUT_OF_ORDERNESS.key(),
            SEQUENCE_GENERATOR_SRC_EVENT_TIME_MAX_OUT_OF_ORDERNESS.defaultValue()),
        pt.getLong(
            SEQUENCE_GENERATOR_SRC_EVENT_TIME_CLOCK_PROGRESS.key(),
            SEQUENCE_GENERATOR_SRC_EVENT_TIME_CLOCK_PROGRESS.defaultValue()),
        pt.getLong(
            SEQUENCE_GENERATOR_SRC_SLEEP_TIME.key(),
            SEQUENCE_GENERATOR_SRC_SLEEP_TIME.defaultValue()),
        pt.getLong(
            SEQUENCE_GENERATOR_SRC_SLEEP_AFTER_ELEMENTS.key(),
            SEQUENCE_GENERATOR_SRC_SLEEP_AFTER_ELEMENTS.defaultValue()));
}
Example #18
Source Project: flink Author: apache File: MatrixVectorMul.java License: Apache License 2.0
public static void main(String[] args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    System.out.println("Usage: MatrixVectorMul [--output <path>] [--dimension <dimension> --data-size <data_size>] [--resource-name <resource_name>]");

    // Set up the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    final int dimension = params.getInt("dimension", DEFAULT_DIM);
    final int dataSize = params.getInt("data-size", DEFAULT_DATA_SIZE);
    final String resourceName = params.get("resource-name", DEFAULT_RESOURCE_NAME);

    DataStream<List<Float>> result = env.addSource(new RandomVectorSource(dimension, dataSize))
        .map(new Multiplier(dimension, resourceName));

    // Emit result
    if (params.has("output")) {
        result.addSink(StreamingFileSink.forRowFormat(new Path(params.get("output")),
            new SimpleStringEncoder<List<Float>>()).build());
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }

    // Execute program
    env.execute("Matrix-Vector Multiplication");
}
Example #19
Source Project: pravega-samples Author: pravega File: PipelineRunner.java License: Apache License 2.0
private void parseConfigurations(String[] args) {
    log.info("ApplicationMain Main.. Arguments: {}", Arrays.asList(args));

    ParameterTool parameterTool = ParameterTool.fromArgs(args);
    log.info("Parameter Tool: {}", parameterTool.toMap());

    if (!parameterTool.has("mode")) {
        printUsage();
        System.exit(1);
    }

    String configDirPath = parameterTool.get("configDir", "conf");
    try {
        byte[] configurationData = Files.readAllBytes(Paths.get(configDirPath + File.separator + configFile));
        String jsonData = new String(configurationData);
        log.info("App Configurations raw data: {}", jsonData);
        Gson gson = new Gson();
        appConfiguration = gson.fromJson(jsonData, AppConfiguration.class);
    } catch (IOException e) {
        log.error("Could not read {}", configFile, e);
        System.exit(1);
    }

    runMode = parameterTool.getInt("mode");
    pravegaConfig = PravegaConfig.fromParams(parameterTool).withDefaultScope(DEFAULT_SCOPE);
    stream = pravegaConfig.resolve(parameterTool.get(STREAM_PARAMETER, DEFAULT_STREAM));
}
Example #20
Source Project: flink-learning Author: zhisheng17 File: SplitEvent.java License: Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    // the full stream of data from Kafka
    DataStreamSource<MetricEvent> data = KafkaConfigUtil.buildSource(env);

    SplitStream<MetricEvent> splitData = data.split(new OutputSelector<MetricEvent>() {
        @Override
        public Iterable<String> select(MetricEvent metricEvent) {
            List<String> tags = new ArrayList<>();
            String type = metricEvent.getTags().get("type");
            switch (type) {
                case "machine":
                    tags.add("machine");
                    break;
                case "docker":
                    tags.add("docker");
                    break;
                case "application":
                    tags.add("application");
                    break;
                case "middleware":
                    tags.add("middleware");
                    break;
                default:
                    break;
            }
            return tags;
        }
    });

    DataStream<MetricEvent> machine = splitData.select("machine");
    DataStream<MetricEvent> docker = splitData.select("docker");
    DataStream<MetricEvent> application = splitData.select("application");
    DataStream<MetricEvent> middleware = splitData.select("middleware");
}
Example #21
Source Project: flink Author: flink-tpc-ds File: DoubleParameterTest.java License: Apache License 2.0
@Test
public void testMaxOutOfRange() {
    parameter.setMaximumValue(0, false);
    expectedException.expect(ProgramParametrizationException.class);
    expectedException.expectMessage("test must be less than 0.0");
    parameter.configure(ParameterTool.fromArgs(new String[]{"--test", "1"}));
}
Example #22
Source Project: flink-connectors Author: pravega File: FlinkPravegaReaderTest.java License: Apache License 2.0
@Test
public void testRgScope() {
    PravegaConfig config = new PravegaConfig(new Properties(), Collections.emptyMap(),
            ParameterTool.fromMap(Collections.emptyMap()));

    // no scope
    TestableStreamingReaderBuilder builder = new TestableStreamingReaderBuilder()
            .forStream(SAMPLE_STREAM, SAMPLE_CUT)
            .withPravegaConfig(config);

    FlinkPravegaReader<Integer> reader;
    try {
        builder.buildSourceFunction();
        fail();
    } catch (IllegalStateException e) {
        // "missing reader group scope"
    }

    // default scope
    config.withDefaultScope(SAMPLE_SCOPE);
    reader = builder.buildSourceFunction();
    assertEquals(SAMPLE_SCOPE, reader.readerGroupScope);

    // explicit scope
    builder.withReaderGroupScope("myscope");
    reader = builder.buildSourceFunction();
    assertEquals("myscope", reader.readerGroupScope);
}
Example #23
Source Project: flink-training-exercises Author: ververica File: PopularPlacesSolution.java License: Apache License 2.0
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    final String input = params.get("input", ExerciseBase.pathToRideData);
    final int popThreshold = params.getInt("threshold", 20);

    final int maxEventDelay = 60;       // events are out of order by max 60 seconds
    final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second

    // set up streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(ExerciseBase.parallelism);

    // start the data generator
    DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor)));

    // find popular places
    DataStream<Tuple5<Float, Float, Long, Boolean, Integer>> popularSpots = rides
            // remove all rides which are not within NYC
            .filter(new NYCFilter())
            // match ride to grid cell and event type (start or end)
            .map(new GridCellMatcher())
            // partition by cell id and event type
            .<KeyedStream<Tuple2<Integer, Boolean>, Tuple2<Integer, Boolean>>>keyBy(0, 1)
            // build sliding window
            .timeWindow(Time.minutes(15), Time.minutes(5))
            // count ride events in window
            .apply(new RideCounter())
            // filter by popularity threshold
            .filter((Tuple4<Integer, Long, Boolean, Integer> count) -> (count.f3 >= popThreshold))
            // map grid cell to coordinates
            .map(new GridToCoordinates());

    // print result on stdout
    printOrTest(popularSpots);

    // execute the transformation pipeline
    env.execute("Popular Places");
}
Example #24
Source Project: flink Author: apache File: PeriodicStreamingJob.java License: Apache License 2.0
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    String outputPath = params.getRequired("outputPath");
    int recordsPerSecond = params.getInt("recordsPerSecond", 10);
    int duration = params.getInt("durationInSecond", 60);
    int offset = params.getInt("offsetInSecond", 0);

    StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    sEnv.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    sEnv.enableCheckpointing(4000);
    sEnv.getConfig().setAutoWatermarkInterval(1000);

    // execute a simple pass through program.
    PeriodicSourceGenerator generator = new PeriodicSourceGenerator(
        recordsPerSecond, duration, offset);
    DataStream<Tuple> rows = sEnv.addSource(generator);

    DataStream<Tuple> result = rows
        .keyBy(1)
        .timeWindow(Time.seconds(5))
        .sum(0);

    result.writeAsText(outputPath + "/result.txt", FileSystem.WriteMode.OVERWRITE)
        .setParallelism(1);

    sEnv.execute();
}
Example #25
Source Project: flink-tutorials Author: cloudera File: KafkaToHDFSAvroJob.java License: Apache License 2.0
public static void main(String[] args) throws Exception {
    ParameterTool params = Utils.parseArgs(args);
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    KafkaDeserializationSchema<Message> schema = ClouderaRegistryKafkaDeserializationSchema
            .builder(Message.class)
            .setConfig(Utils.readSchemaRegistryProperties(params))
            .build();

    FlinkKafkaConsumer<Message> consumer = new FlinkKafkaConsumer<Message>(params.getRequired(K_KAFKA_TOPIC),
            schema, Utils.readKafkaProperties(params));

    DataStream<String> source = env.addSource(consumer)
            .name("Kafka Source")
            .uid("Kafka Source")
            .map(record -> record.getId() + "," + record.getName() + "," + record.getDescription())
            .name("ToOutputString");

    StreamingFileSink<String> sink = StreamingFileSink
            .forRowFormat(new Path(params.getRequired(K_HDFS_OUTPUT)), new SimpleStringEncoder<String>("UTF-8"))
            .build();

    source.addSink(sink)
            .name("FS Sink")
            .uid("FS Sink");
    source.print();

    env.execute("Flink Streaming Secured Job Sample");
}
Example #26
Source Project: flink Author: apache File: DoubleParameterTest.java License: Apache License 2.0
@Test
public void testMinAndMaxInRange() {
    parameter.setMinimumValue(-1, false);
    parameter.setMaximumValue(1, false);
    parameter.configure(ParameterTool.fromArgs(new String[]{"--test", "0"}));
    Assert.assertEquals(new Double(0), parameter.getValue());
}
Example #27
Source Project: flink-training-exercises Author: ververica File: TaxiQuerySolution.java License: Apache License 2.0
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    final String input = params.get("input", ExerciseBase.pathToRideData);

    final int maxEventDelay = 60;        // events are out of order by at most 60 seconds
    final int servingSpeedFactor = 1800; // 30 minutes worth of events are served every second

    // set up streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(ExerciseBase.parallelism);

    // setup a stream of taxi rides
    DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor)));

    // add a socket source for the query stream
    BroadcastStream<String> queryStream = env
            .addSource(stringSourceOrTest(new SocketTextStreamFunction("localhost", 9999, "\n", -1)))
            .assignTimestampsAndWatermarks(new QueryStreamAssigner())
            .broadcast(queryDescriptor);

    // connect the two streams and process queries
    DataStream<Tuple2<String, String>> results = rides
            .keyBy((TaxiRide ride) -> ride.taxiId)
            .connect(queryStream)
            .process(new QueryProcessor());

    printOrTest(results);

    env.execute("Taxi Query");
}
Example #28
Source Project: flink Author: apache File: ChoiceParameterTest.java License: Apache License 2.0
@Test
public void testWithDefaultWithParameter() {
    parameter.setDefaultValue("default").addChoices("c0", "c1", "c2");
    Assert.assertEquals("[--choice <default | c0 | c1 | c2>]", parameter.getUsage());
    parameter.configure(ParameterTool.fromArgs(new String[]{"--choice", "c1"}));
    Assert.assertEquals("c1", parameter.getValue());
}
Example #29
Source Project: flink Author: flink-tpc-ds File: DoubleParameterTest.java License: Apache License 2.0
@Test
public void testWithDefaultWithParameter() {
    parameter.setDefaultValue(43.21);
    Assert.assertEquals("[--test TEST]", parameter.getUsage());
    parameter.configure(ParameterTool.fromArgs(new String[]{"--test", "12.34"}));
    Assert.assertEquals(new Double(12.34), parameter.getValue());
}
Example #30
Source Project: flink Author: apache File: Elasticsearch5SinkExample.java License: Apache License 2.0
private static IndexRequest createIndexRequest(String element, ParameterTool parameterTool) {
    Map<String, Object> json = new HashMap<>();
    json.put("data", element);

    return Requests.indexRequest()
        .index(parameterTool.getRequired("index"))
        .type(parameterTool.getRequired("type"))
        .id(element)
        .source(json);
}