org.apache.flink.api.java.utils.ParameterTool Java Examples
The following examples show how to use org.apache.flink.api.java.utils.ParameterTool.
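Before the examples, a minimal sketch of the core API may help. The parameter names (--host, --port, --verbose) are hypothetical, chosen only for illustration:

import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class ParameterToolSketch {
    public static void main(String[] args) throws Exception {
        // parse --key value pairs from the command line, e.g. --host localhost --port 9000
        ParameterTool params = ParameterTool.fromArgs(args);

        String host = params.get("host", "localhost"); // optional, with default
        int port = params.getInt("port");               // required; throws if missing
        boolean verbose = params.has("verbose");        // presence check

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // make the parameters visible in the Flink web interface and in rich functions
        env.getConfig().setGlobalJobParameters(params);
    }
}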
Example #1
Source File: FlinkKafkaConsumerTest1.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = buildKafkaProps(parameterTool);

    // kafka topic list
    List<String> topics = Arrays.asList(parameterTool.get("metrics.topic"), parameterTool.get("logs.topic"));
    FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(topics, new MetricSchema(), props);

    // kafka topic Pattern
    //FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(java.util.regex.Pattern.compile("test-topic-[0-9]"), new MetricSchema(), props);

    // consumer.setStartFromLatest();
    // consumer.setStartFromEarliest();

    DataStreamSource<MetricEvent> data = env.addSource(consumer);
    data.print();

    env.execute("flink kafka connector test");
}
Example #2
Source File: StreamWordCount.java From jstorm with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    ParameterTool tool = ParameterTool.fromArgs(args);
    int sourceParallel = Integer.parseInt(tool.get("s"));
    int operatorParallel = Integer.parseInt(tool.get("p"));
    System.out.println("sourceParallel: " + sourceParallel + ", operatorParallel: " + operatorParallel);
    env.setParallelism(operatorParallel);

    // get input data
    DataStream<String> text = env.addSource(new WordSource()).setParallelism(sourceParallel);

    DataStream<Tuple2<String, Integer>> counts =
            // split up the lines in pairs (2-tuples) containing: (word,1)
            text.flatMap(new LineSplitter())
                    // group by the tuple field "0" and sum up tuple field "1"
                    .keyBy(0)
                    .sum(1);

    // print the execution plan instead of executing the program
    // env.execute("StreamWordCount");
    System.out.println(env.getExecutionPlan());
}
Example #3
Source File: IterationConvergence.java From Flink-CEPplus with Apache License 2.0
@Override
public void configure(ParameterTool parameterTool) {
    if (!parameterTool.has("iterations") && !parameterTool.has("convergence_threshold")) {
        // no configuration so use default iterations and maximum threshold
        value.iterations = defaultIterations;
        value.convergenceThreshold = Double.MAX_VALUE;
    } else {
        // use configured values and maximum default for unset values
        value.iterations = parameterTool.getInt("iterations", Integer.MAX_VALUE);
        Util.checkParameter(value.iterations > 0, "iterations must be greater than zero");

        value.convergenceThreshold = parameterTool.getDouble("convergence_threshold", Double.MAX_VALUE);
        Util.checkParameter(value.convergenceThreshold > 0, "convergence threshold must be greater than zero");
    }
}
Example #4
Source File: DoubleParameter.java From Flink-CEPplus with Apache License 2.0
@Override
public void configure(ParameterTool parameterTool) {
    value = hasDefaultValue ? parameterTool.getDouble(name, defaultValue) : parameterTool.getDouble(name);

    if (hasMinimumValue) {
        if (minimumValueInclusive) {
            Util.checkParameter(value >= minimumValue, name + " must be greater than or equal to " + minimumValue);
        } else {
            Util.checkParameter(value > minimumValue, name + " must be greater than " + minimumValue);
        }
    }

    if (hasMaximumValue) {
        if (maximumValueInclusive) {
            Util.checkParameter(value <= maximumValue, name + " must be less than or equal to " + maximumValue);
        } else {
            Util.checkParameter(value < maximumValue, name + " must be less than " + maximumValue);
        }
    }
}
Example #5
Source File: GridGraph.java From Flink-CEPplus with Apache License 2.0
@Override
public void configure(ParameterTool parameterTool) throws ProgramParametrizationException {
    super.configure(parameterTool);

    // add dimensions as ordered by dimension ID (dim0, dim1, dim2, ...)
    Map<Integer, String> dimensionMap = new TreeMap<>();

    // first parse all dimensions into a sorted map
    for (String key : parameterTool.toMap().keySet()) {
        if (key.startsWith(PREFIX)) {
            int dimensionId = Integer.parseInt(key.substring(PREFIX.length()));
            dimensionMap.put(dimensionId, parameterTool.get(key));
        }
    }

    // then store dimensions in order
    for (String field : dimensionMap.values()) {
        dimensions.add(new Dimension(field));
    }
}
Example #6
Source File: FlinkKafkaSchemaTest1.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = buildKafkaProps(parameterTool);

    // kafka topic list
    List<String> topics = Arrays.asList(parameterTool.get("metrics.topic"));
    FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(topics,
            new KafkaDeserializationSchemaWrapper<>(new MetricSchema()), props);

    DataStreamSource<MetricEvent> data = env.addSource(consumer);
    data.print();

    env.execute("flink kafka connector test");
}
Example #7
Source File: AvroDataGeneratorJob.java From flink-tutorials with Apache License 2.0
public static void main(String[] args) throws Exception {
    ParameterTool params = Utils.parseArgs(args);
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    KafkaSerializationSchema<Message> schema = ClouderaRegistryKafkaSerializationSchema
            .<Message>builder(params.getRequired(K_KAFKA_TOPIC))
            .setConfig(Utils.readSchemaRegistryProperties(params))
            .setKey(Message::getId)
            .build();

    FlinkKafkaProducer<Message> kafkaSink = new FlinkKafkaProducer<>(
            "default", schema,
            Utils.readKafkaProperties(params),
            FlinkKafkaProducer.Semantic.AT_LEAST_ONCE);

    DataStream<Message> input = env.addSource(new DataGeneratorSource()).name("Data Generator Source");

    input.addSink(kafkaSink)
            .name("Kafka Sink")
            .uid("Kafka Sink");

    input.print();

    env.execute("Data Generator Job");
}
Example #8
Source File: DoubleParameter.java From flink with Apache License 2.0
@Override
public void configure(ParameterTool parameterTool) {
    value = hasDefaultValue ? parameterTool.getDouble(name, defaultValue) : parameterTool.getDouble(name);

    if (hasMinimumValue) {
        if (minimumValueInclusive) {
            Util.checkParameter(value >= minimumValue, name + " must be greater than or equal to " + minimumValue);
        } else {
            Util.checkParameter(value > minimumValue, name + " must be greater than " + minimumValue);
        }
    }

    if (hasMaximumValue) {
        if (maximumValueInclusive) {
            Util.checkParameter(value <= maximumValue, name + " must be less than or equal to " + maximumValue);
        } else {
            Util.checkParameter(value < maximumValue, name + " must be less than " + maximumValue);
        }
    }
}
Example #9
Source File: KafkaExampleUtil.java From Flink-CEPplus with Apache License 2.0
public static StreamExecutionEnvironment prepareExecutionEnv(ParameterTool parameterTool) throws Exception {
    if (parameterTool.getNumberOfParameters() < 5) {
        System.out.println("Missing parameters!\n" +
                "Usage: Kafka --input-topic <topic> --output-topic <topic> " +
                "--bootstrap.servers <kafka brokers> " +
                "--zookeeper.connect <zk quorum> --group.id <some id>");
        throw new Exception("Missing parameters!\n" +
                "Usage: Kafka --input-topic <topic> --output-topic <topic> " +
                "--bootstrap.servers <kafka brokers> " +
                "--zookeeper.connect <zk quorum> --group.id <some id>");
    }

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();
    env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000));
    env.enableCheckpointing(5000); // create a checkpoint every 5 seconds
    env.getConfig().setGlobalJobParameters(parameterTool); // make parameters available in the web interface
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    return env;
}
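This helper is typically invoked from a job's main method, as Example #15 below does; a minimal calling sketch:

public static void main(String[] args) throws Exception {
    // parse command-line arguments and hand them to the shared setup helper
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);
    // ... build Kafka sources and sinks using parameterTool.getProperties() ...
}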
Example #10
Source File: FailureRateRestartStrategyMain.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(ParameterTool.fromArgs(args));

    // restart every 10 seconds; stop the job if it restarts three times within two minutes
    env.setRestartStrategy(RestartStrategies.failureRateRestart(3, Time.minutes(2), Time.seconds(10)));

    env.addSource(new SourceFunction<Long>() {
        @Override
        public void run(SourceContext<Long> sourceContext) throws Exception {
            while (true) {
                sourceContext.collect(null);
            }
        }

        @Override
        public void cancel() {
        }
    })
            .map((MapFunction<Long, Long>) aLong -> aLong / 1)
            .print();

    env.execute("zhisheng failureRate Restart Strategy example");
}
Example #11
Source File: DataSetFineGrainedRecoveryTestProgram.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final String latchFilePath = params.getRequired("latchFilePath");
    final String outputPath = params.getRequired("outputPath");

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setExecutionMode(ExecutionMode.BATCH_FORCED);
    env.setParallelism(4);

    env.generateSequence(0, 1000)
            .map(new BlockingIncrementingMapFunction(latchFilePath))
            .writeAsText(outputPath, FileSystem.WriteMode.OVERWRITE)
            .setParallelism(1);

    env.execute();
}
Example #12
Source File: Main.java From flink-learning with Apache License 2.0
private static void writeEventToHbase(String string, ParameterTool parameterTool) throws IOException {
    Configuration configuration = HBaseConfiguration.create();
    configuration.set(HBASE_ZOOKEEPER_QUORUM, parameterTool.get(HBASE_ZOOKEEPER_QUORUM));
    configuration.set(HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT, parameterTool.get(HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT));
    configuration.set(HBASE_RPC_TIMEOUT, parameterTool.get(HBASE_RPC_TIMEOUT));
    configuration.set(HBASE_CLIENT_OPERATION_TIMEOUT, parameterTool.get(HBASE_CLIENT_OPERATION_TIMEOUT));
    configuration.set(HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, parameterTool.get(HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD));

    Connection connect = ConnectionFactory.createConnection(configuration);
    Admin admin = connect.getAdmin();
    if (!admin.tableExists(HBASE_TABLE_NAME)) { // check whether the table exists; create it if not
        admin.createTable(new HTableDescriptor(HBASE_TABLE_NAME).addFamily(new HColumnDescriptor(INFO_STREAM)));
    }
    Table table = connect.getTable(HBASE_TABLE_NAME);

    TimeStamp ts = new TimeStamp(new Date());
    Date date = ts.getDate();
    Put put = new Put(Bytes.toBytes(date.getTime()));
    put.addColumn(Bytes.toBytes(INFO_STREAM), Bytes.toBytes("test"), Bytes.toBytes(string));
    table.put(put);
    table.close();
    connect.close();
}
Example #13
Source File: DataSetBrocastMain.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // 1. the data set to broadcast
    DataSet<Integer> toBroadcast = env.fromElements(1, 2, 3);

    env.fromElements("a", "b")
            .map(new RichMapFunction<String, String>() {
                List<Integer> broadcastData;

                @Override
                public void open(Configuration parameters) throws Exception {
                    // 3. retrieve the broadcast DataSet as a Collection
                    broadcastData = getRuntimeContext().getBroadcastVariable("zhisheng");
                }

                @Override
                public String map(String value) throws Exception {
                    return broadcastData.get(1) + value;
                }
            }).withBroadcastSet(toBroadcast, "zhisheng") // 2. broadcast the DataSet
            .print();
}
Example #14
Source File: Sink2ES6Main.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    DataStreamSource<MetricEvent> data = KafkaConfigUtil.buildSource(env);

    List<HttpHost> esAddresses = ESSinkUtil.getEsAddresses(parameterTool.get(ELASTICSEARCH_HOSTS));
    int bulkSize = parameterTool.getInt(ELASTICSEARCH_BULK_FLUSH_MAX_ACTIONS, 40);
    int sinkParallelism = parameterTool.getInt(STREAM_SINK_PARALLELISM, 5);

    log.info("-----esAddresses = {}, parameterTool = {}, ", esAddresses, parameterTool);

    ESSinkUtil.addSink(esAddresses, bulkSize, sinkParallelism, data,
            (MetricEvent metric, RuntimeContext runtimeContext, RequestIndexer requestIndexer) -> {
                requestIndexer.add(Requests.indexRequest()
                        .index(ZHISHENG + "_" + metric.getName())
                        .type(ZHISHENG)
                        .source(GsonUtil.toJSONBytes(metric), XContentType.JSON));
            }, parameterTool);

    env.execute("flink learning connectors es6");
}
Example #15
Source File: KafkaExample.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    DataStream<KafkaEvent> input = env
            .addSource(
                    new FlinkKafkaConsumer<>(
                            parameterTool.getRequired("input-topic"),
                            new KafkaEventSchema(),
                            parameterTool.getProperties())
                            .assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
            .keyBy("word")
            .map(new RollingAdditionMapper());

    input.addSink(
            new FlinkKafkaProducer<>(
                    parameterTool.getRequired("output-topic"),
                    new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()),
                    parameterTool.getProperties(),
                    FlinkKafkaProducer.Semantic.EXACTLY_ONCE));

    env.execute("Modern Kafka Example");
}
Example #16
Source File: RideCleansingSolution.java From flink-training-exercises with Apache License 2.0
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    final String input = params.get("input", pathToRideData);

    final int maxEventDelay = 60;       // events are out of order by max 60 seconds
    final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second

    // set up streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(ExerciseBase.parallelism);

    // start the data generator
    DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor)));

    DataStream<TaxiRide> filteredRides = rides
            // keep only those rides that both start and end in NYC
            .filter(new NYCFilter());

    // print the filtered stream
    printOrTest(filteredRides);

    // run the cleansing pipeline
    env.execute("Taxi Ride Cleansing");
}
Example #17
Source File: DataStreamAllroundTestJobFactory.java From Flink-CEPplus with Apache License 2.0
static SourceFunction<Event> createEventSource(ParameterTool pt) {
    return new SequenceGeneratorSource(
            pt.getInt(
                    SEQUENCE_GENERATOR_SRC_KEYSPACE.key(),
                    SEQUENCE_GENERATOR_SRC_KEYSPACE.defaultValue()),
            pt.getInt(
                    SEQUENCE_GENERATOR_SRC_PAYLOAD_SIZE.key(),
                    SEQUENCE_GENERATOR_SRC_PAYLOAD_SIZE.defaultValue()),
            pt.getLong(
                    SEQUENCE_GENERATOR_SRC_EVENT_TIME_MAX_OUT_OF_ORDERNESS.key(),
                    SEQUENCE_GENERATOR_SRC_EVENT_TIME_MAX_OUT_OF_ORDERNESS.defaultValue()),
            pt.getLong(
                    SEQUENCE_GENERATOR_SRC_EVENT_TIME_CLOCK_PROGRESS.key(),
                    SEQUENCE_GENERATOR_SRC_EVENT_TIME_CLOCK_PROGRESS.defaultValue()),
            pt.getLong(
                    SEQUENCE_GENERATOR_SRC_SLEEP_TIME.key(),
                    SEQUENCE_GENERATOR_SRC_SLEEP_TIME.defaultValue()),
            pt.getLong(
                    SEQUENCE_GENERATOR_SRC_SLEEP_AFTER_ELEMENTS.key(),
                    SEQUENCE_GENERATOR_SRC_SLEEP_AFTER_ELEMENTS.defaultValue()));
}
Example #18
Source File: MatrixVectorMul.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    System.out.println("Usage: MatrixVectorMul [--output <path>] [--dimension <dimension> --data-size <data_size>] [--resource-name <resource_name>]");

    // Set up the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    final int dimension = params.getInt("dimension", DEFAULT_DIM);
    final int dataSize = params.getInt("data-size", DEFAULT_DATA_SIZE);
    final String resourceName = params.get("resource-name", DEFAULT_RESOURCE_NAME);

    DataStream<List<Float>> result = env.addSource(new RandomVectorSource(dimension, dataSize))
            .map(new Multiplier(dimension, resourceName));

    // Emit result
    if (params.has("output")) {
        result.addSink(StreamingFileSink.forRowFormat(new Path(params.get("output")),
                new SimpleStringEncoder<List<Float>>()).build());
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }

    // Execute program
    env.execute("Matrix-Vector Multiplication");
}
Example #19
Source File: PipelineRunner.java From pravega-samples with Apache License 2.0
private void parseConfigurations(String[] args) {
    log.info("ApplicationMain Main.. Arguments: {}", Arrays.asList(args));

    ParameterTool parameterTool = ParameterTool.fromArgs(args);
    log.info("Parameter Tool: {}", parameterTool.toMap());

    if (!parameterTool.has("mode")) {
        printUsage();
        System.exit(1);
    }

    String configDirPath = parameterTool.get("configDir", "conf");
    try {
        byte[] configurationData = Files.readAllBytes(Paths.get(configDirPath + File.separator + configFile));
        String jsonData = new String(configurationData);
        log.info("App Configurations raw data: {}", jsonData);
        Gson gson = new Gson();
        appConfiguration = gson.fromJson(jsonData, AppConfiguration.class);
    } catch (IOException e) {
        log.error("Could not read {}", configFile, e);
        System.exit(1);
    }

    runMode = parameterTool.getInt("mode");
    pravegaConfig = PravegaConfig.fromParams(parameterTool).withDefaultScope(DEFAULT_SCOPE);
    stream = pravegaConfig.resolve(parameterTool.get(STREAM_PARAMETER, DEFAULT_STREAM));
}
Example #20
Source File: SplitEvent.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    // the full stream of data from Kafka
    DataStreamSource<MetricEvent> data = KafkaConfigUtil.buildSource(env);

    SplitStream<MetricEvent> splitData = data.split(new OutputSelector<MetricEvent>() {
        @Override
        public Iterable<String> select(MetricEvent metricEvent) {
            List<String> tags = new ArrayList<>();
            String type = metricEvent.getTags().get("type");
            switch (type) {
                case "machine":
                    tags.add("machine");
                    break;
                case "docker":
                    tags.add("docker");
                    break;
                case "application":
                    tags.add("application");
                    break;
                case "middleware":
                    tags.add("middleware");
                    break;
                default:
                    break;
            }
            return tags;
        }
    });

    DataStream<MetricEvent> machine = splitData.select("machine");
    DataStream<MetricEvent> docker = splitData.select("docker");
    DataStream<MetricEvent> application = splitData.select("application");
    DataStream<MetricEvent> middleware = splitData.select("middleware");
}
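Note that SplitStream, split(), and select() were deprecated in later Flink releases in favor of side outputs. The following is not the original author's code, just a minimal sketch of the side-output alternative for one tag, assuming the same MetricEvent type:

// route the "docker" tag to a side output; the other tags would get their own OutputTags
final OutputTag<MetricEvent> dockerTag = new OutputTag<MetricEvent>("docker") {};

SingleOutputStreamOperator<MetricEvent> mainStream = data
        .process(new ProcessFunction<MetricEvent, MetricEvent>() {
            @Override
            public void processElement(MetricEvent value, Context ctx, Collector<MetricEvent> out) {
                if ("docker".equals(value.getTags().get("type"))) {
                    ctx.output(dockerTag, value); // emit to the side output
                } else {
                    out.collect(value);           // everything else stays on the main stream
                }
            }
        });

DataStream<MetricEvent> docker = mainStream.getSideOutput(dockerTag);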
Example #21
Source File: DoubleParameterTest.java From flink with Apache License 2.0
@Test
public void testMaxOutOfRange() {
    parameter.setMaximumValue(0, false);

    expectedException.expect(ProgramParametrizationException.class);
    expectedException.expectMessage("test must be less than 0.0");

    parameter.configure(ParameterTool.fromArgs(new String[]{"--test", "1"}));
}
Example #22
Source File: FlinkPravegaReaderTest.java From flink-connectors with Apache License 2.0
@Test
public void testRgScope() {
    PravegaConfig config = new PravegaConfig(new Properties(), Collections.emptyMap(),
            ParameterTool.fromMap(Collections.emptyMap()));

    // no scope
    TestableStreamingReaderBuilder builder = new TestableStreamingReaderBuilder()
            .forStream(SAMPLE_STREAM, SAMPLE_CUT)
            .withPravegaConfig(config);

    FlinkPravegaReader<Integer> reader;
    try {
        builder.buildSourceFunction();
        fail();
    } catch (IllegalStateException e) {
        // "missing reader group scope"
    }

    // default scope
    config.withDefaultScope(SAMPLE_SCOPE);
    reader = builder.buildSourceFunction();
    assertEquals(SAMPLE_SCOPE, reader.readerGroupScope);

    // explicit scope
    builder.withReaderGroupScope("myscope");
    reader = builder.buildSourceFunction();
    assertEquals("myscope", reader.readerGroupScope);
}
Example #23
Source File: PopularPlacesSolution.java From flink-training-exercises with Apache License 2.0
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    final String input = params.get("input", ExerciseBase.pathToRideData);
    final int popThreshold = params.getInt("threshold", 20);

    final int maxEventDelay = 60;       // events are out of order by max 60 seconds
    final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second

    // set up streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(ExerciseBase.parallelism);

    // start the data generator
    DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor)));

    // find popular places
    DataStream<Tuple5<Float, Float, Long, Boolean, Integer>> popularSpots = rides
            // remove all rides which are not within NYC
            .filter(new NYCFilter())
            // match ride to grid cell and event type (start or end)
            .map(new GridCellMatcher())
            // partition by cell id and event type
            .<KeyedStream<Tuple2<Integer, Boolean>, Tuple2<Integer, Boolean>>>keyBy(0, 1)
            // build sliding window
            .timeWindow(Time.minutes(15), Time.minutes(5))
            // count ride events in window
            .apply(new RideCounter())
            // filter by popularity threshold
            .filter((Tuple4<Integer, Long, Boolean, Integer> count) -> (count.f3 >= popThreshold))
            // map grid cell to coordinates
            .map(new GridToCoordinates());

    // print result on stdout
    printOrTest(popularSpots);

    // execute the transformation pipeline
    env.execute("Popular Places");
}
Example #24
Source File: PeriodicStreamingJob.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    String outputPath = params.getRequired("outputPath");
    int recordsPerSecond = params.getInt("recordsPerSecond", 10);
    int duration = params.getInt("durationInSecond", 60);
    int offset = params.getInt("offsetInSecond", 0);

    StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    sEnv.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    sEnv.enableCheckpointing(4000);
    sEnv.getConfig().setAutoWatermarkInterval(1000);

    // execute a simple pass through program.
    PeriodicSourceGenerator generator = new PeriodicSourceGenerator(recordsPerSecond, duration, offset);
    DataStream<Tuple> rows = sEnv.addSource(generator);

    DataStream<Tuple> result = rows
            .keyBy(1)
            .timeWindow(Time.seconds(5))
            .sum(0);

    result.writeAsText(outputPath + "/result.txt", FileSystem.WriteMode.OVERWRITE)
            .setParallelism(1);

    sEnv.execute();
}
Example #25
Source File: KafkaToHDFSAvroJob.java From flink-tutorials with Apache License 2.0
public static void main(String[] args) throws Exception {
    ParameterTool params = Utils.parseArgs(args);
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    KafkaDeserializationSchema<Message> schema = ClouderaRegistryKafkaDeserializationSchema
            .builder(Message.class)
            .setConfig(Utils.readSchemaRegistryProperties(params))
            .build();

    FlinkKafkaConsumer<Message> consumer = new FlinkKafkaConsumer<>(params.getRequired(K_KAFKA_TOPIC), schema, Utils.readKafkaProperties(params));

    DataStream<String> source = env.addSource(consumer)
            .name("Kafka Source")
            .uid("Kafka Source")
            .map(record -> record.getId() + "," + record.getName() + "," + record.getDescription())
            .name("ToOutputString");

    StreamingFileSink<String> sink = StreamingFileSink
            .forRowFormat(new Path(params.getRequired(K_HDFS_OUTPUT)), new SimpleStringEncoder<String>("UTF-8"))
            .build();

    source.addSink(sink)
            .name("FS Sink")
            .uid("FS Sink");

    source.print();

    env.execute("Flink Streaming Secured Job Sample");
}
Example #26
Source File: DoubleParameterTest.java From flink with Apache License 2.0
@Test
public void testMinAndMaxInRange() {
    parameter.setMinimumValue(-1, false);
    parameter.setMaximumValue(1, false);
    parameter.configure(ParameterTool.fromArgs(new String[]{"--test", "0"}));
    Assert.assertEquals(new Double(0), parameter.getValue());
}
Example #27
Source File: TaxiQuerySolution.java From flink-training-exercises with Apache License 2.0
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    final String input = params.get("input", ExerciseBase.pathToRideData);

    final int maxEventDelay = 60;        // events are out of order by at most 60 seconds
    final int servingSpeedFactor = 1800; // 30 minutes worth of events are served every second

    // set up streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(ExerciseBase.parallelism);

    // setup a stream of taxi rides
    DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor)));

    // add a socket source for the query stream
    BroadcastStream<String> queryStream = env
            .addSource(stringSourceOrTest(new SocketTextStreamFunction("localhost", 9999, "\n", -1)))
            .assignTimestampsAndWatermarks(new QueryStreamAssigner())
            .broadcast(queryDescriptor);

    // connect the two streams and process queries
    DataStream<Tuple2<String, String>> results = rides
            .keyBy((TaxiRide ride) -> ride.taxiId)
            .connect(queryStream)
            .process(new QueryProcessor());

    printOrTest(results);

    env.execute("Taxi Query");
}
Example #28
Source File: ChoiceParameterTest.java From flink with Apache License 2.0
@Test
public void testWithDefaultWithParameter() {
    parameter.setDefaultValue("default").addChoices("c0", "c1", "c2");
    Assert.assertEquals("[--choice <default | c0 | c1 | c2>]", parameter.getUsage());

    parameter.configure(ParameterTool.fromArgs(new String[]{"--choice", "c1"}));
    Assert.assertEquals("c1", parameter.getValue());
}
Example #29
Source File: DoubleParameterTest.java From flink with Apache License 2.0
@Test
public void testWithDefaultWithParameter() {
    parameter.setDefaultValue(43.21);
    Assert.assertEquals("[--test TEST]", parameter.getUsage());

    parameter.configure(ParameterTool.fromArgs(new String[]{"--test", "12.34"}));
    Assert.assertEquals(new Double(12.34), parameter.getValue());
}
Example #30
Source File: Elasticsearch5SinkExample.java From flink with Apache License 2.0
private static IndexRequest createIndexRequest(String element, ParameterTool parameterTool) {
    Map<String, Object> json = new HashMap<>();
    json.put("data", element);

    return Requests.indexRequest()
            .index(parameterTool.getRequired("index"))
            .type(parameterTool.getRequired("type"))
            .id(element)
            .source(json);
}
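A request factory like this is typically handed to the connector through an ElasticsearchSinkFunction. A minimal sketch, assuming String elements and a parameterTool variable in scope:

// build one index request per incoming element and hand it to the indexer
ElasticsearchSinkFunction<String> sinkFunction = new ElasticsearchSinkFunction<String>() {
    @Override
    public void process(String element, RuntimeContext ctx, RequestIndexer indexer) {
        indexer.add(createIndexRequest(element, parameterTool));
    }
};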