Java Code Examples for org.apache.flink.api.java.utils.ParameterTool#has()
The following examples show how to use org.apache.flink.api.java.utils.ParameterTool#has().
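Before the examples, here is a minimal, self-contained sketch of the common pattern: has() only reports whether a key was supplied on the command line, so it is typically paired with get()/getInt() or with a default value. The class name and the argument names (--output, --parallelism) below are illustrative, not taken from any of the examples.

import org.apache.flink.api.java.utils.ParameterTool;

public class HasExample {

    public static void main(String[] args) {
        // e.g. run with: --output /tmp/result --parallelism 4
        ParameterTool params = ParameterTool.fromArgs(args);

        // has() returns true only if the key was passed as an argument
        if (params.has("output")) {
            System.out.println("Writing to " + params.get("output"));
        } else {
            System.out.println("No --output given, printing to stdout.");
        }

        // for optional values, a default is often simpler than an explicit has() check
        int parallelism = params.getInt("parallelism", 1);
        System.out.println("Parallelism: " + parallelism);
    }
}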
Example 1
Source File: LongParameter.java From flink with Apache License 2.0 | 6 votes |
@Override
public void configure(ParameterTool parameterTool) {
    if (hasDefaultValue && !parameterTool.has(name)) {
        // skip checks for min and max when using default value
        value = defaultValue;
    } else {
        value = parameterTool.getLong(name);

        if (hasMinimumValue) {
            Util.checkParameter(value >= minimumValue,
                name + " must be greater than or equal to " + minimumValue);
        }

        if (hasMaximumValue) {
            Util.checkParameter(value <= maximumValue,
                name + " must be less than or equal to " + maximumValue);
        }
    }
}
Example 2
Source File: MatrixVectorMul.java From flink with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    System.out.println("Usage: MatrixVectorMul [--output <path>] [--dimension <dimension> --data-size <data_size>] [--resource-name <resource_name>]");

    // Set up the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    final int dimension = params.getInt("dimension", DEFAULT_DIM);
    final int dataSize = params.getInt("data-size", DEFAULT_DATA_SIZE);
    final String resourceName = params.get("resource-name", DEFAULT_RESOURCE_NAME);

    DataStream<List<Float>> result = env.addSource(new RandomVectorSource(dimension, dataSize))
        .map(new Multiplier(dimension, resourceName));

    // Emit result
    if (params.has("output")) {
        result.addSink(StreamingFileSink.forRowFormat(new Path(params.get("output")),
            new SimpleStringEncoder<List<Float>>()).build());
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }

    // Execute program
    env.execute("Matrix-Vector Multiplication");
}
Example 3
Source File: KafkaThroughput.java From flink-perf with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException, UnknownHostException, InterruptedException {
    final ParameterTool pt = ParameterTool.fromArgs(args);

    TopologyBuilder builder = new TopologyBuilder();
    BrokerHosts hosts = new ZkHosts(pt.getRequired("zookeeper"));
    SpoutConfig spoutConfig = new SpoutConfig(hosts, pt.getRequired("topic"), "/" + pt.getRequired("topic"), UUID.randomUUID().toString());
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

    builder.setSpout("source", kafkaSpout, pt.getInt("sourceParallelism"));
    builder.setBolt("sink", new Throughput.Sink(pt), pt.getInt("sinkParallelism")).noneGrouping("source");

    Config conf = new Config();
    conf.setDebug(false);

    if (!pt.has("local")) {
        conf.setNumWorkers(pt.getInt("par", 2));
        StormSubmitter.submitTopologyWithProgressBar("kafka-spout-" + pt.get("name", "no_name"), conf, builder.createTopology());
    } else {
        conf.setMaxTaskParallelism(pt.getInt("par", 2));
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("kafka-spout", conf, builder.createTopology());
        Thread.sleep(300000);
        cluster.shutdown();
    }
}
Example 4
Source File: TridentThroughput.java From flink-perf with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    int par = pt.getInt("para");

    TridentTopology topology = new TridentTopology();
    Stream sourceStream = topology.newStream("source", new Generator(pt)).parallelismHint(pt.getInt("sourceParallelism"));

    Stream repart = sourceStream.partitionBy(new Fields("id"));
    for (int i = 0; i < pt.getInt("repartitions", 1) - 1; i++) {
        repart = repart.each(new Fields("id"), new IdentityEach(), new Fields("id" + i)).partitionBy(new Fields("id" + i));
    }
    repart.each(new Fields("id", "host", "time", "payload"), new Sink(pt), new Fields("dontcare")).parallelismHint(pt.getInt("sinkParallelism"));

    Config conf = new Config();
    conf.setDebug(false);

    if (!pt.has("local")) {
        conf.setNumWorkers(par);
        StormSubmitter.submitTopologyWithProgressBar("throughput-" + pt.get("name", "no_name"), conf, topology.build());
    } else {
        conf.setMaxTaskParallelism(par);
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("throughput", conf, topology.build());
        Thread.sleep(30000);
        cluster.shutdown();
    }
}
Example 5
Source File: WebLogAnalysis.java From flink with Apache License 2.0 | 5 votes |
private static DataSet<Tuple2<String, String>> getDocumentsDataSet(ExecutionEnvironment env, ParameterTool params) {
    // Create DataSet for documents relation (URL, Doc-Text)
    if (params.has("documents")) {
        return env.readCsvFile(params.get("documents"))
            .fieldDelimiter("|")
            .types(String.class, String.class);
    } else {
        System.out.println("Executing WebLogAnalysis example with default documents data set.");
        System.out.println("Use --documents to specify file input.");
        return WebLogData.getDocumentDataSet(env);
    }
}
Example 6
Source File: ConnectedComponents.java From flink with Apache License 2.0 | 5 votes |
private static DataSet<Tuple2<Long, Long>> getEdgeDataSet(ExecutionEnvironment env, ParameterTool params) {
    if (params.has("edges")) {
        return env.readCsvFile(params.get("edges"))
            .fieldDelimiter(" ")
            .types(Long.class, Long.class);
    } else {
        System.out.println("Executing Connected Components example with default edges data set.");
        System.out.println("Use --edges to specify file input.");
        return ConnectedComponentsData.getDefaultEdgeDataSet(env);
    }
}
Example 7
Source File: TopSpeedWindowing.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    final ParameterTool params = ParameterTool.fromArgs(args);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.getConfig().setGlobalJobParameters(params);

    @SuppressWarnings({"rawtypes", "serial"})
    DataStream<Tuple4<Integer, Integer, Double, Long>> carData;
    if (params.has("input")) {
        carData = env.readTextFile(params.get("input")).map(new ParseCarData());
    } else {
        System.out.println("Executing TopSpeedWindowing example with default input data set.");
        System.out.println("Use --input to specify file input.");
        carData = env.addSource(CarSource.create(2));
    }

    int evictionSec = 10;
    double triggerMeters = 50;
    DataStream<Tuple4<Integer, Integer, Double, Long>> topSpeeds = carData
        .assignTimestampsAndWatermarks(new CarTimestamp())
        .keyBy(0)
        .window(GlobalWindows.create())
        .evictor(TimeEvictor.of(Time.of(evictionSec, TimeUnit.SECONDS)))
        .trigger(DeltaTrigger.of(triggerMeters,
            new DeltaFunction<Tuple4<Integer, Integer, Double, Long>>() {
                private static final long serialVersionUID = 1L;

                @Override
                public double getDelta(
                        Tuple4<Integer, Integer, Double, Long> oldDataPoint,
                        Tuple4<Integer, Integer, Double, Long> newDataPoint) {
                    return newDataPoint.f2 - oldDataPoint.f2;
                }
            }, carData.getType().createSerializer(env.getConfig())))
        .maxBy(1);

    if (params.has("output")) {
        topSpeeds.writeAsText(params.get("output"));
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        topSpeeds.print();
    }

    env.execute("CarTopSpeedWindowingExample");
}
Example 8
Source File: ElasticsearchSinkBase.java From flink with Apache License 2.0 | 4 votes |
public ElasticsearchSinkBase(
        ElasticsearchApiCallBridge<C> callBridge,
        Map<String, String> userConfig,
        ElasticsearchSinkFunction<T> elasticsearchSinkFunction,
        ActionRequestFailureHandler failureHandler) {

    this.callBridge = checkNotNull(callBridge);
    this.elasticsearchSinkFunction = checkNotNull(elasticsearchSinkFunction);
    this.failureHandler = checkNotNull(failureHandler);

    // we eagerly check if the user-provided sink function and failure handler is serializable;
    // otherwise, if they aren't serializable, users will merely get a non-informative error message
    // "ElasticsearchSinkBase is not serializable"

    checkArgument(InstantiationUtil.isSerializable(elasticsearchSinkFunction),
        "The implementation of the provided ElasticsearchSinkFunction is not serializable. " +
            "The object probably contains or references non-serializable fields.");

    checkArgument(InstantiationUtil.isSerializable(failureHandler),
        "The implementation of the provided ActionRequestFailureHandler is not serializable. " +
            "The object probably contains or references non-serializable fields.");

    // extract and remove bulk processor related configuration from the user-provided config,
    // so that the resulting user config only contains configuration related to the Elasticsearch client.

    checkNotNull(userConfig);

    // copy config so we can remove entries without side-effects
    userConfig = new HashMap<>(userConfig);

    ParameterTool params = ParameterTool.fromMap(userConfig);

    if (params.has(CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS)) {
        bulkProcessorFlushMaxActions = params.getInt(CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS);
        userConfig.remove(CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS);
    } else {
        bulkProcessorFlushMaxActions = null;
    }

    if (params.has(CONFIG_KEY_BULK_FLUSH_MAX_SIZE_MB)) {
        bulkProcessorFlushMaxSizeMb = params.getInt(CONFIG_KEY_BULK_FLUSH_MAX_SIZE_MB);
        userConfig.remove(CONFIG_KEY_BULK_FLUSH_MAX_SIZE_MB);
    } else {
        bulkProcessorFlushMaxSizeMb = null;
    }

    if (params.has(CONFIG_KEY_BULK_FLUSH_INTERVAL_MS)) {
        bulkProcessorFlushIntervalMillis = params.getLong(CONFIG_KEY_BULK_FLUSH_INTERVAL_MS);
        userConfig.remove(CONFIG_KEY_BULK_FLUSH_INTERVAL_MS);
    } else {
        bulkProcessorFlushIntervalMillis = null;
    }

    boolean bulkProcessorFlushBackoffEnable = params.getBoolean(CONFIG_KEY_BULK_FLUSH_BACKOFF_ENABLE, true);
    userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_ENABLE);

    if (bulkProcessorFlushBackoffEnable) {
        this.bulkProcessorFlushBackoffPolicy = new BulkFlushBackoffPolicy();

        if (params.has(CONFIG_KEY_BULK_FLUSH_BACKOFF_TYPE)) {
            bulkProcessorFlushBackoffPolicy.setBackoffType(FlushBackoffType.valueOf(params.get(CONFIG_KEY_BULK_FLUSH_BACKOFF_TYPE)));
            userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_TYPE);
        }

        if (params.has(CONFIG_KEY_BULK_FLUSH_BACKOFF_RETRIES)) {
            bulkProcessorFlushBackoffPolicy.setMaxRetryCount(params.getInt(CONFIG_KEY_BULK_FLUSH_BACKOFF_RETRIES));
            userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_RETRIES);
        }

        if (params.has(CONFIG_KEY_BULK_FLUSH_BACKOFF_DELAY)) {
            bulkProcessorFlushBackoffPolicy.setDelayMillis(params.getLong(CONFIG_KEY_BULK_FLUSH_BACKOFF_DELAY));
            userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_DELAY);
        }
    } else {
        bulkProcessorFlushBackoffPolicy = null;
    }

    this.userConfig = userConfig;
}
Example 9
Source File: WebLogAnalysis.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    final ParameterTool params = ParameterTool.fromArgs(args);

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    // get input data
    DataSet<Tuple2<String, String>> documents = getDocumentsDataSet(env, params);
    DataSet<Tuple3<Integer, String, Integer>> ranks = getRanksDataSet(env, params);
    DataSet<Tuple2<String, String>> visits = getVisitsDataSet(env, params);

    // Retain documents with keywords
    DataSet<Tuple1<String>> filterDocs = documents
        .filter(new FilterDocByKeyWords())
        .project(0);

    // Filter ranks by minimum rank
    DataSet<Tuple3<Integer, String, Integer>> filterRanks = ranks
        .filter(new FilterByRank());

    // Filter visits by visit date
    DataSet<Tuple1<String>> filterVisits = visits
        .filter(new FilterVisitsByDate())
        .project(0);

    // Join the filtered documents and ranks, i.e., get all URLs with min rank and keywords
    DataSet<Tuple3<Integer, String, Integer>> joinDocsRanks =
        filterDocs.join(filterRanks)
            .where(0).equalTo(1)
            .projectSecond(0, 1, 2);

    // Anti-join urls with visits, i.e., retain all URLs which have NOT been visited in a certain time
    DataSet<Tuple3<Integer, String, Integer>> result =
        joinDocsRanks.coGroup(filterVisits)
            .where(1).equalTo(0)
            .with(new AntiJoinVisits());

    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", "|");
        // execute program
        env.execute("WebLogAnalysis Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
Example 10
Source File: EnumTriangles.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // read input data
    DataSet<Edge> edges;
    if (params.has("edges")) {
        edges = env.readCsvFile(params.get("edges"))
            .fieldDelimiter(" ")
            .includeFields(true, true)
            .types(Integer.class, Integer.class)
            .map(new TupleEdgeConverter());
    } else {
        System.out.println("Executing EnumTriangles example with default edges data set.");
        System.out.println("Use --edges to specify file input.");
        edges = EnumTrianglesData.getDefaultEdgeDataSet(env);
    }

    // project edges by vertex id
    DataSet<Edge> edgesById = edges
        .map(new EdgeByIdProjector());

    DataSet<Triad> triangles = edgesById
        // build triads
        .groupBy(Edge.V1).sortGroup(Edge.V2, Order.ASCENDING).reduceGroup(new TriadBuilder())
        // filter triads
        .join(edgesById).where(Triad.V2, Triad.V3).equalTo(Edge.V1, Edge.V2).with(new TriadFilter());

    // emit result
    if (params.has("output")) {
        triangles.writeAsCsv(params.get("output"), "\n", ",");
        // execute program
        env.execute("Basic Triangle Enumeration Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        triangles.print();
    }
}
Example 11
Source File: BooleanParameter.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Override
public void configure(ParameterTool parameterTool) {
    value = parameterTool.has(name);
}
Example 12
Source File: TPCHQuery10.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    if (!params.has("customer") && !params.has("orders") && !params.has("lineitem") && !params.has("nation")) {
        System.err.println("  This program expects data from the TPC-H benchmark as input data.");
        System.err.println("  Due to legal restrictions, we can not ship generated data.");
        System.err.println("  You can find the TPC-H data generator at http://www.tpc.org/tpch/.");
        System.err.println("  Usage: TPCHQuery10 --customer <path> --orders <path> --lineitem <path> --nation <path> [--output <path>]");
        return;
    }

    // get customer data set: (custkey, name, address, nationkey, acctbal)
    DataSet<Tuple5<Integer, String, String, Integer, Double>> customers =
        getCustomerDataSet(env, params.get("customer"));

    // get orders data set: (orderkey, custkey, orderdate)
    DataSet<Tuple3<Integer, Integer, String>> orders =
        getOrdersDataSet(env, params.get("orders"));

    // get lineitem data set: (orderkey, extendedprice, discount, returnflag)
    DataSet<Tuple4<Integer, Double, Double, String>> lineitems =
        getLineitemDataSet(env, params.get("lineitem"));

    // get nation data set: (nationkey, name)
    DataSet<Tuple2<Integer, String>> nations =
        getNationsDataSet(env, params.get("nation"));

    // orders filtered by year: (orderkey, custkey)
    DataSet<Tuple2<Integer, Integer>> ordersFilteredByYear =
        // filter by year
        orders.filter(order -> Integer.parseInt(order.f2.substring(0, 4)) > 1990)
            // project fields out that are no longer required
            .project(0, 1);

    // lineitems filtered by flag: (orderkey, revenue)
    DataSet<Tuple2<Integer, Double>> lineitemsFilteredByFlag =
        // filter by flag
        lineitems.filter(lineitem -> lineitem.f3.equals("R"))
            // compute revenue and project out return flag
            // revenue per item = l_extendedprice * (1 - l_discount)
            .map(lineitem -> new Tuple2<>(lineitem.f0, lineitem.f1 * (1 - lineitem.f2)))
            .returns(Types.TUPLE(Types.INT, Types.DOUBLE)); // for lambda with generics

    // join orders with lineitems: (custkey, revenue)
    DataSet<Tuple2<Integer, Double>> revenueByCustomer =
        ordersFilteredByYear.joinWithHuge(lineitemsFilteredByFlag)
            .where(0).equalTo(0)
            .projectFirst(1).projectSecond(1);

    revenueByCustomer = revenueByCustomer.groupBy(0).aggregate(Aggregations.SUM, 1);

    // join customer with nation (custkey, name, address, nationname, acctbal)
    DataSet<Tuple5<Integer, String, String, String, Double>> customerWithNation = customers
        .joinWithTiny(nations)
        .where(3).equalTo(0)
        .projectFirst(0, 1, 2).projectSecond(1).projectFirst(4);

    // join customer (with nation) with revenue (custkey, name, address, nationname, acctbal, revenue)
    DataSet<Tuple6<Integer, String, String, String, Double, Double>> result =
        customerWithNation.join(revenueByCustomer)
            .where(0).equalTo(0)
            .projectFirst(0, 1, 2, 3, 4).projectSecond(1);

    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", "|");
        // execute program
        env.execute("TPCH Query 10 Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
Example 13
Source File: SocketWindowWordCount.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    // the host and the port to connect to
    final String hostname;
    final int port;
    try {
        final ParameterTool params = ParameterTool.fromArgs(args);
        hostname = params.has("hostname") ? params.get("hostname") : "localhost";
        port = params.getInt("port");
    } catch (Exception e) {
        System.err.println("No port specified. Please run 'SocketWindowWordCount " +
            "--hostname <hostname> --port <port>', where hostname (localhost by default) " +
            "and port is the address of the text server");
        System.err.println("To start a simple text server, run 'netcat -l <port>' and " +
            "type the input text into the command line");
        return;
    }

    // get the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // get input data by connecting to the socket
    DataStream<String> text = env.socketTextStream(hostname, port, "\n");

    // parse the data, group it, window it, and aggregate the counts
    DataStream<WordWithCount> windowCounts = text

        .flatMap(new FlatMapFunction<String, WordWithCount>() {
            @Override
            public void flatMap(String value, Collector<WordWithCount> out) {
                for (String word : value.split("\\s")) {
                    out.collect(new WordWithCount(word, 1L));
                }
            }
        })

        .keyBy("word")
        .timeWindow(Time.seconds(5))

        .reduce(new ReduceFunction<WordWithCount>() {
            @Override
            public WordWithCount reduce(WordWithCount a, WordWithCount b) {
                return new WordWithCount(a.word, a.count + b.count);
            }
        });

    // print the results with a single thread, rather than in parallel
    windowCounts.print().setParallelism(1);

    env.execute("Socket Window WordCount");
}
Example 14
Source File: KMeans.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params); // make parameters available in the web interface

    // get input data:
    // read the points and centroids from the provided paths or fall back to default data
    DataSet<Point> points = getPointDataSet(params, env);
    DataSet<Centroid> centroids = getCentroidDataSet(params, env);

    // set number of bulk iterations for KMeans algorithm
    IterativeDataSet<Centroid> loop = centroids.iterate(params.getInt("iterations", 10));

    DataSet<Centroid> newCentroids = points
        // compute closest centroid for each point
        .map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids")
        // count and sum point coordinates for each centroid
        .map(new CountAppender())
        .groupBy(0).reduce(new CentroidAccumulator())
        // compute new centroids from point counts and coordinate sums
        .map(new CentroidAverager());

    // feed new centroids back into next iteration
    DataSet<Centroid> finalCentroids = loop.closeWith(newCentroids);

    DataSet<Tuple2<Integer, Point>> clusteredPoints = points
        // assign points to final clusters
        .map(new SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids");

    // emit result
    if (params.has("output")) {
        clusteredPoints.writeAsCsv(params.get("output"), "\n", " ");

        // since file sinks are lazy, we trigger the execution explicitly
        env.execute("KMeans Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        clusteredPoints.print();
    }
}
Example 15
Source File: BooleanParameter.java From flink with Apache License 2.0 | 4 votes |
@Override
public void configure(ParameterTool parameterTool) {
    value = parameterTool.has(name);
}
Example 16
Source File: TopSpeedWindowing.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    final ParameterTool params = ParameterTool.fromArgs(args);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.getConfig().setGlobalJobParameters(params);

    @SuppressWarnings({"rawtypes", "serial"})
    DataStream<Tuple4<Integer, Integer, Double, Long>> carData;
    if (params.has("input")) {
        carData = env.readTextFile(params.get("input")).map(new ParseCarData());
    } else {
        System.out.println("Executing TopSpeedWindowing example with default input data set.");
        System.out.println("Use --input to specify file input.");
        carData = env.addSource(CarSource.create(2));
    }

    int evictionSec = 10;
    double triggerMeters = 50;
    DataStream<Tuple4<Integer, Integer, Double, Long>> topSpeeds = carData
        .assignTimestampsAndWatermarks(new CarTimestamp())
        .keyBy(0)
        .window(GlobalWindows.create())
        .evictor(TimeEvictor.of(Time.of(evictionSec, TimeUnit.SECONDS)))
        .trigger(DeltaTrigger.of(triggerMeters,
            new DeltaFunction<Tuple4<Integer, Integer, Double, Long>>() {
                private static final long serialVersionUID = 1L;

                @Override
                public double getDelta(
                        Tuple4<Integer, Integer, Double, Long> oldDataPoint,
                        Tuple4<Integer, Integer, Double, Long> newDataPoint) {
                    return newDataPoint.f2 - oldDataPoint.f2;
                }
            }, carData.getType().createSerializer(env.getConfig())))
        .maxBy(1);

    if (params.has("output")) {
        topSpeeds.writeAsText(params.get("output"));
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        topSpeeds.print();
    }

    env.execute("CarTopSpeedWindowingExample");
}
Example 17
Source File: AdvertisingTopologyNative.java From streaming-benchmarks with Apache License 2.0 | 4 votes |
public static void main(final String[] args) throws Exception {

    ParameterTool parameterTool = ParameterTool.fromArgs(args);

    Map conf = Utils.findAndReadConfigFile(parameterTool.getRequired("confPath"), true);
    int kafkaPartitions = ((Number) conf.get("kafka.partitions")).intValue();
    int hosts = ((Number) conf.get("process.hosts")).intValue();
    int cores = ((Number) conf.get("process.cores")).intValue();

    ParameterTool flinkBenchmarkParams = ParameterTool.fromMap(getFlinkConfs(conf));

    LOG.info("conf: {}", conf);
    LOG.info("Parameters used: {}", flinkBenchmarkParams.toMap());

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(flinkBenchmarkParams);

    // Set the buffer timeout (default 100)
    // Lowering the timeout will lead to lower latencies, but will eventually reduce throughput.
    env.setBufferTimeout(flinkBenchmarkParams.getLong("flink.buffer-timeout", 100));

    if (flinkBenchmarkParams.has("flink.checkpoint-interval")) {
        // enable checkpointing for fault tolerance
        env.enableCheckpointing(flinkBenchmarkParams.getLong("flink.checkpoint-interval", 1000));
    }

    // set default parallelism for all operators (recommended value: number of available worker CPU cores in the cluster (hosts * cores))
    env.setParallelism(hosts * cores);

    DataStream<String> messageStream = env
        .addSource(new FlinkKafkaConsumer082<String>(
            flinkBenchmarkParams.getRequired("topic"),
            new SimpleStringSchema(),
            flinkBenchmarkParams.getProperties())).setParallelism(Math.min(hosts * cores, kafkaPartitions));

    messageStream
        .rebalance()
        // Parse the String as JSON
        .flatMap(new DeserializeBolt())

        // Filter the records if event type is "view"
        .filter(new EventFilterBolt())

        // project the event
        .<Tuple2<String, String>>project(2, 5)

        // perform join with redis data
        .flatMap(new RedisJoinBolt())

        // process campaign
        .keyBy(0)
        .flatMap(new CampaignProcessor());

    env.execute();
}
Example 18
Source File: TwitterExample.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    System.out.println("Usage: TwitterExample [--output <path>] " +
        "[--twitter-source.consumerKey <key> --twitter-source.consumerSecret <secret> --twitter-source.token <token> --twitter-source.tokenSecret <tokenSecret>]");

    // set up the execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    env.setParallelism(params.getInt("parallelism", 1));

    // get input data
    DataStream<String> streamSource;
    if (params.has(TwitterSource.CONSUMER_KEY) &&
            params.has(TwitterSource.CONSUMER_SECRET) &&
            params.has(TwitterSource.TOKEN) &&
            params.has(TwitterSource.TOKEN_SECRET)) {
        streamSource = env.addSource(new TwitterSource(params.getProperties()));
    } else {
        System.out.println("Executing TwitterStream example with default props.");
        System.out.println("Use --twitter-source.consumerKey <key> --twitter-source.consumerSecret <secret> " +
            "--twitter-source.token <token> --twitter-source.tokenSecret <tokenSecret> specify the authentication info.");
        // get default test text data
        streamSource = env.fromElements(TwitterExampleData.TEXTS);
    }

    DataStream<Tuple2<String, Integer>> tweets = streamSource
        // selecting English tweets and splitting to (word, 1)
        .flatMap(new SelectEnglishAndTokenizeFlatMap())
        // group by words and sum their occurrences
        .keyBy(0).sum(1);

    // emit result
    if (params.has("output")) {
        tweets.writeAsText(params.get("output"));
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        tweets.print();
    }

    // execute program
    env.execute("Twitter Streaming Example");
}
Example 19
Source File: TridentForwardThroughput.java From flink-perf with Apache License 2.0 | 3 votes |
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    int par = pt.getInt("para");

    TridentTopology topology = new TridentTopology();
    Stream sourceStream = topology.newStream("source", new Generator(pt)).parallelismHint(pt.getInt("sourceParallelism"));

    sourceStream.localOrShuffle().each(FIELDS, new Sink(pt), new Fields("dontcare"));

    Config conf = new Config();
    conf.setDebug(false);
    // conf.setMaxSpoutPending(pt.getInt("maxPending", 1000));
    // System.exit(1);

    if (!pt.has("local")) {
        conf.setNumWorkers(par);
        StormSubmitter.submitTopologyWithProgressBar("forward-throughput-" + pt.get("name", "no_name"), conf, topology.build());
    } else {
        conf.setMaxTaskParallelism(par);
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("forward-throughput", conf, topology.build());
        Thread.sleep(300000);
        cluster.shutdown();
    }
}