Java Code Examples for org.apache.flink.api.java.utils.ParameterTool#has()
The following examples show how to use org.apache.flink.api.java.utils.ParameterTool#has().
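Before the examples, here is a minimal, self-contained sketch of the common pattern: has() only reports whether a key was supplied on the command line, so it is typically paired with get()/getInt() or with a default value. The class name and the argument names (--output, --parallelism) below are illustrative, not taken from any of the examples.

import org.apache.flink.api.java.utils.ParameterTool;

public class HasExample {

    public static void main(String[] args) {
        // e.g. run with: --output /tmp/result --parallelism 4
        ParameterTool params = ParameterTool.fromArgs(args);

        // has() returns true only if the key was passed as an argument
        if (params.has("output")) {
            System.out.println("Writing to " + params.get("output"));
        } else {
            System.out.println("No --output given, printing to stdout.");
        }

        // for optional values, a default is often simpler than an explicit has() check
        int parallelism = params.getInt("parallelism", 1);
        System.out.println("Parallelism: " + parallelism);
    }
}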
Example 1
Source File: LongParameter.java From flink with Apache License 2.0 | 6 votes |
@Override
public void configure(ParameterTool parameterTool) {
    if (hasDefaultValue && !parameterTool.has(name)) {
        // skip checks for min and max when using default value
        value = defaultValue;
    } else {
        value = parameterTool.getLong(name);

        if (hasMinimumValue) {
            Util.checkParameter(value >= minimumValue,
                name + " must be greater than or equal to " + minimumValue);
        }

        if (hasMaximumValue) {
            Util.checkParameter(value <= maximumValue,
                name + " must be less than or equal to " + maximumValue);
        }
    }
}
Example 2
Source File: MatrixVectorMul.java From flink with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    System.out.println("Usage: MatrixVectorMul [--output <path>] [--dimension <dimension> --data-size <data_size>] [--resource-name <resource_name>]");

    // Set up the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    final int dimension = params.getInt("dimension", DEFAULT_DIM);
    final int dataSize = params.getInt("data-size", DEFAULT_DATA_SIZE);
    final String resourceName = params.get("resource-name", DEFAULT_RESOURCE_NAME);

    DataStream<List<Float>> result = env.addSource(new RandomVectorSource(dimension, dataSize))
        .map(new Multiplier(dimension, resourceName));

    // Emit result
    if (params.has("output")) {
        result.addSink(StreamingFileSink.forRowFormat(new Path(params.get("output")),
            new SimpleStringEncoder<List<Float>>()).build());
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }

    // Execute program
    env.execute("Matrix-Vector Multiplication");
}
Example 3
Source File: KafkaThroughput.java From flink-perf with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException, UnknownHostException, InterruptedException {
    final ParameterTool pt = ParameterTool.fromArgs(args);

    TopologyBuilder builder = new TopologyBuilder();
    BrokerHosts hosts = new ZkHosts(pt.getRequired("zookeeper"));
    SpoutConfig spoutConfig = new SpoutConfig(hosts, pt.getRequired("topic"), "/" + pt.getRequired("topic"), UUID.randomUUID().toString());
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

    builder.setSpout("source", kafkaSpout, pt.getInt("sourceParallelism"));
    builder.setBolt("sink", new Throughput.Sink(pt), pt.getInt("sinkParallelism")).noneGrouping("source");

    Config conf = new Config();
    conf.setDebug(false);

    if (!pt.has("local")) {
        conf.setNumWorkers(pt.getInt("par", 2));
        StormSubmitter.submitTopologyWithProgressBar("kafka-spout-" + pt.get("name", "no_name"), conf, builder.createTopology());
    } else {
        conf.setMaxTaskParallelism(pt.getInt("par", 2));
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("kafka-spout", conf, builder.createTopology());
        Thread.sleep(300000);
        cluster.shutdown();
    }
}
Example 4
Source File: TridentThroughput.java From flink-perf with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    int par = pt.getInt("para");

    TridentTopology topology = new TridentTopology();
    Stream sourceStream = topology.newStream("source", new Generator(pt)).parallelismHint(pt.getInt("sourceParallelism"));

    Stream repart = sourceStream.partitionBy(new Fields("id"));
    for (int i = 0; i < pt.getInt("repartitions", 1) - 1; i++) {
        repart = repart.each(new Fields("id"), new IdentityEach(), new Fields("id" + i)).partitionBy(new Fields("id" + i));
    }
    repart.each(new Fields("id", "host", "time", "payload"), new Sink(pt), new Fields("dontcare")).parallelismHint(pt.getInt("sinkParallelism"));

    Config conf = new Config();
    conf.setDebug(false);

    if (!pt.has("local")) {
        conf.setNumWorkers(par);
        StormSubmitter.submitTopologyWithProgressBar("throughput-" + pt.get("name", "no_name"), conf, topology.build());
    } else {
        conf.setMaxTaskParallelism(par);
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("throughput", conf, topology.build());
        Thread.sleep(30000);
        cluster.shutdown();
    }
}
Example 5
Source File: WebLogAnalysis.java From flink with Apache License 2.0 | 5 votes |
private static DataSet<Tuple2<String, String>> getDocumentsDataSet(ExecutionEnvironment env, ParameterTool params) {
    // Create DataSet for documents relation (URL, Doc-Text)
    if (params.has("documents")) {
        return env.readCsvFile(params.get("documents"))
            .fieldDelimiter("|")
            .types(String.class, String.class);
    } else {
        System.out.println("Executing WebLogAnalysis example with default documents data set.");
        System.out.println("Use --documents to specify file input.");
        return WebLogData.getDocumentDataSet(env);
    }
}
Example 6
Source File: ConnectedComponents.java From flink with Apache License 2.0 | 5 votes |
private static DataSet<Tuple2<Long, Long>> getEdgeDataSet(ExecutionEnvironment env, ParameterTool params) {
    if (params.has("edges")) {
        return env.readCsvFile(params.get("edges"))
            .fieldDelimiter(" ")
            .types(Long.class, Long.class);
    } else {
        System.out.println("Executing Connected Components example with default edges data set.");
        System.out.println("Use --edges to specify file input.");
        return ConnectedComponentsData.getDefaultEdgeDataSet(env);
    }
}
Example 7
Source File: TopSpeedWindowing.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    final ParameterTool params = ParameterTool.fromArgs(args);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.getConfig().setGlobalJobParameters(params);

    @SuppressWarnings({"rawtypes", "serial"})
    DataStream<Tuple4<Integer, Integer, Double, Long>> carData;
    if (params.has("input")) {
        carData = env.readTextFile(params.get("input")).map(new ParseCarData());
    } else {
        System.out.println("Executing TopSpeedWindowing example with default input data set.");
        System.out.println("Use --input to specify file input.");
        carData = env.addSource(CarSource.create(2));
    }

    int evictionSec = 10;
    double triggerMeters = 50;
    DataStream<Tuple4<Integer, Integer, Double, Long>> topSpeeds = carData
        .assignTimestampsAndWatermarks(new CarTimestamp())
        .keyBy(0)
        .window(GlobalWindows.create())
        .evictor(TimeEvictor.of(Time.of(evictionSec, TimeUnit.SECONDS)))
        .trigger(DeltaTrigger.of(triggerMeters,
            new DeltaFunction<Tuple4<Integer, Integer, Double, Long>>() {
                private static final long serialVersionUID = 1L;

                @Override
                public double getDelta(
                        Tuple4<Integer, Integer, Double, Long> oldDataPoint,
                        Tuple4<Integer, Integer, Double, Long> newDataPoint) {
                    return newDataPoint.f2 - oldDataPoint.f2;
                }
            }, carData.getType().createSerializer(env.getConfig())))
        .maxBy(1);

    if (params.has("output")) {
        topSpeeds.writeAsText(params.get("output"));
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        topSpeeds.print();
    }

    env.execute("CarTopSpeedWindowingExample");
}
Example 8
Source File: ElasticsearchSinkBase.java From flink with Apache License 2.0 | 4 votes |
public ElasticsearchSinkBase(
        ElasticsearchApiCallBridge<C> callBridge,
        Map<String, String> userConfig,
        ElasticsearchSinkFunction<T> elasticsearchSinkFunction,
        ActionRequestFailureHandler failureHandler) {

    this.callBridge = checkNotNull(callBridge);
    this.elasticsearchSinkFunction = checkNotNull(elasticsearchSinkFunction);
    this.failureHandler = checkNotNull(failureHandler);

    // we eagerly check if the user-provided sink function and failure handler is serializable;
    // otherwise, if they aren't serializable, users will merely get a non-informative error message
    // "ElasticsearchSinkBase is not serializable"

    checkArgument(InstantiationUtil.isSerializable(elasticsearchSinkFunction),
        "The implementation of the provided ElasticsearchSinkFunction is not serializable. " +
            "The object probably contains or references non-serializable fields.");

    checkArgument(InstantiationUtil.isSerializable(failureHandler),
        "The implementation of the provided ActionRequestFailureHandler is not serializable. " +
            "The object probably contains or references non-serializable fields.");

    // extract and remove bulk processor related configuration from the user-provided config,
    // so that the resulting user config only contains configuration related to the Elasticsearch client.

    checkNotNull(userConfig);

    // copy config so we can remove entries without side-effects
    userConfig = new HashMap<>(userConfig);

    ParameterTool params = ParameterTool.fromMap(userConfig);

    if (params.has(CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS)) {
        bulkProcessorFlushMaxActions = params.getInt(CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS);
        userConfig.remove(CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS);
    } else {
        bulkProcessorFlushMaxActions = null;
    }

    if (params.has(CONFIG_KEY_BULK_FLUSH_MAX_SIZE_MB)) {
        bulkProcessorFlushMaxSizeMb = params.getInt(CONFIG_KEY_BULK_FLUSH_MAX_SIZE_MB);
        userConfig.remove(CONFIG_KEY_BULK_FLUSH_MAX_SIZE_MB);
    } else {
        bulkProcessorFlushMaxSizeMb = null;
    }

    if (params.has(CONFIG_KEY_BULK_FLUSH_INTERVAL_MS)) {
        bulkProcessorFlushIntervalMillis = params.getLong(CONFIG_KEY_BULK_FLUSH_INTERVAL_MS);
        userConfig.remove(CONFIG_KEY_BULK_FLUSH_INTERVAL_MS);
    } else {
        bulkProcessorFlushIntervalMillis = null;
    }

    boolean bulkProcessorFlushBackoffEnable = params.getBoolean(CONFIG_KEY_BULK_FLUSH_BACKOFF_ENABLE, true);
    userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_ENABLE);

    if (bulkProcessorFlushBackoffEnable) {
        this.bulkProcessorFlushBackoffPolicy = new BulkFlushBackoffPolicy();

        if (params.has(CONFIG_KEY_BULK_FLUSH_BACKOFF_TYPE)) {
            bulkProcessorFlushBackoffPolicy.setBackoffType(FlushBackoffType.valueOf(params.get(CONFIG_KEY_BULK_FLUSH_BACKOFF_TYPE)));
            userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_TYPE);
        }

        if (params.has(CONFIG_KEY_BULK_FLUSH_BACKOFF_RETRIES)) {
            bulkProcessorFlushBackoffPolicy.setMaxRetryCount(params.getInt(CONFIG_KEY_BULK_FLUSH_BACKOFF_RETRIES));
            userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_RETRIES);
        }

        if (params.has(CONFIG_KEY_BULK_FLUSH_BACKOFF_DELAY)) {
            bulkProcessorFlushBackoffPolicy.setDelayMillis(params.getLong(CONFIG_KEY_BULK_FLUSH_BACKOFF_DELAY));
            userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_DELAY);
        }
    } else {
        bulkProcessorFlushBackoffPolicy = null;
    }

    this.userConfig = userConfig;
}
Example 9
Source File: WebLogAnalysis.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    final ParameterTool params = ParameterTool.fromArgs(args);

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    // get input data
    DataSet<Tuple2<String, String>> documents = getDocumentsDataSet(env, params);
    DataSet<Tuple3<Integer, String, Integer>> ranks = getRanksDataSet(env, params);
    DataSet<Tuple2<String, String>> visits = getVisitsDataSet(env, params);

    // Retain documents with keywords
    DataSet<Tuple1<String>> filterDocs = documents
        .filter(new FilterDocByKeyWords())
        .project(0);

    // Filter ranks by minimum rank
    DataSet<Tuple3<Integer, String, Integer>> filterRanks = ranks
        .filter(new FilterByRank());

    // Filter visits by visit date
    DataSet<Tuple1<String>> filterVisits = visits
        .filter(new FilterVisitsByDate())
        .project(0);

    // Join the filtered documents and ranks, i.e., get all URLs with min rank and keywords
    DataSet<Tuple3<Integer, String, Integer>> joinDocsRanks =
        filterDocs.join(filterRanks)
            .where(0).equalTo(1)
            .projectSecond(0, 1, 2);

    // Anti-join urls with visits, i.e., retain all URLs which have NOT been visited in a certain time
    DataSet<Tuple3<Integer, String, Integer>> result =
        joinDocsRanks.coGroup(filterVisits)
            .where(1).equalTo(0)
            .with(new AntiJoinVisits());

    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", "|");
        // execute program
        env.execute("WebLogAnalysis Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
Example 10
Source File: EnumTriangles.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // read input data
    DataSet<Edge> edges;
    if (params.has("edges")) {
        edges = env.readCsvFile(params.get("edges"))
            .fieldDelimiter(" ")
            .includeFields(true, true)
            .types(Integer.class, Integer.class)
            .map(new TupleEdgeConverter());
    } else {
        System.out.println("Executing EnumTriangles example with default edges data set.");
        System.out.println("Use --edges to specify file input.");
        edges = EnumTrianglesData.getDefaultEdgeDataSet(env);
    }

    // project edges by vertex id
    DataSet<Edge> edgesById = edges
        .map(new EdgeByIdProjector());

    DataSet<Triad> triangles = edgesById
        // build triads
        .groupBy(Edge.V1).sortGroup(Edge.V2, Order.ASCENDING).reduceGroup(new TriadBuilder())
        // filter triads
        .join(edgesById).where(Triad.V2, Triad.V3).equalTo(Edge.V1, Edge.V2).with(new TriadFilter());

    // emit result
    if (params.has("output")) {
        triangles.writeAsCsv(params.get("output"), "\n", ",");
        // execute program
        env.execute("Basic Triangle Enumeration Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        triangles.print();
    }
}
Example 11
Source File: BooleanParameter.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Override
public void configure(ParameterTool parameterTool) {
    value = parameterTool.has(name);
}
Example 12
Source File: TPCHQuery10.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    if (!params.has("customer") && !params.has("orders") && !params.has("lineitem") && !params.has("nation")) {
        System.err.println("  This program expects data from the TPC-H benchmark as input data.");
        System.err.println("  Due to legal restrictions, we can not ship generated data.");
        System.err.println("  You can find the TPC-H data generator at http://www.tpc.org/tpch/.");
        System.err.println("  Usage: TPCHQuery10 --customer <path> --orders <path> --lineitem <path> --nation <path> [--output <path>]");
        return;
    }

    // get customer data set: (custkey, name, address, nationkey, acctbal)
    DataSet<Tuple5<Integer, String, String, Integer, Double>> customers =
        getCustomerDataSet(env, params.get("customer"));

    // get orders data set: (orderkey, custkey, orderdate)
    DataSet<Tuple3<Integer, Integer, String>> orders =
        getOrdersDataSet(env, params.get("orders"));

    // get lineitem data set: (orderkey, extendedprice, discount, returnflag)
    DataSet<Tuple4<Integer, Double, Double, String>> lineitems =
        getLineitemDataSet(env, params.get("lineitem"));

    // get nation data set: (nationkey, name)
    DataSet<Tuple2<Integer, String>> nations =
        getNationsDataSet(env, params.get("nation"));

    // orders filtered by year: (orderkey, custkey)
    DataSet<Tuple2<Integer, Integer>> ordersFilteredByYear =
        // filter by year
        orders.filter(order -> Integer.parseInt(order.f2.substring(0, 4)) > 1990)
            // project fields out that are no longer required
            .project(0, 1);

    // lineitems filtered by flag: (orderkey, revenue)
    DataSet<Tuple2<Integer, Double>> lineitemsFilteredByFlag =
        // filter by flag
        lineitems.filter(lineitem -> lineitem.f3.equals("R"))
            // compute revenue and project out return flag
            // revenue per item = l_extendedprice * (1 - l_discount)
            .map(lineitem -> new Tuple2<>(lineitem.f0, lineitem.f1 * (1 - lineitem.f2)))
            .returns(Types.TUPLE(Types.INT, Types.DOUBLE)); // for lambda with generics

    // join orders with lineitems: (custkey, revenue)
    DataSet<Tuple2<Integer, Double>> revenueByCustomer =
        ordersFilteredByYear.joinWithHuge(lineitemsFilteredByFlag)
            .where(0).equalTo(0)
            .projectFirst(1).projectSecond(1);

    revenueByCustomer = revenueByCustomer.groupBy(0).aggregate(Aggregations.SUM, 1);

    // join customer with nation (custkey, name, address, nationname, acctbal)
    DataSet<Tuple5<Integer, String, String, String, Double>> customerWithNation = customers
        .joinWithTiny(nations)
        .where(3).equalTo(0)
        .projectFirst(0, 1, 2).projectSecond(1).projectFirst(4);

    // join customer (with nation) with revenue (custkey, name, address, nationname, acctbal, revenue)
    DataSet<Tuple6<Integer, String, String, String, Double, Double>> result =
        customerWithNation.join(revenueByCustomer)
            .where(0).equalTo(0)
            .projectFirst(0, 1, 2, 3, 4).projectSecond(1);

    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", "|");
        // execute program
        env.execute("TPCH Query 10 Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
Example 13
Source File: SocketWindowWordCount.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    // the host and the port to connect to
    final String hostname;
    final int port;
    try {
        final ParameterTool params = ParameterTool.fromArgs(args);
        hostname = params.has("hostname") ? params.get("hostname") : "localhost";
        port = params.getInt("port");
    } catch (Exception e) {
        System.err.println("No port specified. Please run 'SocketWindowWordCount " +
            "--hostname <hostname> --port <port>', where hostname (localhost by default) " +
            "and port is the address of the text server");
        System.err.println("To start a simple text server, run 'netcat -l <port>' and " +
            "type the input text into the command line");
        return;
    }

    // get the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // get input data by connecting to the socket
    DataStream<String> text = env.socketTextStream(hostname, port, "\n");

    // parse the data, group it, window it, and aggregate the counts
    DataStream<WordWithCount> windowCounts = text

        .flatMap(new FlatMapFunction<String, WordWithCount>() {
            @Override
            public void flatMap(String value, Collector<WordWithCount> out) {
                for (String word : value.split("\\s")) {
                    out.collect(new WordWithCount(word, 1L));
                }
            }
        })

        .keyBy("word")
        .timeWindow(Time.seconds(5))

        .reduce(new ReduceFunction<WordWithCount>() {
            @Override
            public WordWithCount reduce(WordWithCount a, WordWithCount b) {
                return new WordWithCount(a.word, a.count + b.count);
            }
        });

    // print the results with a single thread, rather than in parallel
    windowCounts.print().setParallelism(1);

    env.execute("Socket Window WordCount");
}
Example 14
Source File: KMeans.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params); // make parameters available in the web interface

    // get input data:
    // read the points and centroids from the provided paths or fall back to default data
    DataSet<Point> points = getPointDataSet(params, env);
    DataSet<Centroid> centroids = getCentroidDataSet(params, env);

    // set number of bulk iterations for KMeans algorithm
    IterativeDataSet<Centroid> loop = centroids.iterate(params.getInt("iterations", 10));

    DataSet<Centroid> newCentroids = points
        // compute closest centroid for each point
        .map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids")
        // count and sum point coordinates for each centroid
        .map(new CountAppender())
        .groupBy(0).reduce(new CentroidAccumulator())
        // compute new centroids from point counts and coordinate sums
        .map(new CentroidAverager());

    // feed new centroids back into next iteration
    DataSet<Centroid> finalCentroids = loop.closeWith(newCentroids);

    DataSet<Tuple2<Integer, Point>> clusteredPoints = points
        // assign points to final clusters
        .map(new SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids");

    // emit result
    if (params.has("output")) {
        clusteredPoints.writeAsCsv(params.get("output"), "\n", " ");

        // since file sinks are lazy, we trigger the execution explicitly
        env.execute("KMeans Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        clusteredPoints.print();
    }
}
Example 15
Source File: BooleanParameter.java From flink with Apache License 2.0 | 4 votes |
@Override
public void configure(ParameterTool parameterTool) {
    value = parameterTool.has(name);
}
Example 16
Source File: TopSpeedWindowing.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    final ParameterTool params = ParameterTool.fromArgs(args);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.getConfig().setGlobalJobParameters(params);

    @SuppressWarnings({"rawtypes", "serial"})
    DataStream<Tuple4<Integer, Integer, Double, Long>> carData;
    if (params.has("input")) {
        carData = env.readTextFile(params.get("input")).map(new ParseCarData());
    } else {
        System.out.println("Executing TopSpeedWindowing example with default input data set.");
        System.out.println("Use --input to specify file input.");
        carData = env.addSource(CarSource.create(2));
    }

    int evictionSec = 10;
    double triggerMeters = 50;
    DataStream<Tuple4<Integer, Integer, Double, Long>> topSpeeds = carData
        .assignTimestampsAndWatermarks(new CarTimestamp())
        .keyBy(0)
        .window(GlobalWindows.create())
        .evictor(TimeEvictor.of(Time.of(evictionSec, TimeUnit.SECONDS)))
        .trigger(DeltaTrigger.of(triggerMeters,
            new DeltaFunction<Tuple4<Integer, Integer, Double, Long>>() {
                private static final long serialVersionUID = 1L;

                @Override
                public double getDelta(
                        Tuple4<Integer, Integer, Double, Long> oldDataPoint,
                        Tuple4<Integer, Integer, Double, Long> newDataPoint) {
                    return newDataPoint.f2 - oldDataPoint.f2;
                }
            }, carData.getType().createSerializer(env.getConfig())))
        .maxBy(1);

    if (params.has("output")) {
        topSpeeds.writeAsText(params.get("output"));
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        topSpeeds.print();
    }

    env.execute("CarTopSpeedWindowingExample");
}
Example 17
Source File: AdvertisingTopologyNative.java From streaming-benchmarks with Apache License 2.0 | 4 votes |
public static void main(final String[] args) throws Exception {

    ParameterTool parameterTool = ParameterTool.fromArgs(args);

    Map conf = Utils.findAndReadConfigFile(parameterTool.getRequired("confPath"), true);
    int kafkaPartitions = ((Number) conf.get("kafka.partitions")).intValue();
    int hosts = ((Number) conf.get("process.hosts")).intValue();
    int cores = ((Number) conf.get("process.cores")).intValue();

    ParameterTool flinkBenchmarkParams = ParameterTool.fromMap(getFlinkConfs(conf));

    LOG.info("conf: {}", conf);
    LOG.info("Parameters used: {}", flinkBenchmarkParams.toMap());

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(flinkBenchmarkParams);

    // Set the buffer timeout (default 100)
    // Lowering the timeout will lead to lower latencies, but will eventually reduce throughput.
    env.setBufferTimeout(flinkBenchmarkParams.getLong("flink.buffer-timeout", 100));

    if (flinkBenchmarkParams.has("flink.checkpoint-interval")) {
        // enable checkpointing for fault tolerance
        env.enableCheckpointing(flinkBenchmarkParams.getLong("flink.checkpoint-interval", 1000));
    }

    // set default parallelism for all operators (recommended value: number of available worker CPU cores in the cluster (hosts * cores))
    env.setParallelism(hosts * cores);

    DataStream<String> messageStream = env
        .addSource(new FlinkKafkaConsumer082<String>(
            flinkBenchmarkParams.getRequired("topic"),
            new SimpleStringSchema(),
            flinkBenchmarkParams.getProperties())).setParallelism(Math.min(hosts * cores, kafkaPartitions));

    messageStream
        .rebalance()
        // Parse the String as JSON
        .flatMap(new DeserializeBolt())

        // Filter the records if event type is "view"
        .filter(new EventFilterBolt())

        // project the event
        .<Tuple2<String, String>>project(2, 5)

        // perform join with redis data
        .flatMap(new RedisJoinBolt())

        // process campaign
        .keyBy(0)
        .flatMap(new CampaignProcessor());

    env.execute();
}
Example 18
Source File: TwitterExample.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    System.out.println("Usage: TwitterExample [--output <path>] " +
        "[--twitter-source.consumerKey <key> --twitter-source.consumerSecret <secret> --twitter-source.token <token> --twitter-source.tokenSecret <tokenSecret>]");

    // set up the execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    env.setParallelism(params.getInt("parallelism", 1));

    // get input data
    DataStream<String> streamSource;
    if (params.has(TwitterSource.CONSUMER_KEY) &&
            params.has(TwitterSource.CONSUMER_SECRET) &&
            params.has(TwitterSource.TOKEN) &&
            params.has(TwitterSource.TOKEN_SECRET)) {
        streamSource = env.addSource(new TwitterSource(params.getProperties()));
    } else {
        System.out.println("Executing TwitterStream example with default props.");
        System.out.println("Use --twitter-source.consumerKey <key> --twitter-source.consumerSecret <secret> " +
            "--twitter-source.token <token> --twitter-source.tokenSecret <tokenSecret> specify the authentication info.");
        // get default test text data
        streamSource = env.fromElements(TwitterExampleData.TEXTS);
    }

    DataStream<Tuple2<String, Integer>> tweets = streamSource
        // selecting English tweets and splitting to (word, 1)
        .flatMap(new SelectEnglishAndTokenizeFlatMap())
        // group by words and sum their occurrences
        .keyBy(0).sum(1);

    // emit result
    if (params.has("output")) {
        tweets.writeAsText(params.get("output"));
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        tweets.print();
    }

    // execute program
    env.execute("Twitter Streaming Example");
}
Example 19
Source File: TridentForwardThroughput.java From flink-perf with Apache License 2.0 | 3 votes |
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    int par = pt.getInt("para");

    TridentTopology topology = new TridentTopology();
    Stream sourceStream = topology.newStream("source", new Generator(pt)).parallelismHint(pt.getInt("sourceParallelism"));

    sourceStream.localOrShuffle().each(FIELDS, new Sink(pt), new Fields("dontcare"));

    Config conf = new Config();
    conf.setDebug(false);
    // conf.setMaxSpoutPending(pt.getInt("maxPending", 1000));
    // System.exit(1);

    if (!pt.has("local")) {
        conf.setNumWorkers(par);
        StormSubmitter.submitTopologyWithProgressBar("forward-throughput-" + pt.get("name", "no_name"), conf, topology.build());
    } else {
        conf.setMaxTaskParallelism(par);
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("forward-throughput", conf, topology.build());
        Thread.sleep(300000);
        cluster.shutdown();
    }
}