Java Code Examples for org.apache.flink.api.java.utils.ParameterTool#has()

The following examples show how to use org.apache.flink.api.java.utils.ParameterTool#has(). They are drawn from open-source projects; the source file, project, and license are noted above each example.
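As a quick orientation before the project examples, here is a minimal, hypothetical sketch (not taken from any of the projects below; the class and option names are made up) of how has() is typically combined with fromArgs(): it only tests whether a key was supplied, so it is used to decide between an explicit argument and a fallback.

import org.apache.flink.api.java.utils.ParameterTool;

public class ParameterToolHasDemo {

	public static void main(String[] args) {
		// example invocation: --input /tmp/data.csv --verbose
		ParameterTool params = ParameterTool.fromArgs(args);

		// has() only checks that the key was passed; it does not read or validate the value
		if (params.has("input")) {
			System.out.println("Reading from " + params.get("input"));
		} else {
			System.out.println("No --input given, falling back to built-in sample data.");
		}

		// options passed without a value work as simple presence flags
		boolean verbose = params.has("verbose");
		System.out.println("Verbose mode: " + verbose);
	}
}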
Example 1
Source File: LongParameter.java    From flink with Apache License 2.0
@Override
public void configure(ParameterTool parameterTool) {
	if (hasDefaultValue && !parameterTool.has(name)) {
		// skip checks for min and max when using default value
		value = defaultValue;
	} else {
		value = parameterTool.getLong(name);

		if (hasMinimumValue) {
			Util.checkParameter(value >= minimumValue,
				name + " must be greater than or equal to " + minimumValue);
		}

		if (hasMaximumValue) {
			Util.checkParameter(value <= maximumValue,
				name + " must be less than or equal to " + maximumValue);
		}
	}
}
 
Example 2
Source File: MatrixVectorMul.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		// Checking input parameters
		final ParameterTool params = ParameterTool.fromArgs(args);
		System.out.println("Usage: MatrixVectorMul [--output <path>] [--dimension <dimension> --data-size <data_size>] [--resource-name <resource_name>]");

		// Set up the execution environment
		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// Make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		final int dimension = params.getInt("dimension", DEFAULT_DIM);
		final int dataSize = params.getInt("data-size", DEFAULT_DATA_SIZE);
		final String resourceName = params.get("resource-name", DEFAULT_RESOURCE_NAME);

		DataStream<List<Float>> result = env.addSource(new RandomVectorSource(dimension, dataSize))
						.map(new Multiplier(dimension, resourceName));

		// Emit result
		if (params.has("output")) {
			result.addSink(StreamingFileSink.forRowFormat(new Path(params.get("output")),
					new SimpleStringEncoder<List<Float>>()).build());
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			result.print();
		}
		// Execute program
		env.execute("Matrix-Vector Multiplication");
	}
 
Example 3
Source File: KafkaThroughput.java    From flink-perf with Apache License 2.0
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException, UnknownHostException, InterruptedException {
	final ParameterTool pt = ParameterTool.fromArgs(args);

	TopologyBuilder builder = new TopologyBuilder();
	BrokerHosts hosts = new ZkHosts(pt.getRequired("zookeeper"));
	SpoutConfig spoutConfig = new SpoutConfig(hosts, pt.getRequired("topic"), "/" + pt.getRequired("topic"), UUID.randomUUID().toString());
	spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
	KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);
	builder.setSpout("source", kafkaSpout, pt.getInt("sourceParallelism"));

	builder.setBolt("sink", new Throughput.Sink(pt), pt.getInt("sinkParallelism")).noneGrouping("source");

	Config conf = new Config();
	conf.setDebug(false);

	if (!pt.has("local")) {
		conf.setNumWorkers(pt.getInt("par", 2));

		StormSubmitter.submitTopologyWithProgressBar("kafka-spout-"+pt.get("name", "no_name"), conf, builder.createTopology());
	} else {
		conf.setMaxTaskParallelism(pt.getInt("par", 2));

		LocalCluster cluster = new LocalCluster();
		cluster.submitTopology("kafka-spout", conf, builder.createTopology());

		Thread.sleep(300000);

		cluster.shutdown();
	}
}
 
Example 4
Source File: TridentThroughput.java    From flink-perf with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool pt = ParameterTool.fromArgs(args);

	int par = pt.getInt("para");


	TridentTopology topology = new TridentTopology();
	Stream sourceStream = topology.newStream("source", new Generator(pt)).parallelismHint(pt.getInt("sourceParallelism"));

	Stream repart = sourceStream.partitionBy(new Fields("id"));
	for(int i = 0; i < pt.getInt("repartitions", 1) - 1; i++) {
		repart = repart.each(new Fields("id"), new IdentityEach(), new Fields("id"+i)).partitionBy(new Fields("id"+i));
	}
	repart.each(new Fields("id", "host", "time", "payload"), new Sink(pt), new Fields("dontcare")).parallelismHint(pt.getInt("sinkParallelism"));

	Config conf = new Config();
	conf.setDebug(false);

	if (!pt.has("local")) {
		conf.setNumWorkers(par);

		StormSubmitter.submitTopologyWithProgressBar("throughput-"+pt.get("name", "no_name"), conf, topology.build());
	}
	else {
		conf.setMaxTaskParallelism(par);

		LocalCluster cluster = new LocalCluster();
		cluster.submitTopology("throughput", conf, topology.build());

		Thread.sleep(30000);

		cluster.shutdown();
	}

}
 
Example 5
Source File: WebLogAnalysis.java    From flink with Apache License 2.0
private static DataSet<Tuple2<String, String>> getDocumentsDataSet(ExecutionEnvironment env, ParameterTool params) {
	// Create DataSet for documents relation (URL, Doc-Text)
	if (params.has("documents")) {
		return env.readCsvFile(params.get("documents"))
					.fieldDelimiter("|")
					.types(String.class, String.class);
	} else {
		System.out.println("Executing WebLogAnalysis example with default documents data set.");
		System.out.println("Use --documents to specify file input.");
		return WebLogData.getDocumentDataSet(env);
	}
}
 
Example 6
Source File: ConnectedComponents.java    From flink with Apache License 2.0
private static DataSet<Tuple2<Long, Long>> getEdgeDataSet(ExecutionEnvironment env, ParameterTool params) {
	if (params.has("edges")) {
		return env.readCsvFile(params.get("edges")).fieldDelimiter(" ").types(Long.class, Long.class);
	} else {
		System.out.println("Executing Connected Components example with default edges data set.");
		System.out.println("Use --edges to specify file input.");
		return ConnectedComponentsData.getDefaultEdgeDataSet(env);
	}
}
 
Example 7
Source File: TopSpeedWindowing.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		final ParameterTool params = ParameterTool.fromArgs(args);

		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.getConfig().setGlobalJobParameters(params);

		@SuppressWarnings({"rawtypes", "serial"})
		DataStream<Tuple4<Integer, Integer, Double, Long>> carData;
		if (params.has("input")) {
			carData = env.readTextFile(params.get("input")).map(new ParseCarData());
		} else {
			System.out.println("Executing TopSpeedWindowing example with default input data set.");
			System.out.println("Use --input to specify file input.");
			carData = env.addSource(CarSource.create(2));
		}

		int evictionSec = 10;
		double triggerMeters = 50;
		DataStream<Tuple4<Integer, Integer, Double, Long>> topSpeeds = carData
				.assignTimestampsAndWatermarks(new CarTimestamp())
				.keyBy(0)
				.window(GlobalWindows.create())
				.evictor(TimeEvictor.of(Time.of(evictionSec, TimeUnit.SECONDS)))
				.trigger(DeltaTrigger.of(triggerMeters,
						new DeltaFunction<Tuple4<Integer, Integer, Double, Long>>() {
							private static final long serialVersionUID = 1L;

							@Override
							public double getDelta(
									Tuple4<Integer, Integer, Double, Long> oldDataPoint,
									Tuple4<Integer, Integer, Double, Long> newDataPoint) {
								return newDataPoint.f2 - oldDataPoint.f2;
							}
						}, carData.getType().createSerializer(env.getConfig())))
				.maxBy(1);

		if (params.has("output")) {
			topSpeeds.writeAsText(params.get("output"));
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			topSpeeds.print();
		}

		env.execute("CarTopSpeedWindowingExample");
	}
 
Example 8
Source File: ElasticsearchSinkBase.java    From flink with Apache License 2.0
public ElasticsearchSinkBase(
	ElasticsearchApiCallBridge<C> callBridge,
	Map<String, String> userConfig,
	ElasticsearchSinkFunction<T> elasticsearchSinkFunction,
	ActionRequestFailureHandler failureHandler) {

	this.callBridge = checkNotNull(callBridge);
	this.elasticsearchSinkFunction = checkNotNull(elasticsearchSinkFunction);
	this.failureHandler = checkNotNull(failureHandler);

	// we eagerly check if the user-provided sink function and failure handler is serializable;
	// otherwise, if they aren't serializable, users will merely get a non-informative error message
	// "ElasticsearchSinkBase is not serializable"

	checkArgument(InstantiationUtil.isSerializable(elasticsearchSinkFunction),
		"The implementation of the provided ElasticsearchSinkFunction is not serializable. " +
			"The object probably contains or references non-serializable fields.");

	checkArgument(InstantiationUtil.isSerializable(failureHandler),
		"The implementation of the provided ActionRequestFailureHandler is not serializable. " +
			"The object probably contains or references non-serializable fields.");

	// extract and remove bulk processor related configuration from the user-provided config,
	// so that the resulting user config only contains configuration related to the Elasticsearch client.

	checkNotNull(userConfig);

	// copy config so we can remove entries without side-effects
	userConfig = new HashMap<>(userConfig);

	ParameterTool params = ParameterTool.fromMap(userConfig);

	if (params.has(CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS)) {
		bulkProcessorFlushMaxActions = params.getInt(CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS);
		userConfig.remove(CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS);
	} else {
		bulkProcessorFlushMaxActions = null;
	}

	if (params.has(CONFIG_KEY_BULK_FLUSH_MAX_SIZE_MB)) {
		bulkProcessorFlushMaxSizeMb = params.getInt(CONFIG_KEY_BULK_FLUSH_MAX_SIZE_MB);
		userConfig.remove(CONFIG_KEY_BULK_FLUSH_MAX_SIZE_MB);
	} else {
		bulkProcessorFlushMaxSizeMb = null;
	}

	if (params.has(CONFIG_KEY_BULK_FLUSH_INTERVAL_MS)) {
		bulkProcessorFlushIntervalMillis = params.getLong(CONFIG_KEY_BULK_FLUSH_INTERVAL_MS);
		userConfig.remove(CONFIG_KEY_BULK_FLUSH_INTERVAL_MS);
	} else {
		bulkProcessorFlushIntervalMillis = null;
	}

	boolean bulkProcessorFlushBackoffEnable = params.getBoolean(CONFIG_KEY_BULK_FLUSH_BACKOFF_ENABLE, true);
	userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_ENABLE);

	if (bulkProcessorFlushBackoffEnable) {
		this.bulkProcessorFlushBackoffPolicy = new BulkFlushBackoffPolicy();

		if (params.has(CONFIG_KEY_BULK_FLUSH_BACKOFF_TYPE)) {
			bulkProcessorFlushBackoffPolicy.setBackoffType(FlushBackoffType.valueOf(params.get(CONFIG_KEY_BULK_FLUSH_BACKOFF_TYPE)));
			userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_TYPE);
		}

		if (params.has(CONFIG_KEY_BULK_FLUSH_BACKOFF_RETRIES)) {
			bulkProcessorFlushBackoffPolicy.setMaxRetryCount(params.getInt(CONFIG_KEY_BULK_FLUSH_BACKOFF_RETRIES));
			userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_RETRIES);
		}

		if (params.has(CONFIG_KEY_BULK_FLUSH_BACKOFF_DELAY)) {
			bulkProcessorFlushBackoffPolicy.setDelayMillis(params.getLong(CONFIG_KEY_BULK_FLUSH_BACKOFF_DELAY));
			userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_DELAY);
		}

	} else {
		bulkProcessorFlushBackoffPolicy = null;
	}

	this.userConfig = userConfig;
}
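The pattern above, check with has(), read the typed value, then remove the key so that only client-related settings remain, can be reduced to a small helper. The following is a hypothetical sketch, not part of Flink; the method and key names are made up, and the ParameterTool and java.util.Map imports are assumed.

// hypothetical helper: pulls one optional integer setting out of a mutable config map
static Integer extractOptionalInt(Map<String, String> userConfig, String key) {
	ParameterTool params = ParameterTool.fromMap(userConfig);
	if (params.has(key)) {
		int value = params.getInt(key);
		userConfig.remove(key);    // leave only the remaining (e.g. client-related) keys behind
		return value;
	}
	return null;                   // null marks "not configured", as in the constructor above
}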
 
Example 9
Source File: WebLogAnalysis.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		final ParameterTool params = ParameterTool.fromArgs(args);

		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		env.getConfig().setGlobalJobParameters(params);

		// get input data
		DataSet<Tuple2<String, String>> documents = getDocumentsDataSet(env, params);
		DataSet<Tuple3<Integer, String, Integer>> ranks = getRanksDataSet(env, params);
		DataSet<Tuple2<String, String>> visits = getVisitsDataSet(env, params);

		// Retain documents with keywords
		DataSet<Tuple1<String>> filterDocs = documents
				.filter(new FilterDocByKeyWords())
				.project(0);

		// Filter ranks by minimum rank
		DataSet<Tuple3<Integer, String, Integer>> filterRanks = ranks
				.filter(new FilterByRank());

		// Filter visits by visit date
		DataSet<Tuple1<String>> filterVisits = visits
				.filter(new FilterVisitsByDate())
				.project(0);

		// Join the filtered documents and ranks, i.e., get all URLs with min rank and keywords
		DataSet<Tuple3<Integer, String, Integer>> joinDocsRanks =
				filterDocs.join(filterRanks)
							.where(0).equalTo(1)
							.projectSecond(0, 1, 2);

		// Anti-join urls with visits, i.e., retain all URLs which have NOT been visited in a certain time
		DataSet<Tuple3<Integer, String, Integer>> result =
				joinDocsRanks.coGroup(filterVisits)
								.where(1).equalTo(0)
								.with(new AntiJoinVisits());

		// emit result
		if (params.has("output")) {
			result.writeAsCsv(params.get("output"), "\n", "|");
			// execute program
			env.execute("WebLogAnalysis Example");
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			result.print();
		}
	}
 
Example 10
Source File: EnumTriangles.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		// Checking input parameters
		final ParameterTool params = ParameterTool.fromArgs(args);

		// set up execution environment
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		// read input data
		DataSet<Edge> edges;
		if (params.has("edges")) {
			edges = env.readCsvFile(params.get("edges"))
					.fieldDelimiter(" ")
					.includeFields(true, true)
					.types(Integer.class, Integer.class)
					.map(new TupleEdgeConverter());
		} else {
			System.out.println("Executing EnumTriangles example with default edges data set.");
			System.out.println("Use --edges to specify file input.");
			edges = EnumTrianglesData.getDefaultEdgeDataSet(env);
		}

		// project edges by vertex id
		DataSet<Edge> edgesById = edges
				.map(new EdgeByIdProjector());

		DataSet<Triad> triangles = edgesById
				// build triads
				.groupBy(Edge.V1).sortGroup(Edge.V2, Order.ASCENDING).reduceGroup(new TriadBuilder())
				// filter triads
				.join(edgesById).where(Triad.V2, Triad.V3).equalTo(Edge.V1, Edge.V2).with(new TriadFilter());

		// emit result
		if (params.has("output")) {
			triangles.writeAsCsv(params.get("output"), "\n", ",");
			// execute program
			env.execute("Basic Triangle Enumeration Example");
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			triangles.print();
		}
	}
 
Example 11
Source File: BooleanParameter.java    From Flink-CEPplus with Apache License 2.0
@Override
public void configure(ParameterTool parameterTool) {
	value = parameterTool.has(name);
}
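For a parameter like this, has() effectively turns the option into a presence flag: the field set in configure() becomes true whenever the option name appears on the command line at all, with or without a value. A hypothetical invocation (not from the Flink sources) behaves like this:

// sketch only: "--verbose" with no value is enough for has("verbose") to return true
ParameterTool params = ParameterTool.fromArgs(new String[] {"--verbose"});
boolean verbose = params.has("verbose");   // true
boolean debug = params.has("debug");       // false: the flag was never passed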
 
Example 12
Source File: TPCHQuery10.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {

		final ParameterTool params = ParameterTool.fromArgs(args);

		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		if (!params.has("customer") && !params.has("orders") && !params.has("lineitem") && !params.has("nation")) {
			System.err.println("  This program expects data from the TPC-H benchmark as input data.");
			System.err.println("  Due to legal restrictions, we can not ship generated data.");
			System.err.println("  You can find the TPC-H data generator at http://www.tpc.org/tpch/.");
			System.err.println("  Usage: TPCHQuery10 --customer <path> --orders <path> --lineitem <path> --nation <path> [--output <path>]");
			return;
		}

		// get customer data set: (custkey, name, address, nationkey, acctbal)
		DataSet<Tuple5<Integer, String, String, Integer, Double>> customers =
			getCustomerDataSet(env, params.get("customer"));
		// get orders data set: (orderkey, custkey, orderdate)
		DataSet<Tuple3<Integer, Integer, String>> orders =
			getOrdersDataSet(env, params.get("orders"));
		// get lineitem data set: (orderkey, extendedprice, discount, returnflag)
		DataSet<Tuple4<Integer, Double, Double, String>> lineitems =
			getLineitemDataSet(env, params.get("lineitem"));
		// get nation data set: (nationkey, name)
		DataSet<Tuple2<Integer, String>> nations =
			getNationsDataSet(env, params.get("nation"));

		// orders filtered by year: (orderkey, custkey)
		DataSet<Tuple2<Integer, Integer>> ordersFilteredByYear =
				// filter by year
				orders.filter(order -> Integer.parseInt(order.f2.substring(0, 4)) > 1990)
				// project fields out that are no longer required
				.project(0, 1);

		// lineitems filtered by flag: (orderkey, revenue)
		DataSet<Tuple2<Integer, Double>> lineitemsFilteredByFlag =
				// filter by flag
				lineitems.filter(lineitem -> lineitem.f3.equals("R"))
				// compute revenue and project out return flag
				// revenue per item = l_extendedprice * (1 - l_discount)
				.map(lineitem -> new Tuple2<>(lineitem.f0, lineitem.f1 * (1 - lineitem.f2)))
				.returns(Types.TUPLE(Types.INT, Types.DOUBLE)); // for lambda with generics

		// join orders with lineitems: (custkey, revenue)
		DataSet<Tuple2<Integer, Double>> revenueByCustomer =
				ordersFilteredByYear.joinWithHuge(lineitemsFilteredByFlag)
									.where(0).equalTo(0)
									.projectFirst(1).projectSecond(1);

		revenueByCustomer = revenueByCustomer.groupBy(0).aggregate(Aggregations.SUM, 1);

		// join customer with nation (custkey, name, address, nationname, acctbal)
		DataSet<Tuple5<Integer, String, String, String, Double>> customerWithNation = customers
						.joinWithTiny(nations)
						.where(3).equalTo(0)
						.projectFirst(0, 1, 2).projectSecond(1).projectFirst(4);

		// join customer (with nation) with revenue (custkey, name, address, nationname, acctbal, revenue)
		DataSet<Tuple6<Integer, String, String, String, Double, Double>> result =
				customerWithNation.join(revenueByCustomer)
				.where(0).equalTo(0)
				.projectFirst(0, 1, 2, 3, 4).projectSecond(1);

		// emit result
		if (params.has("output")) {
			result.writeAsCsv(params.get("output"), "\n", "|");
			// execute program
			env.execute("TPCH Query 10 Example");
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			result.print();
		}

	}
 
Example 13
Source File: SocketWindowWordCount.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		// the host and the port to connect to
		final String hostname;
		final int port;
		try {
			final ParameterTool params = ParameterTool.fromArgs(args);
			hostname = params.has("hostname") ? params.get("hostname") : "localhost";
			port = params.getInt("port");
		} catch (Exception e) {
			System.err.println("No port specified. Please run 'SocketWindowWordCount " +
				"--hostname <hostname> --port <port>', where hostname (localhost by default) " +
				"and port is the address of the text server");
			System.err.println("To start a simple text server, run 'netcat -l <port>' and " +
				"type the input text into the command line");
			return;
		}

		// get the execution environment
		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// get input data by connecting to the socket
		DataStream<String> text = env.socketTextStream(hostname, port, "\n");

		// parse the data, group it, window it, and aggregate the counts
		DataStream<WordWithCount> windowCounts = text

				.flatMap(new FlatMapFunction<String, WordWithCount>() {
					@Override
					public void flatMap(String value, Collector<WordWithCount> out) {
						for (String word : value.split("\\s")) {
							out.collect(new WordWithCount(word, 1L));
						}
					}
				})

				.keyBy("word")
				.timeWindow(Time.seconds(5))

				.reduce(new ReduceFunction<WordWithCount>() {
					@Override
					public WordWithCount reduce(WordWithCount a, WordWithCount b) {
						return new WordWithCount(a.word, a.count + b.count);
					}
				});

		// print the results with a single thread, rather than in parallel
		windowCounts.print().setParallelism(1);

		env.execute("Socket Window WordCount");
	}
 
Example 14
Source File: KMeans.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		// Checking input parameters
		final ParameterTool params = ParameterTool.fromArgs(args);

		// set up execution environment
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.getConfig().setGlobalJobParameters(params); // make parameters available in the web interface

		// get input data:
		// read the points and centroids from the provided paths or fall back to default data
		DataSet<Point> points = getPointDataSet(params, env);
		DataSet<Centroid> centroids = getCentroidDataSet(params, env);

		// set number of bulk iterations for KMeans algorithm
		IterativeDataSet<Centroid> loop = centroids.iterate(params.getInt("iterations", 10));

		DataSet<Centroid> newCentroids = points
			// compute closest centroid for each point
			.map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids")
			// count and sum point coordinates for each centroid
			.map(new CountAppender())
			.groupBy(0).reduce(new CentroidAccumulator())
			// compute new centroids from point counts and coordinate sums
			.map(new CentroidAverager());

		// feed new centroids back into next iteration
		DataSet<Centroid> finalCentroids = loop.closeWith(newCentroids);

		DataSet<Tuple2<Integer, Point>> clusteredPoints = points
			// assign points to final clusters
			.map(new SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids");

		// emit result
		if (params.has("output")) {
			clusteredPoints.writeAsCsv(params.get("output"), "\n", " ");

			// since file sinks are lazy, we trigger the execution explicitly
			env.execute("KMeans Example");
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			clusteredPoints.print();
		}
	}
 
Example 15
Source File: BooleanParameter.java    From flink with Apache License 2.0
@Override
public void configure(ParameterTool parameterTool) {
	value = parameterTool.has(name);
}
 
Example 16
Source File: TopSpeedWindowing.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {

		final ParameterTool params = ParameterTool.fromArgs(args);

		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.getConfig().setGlobalJobParameters(params);

		@SuppressWarnings({"rawtypes", "serial"})
		DataStream<Tuple4<Integer, Integer, Double, Long>> carData;
		if (params.has("input")) {
			carData = env.readTextFile(params.get("input")).map(new ParseCarData());
		} else {
			System.out.println("Executing TopSpeedWindowing example with default input data set.");
			System.out.println("Use --input to specify file input.");
			carData = env.addSource(CarSource.create(2));
		}

		int evictionSec = 10;
		double triggerMeters = 50;
		DataStream<Tuple4<Integer, Integer, Double, Long>> topSpeeds = carData
				.assignTimestampsAndWatermarks(new CarTimestamp())
				.keyBy(0)
				.window(GlobalWindows.create())
				.evictor(TimeEvictor.of(Time.of(evictionSec, TimeUnit.SECONDS)))
				.trigger(DeltaTrigger.of(triggerMeters,
						new DeltaFunction<Tuple4<Integer, Integer, Double, Long>>() {
							private static final long serialVersionUID = 1L;

							@Override
							public double getDelta(
									Tuple4<Integer, Integer, Double, Long> oldDataPoint,
									Tuple4<Integer, Integer, Double, Long> newDataPoint) {
								return newDataPoint.f2 - oldDataPoint.f2;
							}
						}, carData.getType().createSerializer(env.getConfig())))
				.maxBy(1);

		if (params.has("output")) {
			topSpeeds.writeAsText(params.get("output"));
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			topSpeeds.print();
		}

		env.execute("CarTopSpeedWindowingExample");
	}
 
Example 17
Source File: AdvertisingTopologyNative.java    From streaming-benchmarks with Apache License 2.0
public static void main(final String[] args) throws Exception {

        ParameterTool parameterTool = ParameterTool.fromArgs(args);

        Map conf = Utils.findAndReadConfigFile(parameterTool.getRequired("confPath"), true);
        int kafkaPartitions = ((Number)conf.get("kafka.partitions")).intValue();
        int hosts = ((Number)conf.get("process.hosts")).intValue();
        int cores = ((Number)conf.get("process.cores")).intValue();

        ParameterTool flinkBenchmarkParams = ParameterTool.fromMap(getFlinkConfs(conf));

        LOG.info("conf: {}", conf);
        LOG.info("Parameters used: {}", flinkBenchmarkParams.toMap());

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setGlobalJobParameters(flinkBenchmarkParams);

        // Set the buffer timeout (default 100)
        // Lowering the timeout will lead to lower latencies, but will eventually reduce throughput.
        env.setBufferTimeout(flinkBenchmarkParams.getLong("flink.buffer-timeout", 100));

        if(flinkBenchmarkParams.has("flink.checkpoint-interval")) {
            // enable checkpointing for fault tolerance
            env.enableCheckpointing(flinkBenchmarkParams.getLong("flink.checkpoint-interval", 1000));
        }
        // set default parallelism for all operators (recommended value: number of available worker CPU cores in the cluster (hosts * cores))
        env.setParallelism(hosts * cores);

        DataStream<String> messageStream = env
                .addSource(new FlinkKafkaConsumer082<String>(
                        flinkBenchmarkParams.getRequired("topic"),
                        new SimpleStringSchema(),
                        flinkBenchmarkParams.getProperties())).setParallelism(Math.min(hosts * cores, kafkaPartitions));

        messageStream
                .rebalance()
                // Parse the String as JSON
                .flatMap(new DeserializeBolt())

                //Filter the records if event type is "view"
                .filter(new EventFilterBolt())

                // project the event
                .<Tuple2<String, String>>project(2, 5)

                // perform join with redis data
                .flatMap(new RedisJoinBolt())

                // process campaign
                .keyBy(0)
                .flatMap(new CampaignProcessor());


        env.execute();
    }
 
Example 18
Source File: TwitterExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		// Checking input parameters
		final ParameterTool params = ParameterTool.fromArgs(args);
		System.out.println("Usage: TwitterExample [--output <path>] " +
				"[--twitter-source.consumerKey <key> --twitter-source.consumerSecret <secret> --twitter-source.token <token> --twitter-source.tokenSecret <tokenSecret>]");

		// set up the execution environment
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		env.setParallelism(params.getInt("parallelism", 1));

		// get input data
		DataStream<String> streamSource;
		if (params.has(TwitterSource.CONSUMER_KEY) &&
				params.has(TwitterSource.CONSUMER_SECRET) &&
				params.has(TwitterSource.TOKEN) &&
				params.has(TwitterSource.TOKEN_SECRET)
				) {
			streamSource = env.addSource(new TwitterSource(params.getProperties()));
		} else {
			System.out.println("Executing TwitterStream example with default props.");
			System.out.println("Use --twitter-source.consumerKey <key> --twitter-source.consumerSecret <secret> " +
					"--twitter-source.token <token> --twitter-source.tokenSecret <tokenSecret> specify the authentication info.");
			// get default test text data
			streamSource = env.fromElements(TwitterExampleData.TEXTS);
		}

		DataStream<Tuple2<String, Integer>> tweets = streamSource
				// selecting English tweets and splitting to (word, 1)
				.flatMap(new SelectEnglishAndTokenizeFlatMap())
				// group by words and sum their occurrences
				.keyBy(0).sum(1);

		// emit result
		if (params.has("output")) {
			tweets.writeAsText(params.get("output"));
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			tweets.print();
		}

		// execute program
		env.execute("Twitter Streaming Example");
	}
 
Example 19
Source File: TridentForwardThroughput.java    From flink-perf with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool pt = ParameterTool.fromArgs(args);

	int par = pt.getInt("para");

	TridentTopology topology = new TridentTopology();
	Stream sourceStream = topology.newStream("source", new Generator(pt)).parallelismHint(pt.getInt("sourceParallelism"));
	sourceStream.localOrShuffle().each(FIELDS, new Sink(pt), new Fields("dontcare"));

	Config conf = new Config();
	conf.setDebug(false);

//	conf.setMaxSpoutPending(pt.getInt("maxPending", 1000));

	//System.exit(1);

	if (!pt.has("local")) {
		conf.setNumWorkers(par);

		StormSubmitter.submitTopologyWithProgressBar("forward-throughput-"+pt.get("name", "no_name"), conf, topology.build());
	}
	else {
		conf.setMaxTaskParallelism(par);

		LocalCluster cluster = new LocalCluster();
		cluster.submitTopology("forward-throughput", conf, topology.build());

		Thread.sleep(300000);

		cluster.shutdown();
	}

}