Java Code Examples for org.apache.flink.api.java.utils.ParameterTool#getInt()

The following examples show how to use org.apache.flink.api.java.utils.ParameterTool#getInt(). Each example is extracted from an open source project; the source file, project, and license are noted above the code.
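Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) illustrating the two overloads used throughout this page: getInt(key, defaultValue), which falls back to the default when the parameter is absent, and getInt(key), which throws a RuntimeException when the parameter is missing or not a valid integer. The class name and parameter names are made up for illustration.

// Hypothetical usage sketch; class and parameter names are illustrative only.
import org.apache.flink.api.java.utils.ParameterTool;

public class GetIntSketch {

	public static void main(String[] args) {
		// e.g. run with: --port 9000 --retries 3
		ParameterTool params = ParameterTool.fromArgs(args);

		// getInt(key, defaultValue): returns 3 if --retries was not supplied
		int retries = params.getInt("retries", 3);

		// getInt(key): throws a RuntimeException if --port is missing or not an integer
		int port = params.getInt("port");

		System.out.println("port=" + port + ", retries=" + retries);
	}
}
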
Example 1
Source File: IterationConvergence.java    From flink with Apache License 2.0
@Override
public void configure(ParameterTool parameterTool) {
	if (!parameterTool.has("iterations") && !parameterTool.has("convergence_threshold")) {
		// no configuration so use default iterations and maximum threshold
		value.iterations = defaultIterations;
		value.convergenceThreshold = Double.MAX_VALUE;
	} else {
		// use configured values and maximum default for unset values
		value.iterations = parameterTool.getInt("iterations", Integer.MAX_VALUE);
		Util.checkParameter(value.iterations > 0,
			"iterations must be greater than zero");

		value.convergenceThreshold = parameterTool.getDouble("convergence_threshold", Double.MAX_VALUE);
		Util.checkParameter(value.convergenceThreshold > 0,
			"convergence threshold must be greater than zero");
	}
}
 
Example 2
Source File: ForwardThroughput.java    From flink-perf with Apache License 2.0
public Generator(ParameterTool pt) {
	this.delay = pt.getInt("delay");
	this.withFt = pt.has("ft");
	this.latFreq = pt.getInt("latencyFreq");
	this.sleepFreq = pt.getInt("sleepFreq");
	this.pt = pt;
}
 
Example 3
Source File: PipelineRunner.java    From pravega-samples with Apache License 2.0
private void parseConfigurations(String[] args) {

		log.info("ApplicationMain Main.. Arguments: {}", Arrays.asList(args));

		ParameterTool parameterTool = ParameterTool.fromArgs(args);
		log.info("Parameter Tool: {}", parameterTool.toMap());

		if(!parameterTool.has("mode")) {
			printUsage();
			System.exit(1);
		}

		String configDirPath = parameterTool.get("configDir", "conf");
		try {
			byte[] configurationData = Files.readAllBytes(Paths.get(configDirPath + File.separator + configFile));
			String jsonData = new String(configurationData);
			log.info("App Configurations raw data: {}", jsonData);
			Gson gson = new Gson();
			appConfiguration = gson.fromJson(jsonData, AppConfiguration.class);
		} catch (IOException e) {
			log.error("Could not read {}",configFile, e);
			System.exit(1);
		}

		runMode = parameterTool.getInt("mode");
		pravegaConfig = PravegaConfig.fromParams(parameterTool).withDefaultScope(DEFAULT_SCOPE);
		stream = pravegaConfig.resolve(parameterTool.get(STREAM_PARAMETER, DEFAULT_STREAM));
	}
 
Example 4
Source File: SummarizationJobParameters.java    From timely with Apache License 2.0
public SummarizationJobParameters(ParameterTool params) {
    timelyHostname = params.getRequired("timelyHostname");
    timelyTcpPort = params.getInt("timelyTcpPort", 4241);
    timelyHttpsPort = params.getInt("timelyHttpsPort", 4242);
    timelyWssPort = params.getInt("timelyWssPort", 4243);
    clientAuth = params.getBoolean("clientAuth", false);
    doLogin = params.getBoolean("doLogin", false);
    timelyUsername = params.get("timelyUsername", null);
    timelyPassword = params.get("timelyPassword", null);
    keyStoreFile = params.getRequired("keyStoreFile");
    keyStoreType = params.get("keyStoreType", "JKS");
    keyStorePass = params.getRequired("keyStorePass");
    trustStoreFile = params.getRequired("trustStoreFile");
    trustStoreType = params.get("trustStoreType", "JKS");
    trustStorePass = params.getRequired("trustStorePass");
    hostVerificationEnabled = params.getBoolean("hostVerificationEnabled", true);
    bufferSize = params.getInt("bufferSize", 10485760);
    String metricNames = params.getRequired("metrics");
    // note: getRequired() throws if "metrics" is absent, so metricNames is never null here
    if (null != metricNames) {
        metrics = metricNames.split(",");
    } else {
        metrics = null;
    }
    startTime = params.getLong("startTime", 0L);
    endTime = params.getLong("endTime", 0L);
    interval = params.getRequired("interval");
    intervalUnits = params.getRequired("intervalUnits");
}
 
Example 5
Source File: DataStreamAllroundTestJobFactory.java    From flink with Apache License 2.0
static SlidingEventTimeWindows createSlidingWindow(ParameterTool pt) {
	long slideSize = pt.getLong(
		TEST_SLIDE_SIZE.key(),
		TEST_SLIDE_SIZE.defaultValue());

	long slideFactor = pt.getInt(
		TEST_SLIDE_FACTOR.key(),
		TEST_SLIDE_FACTOR.defaultValue()
	);

	return SlidingEventTimeWindows.of(Time.milliseconds(slideSize * slideFactor), Time.milliseconds(slideSize));
}
 
Example 6
Source File: ThroughputHostsTracking.java    From flink-perf with Apache License 2.0
public Generator(ParameterTool pt) {
	this.payload = new byte[pt.getInt("payload")];
	this.delay = pt.getInt("delay");
	this.withFt = pt.has("ft");
	this.latFreq = pt.getInt("latencyFreq");
	this.sleepFreq = pt.getInt("sleepFreq");
}
 
Example 7
Source File: DataStreamAllroundTestJobFactory.java    From Flink-CEPplus with Apache License 2.0
static SlidingEventTimeWindows createSlidingWindow(ParameterTool pt) {
	long slideSize = pt.getLong(
		TEST_SLIDE_SIZE.key(),
		TEST_SLIDE_SIZE.defaultValue());

	long slideFactor = pt.getInt(
		TEST_SLIDE_FACTOR.key(),
		TEST_SLIDE_FACTOR.defaultValue()
	);

	return SlidingEventTimeWindows.of(Time.milliseconds(slideSize * slideFactor), Time.milliseconds(slideSize));
}
 
Example 8
Source File: ThroughputHostsTracking.java    From flink-perf with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool pt = ParameterTool.fromArgs(args);

	int par = pt.getInt("para");

	TopologyBuilder builder = new TopologyBuilder();

	builder.setSpout("source0", new Generator(pt), pt.getInt("sourceParallelism"));
	int i = 0;
	for(; i < pt.getInt("repartitions", 1) - 1;i++) {
		System.out.println("adding source"+i+" --> source"+(i+1));
		builder.setBolt("source"+(i+1), new RepartPassThroughBolt(pt), pt.getInt("sinkParallelism")).fieldsGrouping("source" + i, new Fields("id"));
	}
	System.out.println("adding final source"+i+" --> sink");

	builder.setBolt("sink", new Sink(pt), pt.getInt("sinkParallelism")).fieldsGrouping("source"+i, new Fields("id"));


	Config conf = new Config();
	conf.setDebug(false);
	//System.exit(1);

	if (!pt.has("local")) {
		conf.setNumWorkers(par);

		StormSubmitter.submitTopologyWithProgressBar("throughput-"+pt.get("name", "no_name"), conf, builder.createTopology());
	}
	else {
		conf.setMaxTaskParallelism(par);

		LocalCluster cluster = new LocalCluster();
		cluster.submitTopology("throughput", conf, builder.createTopology());

		Thread.sleep(300000);

		cluster.shutdown();
	}

}
 
Example 9
Source File: DataStreamAllroundTestJobFactory.java    From Flink-CEPplus with Apache License 2.0
static FlatMapFunction<Tuple2<Integer, List<Event>>, String> createSlidingWindowCheckMapper(ParameterTool pt) {
	return new SlidingWindowCheckMapper(pt.getInt(
		TEST_SLIDE_FACTOR.key(),
		TEST_SLIDE_FACTOR.defaultValue()
	));
}
 
Example 10
Source File: ElasticsearchSinkBase.java    From flink with Apache License 2.0
public ElasticsearchSinkBase(
	ElasticsearchApiCallBridge<C> callBridge,
	Map<String, String> userConfig,
	ElasticsearchSinkFunction<T> elasticsearchSinkFunction,
	ActionRequestFailureHandler failureHandler) {

	this.callBridge = checkNotNull(callBridge);
	this.elasticsearchSinkFunction = checkNotNull(elasticsearchSinkFunction);
	this.failureHandler = checkNotNull(failureHandler);
	// we eagerly check if the user-provided sink function and failure handler is serializable;
	// otherwise, if they aren't serializable, users will merely get a non-informative error message
	// "ElasticsearchSinkBase is not serializable"

	checkArgument(InstantiationUtil.isSerializable(elasticsearchSinkFunction),
		"The implementation of the provided ElasticsearchSinkFunction is not serializable. " +
			"The object probably contains or references non-serializable fields.");

	checkArgument(InstantiationUtil.isSerializable(failureHandler),
		"The implementation of the provided ActionRequestFailureHandler is not serializable. " +
			"The object probably contains or references non-serializable fields.");

	// extract and remove bulk processor related configuration from the user-provided config,
	// so that the resulting user config only contains configuration related to the Elasticsearch client.

	checkNotNull(userConfig);

	// copy config so we can remove entries without side-effects
	userConfig = new HashMap<>(userConfig);

	ParameterTool params = ParameterTool.fromMap(userConfig);

	if (params.has(CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS)) {
		bulkProcessorFlushMaxActions = params.getInt(CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS);
		userConfig.remove(CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS);
	} else {
		bulkProcessorFlushMaxActions = null;
	}

	if (params.has(CONFIG_KEY_BULK_FLUSH_MAX_SIZE_MB)) {
		bulkProcessorFlushMaxSizeMb = params.getInt(CONFIG_KEY_BULK_FLUSH_MAX_SIZE_MB);
		userConfig.remove(CONFIG_KEY_BULK_FLUSH_MAX_SIZE_MB);
	} else {
		bulkProcessorFlushMaxSizeMb = null;
	}

	if (params.has(CONFIG_KEY_BULK_FLUSH_INTERVAL_MS)) {
		bulkProcessorFlushIntervalMillis = params.getLong(CONFIG_KEY_BULK_FLUSH_INTERVAL_MS);
		userConfig.remove(CONFIG_KEY_BULK_FLUSH_INTERVAL_MS);
	} else {
		bulkProcessorFlushIntervalMillis = null;
	}

	boolean bulkProcessorFlushBackoffEnable = params.getBoolean(CONFIG_KEY_BULK_FLUSH_BACKOFF_ENABLE, true);
	userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_ENABLE);

	if (bulkProcessorFlushBackoffEnable) {
		this.bulkProcessorFlushBackoffPolicy = new BulkFlushBackoffPolicy();

		if (params.has(CONFIG_KEY_BULK_FLUSH_BACKOFF_TYPE)) {
			bulkProcessorFlushBackoffPolicy.setBackoffType(FlushBackoffType.valueOf(params.get(CONFIG_KEY_BULK_FLUSH_BACKOFF_TYPE)));
			userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_TYPE);
		}

		if (params.has(CONFIG_KEY_BULK_FLUSH_BACKOFF_RETRIES)) {
			bulkProcessorFlushBackoffPolicy.setMaxRetryCount(params.getInt(CONFIG_KEY_BULK_FLUSH_BACKOFF_RETRIES));
			userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_RETRIES);
		}

		if (params.has(CONFIG_KEY_BULK_FLUSH_BACKOFF_DELAY)) {
			bulkProcessorFlushBackoffPolicy.setDelayMillis(params.getLong(CONFIG_KEY_BULK_FLUSH_BACKOFF_DELAY));
			userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_DELAY);
		}

	} else {
		bulkProcessorFlushBackoffPolicy = null;
	}

	this.userConfig = userConfig;
}
 
Example 11
Source File: Latency.java    From flink-perf with Apache License 2.0
public Source(ParameterTool pt) {
	this.pt = pt;
	payload = new byte[pt.getInt("payload")];
}
 
Example 12
Source File: QsStateClient.java    From flink with Apache License 2.0
public static void main(final String[] args) throws Exception {

		ParameterTool parameters = ParameterTool.fromArgs(args);

		// setup values
		String jobId = parameters.getRequired("job-id");
		String host = parameters.get("host", "localhost");
		int port = parameters.getInt("port", 9069);
		int numIterations = parameters.getInt("iterations", 1500);

		QueryableStateClient client = new QueryableStateClient(host, port);
		client.setExecutionConfig(new ExecutionConfig());

		MapStateDescriptor<EmailId, EmailInformation> stateDescriptor =
				new MapStateDescriptor<>(
						QsConstants.STATE_NAME,
						TypeInformation.of(new TypeHint<EmailId>() {

						}),
						TypeInformation.of(new TypeHint<EmailInformation>() {

						})
				);

		// wait for state to exist
		for (int i = 0; i < BOOTSTRAP_RETRIES; i++) { // ~120s
			try {
				getMapState(jobId, client, stateDescriptor);
				break;
			} catch (ExecutionException e) {
				if (e.getCause() instanceof UnknownKeyOrNamespaceException) {
					System.err.println("State does not exist yet; sleeping 500ms");
					Thread.sleep(500L);
				} else {
					throw e;
				}
			}

			if (i == (BOOTSTRAP_RETRIES - 1)) {
				throw new RuntimeException("Timeout: state doesn't exist after 120s");
			}
		}

		// query state
		for (int iterations = 0; iterations < numIterations; iterations++) {

			MapState<EmailId, EmailInformation> mapState =
				getMapState(jobId, client, stateDescriptor);

			int counter = 0;
			for (Map.Entry<EmailId, EmailInformation> entry: mapState.entries()) {
				// this is to force deserialization
				entry.getKey();
				entry.getValue();
				counter++;
			}
			System.out.println("MapState has " + counter + " entries"); // we look for it in the test

			Thread.sleep(100L);
		}
	}
 
Example 13
Source File: PulsarConsumerSourceWordCount.java    From pulsar with Apache License 2.0
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);

    if (parameterTool.getNumberOfParameters() < 2) {
        System.out.println("Missing parameters!");
        System.out.println("Usage: pulsar --service-url <pulsar-service-url> --input-topic <topic> --subscription <sub> --output-topic <topic>");
        return;
    }

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();
    env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000));
    env.enableCheckpointing(5000);
    env.getConfig().setGlobalJobParameters(parameterTool);
    env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

    String serviceUrl = parameterTool.getRequired("service-url");
    String inputTopic = parameterTool.getRequired("input-topic");
    String subscription = parameterTool.get("subscription", "flink-examples");
    String outputTopic = parameterTool.get("output-topic", null);
    int parallelism = parameterTool.getInt("parallelism", 1);

    System.out.println("Parameters:");
    System.out.println("\tServiceUrl:\t" + serviceUrl);
    System.out.println("\tInputTopic:\t" + inputTopic);
    System.out.println("\tSubscription:\t" + subscription);
    System.out.println("\tOutputTopic:\t" + outputTopic);
    System.out.println("\tParallelism:\t" + parallelism);

    PulsarSourceBuilder<String> builder = PulsarSourceBuilder.builder(new SimpleStringSchema())
        .serviceUrl(serviceUrl)
        .topic(inputTopic)
        .subscriptionName(subscription);
    SourceFunction<String> src = builder.build();
    DataStream<String> input = env.addSource(src);

    DataStream<WordWithCount> wc = input
        .flatMap((FlatMapFunction<String, WordWithCount>) (line, collector) -> {
            for (String word : line.split("\\s")) {
                collector.collect(new WordWithCount(word, 1));
            }
        })
        .returns(WordWithCount.class)
        .keyBy("word")
        .timeWindow(Time.seconds(5))
        .reduce((ReduceFunction<WordWithCount>) (c1, c2) ->
            new WordWithCount(c1.word, c1.count + c2.count));

    if (null != outputTopic) {
        wc.addSink(new FlinkPulsarProducer<>(
            serviceUrl,
            outputTopic,
            new AuthenticationDisabled(),
            wordWithCount -> wordWithCount.toString().getBytes(UTF_8),
            wordWithCount -> wordWithCount.word,
            null
        )).setParallelism(parallelism);
    } else {
        // print the results with a single thread, rather than in parallel
        wc.print().setParallelism(1);
    }

    env.execute("Pulsar Stream WordCount");
}
 
Example 14
Source File: Latency.java    From flink-perf with Apache License 2.0
public Sink(ParameterTool pt) throws UnknownHostException {
	this.pt = pt;
	this.withFT = pt.has("ft");
	this.logfreq = pt.getInt("logfreq");
	this.host = InetAddress.getLocalHost().getHostName();
}
 
Example 15
Source File: Throughput.java    From flink-perf with Apache License 2.0
public static void main(String[] args) throws Exception {
	final ParameterTool pt = ParameterTool.fromArgs(args);

	StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
	see.getConfig().setGlobalJobParameters(pt);
	see.setNumberOfExecutionRetries(0);

	if(pt.has("timeout")) {
		see.setBufferTimeout(pt.getLong("timeout"));
	}

	if(pt.has("ft")) {
		see.enableCheckpointing(pt.getLong("ft"));
	}

	DataStream<Type> source = see.addSource(new Source(pt) );

	DataStream<Type> repartitioned = source.partitionByHash(0);
	for(int i = 0; i < pt.getInt("repartitions", 1) - 1;i++) {
		repartitioned = repartitioned.map(new MapFunction<Type, Type>() {
			@Override
			public Type map(Type in) throws Exception {
				Type out = in.copy();
				out.f0++;
				return out;
			}
		}).partitionByHash(0);
	}
	repartitioned.flatMap(new FlatMapFunction<Type, Integer>() {
		public int host = -2;
		long received = 0;
		long start = 0;
		long logfreq = pt.getInt("logfreq");
		long lastLog = -1;
		long lastElements = 0;

		@Override
		public void flatMap(Type element, Collector<Integer> collector) throws Exception {
			if(host == -2) {
				host = convertHostnameToInt(InetAddress.getLocalHost().getHostName());
			}
			if (start == 0) {
				start = System.currentTimeMillis();
			}

			received++;
			if (received % logfreq == 0) {
				// throughput over entire time
				long now = System.currentTimeMillis();
				long sinceSec = ((now - start) / 1000);
				if (sinceSec == 0) return;
				LOG.info("Received {} elements since {}. Elements per second {}, GB received {}",
						received,
						sinceSec,
						received / sinceSec,
						(received * (8 + 8 + 4 + pt.getInt("payload"))) / 1024 / 1024 / 1024);

				// throughput for the last "logfreq" elements
				if(lastLog == -1) {
					// init (the first)
					lastLog = now;
					lastElements = received;
				} else {
					long timeDiff = now - lastLog;
					long elementDiff = received - lastElements;
					double ex = (1000/(double)timeDiff);
					LOG.info("During the last {} ms, we received {} elements. That's {} elements/second/core", timeDiff, elementDiff, elementDiff*ex);
					// reinit
					lastLog = now;
					lastElements = received;
				}
			}
			if (element.f2 != 0 /* && element.f1.equals(host) */) {
				long lat = System.currentTimeMillis() - element.f2;
				LOG.info("Latency {} ms from machine " + element.f1, lat);
			}
		}
	});
	//System.out.println("plan = "+see.getExecutionPlan());;
	see.execute("Flink Throughput Job with: "+pt.toMap());
}
 
Example 16
Source File: ExactlyOnceWriter.java    From pravega-samples with Apache License 2.0
public static void main(String[] args) throws Exception {
    LOG.info("Starting ExactlyOnce checker ...");

    // initialize the parameter utility tool in order to retrieve input parameters
    ParameterTool params = ParameterTool.fromArgs(args);

    boolean exactlyOnce = Boolean.parseBoolean(params.get("exactlyonce", "true"));
    int numEvents = params.getInt("num-events", defaultNumEvents);

    PravegaConfig pravegaConfig = PravegaConfig
            .fromParams(params)
            .withControllerURI(URI.create(params.get(Constants.Default_URI_PARAM, Constants.Default_URI)))
            .withDefaultScope(params.get(Constants.SCOPE_PARAM, Constants.DEFAULT_SCOPE));

    // create the Pravega input stream (if necessary)
    Stream stream = Utils.createStream(
            pravegaConfig,
            params.get(Constants.STREAM_PARAM, Constants.DEFAULT_STREAM));

    // initialize Flink execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment()
            .enableCheckpointing(checkpointIntervalMillis, CheckpointingMode.EXACTLY_ONCE)
            .setParallelism(1);

    // Restart flink job from last checkpoint once.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0L));


    // create the Pravega sink to write a stream of text

    FlinkPravegaWriter<IntegerEvent> writer = FlinkPravegaWriter.<IntegerEvent>builder()
            .withPravegaConfig(pravegaConfig)
            .forStream(stream)
            .withEventRouter( new EventRouter())
            .withTxnLeaseRenewalPeriod(txnLeaseRenewalPeriod)
            .withWriterMode( exactlyOnce ? PravegaWriterMode.EXACTLY_ONCE : PravegaWriterMode.ATLEAST_ONCE )
            .withSerializationSchema(PravegaSerialization.serializationFor(IntegerEvent.class))
            .build();

    env
        .addSource(new ThrottledIntegerEventProducer(numEvents))
        .map(new FailingMapper<>(numEvents /2 ))  // simulate failure
        .addSink(writer)
        .setParallelism(2);

    // execute within the Flink environment
    env.execute("ExactlyOnce");


    LOG.info("Ending ExactlyOnce...");
}
 
Example 17
Source File: SocketWindowWordCount.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {

		// the host and the port to connect to
		final String hostname;
		final int port;
		try {
			final ParameterTool params = ParameterTool.fromArgs(args);
			hostname = params.has("hostname") ? params.get("hostname") : "localhost";
			port = params.getInt("port");
		} catch (Exception e) {
			System.err.println("No port specified. Please run 'SocketWindowWordCount " +
				"--hostname <hostname> --port <port>', where hostname (localhost by default) " +
				"and port is the address of the text server");
			System.err.println("To start a simple text server, run 'netcat -l <port>' and " +
				"type the input text into the command line");
			return;
		}

		// get the execution environment
		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// get input data by connecting to the socket
		DataStream<String> text = env.socketTextStream(hostname, port, "\n");

		// parse the data, group it, window it, and aggregate the counts
		DataStream<WordWithCount> windowCounts = text

				.flatMap(new FlatMapFunction<String, WordWithCount>() {
					@Override
					public void flatMap(String value, Collector<WordWithCount> out) {
						for (String word : value.split("\\s")) {
							out.collect(new WordWithCount(word, 1L));
						}
					}
				})

				.keyBy("word")
				.timeWindow(Time.seconds(5))

				.reduce(new ReduceFunction<WordWithCount>() {
					@Override
					public WordWithCount reduce(WordWithCount a, WordWithCount b) {
						return new WordWithCount(a.word, a.count + b.count);
					}
				});

		// print the results with a single thread, rather than in parallel
		windowCounts.print().setParallelism(1);

		env.execute("Socket Window WordCount");
	}
 
Example 18
Source File: SocketWindowWordCount.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		// the host and the port to connect to
		final String hostname;
		final int port;
		try {
			final ParameterTool params = ParameterTool.fromArgs(args);
			hostname = params.has("hostname") ? params.get("hostname") : "localhost";
			port = params.getInt("port");
		} catch (Exception e) {
			System.err.println("No port specified. Please run 'SocketWindowWordCount " +
				"--hostname <hostname> --port <port>', where hostname (localhost by default) " +
				"and port is the address of the text server");
			System.err.println("To start a simple text server, run 'netcat -l <port>' and " +
				"type the input text into the command line");
			return;
		}

		// get the execution environment
		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// get input data by connecting to the socket
		DataStream<String> text = env.socketTextStream(hostname, port, "\n");

		// parse the data, group it, window it, and aggregate the counts
		DataStream<WordWithCount> windowCounts = text

				.flatMap(new FlatMapFunction<String, WordWithCount>() {
					@Override
					public void flatMap(String value, Collector<WordWithCount> out) {
						for (String word : value.split("\\s")) {
							out.collect(new WordWithCount(word, 1L));
						}
					}
				})

				.keyBy("word")
				.timeWindow(Time.seconds(5))

				.reduce(new ReduceFunction<WordWithCount>() {
					@Override
					public WordWithCount reduce(WordWithCount a, WordWithCount b) {
						return new WordWithCount(a.word, a.count + b.count);
					}
				});

		// print the results with a single thread, rather than in parallel
		windowCounts.print().setParallelism(1);

		env.execute("Socket Window WordCount");
	}
 
Example 19
Source File: WindowWordCount.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {

		final ParameterTool params = ParameterTool.fromArgs(args);

		// set up the execution environment
		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// get input data
		DataStream<String> text;
		if (params.has("input")) {
			// read the text file from given input path
			text = env.readTextFile(params.get("input"));
		} else {
			System.out.println("Executing WindowWordCount example with default input data set.");
			System.out.println("Use --input to specify file input.");
			// get default test text data
			text = env.fromElements(WordCountData.WORDS);
		}

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		final int windowSize = params.getInt("window", 10);
		final int slideSize = params.getInt("slide", 5);

		DataStream<Tuple2<String, Integer>> counts =
		// split up the lines in pairs (2-tuples) containing: (word,1)
		text.flatMap(new WordCount.Tokenizer())
				// create windows of windowSize records slided every slideSize records
				.keyBy(0)
				.countWindow(windowSize, slideSize)
				// group by the tuple field "0" and sum up tuple field "1"
				.sum(1);

		// emit result
		if (params.has("output")) {
			counts.writeAsText(params.get("output"));
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			counts.print();
		}

		// execute program
		env.execute("WindowWordCount");
	}
 
Example 20
Source File: TridentForwardThroughput.java    From flink-perf with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool pt = ParameterTool.fromArgs(args);

	int par = pt.getInt("para");

	TridentTopology topology = new TridentTopology();
	Stream sourceStream = topology.newStream("source", new Generator(pt)).parallelismHint(pt.getInt("sourceParallelism"));
	sourceStream.localOrShuffle().each(FIELDS, new Sink(pt), new Fields("dontcare"));

	Config conf = new Config();
	conf.setDebug(false);

//	conf.setMaxSpoutPending(pt.getInt("maxPending", 1000));

	//System.exit(1);

	if (!pt.has("local")) {
		conf.setNumWorkers(par);

		StormSubmitter.submitTopologyWithProgressBar("forward-throughput-"+pt.get("name", "no_name"), conf, topology.build());
	}
	else {
		conf.setMaxTaskParallelism(par);

		LocalCluster cluster = new LocalCluster();
		cluster.submitTopology("forward-throughput", conf, topology.build());

		Thread.sleep(300000);

		cluster.shutdown();
	}

}