org.apache.flink.streaming.api.environment.LocalStreamEnvironment Java Examples

The following examples show how to use org.apache.flink.streaming.api.environment.LocalStreamEnvironment, drawn from a number of open-source projects. Each example lists its original source file and project.
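Before the project examples, here is a minimal self-contained sketch of the class in action (a hypothetical demo; it assumes a Flink version, like those used below, in which StreamExecutionEnvironment.createLocalEnvironment returns a LocalStreamEnvironment):

import org.apache.flink.streaming.api.environment.LocalStreamEnvironment;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class LocalStreamEnvironmentDemo {
    public static void main(String[] args) throws Exception {
        // Runs the pipeline in-process, in the same JVM, with parallelism 2.
        LocalStreamEnvironment env = StreamExecutionEnvironment.createLocalEnvironment(2);
        env.fromElements("a", "b", "c")
                .print();
        env.execute("local-demo");
    }
}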
Example #1
Source File: CrawlToolIT.java    From flink-crawler with Apache License 2.0
@Test
public void test() throws Exception {
    CrawlToolOptions options = new CrawlToolOptions();
    options.setSeedUrlsFilename("./src/it/resources/farsi-seeds.txt");
    options.setCommonCrawlId("2017-22");
    options.setCommonCrawlCacheDir("./target/test/CrawlToolTest/cc-cache/");
    options.setForceCrawlDelay(0L);
    options.setMaxContentSize(100000);
    options.setWARCContentPath("./target/test/CrawlToolIT/output/cc-farsi-content.txt");
    options.setMaxCrawlDuration(20);
    options.setTimeout(10);
    
    LocalStreamEnvironment env = new LocalStreamEnvironmentWithAsyncExecution();

    CrawlTool.run(env, options);

    // TODO confirm results
}
 
Example #2
Source File: FlinkUtilsTest.java    From flink-crawler with Apache License 2.0
@Test
public void testMakeKeyForOperatorIndex() throws Exception {
    final int parallelism = 2;
    LocalStreamEnvironment env = new LocalStreamEnvironment();
    env.setParallelism(parallelism);

    final int maxParallelism = env.getMaxParallelism();

    DataStreamSource<Tuple2<String, Float>> pages = env.fromElements(Tuple2.of("page0", 0.0f),
            Tuple2.of("page0", 1.0f), Tuple2.of("page1", 10.0f), Tuple2.of("page666", 6660.0f));
    DataStreamSource<Tuple2<String, Float>> epsilon = env.fromElements(
            Tuple2.of(FlinkUtils.makeKeyForOperatorIndex("task:%d", maxParallelism, parallelism,
                    0), 0.5f),
            Tuple2.of(FlinkUtils.makeKeyForOperatorIndex("task:%d", maxParallelism, parallelism,
                    1), 0.25f));

    pages.union(epsilon).keyBy(0).process(new MyProcessFunction()).print();

    try {
        env.execute();
    } catch (JobExecutionException e) {
        Assert.fail(e.getCause().getMessage());
    }
}
 
Example #3
Source File: BatchExecutorTest.java    From flink with Apache License 2.0
public BatchExecutorTest() {
	batchExecutor = new BatchExecutor(LocalStreamEnvironment.getExecutionEnvironment());

	final Transformation testTransform = new LegacySourceTransformation<>(
		"MockTransform",
		new StreamSource<>(new SourceFunction<String>() {
			@Override
			public void run(SourceContext<String> ctx) {
			}

			@Override
			public void cancel() {
			}
		}),
		BasicTypeInfo.STRING_TYPE_INFO,
		1);
	Pipeline pipeline = batchExecutor.createPipeline(
		Collections.singletonList(testTransform), new TableConfig(), "Test Job");
	streamGraph = (StreamGraph) pipeline;
}
 
Example #4
Source File: UnalignedCheckpointITCase.java    From flink with Apache License 2.0
@Nonnull
private LocalStreamEnvironment createEnv(int parallelism, int slotsPerTaskManager, boolean slotSharing) throws IOException {
	Configuration conf = new Configuration();
	conf.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, slotsPerTaskManager);
	conf.setFloat(TaskManagerOptions.NETWORK_MEMORY_FRACTION, .9f);
	conf.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER,
			slotSharing ? (parallelism + slotsPerTaskManager - 1) / slotsPerTaskManager : parallelism * 3);

	conf.setString(CheckpointingOptions.STATE_BACKEND, "filesystem");
	conf.setString(CheckpointingOptions.CHECKPOINTS_DIRECTORY, temp.newFolder().toURI().toString());

	final LocalStreamEnvironment env = StreamExecutionEnvironment.createLocalEnvironment(parallelism, conf);
	env.enableCheckpointing(100);
	// keep in sync with FailingMapper in #createDAG
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(5, Time.milliseconds(100)));
	env.getCheckpointConfig().enableUnalignedCheckpoints(true);
	return env;
}
 
Example #5
Source File: FlinkTestUtil.java    From AthenaX with Apache License 2.0
static LocalFlinkMiniCluster execute(LocalStreamEnvironment env,
                                     Configuration conf, String jobName) throws Exception {
  StreamGraph streamGraph = env.getStreamGraph();
  streamGraph.setJobName(jobName);
  JobGraph jobGraph = streamGraph.getJobGraph();
  Configuration configuration = new Configuration(conf);
  configuration.addAll(jobGraph.getJobConfiguration());
  configuration.setLong("taskmanager.memory.size", -1L);
  configuration.setInteger("taskmanager.numberOfTaskSlots", jobGraph.getMaximumParallelism());

  LocalFlinkMiniCluster cluster = new LocalFlinkMiniCluster(configuration, true);
  cluster.start();
  cluster.submitJobDetached(jobGraph);
  return cluster;
}
 
Example #6
Source File: PravegaEventPublisher.java    From pravega-samples with Apache License 2.0
private void publishUsingFlinkConnector(AppConfiguration appConfiguration) throws Exception {

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		Stream streamId = getStreamId();
		FlinkPravegaWriter<Event> writer = FlinkPravegaWriter.<Event>builder()
				.withPravegaConfig(getPravegaConfig())
				.forStream(streamId)
				.withSerializationSchema(PravegaSerialization.serializationFor(Event.class))
				.withEventRouter(new EventRouter())
				.build();

		int parallelism = appConfiguration.getPipeline().getParallelism();

		if (appConfiguration.getProducer().isControlledEnv()) {
			if (!(env instanceof LocalStreamEnvironment)) {
				throw new Exception("Use a local Flink environment or set controlledEnv to false in app.json.");
			}
			//setting parallelism to 1 since the controlled run allows user input to trigger error events
			env.setParallelism(1);
			long latency = appConfiguration.getProducer().getLatencyInMilliSec();
			int capacity = appConfiguration.getProducer().getCapacity();
			ControlledSourceContextProducer controlledSourceContextProducer = new ControlledSourceContextProducer(capacity, latency);
			env.addSource(controlledSourceContextProducer).name("EventSource").addSink(writer).name("Pravega-" + streamId.getStreamName());
		} else {
			env.setParallelism(parallelism);
			SourceContextProducer sourceContextProducer = new SourceContextProducer(appConfiguration);
			env.addSource(sourceContextProducer).name("EventSource").addSink(writer).name("Pravega-" + streamId.getStreamName());
		}

		env.execute(appConfiguration.getName() + "-producer");

	}
 
Example #7
Source File: FlinkExecutionEnvironmentsTest.java    From beam with Apache License 2.0
@Test
public void useDefaultParallelismFromContextBatch() {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setRunner(TestFlinkRunner.class);

  ExecutionEnvironment bev =
      FlinkExecutionEnvironments.createBatchExecutionEnvironment(
          options, Collections.emptyList());

  assertThat(bev, instanceOf(LocalEnvironment.class));
  assertThat(options.getParallelism(), is(LocalStreamEnvironment.getDefaultLocalParallelism()));
  assertThat(bev.getParallelism(), is(LocalStreamEnvironment.getDefaultLocalParallelism()));
}
 
Example #8
Source File: FlinkExecutionEnvironmentsTest.java    From beam with Apache License 2.0
@Test
public void useDefaultParallelismFromContextStreaming() {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setRunner(TestFlinkRunner.class);

  StreamExecutionEnvironment sev =
      FlinkExecutionEnvironments.createStreamExecutionEnvironment(
          options, Collections.emptyList());

  assertThat(sev, instanceOf(LocalStreamEnvironment.class));
  assertThat(options.getParallelism(), is(LocalStreamEnvironment.getDefaultLocalParallelism()));
  assertThat(sev.getParallelism(), is(LocalStreamEnvironment.getDefaultLocalParallelism()));
}
 
Example #9
Source File: LocalStreamEnvironmentITCase.java    From flink with Apache License 2.0
/**
 * This test verifies that the execution environment can be used to execute a
 * single job with multiple slots.
 */
@Test
public void testRunIsolatedJob() throws Exception {
	LocalStreamEnvironment env = new LocalStreamEnvironment();
	assertEquals(1, env.getParallelism());

	addSmallBoundedJob(env, 3);
	env.execute();
}
 
Example #10
Source File: LocalStreamEnvironmentITCase.java    From flink with Apache License 2.0
/**
 * This test verifies that the execution environment can be used to execute multiple
 * bounded streaming jobs after one another.
 */
@Test
public void testMultipleJobsAfterAnother() throws Exception {
	LocalStreamEnvironment env = new LocalStreamEnvironment();

	addSmallBoundedJob(env, 3);
	env.execute();

	addSmallBoundedJob(env, 5);
	env.execute();
}
 
Example #11
Source File: Bootstrap.java    From pinpoint with Apache License 2.0
public StreamExecutionEnvironment createStreamExecutionEnvironment() {
    if (flinkConfiguration.isLocalforFlinkStreamExecutionEnvironment()) {
        LocalStreamEnvironment localEnvironment = StreamExecutionEnvironment.createLocalEnvironment();
        localEnvironment.setParallelism(1);
        return localEnvironment;
    } else {
        return StreamExecutionEnvironment.getExecutionEnvironment();
    }
}
 
Example #12
Source File: ProcessTaxiStream.java    From amazon-kinesis-analytics-taxi-consumer with Apache License 2.0
public static void main(String[] args) throws Exception {
  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();


  ParameterTool parameter;

  if (env instanceof LocalStreamEnvironment) {
    //read the parameters specified from the command line
    parameter = ParameterTool.fromArgs(args);
  } else {
    //read the parameters from the Kinesis Analytics environment
    Map<String, Properties> applicationProperties = KinesisAnalyticsRuntime.getApplicationProperties();

    Properties flinkProperties = applicationProperties.get("FlinkApplicationProperties");

    if (flinkProperties == null) {
      throw new RuntimeException("Unable to load FlinkApplicationProperties properties from the Kinesis Analytics Runtime.");
    }

    parameter = ParameterToolUtils.fromApplicationProperties(flinkProperties);
  }


  //enable event time processing
  if (parameter.get("EventTime", "true").equals("true")) {
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
  }


  //set Kinesis consumer properties
  Properties kinesisConsumerConfig = new Properties();
  //set the region the Kinesis stream is located in
  kinesisConsumerConfig.setProperty(AWSConfigConstants.AWS_REGION, parameter.get("Region", DEFAULT_REGION_NAME));
  //obtain credentials through the DefaultCredentialsProviderChain, which includes the instance metadata
  kinesisConsumerConfig.setProperty(AWSConfigConstants.AWS_CREDENTIALS_PROVIDER, "AUTO");
  //poll new events from the Kinesis stream once every second
  kinesisConsumerConfig.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, "1000");


  //create Kinesis source
  DataStream<Event> kinesisStream = env.addSource(new FlinkKinesisConsumer<>(
      //read events from the Kinesis stream passed in as a parameter
      parameter.get("InputStreamName", DEFAULT_STREAM_NAME),
      //deserialize events with EventSchema
      new EventDeserializationSchema(),
      //using the previously defined properties
      kinesisConsumerConfig
  ));


  DataStream<TripEvent> trips = kinesisStream
      //extract watermarks from watermark events
      .assignTimestampsAndWatermarks(new TimestampAssigner())
      //remove all events that aren't TripEvents
      .filter(event -> TripEvent.class.isAssignableFrom(event.getClass()))
      //cast Event to TripEvent
      .map(event -> (TripEvent) event)
      //remove all events with geo coordinates outside of NYC
      .filter(GeoUtils::hasValidCoordinates);


  DataStream<PickupCount> pickupCounts = trips
      //compute geo hash for every event
      .map(new TripToGeoHash())
      .keyBy("geoHash")
      //collect all events in a one hour window
      .timeWindow(Time.hours(1))
      //count events per geo hash in the one hour window
      .apply(new CountByGeoHash());


  DataStream<AverageTripDuration> tripDurations = trips
      .flatMap(new TripToTripDuration())
      .keyBy("pickupGeoHash", "airportCode")
      .timeWindow(Time.hours(1))
      .apply(new TripDurationToAverageTripDuration());


  if (parameter.has("ElasticsearchEndpoint")) {
    String elasticsearchEndpoint = parameter.get("ElasticsearchEndpoint");
    final String region = parameter.get("Region", DEFAULT_REGION_NAME);

    //remove trailing /
    if (elasticsearchEndpoint.endsWith("/")) {
      elasticsearchEndpoint = elasticsearchEndpoint.substring(0, elasticsearchEndpoint.length() - 1);
    }

    pickupCounts.addSink(AmazonElasticsearchSink.buildElasticsearchSink(elasticsearchEndpoint, region, "pickup_count", "pickup_count"));
    tripDurations.addSink(AmazonElasticsearchSink.buildElasticsearchSink(elasticsearchEndpoint, region, "trip_duration", "trip_duration"));
  }


  LOG.info("Reading events from stream {}", parameter.get("InputStreamName", DEFAULT_STREAM_NAME));

  env.execute();
}
 
Example #13
Source File: TestUserAgentAnalysisMapperClass.java    From yauaa with Apache License 2.0
@Test
public void testClassDefinitionDataStream() throws Exception {
    StreamExecutionEnvironment environment = LocalStreamEnvironment.getExecutionEnvironment();

    DataStream<TestRecord> resultDataStream = environment
        .fromElements(
            "Mozilla/5.0 (X11; Linux x86_64) " +
                "AppleWebKit/537.36 (KHTML, like Gecko) " +
                "Chrome/48.0.2564.82 Safari/537.36",

            "Mozilla/5.0 (Linux; Android 7.0; Nexus 6 Build/NBD90Z) " +
                "AppleWebKit/537.36 (KHTML, like Gecko) " +
                "Chrome/53.0.2785.124 Mobile Safari/537.36"
        )

        .map((MapFunction<String, TestRecord>) TestRecord::new)

        .map(new MyUserAgentAnalysisMapper());

    List<TestRecord> result = new ArrayList<>(5);
    DataStreamUtils
        .collect(resultDataStream)
        .forEachRemaining(result::add);

    assertEquals(2, result.size());

    assertThat(result, hasItems(
        new TestRecord(
            "Mozilla/5.0 (X11; Linux x86_64) " +
                "AppleWebKit/537.36 (KHTML, like Gecko) " +
                "Chrome/48.0.2564.82 Safari/537.36",
            "Desktop",
            "Chrome 48.0.2564.82",
            null),

        new TestRecord(
            "Mozilla/5.0 (Linux; Android 7.0; Nexus 6 Build/NBD90Z) " +
                "AppleWebKit/537.36 (KHTML, like Gecko) " +
                "Chrome/53.0.2785.124 Mobile Safari/537.36",
            "Phone",
            "Chrome 53.0.2785.124",
            null)
    ));
}
 
Example #14
Source File: PythonEnvironmentFactory.java    From Flink-CEPplus with Apache License 2.0
/**
 * Creates a {@link LocalStreamEnvironment}. The local execution environment
 * will run the program in a multi-threaded fashion in the same JVM as the
 * environment was created in. The default parallelism of the local
 * environment is the number of hardware contexts (CPU cores / threads),
 * unless it was specified differently by {@link PythonStreamExecutionEnvironment#set_parallelism(int)}.
 *
 * @param config Pass a custom configuration into the cluster
 * @return A local execution environment with the specified parallelism.
 */
public PythonStreamExecutionEnvironment create_local_execution_environment(Configuration config) {
	return new PythonStreamExecutionEnvironment(new LocalStreamEnvironment(config), new Path(localTmpPath), scriptName);
}
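
For comparison, here is a hedged sketch of wiring a custom Configuration into a LocalStreamEnvironment directly, using the same constructor as Example #14 and the option keys from Example #4 (the wrapper class is hypothetical):

import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.TaskManagerOptions;
import org.apache.flink.streaming.api.environment.LocalStreamEnvironment;

public class ConfiguredLocalEnvironment {
    public static LocalStreamEnvironment create() {
        Configuration conf = new Configuration();
        // Give the embedded task manager enough slots for a parallelism-4 job.
        conf.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, 4);
        LocalStreamEnvironment env = new LocalStreamEnvironment(conf);
        env.setParallelism(4);
        return env;
    }
}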