Java Code Examples for org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#getParallelism()

The following examples show how to use org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#getParallelism(). They are taken from open-source projects; the source file, project, and license for each example are listed above its code.
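Before the individual examples, a quick note on what the method returns: getParallelism() reads the default parallelism currently configured on the StreamExecutionEnvironment, which is the value the constructors and translators below hand to their transformations. A minimal, self-contained sketch (the class name and values are illustrative only):

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class GetParallelismSketch {

	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// Set the default parallelism for operators created on this environment.
		env.setParallelism(4);

		// getParallelism() returns the default configured above; operators that do not
		// call setParallelism(...) themselves inherit it.
		int defaultParallelism = env.getParallelism(); // 4 in this sketch

		System.out.println("Default parallelism: " + defaultParallelism);

		env.fromElements(1, 2, 3).print();
		env.execute("getParallelism sketch");
	}
}
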
Example 1
Source File: StreamingJobGraphGeneratorTest.java    From flink with Apache License 2.0
@Test
public void testOperatorCoordinatorAddedToJobVertex() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	DataStream<Integer> stream = env.fromSource(
			new MockSource(Boundedness.BOUNDED, 1),
			WatermarkStrategy.noWatermarks(),
			"TestingSource");

	OneInputTransformation<Integer, Integer> resultTransform = new OneInputTransformation<Integer, Integer>(
			stream.getTransformation(),
			"AnyName",
			new CoordinatedTransformOperatorFactory(),
			BasicTypeInfo.INT_TYPE_INFO,
			env.getParallelism());

	new TestingSingleOutputStreamOperator<>(env, resultTransform).print();

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

	assertEquals(2, jobGraph.getVerticesAsArray()[0].getOperatorCoordinators().size());
}
 
Example 2
Source File: DataStreamSource.java    From Flink-CEPplus with Apache License 2.0
public DataStreamSource(StreamExecutionEnvironment environment,
		TypeInformation<T> outTypeInfo, StreamSource<T, ?> operator,
		boolean isParallel, String sourceName) {
	super(environment, new SourceTransformation<>(sourceName, operator, outTypeInfo, environment.getParallelism()));

	this.isParallel = isParallel;
	if (!isParallel) {
		setParallelism(1);
	}
}
 
Example 3
Source File: DataStreamSource.java    From flink with Apache License 2.0
public DataStreamSource(StreamExecutionEnvironment environment,
		TypeInformation<T> outTypeInfo, StreamSource<T, ?> operator,
		boolean isParallel, String sourceName) {
	super(environment, new SourceTransformation<>(sourceName, operator, outTypeInfo, environment.getParallelism()));

	this.isParallel = isParallel;
	if (!isParallel) {
		setParallelism(1);
	}
}
 
Example 4
Source File: BroadcastTriangleCount.java    From gelly-streaming with Apache License 2.0
public static void main(String[] args) throws Exception {

		// Set up the environment
		if(!parseParameters(args)) {
			return;
		}

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		DataStream<Edge<Long, NullValue>> edges = getEdgesDataSet(env);

		int localSamples = samples / env.getParallelism();

		// Count triangles
		DataStream<Tuple2<Integer, Integer>> triangles = edges
				.broadcast()
				.flatMap(new TriangleSampler(localSamples, vertexCount))
				.flatMap(new TriangleSummer(samples, vertexCount))
				.setParallelism(1);

		// Emit the results
		if (fileOutput) {
			triangles.writeAsCsv(outputPath);
		} else {
			triangles.print();
		}

		env.execute("Broadcast Triangle Count");
	}
 
Example 5
Source File: IncidenceSamplingTriangleCount.java    From gelly-streaming with Apache License 2.0
public static void main(String[] args) throws Exception {

		// Set up the environment
		if(!parseParameters(args)) {
			return;
		}

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		DataStream<Edge<Long, NullValue>> edges = getEdgesDataSet(env);

		int localSamples = samples / env.getParallelism();

		// Count triangles
		DataStream<Tuple2<Integer, Integer>> triangles = edges
				.flatMap(new EdgeSampleMapper(localSamples, env.getParallelism()))
				.setParallelism(1)
				.keyBy(0)
				.flatMap(new TriangleSampleMapper(localSamples, vertexCount))
				.flatMap(new TriangleSummer(samples, vertexCount))
				.setParallelism(1);

		// Emit the results
		if (fileOutput) {
			triangles.writeAsCsv(outputPath);
		} else {
			triangles.print();
		}

		env.execute("Incidence Sampling Triangle Count");
	}
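In the two gelly-streaming examples above, the driver divides the total sample budget by env.getParallelism() so that each parallel instance of the sampler processes only its share. Since env.getParallelism() is read once at graph-construction time, the same share can also be computed inside a rich function at runtime from its RuntimeContext. The following sketch illustrates that alternative; SampleShareMapper and its emission logic are illustrative only and are not part of gelly-streaming:

import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;

public class SampleShareMapper extends RichFlatMapFunction<Long, Long> {

	private final int totalSamples;
	private int localSamples;

	public SampleShareMapper(int totalSamples) {
		this.totalSamples = totalSamples;
	}

	@Override
	public void open(Configuration parameters) {
		// Each parallel subtask derives its own share of the total budget at runtime,
		// instead of the driver pre-computing it with env.getParallelism().
		int parallelism = getRuntimeContext().getNumberOfParallelSubtasks();
		this.localSamples = totalSamples / parallelism;
	}

	@Override
	public void flatMap(Long value, Collector<Long> out) {
		// Forward at most localSamples elements per subtask (placeholder logic).
		if (localSamples > 0) {
			localSamples--;
			out.collect(value);
		}
	}
}
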
 
Example 6
Source File: HiveTableSource.java    From flink with Apache License 2.0
private DataStream<RowData> createStreamSourceForPartitionTable(
		StreamExecutionEnvironment execEnv,
		TypeInformation<RowData> typeInfo,
		HiveTableInputFormat inputFormat) {
	Configuration configuration = new Configuration();
	catalogTable.getOptions().forEach(configuration::setString);

	String consumeOrderStr = configuration.get(STREAMING_SOURCE_CONSUME_ORDER);
	ConsumeOrder consumeOrder = ConsumeOrder.getConsumeOrder(consumeOrderStr);
	String consumeOffset = configuration.get(STREAMING_SOURCE_CONSUME_START_OFFSET);
	String extractorKind = configuration.get(PARTITION_TIME_EXTRACTOR_KIND);
	String extractorClass = configuration.get(PARTITION_TIME_EXTRACTOR_CLASS);
	String extractorPattern = configuration.get(PARTITION_TIME_EXTRACTOR_TIMESTAMP_PATTERN);
	Duration monitorInterval = configuration.get(STREAMING_SOURCE_MONITOR_INTERVAL);

	HiveContinuousMonitoringFunction monitoringFunction = new HiveContinuousMonitoringFunction(
			hiveShim,
			jobConf,
			tablePath,
			catalogTable,
			execEnv.getParallelism(),
			consumeOrder,
			consumeOffset,
			extractorKind,
			extractorClass,
			extractorPattern,
			monitorInterval.toMillis());

	ContinuousFileReaderOperatorFactory<RowData, TimestampedHiveInputSplit> factory =
			new ContinuousFileReaderOperatorFactory<>(inputFormat);

	String sourceName = "HiveMonitoringFunction";
	SingleOutputStreamOperator<RowData> source = execEnv
			.addSource(monitoringFunction, sourceName)
			.transform("Split Reader: " + sourceName, typeInfo, factory);

	return new DataStreamSource<>(source);
}
 
Example 7
Source File: HiveTableSource.java    From flink with Apache License 2.0
private DataStream<RowData> createStreamSourceForNonPartitionTable(
		StreamExecutionEnvironment execEnv,
		TypeInformation<RowData> typeInfo,
		HiveTableInputFormat inputFormat,
		HiveTablePartition hiveTable) {
	HiveTableFileInputFormat fileInputFormat = new HiveTableFileInputFormat(inputFormat, hiveTable);

	Configuration configuration = new Configuration();
	catalogTable.getOptions().forEach(configuration::setString);
	String consumeOrderStr = configuration.get(STREAMING_SOURCE_CONSUME_ORDER);
	ConsumeOrder consumeOrder = ConsumeOrder.getConsumeOrder(consumeOrderStr);
	if (consumeOrder != ConsumeOrder.CREATE_TIME_ORDER) {
		throw new UnsupportedOperationException(
				"Only " + ConsumeOrder.CREATE_TIME_ORDER + " is supported for non partition table.");
	}

	String consumeOffset = configuration.get(STREAMING_SOURCE_CONSUME_START_OFFSET);
	// convert to local-zone millis instead of UTC millis
	long currentReadTime = TimestampData.fromLocalDateTime(toLocalDateTime(consumeOffset))
			.toTimestamp().getTime();

	Duration monitorInterval = configuration.get(STREAMING_SOURCE_MONITOR_INTERVAL);

	ContinuousFileMonitoringFunction<RowData> monitoringFunction =
			new ContinuousFileMonitoringFunction<>(
					fileInputFormat,
					FileProcessingMode.PROCESS_CONTINUOUSLY,
					execEnv.getParallelism(),
					monitorInterval.toMillis(),
					currentReadTime);

	ContinuousFileReaderOperatorFactory<RowData, TimestampedFileInputSplit> factory =
			new ContinuousFileReaderOperatorFactory<>(fileInputFormat);

	String sourceName = "HiveFileMonitoringFunction";
	SingleOutputStreamOperator<RowData> source = execEnv.addSource(monitoringFunction, sourceName)
			.transform("Split Reader: " + sourceName, typeInfo, factory);

	return new DataStreamSource<>(source);
}
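Both Hive source paths above pass execEnv.getParallelism() to a monitoring function as the reader parallelism. For plain file sources, the equivalent wiring is usually left to StreamExecutionEnvironment#readFile, which in the Flink versions these examples target creates the ContinuousFileMonitoringFunction itself and feeds it the environment's parallelism. A minimal sketch of that higher-level route (the input path and interval are placeholders):

import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.FileProcessingMode;

public class ReadFileSketch {

	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		String inputPath = "hdfs:///path/to/dir"; // placeholder
		TextInputFormat format = new TextInputFormat(new Path(inputPath));

		// readFile sets up continuous monitoring of the directory; the reader
		// parallelism it passes along is derived from env.getParallelism().
		DataStream<String> lines = env.readFile(
				format,
				inputPath,
				FileProcessingMode.PROCESS_CONTINUOUSLY,
				10_000L); // monitoring interval in milliseconds

		lines.print();
		env.execute("readFile sketch");
	}
}
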
 
Example 8
Source File: DataStreamSource.java    From flink with Apache License 2.0
/**
 * The constructor used to create legacy sources.
 */
public DataStreamSource(
		StreamExecutionEnvironment environment,
		TypeInformation<T> outTypeInfo,
		StreamSource<T, ?> operator,
		boolean isParallel,
		String sourceName) {
	super(environment, new LegacySourceTransformation<>(sourceName, operator, outTypeInfo, environment.getParallelism()));

	this.isParallel = isParallel;
	if (!isParallel) {
		setParallelism(1);
	}
}
 
Example 9
Source File: DataStreamSource.java    From flink with Apache License 2.0
/**
 * Constructor for new Sources (FLIP-27).
 */
public DataStreamSource(
		StreamExecutionEnvironment environment,
		Source<T, ?, ?> source,
		WatermarkStrategy<T> timestampsAndWatermarks,
		TypeInformation<T> outTypeInfo,
		String sourceName) {
	super(environment,
			new SourceTransformation<>(
					sourceName,
					new SourceOperatorFactory<>(source, timestampsAndWatermarks),
					outTypeInfo,
					environment.getParallelism()));
}
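Both constructors above seed the source transformation's parallelism with environment.getParallelism(); the legacy constructor then forces it back to 1 for non-parallel sources. A short sketch of the observable effect, assuming a fresh environment whose default parallelism is set explicitly:

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class SourceParallelismSketch {

	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(4);

		// fromElements uses a non-parallel legacy source, so even though the constructor
		// is seeded with env.getParallelism(), the parallelism is reset to 1.
		DataStreamSource<Integer> source = env.fromElements(1, 2, 3);
		System.out.println(source.getParallelism()); // 1

		// A downstream operator without an explicit setting inherits the environment default.
		DataStream<Integer> filtered = source.filter(x -> x > 0);
		System.out.println(filtered.getParallelism()); // 4, i.e. env.getParallelism()

		filtered.print();
		env.execute("source parallelism sketch");
	}
}
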
 
Example 10
Source File: ContinuousFileProcessingITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testProgram() throws Exception {

	/*
	* This test checks the interplay between the monitor and the reader
	* and also the failExternally() functionality. To test the latter we
	* set the parallelism to 1 so that we have the chaining between the sink,
	* which throws the SuccessException to signal the end of the test, and the
	* reader.
	* */

	TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
	format.setFilePath(hdfsURI);
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	// create the stream execution environment with a parallelism > 1 to test
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(PARALLELISM);

	ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format,
			FileProcessingMode.PROCESS_CONTINUOUSLY,
			env.getParallelism(), INTERVAL);

	// the monitor has always DOP 1
	DataStream<TimestampedFileInputSplit> splits = env.addSource(monitoringFunction);
	Assert.assertEquals(1, splits.getParallelism());

	ContinuousFileReaderOperator<String> reader = new ContinuousFileReaderOperator<>(format);
	TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format);

	// the readers can be multiple
	DataStream<String> content = splits.transform("FileSplitReader", typeInfo, reader);
	Assert.assertEquals(PARALLELISM, content.getParallelism());

	// finally for the sink we set the parallelism to 1 so that we can verify the output
	TestingSinkFunction sink = new TestingSinkFunction();
	content.addSink(sink).setParallelism(1);

	Thread job = new Thread() {

		@Override
		public void run() {
			try {
				env.execute("ContinuousFileProcessingITCase Job.");
			} catch (Exception e) {
				Throwable th = e;
				for (int depth = 0; depth < 20; depth++) {
					if (th instanceof SuccessException) {
						return;
					} else if (th.getCause() != null) {
						th = th.getCause();
					} else {
						break;
					}
				}
				e.printStackTrace();
				Assert.fail(e.getMessage());
			}
		}
	};
	job.start();

	// The modification time of the last created file.
	long lastCreatedModTime = Long.MIN_VALUE;

	// create the files to be read
	for (int i = 0; i < NO_OF_FILES; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> tmpFile;
		long modTime;
		do {

			// give it some time so that the files have
			// different modification timestamps.
			Thread.sleep(50);

			tmpFile = fillWithData(hdfsURI, "file", i, "This is test line.");

			modTime = hdfs.getFileStatus(tmpFile.f0).getModificationTime();
			if (modTime <= lastCreatedModTime) {
				// delete the last created file to recreate it with a different timestamp
				hdfs.delete(tmpFile.f0, false);
			}
		} while (modTime <= lastCreatedModTime);
		lastCreatedModTime = modTime;

		// put the contents in the expected results list before the reader picks them
		// this is to guarantee that they are in before the reader finishes (avoid race conditions)
		expectedContents.put(i, tmpFile.f1);

		org.apache.hadoop.fs.Path file =
			new org.apache.hadoop.fs.Path(hdfsURI + "/file" + i);
		hdfs.rename(tmpFile.f0, file);
		Assert.assertTrue(hdfs.exists(file));
	}

	// wait for the job to finish.
	job.join();
}
 
Example 11
Source File: ContinuousFileProcessingITCase.java    From flink with Apache License 2.0
@Test
public void testProgram() throws Exception {

	/*
	* This test checks the interplay between the monitor and the reader
	* and also the failExternally() functionality. To test the latter we
	* set the parallelism to 1 so that we have the chaining between the sink,
	* which throws the SuccessException to signal the end of the test, and the
	* reader.
	* */

	TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
	format.setFilePath(hdfsURI);
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	// create the stream execution environment with a parallelism > 1 to test
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(PARALLELISM);

	ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format,
			FileProcessingMode.PROCESS_CONTINUOUSLY,
			env.getParallelism(), INTERVAL);

	// the monitor has always DOP 1
	DataStream<TimestampedFileInputSplit> splits = env.addSource(monitoringFunction);
	Assert.assertEquals(1, splits.getParallelism());

	ContinuousFileReaderOperator<String> reader = new ContinuousFileReaderOperator<>(format);
	TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format);

	// the readers can be multiple
	DataStream<String> content = splits.transform("FileSplitReader", typeInfo, reader);
	Assert.assertEquals(PARALLELISM, content.getParallelism());

	// finally for the sink we set the parallelism to 1 so that we can verify the output
	TestingSinkFunction sink = new TestingSinkFunction();
	content.addSink(sink).setParallelism(1);

	Thread job = new Thread() {

		@Override
		public void run() {
			try {
				env.execute("ContinuousFileProcessingITCase Job.");
			} catch (Exception e) {
				Throwable th = e;
				for (int depth = 0; depth < 20; depth++) {
					if (th instanceof SuccessException) {
						return;
					} else if (th.getCause() != null) {
						th = th.getCause();
					} else {
						break;
					}
				}
				e.printStackTrace();
				Assert.fail(e.getMessage());
			}
		}
	};
	job.start();

	// The modification time of the last created file.
	long lastCreatedModTime = Long.MIN_VALUE;

	// create the files to be read
	for (int i = 0; i < NO_OF_FILES; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> tmpFile;
		long modTime;
		do {

			// give it some time so that the files have
			// different modification timestamps.
			Thread.sleep(50);

			tmpFile = fillWithData(hdfsURI, "file", i, "This is test line.");

			modTime = hdfs.getFileStatus(tmpFile.f0).getModificationTime();
			if (modTime <= lastCreatedModTime) {
				// delete the last created file to recreate it with a different timestamp
				hdfs.delete(tmpFile.f0, false);
			}
		} while (modTime <= lastCreatedModTime);
		lastCreatedModTime = modTime;

		// put the contents in the expected results list before the reader picks them
		// this is to guarantee that they are in before the reader finishes (avoid race conditions)
		expectedContents.put(i, tmpFile.f1);

		org.apache.hadoop.fs.Path file =
			new org.apache.hadoop.fs.Path(hdfsURI + "/file" + i);
		hdfs.rename(tmpFile.f0, file);
		Assert.assertTrue(hdfs.exists(file));
	}

	// wait for the job to finish.
	job.join();
}
 
Example 12
Source File: FlinkStreamingPortablePipelineTranslator.java    From beam with Apache License 2.0
private static <T> DataStream<WindowedValue<T>> translateUnboundedSource(
    String transformName,
    String outputCollectionId,
    RunnerApi.ReadPayload payload,
    RunnerApi.Pipeline pipeline,
    PipelineOptions pipelineOptions,
    StreamExecutionEnvironment env) {

  final DataStream<WindowedValue<T>> source;
  final DataStream<WindowedValue<ValueWithRecordId<T>>> nonDedupSource;
  Coder<WindowedValue<T>> windowCoder =
      instantiateCoder(outputCollectionId, pipeline.getComponents());

  TypeInformation<WindowedValue<T>> outputTypeInfo = new CoderTypeInformation<>(windowCoder);

  WindowingStrategy windowStrategy =
      getWindowingStrategy(outputCollectionId, pipeline.getComponents());
  TypeInformation<WindowedValue<ValueWithRecordId<T>>> withIdTypeInfo =
      new CoderTypeInformation<>(
          WindowedValue.getFullCoder(
              ValueWithRecordId.ValueWithRecordIdCoder.of(
                  ((WindowedValueCoder) windowCoder).getValueCoder()),
              windowStrategy.getWindowFn().windowCoder()));

  UnboundedSource unboundedSource = ReadTranslation.unboundedSourceFromProto(payload);

  try {
    int parallelism =
        env.getMaxParallelism() > 0 ? env.getMaxParallelism() : env.getParallelism();
    UnboundedSourceWrapper sourceWrapper =
        new UnboundedSourceWrapper<>(
            transformName, pipelineOptions, unboundedSource, parallelism);
    nonDedupSource =
        env.addSource(sourceWrapper)
            .name(transformName)
            .uid(transformName)
            .returns(withIdTypeInfo);

    if (unboundedSource.requiresDeduping()) {
      source =
          nonDedupSource
              .keyBy(new FlinkStreamingTransformTranslators.ValueWithRecordIdKeySelector<>())
              .transform("deduping", outputTypeInfo, new DedupingOperator<>(pipelineOptions))
              .uid(format("%s/__deduplicated__", transformName));
    } else {
      source =
          nonDedupSource
              .flatMap(new FlinkStreamingTransformTranslators.StripIdsMap<>(pipelineOptions))
              .returns(outputTypeInfo);
    }
  } catch (Exception e) {
    throw new RuntimeException("Error while translating UnboundedSource: " + unboundedSource, e);
  }

  return source;
}
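The translator above sizes the UnboundedSourceWrapper with env.getMaxParallelism() when one has been configured and falls back to env.getParallelism() otherwise; getMaxParallelism() is non-positive until a max parallelism is set. A minimal sketch of that fallback, with illustrative values:

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class ParallelismFallbackSketch {

	public static void main(String[] args) {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(4);

		// No max parallelism configured yet, so the fallback resolves to the default (4 here).
		int withoutMax = env.getMaxParallelism() > 0 ? env.getMaxParallelism() : env.getParallelism();

		env.setMaxParallelism(128);

		// Once a max parallelism is set, it takes precedence (128 here).
		int withMax = env.getMaxParallelism() > 0 ? env.getMaxParallelism() : env.getParallelism();

		System.out.println(withoutMax + " / " + withMax);
	}
}
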
 
Example 13
Source File: ContinuousFileProcessingITCase.java    From flink with Apache License 2.0
@Test
public void testProgram() throws Exception {

	/*
	* This test checks the interplay between the monitor and the reader
	* and also the failExternally() functionality. To test the latter we
	* set the parallelism to 1 so that we have the chaining between the sink,
	* which throws the SuccessException to signal the end of the test, and the
	* reader.
	* */

	TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
	format.setFilePath(hdfsURI);
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	// create the stream execution environment with a parallelism > 1 to test
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(PARALLELISM);

	ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format,
			FileProcessingMode.PROCESS_CONTINUOUSLY,
			env.getParallelism(), INTERVAL);

	// the monitor has always DOP 1
	DataStream<TimestampedFileInputSplit> splits = env.addSource(monitoringFunction);
	Assert.assertEquals(1, splits.getParallelism());

	TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format);

	// the readers can be multiple
	DataStream<String> content = splits.transform("FileSplitReader", typeInfo, new ContinuousFileReaderOperatorFactory<>(format));
	Assert.assertEquals(PARALLELISM, content.getParallelism());

	// finally for the sink we set the parallelism to 1 so that we can verify the output
	TestingSinkFunction sink = new TestingSinkFunction();
	content.addSink(sink).setParallelism(1);

	CompletableFuture<Void> jobFuture = new CompletableFuture<>();
	new Thread(() -> {
		try {
			env.execute("ContinuousFileProcessingITCase Job.");
			jobFuture.complete(null);
		} catch (Exception e) {
			if (ExceptionUtils.findThrowable(e, SuccessException.class).isPresent()) {
				jobFuture.complete(null);
			} else {
				jobFuture.completeExceptionally(e);
			}
		}
	}).start();

	// The modification time of the last created file.
	long lastCreatedModTime = Long.MIN_VALUE;

	// create the files to be read
	for (int i = 0; i < NO_OF_FILES; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> tmpFile;
		long modTime;
		do {

			// give it some time so that the files have
			// different modification timestamps.
			Thread.sleep(50);

			tmpFile = fillWithData(hdfsURI, "file", i, "This is test line.");

			modTime = hdfs.getFileStatus(tmpFile.f0).getModificationTime();
			if (modTime <= lastCreatedModTime) {
				// delete the last created file to recreate it with a different timestamp
				hdfs.delete(tmpFile.f0, false);
			}
		} while (modTime <= lastCreatedModTime);
		lastCreatedModTime = modTime;

		// put the contents in the expected results list before the reader picks them
		// this is to guarantee that they are in before the reader finishes (avoid race conditions)
		expectedContents.put(i, tmpFile.f1);

		org.apache.hadoop.fs.Path file =
			new org.apache.hadoop.fs.Path(hdfsURI + "/file" + i);
		hdfs.rename(tmpFile.f0, file);
		Assert.assertTrue(hdfs.exists(file));
	}

	jobFuture.get();
}