Java Code Examples for org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit

The following examples show how to use org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Checks equality of {@link TimestampedFileInputSplit} instances: splits built from identical
 * constructor arguments must be equal, while splits that differ in the first argument or in
 * the path must not be.
 */
@Test
public void testSplitEquality() {

	TimestampedFileInputSplit richFirstSplit =
		new TimestampedFileInputSplit(10, 2, new Path("test"), 0, 100, null);

	// built from exactly the same arguments as the first split
	TimestampedFileInputSplit richSecondSplit =
		new TimestampedFileInputSplit(10, 2, new Path("test"), 0, 100, null);
	Assert.assertEquals(richFirstSplit, richSecondSplit);

	// only the first argument differs (11 instead of 10)
	TimestampedFileInputSplit richModSecondSplit =
		new TimestampedFileInputSplit(11, 2, new Path("test"), 0, 100, null);
	Assert.assertNotEquals(richSecondSplit, richModSecondSplit);

	// only the path differs from the first split
	TimestampedFileInputSplit richThirdSplit =
		new TimestampedFileInputSplit(10, 2, new Path("test/test1"), 0, 100, null);
	// JUnit's assertEquals takes (expected, actual); this order keeps the failure message correct
	Assert.assertEquals(10, richThirdSplit.getModificationTime());
	Assert.assertNotEquals(richFirstSplit, richThirdSplit);

	// same arguments as the third split, so the two must be equal again
	TimestampedFileInputSplit richThirdSplitCopy =
		new TimestampedFileInputSplit(10, 2, new Path("test/test1"), 0, 100, null);
	Assert.assertEquals(richThirdSplitCopy, richThirdSplit);
}
 
Example 2
/**
 * Runs a {@code PROCESS_ONCE} monitoring function against the "/invalid/" path on the
 * {@code hdfsCluster} address and expects a {@link FileNotFoundException} whose message names
 * the format's file path.
 */
@Test
public void testInvalidPathSpecification() throws Exception {

	final String clusterAddress =
		"hdfs://" + hdfsCluster.getURI().getHost() + ":" + hdfsCluster.getNameNodePort();
	final TextInputFormat format = new TextInputFormat(new Path(clusterAddress + "/invalid/"));

	final ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);

	try {
		// any split reaching this context means the invalid path was accepted
		DummySourceContext failingContext = new DummySourceContext() {
			@Override
			public void collect(TimestampedFileInputSplit element) {
				Assert.fail("Test passes with an invalid path.");
			}
		};
		monitoringFunction.run(failingContext);

		// run() returning normally is a failure as well
		Assert.fail("Test passed with an invalid path.");

	} catch (FileNotFoundException notFound) {
		String expectedMessage = "The provided file path " + format.getFilePath() + " does not exist.";
		Assert.assertEquals(expectedMessage, notFound.getMessage());
	}
}
 
Example 3
Source Project: flink   Source File: TimestampedFileInputSplitTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks equality of {@link TimestampedFileInputSplit} instances: splits built from identical
 * constructor arguments must be equal, while splits that differ in the first argument or in
 * the path must not be.
 */
@Test
public void testSplitEquality() {

	TimestampedFileInputSplit richFirstSplit =
		new TimestampedFileInputSplit(10, 2, new Path("test"), 0, 100, null);

	// built from exactly the same arguments as the first split
	TimestampedFileInputSplit richSecondSplit =
		new TimestampedFileInputSplit(10, 2, new Path("test"), 0, 100, null);
	Assert.assertEquals(richFirstSplit, richSecondSplit);

	// only the first argument differs (11 instead of 10)
	TimestampedFileInputSplit richModSecondSplit =
		new TimestampedFileInputSplit(11, 2, new Path("test"), 0, 100, null);
	Assert.assertNotEquals(richSecondSplit, richModSecondSplit);

	// only the path differs from the first split
	TimestampedFileInputSplit richThirdSplit =
		new TimestampedFileInputSplit(10, 2, new Path("test/test1"), 0, 100, null);
	// JUnit's assertEquals takes (expected, actual); this order keeps the failure message correct
	Assert.assertEquals(10, richThirdSplit.getModificationTime());
	Assert.assertNotEquals(richFirstSplit, richThirdSplit);

	// same arguments as the third split, so the two must be equal again
	TimestampedFileInputSplit richThirdSplitCopy =
		new TimestampedFileInputSplit(10, 2, new Path("test/test1"), 0, 100, null);
	Assert.assertEquals(richThirdSplitCopy, richThirdSplit);
}
 
Example 4
Source Project: flink   Source File: ContinuousFileProcessingTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs a {@code PROCESS_ONCE} monitoring function against the "/invalid/" path on the
 * {@code hdfsCluster} address and expects a {@link FileNotFoundException} whose message names
 * the format's file path.
 */
@Test
public void testInvalidPathSpecification() throws Exception {

	final String clusterAddress =
		"hdfs://" + hdfsCluster.getURI().getHost() + ":" + hdfsCluster.getNameNodePort();
	final TextInputFormat format = new TextInputFormat(new Path(clusterAddress + "/invalid/"));

	final ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);

	try {
		// any split reaching this context means the invalid path was accepted
		DummySourceContext failingContext = new DummySourceContext() {
			@Override
			public void collect(TimestampedFileInputSplit element) {
				Assert.fail("Test passes with an invalid path.");
			}
		};
		monitoringFunction.run(failingContext);

		// run() returning normally is a failure as well
		Assert.fail("Test passed with an invalid path.");

	} catch (FileNotFoundException notFound) {
		String expectedMessage = "The provided file path " + format.getFilePath() + " does not exist.";
		Assert.assertEquals(expectedMessage, notFound.getMessage());
	}
}
 
Example 5
Source Project: flink   Source File: TimestampedFileInputSplitTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks equality of {@link TimestampedFileInputSplit} instances: splits built from identical
 * constructor arguments must be equal, while splits that differ in the first argument or in
 * the path must not be.
 */
@Test
public void testSplitEquality() {

	TimestampedFileInputSplit richFirstSplit =
		new TimestampedFileInputSplit(10, 2, new Path("test"), 0, 100, null);

	// built from exactly the same arguments as the first split
	TimestampedFileInputSplit richSecondSplit =
		new TimestampedFileInputSplit(10, 2, new Path("test"), 0, 100, null);
	Assert.assertEquals(richFirstSplit, richSecondSplit);

	// only the first argument differs (11 instead of 10)
	TimestampedFileInputSplit richModSecondSplit =
		new TimestampedFileInputSplit(11, 2, new Path("test"), 0, 100, null);
	Assert.assertNotEquals(richSecondSplit, richModSecondSplit);

	// only the path differs from the first split
	TimestampedFileInputSplit richThirdSplit =
		new TimestampedFileInputSplit(10, 2, new Path("test/test1"), 0, 100, null);
	// JUnit's assertEquals takes (expected, actual); this order keeps the failure message correct
	Assert.assertEquals(10, richThirdSplit.getModificationTime());
	Assert.assertNotEquals(richFirstSplit, richThirdSplit);

	// same arguments as the third split, so the two must be equal again
	TimestampedFileInputSplit richThirdSplitCopy =
		new TimestampedFileInputSplit(10, 2, new Path("test/test1"), 0, 100, null);
	Assert.assertEquals(richThirdSplitCopy, richThirdSplit);
}
 
Example 6
Source Project: flink   Source File: ContinuousFileProcessingTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs a {@code PROCESS_ONCE} monitoring function against the "/invalid/" path on the
 * {@code hdfsCluster} address and expects a {@link FileNotFoundException} whose message names
 * the format's file path.
 */
@Test
public void testInvalidPathSpecification() throws Exception {

	final String clusterAddress =
		"hdfs://" + hdfsCluster.getURI().getHost() + ":" + hdfsCluster.getNameNodePort();
	final TextInputFormat format = new TextInputFormat(new Path(clusterAddress + "/invalid/"));

	final ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);

	try {
		// any split reaching this context means the invalid path was accepted
		DummySourceContext failingContext = new DummySourceContext() {
			@Override
			public void collect(TimestampedFileInputSplit element) {
				Assert.fail("Test passes with an invalid path.");
			}
		};
		monitoringFunction.run(failingContext);

		// run() returning normally is a failure as well
		Assert.fail("Test passed with an invalid path.");

	} catch (FileNotFoundException notFound) {
		String expectedMessage = "The provided file path " + format.getFilePath() + " does not exist.";
		Assert.assertEquals(expectedMessage, notFound.getMessage());
	}
}
 
Example 7
/**
 * Creates a {@link BlockingFileInputFormat} and a test harness around it, optionally
 * initializes the harness from {@code initState}, opens it, feeds it the given splits and then
 * runs mailbox steps until the format reports that its first chunk has been processed.
 *
 * @param noOfTasks forwarded to {@code getTestHarness}
 * @param taskIdx forwarded to {@code getTestHarness}
 * @param elementsBeforeCheckpoint forwarded to the {@link BlockingFileInputFormat} constructor
 * @param initState state to initialize the harness with; skipped when {@code null}
 * @param splits splits to feed in array order; each split's array index is passed as its
 *     modification time; nothing is fed when {@code null}
 * @return the harness bundled with the blocking format
 * @throws Exception propagated from the harness, split processing or mailbox calls
 */
private HarnessWithFormat buildAndStart(
		int noOfTasks,
		int taskIdx,
		int elementsBeforeCheckpoint,
		@Nullable OperatorSubtaskState initState,
		FileInputSplit... splits) throws Exception {

	final BlockingFileInputFormat blockingFormat =
		new BlockingFileInputFormat(new Path("test"), sizeOfSplit, elementsBeforeCheckpoint);
	final OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> testHarness =
		getTestHarness(blockingFormat, noOfTasks, taskIdx);

	testHarness.setup();
	if (initState != null) {
		testHarness.initializeState(initState);
	}
	testHarness.open();

	if (splits != null) {
		// the position of a split in the array doubles as its modification time
		int position = 0;
		for (FileInputSplit split : splits) {
			testHarness.processElement(new StreamRecord<>(getTimestampedSplit(position, split)));
			position++;
		}
	}

	final HarnessWithFormat started = new HarnessWithFormat(testHarness, blockingFormat);
	// keep stepping the mailbox until the format signals its first chunk is done
	while (!blockingFormat.isFirstChunkProcessed()) {
		started.mailboxProcessor.runMailboxStep();
	}
	return started;
}
 
Example 8
/**
 * Checks the ordering produced by {@code compareTo} on {@link TimestampedFileInputSplit}
 * for splits that differ in their first argument (modification time), second argument
 * (split number) and path.
 */
@Test
public void testSplitComparison() {
	TimestampedFileInputSplit time0File1Split3 =
		new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null);

	TimestampedFileInputSplit time10File2Split2 =
		new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 0, 100, null);

	TimestampedFileInputSplit time10File2Split1 =
		new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null);

	TimestampedFileInputSplit time11File3Split0 =
		new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null);

	TimestampedFileInputSplit time11File3Split1 =
		new TimestampedFileInputSplit(11, 1, new Path("test/test3"), 0, 100, null);

	// modification time 0 sorts before modification time 10
	Assert.assertTrue(time0File1Split3.compareTo(time10File2Split2) < 0);

	// modification time 10 vs 11; the paths (test2 vs test3) differ in the same direction
	Assert.assertTrue(time10File2Split1.compareTo(time11File3Split1) < 0);

	// same modification time and same path, so split number 1 sorts before split number 2
	Assert.assertTrue(time10File2Split1.compareTo(time10File2Split2) < 0);

	// modification time 10 sorts before 11 even though its split number (1) is larger than 0
	Assert.assertTrue(time10File2Split1.compareTo(time11File3Split0) < 0);
}
 
Example 9
/**
 * Verifies that constructing a {@link TimestampedFileInputSplit} with a negative modification
 * time is rejected with an {@link IllegalArgumentException}.
 *
 * <p>The test fails when a different exception is thrown, and also when no exception is
 * thrown at all.
 */
@Test
public void testIllegalArgument() {
	try {
		new TimestampedFileInputSplit(-10, 2, new Path("test"), 0, 100, null); // invalid modification time
	} catch (IllegalArgumentException expected) {
		// the invalid modification time was rejected, which is the behavior under test
		return;
	} catch (Exception e) {
		Assert.fail(e.getMessage());
	}

	// reaching this point means the constructor accepted the negative modification time
	Assert.fail("Expected an IllegalArgumentException for a negative modification time.");
}
 
Example 10
/**
 * Builds a plain {@link FileInputSplit} from the split number, path, start, length and
 * hostnames of the given timestamped split.
 *
 * @param split the timestamped split to copy from; checked with {@code checkNotNull}
 * @return a new {@link FileInputSplit} carrying the same five values
 */
private FileInputSplit createSplitFromTimestampedSplit(TimestampedFileInputSplit split) {
	checkNotNull(split);

	final FileInputSplit plainSplit = new FileInputSplit(
		split.getSplitNumber(), split.getPath(), split.getStart(), split.getLength(), split.getHostnames());
	return plainSplit;
}
 
Example 11
/**
 * Builds a plain {@link FileInputSplit} from the split number, path, start, length and
 * hostnames of the given timestamped split.
 *
 * @param split the timestamped split to copy from; checked with {@code Preconditions.checkNotNull}
 * @return a new {@link FileInputSplit} carrying the same five values
 */
private FileInputSplit createSplitFromTimestampedSplit(TimestampedFileInputSplit split) {
	Preconditions.checkNotNull(split);

	final FileInputSplit plainSplit = new FileInputSplit(
		split.getSplitNumber(), split.getPath(), split.getStart(), split.getLength(), split.getHostnames());
	return plainSplit;
}
 
Example 12
/**
 * Adds the file name of the received split to {@code seenFiles}, triggers the latch once the
 * notification count is hit, and cancels the source once the cancellation count is hit.
 *
 * @param element the split handed over by the source
 */
@Override
public void collect(TimestampedFileInputSplit element) {
	this.seenFiles.add(element.getPath().getName());

	// trigger only on the exact count, and only if the latch has not fired yet
	final boolean reachedNotifyCount = seenFiles.size() == elementsBeforeNotifying;
	if (reachedNotifyCount && !latch.isTriggered()) {
		latch.trigger();
	}

	final boolean reachedCancelCount = seenFiles.size() == elementsBeforeCanceling;
	if (reachedCancelCount) {
		src.cancel();
	}
}
 
Example 13
/**
 * Looks up the HDFS modification time of the file behind the received split and asserts that
 * it is not smaller than the previously seen one and that it equals the expected value at the
 * current split position.
 *
 * @param element the split handed over by the source
 */
@Override
public void collect(TimestampedFileInputSplit element) {
	try {
		final org.apache.hadoop.fs.Path hadoopPath =
			new org.apache.hadoop.fs.Path(element.getPath().getPath());
		final long modTime = hdfs.getFileStatus(hadoopPath).getModificationTime();

		Assert.assertTrue(lastSeenModTime <= modTime);
		Assert.assertEquals(expectedModificationTimes[splitCounter], modTime);

		// advance the bookkeeping for the next split
		splitCounter++;
		lastSeenModTime = modTime;
	} catch (IOException ioe) {
		Assert.fail(ioe.getMessage());
	}
}
 
Example 14
/**
 * Wraps a {@link ContinuousFileReaderOperator} for the given format in a test harness that is
 * configured for event time.
 *
 * @param format the input format handed to the reader operator
 * @param noOfTasks forwarded to the harness constructor after {@code maxParallelism}
 * @param taksIdx forwarded to the harness constructor as its last argument
 * @return the configured harness; it is neither set up nor opened here
 * @throws Exception propagated from the harness constructor
 */
private OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> getTestHarness(
	BlockingFileInputFormat format, int noOfTasks, int taksIdx) throws Exception {

	final ContinuousFileReaderOperator<String> readerOperator = new ContinuousFileReaderOperator<>(format);
	// the output type is derived from the format itself
	readerOperator.setOutputType(TypeExtractor.getInputFormatTypes(format), new ExecutionConfig());

	final OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> harness =
		new OneInputStreamOperatorTestHarness<>(readerOperator, maxParallelism, noOfTasks, taksIdx);
	harness.setTimeCharacteristic(TimeCharacteristic.EventTime);

	return harness;
}
 
Example 15
/**
 * Builds a {@link TimestampedFileInputSplit} from the given modification time and the split
 * number, path, start, length and hostnames of the given split.
 *
 * @param modTime value passed as the first constructor argument
 * @param split the split to copy from; checked with {@code Preconditions.checkNotNull}
 * @return the newly created timestamped split
 */
private TimestampedFileInputSplit getTimestampedSplit(long modTime, FileInputSplit split) {
	Preconditions.checkNotNull(split);

	final TimestampedFileInputSplit timestamped = new TimestampedFileInputSplit(
		modTime, split.getSplitNumber(), split.getPath(), split.getStart(), split.getLength(), split.getHostnames());
	return timestamped;
}
 
Example 16
Source Project: flink   Source File: TimestampedFileInputSplitTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks the ordering produced by {@code compareTo} on {@link TimestampedFileInputSplit}
 * for splits that differ in their first argument (modification time), second argument
 * (split number) and path.
 */
@Test
public void testSplitComparison() {
	TimestampedFileInputSplit time0File1Split3 =
		new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null);

	TimestampedFileInputSplit time10File2Split2 =
		new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 0, 100, null);

	TimestampedFileInputSplit time10File2Split1 =
		new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null);

	TimestampedFileInputSplit time11File3Split0 =
		new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null);

	TimestampedFileInputSplit time11File3Split1 =
		new TimestampedFileInputSplit(11, 1, new Path("test/test3"), 0, 100, null);

	// modification time 0 sorts before modification time 10
	Assert.assertTrue(time0File1Split3.compareTo(time10File2Split2) < 0);

	// modification time 10 vs 11; the paths (test2 vs test3) differ in the same direction
	Assert.assertTrue(time10File2Split1.compareTo(time11File3Split1) < 0);

	// same modification time and same path, so split number 1 sorts before split number 2
	Assert.assertTrue(time10File2Split1.compareTo(time10File2Split2) < 0);

	// modification time 10 sorts before 11 even though its split number (1) is larger than 0
	Assert.assertTrue(time10File2Split1.compareTo(time11File3Split0) < 0);
}
 
Example 17
Source Project: flink   Source File: TimestampedFileInputSplitTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that constructing a {@link TimestampedFileInputSplit} with a negative modification
 * time is rejected with an {@link IllegalArgumentException}.
 *
 * <p>The test fails when a different exception is thrown, and also when no exception is
 * thrown at all.
 */
@Test
public void testIllegalArgument() {
	try {
		new TimestampedFileInputSplit(-10, 2, new Path("test"), 0, 100, null); // invalid modification time
	} catch (IllegalArgumentException expected) {
		// the invalid modification time was rejected, which is the behavior under test
		return;
	} catch (Exception e) {
		Assert.fail(e.getMessage());
	}

	// reaching this point means the constructor accepted the negative modification time
	Assert.fail("Expected an IllegalArgumentException for a negative modification time.");
}
 
Example 18
/**
 * Builds a plain {@link FileInputSplit} from the split number, path, start, length and
 * hostnames of the given timestamped split.
 *
 * @param split the timestamped split to copy from; checked with {@code checkNotNull}
 * @return a new {@link FileInputSplit} carrying the same five values
 */
private FileInputSplit createSplitFromTimestampedSplit(TimestampedFileInputSplit split) {
	checkNotNull(split);

	final FileInputSplit plainSplit = new FileInputSplit(
		split.getSplitNumber(), split.getPath(), split.getStart(), split.getLength(), split.getHostnames());
	return plainSplit;
}
 
Example 19
Source Project: flink   Source File: ContinuousFileProcessingTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a plain {@link FileInputSplit} from the split number, path, start, length and
 * hostnames of the given timestamped split.
 *
 * @param split the timestamped split to copy from; checked with {@code Preconditions.checkNotNull}
 * @return a new {@link FileInputSplit} carrying the same five values
 */
private FileInputSplit createSplitFromTimestampedSplit(TimestampedFileInputSplit split) {
	Preconditions.checkNotNull(split);

	final FileInputSplit plainSplit = new FileInputSplit(
		split.getSplitNumber(), split.getPath(), split.getStart(), split.getLength(), split.getHostnames());
	return plainSplit;
}
 
Example 20
Source Project: flink   Source File: ContinuousFileProcessingTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adds the file name of the received split to {@code seenFiles}, triggers the latch once the
 * notification count is hit, and cancels the source once the cancellation count is hit.
 *
 * @param element the split handed over by the source
 */
@Override
public void collect(TimestampedFileInputSplit element) {
	this.seenFiles.add(element.getPath().getName());

	// trigger only on the exact count, and only if the latch has not fired yet
	final boolean reachedNotifyCount = seenFiles.size() == elementsBeforeNotifying;
	if (reachedNotifyCount && !latch.isTriggered()) {
		latch.trigger();
	}

	final boolean reachedCancelCount = seenFiles.size() == elementsBeforeCanceling;
	if (reachedCancelCount) {
		src.cancel();
	}
}
 
Example 21
Source Project: flink   Source File: ContinuousFileProcessingTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the HDFS modification time of the file behind the received split and asserts that
 * it is not smaller than the previously seen one and that it equals the expected value at the
 * current split position.
 *
 * @param element the split handed over by the source
 */
@Override
public void collect(TimestampedFileInputSplit element) {
	try {
		final org.apache.hadoop.fs.Path hadoopPath =
			new org.apache.hadoop.fs.Path(element.getPath().getPath());
		final long modTime = hdfs.getFileStatus(hadoopPath).getModificationTime();

		Assert.assertTrue(lastSeenModTime <= modTime);
		Assert.assertEquals(expectedModificationTimes[splitCounter], modTime);

		// advance the bookkeeping for the next split
		splitCounter++;
		lastSeenModTime = modTime;
	} catch (IOException ioe) {
		Assert.fail(ioe.getMessage());
	}
}
 
Example 22
/**
 * Wraps a {@link ContinuousFileReaderOperator} for the given format in a test harness that is
 * configured for event time.
 *
 * @param format the input format handed to the reader operator
 * @param noOfTasks forwarded to the harness constructor after {@code maxParallelism}
 * @param taksIdx forwarded to the harness constructor as its last argument
 * @return the configured harness; it is neither set up nor opened here
 * @throws Exception propagated from the harness constructor
 */
private OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> getTestHarness(
	BlockingFileInputFormat format, int noOfTasks, int taksIdx) throws Exception {

	final ContinuousFileReaderOperator<String> readerOperator = new ContinuousFileReaderOperator<>(format);
	// the output type is derived from the format itself
	readerOperator.setOutputType(TypeExtractor.getInputFormatTypes(format), new ExecutionConfig());

	final OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> harness =
		new OneInputStreamOperatorTestHarness<>(readerOperator, maxParallelism, noOfTasks, taksIdx);
	harness.setTimeCharacteristic(TimeCharacteristic.EventTime);

	return harness;
}
 
Example 23
/**
 * Builds a {@link TimestampedFileInputSplit} from the given modification time and the split
 * number, path, start, length and hostnames of the given split.
 *
 * @param modTime value passed as the first constructor argument
 * @param split the split to copy from; checked with {@code Preconditions.checkNotNull}
 * @return the newly created timestamped split
 */
private TimestampedFileInputSplit getTimestampedSplit(long modTime, FileInputSplit split) {
	Preconditions.checkNotNull(split);

	final TimestampedFileInputSplit timestamped = new TimestampedFileInputSplit(
		modTime, split.getSplitNumber(), split.getPath(), split.getStart(), split.getLength(), split.getHostnames());
	return timestamped;
}
 
Example 24
Source Project: flink   Source File: HiveTableSource.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a streaming source for a non-partitioned table: a continuously running file
 * monitoring function followed by a split-reader transformation.
 *
 * <p>The consume order, start offset and monitor interval are read from the options of
 * {@code catalogTable}.
 *
 * @param execEnv environment the source is added to; its parallelism is handed to the monitoring function
 * @param typeInfo type information used for the split-reader transformation
 * @param inputFormat wrapped, together with {@code hiveTable}, in a {@code HiveTableFileInputFormat}
 * @param hiveTable wrapped, together with {@code inputFormat}, in a {@code HiveTableFileInputFormat}
 * @return the resulting stream wrapped in a {@code DataStreamSource}
 * @throws UnsupportedOperationException if the configured consume order is not {@code CREATE_TIME_ORDER}
 */
private DataStream<RowData> createStreamSourceForNonPartitionTable(
		StreamExecutionEnvironment execEnv,
		TypeInformation<RowData> typeInfo,
		HiveTableInputFormat inputFormat,
		HiveTablePartition hiveTable) {
	final HiveTableFileInputFormat fileFormat = new HiveTableFileInputFormat(inputFormat, hiveTable);

	// copy the catalog table options into a Configuration so they can be read via the option constants
	final Configuration options = new Configuration();
	catalogTable.getOptions().forEach(options::setString);

	final ConsumeOrder order = ConsumeOrder.getConsumeOrder(options.get(STREAMING_SOURCE_CONSUME_ORDER));
	if (order != ConsumeOrder.CREATE_TIME_ORDER) {
		throw new UnsupportedOperationException(
				"Only " + ConsumeOrder.CREATE_TIME_ORDER + " is supported for non partition table.");
	}

	// per the original author's note: local-zone millis rather than UTC millis
	final String startOffset = options.get(STREAMING_SOURCE_CONSUME_START_OFFSET);
	final long startReadTime = TimestampData
			.fromLocalDateTime(toLocalDateTime(startOffset))
			.toTimestamp()
			.getTime();

	final long monitorIntervalMillis = options.get(STREAMING_SOURCE_MONITOR_INTERVAL).toMillis();

	final ContinuousFileMonitoringFunction<RowData> fileMonitor =
			new ContinuousFileMonitoringFunction<>(
					fileFormat,
					FileProcessingMode.PROCESS_CONTINUOUSLY,
					execEnv.getParallelism(),
					monitorIntervalMillis,
					startReadTime);

	final ContinuousFileReaderOperatorFactory<RowData, TimestampedFileInputSplit> readerFactory =
			new ContinuousFileReaderOperatorFactory<>(fileFormat);

	final String sourceName = "HiveFileMonitoringFunction";
	final SingleOutputStreamOperator<RowData> readerStream = execEnv
			.addSource(fileMonitor, sourceName)
			.transform("Split Reader: " + sourceName, typeInfo, readerFactory);

	return new DataStreamSource<>(readerStream);
}
 
Example 25
Source Project: flink   Source File: TimestampedFileInputSplitTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks the ordering produced by {@code compareTo} on {@link TimestampedFileInputSplit}
 * for splits that differ in their first argument (modification time), second argument
 * (split number) and path.
 */
@Test
public void testSplitComparison() {
	TimestampedFileInputSplit time0File1Split3 =
		new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null);

	TimestampedFileInputSplit time10File2Split2 =
		new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 0, 100, null);

	TimestampedFileInputSplit time10File2Split1 =
		new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null);

	TimestampedFileInputSplit time11File3Split0 =
		new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null);

	TimestampedFileInputSplit time11File3Split1 =
		new TimestampedFileInputSplit(11, 1, new Path("test/test3"), 0, 100, null);

	// modification time 0 sorts before modification time 10
	Assert.assertTrue(time0File1Split3.compareTo(time10File2Split2) < 0);

	// modification time 10 vs 11; the paths (test2 vs test3) differ in the same direction
	Assert.assertTrue(time10File2Split1.compareTo(time11File3Split1) < 0);

	// same modification time and same path, so split number 1 sorts before split number 2
	Assert.assertTrue(time10File2Split1.compareTo(time10File2Split2) < 0);

	// modification time 10 sorts before 11 even though its split number (1) is larger than 0
	Assert.assertTrue(time10File2Split1.compareTo(time11File3Split0) < 0);
}
 
Example 26
Source Project: flink   Source File: TimestampedFileInputSplitTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that constructing a {@link TimestampedFileInputSplit} with a negative modification
 * time is rejected with an {@link IllegalArgumentException}.
 *
 * <p>The test fails when a different exception is thrown, and also when no exception is
 * thrown at all.
 */
@Test
public void testIllegalArgument() {
	try {
		new TimestampedFileInputSplit(-10, 2, new Path("test"), 0, 100, null); // invalid modification time
	} catch (IllegalArgumentException expected) {
		// the invalid modification time was rejected, which is the behavior under test
		return;
	} catch (Exception e) {
		Assert.fail(e.getMessage());
	}

	// reaching this point means the constructor accepted the negative modification time
	Assert.fail("Expected an IllegalArgumentException for a negative modification time.");
}
 
Example 27
/**
 * Builds a plain {@link FileInputSplit} from the split number, path, start, length and
 * hostnames of the given timestamped split.
 *
 * @param split the timestamped split to copy from; checked with {@code checkNotNull}
 * @return a new {@link FileInputSplit} carrying the same five values
 */
private FileInputSplit createSplitFromTimestampedSplit(TimestampedFileInputSplit split) {
	checkNotNull(split);

	final FileInputSplit plainSplit = new FileInputSplit(
		split.getSplitNumber(), split.getPath(), split.getStart(), split.getLength(), split.getHostnames());
	return plainSplit;
}
 
Example 28
Source Project: flink   Source File: ContinuousFileProcessingTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a plain {@link FileInputSplit} from the split number, path, start, length and
 * hostnames of the given timestamped split.
 *
 * @param split the timestamped split to copy from; checked with {@code Preconditions.checkNotNull}
 * @return a new {@link FileInputSplit} carrying the same five values
 */
private FileInputSplit createSplitFromTimestampedSplit(TimestampedFileInputSplit split) {
	Preconditions.checkNotNull(split);

	final FileInputSplit plainSplit = new FileInputSplit(
		split.getSplitNumber(), split.getPath(), split.getStart(), split.getLength(), split.getHostnames());
	return plainSplit;
}
 
Example 29
Source Project: flink   Source File: ContinuousFileProcessingTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adds the file name of the received split to {@code seenFiles}, triggers the latch once the
 * notification count is hit, and cancels the source once the cancellation count is hit.
 *
 * @param element the split handed over by the source
 */
@Override
public void collect(TimestampedFileInputSplit element) {
	this.seenFiles.add(element.getPath().getName());

	// trigger only on the exact count, and only if the latch has not fired yet
	final boolean reachedNotifyCount = seenFiles.size() == elementsBeforeNotifying;
	if (reachedNotifyCount && !latch.isTriggered()) {
		latch.trigger();
	}

	final boolean reachedCancelCount = seenFiles.size() == elementsBeforeCanceling;
	if (reachedCancelCount) {
		src.cancel();
	}
}
 
Example 30
Source Project: flink   Source File: ContinuousFileProcessingTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the HDFS modification time of the file behind the received split and asserts that
 * it is not smaller than the previously seen one and that it equals the expected value at the
 * current split position.
 *
 * @param element the split handed over by the source
 */
@Override
public void collect(TimestampedFileInputSplit element) {
	try {
		final org.apache.hadoop.fs.Path hadoopPath =
			new org.apache.hadoop.fs.Path(element.getPath().getPath());
		final long modTime = hdfs.getFileStatus(hadoopPath).getModificationTime();

		Assert.assertTrue(lastSeenModTime <= modTime);
		Assert.assertEquals(expectedModificationTimes[splitCounter], modTime);

		// advance the bookkeeping for the next split
		splitCounter++;
		lastSeenModTime = modTime;
	} catch (IOException ioe) {
		Assert.fail(ioe.getMessage());
	}
}