Java Code Examples for org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics

The following examples show how to use org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Flink-CEPplus   Source File: DelimitedInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the statistics gathered for two input files report the combined size of both.
 *
 * @throws IOException propagated from the calls made by the test
 */
@Test
public void testGetStatistics() throws IOException {
	// first input file
	final String firstContent = "my mocked line 1\nmy mocked line 2\n";
	final long firstLength = firstContent.length();
	final Path firstFile = createTempFilePath(firstContent);

	// second input file
	final String secondContent = "my mocked line 1\nmy mocked line 2\nanother mocked line3\n";
	final long secondLength = secondContent.length();
	final Path secondFile = createTempFilePath(secondContent);

	final DelimitedInputFormat<String> inputFormat = new MyTextInputFormat();
	final String firstUri = firstFile.toUri().toString();
	final String secondUri = secondFile.toUri().toString();
	inputFormat.setFilePaths(firstUri, secondUri);

	final FileInputFormat.FileBaseStatistics gathered = inputFormat.getStatistics(null);
	assertNotNull(gathered);

	// the expected size is the summed character count of both contents
	final long expectedTotal = firstLength + secondLength;
	assertEquals("The file size from the statistics is wrong.", expectedTotal, gathered.getTotalInputSize());
}
 
Example 2
Source Project: flink   Source File: DelimitedInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the statistics gathered for two input files report the combined size of both.
 *
 * @throws IOException propagated from the calls made by the test
 */
@Test
public void testGetStatistics() throws IOException {
	// first input file
	final String firstContent = "my mocked line 1\nmy mocked line 2\n";
	final long firstLength = firstContent.length();
	final Path firstFile = createTempFilePath(firstContent);

	// second input file
	final String secondContent = "my mocked line 1\nmy mocked line 2\nanother mocked line3\n";
	final long secondLength = secondContent.length();
	final Path secondFile = createTempFilePath(secondContent);

	final DelimitedInputFormat<String> inputFormat = new MyTextInputFormat();
	final String firstUri = firstFile.toUri().toString();
	final String secondUri = secondFile.toUri().toString();
	inputFormat.setFilePaths(firstUri, secondUri);

	final FileInputFormat.FileBaseStatistics gathered = inputFormat.getStatistics(null);
	assertNotNull(gathered);

	// the expected size is the summed character count of both contents
	final long expectedTotal = firstLength + secondLength;
	assertEquals("The file size from the statistics is wrong.", expectedTotal, gathered.getTotalInputSize());
}
 
Example 3
Source Project: flink   Source File: DelimitedInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the statistics gathered for two input files report the combined size of both.
 *
 * @throws IOException propagated from the calls made by the test
 */
@Test
public void testGetStatistics() throws IOException {
	// first input file
	final String firstContent = "my mocked line 1\nmy mocked line 2\n";
	final long firstLength = firstContent.length();
	final Path firstFile = createTempFilePath(firstContent);

	// second input file
	final String secondContent = "my mocked line 1\nmy mocked line 2\nanother mocked line3\n";
	final long secondLength = secondContent.length();
	final Path secondFile = createTempFilePath(secondContent);

	final DelimitedInputFormat<String> inputFormat = new MyTextInputFormat();
	final String firstUri = firstFile.toUri().toString();
	final String secondUri = secondFile.toUri().toString();
	inputFormat.setFilePaths(firstUri, secondUri);

	final FileInputFormat.FileBaseStatistics gathered = inputFormat.getStatistics(null);
	assertNotNull(gathered);

	// the expected size is the summed character count of both contents
	final long expectedTotal = firstLength + secondLength;
	assertEquals("The file size from the statistics is wrong.", expectedTotal, gathered.getTotalInputSize());
}
 
Example 4
Source Project: Flink-CEPplus   Source File: HadoopInputFormatBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Gathers file-based statistics for the wrapped Hadoop input format.
 *
 * <p>Statistics are only computed when the wrapped format is a {@code FileInputFormat}: the
 * input paths are read from the job configuration and handed to {@code getFileStats}. Failures
 * during that lookup are logged and reported as "no statistics".
 *
 * @param cachedStats previously gathered statistics; only forwarded if they are
 *                    {@code FileBaseStatistics}
 * @return the result of {@code getFileStats}, or {@code null} if the format is not file based
 *         or the lookup failed
 * @throws IOException declared by the signature; exceptions raised inside the lookup are
 *                     logged instead of being rethrown
 */
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
	// statistics can only be derived for file-based input formats
	if (mapredInputFormat instanceof FileInputFormat) {
		FileBaseStatistics previousFileStats = null;
		if (cachedStats instanceof FileBaseStatistics) {
			previousFileStats = (FileBaseStatistics) cachedStats;
		}

		try {
			final org.apache.hadoop.fs.Path[] inputPaths = FileInputFormat.getInputPaths(this.jobConf);
			final ArrayList<FileStatus> fileStatuses = new ArrayList<FileStatus>(1);
			return getFileStats(previousFileStats, inputPaths, fileStatuses);
		} catch (IOException ioex) {
			if (LOG.isWarnEnabled()) {
				final String warning = "Could not determine statistics due to an io error: " + ioex.getMessage();
				LOG.warn(warning);
			}
		} catch (Throwable t) {
			if (LOG.isErrorEnabled()) {
				final String error = "Unexpected problem while getting the file statistics: " + t.getMessage();
				LOG.error(error, t);
			}
		}
	}

	// either not a file-based format, or gathering the statistics failed
	return null;
}
 
Example 5
Source Project: Flink-CEPplus   Source File: HadoopInputFormatBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Gathers file-based statistics for the wrapped Hadoop (mapreduce) input format.
 *
 * <p>Statistics are only computed when the wrapped format is a {@code FileInputFormat}: the
 * input paths are read from a job context built on the configuration and handed to
 * {@code getFileStats}. Failures during that lookup are logged and reported as "no statistics".
 *
 * @param cachedStats previously gathered statistics; only forwarded if they are
 *                    {@code FileBaseStatistics}
 * @return the result of {@code getFileStats}, or {@code null} if the format is not file based
 *         or the lookup failed
 * @throws IOException declared by the signature; exceptions raised inside the lookup are
 *                     logged instead of being rethrown
 */
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
	// statistics can only be derived for file-based input formats
	if (mapreduceInputFormat instanceof FileInputFormat) {
		// built outside the try block, so a failure here is not swallowed by the handlers below
		final JobContext context = new JobContextImpl(configuration, null);

		FileBaseStatistics previousFileStats = null;
		if (cachedStats instanceof FileBaseStatistics) {
			previousFileStats = (FileBaseStatistics) cachedStats;
		}

		try {
			final org.apache.hadoop.fs.Path[] inputPaths = FileInputFormat.getInputPaths(context);
			final ArrayList<FileStatus> fileStatuses = new ArrayList<FileStatus>(1);
			return getFileStats(previousFileStats, inputPaths, fileStatuses);
		} catch (IOException ioex) {
			if (LOG.isWarnEnabled()) {
				final String warning = "Could not determine statistics due to an io error: " + ioex.getMessage();
				LOG.warn(warning);
			}
		} catch (Throwable t) {
			if (LOG.isErrorEnabled()) {
				final String error = "Unexpected problem while getting the file statistics: " + t.getMessage();
				LOG.error(error, t);
			}
		}
	}

	// either not a file-based format, or gathering the statistics failed
	return null;
}
 
Example 6
Source Project: Flink-CEPplus   Source File: DelimitedInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that no statistics are returned when the configured input paths do not exist.
 *
 * @throws IOException propagated from the statistics call
 */
@Test
public void testGetStatisticsFileDoesNotExist() throws IOException {
	final String firstMissingPath = "file:///path/does/not/really/exist";
	final String secondMissingPath = "file:///another/path/that/does/not/exist";

	final DelimitedInputFormat<String> inputFormat = new MyTextInputFormat();
	inputFormat.setFilePaths(firstMissingPath, secondMissingPath);

	final FileBaseStatistics gathered = inputFormat.getStatistics(null);
	assertNull("The file statistics should be null.", gathered);
}
 
Example 7
Source Project: Flink-CEPplus   Source File: DelimitedInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies how previously cached statistics are treated for a single input file.
 *
 * <p>The test runs four steps, each on a freshly configured format:
 * <ol>
 *   <li>without cached statistics, the reported size matches the written content length;</li>
 *   <li>passing those statistics back in yields statistics equal to the ones passed in;</li>
 *   <li>fake statistics carrying the same modification time are returned as they are;</li>
 *   <li>fake statistics carrying an older modification time are replaced by re-gathered ones.</li>
 * </ol>
 *
 * @throws IOException propagated from the calls made by the test
 */
@Test
public void testGetStatisticsSingleFileWithCachedVersion() throws IOException {
	final String myString = "my mocked line 1\nmy mocked line 2\n";
	final Path tempFile = createTempFilePath(myString);
	final long size = myString.length();
	final long fakeSize = 10065;

	DelimitedInputFormat<String> format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());

	FileBaseStatistics stats = format.getStatistics(null);
	assertNotNull(stats);
	assertEquals("The file size from the statistics is wrong.", size, stats.getTotalInputSize());

	format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());

	FileBaseStatistics newStats = format.getStatistics(stats);
	// expected value first, actual value second, so a failure report labels both sides correctly
	assertEquals("Statistics object was changed.", stats, newStats);

	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());

	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	assertEquals("The file size from the statistics is wrong.", fakeSize, latest.getTotalInputSize());

	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());

	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime() - 1, fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	assertEquals("The file size from the statistics is wrong.", size, reGathered.getTotalInputSize());
}
 
Example 8
Source Project: flink   Source File: HadoopInputFormatBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Gathers file-based statistics for the wrapped Hadoop input format.
 *
 * <p>Statistics are only computed when the wrapped format is a {@code FileInputFormat}: the
 * input paths are read from the job configuration and handed to {@code getFileStats}. Failures
 * during that lookup are logged and reported as "no statistics".
 *
 * @param cachedStats previously gathered statistics; only forwarded if they are
 *                    {@code FileBaseStatistics}
 * @return the result of {@code getFileStats}, or {@code null} if the format is not file based
 *         or the lookup failed
 * @throws IOException declared by the signature; exceptions raised inside the lookup are
 *                     logged instead of being rethrown
 */
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
	// statistics can only be derived for file-based input formats
	if (mapredInputFormat instanceof FileInputFormat) {
		FileBaseStatistics previousFileStats = null;
		if (cachedStats instanceof FileBaseStatistics) {
			previousFileStats = (FileBaseStatistics) cachedStats;
		}

		try {
			final org.apache.hadoop.fs.Path[] inputPaths = FileInputFormat.getInputPaths(this.jobConf);
			final ArrayList<FileStatus> fileStatuses = new ArrayList<FileStatus>(1);
			return getFileStats(previousFileStats, inputPaths, fileStatuses);
		} catch (IOException ioex) {
			if (LOG.isWarnEnabled()) {
				final String warning = "Could not determine statistics due to an io error: " + ioex.getMessage();
				LOG.warn(warning);
			}
		} catch (Throwable t) {
			if (LOG.isErrorEnabled()) {
				final String error = "Unexpected problem while getting the file statistics: " + t.getMessage();
				LOG.error(error, t);
			}
		}
	}

	// either not a file-based format, or gathering the statistics failed
	return null;
}
 
Example 9
Source Project: flink   Source File: HadoopInputFormatBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Gathers file-based statistics for the wrapped Hadoop (mapreduce) input format.
 *
 * <p>Statistics are only computed when the wrapped format is a {@code FileInputFormat}: the
 * input paths are read from a job context built on the configuration and handed to
 * {@code getFileStats}. Failures during that lookup are logged and reported as "no statistics".
 *
 * @param cachedStats previously gathered statistics; only forwarded if they are
 *                    {@code FileBaseStatistics}
 * @return the result of {@code getFileStats}, or {@code null} if the format is not file based
 *         or the lookup failed
 * @throws IOException declared by the signature; exceptions raised inside the lookup are
 *                     logged instead of being rethrown
 */
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
	// statistics can only be derived for file-based input formats
	if (mapreduceInputFormat instanceof FileInputFormat) {
		// built outside the try block, so a failure here is not swallowed by the handlers below
		final JobContext context = new JobContextImpl(configuration, null);

		FileBaseStatistics previousFileStats = null;
		if (cachedStats instanceof FileBaseStatistics) {
			previousFileStats = (FileBaseStatistics) cachedStats;
		}

		try {
			final org.apache.hadoop.fs.Path[] inputPaths = FileInputFormat.getInputPaths(context);
			final ArrayList<FileStatus> fileStatuses = new ArrayList<FileStatus>(1);
			return getFileStats(previousFileStats, inputPaths, fileStatuses);
		} catch (IOException ioex) {
			if (LOG.isWarnEnabled()) {
				final String warning = "Could not determine statistics due to an io error: " + ioex.getMessage();
				LOG.warn(warning);
			}
		} catch (Throwable t) {
			if (LOG.isErrorEnabled()) {
				final String error = "Unexpected problem while getting the file statistics: " + t.getMessage();
				LOG.error(error, t);
			}
		}
	}

	// either not a file-based format, or gathering the statistics failed
	return null;
}
 
Example 10
Source Project: flink   Source File: DelimitedInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that no statistics are returned when the configured input paths do not exist.
 *
 * @throws IOException propagated from the statistics call
 */
@Test
public void testGetStatisticsFileDoesNotExist() throws IOException {
	final String firstMissingPath = "file:///path/does/not/really/exist";
	final String secondMissingPath = "file:///another/path/that/does/not/exist";

	final DelimitedInputFormat<String> inputFormat = new MyTextInputFormat();
	inputFormat.setFilePaths(firstMissingPath, secondMissingPath);

	final FileBaseStatistics gathered = inputFormat.getStatistics(null);
	assertNull("The file statistics should be null.", gathered);
}
 
Example 11
Source Project: flink   Source File: DelimitedInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies how previously cached statistics are treated for a single input file.
 *
 * <p>The test runs four steps, each on a freshly configured format:
 * <ol>
 *   <li>without cached statistics, the reported size matches the written content length;</li>
 *   <li>passing those statistics back in yields statistics equal to the ones passed in;</li>
 *   <li>fake statistics carrying the same modification time are returned as they are;</li>
 *   <li>fake statistics carrying an older modification time are replaced by re-gathered ones.</li>
 * </ol>
 *
 * @throws IOException propagated from the calls made by the test
 */
@Test
public void testGetStatisticsSingleFileWithCachedVersion() throws IOException {
	final String myString = "my mocked line 1\nmy mocked line 2\n";
	final Path tempFile = createTempFilePath(myString);
	final long size = myString.length();
	final long fakeSize = 10065;

	DelimitedInputFormat<String> format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());

	FileBaseStatistics stats = format.getStatistics(null);
	assertNotNull(stats);
	assertEquals("The file size from the statistics is wrong.", size, stats.getTotalInputSize());

	format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());

	FileBaseStatistics newStats = format.getStatistics(stats);
	// expected value first, actual value second, so a failure report labels both sides correctly
	assertEquals("Statistics object was changed.", stats, newStats);

	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());

	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	assertEquals("The file size from the statistics is wrong.", fakeSize, latest.getTotalInputSize());

	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());

	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime() - 1, fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	assertEquals("The file size from the statistics is wrong.", size, reGathered.getTotalInputSize());
}
 
Example 12
Source Project: cascading-flink   Source File: TapInputFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Gathers file-based statistics for the wrapped Hadoop input format.
 *
 * <p>Statistics are only computed when the wrapped format is a {@code FileInputFormat}: the
 * input paths are read from the job configuration and handed to {@code getFileStats}. Failures
 * during that lookup are logged and reported as "no statistics".
 *
 * @param cachedStats previously gathered statistics, may be {@code null}; only forwarded if
 *                    they are {@code FileBaseStatistics}
 * @return the result of {@code getFileStats}, or {@code null} if the format is not file based
 *         or the lookup failed
 * @throws IOException declared by the signature; exceptions raised inside the lookup are
 *                     logged instead of being rethrown
 */
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
	// only gather base statistics for FileInputFormats
	if (!(mapredInputFormat instanceof FileInputFormat)) {
		return null;
	}

	// instanceof already evaluates to false for null, so no separate null check is needed
	final FileBaseStatistics cachedFileStats = (cachedStats instanceof FileBaseStatistics) ?
			(FileBaseStatistics) cachedStats : null;

	try {
		final org.apache.hadoop.fs.Path[] paths = FileInputFormat.getInputPaths(this.jobConf);

		return getFileStats(cachedFileStats, paths, new ArrayList<FileStatus>(1));
	} catch (IOException ioex) {
		if (LOG.isWarnEnabled()) {
			LOG.warn("Could not determine statistics due to an io error: "
					+ ioex.getMessage());
		}
	} catch (Throwable t) {
		if (LOG.isErrorEnabled()) {
			LOG.error("Unexpected problem while getting the file statistics: "
					+ t.getMessage(), t);
		}
	}

	// no statistics available
	return null;
}
 
Example 13
Source Project: flink   Source File: HadoopInputFormatBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Gathers file-based statistics for the wrapped Hadoop input format.
 *
 * <p>Statistics are only computed when the wrapped format is a {@code FileInputFormat}: the
 * input paths are read from the job configuration and handed to {@code getFileStats}. Failures
 * during that lookup are logged and reported as "no statistics".
 *
 * @param cachedStats previously gathered statistics; only forwarded if they are
 *                    {@code FileBaseStatistics}
 * @return the result of {@code getFileStats}, or {@code null} if the format is not file based
 *         or the lookup failed
 * @throws IOException declared by the signature; exceptions raised inside the lookup are
 *                     logged instead of being rethrown
 */
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
	// statistics can only be derived for file-based input formats
	if (mapredInputFormat instanceof FileInputFormat) {
		FileBaseStatistics previousFileStats = null;
		if (cachedStats instanceof FileBaseStatistics) {
			previousFileStats = (FileBaseStatistics) cachedStats;
		}

		try {
			final org.apache.hadoop.fs.Path[] inputPaths = FileInputFormat.getInputPaths(this.jobConf);
			final ArrayList<FileStatus> fileStatuses = new ArrayList<FileStatus>(1);
			return getFileStats(previousFileStats, inputPaths, fileStatuses);
		} catch (IOException ioex) {
			if (LOG.isWarnEnabled()) {
				final String warning = "Could not determine statistics due to an io error: " + ioex.getMessage();
				LOG.warn(warning);
			}
		} catch (Throwable t) {
			if (LOG.isErrorEnabled()) {
				final String error = "Unexpected problem while getting the file statistics: " + t.getMessage();
				LOG.error(error, t);
			}
		}
	}

	// either not a file-based format, or gathering the statistics failed
	return null;
}
 
Example 14
Source Project: flink   Source File: HadoopInputFormatBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Gathers file-based statistics for the wrapped Hadoop (mapreduce) input format.
 *
 * <p>Statistics are only computed when the wrapped format is a {@code FileInputFormat}: the
 * input paths are read from a job context built on the configuration and handed to
 * {@code getFileStats}. Failures during that lookup are logged and reported as "no statistics".
 *
 * @param cachedStats previously gathered statistics; only forwarded if they are
 *                    {@code FileBaseStatistics}
 * @return the result of {@code getFileStats}, or {@code null} if the format is not file based
 *         or the lookup failed
 * @throws IOException declared by the signature; exceptions raised inside the lookup are
 *                     logged instead of being rethrown
 */
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
	// statistics can only be derived for file-based input formats
	if (mapreduceInputFormat instanceof FileInputFormat) {
		// built outside the try block, so a failure here is not swallowed by the handlers below
		final JobContext context = new JobContextImpl(configuration, null);

		FileBaseStatistics previousFileStats = null;
		if (cachedStats instanceof FileBaseStatistics) {
			previousFileStats = (FileBaseStatistics) cachedStats;
		}

		try {
			final org.apache.hadoop.fs.Path[] inputPaths = FileInputFormat.getInputPaths(context);
			final ArrayList<FileStatus> fileStatuses = new ArrayList<FileStatus>(1);
			return getFileStats(previousFileStats, inputPaths, fileStatuses);
		} catch (IOException ioex) {
			if (LOG.isWarnEnabled()) {
				final String warning = "Could not determine statistics due to an io error: " + ioex.getMessage();
				LOG.warn(warning);
			}
		} catch (Throwable t) {
			if (LOG.isErrorEnabled()) {
				final String error = "Unexpected problem while getting the file statistics: " + t.getMessage();
				LOG.error(error, t);
			}
		}
	}

	// either not a file-based format, or gathering the statistics failed
	return null;
}
 
Example 15
Source Project: flink   Source File: DelimitedInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that no statistics are returned when the configured input paths do not exist.
 *
 * @throws IOException propagated from the statistics call
 */
@Test
public void testGetStatisticsFileDoesNotExist() throws IOException {
	final String firstMissingPath = "file:///path/does/not/really/exist";
	final String secondMissingPath = "file:///another/path/that/does/not/exist";

	final DelimitedInputFormat<String> inputFormat = new MyTextInputFormat();
	inputFormat.setFilePaths(firstMissingPath, secondMissingPath);

	final FileBaseStatistics gathered = inputFormat.getStatistics(null);
	assertNull("The file statistics should be null.", gathered);
}
 
Example 16
Source Project: flink   Source File: DelimitedInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies how previously cached statistics are treated for a single input file.
 *
 * <p>The test runs four steps, each on a freshly configured format:
 * <ol>
 *   <li>without cached statistics, the reported size matches the written content length;</li>
 *   <li>passing those statistics back in yields statistics equal to the ones passed in;</li>
 *   <li>fake statistics carrying the same modification time are returned as they are;</li>
 *   <li>fake statistics carrying an older modification time are replaced by re-gathered ones.</li>
 * </ol>
 *
 * @throws IOException propagated from the calls made by the test
 */
@Test
public void testGetStatisticsSingleFileWithCachedVersion() throws IOException {
	final String myString = "my mocked line 1\nmy mocked line 2\n";
	final Path tempFile = createTempFilePath(myString);
	final long size = myString.length();
	final long fakeSize = 10065;

	DelimitedInputFormat<String> format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());

	FileBaseStatistics stats = format.getStatistics(null);
	assertNotNull(stats);
	assertEquals("The file size from the statistics is wrong.", size, stats.getTotalInputSize());

	format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());

	FileBaseStatistics newStats = format.getStatistics(stats);
	// expected value first, actual value second, so a failure report labels both sides correctly
	assertEquals("Statistics object was changed.", stats, newStats);

	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());

	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	assertEquals("The file size from the statistics is wrong.", fakeSize, latest.getTotalInputSize());

	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());

	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime() - 1, fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	assertEquals("The file size from the statistics is wrong.", size, reGathered.getTotalInputSize());
}
 
Example 17
Source Project: Flink-CEPplus   Source File: FileInputFormatTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Verifies how previously cached statistics are treated for a single input file.
 *
 * <p>The test runs four steps, each on a freshly configured format:
 * <ol>
 *   <li>without cached statistics, the reported size matches the size of the created file;</li>
 *   <li>passing those statistics back in returns the very same object;</li>
 *   <li>fake statistics carrying the same modification time are returned as they are;</li>
 *   <li>fake statistics carrying an older modification time are replaced by re-gathered ones.</li>
 * </ol>
 *
 * <p>Any exception raised along the way fails the test.
 */
@Test
public void testGetStatisticsOneFileWithCachedVersion() {
	try {
		final long SIZE = 50873;
		final long FAKE_SIZE = 10065;

		String tempFile = TestFileUtils.createTempFile(SIZE);

		DummyFileInputFormat format = new DummyFileInputFormat();
		format.setFilePath(tempFile);
		format.configure(new Configuration());

		FileBaseStatistics stats = format.getStatistics(null);
		Assert.assertEquals("The file size from the statistics is wrong.", SIZE, stats.getTotalInputSize());

		format = new DummyFileInputFormat();
		format.setFilePath(tempFile);
		format.configure(new Configuration());

		FileBaseStatistics newStats = format.getStatistics(stats);
		// identity check: the cached object itself must be handed back
		Assert.assertSame("Statistics object was changed", stats, newStats);

		// insert fake stats with the correct modification time. the call should return the fake stats
		format = new DummyFileInputFormat();
		format.setFilePath(tempFile);
		format.configure(new Configuration());

		FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
		BaseStatistics latest = format.getStatistics(fakeStats);
		Assert.assertEquals("The file size from the statistics is wrong.", FAKE_SIZE, latest.getTotalInputSize());

		// insert fake stats with the expired modification time. the call should return new accurate stats
		format = new DummyFileInputFormat();
		format.setFilePath(tempFile);
		format.configure(new Configuration());

		FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime() - 1, FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
		BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
		Assert.assertEquals("The file size from the statistics is wrong.", SIZE, reGathered.getTotalInputSize());

	} catch (Exception ex) {
		// fail the test, keeping the original exception as the cause so its stack trace is reported
		throw new AssertionError(ex.getMessage(), ex);
	}
}
 
Example 18
Source Project: Flink-CEPplus   Source File: FileInputFormatTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Verifies how previously cached statistics are treated for a directory holding three files.
 *
 * <p>The test runs four steps, each on a freshly configured format pointing at the directory:
 * <ol>
 *   <li>without cached statistics, the reported size is the sum of the three file sizes;</li>
 *   <li>passing those statistics back in returns the very same object;</li>
 *   <li>fake statistics carrying the same modification time are returned as they are;</li>
 *   <li>fake statistics older than every file's modification time are replaced by
 *       re-gathered ones.</li>
 * </ol>
 *
 * <p>Any exception raised along the way fails the test.
 */
@Test
public void testGetStatisticsMultipleFilesWithCachedVersion() {
	try {
		FileSystem fs = FileSystem.getLocalFileSystem();

		final long SIZE1 = 2077;
		final long SIZE2 = 31909;
		final long SIZE3 = 10;
		final long TOTAL = SIZE1 + SIZE2 + SIZE3;
		final long FAKE_SIZE = 10065;

		File tempDirFile = temporaryFolder.newFolder();
		String tempDir = tempDirFile.getAbsolutePath();
		String f1 = TestFileUtils.createTempFileInDirectory(tempDir, SIZE1);
		long modTime1 = fs.getFileStatus(new Path(f1)).getModificationTime();
		String f2 = TestFileUtils.createTempFileInDirectory(tempDir, SIZE2);
		long modTime2 = fs.getFileStatus(new Path(f2)).getModificationTime();
		String f3 = TestFileUtils.createTempFileInDirectory(tempDir, SIZE3);
		long modTime3 = fs.getFileStatus(new Path(f3)).getModificationTime();

		DummyFileInputFormat format = new DummyFileInputFormat();
		format.setFilePath(tempDir);
		format.configure(new Configuration());

		FileBaseStatistics stats = format.getStatistics(null);
		Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, stats.getTotalInputSize());

		format = new DummyFileInputFormat();
		format.setFilePath(tempDir);
		format.configure(new Configuration());

		FileBaseStatistics newStats = format.getStatistics(stats);
		// identity check: the cached object itself must be handed back
		Assert.assertSame("Statistics object was changed", stats, newStats);

		// insert fake stats with the correct modification time. the call should return the fake stats
		format = new DummyFileInputFormat();
		format.setFilePath(tempDir);
		format.configure(new Configuration());

		FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
		BaseStatistics latest = format.getStatistics(fakeStats);
		Assert.assertEquals("The file size from the statistics is wrong.", FAKE_SIZE, latest.getTotalInputSize());

		// insert fake stats with the expired modification time. the call should return new accurate stats
		format = new DummyFileInputFormat();
		format.setFilePath(tempDir);
		format.configure(new Configuration());

		// one tick older than the oldest of the three files
		FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(Math.min(Math.min(modTime1, modTime2), modTime3) - 1, FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
		BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
		Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, reGathered.getTotalInputSize());

	} catch (Exception ex) {
		// fail the test, keeping the original exception as the cause so its stack trace is reported
		throw new AssertionError(ex.getMessage(), ex);
	}
}
 
Example 19
Source Project: Flink-CEPplus   Source File: FileInputFormatTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Verifies how previously cached statistics are treated by a multi-path format.
 *
 * <p>The test runs four steps, each on a freshly configured format:
 * <ol>
 *   <li>with both files and no cached statistics, the reported size is the sum of both sizes;</li>
 *   <li>with only the first file and those statistics passed back in, the very same object
 *       is returned;</li>
 *   <li>with only the first file, fake statistics carrying the same modification time are
 *       returned as they are;</li>
 *   <li>with both files, fake statistics older than both modification times are replaced by
 *       re-gathered ones.</li>
 * </ol>
 *
 * @throws IOException propagated from the calls made by the test
 */
@Test
public void testGetStatisticsMultipleOneFileWithCachedVersion() throws IOException {
	FileSystem fs = FileSystem.getLocalFileSystem();

	final long size1 = 50873;
	final long fakeSize = 10065;
	String tempFile1 = TestFileUtils.createTempFile(size1);
	final long lastModTime1 = fs.getFileStatus(new Path(tempFile1)).getModificationTime();

	final long size2 = 52573;
	String tempFile2 = TestFileUtils.createTempFile(size2);
	final long lastModTime2 = fs.getFileStatus(new Path(tempFile2)).getModificationTime();

	final long sizeTotal = size1 + size2;

	MultiDummyFileInputFormat format = new MultiDummyFileInputFormat();
	format.setFilePaths(tempFile1, tempFile2);
	format.configure(new Configuration());

	FileBaseStatistics stats = format.getStatistics(null);
	Assert.assertEquals("The file size from the statistics is wrong.", sizeTotal, stats.getTotalInputSize());

	format = new MultiDummyFileInputFormat();
	format.setFilePath(tempFile1);
	format.configure(new Configuration());

	FileBaseStatistics newStats = format.getStatistics(stats);
	// identity check: the cached object itself must be handed back
	Assert.assertSame("Statistics object was changed", stats, newStats);

	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new MultiDummyFileInputFormat();
	format.setFilePath(tempFile1);
	format.configure(new Configuration());

	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", fakeSize, latest.getTotalInputSize());

	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new MultiDummyFileInputFormat();
	format.setFilePaths(tempFile1, tempFile2);
	format.configure(new Configuration());

	// one tick older than the older of the two files
	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(Math.min(lastModTime1, lastModTime2) - 1, fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", sizeTotal, reGathered.getTotalInputSize());
}
 
Example 20
Source Project: Flink-CEPplus   Source File: CompilerTestBase.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Attaches file statistics built from the given size and record width to a data source.
 *
 * <p>The statistics are created with {@code Long.MAX_VALUE} as their first constructor argument
 * and then passed to the {@code FileBaseStatistics}-based overload.
 *
 * @param source the data source to annotate
 * @param size the size handed to the statistics constructor
 * @param recordWidth the record width handed to the statistics constructor
 */
public void setSourceStatistics(GenericDataSourceBase<?, ?> source, long size, float recordWidth) {
	final FileBaseStatistics sourceStats = new FileBaseStatistics(Long.MAX_VALUE, size, recordWidth);
	setSourceStatistics(source, sourceStats);
}
 
Example 21
Source Project: Flink-CEPplus   Source File: CompilerTestBase.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Caches the given statistics under a newly generated key and sets that key on the data source.
 *
 * @param source the data source that receives the statistics key
 * @param stats the statistics to cache
 */
public void setSourceStatistics(GenericDataSourceBase<?, ?> source, FileBaseStatistics stats) {
	// the counter is advanced on every call, so each call produces a different key
	final String statisticsKey = CACHE_KEY + statCounter++;
	dataStats.cacheBaseStatistics(stats, statisticsKey);
	source.setStatisticsKey(statisticsKey);
}
 
Example 22
Source Project: flink   Source File: FileInputFormatTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Verifies how previously cached statistics are treated for a single input file.
 *
 * <p>The test runs four steps, each on a freshly configured format:
 * <ol>
 *   <li>without cached statistics, the reported size matches the size of the created file;</li>
 *   <li>passing those statistics back in returns the very same object;</li>
 *   <li>fake statistics carrying the same modification time are returned as they are;</li>
 *   <li>fake statistics carrying an older modification time are replaced by re-gathered ones.</li>
 * </ol>
 *
 * <p>Any exception raised along the way fails the test.
 */
@Test
public void testGetStatisticsOneFileWithCachedVersion() {
	try {
		final long SIZE = 50873;
		final long FAKE_SIZE = 10065;

		String tempFile = TestFileUtils.createTempFile(SIZE);

		DummyFileInputFormat format = new DummyFileInputFormat();
		format.setFilePath(tempFile);
		format.configure(new Configuration());

		FileBaseStatistics stats = format.getStatistics(null);
		Assert.assertEquals("The file size from the statistics is wrong.", SIZE, stats.getTotalInputSize());

		format = new DummyFileInputFormat();
		format.setFilePath(tempFile);
		format.configure(new Configuration());

		FileBaseStatistics newStats = format.getStatistics(stats);
		// identity check: the cached object itself must be handed back
		Assert.assertSame("Statistics object was changed", stats, newStats);

		// insert fake stats with the correct modification time. the call should return the fake stats
		format = new DummyFileInputFormat();
		format.setFilePath(tempFile);
		format.configure(new Configuration());

		FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
		BaseStatistics latest = format.getStatistics(fakeStats);
		Assert.assertEquals("The file size from the statistics is wrong.", FAKE_SIZE, latest.getTotalInputSize());

		// insert fake stats with the expired modification time. the call should return new accurate stats
		format = new DummyFileInputFormat();
		format.setFilePath(tempFile);
		format.configure(new Configuration());

		FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime() - 1, FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
		BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
		Assert.assertEquals("The file size from the statistics is wrong.", SIZE, reGathered.getTotalInputSize());

	} catch (Exception ex) {
		// fail the test, keeping the original exception as the cause so its stack trace is reported
		throw new AssertionError(ex.getMessage(), ex);
	}
}
 
Example 23
Source Project: flink   Source File: FileInputFormatTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Verifies how previously cached statistics are treated for a directory holding three files.
 *
 * <p>The test runs four steps, each on a freshly configured format pointing at the directory:
 * <ol>
 *   <li>without cached statistics, the reported size is the sum of the three file sizes;</li>
 *   <li>passing those statistics back in returns the very same object;</li>
 *   <li>fake statistics carrying the same modification time are returned as they are;</li>
 *   <li>fake statistics older than every file's modification time are replaced by
 *       re-gathered ones.</li>
 * </ol>
 *
 * <p>Any exception raised along the way fails the test.
 */
@Test
public void testGetStatisticsMultipleFilesWithCachedVersion() {
	try {
		FileSystem fs = FileSystem.getLocalFileSystem();

		final long SIZE1 = 2077;
		final long SIZE2 = 31909;
		final long SIZE3 = 10;
		final long TOTAL = SIZE1 + SIZE2 + SIZE3;
		final long FAKE_SIZE = 10065;

		File tempDirFile = temporaryFolder.newFolder();
		String tempDir = tempDirFile.getAbsolutePath();
		String f1 = TestFileUtils.createTempFileInDirectory(tempDir, SIZE1);
		long modTime1 = fs.getFileStatus(new Path(f1)).getModificationTime();
		String f2 = TestFileUtils.createTempFileInDirectory(tempDir, SIZE2);
		long modTime2 = fs.getFileStatus(new Path(f2)).getModificationTime();
		String f3 = TestFileUtils.createTempFileInDirectory(tempDir, SIZE3);
		long modTime3 = fs.getFileStatus(new Path(f3)).getModificationTime();

		DummyFileInputFormat format = new DummyFileInputFormat();
		format.setFilePath(tempDir);
		format.configure(new Configuration());

		FileBaseStatistics stats = format.getStatistics(null);
		Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, stats.getTotalInputSize());

		format = new DummyFileInputFormat();
		format.setFilePath(tempDir);
		format.configure(new Configuration());

		FileBaseStatistics newStats = format.getStatistics(stats);
		// identity check: the cached object itself must be handed back
		Assert.assertSame("Statistics object was changed", stats, newStats);

		// insert fake stats with the correct modification time. the call should return the fake stats
		format = new DummyFileInputFormat();
		format.setFilePath(tempDir);
		format.configure(new Configuration());

		FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
		BaseStatistics latest = format.getStatistics(fakeStats);
		Assert.assertEquals("The file size from the statistics is wrong.", FAKE_SIZE, latest.getTotalInputSize());

		// insert fake stats with the expired modification time. the call should return new accurate stats
		format = new DummyFileInputFormat();
		format.setFilePath(tempDir);
		format.configure(new Configuration());

		// one tick older than the oldest of the three files
		FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(Math.min(Math.min(modTime1, modTime2), modTime3) - 1, FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
		BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
		Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, reGathered.getTotalInputSize());

	} catch (Exception ex) {
		// fail the test, keeping the original exception as the cause so its stack trace is reported
		throw new AssertionError(ex.getMessage(), ex);
	}
}
 
Example 24
Source Project: flink   Source File: FileInputFormatTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Verifies how previously cached statistics are treated by a multi-path format.
 *
 * <p>The test runs four steps, each on a freshly configured format:
 * <ol>
 *   <li>with both files and no cached statistics, the reported size is the sum of both sizes;</li>
 *   <li>with only the first file and those statistics passed back in, the very same object
 *       is returned;</li>
 *   <li>with only the first file, fake statistics carrying the same modification time are
 *       returned as they are;</li>
 *   <li>with both files, fake statistics older than both modification times are replaced by
 *       re-gathered ones.</li>
 * </ol>
 *
 * @throws IOException propagated from the calls made by the test
 */
@Test
public void testGetStatisticsMultipleOneFileWithCachedVersion() throws IOException {
	FileSystem fs = FileSystem.getLocalFileSystem();

	final long size1 = 50873;
	final long fakeSize = 10065;
	String tempFile1 = TestFileUtils.createTempFile(size1);
	final long lastModTime1 = fs.getFileStatus(new Path(tempFile1)).getModificationTime();

	final long size2 = 52573;
	String tempFile2 = TestFileUtils.createTempFile(size2);
	final long lastModTime2 = fs.getFileStatus(new Path(tempFile2)).getModificationTime();

	final long sizeTotal = size1 + size2;

	MultiDummyFileInputFormat format = new MultiDummyFileInputFormat();
	format.setFilePaths(tempFile1, tempFile2);
	format.configure(new Configuration());

	FileBaseStatistics stats = format.getStatistics(null);
	Assert.assertEquals("The file size from the statistics is wrong.", sizeTotal, stats.getTotalInputSize());

	format = new MultiDummyFileInputFormat();
	format.setFilePath(tempFile1);
	format.configure(new Configuration());

	FileBaseStatistics newStats = format.getStatistics(stats);
	// identity check: the cached object itself must be handed back
	Assert.assertSame("Statistics object was changed", stats, newStats);

	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new MultiDummyFileInputFormat();
	format.setFilePath(tempFile1);
	format.configure(new Configuration());

	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", fakeSize, latest.getTotalInputSize());

	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new MultiDummyFileInputFormat();
	format.setFilePaths(tempFile1, tempFile2);
	format.configure(new Configuration());

	// one tick older than the older of the two files
	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(Math.min(lastModTime1, lastModTime2) - 1, fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", sizeTotal, reGathered.getTotalInputSize());
}
 
Example 25
Source Project: flink   Source File: CompilerTestBase.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Attaches statistics to the given source, built from a raw size and record width.
 *
 * <p>The values are wrapped in a {@link FileBaseStatistics} and handed to the overload that
 * accepts a statistics object.
 *
 * @param source the data source to annotate
 * @param size passed as the total-input-size argument of the statistics
 * @param recordWidth passed as the average-record-width argument of the statistics
 */
public void setSourceStatistics(GenericDataSourceBase<?, ?> source, long size, float recordWidth) {
	// Long.MAX_VALUE fills the constructor's first (last modification time) argument.
	final FileBaseStatistics statistics = new FileBaseStatistics(Long.MAX_VALUE, size, recordWidth);
	setSourceStatistics(source, statistics);
}
 
Example 26
Source Project: flink   Source File: CompilerTestBase.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Caches the given statistics under a newly generated key and registers that key on the source.
 *
 * @param source the data source that receives the statistics key
 * @param stats the statistics object to cache
 */
public void setSourceStatistics(GenericDataSourceBase<?, ?> source, FileBaseStatistics stats) {
	// The counter is advanced on every call, so each call builds its key from a new counter value.
	final String statisticsKey = CACHE_KEY + (statCounter++);

	dataStats.cacheBaseStatistics(stats, statisticsKey);
	source.setStatisticsKey(statisticsKey);
}
 
Example 27
Source Project: flink   Source File: FileInputFormatTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Checks how {@code getStatistics} handles a previously cached statistics object when the
 * input is a single file.
 *
 * <ol>
 *   <li>Without cached statistics, the reported size equals the file size.</li>
 *   <li>Passing those statistics back in returns the very same object.</li>
 *   <li>Cached statistics carrying the current modification time are trusted, even if their
 *       size is fake.</li>
 *   <li>Cached statistics with an older modification time are discarded and the real size is
 *       gathered again.</li>
 * </ol>
 *
 * @throws Exception if creating the temp file or gathering statistics fails; propagated so the
 *     test runner reports the original exception and stack trace
 */
@Test
public void testGetStatisticsOneFileWithCachedVersion() throws Exception {
	final long size = 50873;
	final long fakeSize = 10065;

	String tempFile = TestFileUtils.createTempFile(size);

	// no cached stats: the statistics must be gathered from the file
	DummyFileInputFormat format = new DummyFileInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());

	FileBaseStatistics stats = format.getStatistics(null);
	Assert.assertEquals("The file size from the statistics is wrong.", size, stats.getTotalInputSize());

	// up-to-date cached stats: the identical object must be handed back
	format = new DummyFileInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());

	FileBaseStatistics newStats = format.getStatistics(stats);
	Assert.assertSame("Statistics object was changed", stats, newStats);

	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new DummyFileInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());

	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", fakeSize, latest.getTotalInputSize());

	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new DummyFileInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());

	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime() - 1, fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", size, reGathered.getTotalInputSize());
}
 
Example 28
Source Project: flink   Source File: FileInputFormatTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Checks how {@code getStatistics} handles a previously cached statistics object when the
 * input path is a directory containing three files.
 *
 * <ol>
 *   <li>Without cached statistics, the reported size equals the sum of the three file sizes.</li>
 *   <li>Passing those statistics back in returns the very same object.</li>
 *   <li>Cached statistics carrying the current modification time are trusted, even if their
 *       size is fake.</li>
 *   <li>Cached statistics older than every file are discarded and the real total is gathered
 *       again.</li>
 * </ol>
 *
 * @throws Exception if creating the temp files or gathering statistics fails; propagated so
 *     the test runner reports the original exception and stack trace
 */
@Test
public void testGetStatisticsMultipleFilesWithCachedVersion() throws Exception {
	FileSystem fs = FileSystem.getLocalFileSystem();

	final long size1 = 2077;
	final long size2 = 31909;
	final long size3 = 10;
	final long total = size1 + size2 + size3;
	final long fakeSize = 10065;

	// create the three files and remember each file's modification time
	File tempDirFile = temporaryFolder.newFolder();
	String tempDir = tempDirFile.getAbsolutePath();
	String f1 = TestFileUtils.createTempFileInDirectory(tempDir, size1);
	long modTime1 = fs.getFileStatus(new Path(f1)).getModificationTime();
	String f2 = TestFileUtils.createTempFileInDirectory(tempDir, size2);
	long modTime2 = fs.getFileStatus(new Path(f2)).getModificationTime();
	String f3 = TestFileUtils.createTempFileInDirectory(tempDir, size3);
	long modTime3 = fs.getFileStatus(new Path(f3)).getModificationTime();

	// no cached stats: the statistics must be gathered from the directory
	DummyFileInputFormat format = new DummyFileInputFormat();
	format.setFilePath(tempDir);
	format.configure(new Configuration());

	FileBaseStatistics stats = format.getStatistics(null);
	Assert.assertEquals("The file size from the statistics is wrong.", total, stats.getTotalInputSize());

	// up-to-date cached stats: the identical object must be handed back
	format = new DummyFileInputFormat();
	format.setFilePath(tempDir);
	format.configure(new Configuration());

	FileBaseStatistics newStats = format.getStatistics(stats);
	Assert.assertSame("Statistics object was changed", stats, newStats);

	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new DummyFileInputFormat();
	format.setFilePath(tempDir);
	format.configure(new Configuration());

	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", fakeSize, latest.getTotalInputSize());

	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new DummyFileInputFormat();
	format.setFilePath(tempDir);
	format.configure(new Configuration());

	// one millisecond before the oldest file, so the cached stats predate every input file
	final long expiredModTime = Math.min(Math.min(modTime1, modTime2), modTime3) - 1;
	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(expiredModTime, fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", total, reGathered.getTotalInputSize());
}
 
Example 29
Source Project: flink   Source File: FileInputFormatTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Checks how {@code getStatistics} handles a previously cached statistics object for a
 * multi-path input format.
 *
 * <ol>
 *   <li>With both files set and no cached statistics, the reported size is the sum of both
 *       file sizes.</li>
 *   <li>A format pointed at the first file only, given those statistics, returns the very same
 *       object.</li>
 *   <li>A format pointed at the first file only, given fake statistics with the cached
 *       modification time, returns the fake size.</li>
 *   <li>With both files set, fake statistics older than both files are discarded and the real
 *       total is gathered again.</li>
 * </ol>
 *
 * @throws IOException if creating the temp files or gathering statistics fails
 */
@Test
public void testGetStatisticsMultipleOneFileWithCachedVersion() throws IOException {
	FileSystem fs = FileSystem.getLocalFileSystem();

	final long size1 = 50873;
	final long fakeSize = 10065;
	String tempFile1 = TestFileUtils.createTempFile(size1);
	final long lastModTime1 = fs.getFileStatus(new Path(tempFile1)).getModificationTime();

	final long size2 = 52573;
	String tempFile2 = TestFileUtils.createTempFile(size2);
	final long lastModTime2 = fs.getFileStatus(new Path(tempFile2)).getModificationTime();

	final long sizeTotal = size1 + size2;

	// no cached stats: the statistics must be gathered from both files
	MultiDummyFileInputFormat format = new MultiDummyFileInputFormat();
	format.setFilePaths(tempFile1, tempFile2);
	format.configure(new Configuration());

	FileBaseStatistics stats = format.getStatistics(null);
	Assert.assertEquals("The file size from the statistics is wrong.", sizeTotal, stats.getTotalInputSize());

	// cached stats passed to a format reading only the first file: the identical object must be handed back
	format = new MultiDummyFileInputFormat();
	format.setFilePath(tempFile1);
	format.configure(new Configuration());

	FileBaseStatistics newStats = format.getStatistics(stats);
	Assert.assertSame("Statistics object was changed", stats, newStats);

	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new MultiDummyFileInputFormat();
	format.setFilePath(tempFile1);
	format.configure(new Configuration());

	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", fakeSize, latest.getTotalInputSize());

	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new MultiDummyFileInputFormat();
	format.setFilePaths(tempFile1, tempFile2);
	format.configure(new Configuration());

	// one millisecond before the older file, so the cached stats predate both input files
	final long expiredModTime = Math.min(lastModTime1, lastModTime2) - 1;
	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(expiredModTime, fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", sizeTotal, reGathered.getTotalInputSize());
}
 
Example 30
Source Project: flink   Source File: CompilerTestBase.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Convenience overload: builds a {@link FileBaseStatistics} from the given size and record
 * width and forwards it to the overload that takes a statistics object.
 *
 * @param source the data source to annotate
 * @param size passed as the total-input-size argument of the statistics
 * @param recordWidth passed as the average-record-width argument of the statistics
 */
public void setSourceStatistics(GenericDataSourceBase<?, ?> source, long size, float recordWidth) {
	// The first constructor argument (last modification time) is fixed to Long.MAX_VALUE.
	final FileBaseStatistics sourceStats = new FileBaseStatistics(Long.MAX_VALUE, size, recordWidth);
	setSourceStatistics(source, sourceStats);
}