Java Code Examples for org.apache.flink.api.common.io.statistics.BaseStatistics#AVG_RECORD_BYTES_UNKNOWN

The following examples show how to use org.apache.flink.api.common.io.statistics.BaseStatistics#AVG_RECORD_BYTES_UNKNOWN. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FileInputFormat.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Gathers statistics over several input paths.
 *
 * <p>Each path is delegated to the single-path {@code getFileStats} overload, which also
 * receives {@code files}. The reported sizes are summed; as soon as one path reports
 * {@link BaseStatistics#SIZE_UNKNOWN}, the total is {@code SIZE_UNKNOWN} and stays that way.
 *
 * @param cachedStats previously gathered statistics, or {@code null} if there are none
 * @param filePaths the paths to gather statistics for
 * @param files list handed on to the single-path overload for every path
 * @return {@code cachedStats} if it is non-null and its modification time is not older than
 *         the newest modification time found; otherwise newly created statistics with an
 *         unknown average record width
 * @throws IOException if resolving a file system or gathering per-path statistics fails
 */
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path[] filePaths, ArrayList<FileStatus> files) throws IOException {

		// running aggregates over all given paths
		long newestModification = 0;
		long accumulatedSize = 0;

		for (Path current : filePaths) {
			final FileBaseStatistics pathStats =
				getFileStats(cachedStats, current, FileSystem.get(current.toUri()), files);
			final long pathSize = pathStats.getTotalInputSize();

			if (pathSize == BaseStatistics.SIZE_UNKNOWN) {
				// one unknown size makes the overall size unknown
				accumulatedSize = BaseStatistics.SIZE_UNKNOWN;
			} else if (accumulatedSize != BaseStatistics.SIZE_UNKNOWN) {
				accumulatedSize += pathSize;
			}
			newestModification = Math.max(newestModification, pathStats.getLastModificationTime());
		}

		// the cached statistics stay valid as long as nothing was modified after they were taken
		final boolean cacheIsCurrent = cachedStats != null
			&& newestModification <= cachedStats.getLastModificationTime();

		return cacheIsCurrent
			? cachedStats
			: new FileBaseStatistics(newestModification, accumulatedSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	}
 
Example 2
Source File: FileInputFormat.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Gathers statistics over several input paths.
 *
 * <p>Each path is delegated to the single-path {@code getFileStats} overload, which also
 * receives {@code files}. The reported sizes are summed; as soon as one path reports
 * {@link BaseStatistics#SIZE_UNKNOWN}, the total is {@code SIZE_UNKNOWN} and stays that way.
 *
 * @param cachedStats previously gathered statistics, or {@code null} if there are none
 * @param filePaths the paths to gather statistics for
 * @param files list handed on to the single-path overload for every path
 * @return {@code cachedStats} if it is non-null and its modification time is not older than
 *         the newest modification time found; otherwise newly created statistics with an
 *         unknown average record width
 * @throws IOException if resolving a file system or gathering per-path statistics fails
 */
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path[] filePaths, ArrayList<FileStatus> files) throws IOException {

		// running aggregates over all given paths
		long newestModification = 0;
		long accumulatedSize = 0;

		for (Path current : filePaths) {
			final FileBaseStatistics pathStats =
				getFileStats(cachedStats, current, FileSystem.get(current.toUri()), files);
			final long pathSize = pathStats.getTotalInputSize();

			if (pathSize == BaseStatistics.SIZE_UNKNOWN) {
				// one unknown size makes the overall size unknown
				accumulatedSize = BaseStatistics.SIZE_UNKNOWN;
			} else if (accumulatedSize != BaseStatistics.SIZE_UNKNOWN) {
				accumulatedSize += pathSize;
			}
			newestModification = Math.max(newestModification, pathStats.getLastModificationTime());
		}

		// the cached statistics stay valid as long as nothing was modified after they were taken
		final boolean cacheIsCurrent = cachedStats != null
			&& newestModification <= cachedStats.getLastModificationTime();

		return cacheIsCurrent
			? cachedStats
			: new FileBaseStatistics(newestModification, accumulatedSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	}
 
Example 3
Source File: FileInputFormat.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Gathers statistics over several input paths.
 *
 * <p>Each path is delegated to the single-path {@code getFileStats} overload, which also
 * receives {@code files}. The reported sizes are summed; as soon as one path reports
 * {@link BaseStatistics#SIZE_UNKNOWN}, the total is {@code SIZE_UNKNOWN} and stays that way.
 *
 * @param cachedStats previously gathered statistics, or {@code null} if there are none
 * @param filePaths the paths to gather statistics for
 * @param files list handed on to the single-path overload for every path
 * @return {@code cachedStats} if it is non-null and its modification time is not older than
 *         the newest modification time found; otherwise newly created statistics with an
 *         unknown average record width
 * @throws IOException if resolving a file system or gathering per-path statistics fails
 */
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path[] filePaths, ArrayList<FileStatus> files) throws IOException {

		// running aggregates over all given paths
		long newestModification = 0;
		long accumulatedSize = 0;

		for (Path current : filePaths) {
			final FileBaseStatistics pathStats =
				getFileStats(cachedStats, current, FileSystem.get(current.toUri()), files);
			final long pathSize = pathStats.getTotalInputSize();

			if (pathSize == BaseStatistics.SIZE_UNKNOWN) {
				// one unknown size makes the overall size unknown
				accumulatedSize = BaseStatistics.SIZE_UNKNOWN;
			} else if (accumulatedSize != BaseStatistics.SIZE_UNKNOWN) {
				accumulatedSize += pathSize;
			}
			newestModification = Math.max(newestModification, pathStats.getLastModificationTime());
		}

		// the cached statistics stay valid as long as nothing was modified after they were taken
		final boolean cacheIsCurrent = cachedStats != null
			&& newestModification <= cachedStats.getLastModificationTime();

		return cacheIsCurrent
			? cachedStats
			: new FileBaseStatistics(newestModification, accumulatedSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	}
 
Example 4
Source File: FileInputFormat.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Gathers statistics for a single path, which may be a file or a directory.
 *
 * <p>The file status objects found are appended to {@code files}. The newest modification
 * time is then taken over <em>all</em> entries of {@code files}, including any that were
 * already in the list when this method was called.
 *
 * @param cachedStats previously gathered statistics, or {@code null} if there are none
 * @param filePath the file or directory to inspect
 * @param fs the file system used to look up {@code filePath}
 * @param files list that receives the file status objects found
 * @return {@code cachedStats} if it is non-null and its modification time is not older than
 *         the newest modification time found; otherwise newly created statistics whose size
 *         is {@link BaseStatistics#SIZE_UNKNOWN} when no positive length was measured
 * @throws IOException if the file status cannot be obtained or the files cannot be enumerated
 */
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException {

		// look up the status of the given path
		final FileStatus status = fs.getFileStatus(filePath);

		// collect the file(s) behind the path and measure their length
		final long measuredLength;
		if (status.isDir()) {
			measuredLength = addFilesInDir(status.getPath(), files, false);
		} else {
			files.add(status);
			testForUnsplittable(status);
			measuredLength = status.getLen();
		}

		// newest modification time stamp over everything collected in 'files'
		long newestModification = 0;
		for (FileStatus entry : files) {
			newestModification = Math.max(entry.getModificationTime(), newestModification);
		}

		// reuse the cached statistics if nothing was modified after they were taken
		if (cachedStats != null && newestModification <= cachedStats.getLastModificationTime()) {
			return cachedStats;
		}

		// a non-positive length is reported as "unknown"
		final long reportedLength = measuredLength > 0 ? measuredLength : BaseStatistics.SIZE_UNKNOWN;
		return new FileBaseStatistics(newestModification, reportedLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	}
 
Example 5
Source File: DelimitedInputFormatTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Checks how {@code getStatistics} treats cached statistics for a single input file:
 * cached statistics that are still current are handed back, while cached statistics with an
 * older modification time are replaced by freshly gathered ones.
 */
@Test
public void testGetStatisticsSingleFileWithCachedVersion() throws IOException {
	final String myString = "my mocked line 1\nmy mocked line 2\n";
	final Path tempFile = createTempFilePath(myString);
	// character count of the content, used below as the expected total input size
	final long size = myString.length();
	final long fakeSize = 10065;

	// no cached statistics: the size has to be gathered from the file
	DelimitedInputFormat<String> format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());

	FileBaseStatistics stats = format.getStatistics(null);
	assertNotNull(stats);
	assertEquals("The file size from the statistics is wrong.", size, stats.getTotalInputSize());
	
	// passing the gathered statistics back in must yield statistics equal to them
	format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics newStats = format.getStatistics(stats);
	// expected value first, actual value second, so a failure report labels them correctly
	assertEquals("Statistics object was changed.", stats, newStats);
	
	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	assertEquals("The file size from the statistics is wrong.", fakeSize, latest.getTotalInputSize());
	
	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime() - 1, fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	assertEquals("The file size from the statistics is wrong.", size, reGathered.getTotalInputSize());
}
 
Example 6
Source File: FileInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Gathers statistics for a single path, which may be a file or a directory.
 *
 * <p>The file status objects found are appended to {@code files}. The newest modification
 * time is then taken over <em>all</em> entries of {@code files}, including any that were
 * already in the list when this method was called.
 *
 * @param cachedStats previously gathered statistics, or {@code null} if there are none
 * @param filePath the file or directory to inspect
 * @param fs the file system used to look up {@code filePath}
 * @param files list that receives the file status objects found
 * @return {@code cachedStats} if it is non-null and its modification time is not older than
 *         the newest modification time found; otherwise newly created statistics whose size
 *         is {@link BaseStatistics#SIZE_UNKNOWN} when no positive length was measured
 * @throws IOException if the file status cannot be obtained or the files cannot be enumerated
 */
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException {

		// look up the status of the given path
		final FileStatus status = fs.getFileStatus(filePath);

		// collect the file(s) behind the path and measure their length
		final long measuredLength;
		if (status.isDir()) {
			measuredLength = addFilesInDir(status.getPath(), files, false);
		} else {
			files.add(status);
			testForUnsplittable(status);
			measuredLength = status.getLen();
		}

		// newest modification time stamp over everything collected in 'files'
		long newestModification = 0;
		for (FileStatus entry : files) {
			newestModification = Math.max(entry.getModificationTime(), newestModification);
		}

		// reuse the cached statistics if nothing was modified after they were taken
		if (cachedStats != null && newestModification <= cachedStats.getLastModificationTime()) {
			return cachedStats;
		}

		// a non-positive length is reported as "unknown"
		final long reportedLength = measuredLength > 0 ? measuredLength : BaseStatistics.SIZE_UNKNOWN;
		return new FileBaseStatistics(newestModification, reportedLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	}
 
Example 7
Source File: FileInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Gathers statistics for a single path, which may be a file or a directory.
 *
 * <p>The file status objects found are appended to {@code files}. The newest modification
 * time is then taken over <em>all</em> entries of {@code files}, including any that were
 * already in the list when this method was called.
 *
 * @param cachedStats previously gathered statistics, or {@code null} if there are none
 * @param filePath the file or directory to inspect
 * @param fs the file system used to look up {@code filePath}
 * @param files list that receives the file status objects found
 * @return {@code cachedStats} if it is non-null and its modification time is not older than
 *         the newest modification time found; otherwise newly created statistics whose size
 *         is {@link BaseStatistics#SIZE_UNKNOWN} when no positive length was measured
 * @throws IOException if the file status cannot be obtained or the files cannot be enumerated
 */
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException {

		// look up the status of the given path
		final FileStatus status = fs.getFileStatus(filePath);

		// collect the file(s) behind the path and measure their length
		final long measuredLength;
		if (status.isDir()) {
			measuredLength = addFilesInDir(status.getPath(), files, false);
		} else {
			files.add(status);
			testForUnsplittable(status);
			measuredLength = status.getLen();
		}

		// newest modification time stamp over everything collected in 'files'
		long newestModification = 0;
		for (FileStatus entry : files) {
			newestModification = Math.max(entry.getModificationTime(), newestModification);
		}

		// reuse the cached statistics if nothing was modified after they were taken
		if (cachedStats != null && newestModification <= cachedStats.getLastModificationTime()) {
			return cachedStats;
		}

		// a non-positive length is reported as "unknown"
		final long reportedLength = measuredLength > 0 ? measuredLength : BaseStatistics.SIZE_UNKNOWN;
		return new FileBaseStatistics(newestModification, reportedLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	}
 
Example 8
Source File: SourceInputFormat.java    From flink-dataflow with Apache License 2.0 5 votes vote down vote up
/**
 * Builds statistics from the size estimate of the wrapped source.
 *
 * <p>Only the total input size is provided; the number of records and the average record
 * width are reported as unknown. The {@code baseStatistics} argument is not consulted.
 *
 * @param baseStatistics previously gathered statistics (unused)
 * @return statistics carrying the estimated size, or {@code null} if the estimate could not
 *         be obtained (the failure is logged as a warning)
 */
@Override
public BaseStatistics getStatistics(BaseStatistics baseStatistics) throws IOException {
	try {
		final long estimatedSize = initialSource.getEstimatedSizeBytes(options);

		return new BaseStatistics() {
			@Override
			public long getTotalInputSize() {
				return estimatedSize;
			}

			@Override
			public long getNumberOfRecords() {
				return BaseStatistics.NUM_RECORDS_UNKNOWN;
			}

			@Override
			public float getAverageRecordWidth() {
				return BaseStatistics.AVG_RECORD_BYTES_UNKNOWN;
			}
		};
	} catch (Exception e) {
		// Pass the exception as the throwable argument without a "{}" placeholder: a lone
		// Throwable argument is not substituted into the message, so the placeholder would
		// show up literally in the log output.
		LOG.warn("Could not read Source statistics.", e);
	}

	// statistics are best effort: signal "not available" instead of failing
	return null;
}
 
Example 9
Source File: DelimitedInputFormatTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks how {@code getStatistics} treats cached statistics for a single input file:
 * cached statistics that are still current are handed back, while cached statistics with an
 * older modification time are replaced by freshly gathered ones.
 */
@Test
public void testGetStatisticsSingleFileWithCachedVersion() throws IOException {
	final String myString = "my mocked line 1\nmy mocked line 2\n";
	final Path tempFile = createTempFilePath(myString);
	// character count of the content, used below as the expected total input size
	final long size = myString.length();
	final long fakeSize = 10065;

	// no cached statistics: the size has to be gathered from the file
	DelimitedInputFormat<String> format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());

	FileBaseStatistics stats = format.getStatistics(null);
	assertNotNull(stats);
	assertEquals("The file size from the statistics is wrong.", size, stats.getTotalInputSize());
	
	// passing the gathered statistics back in must yield statistics equal to them
	format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics newStats = format.getStatistics(stats);
	// expected value first, actual value second, so a failure report labels them correctly
	assertEquals("Statistics object was changed.", stats, newStats);
	
	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	assertEquals("The file size from the statistics is wrong.", fakeSize, latest.getTotalInputSize());
	
	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime() - 1, fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	assertEquals("The file size from the statistics is wrong.", size, reGathered.getTotalInputSize());
}
 
Example 10
Source File: DelimitedInputFormatTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks how {@code getStatistics} treats cached statistics for a single input file:
 * cached statistics that are still current are handed back, while cached statistics with an
 * older modification time are replaced by freshly gathered ones.
 */
@Test
public void testGetStatisticsSingleFileWithCachedVersion() throws IOException {
	final String myString = "my mocked line 1\nmy mocked line 2\n";
	final Path tempFile = createTempFilePath(myString);
	// character count of the content, used below as the expected total input size
	final long size = myString.length();
	final long fakeSize = 10065;

	// no cached statistics: the size has to be gathered from the file
	DelimitedInputFormat<String> format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());

	FileBaseStatistics stats = format.getStatistics(null);
	assertNotNull(stats);
	assertEquals("The file size from the statistics is wrong.", size, stats.getTotalInputSize());
	
	// passing the gathered statistics back in must yield statistics equal to them
	format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics newStats = format.getStatistics(stats);
	// expected value first, actual value second, so a failure report labels them correctly
	assertEquals("Statistics object was changed.", stats, newStats);
	
	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	assertEquals("The file size from the statistics is wrong.", fakeSize, latest.getTotalInputSize());
	
	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new MyTextInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime() - 1, fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	assertEquals("The file size from the statistics is wrong.", size, reGathered.getTotalInputSize());
}
 
Example 11
Source File: SourceInputFormat.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds statistics from the size estimate of the wrapped source.
 *
 * <p>Only the total input size is provided; the number of records and the average record
 * width are reported as unknown. The {@code baseStatistics} argument is not consulted.
 *
 * @param baseStatistics previously gathered statistics (unused)
 * @return statistics carrying the estimated size, or {@code null} if the estimate could not
 *     be obtained (the failure is logged as a warning)
 */
@Override
public BaseStatistics getStatistics(BaseStatistics baseStatistics) throws IOException {
  try {
    final long estimatedSize = initialSource.getEstimatedSizeBytes(options);

    return new BaseStatistics() {
      @Override
      public long getTotalInputSize() {
        return estimatedSize;
      }

      @Override
      public long getNumberOfRecords() {
        return BaseStatistics.NUM_RECORDS_UNKNOWN;
      }

      @Override
      public float getAverageRecordWidth() {
        return BaseStatistics.AVG_RECORD_BYTES_UNKNOWN;
      }
    };
  } catch (Exception e) {
    // Pass the exception as the throwable argument without a "{}" placeholder: a lone
    // Throwable argument is not substituted into the message, so the placeholder would
    // show up literally in the log output.
    LOG.warn("Could not read Source statistics.", e);
  }

  // statistics are best effort: signal "not available" instead of failing
  return null;
}
 
Example 12
Source File: FileInputFormatTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks how {@code getStatistics} treats cached statistics when the format is configured
 * with explicitly listed files: current cached statistics are returned as the very same
 * object, while cached statistics older than every file are replaced by fresh ones.
 */
@Test
public void testGetStatisticsMultipleOneFileWithCachedVersion() throws IOException {
	FileSystem fs = FileSystem.getLocalFileSystem();

	final long size1 = 50873;
	final long fakeSize = 10065;
	String tempFile1 = TestFileUtils.createTempFile(size1);
	final long lastModTime1 = fs.getFileStatus(new Path(tempFile1)).getModificationTime();

	final long size2 = 52573;
	String tempFile2 = TestFileUtils.createTempFile(size2);
	final long lastModTime2 = fs.getFileStatus(new Path(tempFile2)).getModificationTime();

	final long sizeTotal = size1 + size2;
	
	// no cached statistics: the reported size must cover both files
	MultiDummyFileInputFormat format = new MultiDummyFileInputFormat();
	format.setFilePaths(tempFile1, tempFile2);
	format.configure(new Configuration());
	
	FileBaseStatistics stats = format.getStatistics(null);
	Assert.assertEquals("The file size from the statistics is wrong.", sizeTotal, stats.getTotalInputSize());
	
	// current cached statistics must be handed back unchanged
	format = new MultiDummyFileInputFormat();
	format.setFilePath(tempFile1);
	format.configure(new Configuration());
	
	FileBaseStatistics newStats = format.getStatistics(stats);
	// assertSame checks identity like '==' did, but reports both objects on failure
	Assert.assertSame("Statistics object was changed", stats, newStats);

	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new MultiDummyFileInputFormat();
	format.setFilePath(tempFile1);
	format.configure(new Configuration());
	
	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", fakeSize, latest.getTotalInputSize());
	
	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new MultiDummyFileInputFormat();
	format.setFilePaths(tempFile1, tempFile2);
	format.configure(new Configuration());
	
	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(Math.min(lastModTime1, lastModTime2) - 1, fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", sizeTotal, reGathered.getTotalInputSize());
}
 
Example 13
Source File: FileInputFormatTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks how {@code getStatistics} treats cached statistics when the input path is a
 * directory holding three files: current cached statistics are returned as the very same
 * object, while cached statistics older than every file are replaced by fresh ones.
 *
 * <p>Unexpected exceptions are propagated to the test runner so that the full stack trace
 * ends up in the test report.
 */
@Test
public void testGetStatisticsMultipleFilesWithCachedVersion() throws Exception {
	FileSystem fs = FileSystem.getLocalFileSystem();

	final long SIZE1 = 2077;
	final long SIZE2 = 31909;
	final long SIZE3 = 10;
	final long TOTAL = SIZE1 + SIZE2 + SIZE3;
	final long FAKE_SIZE = 10065;
	
	File tempDirFile = temporaryFolder.newFolder();
	String tempDir = tempDirFile.getAbsolutePath();
	String f1 = TestFileUtils.createTempFileInDirectory(tempDir, SIZE1);
	long modTime1 = fs.getFileStatus(new Path(f1)).getModificationTime();
	String f2 = TestFileUtils.createTempFileInDirectory(tempDir, SIZE2);
	long modTime2 = fs.getFileStatus(new Path(f2)).getModificationTime();
	String f3 = TestFileUtils.createTempFileInDirectory(tempDir, SIZE3);
	long modTime3 = fs.getFileStatus(new Path(f3)).getModificationTime();
	
	// no cached statistics: the reported size must cover all three files
	DummyFileInputFormat format = new DummyFileInputFormat();
	format.setFilePath(tempDir);
	format.configure(new Configuration());
	
	FileBaseStatistics stats = format.getStatistics(null);
	Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, stats.getTotalInputSize());
	
	// current cached statistics must be handed back unchanged
	format = new DummyFileInputFormat();
	format.setFilePath(tempDir);
	format.configure(new Configuration());
	
	FileBaseStatistics newStats = format.getStatistics(stats);
	// assertSame checks identity like '==' did, but reports both objects on failure
	Assert.assertSame("Statistics object was changed", stats, newStats);

	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new DummyFileInputFormat();
	format.setFilePath(tempDir);
	format.configure(new Configuration());
	
	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", FAKE_SIZE, latest.getTotalInputSize());
	
	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new DummyFileInputFormat();
	format.setFilePath(tempDir);
	format.configure(new Configuration());
	
	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(Math.min(Math.min(modTime1, modTime2), modTime3) - 1, FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, reGathered.getTotalInputSize());
}
 
Example 14
Source File: FileInputFormatTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks how {@code getStatistics} treats cached statistics for a single input file:
 * current cached statistics are returned as the very same object, while cached statistics
 * with an older modification time are replaced by fresh ones.
 *
 * <p>Unexpected exceptions are propagated to the test runner so that the full stack trace
 * ends up in the test report.
 */
@Test
public void testGetStatisticsOneFileWithCachedVersion() throws Exception {
	final long SIZE = 50873;
	final long FAKE_SIZE = 10065;
	
	String tempFile = TestFileUtils.createTempFile(SIZE);
	
	// no cached statistics: the size has to be gathered from the file
	DummyFileInputFormat format = new DummyFileInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics stats = format.getStatistics(null);
	Assert.assertEquals("The file size from the statistics is wrong.", SIZE, stats.getTotalInputSize());
	
	// current cached statistics must be handed back unchanged
	format = new DummyFileInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics newStats = format.getStatistics(stats);
	// assertSame checks identity like '==' did, but reports both objects on failure
	Assert.assertSame("Statistics object was changed", stats, newStats);

	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new DummyFileInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", FAKE_SIZE, latest.getTotalInputSize());
	
	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new DummyFileInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime() - 1, FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", SIZE, reGathered.getTotalInputSize());
}
 
Example 15
Source File: FileInputFormatTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks how {@code getStatistics} treats cached statistics when the format is configured
 * with explicitly listed files: current cached statistics are returned as the very same
 * object, while cached statistics older than every file are replaced by fresh ones.
 */
@Test
public void testGetStatisticsMultipleOneFileWithCachedVersion() throws IOException {
	FileSystem fs = FileSystem.getLocalFileSystem();

	final long size1 = 50873;
	final long fakeSize = 10065;
	String tempFile1 = TestFileUtils.createTempFile(size1);
	final long lastModTime1 = fs.getFileStatus(new Path(tempFile1)).getModificationTime();

	final long size2 = 52573;
	String tempFile2 = TestFileUtils.createTempFile(size2);
	final long lastModTime2 = fs.getFileStatus(new Path(tempFile2)).getModificationTime();

	final long sizeTotal = size1 + size2;
	
	// no cached statistics: the reported size must cover both files
	MultiDummyFileInputFormat format = new MultiDummyFileInputFormat();
	format.setFilePaths(tempFile1, tempFile2);
	format.configure(new Configuration());
	
	FileBaseStatistics stats = format.getStatistics(null);
	Assert.assertEquals("The file size from the statistics is wrong.", sizeTotal, stats.getTotalInputSize());
	
	// current cached statistics must be handed back unchanged
	format = new MultiDummyFileInputFormat();
	format.setFilePath(tempFile1);
	format.configure(new Configuration());
	
	FileBaseStatistics newStats = format.getStatistics(stats);
	// assertSame checks identity like '==' did, but reports both objects on failure
	Assert.assertSame("Statistics object was changed", stats, newStats);

	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new MultiDummyFileInputFormat();
	format.setFilePath(tempFile1);
	format.configure(new Configuration());
	
	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", fakeSize, latest.getTotalInputSize());
	
	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new MultiDummyFileInputFormat();
	format.setFilePaths(tempFile1, tempFile2);
	format.configure(new Configuration());
	
	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(Math.min(lastModTime1, lastModTime2) - 1, fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", sizeTotal, reGathered.getTotalInputSize());
}
 
Example 16
Source File: FileInputFormatTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks how {@code getStatistics} treats cached statistics when the input path is a
 * directory holding three files: current cached statistics are returned as the very same
 * object, while cached statistics older than every file are replaced by fresh ones.
 *
 * <p>Unexpected exceptions are propagated to the test runner so that the full stack trace
 * ends up in the test report.
 */
@Test
public void testGetStatisticsMultipleFilesWithCachedVersion() throws Exception {
	FileSystem fs = FileSystem.getLocalFileSystem();

	final long SIZE1 = 2077;
	final long SIZE2 = 31909;
	final long SIZE3 = 10;
	final long TOTAL = SIZE1 + SIZE2 + SIZE3;
	final long FAKE_SIZE = 10065;
	
	File tempDirFile = temporaryFolder.newFolder();
	String tempDir = tempDirFile.getAbsolutePath();
	String f1 = TestFileUtils.createTempFileInDirectory(tempDir, SIZE1);
	long modTime1 = fs.getFileStatus(new Path(f1)).getModificationTime();
	String f2 = TestFileUtils.createTempFileInDirectory(tempDir, SIZE2);
	long modTime2 = fs.getFileStatus(new Path(f2)).getModificationTime();
	String f3 = TestFileUtils.createTempFileInDirectory(tempDir, SIZE3);
	long modTime3 = fs.getFileStatus(new Path(f3)).getModificationTime();
	
	// no cached statistics: the reported size must cover all three files
	DummyFileInputFormat format = new DummyFileInputFormat();
	format.setFilePath(tempDir);
	format.configure(new Configuration());
	
	FileBaseStatistics stats = format.getStatistics(null);
	Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, stats.getTotalInputSize());
	
	// current cached statistics must be handed back unchanged
	format = new DummyFileInputFormat();
	format.setFilePath(tempDir);
	format.configure(new Configuration());
	
	FileBaseStatistics newStats = format.getStatistics(stats);
	// assertSame checks identity like '==' did, but reports both objects on failure
	Assert.assertSame("Statistics object was changed", stats, newStats);

	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new DummyFileInputFormat();
	format.setFilePath(tempDir);
	format.configure(new Configuration());
	
	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", FAKE_SIZE, latest.getTotalInputSize());
	
	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new DummyFileInputFormat();
	format.setFilePath(tempDir);
	format.configure(new Configuration());
	
	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(Math.min(Math.min(modTime1, modTime2), modTime3) - 1, FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, reGathered.getTotalInputSize());
}
 
Example 17
Source File: FileInputFormatTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks how {@code getStatistics} treats cached statistics for a single input file:
 * current cached statistics are returned as the very same object, while cached statistics
 * with an older modification time are replaced by fresh ones.
 *
 * <p>Unexpected exceptions are propagated to the test runner so that the full stack trace
 * ends up in the test report.
 */
@Test
public void testGetStatisticsOneFileWithCachedVersion() throws Exception {
	final long SIZE = 50873;
	final long FAKE_SIZE = 10065;
	
	String tempFile = TestFileUtils.createTempFile(SIZE);
	
	// no cached statistics: the size has to be gathered from the file
	DummyFileInputFormat format = new DummyFileInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics stats = format.getStatistics(null);
	Assert.assertEquals("The file size from the statistics is wrong.", SIZE, stats.getTotalInputSize());
	
	// current cached statistics must be handed back unchanged
	format = new DummyFileInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics newStats = format.getStatistics(stats);
	// assertSame checks identity like '==' did, but reports both objects on failure
	Assert.assertSame("Statistics object was changed", stats, newStats);

	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new DummyFileInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", FAKE_SIZE, latest.getTotalInputSize());
	
	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new DummyFileInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime() - 1, FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", SIZE, reGathered.getTotalInputSize());
}
 
Example 18
Source File: FileInputFormatTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Checks how {@code getStatistics} treats cached statistics when the format is configured
 * with explicitly listed files: current cached statistics are returned as the very same
 * object, while cached statistics older than every file are replaced by fresh ones.
 */
@Test
public void testGetStatisticsMultipleOneFileWithCachedVersion() throws IOException {
	FileSystem fs = FileSystem.getLocalFileSystem();

	final long size1 = 50873;
	final long fakeSize = 10065;
	String tempFile1 = TestFileUtils.createTempFile(size1);
	final long lastModTime1 = fs.getFileStatus(new Path(tempFile1)).getModificationTime();

	final long size2 = 52573;
	String tempFile2 = TestFileUtils.createTempFile(size2);
	final long lastModTime2 = fs.getFileStatus(new Path(tempFile2)).getModificationTime();

	final long sizeTotal = size1 + size2;
	
	// no cached statistics: the reported size must cover both files
	MultiDummyFileInputFormat format = new MultiDummyFileInputFormat();
	format.setFilePaths(tempFile1, tempFile2);
	format.configure(new Configuration());
	
	FileBaseStatistics stats = format.getStatistics(null);
	Assert.assertEquals("The file size from the statistics is wrong.", sizeTotal, stats.getTotalInputSize());
	
	// current cached statistics must be handed back unchanged
	format = new MultiDummyFileInputFormat();
	format.setFilePath(tempFile1);
	format.configure(new Configuration());
	
	FileBaseStatistics newStats = format.getStatistics(stats);
	// assertSame checks identity like '==' did, but reports both objects on failure
	Assert.assertSame("Statistics object was changed", stats, newStats);

	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new MultiDummyFileInputFormat();
	format.setFilePath(tempFile1);
	format.configure(new Configuration());
	
	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", fakeSize, latest.getTotalInputSize());
	
	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new MultiDummyFileInputFormat();
	format.setFilePaths(tempFile1, tempFile2);
	format.configure(new Configuration());
	
	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(Math.min(lastModTime1, lastModTime2) - 1, fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", sizeTotal, reGathered.getTotalInputSize());
}
 
Example 19
Source File: FileInputFormatTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Checks how {@code getStatistics} treats cached statistics when the input path is a
 * directory holding three files: current cached statistics are returned as the very same
 * object, while cached statistics older than every file are replaced by fresh ones.
 *
 * <p>Unexpected exceptions are propagated to the test runner so that the full stack trace
 * ends up in the test report.
 */
@Test
public void testGetStatisticsMultipleFilesWithCachedVersion() throws Exception {
	FileSystem fs = FileSystem.getLocalFileSystem();

	final long SIZE1 = 2077;
	final long SIZE2 = 31909;
	final long SIZE3 = 10;
	final long TOTAL = SIZE1 + SIZE2 + SIZE3;
	final long FAKE_SIZE = 10065;
	
	File tempDirFile = temporaryFolder.newFolder();
	String tempDir = tempDirFile.getAbsolutePath();
	String f1 = TestFileUtils.createTempFileInDirectory(tempDir, SIZE1);
	long modTime1 = fs.getFileStatus(new Path(f1)).getModificationTime();
	String f2 = TestFileUtils.createTempFileInDirectory(tempDir, SIZE2);
	long modTime2 = fs.getFileStatus(new Path(f2)).getModificationTime();
	String f3 = TestFileUtils.createTempFileInDirectory(tempDir, SIZE3);
	long modTime3 = fs.getFileStatus(new Path(f3)).getModificationTime();
	
	// no cached statistics: the reported size must cover all three files
	DummyFileInputFormat format = new DummyFileInputFormat();
	format.setFilePath(tempDir);
	format.configure(new Configuration());
	
	FileBaseStatistics stats = format.getStatistics(null);
	Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, stats.getTotalInputSize());
	
	// current cached statistics must be handed back unchanged
	format = new DummyFileInputFormat();
	format.setFilePath(tempDir);
	format.configure(new Configuration());
	
	FileBaseStatistics newStats = format.getStatistics(stats);
	// assertSame checks identity like '==' did, but reports both objects on failure
	Assert.assertSame("Statistics object was changed", stats, newStats);

	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new DummyFileInputFormat();
	format.setFilePath(tempDir);
	format.configure(new Configuration());
	
	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", FAKE_SIZE, latest.getTotalInputSize());
	
	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new DummyFileInputFormat();
	format.setFilePath(tempDir);
	format.configure(new Configuration());
	
	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(Math.min(Math.min(modTime1, modTime2), modTime3) - 1, FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, reGathered.getTotalInputSize());
}
 
Example 20
Source File: FileInputFormatTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Checks how {@code getStatistics} treats cached statistics for a single input file:
 * current cached statistics are returned as the very same object, while cached statistics
 * with an older modification time are replaced by fresh ones.
 *
 * <p>Unexpected exceptions are propagated to the test runner so that the full stack trace
 * ends up in the test report.
 */
@Test
public void testGetStatisticsOneFileWithCachedVersion() throws Exception {
	final long SIZE = 50873;
	final long FAKE_SIZE = 10065;
	
	String tempFile = TestFileUtils.createTempFile(SIZE);
	
	// no cached statistics: the size has to be gathered from the file
	DummyFileInputFormat format = new DummyFileInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics stats = format.getStatistics(null);
	Assert.assertEquals("The file size from the statistics is wrong.", SIZE, stats.getTotalInputSize());
	
	// current cached statistics must be handed back unchanged
	format = new DummyFileInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics newStats = format.getStatistics(stats);
	// assertSame checks identity like '==' did, but reports both objects on failure
	Assert.assertSame("Statistics object was changed", stats, newStats);

	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new DummyFileInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", FAKE_SIZE, latest.getTotalInputSize());
	
	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new DummyFileInputFormat();
	format.setFilePath(tempFile);
	format.configure(new Configuration());
	
	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime() - 1, FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", SIZE, reGathered.getTotalInputSize());
}