Java Code Examples for org.apache.flink.testutils.TestFileUtils#createTempFile()

The following examples show how to use org.apache.flink.testutils.TestFileUtils#createTempFile(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DelimitedInputFormatSamplingTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the statistics that {@code getStatistics(null)} reports for a single
 * temporary file holding {@code TEST_DATA1}.
 *
 * <p>The reported record count must lie strictly between {@code TEST_DATA_1_LINES - 1}
 * and {@code TEST_DATA_1_LINES + 1}, and the reported average record width must differ
 * by less than one from {@code TEST_DATA1.length() / TEST_DATA_1_LINES}. Any exception
 * thrown along the way fails the test.
 */
@Test
public void testSamplingOneFile() {
	try {
		final String tempFile = TestFileUtils.createTempFile(TEST_DATA1);
		final Configuration conf = new Configuration();
		
		final TestDelimitedInputFormat format = new TestDelimitedInputFormat(CONFIG);
		format.setFilePath(tempFile);
		format.configure(conf);
		BaseStatistics stats = format.getStatistics(null);
		
		final int numLines = TEST_DATA_1_LINES;
		final float avgWidth = ((float) TEST_DATA1.length()) / TEST_DATA_1_LINES;
		// Range checks are combined with the logical && rather than the non-short-circuit & operator.
		Assert.assertTrue("Wrong record count.", stats.getNumberOfRecords() < numLines + 1 && stats.getNumberOfRecords() > numLines - 1);
		Assert.assertTrue("Wrong avg record size.", stats.getAverageRecordWidth() < avgWidth + 1 && stats.getAverageRecordWidth() > avgWidth - 1);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example 2
Source File: EnumerateNestedFilesTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Single plain file (no nested directory) with recursive.file.enumeration = true:
 * requesting at least one split must yield exactly one input split.
 */
@Test
public void testNoNestedDirectoryTrue() {
	try {
		final Path plainFile = new Path(TestFileUtils.createTempFile("foo"));
		this.format.setFilePath(plainFile);

		this.config.setBoolean("recursive.file.enumeration", true);
		this.format.configure(this.config);

		final int splitCount = this.format.createInputSplits(1).length;
		Assert.assertEquals(1, splitCount);
	} catch (Exception failure) {
		// Any exception is reported as a test failure.
		failure.printStackTrace();
		Assert.fail(failure.getMessage());
	}
}
 
Example 3
Source File: DelimitedInputFormatSamplingTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the statistics reported when records are separated by a custom
 * delimiter instead of a newline.
 *
 * <p>Every {@code "\n"} in {@code TEST_DATA1} is replaced by the delimiter before the
 * data is written to a temporary file, and the same delimiter is set on the format.
 * The reported record count must lie strictly between {@code TEST_DATA_1_LINES - 1}
 * and {@code TEST_DATA_1_LINES + 1}, and the reported average record width must differ
 * by less than one from the rewritten data's length divided by
 * {@code TEST_DATA_1_LINES}. Any exception thrown along the way fails the test.
 */
@Test
public void testDifferentDelimiter() {
	try {
		final String delimiter = "12345678-";
		String testData = TEST_DATA1.replace("\n", delimiter);
		
		final String tempFile = TestFileUtils.createTempFile(testData);
		final Configuration conf = new Configuration();
		
		final TestDelimitedInputFormat format = new TestDelimitedInputFormat(CONFIG);
		format.setFilePath(tempFile);
		format.setDelimiter(delimiter);
		format.configure(conf);
		
		BaseStatistics stats = format.getStatistics(null);
		final int numLines = TEST_DATA_1_LINES;
		final float avgWidth = ((float) testData.length()) / TEST_DATA_1_LINES;
		
		// Range checks are combined with the logical && rather than the non-short-circuit & operator.
		Assert.assertTrue("Wrong record count.", stats.getNumberOfRecords() < numLines + 1 && stats.getNumberOfRecords() > numLines - 1);
		Assert.assertTrue("Wrong avg record size.", stats.getAverageRecordWidth() < avgWidth + 1 && stats.getAverageRecordWidth() > avgWidth - 1);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example 4
Source File: DelimitedInputFormatSamplingTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a temporary file from twice the default value of
 * {@code OptimizerOptions.DELIMITED_FORMAT_MAX_SAMPLE_LEN} and asserts that
 * {@code getStatistics(null)} returns {@code null} for it. Any exception
 * fails the test.
 */
@Test
public void testSamplingOverlyLongRecord() {
	try {
		final String oversizedFile = TestFileUtils.createTempFile(2 * OptimizerOptions.DELIMITED_FORMAT_MAX_SAMPLE_LEN.defaultValue());
		final Configuration emptyConfig = new Configuration();

		final TestDelimitedInputFormat inputFormat = new TestDelimitedInputFormat(CONFIG);
		inputFormat.setFilePath(oversizedFile);
		inputFormat.configure(emptyConfig);

		Assert.assertNull(
			"Expected exception due to overly long record.",
			inputFormat.getStatistics(null));
	} catch (Exception failure) {
		failure.printStackTrace();
		Assert.fail(failure.getMessage());
	}
}
 
Example 5
Source File: EnumerateNestedFilesTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Single plain file (no nested directory) with recursive.file.enumeration = true:
 * requesting at least one split must yield exactly one input split.
 */
@Test
public void testNoNestedDirectoryTrue() {
	try {
		final Path plainFile = new Path(TestFileUtils.createTempFile("foo"));
		this.format.setFilePath(plainFile);

		this.config.setBoolean("recursive.file.enumeration", true);
		this.format.configure(this.config);

		final int splitCount = this.format.createInputSplits(1).length;
		Assert.assertEquals(1, splitCount);
	} catch (Exception failure) {
		// Any exception is reported as a test failure.
		failure.printStackTrace();
		Assert.fail(failure.getMessage());
	}
}
 
Example 6
Source File: DelimitedInputFormatSamplingTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the statistics that {@code getStatistics(null)} reports for a single
 * temporary file holding {@code TEST_DATA1}.
 *
 * <p>The reported record count must lie strictly between {@code TEST_DATA_1_LINES - 1}
 * and {@code TEST_DATA_1_LINES + 1}, and the reported average record width must differ
 * by less than one from {@code TEST_DATA1.length() / TEST_DATA_1_LINES}. Any exception
 * thrown along the way fails the test.
 */
@Test
public void testSamplingOneFile() {
	try {
		final String tempFile = TestFileUtils.createTempFile(TEST_DATA1);
		final Configuration conf = new Configuration();
		
		final TestDelimitedInputFormat format = new TestDelimitedInputFormat(CONFIG);
		format.setFilePath(tempFile);
		format.configure(conf);
		BaseStatistics stats = format.getStatistics(null);
		
		final int numLines = TEST_DATA_1_LINES;
		final float avgWidth = ((float) TEST_DATA1.length()) / TEST_DATA_1_LINES;
		// Range checks are combined with the logical && rather than the non-short-circuit & operator.
		Assert.assertTrue("Wrong record count.", stats.getNumberOfRecords() < numLines + 1 && stats.getNumberOfRecords() > numLines - 1);
		Assert.assertTrue("Wrong avg record size.", stats.getAverageRecordWidth() < avgWidth + 1 && stats.getAverageRecordWidth() > avgWidth - 1);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example 7
Source File: DelimitedInputFormatSamplingTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the statistics that {@code getStatistics(null)} reports for a single
 * temporary file holding {@code TEST_DATA1}.
 *
 * <p>The reported record count must lie strictly between {@code TEST_DATA_1_LINES - 1}
 * and {@code TEST_DATA_1_LINES + 1}, and the reported average record width must differ
 * by less than one from {@code TEST_DATA1.length() / TEST_DATA_1_LINES}. Any exception
 * thrown along the way fails the test.
 */
@Test
public void testSamplingOneFile() {
	try {
		final String tempFile = TestFileUtils.createTempFile(TEST_DATA1);
		final Configuration conf = new Configuration();
		
		final TestDelimitedInputFormat format = new TestDelimitedInputFormat(CONFIG);
		format.setFilePath(tempFile);
		format.configure(conf);
		BaseStatistics stats = format.getStatistics(null);
		
		final int numLines = TEST_DATA_1_LINES;
		final float avgWidth = ((float) TEST_DATA1.length()) / TEST_DATA_1_LINES;
		// Range checks are combined with the logical && rather than the non-short-circuit & operator.
		Assert.assertTrue("Wrong record count.", stats.getNumberOfRecords() < numLines + 1 && stats.getNumberOfRecords() > numLines - 1);
		Assert.assertTrue("Wrong avg record size.", stats.getAverageRecordWidth() < avgWidth + 1 && stats.getAverageRecordWidth() > avgWidth - 1);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example 8
Source File: DelimitedInputFormatSamplingTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a temporary file from twice the default value of
 * {@code OptimizerOptions.DELIMITED_FORMAT_MAX_SAMPLE_LEN} and asserts that
 * {@code getStatistics(null)} returns {@code null} for it. Any exception
 * fails the test.
 */
@Test
public void testSamplingOverlyLongRecord() {
	try {
		final String oversizedFile = TestFileUtils.createTempFile(2 * OptimizerOptions.DELIMITED_FORMAT_MAX_SAMPLE_LEN.defaultValue());
		final Configuration emptyConfig = new Configuration();

		final TestDelimitedInputFormat inputFormat = new TestDelimitedInputFormat(CONFIG);
		inputFormat.setFilePath(oversizedFile);
		inputFormat.configure(emptyConfig);

		Assert.assertNull(
			"Expected exception due to overly long record.",
			inputFormat.getStatistics(null));
	} catch (Exception failure) {
		failure.printStackTrace();
		Assert.fail(failure.getMessage());
	}
}
 
Example 9
Source File: FileInputFormatTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a temporary file via {@code createTempFile(1024 * 500)} and checks that the
 * statistics obtained without a cached version ({@code getStatistics(null)}) report
 * that same number as the total input size. Any exception fails the test.
 */
@Test
public void testGetStatisticsOneFileNoCachedVersion() {
	try {
		final long expectedBytes = 1024 * 500;
		final String path = TestFileUtils.createTempFile(expectedBytes);

		final DummyFileInputFormat inputFormat = new DummyFileInputFormat();
		inputFormat.setFilePath(path);
		inputFormat.configure(new Configuration());

		final long reportedBytes = inputFormat.getStatistics(null).getTotalInputSize();
		Assert.assertEquals("The file size from the statistics is wrong.", expectedBytes, reportedBytes);
	} catch (Exception failure) {
		failure.printStackTrace();
		Assert.fail(failure.getMessage());
	}
}
 
Example 10
Source File: DelimitedInputFormatSamplingTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a temporary file from twice the default value of
 * {@code OptimizerOptions.DELIMITED_FORMAT_MAX_SAMPLE_LEN} and asserts that
 * {@code getStatistics(null)} returns {@code null} for it. Any exception
 * fails the test.
 */
@Test
public void testSamplingOverlyLongRecord() {
	try {
		final String oversizedFile = TestFileUtils.createTempFile(2 * OptimizerOptions.DELIMITED_FORMAT_MAX_SAMPLE_LEN.defaultValue());
		final Configuration emptyConfig = new Configuration();

		final TestDelimitedInputFormat inputFormat = new TestDelimitedInputFormat(CONFIG);
		inputFormat.setFilePath(oversizedFile);
		inputFormat.configure(emptyConfig);

		Assert.assertNull(
			"Expected exception due to overly long record.",
			inputFormat.getStatistics(null));
	} catch (Exception failure) {
		failure.printStackTrace();
		Assert.fail(failure.getMessage());
	}
}
 
Example 11
Source File: DelimitedInputFormatSamplingTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that handing previously computed statistics to {@code getStatistics}
 * avoids opening any stream again.
 *
 * <p>A first format computes statistics with no cached input; afterwards the
 * {@code TestFileSystem} stream-open counter must equal {@code DEFAULT_NUM_SAMPLES}.
 * A second format over the same path receives those statistics as its cached
 * argument; the counter must then be zero and the returned object must be the very
 * same instance that was passed in. Any exception fails the test.
 */
@Test
public void testCachedStatistics() {
	try {
		final String tempFile = TestFileUtils.createTempFile(TEST_DATA1);
		final Configuration conf = new Configuration();
		
		final TestDelimitedInputFormat format = new TestDelimitedInputFormat(CONFIG);
		format.setFilePath("test://" + tempFile);
		format.configure(conf);
		
		TestFileSystem.resetStreamOpenCounter();
		BaseStatistics stats = format.getStatistics(null);
		Assert.assertEquals("Wrong number of samples taken.", DEFAULT_NUM_SAMPLES, TestFileSystem.getNumtimeStreamOpened());
		
		final TestDelimitedInputFormat format2 = new TestDelimitedInputFormat(CONFIG);
		format2.setFilePath("test://" + tempFile);
		format2.configure(conf);
		
		TestFileSystem.resetStreamOpenCounter();
		BaseStatistics stats2 = format2.getStatistics(stats);
		// assertEquals/assertSame include the offending values in the failure report,
		// which assertTrue on a pre-evaluated comparison cannot do.
		Assert.assertEquals("Using cached statistics should cicumvent sampling.", 0, TestFileSystem.getNumtimeStreamOpened());
		Assert.assertSame("Using cached statistics should cicumvent sampling.", stats, stats2);
		
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example 12
Source File: DelimitedInputFormatSamplingTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks how many streams are opened while gathering statistics for one file.
 *
 * <p>With the format left at its defaults, the {@code TestFileSystem} stream-open
 * counter must equal {@code DEFAULT_NUM_SAMPLES} after {@code getStatistics(null)};
 * after {@code setNumLineSamples(8)} on a second format it must equal 8.
 * Any exception fails the test.
 */
@Test
public void testNumSamplesOneFile() {
	try {
		final String tempFile = TestFileUtils.createTempFile(TEST_DATA1);
		final Configuration conf = new Configuration();

		// NOTE(review): replace() rewrites every "file" occurrence in the path, not only a
		// leading scheme - presumably only the scheme is meant; confirm against createTempFile.
		final String testSchemePath = tempFile.replace("file", "test");

		// First pass: default number of line samples.
		final TestDelimitedInputFormat defaultSampling = new TestDelimitedInputFormat(CONFIG);
		defaultSampling.setFilePath(testSchemePath);
		defaultSampling.configure(conf);

		TestFileSystem.resetStreamOpenCounter();
		defaultSampling.getStatistics(null);
		Assert.assertEquals("Wrong number of samples taken.", DEFAULT_NUM_SAMPLES, TestFileSystem.getNumtimeStreamOpened());

		// Second pass: explicitly configured number of line samples.
		final TestDelimitedInputFormat eightSamples = new TestDelimitedInputFormat(CONFIG);
		eightSamples.setFilePath(testSchemePath);
		eightSamples.setNumLineSamples(8);
		eightSamples.configure(conf);

		TestFileSystem.resetStreamOpenCounter();
		eightSamples.getStatistics(null);
		Assert.assertEquals("Wrong number of samples taken.", 8, TestFileSystem.getNumtimeStreamOpened());

	} catch (Exception failure) {
		failure.printStackTrace();
		Assert.fail(failure.getMessage());
	}
}
 
Example 13
Source File: FileInputFormatTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Requests two splits for a single temporary file containing "Hello World" and
 * checks that exactly two splits come back, each pointing at that file.
 */
@Test
public void testCreateInputSplitSingleFile() throws IOException {
	final String singleFile = TestFileUtils.createTempFile("Hello World");

	FileInputFormat inputFormat = new DummyFileInputFormat();
	inputFormat.setFilePath(singleFile);
	inputFormat.configure(new Configuration());

	final FileInputSplit[] created = inputFormat.createInputSplits(2);
	Assert.assertEquals(2, created.length);

	// The length was asserted to be two, so this visits exactly splits 0 and 1 in order.
	for (FileInputSplit split : created) {
		Assert.assertEquals(singleFile, split.getPath().toString());
	}
}
 
Example 14
Source File: FileInputFormatTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Requests three splits for three temporary files created with arguments 21, 22 and 23.
 *
 * <p>Exactly three splits must come back, each starting at offset 0, each belonging to
 * one of the three files with a length equal to that file's creation argument, and
 * every file must be covered by exactly one split.
 */
@Test
public void testCreateInputSplitMultiFiles() throws IOException {
	final String[] tempFiles = {
		TestFileUtils.createTempFile(21),
		TestFileUtils.createTempFile(22),
		TestFileUtils.createTempFile(23)
	};
	final int[] expectedLengths = {21, 22, 23};
	final int[] splitsPerFile = new int[tempFiles.length];

	FileInputFormat inputFormat = new MultiDummyFileInputFormat();
	inputFormat.setFilePaths(tempFiles[0], tempFiles[1], tempFiles[2]);
	inputFormat.configure(new Configuration());

	final FileInputSplit[] created = inputFormat.createInputSplits(3);
	Assert.assertEquals(3, created.length);

	for (FileInputSplit split : created) {
		Assert.assertEquals(0, split.getStart());

		// Find the first temp file this split belongs to.
		final String splitPath = split.getPath().toString();
		int fileIndex = -1;
		for (int i = 0; i < tempFiles.length; i++) {
			if (splitPath.equals(tempFiles[i])) {
				fileIndex = i;
				break;
			}
		}
		if (fileIndex < 0) {
			Assert.fail("Got split for unknown file.");
		}

		splitsPerFile[fileIndex]++;
		Assert.assertEquals(expectedLengths[fileIndex], split.getLength());
	}

	for (int count : splitsPerFile) {
		Assert.assertEquals(1, count);
	}
}
 
Example 15
Source File: FileInputFormatTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Requests two splits for a single temporary file containing "Hello World" and
 * checks that exactly two splits come back, each pointing at that file.
 */
@Test
public void testCreateInputSplitSingleFile() throws IOException {
	final String singleFile = TestFileUtils.createTempFile("Hello World");

	FileInputFormat inputFormat = new DummyFileInputFormat();
	inputFormat.setFilePath(singleFile);
	inputFormat.configure(new Configuration());

	final FileInputSplit[] created = inputFormat.createInputSplits(2);
	Assert.assertEquals(2, created.length);

	// The length was asserted to be two, so this visits exactly splits 0 and 1 in order.
	for (FileInputSplit split : created) {
		Assert.assertEquals(singleFile, split.getPath().toString());
	}
}
 
Example 16
Source File: DelimitedInputFormatSamplingTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that handing previously computed statistics to {@code getStatistics}
 * avoids opening any stream again.
 *
 * <p>A first format computes statistics with no cached input; afterwards the
 * {@code TestFileSystem} stream-open counter must equal {@code DEFAULT_NUM_SAMPLES}.
 * A second format over the same path receives those statistics as its cached
 * argument; the counter must then be zero and the returned object must be the very
 * same instance that was passed in. Any exception fails the test.
 */
@Test
public void testCachedStatistics() {
	try {
		final String tempFile = TestFileUtils.createTempFile(TEST_DATA1);
		final Configuration conf = new Configuration();
		
		final TestDelimitedInputFormat format = new TestDelimitedInputFormat(CONFIG);
		format.setFilePath("test://" + tempFile);
		format.configure(conf);
		
		TestFileSystem.resetStreamOpenCounter();
		BaseStatistics stats = format.getStatistics(null);
		Assert.assertEquals("Wrong number of samples taken.", DEFAULT_NUM_SAMPLES, TestFileSystem.getNumtimeStreamOpened());
		
		final TestDelimitedInputFormat format2 = new TestDelimitedInputFormat(CONFIG);
		format2.setFilePath("test://" + tempFile);
		format2.configure(conf);
		
		TestFileSystem.resetStreamOpenCounter();
		BaseStatistics stats2 = format2.getStatistics(stats);
		// assertEquals/assertSame include the offending values in the failure report,
		// which assertTrue on a pre-evaluated comparison cannot do.
		Assert.assertEquals("Using cached statistics should cicumvent sampling.", 0, TestFileSystem.getNumtimeStreamOpened());
		Assert.assertSame("Using cached statistics should cicumvent sampling.", stats, stats2);
		
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example 17
Source File: DelimitedInputFormatSamplingTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that handing previously computed statistics to {@code getStatistics}
 * avoids opening any stream again.
 *
 * <p>A first format computes statistics with no cached input; afterwards the
 * {@code TestFileSystem} stream-open counter must equal {@code DEFAULT_NUM_SAMPLES}.
 * A second format over the same path receives those statistics as its cached
 * argument; the counter must then be zero and the returned object must be the very
 * same instance that was passed in. Any exception fails the test.
 */
@Test
public void testCachedStatistics() {
	try {
		final String tempFile = TestFileUtils.createTempFile(TEST_DATA1);
		final Configuration conf = new Configuration();
		
		final TestDelimitedInputFormat format = new TestDelimitedInputFormat(CONFIG);
		format.setFilePath("test://" + tempFile);
		format.configure(conf);
		
		TestFileSystem.resetStreamOpenCounter();
		BaseStatistics stats = format.getStatistics(null);
		Assert.assertEquals("Wrong number of samples taken.", DEFAULT_NUM_SAMPLES, TestFileSystem.getNumtimeStreamOpened());
		
		final TestDelimitedInputFormat format2 = new TestDelimitedInputFormat(CONFIG);
		format2.setFilePath("test://" + tempFile);
		format2.configure(conf);
		
		TestFileSystem.resetStreamOpenCounter();
		BaseStatistics stats2 = format2.getStatistics(stats);
		// assertEquals/assertSame include the offending values in the failure report,
		// which assertTrue on a pre-evaluated comparison cannot do.
		Assert.assertEquals("Using cached statistics should cicumvent sampling.", 0, TestFileSystem.getNumtimeStreamOpened());
		Assert.assertSame("Using cached statistics should cicumvent sampling.", stats, stats2);
		
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example 18
Source File: FileInputFormatTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Requests three splits for three temporary files created with arguments 21, 22 and 23.
 *
 * <p>Exactly three splits must come back, each starting at offset 0, each belonging to
 * one of the three files with a length equal to that file's creation argument, and
 * every file must be covered by exactly one split.
 */
@Test
public void testCreateInputSplitMultiFiles() throws IOException {
	final String[] tempFiles = {
		TestFileUtils.createTempFile(21),
		TestFileUtils.createTempFile(22),
		TestFileUtils.createTempFile(23)
	};
	final int[] expectedLengths = {21, 22, 23};
	final int[] splitsPerFile = new int[tempFiles.length];

	FileInputFormat inputFormat = new MultiDummyFileInputFormat();
	inputFormat.setFilePaths(tempFiles[0], tempFiles[1], tempFiles[2]);
	inputFormat.configure(new Configuration());

	final FileInputSplit[] created = inputFormat.createInputSplits(3);
	Assert.assertEquals(3, created.length);

	for (FileInputSplit split : created) {
		Assert.assertEquals(0, split.getStart());

		// Find the first temp file this split belongs to.
		final String splitPath = split.getPath().toString();
		int fileIndex = -1;
		for (int i = 0; i < tempFiles.length; i++) {
			if (splitPath.equals(tempFiles[i])) {
				fileIndex = i;
				break;
			}
		}
		if (fileIndex < 0) {
			Assert.fail("Got split for unknown file.");
		}

		splitsPerFile[fileIndex]++;
		Assert.assertEquals(expectedLengths[fileIndex], split.getLength());
	}

	for (int count : splitsPerFile) {
		Assert.assertEquals(1, count);
	}
}
 
Example 19
Source File: FileInputFormatTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Exercises {@code getStatistics} with cached statistics for formats configured with
 * one or two temporary files.
 *
 * <p>Four steps are checked in sequence:
 * <ol>
 *   <li>with no cached statistics and both files, the total input size equals the sum
 *       of the two creation sizes;</li>
 *   <li>passing those statistics to a format over the first file only returns the
 *       same statistics instance;</li>
 *   <li>fake statistics carrying the last modification time of the real statistics
 *       are returned as-is (their fake size is reported);</li>
 *   <li>fake statistics whose modification time is one less than the smaller of the
 *       two file modification times lead to the summed size being reported again.</li>
 * </ol>
 *
 * @throws IOException declared by the method; presumably raised by the file-system or
 *     statistics calls - not visible from this snippet
 */
@Test
public void testGetStatisticsMultipleOneFileWithCachedVersion() throws IOException {
	FileSystem fs = FileSystem.getLocalFileSystem();

	final long size1 = 50873;
	final long fakeSize = 10065;
	String tempFile1 = TestFileUtils.createTempFile(size1);
	final long lastModTime1 = fs.getFileStatus(new Path(tempFile1)).getModificationTime();

	final long size2 = 52573;
	String tempFile2 = TestFileUtils.createTempFile(size2);
	final long lastModTime2 = fs.getFileStatus(new Path(tempFile2)).getModificationTime();

	final long sizeTotal = size1 + size2;
	
	MultiDummyFileInputFormat format = new MultiDummyFileInputFormat();
	format.setFilePaths(tempFile1, tempFile2);
	format.configure(new Configuration());
	
	FileBaseStatistics stats = format.getStatistics(null);
	Assert.assertEquals("The file size from the statistics is wrong.", sizeTotal, stats.getTotalInputSize());
	
	format = new MultiDummyFileInputFormat();
	format.setFilePath(tempFile1);
	format.configure(new Configuration());
	
	FileBaseStatistics newStats = format.getStatistics(stats);
	// assertSame states the identity check directly and reports both objects on failure.
	Assert.assertSame("Statistics object was changed", stats, newStats);

	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new MultiDummyFileInputFormat();
	format.setFilePath(tempFile1);
	format.configure(new Configuration());
	
	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", fakeSize, latest.getTotalInputSize());
	
	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new MultiDummyFileInputFormat();
	format.setFilePaths(tempFile1, tempFile2);
	format.configure(new Configuration());
	
	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(Math.min(lastModTime1, lastModTime2) - 1, fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", sizeTotal, reGathered.getTotalInputSize());
}
 
Example 20
Source File: FileInputFormatTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Exercises {@code getStatistics} with cached statistics for formats configured with
 * one or two temporary files.
 *
 * <p>Four steps are checked in sequence:
 * <ol>
 *   <li>with no cached statistics and both files, the total input size equals the sum
 *       of the two creation sizes;</li>
 *   <li>passing those statistics to a format over the first file only returns the
 *       same statistics instance;</li>
 *   <li>fake statistics carrying the last modification time of the real statistics
 *       are returned as-is (their fake size is reported);</li>
 *   <li>fake statistics whose modification time is one less than the smaller of the
 *       two file modification times lead to the summed size being reported again.</li>
 * </ol>
 *
 * @throws IOException declared by the method; presumably raised by the file-system or
 *     statistics calls - not visible from this snippet
 */
@Test
public void testGetStatisticsMultipleOneFileWithCachedVersion() throws IOException {
	FileSystem fs = FileSystem.getLocalFileSystem();

	final long size1 = 50873;
	final long fakeSize = 10065;
	String tempFile1 = TestFileUtils.createTempFile(size1);
	final long lastModTime1 = fs.getFileStatus(new Path(tempFile1)).getModificationTime();

	final long size2 = 52573;
	String tempFile2 = TestFileUtils.createTempFile(size2);
	final long lastModTime2 = fs.getFileStatus(new Path(tempFile2)).getModificationTime();

	final long sizeTotal = size1 + size2;
	
	MultiDummyFileInputFormat format = new MultiDummyFileInputFormat();
	format.setFilePaths(tempFile1, tempFile2);
	format.configure(new Configuration());
	
	FileBaseStatistics stats = format.getStatistics(null);
	Assert.assertEquals("The file size from the statistics is wrong.", sizeTotal, stats.getTotalInputSize());
	
	format = new MultiDummyFileInputFormat();
	format.setFilePath(tempFile1);
	format.configure(new Configuration());
	
	FileBaseStatistics newStats = format.getStatistics(stats);
	// assertSame states the identity check directly and reports both objects on failure.
	Assert.assertSame("Statistics object was changed", stats, newStats);

	// insert fake stats with the correct modification time. the call should return the fake stats
	format = new MultiDummyFileInputFormat();
	format.setFilePath(tempFile1);
	format.configure(new Configuration());
	
	FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics latest = format.getStatistics(fakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", fakeSize, latest.getTotalInputSize());
	
	// insert fake stats with the expired modification time. the call should return new accurate stats
	format = new MultiDummyFileInputFormat();
	format.setFilePaths(tempFile1, tempFile2);
	format.configure(new Configuration());
	
	FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(Math.min(lastModTime1, lastModTime2) - 1, fakeSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
	Assert.assertEquals("The file size from the statistics is wrong.", sizeTotal, reGathered.getTotalInputSize());
}