Java Code Examples for org.apache.flink.core.fs.FileInputSplit

The following examples show how to use org.apache.flink.core.fs.FileInputSplit. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: flink   Source File: ParquetRowInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testReadRowFromNestedRecord() throws IOException {
	// Write one nested Avro record to a temp Parquet file, then read it back as a Row.
	Tuple3<Class<? extends SpecificRecord>, SpecificRecord, Row> nested = TestUtil.getNestedRecordTestData();
	Path path = TestUtil.createTempParquetFile(tempRoot.newFolder(), TestUtil.NESTED_SCHEMA, Collections.singletonList(nested.f1));
	MessageType nestedType = SCHEMA_CONVERTER.convert(TestUtil.NESTED_SCHEMA);

	ParquetRowInputFormat inputFormat = new ParquetRowInputFormat(path, nestedType);
	inputFormat.setRuntimeContext(TestUtil.getMockRuntimeContext());

	// a single small file should yield exactly one split
	FileInputSplit[] splits = inputFormat.createInputSplits(1);
	assertEquals(1, splits.length);
	inputFormat.open(splits[0]);

	Row row = inputFormat.nextRecord(null);
	assertNotNull(row);
	assertEquals(7, row.getArity());

	// Compare field-by-field against the expected Row (nested.f2); array-valued
	// fields need assertArrayEquals.
	// NOTE(review): field 2 is not asserted here — confirm whether it is covered
	// elsewhere or cannot be compared directly.
	assertEquals(nested.f2.getField(0), row.getField(0));
	assertEquals(nested.f2.getField(1), row.getField(1));
	assertArrayEquals((Long[]) nested.f2.getField(3), (Long[]) row.getField(3));
	assertArrayEquals((String[]) nested.f2.getField(4), (String[]) row.getField(4));
	assertEquals(nested.f2.getField(5), row.getField(5));
	assertArrayEquals((Row[]) nested.f2.getField(6), (Row[]) row.getField(6));
}
 
Example 2
Source Project: flink   Source File: ReplicatingDataSourceTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Tests compiler fail for join program with replicated data source behind map and changing parallelism.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindMapChangingparallelism() {

	ExecutionEnvironment executionEnv = ExecutionEnvironment.createLocalEnvironment();
	executionEnv.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> stringTupleInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> replicatingFormat =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
					new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), stringTupleInfo));

	DataSet<Tuple1<String>> replicatedSource =
			executionEnv.createInput(replicatingFormat, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> csvSource = executionEnv.readCsvFile("/some/otherpath").types(String.class);

	// a map with a parallelism different from the replicated source sits between source and join
	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> sink = replicatedSource
			.map(new IdMap()).setParallelism(DEFAULT_PARALLELISM + 1)
			.join(csvSource).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	// compiling the plan is expected to throw the CompilerException
	Plan programPlan = executionEnv.createProgramPlan();
	compileNoStats(programPlan);
}
 
Example 3
Source Project: Flink-CEPplus   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testReadDecimalTypeFile() throws IOException {
	// Read an ORC file containing a single DECIMAL column.
	rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	assertEquals(1, inputSplits.length);
	rowOrcInputFormat.openInputFormat();
	rowOrcInputFormat.open(inputSplits[0]);

	// the split must contain at least one record
	assertFalse(rowOrcInputFormat.reachedEnd());

	// validate the content of the first row
	Row firstRow = rowOrcInputFormat.nextRecord(null);
	assertNotNull(firstRow);
	assertEquals(1, firstRow.getArity());
	assertEquals(BigDecimal.valueOf(-1000.5d), firstRow.getField(0));

	// drain the remaining rows and verify the total count
	long rowCount = 1;
	for (; !rowOrcInputFormat.reachedEnd(); rowCount++) {
		assertNotNull(rowOrcInputFormat.nextRecord(null));
	}
	assertEquals(6000, rowCount);
}
 
Example 4
Source Project: flink   Source File: EnumerateNestedFilesTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Test without nested directory and recursive.file.enumeration = true
 */
@Test
public void testNoNestedDirectoryTrue() {
	try {
		// a single flat file: recursive enumeration must still yield exactly one split
		String tempFilePath = TestFileUtils.createTempFile("foo");

		this.format.setFilePath(new Path(tempFilePath));
		this.config.setBoolean("recursive.file.enumeration", true);
		this.format.configure(this.config);

		FileInputSplit[] inputSplits = this.format.createInputSplits(1);
		Assert.assertEquals(1, inputSplits.length);
	} catch (Exception ex) {
		ex.printStackTrace();
		Assert.fail(ex.getMessage());
	}
}
 
Example 5
Source Project: flink   Source File: CsvInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testPojoTypeWithMappingInfoAndPartialField() throws Exception {
	File tempFile = File.createTempFile("CsvReaderPojoType", "tmp");
	tempFile.deleteOnExit();
	tempFile.setWritable(true);

	// try-with-resources closes the writer even if a write fails, and the
	// explicit charset avoids depending on the platform default encoding
	// (the test data is ASCII, so the encoded bytes are unchanged)
	try (OutputStreamWriter wrt = new OutputStreamWriter(
			new FileOutputStream(tempFile), java.nio.charset.StandardCharsets.UTF_8)) {
		wrt.write("123,3.123,AAA,BBB\n");
		wrt.write("456,1.123,BBB,AAA\n");
	}

	@SuppressWarnings("unchecked")
	PojoTypeInfo<PojoItem> typeInfo = (PojoTypeInfo<PojoItem>) TypeExtractor.createTypeInfo(PojoItem.class);
	// include-mask selects CSV columns 0 and 3, mapped onto POJO fields field1 and field4
	CsvInputFormat<PojoItem> inputFormat = new PojoCsvInputFormat<PojoItem>(new Path(tempFile.toURI().toString()), typeInfo, new String[]{"field1", "field4"}, new boolean[]{true, false, false, true});

	inputFormat.configure(new Configuration());
	FileInputSplit[] splits = inputFormat.createInputSplits(1);

	inputFormat.open(splits[0]);

	PojoItem item = new PojoItem();
	inputFormat.nextRecord(item);

	// first record: column 0 -> field1, column 3 -> field4
	assertEquals(123, item.field1);
	assertEquals("BBB", item.field4);
}
 
Example 6
Source Project: Flink-CEPplus   Source File: AvroInputFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates and positions an Avro {@link DataFileReader} for the given split.
 *
 * <p>The {@link DatumReader} implementation is chosen from the configured
 * {@code avroValueType}: generic records get a {@code GenericDatumReader},
 * specific-record subclasses a {@code SpecificDatumReader}, and anything else
 * a {@code ReflectDatumReader}.
 *
 * @param split the split to read; its path supplies the file length and its
 *              start/length define where reading for this split must stop
 * @return the opened reader
 * @throws IOException if the file system metadata or the Avro reader cannot be opened
 */
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
	DatumReader<E> datumReader;

	if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
		datumReader = new GenericDatumReader<E>();
	} else {
		datumReader = org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)
			? new SpecificDatumReader<E>(avroValueType) : new ReflectDatumReader<E>(avroValueType);
	}
	if (LOG.isInfoEnabled()) {
		LOG.info("Opening split {}", split);
	}

	// wraps the already-open input stream; the wrapper is given the total file
	// length so the Avro reader can seek within the file
	SeekableInput in = new FSDataInputStreamWrapper(stream, split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen());
	DataFileReader<E> dataFileReader = (DataFileReader) DataFileReader.openReader(in, datumReader);

	if (LOG.isDebugEnabled()) {
		LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
	}

	// record where this split ends and reset the per-sync-point record counter
	end = split.getStart() + split.getLength();
	recordsReadSinceLastSync = 0;
	return dataFileReader;
}
 
Example 7
Source Project: Flink-CEPplus   Source File: AvroInputFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Restores the format to a previously checkpointed position inside the split.
 *
 * <p>The state tuple holds (last sync point, records read since that sync).
 * After reopening the split, the reader seeks back to the last sync marker and
 * replays — discarding — the records that were already consumed before the
 * checkpoint, so reading resumes exactly where it left off.
 *
 * @param split the split to reopen; must not be null
 * @param state checkpoint state as (lastSync, recordsReadSinceLastSync); must not be null
 * @throws IOException if the split cannot be opened or the seek fails
 */
@Override
public void reopen(FileInputSplit split, Tuple2<Long, Long> state) throws IOException {
	Preconditions.checkNotNull(split, "reopen() cannot be called on a null split.");
	Preconditions.checkNotNull(state, "reopen() cannot be called with a null initial state.");

	// the state fields are restored in a finally block, so they are set even
	// if open() throws — NOTE(review): presumably so a retry sees the state; confirm
	try {
		this.open(split);
	} finally {
		if (state.f0 != -1) {
			lastSync = state.f0;
			recordsReadSinceLastSync = state.f1;
		}
	}

	// -1 means "no sync point recorded yet" — nothing to replay in that case
	if (lastSync != -1) {
		// open and read until the record we were before
		// the checkpoint and discard the values
		dataFileReader.seek(lastSync);
		for (int i = 0; i < recordsReadSinceLastSync; i++) {
			dataFileReader.next(null);
		}
	}
}
 
Example 8
Source Project: flink   Source File: ParquetMapInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
@SuppressWarnings("unchecked")
public void testProjectedReadMapFromNestedRecord() throws IOException {
	// Write one nested record to Parquet, then read it back with a projection
	// selecting only the "nestedMap" field.
	Tuple3<Class<? extends SpecificRecord>, SpecificRecord, Row> nested = TestUtil.getNestedRecordTestData();
	Path path = TestUtil.createTempParquetFile(tempRoot.getRoot(), TestUtil.NESTED_SCHEMA, Collections.singletonList(nested.f1));
	MessageType nestedType = SCHEMA_CONVERTER.convert(TestUtil.NESTED_SCHEMA);
	ParquetMapInputFormat inputFormat = new ParquetMapInputFormat(path, nestedType);

	// project the schema down to the single map-typed field
	inputFormat.selectFields(Collections.singletonList("nestedMap").toArray(new String[0]));
	inputFormat.setRuntimeContext(TestUtil.getMockRuntimeContext());

	FileInputSplit[] splits = inputFormat.createInputSplits(1);
	assertEquals(1, splits.length);
	inputFormat.open(splits[0]);

	// the projected record surfaces as a Map containing only the selected field
	Map map = inputFormat.nextRecord(null);
	assertNotNull(map);
	assertEquals(1, map.size());

	// drill into nestedMap -> mapItem and verify both of its entries
	Map<String, String> mapItem = (Map<String, String>) ((Map) map.get("nestedMap")).get("mapItem");
	assertEquals(2, mapItem.size());
	assertEquals("map", mapItem.get("type"));
	assertEquals("hashMap", mapItem.get("value"));
}
 
Example 9
Source Project: flink   Source File: GenericCsvInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testReadTooShortInputLenient() throws IOException {
	try {
		// the middle line has only 4 of the 5 configured fields
		final String fileContent = "666|777|888|999|555\n111|222|333|444\n666|777|888|999|555";
		final FileInputSplit split = createTempFile(fileContent);

		final Configuration parameters = new Configuration();
		format.setFieldDelimiter("|");
		format.setFieldTypesGeneric(IntValue.class, IntValue.class, IntValue.class, IntValue.class, IntValue.class);
		// lenient mode: a malformed line yields null from nextRecord instead of throwing
		format.setLenient(true);

		format.configure(parameters);
		format.open(split);

		Value[] values = createIntValues(5);

		assertNotNull(format.nextRecord(values));	// line okay
		assertNull(format.nextRecord(values));	// line too short
		assertNotNull(format.nextRecord(values));	// line okay
	}
	catch (Exception ex) {
		fail("Test failed due to a " + ex.getClass().getSimpleName() + ": " + ex.getMessage());
	}
}
 
Example 10
Source Project: Flink-CEPplus   Source File: ReplicatingDataSourceTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Tests compiler fail for join program with replicated data source behind rebalance.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindRebalance() {
	ExecutionEnvironment executionEnv = ExecutionEnvironment.createLocalEnvironment();
	executionEnv.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> stringTupleInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> replicatingFormat =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
					new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), stringTupleInfo));

	DataSet<Tuple1<String>> replicatedSource =
			executionEnv.createInput(replicatingFormat, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> csvSource = executionEnv.readCsvFile("/some/otherpath").types(String.class);

	// a rebalance sits between the replicated source and the join
	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> sink = replicatedSource
			.rebalance()
			.join(csvSource).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	// compiling the plan is expected to throw the CompilerException
	compileNoStats(executionEnv.createProgramPlan());
}
 
Example 11
Source Project: flink   Source File: CsvInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testPojoTypeWithMappingInformation() throws Exception {
	File tempFile = File.createTempFile("CsvReaderPojoType", "tmp");
	tempFile.deleteOnExit();
	tempFile.setWritable(true);

	// try-with-resources closes the writer even if a write fails, and the
	// explicit charset avoids depending on the platform default encoding
	// (the test data is ASCII, so the encoded bytes are unchanged)
	try (OutputStreamWriter wrt = new OutputStreamWriter(
			new FileOutputStream(tempFile), java.nio.charset.StandardCharsets.UTF_8)) {
		wrt.write("123,3.123,AAA,BBB\n");
		wrt.write("456,1.123,BBB,AAA\n");
	}

	@SuppressWarnings("unchecked")
	PojoTypeInfo<PojoItem> typeInfo = (PojoTypeInfo<PojoItem>) TypeExtractor.createTypeInfo(PojoItem.class);
	// CSV columns are mapped by name onto POJO fields in the given order
	CsvInputFormat<PojoItem> inputFormat = new PojoCsvInputFormat<PojoItem>(new Path(tempFile.toURI().toString()), typeInfo, new String[]{"field1", "field3", "field2", "field4"});

	inputFormat.configure(new Configuration());
	FileInputSplit[] splits = inputFormat.createInputSplits(1);

	inputFormat.open(splits[0]);

	validatePojoItem(inputFormat);
}
 
Example 12
Source Project: flink   Source File: GraphCreationWithCsvITCase.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testCreateWithOnlyEdgesCsvFile() throws Exception {
	/*
	 * Test with one CSV file containing only edge data. Also tests the configuration method ignoreFirstLineEdges()
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	// "header" is the first line, which ignoreFirstLineEdges() must skip
	final String fileContent2 = "header\n1,2,ot\n" +
			"3,2,tt\n" +
			"3,1,to\n";

	final FileInputSplit split2 = createTempFile(fileContent2);
	Graph<Long, NullValue, String> graph = Graph.fromCsvReader(split2.getPath().toString(), env)
			.ignoreFirstLineEdges()
			.ignoreCommentsVertices("hi")
			.edgeTypes(Long.class, String.class);

	// vertices are derived from the edges, so both vertex values appear as (null)
	List<Triplet<Long, NullValue, String>> result = graph.getTriplets().collect();
	expectedResult = "1,2,(null),(null),ot\n" +
			"3,2,(null),(null),tt\n" +
			"3,1,(null),(null),to\n";

	compareResultAsTuples(result, expectedResult);
}
 
Example 13
Source Project: flink   Source File: GenericCsvInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testReadTooShortInputLenient() throws IOException {
	try {
		// the middle line has only 4 of the 5 expected fields
		final String fileContent = "666|777|888|999|555\n111|222|333|444\n666|777|888|999|555";
		final FileInputSplit inputSplit = createTempFile(fileContent);

		final Configuration parameters = new Configuration();
		format.setFieldDelimiter("|");
		format.setFieldTypesGeneric(IntValue.class, IntValue.class, IntValue.class, IntValue.class, IntValue.class);
		format.setLenient(true);

		format.configure(parameters);
		format.open(inputSplit);

		Value[] reuse = createIntValues(5);

		// in lenient mode a too-short line yields null rather than an exception
		assertNotNull(format.nextRecord(reuse));	// complete line
		assertNull(format.nextRecord(reuse));	// truncated line
		assertNotNull(format.nextRecord(reuse));	// complete line
	}
	catch (Exception ex) {
		fail("Test failed due to a " + ex.getClass().getSimpleName() + ": " + ex.getMessage());
	}
}
 
Example 14
Source Project: flink   Source File: DelimitedInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testReadWithoutTrailingDelimiter() throws IOException {
	// 2. test case
	// two records separated by the default '\n' delimiter, with no delimiter after the last
	final String content = "my key|my val$$$my key2\n$$ctd.$$|my value2";
	final FileInputSplit inputSplit = createTempFile(content);

	// default delimiter = '\n'
	format.configure(new Configuration());
	format.open(inputSplit);

	final String firstRecord = format.nextRecord(null);
	final String secondRecord = format.nextRecord(null);

	assertNotNull(firstRecord);
	assertEquals("my key|my val$$$my key2", firstRecord);

	assertNotNull(secondRecord);
	assertEquals("$$ctd.$$|my value2", secondRecord);

	// the missing trailing delimiter must not lose or duplicate the last record
	assertNull(format.nextRecord(null));
	assertTrue(format.reachedEnd());
}
 
Example 15
Source Project: Flink-CEPplus   Source File: DelimitedInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testDelimiterOnBufferBoundary() throws IOException {
	// records contain near-miss delimiter fragments; with a 12-byte read buffer the
	// 7-byte delimiter is forced to straddle buffer boundaries
	String[] expectedRecords = new String[]{"1234567890<DEL?NO!>1234567890", "1234567890<DEL?NO!>1234567890", "<DEL?NO!>"};
	String delimiter = "<DELIM>";
	String fileContent = StringUtils.join(expectedRecords, delimiter);

	final FileInputSplit inputSplit = createTempFile(fileContent);

	format.setBufferSize(12);
	format.setDelimiter(delimiter);
	format.configure(new Configuration());
	format.open(inputSplit);

	// every record must come back intact despite the split delimiter
	for (int i = 0; i < expectedRecords.length; i++) {
		assertEquals(expectedRecords[i], format.nextRecord(null));
	}

	assertNull(format.nextRecord(null));
	assertTrue(format.reachedEnd());

	format.close();
}
 
Example 16
Source Project: flink   Source File: DelimitedInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testReadWithTrailingDelimiter() throws IOException {
	// 2. test case
	// two records, each terminated by the default '\n' delimiter (including the last)
	final String myString = "my key|my val$$$my key2\n$$ctd.$$|my value2\n";
	final FileInputSplit split = createTempFile(myString);

	final Configuration parameters = new Configuration();
	// default delimiter = '\n'

	format.configure(parameters);
	format.open(split);

	String first = format.nextRecord(null);
	String second = format.nextRecord(null);

	assertNotNull(first);
	assertNotNull(second);

	assertEquals("my key|my val$$$my key2", first);
	assertEquals("$$ctd.$$|my value2", second);

	// the trailing delimiter must not produce an empty third record
	assertNull(format.nextRecord(null));
	assertTrue(format.reachedEnd());
}
 
Example 17
Source Project: flink   Source File: EnumerateNestedFilesTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Test with one nested directory and recursive.file.enumeration = true
 */
@Test
public void testOneNestedDirectoryTrue() {
	try {
		// build <tmp>/<first>/<second>
		File insideNestedDir = tempFolder.newFolder(TestFileUtils.randomFileName(), TestFileUtils.randomFileName());
		File nestedDir = insideNestedDir.getParentFile();

		// one file in the first-level dir, two files in the nested dir
		TestFileUtils.createTempFileInDirectory(nestedDir.getAbsolutePath(), "paella");
		TestFileUtils.createTempFileInDirectory(insideNestedDir.getAbsolutePath(), "kalamari");
		TestFileUtils.createTempFileInDirectory(insideNestedDir.getAbsolutePath(), "fideua");

		this.format.setFilePath(new Path(nestedDir.toURI().toString()));
		this.config.setBoolean("recursive.file.enumeration", true);
		this.format.configure(this.config);

		// recursive enumeration must discover all three files
		FileInputSplit[] inputSplits = this.format.createInputSplits(1);
		Assert.assertEquals(3, inputSplits.length);
	} catch (Exception ex) {
		ex.printStackTrace();
		Assert.fail(ex.getMessage());
	}
}
 
Example 18
@Test
public void parseBitcoinRawBlock() throws HadoopCryptoLedgerConfigurationException, IOException {
	// locate the genesis-block fixture on the test classpath
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName = "genesis.blk";
	String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
	Path file = new Path(fileNameBlock);
	FileInputSplit blockInputSplit = new FileInputSplit(0, file, 0, -1, null);

	// 1 MiB max block size, mainnet magic bytes, no direct-buffer mode
	BitcoinRawBlockFlinkInputFormat inputFormat = new BitcoinRawBlockFlinkInputFormat(1024 * 1024, "F9BEB4D9", false);
	inputFormat.open(blockInputSplit);
	assertFalse(inputFormat.reachedEnd(), "End not reached");

	// the genesis file contains exactly one block of 293 bytes
	BytesWritable reuse = new BytesWritable();
	BytesWritable nextBlock = inputFormat.nextRecord(reuse);
	assertNotNull(nextBlock, "First Block returned");
	assertEquals(293, nextBlock.getLength(), "First Block must have size of 293");

	nextBlock = inputFormat.nextRecord(reuse);
	assertNull(nextBlock, "No further block");
	assertTrue(inputFormat.reachedEnd(), "End reached");
}
 
Example 19
Source Project: Flink-CEPplus   Source File: PrimitiveInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testIntegerInput() throws IOException {
	try {
		// two integers, each terminated by the '|' delimiter
		final String fileContent = "111|222|";
		final FileInputSplit split = createInputSplit(fileContent);

		final PrimitiveInputFormat<Integer> format = new PrimitiveInputFormat<Integer>(PATH, "|", Integer.class);

		format.configure(new Configuration());
		format.open(split);

		Integer result = null;
		result = format.nextRecord(result);
		assertEquals(Integer.valueOf(111), result);

		result = format.nextRecord(result);
		assertEquals(Integer.valueOf(222), result);

		// after the last value the format must return null and report end-of-input
		result = format.nextRecord(result);
		assertNull(result);
		assertTrue(format.reachedEnd());
	}
	catch (Exception ex) {
		fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
	}
}
 
Example 20
Source Project: flink   Source File: AvroInputFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Restores the format to a previously checkpointed position inside the split.
 *
 * <p>The state tuple holds (last sync point, records read since that sync).
 * After reopening the split, the reader seeks back to the last sync marker and
 * replays — discarding — the records that were already consumed before the
 * checkpoint, so reading resumes exactly where it left off.
 *
 * @param split the split to reopen; must not be null
 * @param state checkpoint state as (lastSync, recordsReadSinceLastSync); must not be null
 * @throws IOException if the split cannot be opened or the seek fails
 */
@Override
public void reopen(FileInputSplit split, Tuple2<Long, Long> state) throws IOException {
	Preconditions.checkNotNull(split, "reopen() cannot be called on a null split.");
	Preconditions.checkNotNull(state, "reopen() cannot be called with a null initial state.");

	// the state fields are restored in a finally block, so they are set even
	// if open() throws — NOTE(review): presumably so a retry sees the state; confirm
	try {
		this.open(split);
	} finally {
		if (state.f0 != -1) {
			lastSync = state.f0;
			recordsReadSinceLastSync = state.f1;
		}
	}

	// -1 means "no sync point recorded yet" — nothing to replay in that case
	if (lastSync != -1) {
		// open and read until the record we were before
		// the checkpoint and discard the values
		dataFileReader.seek(lastSync);
		for (int i = 0; i < recordsReadSinceLastSync; i++) {
			dataFileReader.next(null);
		}
	}
}
 
Example 21
Source Project: Flink-CEPplus   Source File: DelimitedInputFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Opens the given input split. This method opens the input stream to the specified file, allocates read buffers
 * and positions the stream at the correct position, making sure that any partial record at the beginning is skipped.
 *
 * @param split The input split to open.
 *
 * @see org.apache.flink.api.common.io.FileInputFormat#open(org.apache.flink.core.fs.FileInputSplit)
 */
@Override
public void open(FileInputSplit split) throws IOException {
	super.open(split);
	initBuffers();

	this.offset = splitStart;
	if (this.splitStart != 0) {
		// not the first split: seek to the split start and read one line to skip
		// the partial record that belongs to the preceding split
		this.stream.seek(offset);
		readLine();
		// if the first partial record already pushes the stream over
		// the limit of our split, then no record starts within this split
		if (this.overLimit) {
			this.end = true;
		}
	} else {
		// first split: start filling the read buffer from the beginning of the file
		fillBuffer(0);
	}
}
 
Example 22
Source Project: flink   Source File: ReplicatingDataSourceTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Tests compiler fail for join program with replicated data source behind reduce.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindReduce() {
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	// a reduce sits between the replicated source and the join, which the
	// optimizer is expected to reject
	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.reduce(new LastReduce())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler; the expected CompilerException surfaces here
	OptimizedPlan oPlan = compileNoStats(plan);
}
 
Example 23
Source Project: flink   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testReadDecimalTypeFile() throws IOException {
	// Read an ORC file containing a single DECIMAL column.
	rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	// a single small file should yield exactly one split
	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	assertEquals(1, splits.length);
	rowOrcInputFormat.openInputFormat();
	rowOrcInputFormat.open(splits[0]);

	assertFalse(rowOrcInputFormat.reachedEnd());
	Row row = rowOrcInputFormat.nextRecord(null);

	// validate first row
	assertNotNull(row);
	assertEquals(1, row.getArity());
	assertEquals(BigDecimal.valueOf(-1000.5d), row.getField(0));

	// check correct number of rows
	long cnt = 1;
	while (!rowOrcInputFormat.reachedEnd()) {
		assertNotNull(rowOrcInputFormat.nextRecord(null));
		cnt++;
	}
	assertEquals(6000, cnt);
}
 
Example 24
Source Project: flink   Source File: CsvInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testPojoType() throws Exception {
	File tempFile = File.createTempFile("CsvReaderPojoType", "tmp");
	tempFile.deleteOnExit();
	tempFile.setWritable(true);

	// try-with-resources closes the writer even if a write fails, and the
	// explicit charset avoids depending on the platform default encoding
	// (the test data is ASCII, so the encoded bytes are unchanged)
	try (OutputStreamWriter wrt = new OutputStreamWriter(
			new FileOutputStream(tempFile), java.nio.charset.StandardCharsets.UTF_8)) {
		wrt.write("123,AAA,3.123,BBB\n");
		wrt.write("456,BBB,1.123,AAA\n");
	}

	@SuppressWarnings("unchecked")
	PojoTypeInfo<PojoItem> typeInfo = (PojoTypeInfo<PojoItem>) TypeExtractor.createTypeInfo(PojoItem.class);
	// no explicit field mapping: CSV columns map onto POJO fields by default order
	CsvInputFormat<PojoItem> inputFormat = new PojoCsvInputFormat<PojoItem>(new Path(tempFile.toURI().toString()), typeInfo);

	inputFormat.configure(new Configuration());
	FileInputSplit[] splits = inputFormat.createInputSplits(1);

	inputFormat.open(splits[0]);

	validatePojoItem(inputFormat);
}
 
Example 25
Source Project: Flink-CEPplus   Source File: DelimitedInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testReadCustomDelimiterWithCharset() throws IOException {
	// Unicode row fragments
	String[] records = new String[]{"\u020e\u021f\u05c0\u020b\u020f", "Apache", "\nFlink", "\u0000", "\u05c0"};

	// Unicode delimiter
	String delimiter = "\u05c0\u05c0";

	String fileContent = StringUtils.join(records, delimiter);

	// run the same data through three encodings; file and format must agree on each
	for (final String charset : new String[]{ "UTF-8", "UTF-16BE", "UTF-16LE" }) {
		// use charset when instantiating the record String
		DelimitedInputFormat<String> format = new DelimitedInputFormat<String>() {
			@Override
			public String readRecord(String reuse, byte[] bytes, int offset, int numBytes) throws IOException {
				return new String(bytes, offset, numBytes, charset);
			}
		};
		// dummy path: the actual data comes from the split created below
		format.setFilePath("file:///some/file/that/will/not/be/read");

		final FileInputSplit split = createTempFile(fileContent, charset);

		format.setDelimiter(delimiter);
		// use the same encoding to parse the file as used to read the file;
		// the delimiter is reinterpreted when the charset is set
		format.setCharset(charset);
		format.configure(new Configuration());
		format.open(split);

		// every record must round-trip unchanged through the chosen encoding
		for (String record : records) {
			String value = format.nextRecord(null);
			assertEquals(record, value);
		}

		assertNull(format.nextRecord(null));
		assertTrue(format.reachedEnd());
	}
}
 
Example 26
Source Project: Flink-CEPplus   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reading from a path that does not exist must fail with FileNotFoundException.
 */
@Test(expected = FileNotFoundException.class)
public void testInvalidPath() throws IOException{
	rowOrcInputFormat =
		new OrcRowInputFormat("/does/not/exist", TEST_SCHEMA_FLAT, new Configuration());
	rowOrcInputFormat.openInputFormat();
	// NOTE(review): the exception presumably surfaces from createInputSplits or open — confirm
	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.open(inputSplits[0]);
}
 
Example 27
Source Project: Flink-CEPplus   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testProjectionMaskNested() throws IOException{
	// Verify that selecting top-level fields of a nested schema produces the
	// correct per-column include mask in the ORC reader options.
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());

	OrcRowInputFormat spy = spy(rowOrcInputFormat);

	// mock options to check configuration of ORC reader
	Reader.Options options = new Reader.Options();
	doReturn(options).when(spy).getOptions(any());

	// select two nested fields (9, 11) and one flat field (2)
	spy.selectFields(9, 11, 2);
	spy.openInputFormat();
	FileInputSplit[] splits = spy.createInputSplits(1);
	spy.open(splits[0]);

	// top-level struct is false
	boolean[] expected = new boolean[]{
		false, // top level
		false, false, // flat fields 0, 1 are out
		true, // flat field 2 is in
		false, false, false, false, false, false, // flat fields 3, 4, 5, 6, 7, 8 are out
		true, true, true, true, true, // nested field 9 is in
		false, false, false, false, // nested field 10 is out
		true, true, true, true, true}; // nested field 11 is in
	assertArrayEquals(expected, options.getInclude());
}
 
Example 28
Source Project: flink   Source File: RowCsvInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes the given content to a fresh UTF-8 temp file and wraps it in a single
 * {@link FileInputSplit} covering the whole file.
 *
 * @param content the file content, encoded as UTF-8
 * @return a split spanning the entire temp file, hosted on "localhost"
 * @throws IOException if the file cannot be created or written
 */
private static FileInputSplit createTempFile(String content) throws IOException {
	File tempFile = File.createTempFile("test_contents", "tmp");
	tempFile.deleteOnExit();
	// try-with-resources closes the writer even if write() throws
	try (OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile), StandardCharsets.UTF_8)) {
		wrt.write(content);
	}
	return new FileInputSplit(0, new Path(tempFile.toURI().toString()), 0, tempFile.length(), new String[]{"localhost"});
}
 
Example 29
Source Project: flink   Source File: ReplicatingDataSourceTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tests join program with replicated data source behind map partition.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMapPartition() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	// a mapPartition between the replicated source and the join keeps
	// replication valid (parallelism is unchanged), so compilation must succeed
	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.mapPartition(new IdPMap())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// when join should have forward strategy on both sides
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
 
Example 30
Source Project: Flink-CEPplus   Source File: GraphCreationWithCsvITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes the given content to a fresh UTF-8 temp file and wraps it in a single
 * {@link FileInputSplit} covering the whole file.
 *
 * @param content the file content, encoded as UTF-8
 * @return a split spanning the entire temp file, hosted on "localhost"
 * @throws IOException if the file cannot be created or written
 */
private FileInputSplit createTempFile(String content) throws IOException {
	File tempFile = File.createTempFile("test_contents", "tmp");
	tempFile.deleteOnExit();

	// try-with-resources closes the writer even if write() throws;
	// StandardCharsets.UTF_8 replaces the equivalent Charset.forName("UTF-8") lookup
	try (OutputStreamWriter wrt = new OutputStreamWriter(
			new FileOutputStream(tempFile), java.nio.charset.StandardCharsets.UTF_8)) {
		wrt.write(content);
	}

	return new FileInputSplit(0, new Path(tempFile.toURI().toString()), 0,
						tempFile.length(), new String[] {"localhost"});
}