Java Code Examples for org.apache.flink.core.fs.FileInputSplit

The following examples show how to use org.apache.flink.core.fs.FileInputSplit. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: flink   Source File: ParquetRowInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testReadRowFromNestedRecord() throws IOException {
	// Write one nested Avro record to a temp Parquet file, then read it back as a Row.
	Tuple3<Class<? extends SpecificRecord>, SpecificRecord, Row> nested = TestUtil.getNestedRecordTestData();
	Path path = TestUtil.createTempParquetFile(tempRoot.newFolder(), TestUtil.NESTED_SCHEMA, Collections.singletonList(nested.f1));
	MessageType nestedType = SCHEMA_CONVERTER.convert(TestUtil.NESTED_SCHEMA);

	ParquetRowInputFormat inputFormat = new ParquetRowInputFormat(path, nestedType);
	inputFormat.setRuntimeContext(TestUtil.getMockRuntimeContext());

	// a single small file should yield exactly one split
	FileInputSplit[] splits = inputFormat.createInputSplits(1);
	assertEquals(1, splits.length);
	inputFormat.open(splits[0]);

	Row row = inputFormat.nextRecord(null);
	assertNotNull(row);
	assertEquals(7, row.getArity());

	// Compare field-by-field against the expected Row (nested.f2); array-valued
	// fields need assertArrayEquals.
	// NOTE(review): field 2 is not asserted here — confirm whether it is covered
	// elsewhere or cannot be compared directly.
	assertEquals(nested.f2.getField(0), row.getField(0));
	assertEquals(nested.f2.getField(1), row.getField(1));
	assertArrayEquals((Long[]) nested.f2.getField(3), (Long[]) row.getField(3));
	assertArrayEquals((String[]) nested.f2.getField(4), (String[]) row.getField(4));
	assertEquals(nested.f2.getField(5), row.getField(5));
	assertArrayEquals((Row[]) nested.f2.getField(6), (Row[]) row.getField(6));
}
 
Example 2
Source Project: flink   Source File: ReplicatingDataSourceTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Tests compiler fail for join program with replicated data source behind map and changing parallelism.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindMapChangingparallelism() {

	ExecutionEnvironment executionEnv = ExecutionEnvironment.createLocalEnvironment();
	executionEnv.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> stringTupleInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> replicatingFormat =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
					new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), stringTupleInfo));

	DataSet<Tuple1<String>> replicatedSource =
			executionEnv.createInput(replicatingFormat, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> csvSource = executionEnv.readCsvFile("/some/otherpath").types(String.class);

	// a map with a parallelism different from the replicated source sits between source and join
	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> sink = replicatedSource
			.map(new IdMap()).setParallelism(DEFAULT_PARALLELISM + 1)
			.join(csvSource).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	// compiling the plan is expected to throw the CompilerException
	Plan programPlan = executionEnv.createProgramPlan();
	compileNoStats(programPlan);
}
 
Example 3
Source Project: Flink-CEPplus   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testReadDecimalTypeFile() throws IOException {
	// Read an ORC file containing a single DECIMAL column.
	rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	assertEquals(1, inputSplits.length);
	rowOrcInputFormat.openInputFormat();
	rowOrcInputFormat.open(inputSplits[0]);

	// the split must contain at least one record
	assertFalse(rowOrcInputFormat.reachedEnd());

	// validate the content of the first row
	Row firstRow = rowOrcInputFormat.nextRecord(null);
	assertNotNull(firstRow);
	assertEquals(1, firstRow.getArity());
	assertEquals(BigDecimal.valueOf(-1000.5d), firstRow.getField(0));

	// drain the remaining rows and verify the total count
	long rowCount = 1;
	for (; !rowOrcInputFormat.reachedEnd(); rowCount++) {
		assertNotNull(rowOrcInputFormat.nextRecord(null));
	}
	assertEquals(6000, rowCount);
}
 
Example 4
Source Project: flink   Source File: EnumerateNestedFilesTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Test without nested directory and recursive.file.enumeration = true
 */
@Test
public void testNoNestedDirectoryTrue() {
	try {
		// a single flat file: recursive enumeration must still yield exactly one split
		String tempFilePath = TestFileUtils.createTempFile("foo");

		this.format.setFilePath(new Path(tempFilePath));
		this.config.setBoolean("recursive.file.enumeration", true);
		this.format.configure(this.config);

		FileInputSplit[] inputSplits = this.format.createInputSplits(1);
		Assert.assertEquals(1, inputSplits.length);
	} catch (Exception ex) {
		ex.printStackTrace();
		Assert.fail(ex.getMessage());
	}
}
 
Example 5
Source Project: flink   Source File: CsvInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testPojoTypeWithMappingInfoAndPartialField() throws Exception {
	File tempFile = File.createTempFile("CsvReaderPojoType", "tmp");
	tempFile.deleteOnExit();
	tempFile.setWritable(true);

	// try-with-resources closes the writer even if a write fails, and the
	// explicit charset avoids depending on the platform default encoding
	// (the test data is ASCII, so the encoded bytes are unchanged)
	try (OutputStreamWriter wrt = new OutputStreamWriter(
			new FileOutputStream(tempFile), java.nio.charset.StandardCharsets.UTF_8)) {
		wrt.write("123,3.123,AAA,BBB\n");
		wrt.write("456,1.123,BBB,AAA\n");
	}

	@SuppressWarnings("unchecked")
	PojoTypeInfo<PojoItem> typeInfo = (PojoTypeInfo<PojoItem>) TypeExtractor.createTypeInfo(PojoItem.class);
	// include-mask selects CSV columns 0 and 3, mapped onto POJO fields field1 and field4
	CsvInputFormat<PojoItem> inputFormat = new PojoCsvInputFormat<PojoItem>(new Path(tempFile.toURI().toString()), typeInfo, new String[]{"field1", "field4"}, new boolean[]{true, false, false, true});

	inputFormat.configure(new Configuration());
	FileInputSplit[] splits = inputFormat.createInputSplits(1);

	inputFormat.open(splits[0]);

	PojoItem item = new PojoItem();
	inputFormat.nextRecord(item);

	// first record: column 0 -> field1, column 3 -> field4
	assertEquals(123, item.field1);
	assertEquals("BBB", item.field4);
}
 
Example 6
Source Project: Flink-CEPplus   Source File: AvroInputFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates and positions an Avro {@link DataFileReader} for the given split.
 *
 * <p>The {@link DatumReader} implementation is chosen from the configured
 * {@code avroValueType}: generic records get a {@code GenericDatumReader},
 * specific-record subclasses a {@code SpecificDatumReader}, and anything else
 * a {@code ReflectDatumReader}.
 *
 * @param split the split to read; its path supplies the file length and its
 *              start/length define where reading for this split must stop
 * @return the opened reader
 * @throws IOException if the file system metadata or the Avro reader cannot be opened
 */
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
	DatumReader<E> datumReader;

	if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
		datumReader = new GenericDatumReader<E>();
	} else {
		datumReader = org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)
			? new SpecificDatumReader<E>(avroValueType) : new ReflectDatumReader<E>(avroValueType);
	}
	if (LOG.isInfoEnabled()) {
		LOG.info("Opening split {}", split);
	}

	// wraps the already-open input stream; the wrapper is given the total file
	// length so the Avro reader can seek within the file
	SeekableInput in = new FSDataInputStreamWrapper(stream, split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen());
	DataFileReader<E> dataFileReader = (DataFileReader) DataFileReader.openReader(in, datumReader);

	if (LOG.isDebugEnabled()) {
		LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
	}

	// record where this split ends and reset the per-sync-point record counter
	end = split.getStart() + split.getLength();
	recordsReadSinceLastSync = 0;
	return dataFileReader;
}
 
Example 7
Source Project: Flink-CEPplus   Source File: AvroInputFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Restores the format to a previously checkpointed position inside the split.
 *
 * <p>The state tuple holds (last sync point, records read since that sync).
 * After reopening the split, the reader seeks back to the last sync marker and
 * replays — discarding — the records that were already consumed before the
 * checkpoint, so reading resumes exactly where it left off.
 *
 * @param split the split to reopen; must not be null
 * @param state checkpoint state as (lastSync, recordsReadSinceLastSync); must not be null
 * @throws IOException if the split cannot be opened or the seek fails
 */
@Override
public void reopen(FileInputSplit split, Tuple2<Long, Long> state) throws IOException {
	Preconditions.checkNotNull(split, "reopen() cannot be called on a null split.");
	Preconditions.checkNotNull(state, "reopen() cannot be called with a null initial state.");

	// the state fields are restored in a finally block, so they are set even
	// if open() throws — NOTE(review): presumably so a retry sees the state; confirm
	try {
		this.open(split);
	} finally {
		if (state.f0 != -1) {
			lastSync = state.f0;
			recordsReadSinceLastSync = state.f1;
		}
	}

	// -1 means "no sync point recorded yet" — nothing to replay in that case
	if (lastSync != -1) {
		// open and read until the record we were before
		// the checkpoint and discard the values
		dataFileReader.seek(lastSync);
		for (int i = 0; i < recordsReadSinceLastSync; i++) {
			dataFileReader.next(null);
		}
	}
}
 
Example 8
Source Project: flink   Source File: ParquetMapInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
@SuppressWarnings("unchecked")
public void testProjectedReadMapFromNestedRecord() throws IOException {
	// Write one nested record to Parquet, then read it back with a projection
	// selecting only the "nestedMap" field.
	Tuple3<Class<? extends SpecificRecord>, SpecificRecord, Row> nested = TestUtil.getNestedRecordTestData();
	Path path = TestUtil.createTempParquetFile(tempRoot.getRoot(), TestUtil.NESTED_SCHEMA, Collections.singletonList(nested.f1));
	MessageType nestedType = SCHEMA_CONVERTER.convert(TestUtil.NESTED_SCHEMA);
	ParquetMapInputFormat inputFormat = new ParquetMapInputFormat(path, nestedType);

	// project the schema down to the single map-typed field
	inputFormat.selectFields(Collections.singletonList("nestedMap").toArray(new String[0]));
	inputFormat.setRuntimeContext(TestUtil.getMockRuntimeContext());

	FileInputSplit[] splits = inputFormat.createInputSplits(1);
	assertEquals(1, splits.length);
	inputFormat.open(splits[0]);

	// the projected record surfaces as a Map containing only the selected field
	Map map = inputFormat.nextRecord(null);
	assertNotNull(map);
	assertEquals(1, map.size());

	// drill into nestedMap -> mapItem and verify both of its entries
	Map<String, String> mapItem = (Map<String, String>) ((Map) map.get("nestedMap")).get("mapItem");
	assertEquals(2, mapItem.size());
	assertEquals("map", mapItem.get("type"));
	assertEquals("hashMap", mapItem.get("value"));
}
 
Example 9
Source Project: flink   Source File: GenericCsvInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testReadTooShortInputLenient() throws IOException {
	try {
		// the middle line has only 4 of the 5 configured fields
		final String fileContent = "666|777|888|999|555\n111|222|333|444\n666|777|888|999|555";
		final FileInputSplit split = createTempFile(fileContent);

		final Configuration parameters = new Configuration();
		format.setFieldDelimiter("|");
		format.setFieldTypesGeneric(IntValue.class, IntValue.class, IntValue.class, IntValue.class, IntValue.class);
		// lenient mode: a malformed line yields null from nextRecord instead of throwing
		format.setLenient(true);

		format.configure(parameters);
		format.open(split);

		Value[] values = createIntValues(5);

		assertNotNull(format.nextRecord(values));	// line okay
		assertNull(format.nextRecord(values));	// line too short
		assertNotNull(format.nextRecord(values));	// line okay
	}
	catch (Exception ex) {
		fail("Test failed due to a " + ex.getClass().getSimpleName() + ": " + ex.getMessage());
	}
}
 
Example 10
Source Project: Flink-CEPplus   Source File: ReplicatingDataSourceTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Tests compiler fail for join program with replicated data source behind rebalance.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindRebalance() {
	ExecutionEnvironment executionEnv = ExecutionEnvironment.createLocalEnvironment();
	executionEnv.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> stringTupleInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> replicatingFormat =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
					new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), stringTupleInfo));

	DataSet<Tuple1<String>> replicatedSource =
			executionEnv.createInput(replicatingFormat, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> csvSource = executionEnv.readCsvFile("/some/otherpath").types(String.class);

	// a rebalance sits between the replicated source and the join
	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> sink = replicatedSource
			.rebalance()
			.join(csvSource).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	// compiling the plan is expected to throw the CompilerException
	compileNoStats(executionEnv.createProgramPlan());
}
 
Example 11
Source Project: flink   Source File: CsvInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testPojoTypeWithMappingInformation() throws Exception {
	File tempFile = File.createTempFile("CsvReaderPojoType", "tmp");
	tempFile.deleteOnExit();
	tempFile.setWritable(true);

	// try-with-resources closes the writer even if a write fails, and the
	// explicit charset avoids depending on the platform default encoding
	// (the test data is ASCII, so the encoded bytes are unchanged)
	try (OutputStreamWriter wrt = new OutputStreamWriter(
			new FileOutputStream(tempFile), java.nio.charset.StandardCharsets.UTF_8)) {
		wrt.write("123,3.123,AAA,BBB\n");
		wrt.write("456,1.123,BBB,AAA\n");
	}

	@SuppressWarnings("unchecked")
	PojoTypeInfo<PojoItem> typeInfo = (PojoTypeInfo<PojoItem>) TypeExtractor.createTypeInfo(PojoItem.class);
	// CSV columns are mapped by name onto POJO fields in the given order
	CsvInputFormat<PojoItem> inputFormat = new PojoCsvInputFormat<PojoItem>(new Path(tempFile.toURI().toString()), typeInfo, new String[]{"field1", "field3", "field2", "field4"});

	inputFormat.configure(new Configuration());
	FileInputSplit[] splits = inputFormat.createInputSplits(1);

	inputFormat.open(splits[0]);

	validatePojoItem(inputFormat);
}
 
Example 12
Source Project: flink   Source File: GraphCreationWithCsvITCase.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testCreateWithOnlyEdgesCsvFile() throws Exception {
	/*
	 * Test with one CSV file containing only edge data. Also tests the configuration method ignoreFirstLineEdges()
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	// "header" is the first line, which ignoreFirstLineEdges() must skip
	final String fileContent2 = "header\n1,2,ot\n" +
			"3,2,tt\n" +
			"3,1,to\n";

	final FileInputSplit split2 = createTempFile(fileContent2);
	Graph<Long, NullValue, String> graph = Graph.fromCsvReader(split2.getPath().toString(), env)
			.ignoreFirstLineEdges()
			.ignoreCommentsVertices("hi")
			.edgeTypes(Long.class, String.class);

	// vertices are derived from the edges, so both vertex values appear as (null)
	List<Triplet<Long, NullValue, String>> result = graph.getTriplets().collect();
	expectedResult = "1,2,(null),(null),ot\n" +
			"3,2,(null),(null),tt\n" +
			"3,1,(null),(null),to\n";

	compareResultAsTuples(result, expectedResult);
}
 
Example 13
Source Project: flink   Source File: GenericCsvInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testReadTooShortInputLenient() throws IOException {
	try {
		// the middle line has only 4 of the 5 expected fields
		final String fileContent = "666|777|888|999|555\n111|222|333|444\n666|777|888|999|555";
		final FileInputSplit inputSplit = createTempFile(fileContent);

		final Configuration parameters = new Configuration();
		format.setFieldDelimiter("|");
		format.setFieldTypesGeneric(IntValue.class, IntValue.class, IntValue.class, IntValue.class, IntValue.class);
		format.setLenient(true);

		format.configure(parameters);
		format.open(inputSplit);

		Value[] reuse = createIntValues(5);

		// in lenient mode a too-short line yields null rather than an exception
		assertNotNull(format.nextRecord(reuse));	// complete line
		assertNull(format.nextRecord(reuse));	// truncated line
		assertNotNull(format.nextRecord(reuse));	// complete line
	}
	catch (Exception ex) {
		fail("Test failed due to a " + ex.getClass().getSimpleName() + ": " + ex.getMessage());
	}
}
 
Example 14
Source Project: flink   Source File: DelimitedInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testReadWithoutTrailingDelimiter() throws IOException {
	// 2. test case
	// two records separated by the default '\n' delimiter, with no delimiter after the last
	final String content = "my key|my val$$$my key2\n$$ctd.$$|my value2";
	final FileInputSplit inputSplit = createTempFile(content);

	// default delimiter = '\n'
	format.configure(new Configuration());
	format.open(inputSplit);

	final String firstRecord = format.nextRecord(null);
	final String secondRecord = format.nextRecord(null);

	assertNotNull(firstRecord);
	assertEquals("my key|my val$$$my key2", firstRecord);

	assertNotNull(secondRecord);
	assertEquals("$$ctd.$$|my value2", secondRecord);

	// the missing trailing delimiter must not lose or duplicate the last record
	assertNull(format.nextRecord(null));
	assertTrue(format.reachedEnd());
}
 
Example 15
Source Project: Flink-CEPplus   Source File: DelimitedInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testDelimiterOnBufferBoundary() throws IOException {
	// records contain near-miss delimiter fragments; with a 12-byte read buffer the
	// 7-byte delimiter is forced to straddle buffer boundaries
	String[] expectedRecords = new String[]{"1234567890<DEL?NO!>1234567890", "1234567890<DEL?NO!>1234567890", "<DEL?NO!>"};
	String delimiter = "<DELIM>";
	String fileContent = StringUtils.join(expectedRecords, delimiter);

	final FileInputSplit inputSplit = createTempFile(fileContent);

	format.setBufferSize(12);
	format.setDelimiter(delimiter);
	format.configure(new Configuration());
	format.open(inputSplit);

	// every record must come back intact despite the split delimiter
	for (int i = 0; i < expectedRecords.length; i++) {
		assertEquals(expectedRecords[i], format.nextRecord(null));
	}

	assertNull(format.nextRecord(null));
	assertTrue(format.reachedEnd());

	format.close();
}
 
Example 16
Source Project: flink   Source File: DelimitedInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testReadWithTrailingDelimiter() throws IOException {
	// 2. test case
	// two records, each terminated by the default '\n' delimiter (including the last)
	final String myString = "my key|my val$$$my key2\n$$ctd.$$|my value2\n";
	final FileInputSplit split = createTempFile(myString);

	final Configuration parameters = new Configuration();
	// default delimiter = '\n'

	format.configure(parameters);
	format.open(split);

	String first = format.nextRecord(null);
	String second = format.nextRecord(null);

	assertNotNull(first);
	assertNotNull(second);

	assertEquals("my key|my val$$$my key2", first);
	assertEquals("$$ctd.$$|my value2", second);

	// the trailing delimiter must not produce an empty third record
	assertNull(format.nextRecord(null));
	assertTrue(format.reachedEnd());
}
 
Example 17
Source Project: flink   Source File: EnumerateNestedFilesTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Test with one nested directory and recursive.file.enumeration = true
 */
@Test
public void testOneNestedDirectoryTrue() {
	try {
		// build <tmp>/<first>/<second>
		File insideNestedDir = tempFolder.newFolder(TestFileUtils.randomFileName(), TestFileUtils.randomFileName());
		File nestedDir = insideNestedDir.getParentFile();

		// one file in the first-level dir, two files in the nested dir
		TestFileUtils.createTempFileInDirectory(nestedDir.getAbsolutePath(), "paella");
		TestFileUtils.createTempFileInDirectory(insideNestedDir.getAbsolutePath(), "kalamari");
		TestFileUtils.createTempFileInDirectory(insideNestedDir.getAbsolutePath(), "fideua");

		this.format.setFilePath(new Path(nestedDir.toURI().toString()));
		this.config.setBoolean("recursive.file.enumeration", true);
		this.format.configure(this.config);

		// recursive enumeration must discover all three files
		FileInputSplit[] inputSplits = this.format.createInputSplits(1);
		Assert.assertEquals(3, inputSplits.length);
	} catch (Exception ex) {
		ex.printStackTrace();
		Assert.fail(ex.getMessage());
	}
}
 
Example 18
@Test
public void parseBitcoinRawBlock() throws HadoopCryptoLedgerConfigurationException, IOException {
	// locate the genesis-block fixture on the test classpath
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName = "genesis.blk";
	String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
	Path file = new Path(fileNameBlock);
	FileInputSplit blockInputSplit = new FileInputSplit(0, file, 0, -1, null);

	// 1 MiB max block size, mainnet magic bytes, no direct-buffer mode
	BitcoinRawBlockFlinkInputFormat inputFormat = new BitcoinRawBlockFlinkInputFormat(1024 * 1024, "F9BEB4D9", false);
	inputFormat.open(blockInputSplit);
	assertFalse(inputFormat.reachedEnd(), "End not reached");

	// the genesis file contains exactly one block of 293 bytes
	BytesWritable reuse = new BytesWritable();
	BytesWritable nextBlock = inputFormat.nextRecord(reuse);
	assertNotNull(nextBlock, "First Block returned");
	assertEquals(293, nextBlock.getLength(), "First Block must have size of 293");

	nextBlock = inputFormat.nextRecord(reuse);
	assertNull(nextBlock, "No further block");
	assertTrue(inputFormat.reachedEnd(), "End reached");
}
 
Example 19
Source Project: Flink-CEPplus   Source File: PrimitiveInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testIntegerInput() throws IOException {
	try {
		// two integers, each terminated by the '|' delimiter
		final String fileContent = "111|222|";
		final FileInputSplit split = createInputSplit(fileContent);

		final PrimitiveInputFormat<Integer> format = new PrimitiveInputFormat<Integer>(PATH, "|", Integer.class);

		format.configure(new Configuration());
		format.open(split);

		Integer result = null;
		result = format.nextRecord(result);
		assertEquals(Integer.valueOf(111), result);

		result = format.nextRecord(result);
		assertEquals(Integer.valueOf(222), result);

		// after the last value the format must return null and report end-of-input
		result = format.nextRecord(result);
		assertNull(result);
		assertTrue(format.reachedEnd());
	}
	catch (Exception ex) {
		fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
	}
}
 
Example 20
Source Project: flink   Source File: AvroInputFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Restores the format to a previously checkpointed position inside the split.
 *
 * <p>The state tuple holds (last sync point, records read since that sync).
 * After reopening the split, the reader seeks back to the last sync marker and
 * replays — discarding — the records that were already consumed before the
 * checkpoint, so reading resumes exactly where it left off.
 *
 * @param split the split to reopen; must not be null
 * @param state checkpoint state as (lastSync, recordsReadSinceLastSync); must not be null
 * @throws IOException if the split cannot be opened or the seek fails
 */
@Override
public void reopen(FileInputSplit split, Tuple2<Long, Long> state) throws IOException {
	Preconditions.checkNotNull(split, "reopen() cannot be called on a null split.");
	Preconditions.checkNotNull(state, "reopen() cannot be called with a null initial state.");

	// the state fields are restored in a finally block, so they are set even
	// if open() throws — NOTE(review): presumably so a retry sees the state; confirm
	try {
		this.open(split);
	} finally {
		if (state.f0 != -1) {
			lastSync = state.f0;
			recordsReadSinceLastSync = state.f1;
		}
	}

	// -1 means "no sync point recorded yet" — nothing to replay in that case
	if (lastSync != -1) {
		// open and read until the record we were before
		// the checkpoint and discard the values
		dataFileReader.seek(lastSync);
		for (int i = 0; i < recordsReadSinceLastSync; i++) {
			dataFileReader.next(null);
		}
	}
}
 
Example 21
Source Project: Flink-CEPplus   Source File: DelimitedInputFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Opens the given input split. This method opens the input stream to the specified file, allocates read buffers
 * and positions the stream at the correct position, making sure that any partial record at the beginning is skipped.
 *
 * @param split The input split to open.
 *
 * @see org.apache.flink.api.common.io.FileInputFormat#open(org.apache.flink.core.fs.FileInputSplit)
 */
@Override
public void open(FileInputSplit split) throws IOException {
	super.open(split);
	initBuffers();

	this.offset = splitStart;
	if (this.splitStart != 0) {
		// not the first split: seek to the split start and read one line to skip
		// the partial record that belongs to the preceding split
		this.stream.seek(offset);
		readLine();
		// if the first partial record already pushes the stream over
		// the limit of our split, then no record starts within this split
		if (this.overLimit) {
			this.end = true;
		}
	} else {
		// first split: start filling the read buffer from the beginning of the file
		fillBuffer(0);
	}
}
 
Example 22
Source Project: flink   Source File: ReplicatingDataSourceTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Tests compiler fail for join program with replicated data source behind reduce.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindReduce() {
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	// a reduce sits between the replicated source and the join, which the
	// optimizer is expected to reject
	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.reduce(new LastReduce())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler; the expected CompilerException surfaces here
	OptimizedPlan oPlan = compileNoStats(plan);
}
 
Example 23
Source Project: flink   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testReadDecimalTypeFile() throws IOException {
	// Read an ORC file containing a single DECIMAL column.
	rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	// a single small file should yield exactly one split
	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	assertEquals(1, splits.length);
	rowOrcInputFormat.openInputFormat();
	rowOrcInputFormat.open(splits[0]);

	assertFalse(rowOrcInputFormat.reachedEnd());
	Row row = rowOrcInputFormat.nextRecord(null);

	// validate first row
	assertNotNull(row);
	assertEquals(1, row.getArity());
	assertEquals(BigDecimal.valueOf(-1000.5d), row.getField(0));

	// check correct number of rows
	long cnt = 1;
	while (!rowOrcInputFormat.reachedEnd()) {
		assertNotNull(rowOrcInputFormat.nextRecord(null));
		cnt++;
	}
	assertEquals(6000, cnt);
}
 
Example 24
Source Project: flink   Source File: CsvInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testPojoType() throws Exception {
	File tempFile = File.createTempFile("CsvReaderPojoType", "tmp");
	tempFile.deleteOnExit();
	tempFile.setWritable(true);

	// try-with-resources closes the writer even if a write fails, and the
	// explicit charset avoids depending on the platform default encoding
	// (the test data is ASCII, so the encoded bytes are unchanged)
	try (OutputStreamWriter wrt = new OutputStreamWriter(
			new FileOutputStream(tempFile), java.nio.charset.StandardCharsets.UTF_8)) {
		wrt.write("123,AAA,3.123,BBB\n");
		wrt.write("456,BBB,1.123,AAA\n");
	}

	@SuppressWarnings("unchecked")
	PojoTypeInfo<PojoItem> typeInfo = (PojoTypeInfo<PojoItem>) TypeExtractor.createTypeInfo(PojoItem.class);
	// no explicit field mapping: CSV columns map onto POJO fields by default order
	CsvInputFormat<PojoItem> inputFormat = new PojoCsvInputFormat<PojoItem>(new Path(tempFile.toURI().toString()), typeInfo);

	inputFormat.configure(new Configuration());
	FileInputSplit[] splits = inputFormat.createInputSplits(1);

	inputFormat.open(splits[0]);

	validatePojoItem(inputFormat);
}
 
Example 25
Source Project: Flink-CEPplus   Source File: DelimitedInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testReadCustomDelimiterWithCharset() throws IOException {
	// Unicode row fragments
	String[] records = new String[]{"\u020e\u021f\u05c0\u020b\u020f", "Apache", "\nFlink", "\u0000", "\u05c0"};

	// Unicode delimiter
	String delimiter = "\u05c0\u05c0";

	String fileContent = StringUtils.join(records, delimiter);

	// run the same data through three encodings; file and format must agree on each
	for (final String charset : new String[]{ "UTF-8", "UTF-16BE", "UTF-16LE" }) {
		// use charset when instantiating the record String
		DelimitedInputFormat<String> format = new DelimitedInputFormat<String>() {
			@Override
			public String readRecord(String reuse, byte[] bytes, int offset, int numBytes) throws IOException {
				return new String(bytes, offset, numBytes, charset);
			}
		};
		// dummy path: the actual data comes from the split created below
		format.setFilePath("file:///some/file/that/will/not/be/read");

		final FileInputSplit split = createTempFile(fileContent, charset);

		format.setDelimiter(delimiter);
		// use the same encoding to parse the file as used to read the file;
		// the delimiter is reinterpreted when the charset is set
		format.setCharset(charset);
		format.configure(new Configuration());
		format.open(split);

		// every record must round-trip unchanged through the chosen encoding
		for (String record : records) {
			String value = format.nextRecord(null);
			assertEquals(record, value);
		}

		assertNull(format.nextRecord(null));
		assertTrue(format.reachedEnd());
	}
}
 
Example 26
Source Project: Flink-CEPplus   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reading from a path that does not exist must fail with FileNotFoundException.
 */
@Test(expected = FileNotFoundException.class)
public void testInvalidPath() throws IOException{
	rowOrcInputFormat =
		new OrcRowInputFormat("/does/not/exist", TEST_SCHEMA_FLAT, new Configuration());
	rowOrcInputFormat.openInputFormat();
	// NOTE(review): the exception presumably surfaces from createInputSplits or open — confirm
	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.open(inputSplits[0]);
}
 
Example 27
Source Project: Flink-CEPplus   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testProjectionMaskNested() throws IOException{
	// Verify that selecting top-level fields of a nested schema produces the
	// correct per-column include mask in the ORC reader options.
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());

	OrcRowInputFormat spy = spy(rowOrcInputFormat);

	// mock options to check configuration of ORC reader
	Reader.Options options = new Reader.Options();
	doReturn(options).when(spy).getOptions(any());

	// select two nested fields (9, 11) and one flat field (2)
	spy.selectFields(9, 11, 2);
	spy.openInputFormat();
	FileInputSplit[] splits = spy.createInputSplits(1);
	spy.open(splits[0]);

	// top-level struct is false
	boolean[] expected = new boolean[]{
		false, // top level
		false, false, // flat fields 0, 1 are out
		true, // flat field 2 is in
		false, false, false, false, false, false, // flat fields 3, 4, 5, 6, 7, 8 are out
		true, true, true, true, true, // nested field 9 is in
		false, false, false, false, // nested field 10 is out
		true, true, true, true, true}; // nested field 11 is in
	assertArrayEquals(expected, options.getInclude());
}
 
Example 28
Source Project: flink   Source File: RowCsvInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes the given content to a fresh UTF-8 temp file and wraps it in a single
 * {@link FileInputSplit} covering the whole file.
 *
 * @param content the file content, encoded as UTF-8
 * @return a split spanning the entire temp file, hosted on "localhost"
 * @throws IOException if the file cannot be created or written
 */
private static FileInputSplit createTempFile(String content) throws IOException {
	File tempFile = File.createTempFile("test_contents", "tmp");
	tempFile.deleteOnExit();
	// try-with-resources closes the writer even if write() throws
	try (OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile), StandardCharsets.UTF_8)) {
		wrt.write(content);
	}
	return new FileInputSplit(0, new Path(tempFile.toURI().toString()), 0, tempFile.length(), new String[]{"localhost"});
}
 
Example 29
Source Project: flink   Source File: ReplicatingDataSourceTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tests join program with replicated data source behind map partition.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMapPartition() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	// a mapPartition between the replicated source and the join keeps
	// replication valid (parallelism is unchanged), so compilation must succeed
	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.mapPartition(new IdPMap())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// when join should have forward strategy on both sides
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
 
Example 30
Source Project: Flink-CEPplus   Source File: GraphCreationWithCsvITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes the given content to a fresh UTF-8 temp file and wraps it in a single
 * {@link FileInputSplit} covering the whole file.
 *
 * @param content the file content, encoded as UTF-8
 * @return a split spanning the entire temp file, hosted on "localhost"
 * @throws IOException if the file cannot be created or written
 */
private FileInputSplit createTempFile(String content) throws IOException {
	File tempFile = File.createTempFile("test_contents", "tmp");
	tempFile.deleteOnExit();

	// try-with-resources closes the writer even if write() throws;
	// StandardCharsets.UTF_8 replaces the equivalent Charset.forName("UTF-8") lookup
	try (OutputStreamWriter wrt = new OutputStreamWriter(
			new FileOutputStream(tempFile), java.nio.charset.StandardCharsets.UTF_8)) {
		wrt.write(content);
	}

	return new FileInputSplit(0, new Path(tempFile.toURI().toString()), 0,
						tempFile.length(), new String[] {"localhost"});
}