Java Code Examples for org.apache.flink.api.java.typeutils.TupleTypeInfo#getBasicTupleTypeInfo()

The following examples show how to use org.apache.flink.api.java.typeutils.TupleTypeInfo#getBasicTupleTypeInfo(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Builds the serializers, comparators, pair comparator, memory manager and I/O manager
 * that the tests rely on. Object reuse is disabled on the execution config used here.
 */
@Before
public void beforeTest() {
	final ExecutionConfig executionConfig = new ExecutionConfig();
	executionConfig.disableObjectReuse();

	// First input: (String, String) tuples; the comparator is created with key field array {0}.
	final TupleTypeInfo<Tuple2<String, String>> firstType =
			TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class);
	serializer1 = firstType.createSerializer(executionConfig);
	comparator1 = firstType.createComparator(new int[]{0}, new boolean[]{true}, 0, executionConfig);

	// Second input: (String, Integer) tuples, set up the same way.
	final TupleTypeInfo<Tuple2<String, Integer>> secondType =
			TupleTypeInfo.getBasicTupleTypeInfo(String.class, Integer.class);
	serializer2 = secondType.createSerializer(executionConfig);
	comparator2 = secondType.createComparator(new int[]{0}, new boolean[]{true}, 0, executionConfig);

	pairComp = new GenericPairComparator<>(comparator1, comparator2);

	this.memoryManager = new MemoryManager(MEMORY_SIZE, 1);
	this.ioManager = new IOManagerAsync();
}
 
Example 2
Source Project: flink   File: ReplicatingDataSourceTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tests join program with replicated data source behind flatMap.
 *
 * <p>A CSV source wrapped in a {@code ReplicatingInputFormat} is sent through a flatMap and
 * then joined with a regular CSV source. After compiling the plan, both inputs of the join
 * are expected to use the {@code FORWARD} ship strategy.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindFlatMap() {
	final ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment();
	environment.setParallelism(DEFAULT_PARALLELISM);

	// Replicated side: a tuple CSV format wrapped in a ReplicatingInputFormat.
	final TupleTypeInfo<Tuple1<String>> csvType = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	final TupleCsvInputFormat<Tuple1<String>> csvFormat =
			new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), csvType);
	final ReplicatingInputFormat<Tuple1<String>, FileInputSplit> replicatingFormat =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(csvFormat);

	final DataSet<Tuple1<String>> replicated = environment.createInput(
			replicatingFormat, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	final DataSet<Tuple1<String>> regular = environment.readCsvFile("/some/otherpath").types(String.class);

	// The returned sink handle is not used by this test, so it is not kept in a local.
	replicated
			.flatMap(new IdFlatMap())
			.join(regular).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	// Submit the plan to the compiler.
	final OptimizedPlan optimized = compileNoStats(environment.createProgramPlan());

	// The join is the direct predecessor of the only sink; both of its inputs must be forwarded.
	final SinkPlanNode sink = optimized.getDataSinks().iterator().next();
	final DualInputPlanNode join = (DualInputPlanNode) sink.getPredecessor();

	final ShipStrategyType firstInputStrategy = join.getInput1().getShipStrategy();
	final ShipStrategyType secondInputStrategy = join.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, firstInputStrategy);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, secondInputStrategy);
}
 
Example 3
/**
 * Tests join program with replicated data source behind map partition.
 *
 * <p>A CSV source wrapped in a {@code ReplicatingInputFormat} is sent through a mapPartition
 * and then joined with a regular CSV source. After compiling the plan, both inputs of the
 * join are expected to use the {@code FORWARD} ship strategy.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMapPartition() {
	final ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment();
	environment.setParallelism(DEFAULT_PARALLELISM);

	// Replicated side: a tuple CSV format wrapped in a ReplicatingInputFormat.
	final TupleTypeInfo<Tuple1<String>> csvType = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	final TupleCsvInputFormat<Tuple1<String>> csvFormat =
			new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), csvType);
	final ReplicatingInputFormat<Tuple1<String>, FileInputSplit> replicatingFormat =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(csvFormat);

	final DataSet<Tuple1<String>> replicated = environment.createInput(
			replicatingFormat, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	final DataSet<Tuple1<String>> regular = environment.readCsvFile("/some/otherpath").types(String.class);

	// The returned sink handle is not used by this test, so it is not kept in a local.
	replicated
			.mapPartition(new IdPMap())
			.join(regular).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	// Submit the plan to the compiler.
	final OptimizedPlan optimized = compileNoStats(environment.createProgramPlan());

	// The join is the direct predecessor of the only sink; both of its inputs must be forwarded.
	final SinkPlanNode sink = optimized.getDataSinks().iterator().next();
	final DualInputPlanNode join = (DualInputPlanNode) sink.getPredecessor();

	final ShipStrategyType firstInputStrategy = join.getInput1().getShipStrategy();
	final ShipStrategyType secondInputStrategy = join.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, firstInputStrategy);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, secondInputStrategy);
}
 
Example 4
Source Project: flink   File: CsvInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads two lines of ten "|x|"-delimited integers through a {@code TupleCsvInputFormat}
 * that is constructed with a boolean include mask, and verifies that the values at
 * positions 0, 3 and 7 of each line end up in the resulting {@code Tuple3}.
 *
 * @throws IOException kept from the original signature; exceptions raised inside the body
 *         are rethrown as an {@link AssertionError} that carries the original as its cause
 */
@Test
public void testReadSparseWithNullFieldsForTypes() throws IOException {
	try {
		final String fileContent = "111|x|222|x|333|x|444|x|555|x|666|x|777|x|888|x|999|x|000|x|\n" +
				"000|x|999|x|888|x|777|x|666|x|555|x|444|x|333|x|222|x|111|x|";
		final FileInputSplit split = createTempFile(fileContent);

		final TupleTypeInfo<Tuple3<Integer, Integer, Integer>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(Integer.class, Integer.class, Integer.class);
		// Mask entries set to true mark the columns that are read (indices 0, 3 and 7).
		final CsvInputFormat<Tuple3<Integer, Integer, Integer>> format = new TupleCsvInputFormat<Tuple3<Integer, Integer, Integer>>(PATH, typeInfo, new boolean[]{true, false, false, true, false, false, false, true});

		format.setFieldDelimiter("|x|");

		format.configure(new Configuration());
		format.open(split);

		Tuple3<Integer, Integer, Integer> result = new Tuple3<Integer, Integer, Integer>();

		// First line.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals(Integer.valueOf(111), result.f0);
		assertEquals(Integer.valueOf(444), result.f1);
		assertEquals(Integer.valueOf(888), result.f2);

		// Second line; the text "000" in the file is expected to parse to 0.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals(Integer.valueOf(0), result.f0);
		assertEquals(Integer.valueOf(777), result.f1);
		assertEquals(Integer.valueOf(333), result.f2);

		// No third record.
		result = format.nextRecord(result);
		assertNull(result);
		assertTrue(format.reachedEnd());
	}
	catch (Exception ex) {
		// Same failure message as before, but with the cause attached so the stack trace survives.
		throw new AssertionError("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage(), ex);
	}
}
 
Example 5
Source Project: flink   File: CsvInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads two lines of ten "|x|"-delimited integers through a {@code TupleCsvInputFormat}
 * that is constructed with a boolean include mask, and verifies that the values at
 * positions 0, 3 and 7 of each line end up in the resulting {@code Tuple3}.
 *
 * @throws IOException kept from the original signature; exceptions raised inside the body
 *         are rethrown as an {@link AssertionError} that carries the original as its cause
 */
@Test
public void testReadSparseWithNullFieldsForTypes() throws IOException {
	try {
		final String fileContent = "111|x|222|x|333|x|444|x|555|x|666|x|777|x|888|x|999|x|000|x|\n" +
				"000|x|999|x|888|x|777|x|666|x|555|x|444|x|333|x|222|x|111|x|";
		final FileInputSplit split = createTempFile(fileContent);

		final TupleTypeInfo<Tuple3<Integer, Integer, Integer>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(Integer.class, Integer.class, Integer.class);
		// Mask entries set to true mark the columns that are read (indices 0, 3 and 7).
		final CsvInputFormat<Tuple3<Integer, Integer, Integer>> format = new TupleCsvInputFormat<Tuple3<Integer, Integer, Integer>>(PATH, typeInfo, new boolean[]{true, false, false, true, false, false, false, true});

		format.setFieldDelimiter("|x|");

		format.configure(new Configuration());
		format.open(split);

		Tuple3<Integer, Integer, Integer> result = new Tuple3<Integer, Integer, Integer>();

		// First line.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals(Integer.valueOf(111), result.f0);
		assertEquals(Integer.valueOf(444), result.f1);
		assertEquals(Integer.valueOf(888), result.f2);

		// Second line; the text "000" in the file is expected to parse to 0.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals(Integer.valueOf(0), result.f0);
		assertEquals(Integer.valueOf(777), result.f1);
		assertEquals(Integer.valueOf(333), result.f2);

		// No third record.
		result = format.nextRecord(result);
		assertNull(result);
		assertTrue(format.reachedEnd());
	}
	catch (Exception ex) {
		// Same failure message as before, but with the cause attached so the stack trace survives.
		throw new AssertionError("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage(), ex);
	}
}
 
Example 6
Source Project: flink   File: ReplicatingDataSourceTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tests join program with replicated data source behind map partition.
 *
 * <p>The replicated source is created from a tuple CSV format wrapped in a
 * {@code ReplicatingInputFormat}; it is piped through a mapPartition and joined with a plain
 * CSV source. The compiled plan must use {@code FORWARD} shipping on both join inputs.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMapPartition() {
	final ExecutionEnvironment localEnv = ExecutionEnvironment.createLocalEnvironment();
	localEnv.setParallelism(DEFAULT_PARALLELISM);

	// Build the replicating input format around a CSV format for single-field string tuples.
	final TupleTypeInfo<Tuple1<String>> stringTupleType = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	final TupleCsvInputFormat<Tuple1<String>> wrappedFormat =
			new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), stringTupleType);
	final ReplicatingInputFormat<Tuple1<String>, FileInputSplit> replicating =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(wrappedFormat);

	final DataSet<Tuple1<String>> replicatedSource = localEnv.createInput(
			replicating, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	final DataSet<Tuple1<String>> plainSource = localEnv.readCsvFile("/some/otherpath").types(String.class);

	// The sink handle returned by writeAsText is not needed afterwards.
	replicatedSource
			.mapPartition(new IdPMap())
			.join(plainSource).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	// Compile the program plan.
	final OptimizedPlan compiledPlan = compileNoStats(localEnv.createProgramPlan());

	// Walk from the single sink to the join that feeds it and inspect both join inputs.
	final SinkPlanNode sinkPlanNode = compiledPlan.getDataSinks().iterator().next();
	final DualInputPlanNode joinPlanNode = (DualInputPlanNode) sinkPlanNode.getPredecessor();

	final ShipStrategyType strategyIn1 = joinPlanNode.getInput1().getShipStrategy();
	final ShipStrategyType strategyIn2 = joinPlanNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, strategyIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, strategyIn2);
}
 
Example 7
/**
 * Tests join program with replicated data source behind multiple map ops.
 *
 * <p>The replicated source passes through filter, mapPartition, flatMap and map (in that
 * order) before being joined with a plain CSV source. The compiled plan must use
 * {@code FORWARD} shipping on both join inputs.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMultiMaps() {
	final ExecutionEnvironment localEnv = ExecutionEnvironment.createLocalEnvironment();
	localEnv.setParallelism(DEFAULT_PARALLELISM);

	// Build the replicating input format around a CSV format for single-field string tuples.
	final TupleTypeInfo<Tuple1<String>> stringTupleType = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	final TupleCsvInputFormat<Tuple1<String>> wrappedFormat =
			new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), stringTupleType);
	final ReplicatingInputFormat<Tuple1<String>, FileInputSplit> replicating =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(wrappedFormat);

	final DataSet<Tuple1<String>> replicatedSource = localEnv.createInput(
			replicating, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	final DataSet<Tuple1<String>> plainSource = localEnv.readCsvFile("/some/otherpath").types(String.class);

	// The sink handle returned by writeAsText is not needed afterwards.
	replicatedSource
			.filter(new NoFilter())
			.mapPartition(new IdPMap())
			.flatMap(new IdFlatMap())
			.map(new IdMap())
			.join(plainSource).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	// Compile the program plan.
	final OptimizedPlan compiledPlan = compileNoStats(localEnv.createProgramPlan());

	// Walk from the single sink to the join that feeds it and inspect both join inputs.
	final SinkPlanNode sinkPlanNode = compiledPlan.getDataSinks().iterator().next();
	final DualInputPlanNode joinPlanNode = (DualInputPlanNode) sinkPlanNode.getPredecessor();

	final ShipStrategyType strategyIn1 = joinPlanNode.getInput1().getShipStrategy();
	final ShipStrategyType strategyIn2 = joinPlanNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, strategyIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, strategyIn2);
}
 
Example 8
/**
 * Reads two lines of ten "|x|"-delimited integers through a {@code TupleCsvInputFormat}
 * that is constructed with a boolean include mask, and verifies that the values at
 * positions 0, 3 and 7 of each line end up in the resulting {@code Tuple3}.
 *
 * @throws IOException kept from the original signature; exceptions raised inside the body
 *         are rethrown as an {@link AssertionError} that carries the original as its cause
 */
@Test
public void testReadSparseWithNullFieldsForTypes() throws IOException {
	try {
		final String fileContent = "111|x|222|x|333|x|444|x|555|x|666|x|777|x|888|x|999|x|000|x|\n" +
				"000|x|999|x|888|x|777|x|666|x|555|x|444|x|333|x|222|x|111|x|";
		final FileInputSplit split = createTempFile(fileContent);

		final TupleTypeInfo<Tuple3<Integer, Integer, Integer>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(Integer.class, Integer.class, Integer.class);
		// Mask entries set to true mark the columns that are read (indices 0, 3 and 7).
		final CsvInputFormat<Tuple3<Integer, Integer, Integer>> format = new TupleCsvInputFormat<Tuple3<Integer, Integer, Integer>>(PATH, typeInfo, new boolean[]{true, false, false, true, false, false, false, true});

		format.setFieldDelimiter("|x|");

		format.configure(new Configuration());
		format.open(split);

		Tuple3<Integer, Integer, Integer> result = new Tuple3<Integer, Integer, Integer>();

		// First line.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals(Integer.valueOf(111), result.f0);
		assertEquals(Integer.valueOf(444), result.f1);
		assertEquals(Integer.valueOf(888), result.f2);

		// Second line; the text "000" in the file is expected to parse to 0.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals(Integer.valueOf(0), result.f0);
		assertEquals(Integer.valueOf(777), result.f1);
		assertEquals(Integer.valueOf(333), result.f2);

		// No third record.
		result = format.nextRecord(result);
		assertNull(result);
		assertTrue(format.reachedEnd());
	}
	catch (Exception ex) {
		// Same failure message as before, but with the cause attached so the stack trace survives.
		throw new AssertionError("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage(), ex);
	}
}
 
Example 9
/**
 * Reads two lines of ten "|"-delimited integers through a {@code TupleCsvInputFormat}
 * that is constructed with the explicit column positions {0, 3, 7}, and verifies that the
 * values at those positions end up in the resulting {@code Tuple3}.
 *
 * @throws IOException kept from the original signature; exceptions raised inside the body
 *         are rethrown as an {@link AssertionError} that carries the original as its cause
 */
@Test
public void testReadSparseWithPositionSetter() throws IOException {
	try {
		final String fileContent = "111|222|333|444|555|666|777|888|999|000|\n000|999|888|777|666|555|444|333|222|111|";
		final FileInputSplit split = createTempFile(fileContent);

		final TupleTypeInfo<Tuple3<Integer, Integer, Integer>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(Integer.class, Integer.class, Integer.class);
		final CsvInputFormat<Tuple3<Integer, Integer, Integer>> format = new TupleCsvInputFormat<Tuple3<Integer, Integer, Integer>>(PATH, typeInfo, new int[]{0, 3, 7});

		format.setFieldDelimiter("|");

		format.configure(new Configuration());
		format.open(split);

		Tuple3<Integer, Integer, Integer> result = new Tuple3<Integer, Integer, Integer>();

		// First line.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals(Integer.valueOf(111), result.f0);
		assertEquals(Integer.valueOf(444), result.f1);
		assertEquals(Integer.valueOf(888), result.f2);

		// Second line; the text "000" in the file is expected to parse to 0.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals(Integer.valueOf(0), result.f0);
		assertEquals(Integer.valueOf(777), result.f1);
		assertEquals(Integer.valueOf(333), result.f2);

		// No third record.
		result = format.nextRecord(result);
		assertNull(result);
		assertTrue(format.reachedEnd());
	}
	catch (Exception ex) {
		// Same failure message as before, but with the cause attached so the stack trace survives.
		throw new AssertionError("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage(), ex);
	}
}
 
Example 10
Source Project: flink   File: ReplicatingDataSourceTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tests join program with replicated data source.
 *
 * <p>The replicated source is joined directly with a plain CSV source. The compiled plan
 * must use {@code FORWARD} shipping on both join inputs.
 */
@Test
public void checkJoinWithReplicatedSourceInput() {
	final ExecutionEnvironment localEnv = ExecutionEnvironment.createLocalEnvironment();
	localEnv.setParallelism(DEFAULT_PARALLELISM);

	// Build the replicating input format around a CSV format for single-field string tuples.
	final TupleTypeInfo<Tuple1<String>> stringTupleType = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	final TupleCsvInputFormat<Tuple1<String>> wrappedFormat =
			new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), stringTupleType);
	final ReplicatingInputFormat<Tuple1<String>, FileInputSplit> replicating =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(wrappedFormat);

	final DataSet<Tuple1<String>> replicatedSource = localEnv.createInput(
			replicating, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	final DataSet<Tuple1<String>> plainSource = localEnv.readCsvFile("/some/otherpath").types(String.class);

	// The sink handle returned by writeAsText is not needed afterwards.
	replicatedSource
			.join(plainSource).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	// Compile the program plan.
	final OptimizedPlan compiledPlan = compileNoStats(localEnv.createProgramPlan());

	// Walk from the single sink to the join that feeds it and inspect both join inputs.
	final SinkPlanNode sinkPlanNode = compiledPlan.getDataSinks().iterator().next();
	final DualInputPlanNode joinPlanNode = (DualInputPlanNode) sinkPlanNode.getPredecessor();

	final ShipStrategyType strategyIn1 = joinPlanNode.getInput1().getShipStrategy();
	final ShipStrategyType strategyIn2 = joinPlanNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, strategyIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, strategyIn2);
}
 
Example 11
/**
 * Writes one line of quoted, "|"-delimited strings to a temporary file and reads it back
 * through a {@code TupleCsvInputFormat} with quoted-string parsing enabled and the include
 * mask {true, false, true}. The first and third fields of the line are expected in
 * {@code f0} and {@code f1} of the resulting tuple, without the surrounding quotes.
 *
 * @throws Exception if creating, writing or reading the temporary file fails
 */
@Test
public void testQuotedStringParsingWithIncludeFields() throws Exception {
	final String fileContent = "\"20:41:52-1-3-2015\"|\"Re: Taskmanager memory error in Eclipse\"|" +
			"\"Blahblah <[email protected]>\"|\"blaaa|\"blubb\"";

	final File tempFile = File.createTempFile("CsvReaderQuotedString", "tmp");
	tempFile.deleteOnExit();
	tempFile.setWritable(true);

	// try-with-resources closes the writer even when write() throws.
	try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(tempFile))) {
		writer.write(fileContent);
	}

	TupleTypeInfo<Tuple2<String, String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class);
	CsvInputFormat<Tuple2<String, String>> inputFormat = new TupleCsvInputFormat<Tuple2<String, String>>(new Path(tempFile.toURI().toString()), typeInfo, new boolean[]{true, false, true});

	inputFormat.enableQuotedStringParsing('"');
	inputFormat.setFieldDelimiter("|");
	inputFormat.setDelimiter('\n');

	inputFormat.configure(new Configuration());
	FileInputSplit[] splits = inputFormat.createInputSplits(1);

	inputFormat.open(splits[0]);

	Tuple2<String, String> record = inputFormat.nextRecord(new Tuple2<String, String>());

	assertEquals("20:41:52-1-3-2015", record.f0);
	assertEquals("Blahblah <[email protected]>", record.f1);
}
 
Example 12
Source Project: flink   File: CsvInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes one line of quoted, "|"-delimited strings to a temporary file and reads it back
 * through a {@code TupleCsvInputFormat} with quoted-string parsing enabled and the include
 * mask {true, false, true}. The first and third fields of the line are expected in
 * {@code f0} and {@code f1} of the resulting tuple, without the surrounding quotes.
 *
 * @throws Exception if creating, writing or reading the temporary file fails
 */
@Test
public void testQuotedStringParsingWithIncludeFields() throws Exception {
	final String fileContent = "\"20:41:52-1-3-2015\"|\"Re: Taskmanager memory error in Eclipse\"|" +
			"\"Blahblah <[email protected]>\"|\"blaaa|\"blubb\"";

	final File tempFile = File.createTempFile("CsvReaderQuotedString", "tmp");
	tempFile.deleteOnExit();
	tempFile.setWritable(true);

	// try-with-resources closes the writer even when write() throws.
	try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(tempFile))) {
		writer.write(fileContent);
	}

	TupleTypeInfo<Tuple2<String, String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class);
	CsvInputFormat<Tuple2<String, String>> inputFormat = new TupleCsvInputFormat<Tuple2<String, String>>(new Path(tempFile.toURI().toString()), typeInfo, new boolean[]{true, false, true});

	inputFormat.enableQuotedStringParsing('"');
	inputFormat.setFieldDelimiter("|");
	inputFormat.setDelimiter('\n');

	inputFormat.configure(new Configuration());
	FileInputSplit[] splits = inputFormat.createInputSplits(1);

	inputFormat.open(splits[0]);

	Tuple2<String, String> record = inputFormat.nextRecord(new Tuple2<String, String>());

	assertEquals("20:41:52-1-3-2015", record.f0);
	assertEquals("Blahblah <[email protected]>", record.f1);
}
 
Example 13
Source Project: flink   File: ReplicatingDataSourceTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tests cross program with replicated data source.
 *
 * <p>The replicated source is crossed with a plain CSV source. The compiled plan must use
 * {@code FORWARD} shipping on both cross inputs.
 */
@Test
public void checkCrossWithReplicatedSourceInput() {
	final ExecutionEnvironment localEnv = ExecutionEnvironment.createLocalEnvironment();
	localEnv.setParallelism(DEFAULT_PARALLELISM);

	// Build the replicating input format around a CSV format for single-field string tuples.
	final TupleTypeInfo<Tuple1<String>> stringTupleType = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	final TupleCsvInputFormat<Tuple1<String>> wrappedFormat =
			new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), stringTupleType);
	final ReplicatingInputFormat<Tuple1<String>, FileInputSplit> replicating =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(wrappedFormat);

	final DataSet<Tuple1<String>> replicatedSource = localEnv.createInput(
			replicating, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	final DataSet<Tuple1<String>> plainSource = localEnv.readCsvFile("/some/otherpath").types(String.class);

	// The sink handle returned by writeAsText is not needed afterwards.
	replicatedSource
			.cross(plainSource)
			.writeAsText("/some/newpath");

	// Compile the program plan.
	final OptimizedPlan compiledPlan = compileNoStats(localEnv.createProgramPlan());

	// Walk from the single sink to the cross that feeds it and inspect both cross inputs.
	final SinkPlanNode sinkPlanNode = compiledPlan.getDataSinks().iterator().next();
	final DualInputPlanNode crossPlanNode = (DualInputPlanNode) sinkPlanNode.getPredecessor();

	final ShipStrategyType strategyIn1 = crossPlanNode.getInput1().getShipStrategy();
	final ShipStrategyType strategyIn2 = crossPlanNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, strategyIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, strategyIn2);
}
 
Example 14
Source Project: flink   File: CsvInputFormatTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Reads three "|"-delimited lines of string fields through a {@code TupleCsvInputFormat}
 * and verifies each record, including fields that are empty in the input, which are
 * expected to come back as empty strings.
 */
@Test
public void readStringFields() {
	try {
		final String fileContent = "abc|def|ghijk\nabc||hhg\n|||";
		final FileInputSplit split = createTempFile(fileContent);

		final TupleTypeInfo<Tuple3<String, String, String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class, String.class);
		final CsvInputFormat<Tuple3<String, String, String>> format = new TupleCsvInputFormat<Tuple3<String, String, String>>(PATH, "\n", "|", typeInfo);

		final Configuration parameters = new Configuration();
		format.configure(parameters);
		format.open(split);

		Tuple3<String, String, String> result = new Tuple3<String, String, String>();

		// Line 1: all fields populated.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals("abc", result.f0);
		assertEquals("def", result.f1);
		assertEquals("ghijk", result.f2);

		// Line 2: empty middle field.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals("abc", result.f0);
		assertEquals("", result.f1);
		assertEquals("hhg", result.f2);

		// Line 3: every field empty.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals("", result.f0);
		assertEquals("", result.f1);
		assertEquals("", result.f2);

		// End of input.
		result = format.nextRecord(result);
		assertNull(result);
		assertTrue(format.reachedEnd());
	}
	catch (Exception ex) {
		// Attach the cause instead of printing it, so the failure report carries the stack trace.
		throw new AssertionError("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage(), ex);
	}
}
 
Example 15
/**
 * Reads three lines of string fields separated by the two-character delimiter "|-"
 * through a {@code TupleCsvInputFormat} and verifies each record; the last input line
 * ends with a trailing field delimiter and is expected to yield three empty strings.
 */
@Test
public void readStringFieldsWithTrailingDelimiters() {
	try {
		final String fileContent = "abc|-def|-ghijk\nabc|-|-hhg\n|-|-|-\n";
		final FileInputSplit split = createTempFile(fileContent);

		final TupleTypeInfo<Tuple3<String, String, String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class, String.class);
		final CsvInputFormat<Tuple3<String, String, String>> format = new TupleCsvInputFormat<Tuple3<String, String, String>>(PATH, typeInfo);

		format.setFieldDelimiter("|-");

		format.configure(new Configuration());
		format.open(split);

		Tuple3<String, String, String> result = new Tuple3<String, String, String>();

		// Line 1: all fields populated.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals("abc", result.f0);
		assertEquals("def", result.f1);
		assertEquals("ghijk", result.f2);

		// Line 2: empty middle field.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals("abc", result.f0);
		assertEquals("", result.f1);
		assertEquals("hhg", result.f2);

		// Line 3: every field empty.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals("", result.f0);
		assertEquals("", result.f1);
		assertEquals("", result.f2);

		// End of input.
		result = format.nextRecord(result);
		assertNull(result);
		assertTrue(format.reachedEnd());
	}
	catch (Exception ex) {
		// Same failure message as before, but with the cause attached so the stack trace survives.
		throw new AssertionError("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage(), ex);
	}
}
 
Example 16
/**
 * Reads comma-separated rows whose trailing fields are progressively empty and checks that
 * empty fields come back as empty strings. The final row has only two fields and is
 * expected to trigger a {@code ParseException}.
 *
 * @throws Exception if setting up or reading the input fails unexpectedly
 */
@Test
public void testTailingEmptyFields() throws Exception {
	final String csv = "aa,bb,cc\n" + // ok
			"aa,bb,\n" +  // the last field is empty
			"aa,,\n" +    // the last two fields are empty
			",,\n" +      // all fields are empty
			"aa,bb";      // row too short
	final FileInputSplit inputSplit = createTempFile(csv);

	final TupleTypeInfo<Tuple3<String, String, String>> rowType =
			TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class, String.class);
	final CsvInputFormat<Tuple3<String, String, String>> csvFormat =
			new TupleCsvInputFormat<Tuple3<String, String, String>>(PATH, rowType);

	csvFormat.setFieldDelimiter(",");

	csvFormat.configure(new Configuration());
	csvFormat.open(inputSplit);

	Tuple3<String, String, String> row = new Tuple3<String, String, String>();

	// Row 1: all three fields present.
	row = csvFormat.nextRecord(row);
	assertNotNull(row);
	assertEquals("aa", row.f0);
	assertEquals("bb", row.f1);
	assertEquals("cc", row.f2);

	// Row 2: last field empty.
	row = csvFormat.nextRecord(row);
	assertNotNull(row);
	assertEquals("aa", row.f0);
	assertEquals("bb", row.f1);
	assertEquals("", row.f2);

	// Row 3: last two fields empty.
	row = csvFormat.nextRecord(row);
	assertNotNull(row);
	assertEquals("aa", row.f0);
	assertEquals("", row.f1);
	assertEquals("", row.f2);

	// Row 4: all fields empty.
	row = csvFormat.nextRecord(row);
	assertNotNull(row);
	assertEquals("", row.f0);
	assertEquals("", row.f1);
	assertEquals("", row.f2);

	// Row 5: too short, so reading it must fail.
	try {
		csvFormat.nextRecord(row);
		fail("Parse Exception was not thrown! (Row too short)");
	} catch (ParseException expected) {
		// This is the outcome the test is looking for; nothing to do.
	}
}
 
Example 17
/**
 * Reads two lines of five "|"-delimited integers through a {@code TupleCsvInputFormat}
 * and verifies every field of both resulting {@code Tuple5} records. The second input
 * line carries a trailing field delimiter.
 *
 * @throws IOException kept from the original signature; exceptions raised inside the body
 *         are rethrown as an {@link AssertionError} that carries the original as its cause
 */
@Test
public void testIntegerFields() throws IOException {
	try {
		final String fileContent = "111|222|333|444|555\n666|777|888|999|000|\n";
		final FileInputSplit split = createTempFile(fileContent);

		final TupleTypeInfo<Tuple5<Integer, Integer, Integer, Integer, Integer>> typeInfo =
			TupleTypeInfo.getBasicTupleTypeInfo(Integer.class, Integer.class, Integer.class, Integer.class, Integer.class);
		final CsvInputFormat<Tuple5<Integer, Integer, Integer, Integer, Integer>> format = new TupleCsvInputFormat<Tuple5<Integer, Integer, Integer, Integer, Integer>>(PATH, typeInfo);

		format.setFieldDelimiter("|");

		format.configure(new Configuration());
		format.open(split);

		Tuple5<Integer, Integer, Integer, Integer, Integer> result = new Tuple5<Integer, Integer, Integer, Integer, Integer>();

		// First line.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals(Integer.valueOf(111), result.f0);
		assertEquals(Integer.valueOf(222), result.f1);
		assertEquals(Integer.valueOf(333), result.f2);
		assertEquals(Integer.valueOf(444), result.f3);
		assertEquals(Integer.valueOf(555), result.f4);

		// Second line; the text "000" in the file is expected to parse to 0.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals(Integer.valueOf(666), result.f0);
		assertEquals(Integer.valueOf(777), result.f1);
		assertEquals(Integer.valueOf(888), result.f2);
		assertEquals(Integer.valueOf(999), result.f3);
		assertEquals(Integer.valueOf(0), result.f4);

		// No third record.
		result = format.nextRecord(result);
		assertNull(result);
		assertTrue(format.reachedEnd());
	}
	catch (Exception ex) {
		// Same failure message as before, but with the cause attached so the stack trace survives.
		throw new AssertionError("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage(), ex);
	}
}
 
Example 18
Source Project: flink   File: CsvInputFormatTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Reads a file in which several lines start with the comment prefix "//" and verifies
 * that, with that prefix configured on the format, only the two uncommented lines are
 * returned as records.
 */
@Test
public void ignoreMultiCharPrefixComments() {
	try {

		final String fileContent = "//description of the data\n" +
			"//successive commented line\n" +
			"this is|1|2.0|\n" +
			"a test|3|4.0|\n" +
			"//next|5|6.0|\n";

		final FileInputSplit split = createTempFile(fileContent);

		final TupleTypeInfo<Tuple3<String, Integer, Double>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, Integer.class, Double.class);
		final CsvInputFormat<Tuple3<String, Integer, Double>> format = new TupleCsvInputFormat<Tuple3<String, Integer, Double>>(PATH, "\n", "|", typeInfo);
		format.setCommentPrefix("//");

		final Configuration parameters = new Configuration();
		format.configure(parameters);
		format.open(split);

		Tuple3<String, Integer, Double> result = new Tuple3<String, Integer, Double>();

		// First uncommented line.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals("this is", result.f0);
		assertEquals(Integer.valueOf(1), result.f1);
		// Double.valueOf replaces the deprecated Double(double) constructor.
		assertEquals(Double.valueOf(2.0), result.f2);

		// Second uncommented line.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals("a test", result.f0);
		assertEquals(Integer.valueOf(3), result.f1);
		assertEquals(Double.valueOf(4.0), result.f2);

		// The trailing commented line must not produce a record.
		result = format.nextRecord(result);
		assertNull(result);
	}
	catch (Exception ex) {
		// Attach the cause instead of printing it, so the failure report carries the stack trace.
		throw new AssertionError("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage(), ex);
	}
}
 
Example 19
Source Project: flink   File: CsvInputFormatTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Reads a file in which several lines start with the comment prefix "//" and verifies
 * that, with that prefix configured on the format, only the two uncommented lines are
 * returned as records.
 */
@Test
public void ignoreMultiCharPrefixComments() {
	try {

		final String fileContent = "//description of the data\n" +
			"//successive commented line\n" +
			"this is|1|2.0|\n" +
			"a test|3|4.0|\n" +
			"//next|5|6.0|\n";

		final FileInputSplit split = createTempFile(fileContent);

		final TupleTypeInfo<Tuple3<String, Integer, Double>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, Integer.class, Double.class);
		final CsvInputFormat<Tuple3<String, Integer, Double>> format = new TupleCsvInputFormat<Tuple3<String, Integer, Double>>(PATH, "\n", "|", typeInfo);
		format.setCommentPrefix("//");

		final Configuration parameters = new Configuration();
		format.configure(parameters);
		format.open(split);

		Tuple3<String, Integer, Double> result = new Tuple3<String, Integer, Double>();

		// First uncommented line.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals("this is", result.f0);
		assertEquals(Integer.valueOf(1), result.f1);
		// Double.valueOf replaces the deprecated Double(double) constructor.
		assertEquals(Double.valueOf(2.0), result.f2);

		// Second uncommented line.
		result = format.nextRecord(result);
		assertNotNull(result);
		assertEquals("a test", result.f0);
		assertEquals(Integer.valueOf(3), result.f1);
		assertEquals(Double.valueOf(4.0), result.f2);

		// The trailing commented line must not produce a record.
		result = format.nextRecord(result);
		assertNull(result);
	}
	catch (Exception ex) {
		// Attach the cause instead of printing it, so the failure report carries the stack trace.
		throw new AssertionError("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage(), ex);
	}
}
 
Example 20
/**
 * Writes {@code FIRST_PART} and {@code SECOND_PART}, each followed by
 * {@code lineBreakerInFile}, to a temporary file and reads them back through a
 * {@code TupleCsvInputFormat} whose record delimiter is set to {@code lineBreakerSetup}.
 * Both records are expected to equal the bare parts.
 *
 * @param lineBreakerInFile line terminator written after each part in the file
 * @param lineBreakerSetup  record delimiter configured on the input format
 */
private void testRemovingTrailingCR(String lineBreakerInFile, String lineBreakerSetup) {
	final String fileContent = CsvInputFormatTest.FIRST_PART + lineBreakerInFile + CsvInputFormatTest.SECOND_PART + lineBreakerInFile;

	try {
		// create input file
		final File tempFile = File.createTempFile("CsvInputFormatTest", "tmp");
		tempFile.deleteOnExit();
		tempFile.setWritable(true);

		// try-with-resources closes the writer even when write() throws.
		try (OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile))) {
			wrt.write(fileContent);
		}

		final TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
		final CsvInputFormat<Tuple1<String>> inputFormat = new TupleCsvInputFormat<Tuple1<String>>(new Path(tempFile.toURI().toString()), typeInfo);

		Configuration parameters = new Configuration();
		inputFormat.configure(parameters);

		inputFormat.setDelimiter(lineBreakerSetup);

		FileInputSplit[] splits = inputFormat.createInputSplits(1);

		inputFormat.open(splits[0]);

		Tuple1<String> result = inputFormat.nextRecord(new Tuple1<String>());

		assertNotNull("Expecting to not return null", result);

		assertEquals(FIRST_PART, result.f0);

		result = inputFormat.nextRecord(result);

		assertNotNull("Expecting to not return null", result);
		assertEquals(SECOND_PART, result.f0);

	}
	catch (Exception ex) {
		// Only exceptions are wrapped; assertion failures above propagate with their own message
		// instead of being replaced by a generic one. The cause keeps the stack trace.
		throw new AssertionError("Test erroneous", ex);
	}
}