org.apache.flink.api.common.io.ParseException Java Examples

The following examples show how to use org.apache.flink.api.common.io.ParseException. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: RowCsvInputFormat.java (from the Flink-CEPplus project, Apache License 2.0; 4 votes)
/**
 * Parses one delimited record from {@code bytes} into {@code holders}, field by field.
 *
 * <p>Fields flagged in {@code fieldIncluded} are handed to the field parser registered for their
 * output position and the parsed value is stored in {@code holders}; every other field is skipped
 * via {@code skipFields}. A field whose parser returns a negative position, or an empty field while
 * {@code emptyColumnAsNull} is set, is stored as {@code null}.
 *
 * <p>All error messages decode the offending row with {@link #getCharset()} so that the reported
 * text does not depend on the platform default charset.
 *
 * @param holders  result slots that are overwritten with the parsed field values
 * @param bytes    buffer that contains the record
 * @param offset   index of the first byte of the record within {@code bytes}
 * @param numBytes length of the record in bytes
 * @return {@code true} if every field was processed; {@code false} if the row is too short and
 *         the format is lenient
 * @throws ParseException if the format is not lenient and the row is too short or a field parser
 *         reports an error other than {@code EMPTY_COLUMN}, or (regardless of leniency) if
 *         skipping a field yields a negative position
 */
@Override
protected boolean parseRecord(Object[] holders, byte[] bytes, int offset, int numBytes) throws ParseException {
	byte[] fieldDelimiter = this.getFieldDelimiter();
	boolean[] fieldIncluded = this.fieldIncluded;

	int startPos = offset;
	int limit = offset + numBytes;

	int field = 0;
	int output = 0;
	while (field < fieldIncluded.length) {

		// check valid start position: being exactly at the limit is only acceptable for the last field
		if (startPos > limit || (startPos == limit && field != fieldIncluded.length - 1)) {
			if (isLenient()) {
				return false;
			} else {
				throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
			}
		}

		if (fieldIncluded[field]) {
			// parse field
			FieldParser<Object> parser = (FieldParser<Object>) this.getFieldParsers()[fieldPosMap[output]];
			// remember where this field started so it can be skipped if parsing fails
			int latestValidPos = startPos;
			startPos = parser.resetErrorStateAndParse(
				bytes,
				startPos,
				limit,
				fieldDelimiter,
				holders[fieldPosMap[output]]);

			if (!isLenient() && (parser.getErrorState() != FieldParser.ParseErrorState.NONE)) {
				// the error state EMPTY_COLUMN is ignored
				if (parser.getErrorState() != FieldParser.ParseErrorState.EMPTY_COLUMN) {
					throw new ParseException(String.format("Parsing error for column %1$s of row '%2$s' originated by %3$s: %4$s.",
						field + 1, new String(bytes, offset, numBytes, getCharset()), parser.getClass().getSimpleName(), parser.getErrorState()));
				}
			}
			holders[fieldPosMap[output]] = parser.getLastResult();

			// check parse result:
			// the result is null if it is invalid
			// or empty with emptyColumnAsNull enabled
			if (startPos < 0 ||
				(emptyColumnAsNull && (parser.getErrorState().equals(FieldParser.ParseErrorState.EMPTY_COLUMN)))) {
				holders[fieldPosMap[output]] = null;
				// continue from the start of the field that could not be used
				startPos = skipFields(bytes, latestValidPos, limit, fieldDelimiter);
			}
			output++;
		} else {
			// skip field
			startPos = skipFields(bytes, startPos, limit, fieldDelimiter);
		}

		// check if something went wrong
		if (startPos < 0) {
			throw new ParseException(String.format("Unexpected parser position for column %1$s of row '%2$s'",
				field + 1, new String(bytes, offset, numBytes, getCharset())));
		}
		else if (startPos == limit
				&& field != fieldIncluded.length - 1
				&& !FieldParser.endsWithDelimiter(bytes, startPos - 1, fieldDelimiter)) {
			// We are at the end of the record, but not all fields have been read
			// and the end is not a field delimiter indicating an empty last field.
			if (isLenient()) {
				return false;
			} else {
				throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
			}
		}

		field++;
	}
	return true;
}
 
Example #2
Source File: CsvInputFormatTest.java (from the Flink-CEPplus project, Apache License 2.0; 4 votes)
/**
 * Reads a comma-separated file whose rows end in zero, one, two or three empty fields and checks
 * that each empty field is returned as an empty string. The final row has only two fields and is
 * expected to be rejected with a {@code ParseException}.
 */
@Test
public void testTailingEmptyFields() throws Exception {
	final String fileContent = String.join("\n",
			"aa,bb,cc", // ok
			"aa,bb,",   // the last field is empty
			"aa,,",     // the last two fields are empty
			",,",       // all fields are empty
			"aa,bb");   // row too short
	final FileInputSplit split = createTempFile(fileContent);

	final TupleTypeInfo<Tuple3<String, String, String>> tupleType =
			TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class, String.class);
	final CsvInputFormat<Tuple3<String, String, String>> format =
			new TupleCsvInputFormat<>(PATH, tupleType);

	format.setFieldDelimiter(",");
	format.configure(new Configuration());
	format.open(split);

	// expected field values of the four well-formed rows, in file order
	final String[][] expectedRows = {
			{"aa", "bb", "cc"},
			{"aa", "bb", ""},
			{"aa", "", ""},
			{"", "", ""}
	};

	Tuple3<String, String, String> record = new Tuple3<>();
	for (String[] expectedRow : expectedRows) {
		record = format.nextRecord(record);
		assertNotNull(record);
		assertEquals(expectedRow[0], record.f0);
		assertEquals(expectedRow[1], record.f1);
		assertEquals(expectedRow[2], record.f2);
	}

	try {
		format.nextRecord(record);
		fail("Parse Exception was not thrown! (Row too short)");
	} catch (ParseException expected) {
		// expected: the last row has too few fields
	}
}
 
Example #3
Source File: RowCsvInputFormatTest.java (from the Flink-CEPplus project, Apache License 2.0; 4 votes)
/**
 * Reads rows delimited by the two-character delimiter {@code "|-"} whose trailing fields are
 * empty and checks that every empty field is returned as an empty string. The final row has only
 * two fields and is expected to be rejected with a {@code ParseException}.
 */
@Test
public void testTailingEmptyFields() throws Exception {
	// every line, including the last one, is terminated by a newline
	String fileContent = String.join("\n",
			"abc|-def|-ghijk",
			"abc|-def|-",
			"abc|-|-",
			"|-|-|-",
			"|-|-",
			"abc|-def") + "\n";

	FileInputSplit split = createTempFile(fileContent);

	TypeInformation[] fieldTypes = {
			BasicTypeInfo.STRING_TYPE_INFO,
			BasicTypeInfo.STRING_TYPE_INFO,
			BasicTypeInfo.STRING_TYPE_INFO};

	RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, "\n", "|");
	// replace the single-character delimiter passed to the constructor with the two-character one
	format.setFieldDelimiter("|-");
	format.configure(new Configuration());
	format.open(split);

	// expected field values of the five readable rows, in file order
	final String[][] expectedRows = {
			{"abc", "def", "ghijk"},
			{"abc", "def", ""},
			{"abc", "", ""},
			{"", "", ""},
			{"", "", ""}
	};

	Row row = new Row(3);
	for (String[] expectedRow : expectedRows) {
		row = format.nextRecord(row);
		assertNotNull(row);
		for (int column = 0; column < expectedRow.length; column++) {
			assertEquals(expectedRow[column], row.getField(column));
		}
	}

	try {
		format.nextRecord(row);
		fail("Parse Exception was not thrown! (Row too short)");
	} catch (ParseException expected) {
		// expected: the last row has too few fields
	}
}
 
Example #4
Source File: RowCsvInputFormat.java (from the flink project, Apache License 2.0; 4 votes)
/**
 * Parses one delimited record from {@code bytes} into {@code holders}, field by field.
 *
 * <p>Fields flagged in {@code fieldIncluded} are handed to the field parser registered for their
 * output position and the parsed value is stored in {@code holders}; every other field is skipped
 * via {@code skipFields}. A field whose parser returns a negative position, or an empty field while
 * {@code emptyColumnAsNull} is set, is stored as {@code null}.
 *
 * <p>All error messages decode the offending row with {@link #getCharset()} so that the reported
 * text does not depend on the platform default charset.
 *
 * @param holders  result slots that are overwritten with the parsed field values
 * @param bytes    buffer that contains the record
 * @param offset   index of the first byte of the record within {@code bytes}
 * @param numBytes length of the record in bytes
 * @return {@code true} if every field was processed; {@code false} if the row is too short and
 *         the format is lenient
 * @throws ParseException if the format is not lenient and the row is too short or a field parser
 *         reports an error other than {@code EMPTY_COLUMN}, or (regardless of leniency) if
 *         skipping a field yields a negative position
 */
@Override
protected boolean parseRecord(Object[] holders, byte[] bytes, int offset, int numBytes) throws ParseException {
	byte[] fieldDelimiter = this.getFieldDelimiter();
	boolean[] fieldIncluded = this.fieldIncluded;

	int startPos = offset;
	int limit = offset + numBytes;

	int field = 0;
	int output = 0;
	while (field < fieldIncluded.length) {

		// check valid start position: being exactly at the limit is only acceptable for the last field
		if (startPos > limit || (startPos == limit && field != fieldIncluded.length - 1)) {
			if (isLenient()) {
				return false;
			} else {
				throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
			}
		}

		if (fieldIncluded[field]) {
			// parse field
			FieldParser<Object> parser = (FieldParser<Object>) this.getFieldParsers()[fieldPosMap[output]];
			// remember where this field started so it can be skipped if parsing fails
			int latestValidPos = startPos;
			startPos = parser.resetErrorStateAndParse(
				bytes,
				startPos,
				limit,
				fieldDelimiter,
				holders[fieldPosMap[output]]);

			if (!isLenient() && (parser.getErrorState() != FieldParser.ParseErrorState.NONE)) {
				// the error state EMPTY_COLUMN is ignored
				if (parser.getErrorState() != FieldParser.ParseErrorState.EMPTY_COLUMN) {
					throw new ParseException(String.format("Parsing error for column %1$s of row '%2$s' originated by %3$s: %4$s.",
						field + 1, new String(bytes, offset, numBytes, getCharset()), parser.getClass().getSimpleName(), parser.getErrorState()));
				}
			}
			holders[fieldPosMap[output]] = parser.getLastResult();

			// check parse result:
			// the result is null if it is invalid
			// or empty with emptyColumnAsNull enabled
			if (startPos < 0 ||
				(emptyColumnAsNull && (parser.getErrorState().equals(FieldParser.ParseErrorState.EMPTY_COLUMN)))) {
				holders[fieldPosMap[output]] = null;
				// continue from the start of the field that could not be used
				startPos = skipFields(bytes, latestValidPos, limit, fieldDelimiter);
			}
			output++;
		} else {
			// skip field
			startPos = skipFields(bytes, startPos, limit, fieldDelimiter);
		}

		// check if something went wrong
		if (startPos < 0) {
			throw new ParseException(String.format("Unexpected parser position for column %1$s of row '%2$s'",
				field + 1, new String(bytes, offset, numBytes, getCharset())));
		}
		else if (startPos == limit
				&& field != fieldIncluded.length - 1
				&& !FieldParser.endsWithDelimiter(bytes, startPos - 1, fieldDelimiter)) {
			// We are at the end of the record, but not all fields have been read
			// and the end is not a field delimiter indicating an empty last field.
			if (isLenient()) {
				return false;
			} else {
				throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
			}
		}

		field++;
	}
	return true;
}
 
Example #5
Source File: CsvInputFormatTest.java (from the flink project, Apache License 2.0; 4 votes)
/**
 * Reads a comma-separated file whose rows end in zero, one, two or three empty fields and checks
 * that each empty field is returned as an empty string. The final row has only two fields and is
 * expected to be rejected with a {@code ParseException}.
 */
@Test
public void testTailingEmptyFields() throws Exception {
	final String fileContent = String.join("\n",
			"aa,bb,cc", // ok
			"aa,bb,",   // the last field is empty
			"aa,,",     // the last two fields are empty
			",,",       // all fields are empty
			"aa,bb");   // row too short
	final FileInputSplit split = createTempFile(fileContent);

	final TupleTypeInfo<Tuple3<String, String, String>> tupleType =
			TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class, String.class);
	final CsvInputFormat<Tuple3<String, String, String>> format =
			new TupleCsvInputFormat<>(PATH, tupleType);

	format.setFieldDelimiter(",");
	format.configure(new Configuration());
	format.open(split);

	// expected field values of the four well-formed rows, in file order
	final String[][] expectedRows = {
			{"aa", "bb", "cc"},
			{"aa", "bb", ""},
			{"aa", "", ""},
			{"", "", ""}
	};

	Tuple3<String, String, String> record = new Tuple3<>();
	for (String[] expectedRow : expectedRows) {
		record = format.nextRecord(record);
		assertNotNull(record);
		assertEquals(expectedRow[0], record.f0);
		assertEquals(expectedRow[1], record.f1);
		assertEquals(expectedRow[2], record.f2);
	}

	try {
		format.nextRecord(record);
		fail("Parse Exception was not thrown! (Row too short)");
	} catch (ParseException expected) {
		// expected: the last row has too few fields
	}
}
 
Example #6
Source File: RowCsvInputFormatTest.java (from the flink project, Apache License 2.0; 4 votes)
/**
 * Reads rows delimited by the two-character delimiter {@code "|-"} whose trailing fields are
 * empty and checks that every empty field is returned as an empty string. The final row has only
 * two fields and is expected to be rejected with a {@code ParseException}.
 */
@Test
public void testTailingEmptyFields() throws Exception {
	// every line, including the last one, is terminated by a newline
	String fileContent = String.join("\n",
			"abc|-def|-ghijk",
			"abc|-def|-",
			"abc|-|-",
			"|-|-|-",
			"|-|-",
			"abc|-def") + "\n";

	FileInputSplit split = createTempFile(fileContent);

	TypeInformation[] fieldTypes = {
			BasicTypeInfo.STRING_TYPE_INFO,
			BasicTypeInfo.STRING_TYPE_INFO,
			BasicTypeInfo.STRING_TYPE_INFO};

	RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, "\n", "|");
	// replace the single-character delimiter passed to the constructor with the two-character one
	format.setFieldDelimiter("|-");
	format.configure(new Configuration());
	format.open(split);

	// expected field values of the five readable rows, in file order
	final String[][] expectedRows = {
			{"abc", "def", "ghijk"},
			{"abc", "def", ""},
			{"abc", "", ""},
			{"", "", ""},
			{"", "", ""}
	};

	Row row = new Row(3);
	for (String[] expectedRow : expectedRows) {
		row = format.nextRecord(row);
		assertNotNull(row);
		for (int column = 0; column < expectedRow.length; column++) {
			assertEquals(expectedRow[column], row.getField(column));
		}
	}

	try {
		format.nextRecord(row);
		fail("Parse Exception was not thrown! (Row too short)");
	} catch (ParseException expected) {
		// expected: the last row has too few fields
	}
}
 
Example #7
Source File: RowCsvInputFormat.java (from the flink project, Apache License 2.0; 4 votes)
/**
 * Parses one delimited record from {@code bytes} into {@code holders}, field by field.
 *
 * <p>Fields flagged in {@code fieldIncluded} are handed to the field parser registered for their
 * output position and the parsed value is stored in {@code holders}; every other field is skipped
 * via {@code skipFields}. A field whose parser returns a negative position, or an empty field while
 * {@code emptyColumnAsNull} is set, is stored as {@code null}.
 *
 * <p>All error messages decode the offending row with {@link #getCharset()} so that the reported
 * text does not depend on the platform default charset.
 *
 * @param holders  result slots that are overwritten with the parsed field values
 * @param bytes    buffer that contains the record
 * @param offset   index of the first byte of the record within {@code bytes}
 * @param numBytes length of the record in bytes
 * @return {@code true} if every field was processed; {@code false} if the row is too short and
 *         the format is lenient
 * @throws ParseException if the format is not lenient and the row is too short or a field parser
 *         reports an error other than {@code EMPTY_COLUMN}, or (regardless of leniency) if
 *         skipping a field yields a negative position
 */
@Override
protected boolean parseRecord(Object[] holders, byte[] bytes, int offset, int numBytes) throws ParseException {
	byte[] fieldDelimiter = this.getFieldDelimiter();
	boolean[] fieldIncluded = this.fieldIncluded;

	int startPos = offset;
	int limit = offset + numBytes;

	int field = 0;
	int output = 0;
	while (field < fieldIncluded.length) {

		// check valid start position: being exactly at the limit is only acceptable for the last field
		if (startPos > limit || (startPos == limit && field != fieldIncluded.length - 1)) {
			if (isLenient()) {
				return false;
			} else {
				throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
			}
		}

		if (fieldIncluded[field]) {
			// parse field
			FieldParser<Object> parser = (FieldParser<Object>) this.getFieldParsers()[fieldPosMap[output]];
			// remember where this field started so it can be skipped if parsing fails
			int latestValidPos = startPos;
			startPos = parser.resetErrorStateAndParse(
				bytes,
				startPos,
				limit,
				fieldDelimiter,
				holders[fieldPosMap[output]]);

			if (!isLenient() && (parser.getErrorState() != FieldParser.ParseErrorState.NONE)) {
				// the error state EMPTY_COLUMN is ignored
				if (parser.getErrorState() != FieldParser.ParseErrorState.EMPTY_COLUMN) {
					throw new ParseException(String.format("Parsing error for column %1$s of row '%2$s' originated by %3$s: %4$s.",
						field + 1, new String(bytes, offset, numBytes, getCharset()), parser.getClass().getSimpleName(), parser.getErrorState()));
				}
			}
			holders[fieldPosMap[output]] = parser.getLastResult();

			// check parse result:
			// the result is null if it is invalid
			// or empty with emptyColumnAsNull enabled
			if (startPos < 0 ||
				(emptyColumnAsNull && (parser.getErrorState().equals(FieldParser.ParseErrorState.EMPTY_COLUMN)))) {
				holders[fieldPosMap[output]] = null;
				// continue from the start of the field that could not be used
				startPos = skipFields(bytes, latestValidPos, limit, fieldDelimiter);
			}
			output++;
		} else {
			// skip field
			startPos = skipFields(bytes, startPos, limit, fieldDelimiter);
		}

		// check if something went wrong
		if (startPos < 0) {
			throw new ParseException(String.format("Unexpected parser position for column %1$s of row '%2$s'",
				field + 1, new String(bytes, offset, numBytes, getCharset())));
		}
		else if (startPos == limit
				&& field != fieldIncluded.length - 1
				&& !FieldParser.endsWithDelimiter(bytes, startPos - 1, fieldDelimiter)) {
			// We are at the end of the record, but not all fields have been read
			// and the end is not a field delimiter indicating an empty last field.
			if (isLenient()) {
				return false;
			} else {
				throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
			}
		}

		field++;
	}
	return true;
}
 
Example #8
Source File: CsvInputFormatTest.java (from the flink project, Apache License 2.0; 4 votes)
/**
 * Reads a comma-separated file whose rows end in zero, one, two or three empty fields and checks
 * that each empty field is returned as an empty string. The final row has only two fields and is
 * expected to be rejected with a {@code ParseException}.
 */
@Test
public void testTailingEmptyFields() throws Exception {
	final String fileContent = String.join("\n",
			"aa,bb,cc", // ok
			"aa,bb,",   // the last field is empty
			"aa,,",     // the last two fields are empty
			",,",       // all fields are empty
			"aa,bb");   // row too short
	final FileInputSplit split = createTempFile(fileContent);

	final TupleTypeInfo<Tuple3<String, String, String>> tupleType =
			TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class, String.class);
	final CsvInputFormat<Tuple3<String, String, String>> format =
			new TupleCsvInputFormat<>(PATH, tupleType);

	format.setFieldDelimiter(",");
	format.configure(new Configuration());
	format.open(split);

	// expected field values of the four well-formed rows, in file order
	final String[][] expectedRows = {
			{"aa", "bb", "cc"},
			{"aa", "bb", ""},
			{"aa", "", ""},
			{"", "", ""}
	};

	Tuple3<String, String, String> record = new Tuple3<>();
	for (String[] expectedRow : expectedRows) {
		record = format.nextRecord(record);
		assertNotNull(record);
		assertEquals(expectedRow[0], record.f0);
		assertEquals(expectedRow[1], record.f1);
		assertEquals(expectedRow[2], record.f2);
	}

	try {
		format.nextRecord(record);
		fail("Parse Exception was not thrown! (Row too short)");
	} catch (ParseException expected) {
		// expected: the last row has too few fields
	}
}
 
Example #9
Source File: RowCsvInputFormatTest.java (from the flink project, Apache License 2.0; 4 votes)
/**
 * Reads rows delimited by the two-character delimiter {@code "|-"} whose trailing fields are
 * empty and checks that every empty field is returned as an empty string. The final row has only
 * two fields and is expected to be rejected with a {@code ParseException}.
 */
@Test
public void testTailingEmptyFields() throws Exception {
	// every line, including the last one, is terminated by a newline
	String fileContent = String.join("\n",
			"abc|-def|-ghijk",
			"abc|-def|-",
			"abc|-|-",
			"|-|-|-",
			"|-|-",
			"abc|-def") + "\n";

	FileInputSplit split = createTempFile(fileContent);

	TypeInformation[] fieldTypes = {
			BasicTypeInfo.STRING_TYPE_INFO,
			BasicTypeInfo.STRING_TYPE_INFO,
			BasicTypeInfo.STRING_TYPE_INFO};

	RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, "\n", "|");
	// replace the single-character delimiter passed to the constructor with the two-character one
	format.setFieldDelimiter("|-");
	format.configure(new Configuration());
	format.open(split);

	// expected field values of the five readable rows, in file order
	final String[][] expectedRows = {
			{"abc", "def", "ghijk"},
			{"abc", "def", ""},
			{"abc", "", ""},
			{"", "", ""},
			{"", "", ""}
	};

	Row row = new Row(3);
	for (String[] expectedRow : expectedRows) {
		row = format.nextRecord(row);
		assertNotNull(row);
		for (int column = 0; column < expectedRow.length; column++) {
			assertEquals(expectedRow[column], row.getField(column));
		}
	}

	try {
		format.nextRecord(row);
		fail("Parse Exception was not thrown! (Row too short)");
	} catch (ParseException expected) {
		// expected: the last row has too few fields
	}
}