Java Code Examples for org.apache.flink.types.parser.FieldParser#getErrorState()

The following examples show how to use org.apache.flink.types.parser.FieldParser#getErrorState(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: StringParsers.java    From Alink with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a single text token into a field value.
 *
 * <p>String fields are passed through unchanged. For every other field the token is
 * encoded to bytes and handed to {@code parser}; the parse is considered successful
 * only when the parser reports {@link FieldParser.ParseErrorState#NONE}.
 *
 * @param parser        the parser used for non-string fields; not consulted when
 *                      {@code isStringField} is {@code true}
 * @param token         the raw text of the field
 * @param isStringField whether the field is a string field and needs no parsing
 * @return a pair {@code (success, value)}: {@code (true, token)} for string fields,
 *         {@code (true, parsedValue)} when parsing succeeds, and {@code (false, null)}
 *         when the token is null/whitespace-only or the parser reports an error
 */
static Tuple2<Boolean, Object> parseField(FieldParser<?> parser, String token, boolean isStringField) {
    if (isStringField) {
        return Tuple2.of(true, token);
    }
    if (StringUtils.isNullOrWhitespaceOnly(token)) {
        return Tuple2.of(false, null);
    }

    // Encode with the charset the parser is configured for rather than the JVM's
    // platform default, so the bytes match what the parser expects on every machine.
    byte[] bytes = token.getBytes(parser.getCharset());

    // A single zero byte is passed as the delimiter; presumably so that the whole
    // token is consumed as one field (NOTE(review): confirm against FieldParser).
    parser.resetErrorStateAndParse(bytes, 0, bytes.length, new byte[]{0}, null);

    if (parser.getErrorState() != FieldParser.ParseErrorState.NONE) {
        return Tuple2.of(false, null);
    }
    return Tuple2.of(true, parser.getLastResult());
}
 
Example 2
Source File: ColumnsWriter.java    From Alink with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a single text token into a field value.
 *
 * <p>String fields are passed through unchanged. For every other field the token is
 * encoded to bytes and handed to {@code parser}; the parse is considered successful
 * only when the parser reports {@link FieldParser.ParseErrorState#NONE}.
 *
 * @param parser        the parser used for non-string fields; not consulted when
 *                      {@code isStringField} is {@code true}
 * @param token         the raw text of the field
 * @param isStringField whether the field is a string field and needs no parsing
 * @return a pair {@code (success, value)}: {@code (true, token)} for string fields,
 *         {@code (true, parsedValue)} when parsing succeeds, and {@code (false, null)}
 *         when the token is null/whitespace-only or the parser reports an error
 */
static Tuple2 <Boolean, Object> parseField(FieldParser <?> parser, String token, boolean isStringField) {
	if (isStringField) {
		return Tuple2.of(true, token);
	}
	if (StringUtils.isNullOrWhitespaceOnly(token)) {
		return Tuple2.of(false, null);
	}

	// Encode with the charset the parser is configured for rather than the JVM's
	// platform default, so the bytes match what the parser expects on every machine.
	byte[] bytes = token.getBytes(parser.getCharset());

	// A single zero byte is passed as the delimiter; presumably so that the whole
	// token is consumed as one field (NOTE(review): confirm against FieldParser).
	parser.resetErrorStateAndParse(bytes, 0, bytes.length, new byte[] {0}, null);

	if (parser.getErrorState() != FieldParser.ParseErrorState.NONE) {
		return Tuple2.of(false, null);
	}
	return Tuple2.of(true, parser.getLastResult());
}
 
Example 3
Source File: CsvParser.java    From Alink with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a single CSV token into a field value.
 *
 * <p>For string fields, when quoting is enabled and the token starts with the quote
 * character, the opening quote (and the closing quote, if present) is stripped and
 * every occurrence of {@code escapedQuote} is replaced by {@code quoteString}.
 * Otherwise the token is returned unchanged.
 *
 * <p>For non-string fields, a null or whitespace-only token yields a successful
 * {@code null} value; any other token is parsed with {@code parser}.
 *
 * @param parser        the parser used for non-string fields; not consulted when
 *                      {@code isStringField} is {@code true}
 * @param token         the raw text of the field
 * @param isStringField whether the field is a string field
 * @return a pair {@code (success, value)}; {@code (false, null)} only when the parser
 *         reports an error state other than {@link FieldParser.ParseErrorState#NONE}
 */
private Tuple2<Boolean, Object> parseField(FieldParser<?> parser, String token, boolean isStringField) {
    if (isStringField) {
        // A null or empty token cannot carry a quote; returning early also keeps
        // charAt(0) from throwing on such input.
        if (!enableQuote || token == null || token.isEmpty() || token.charAt(0) != quoteChar) {
            return Tuple2.of(true, token);
        }
        // Only treat the last character as a closing quote when it is distinct from the
        // opening one; a token consisting of a lone quote would otherwise produce an
        // invalid substring range.
        boolean hasClosingQuote = token.length() > 1 && token.endsWith(quoteChar.toString());
        int contentEnd = hasClosingQuote ? token.length() - 1 : token.length();
        String content = token.substring(1, contentEnd);
        return Tuple2.of(true, content.replace(escapedQuote, quoteString));
    }

    if (StringUtils.isNullOrWhitespaceOnly(token)) {
        return Tuple2.of(true, null);
    }

    // Encode token and delimiter with the parser's configured charset instead of the
    // JVM's platform default, so both byte sequences agree with the parser.
    byte[] bytes = token.getBytes(parser.getCharset());
    byte[] delimiterBytes = fieldDelim.getBytes(parser.getCharset());
    parser.resetErrorStateAndParse(bytes, 0, bytes.length, delimiterBytes, null);

    if (parser.getErrorState() != FieldParser.ParseErrorState.NONE) {
        return Tuple2.of(false, null);
    }
    return Tuple2.of(true, parser.getLastResult());
}
 
Example 4
Source File: RowCsvInputFormat.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Parses one delimited record from {@code bytes} into {@code holders}.
 *
 * <p>Fields are visited in order. Fields flagged in {@code fieldIncluded} are parsed with
 * the field parser selected through {@code fieldPosMap} and their result is stored in
 * {@code holders}; all other fields are skipped with {@code skipFields}.
 *
 * @param holders  receives the parsed values, indexed via {@code fieldPosMap}
 * @param bytes    buffer containing the record
 * @param offset   index of the first byte of the record
 * @param numBytes length of the record in bytes
 * @return {@code true} when the record was parsed; {@code false} when the row is too
 *         short and lenient mode is enabled
 * @throws ParseException when the row is too short or a field fails to parse in
 *                        non-lenient mode, or when the parser position is negative
 *                        after a field has been processed
 */
@Override
protected boolean parseRecord(Object[] holders, byte[] bytes, int offset, int numBytes) throws ParseException {
	byte[] fieldDelimiter = this.getFieldDelimiter();
	boolean[] fieldIncluded = this.fieldIncluded;

	int startPos = offset;
	int limit = offset + numBytes;

	int field = 0;
	int output = 0;
	while (field < fieldIncluded.length) {

		// check valid start position
		if (startPos > limit || (startPos == limit && field != fieldIncluded.length - 1)) {
			if (isLenient()) {
				return false;
			} else {
				throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
			}
		}

		if (fieldIncluded[field]) {
			// parse field
			int holderIndex = fieldPosMap[output];
			FieldParser<Object> parser = (FieldParser<Object>) this.getFieldParsers()[holderIndex];
			int latestValidPos = startPos;
			startPos = parser.resetErrorStateAndParse(
				bytes,
				startPos,
				limit,
				fieldDelimiter,
				holders[holderIndex]);

			if (!isLenient() && (parser.getErrorState() != FieldParser.ParseErrorState.NONE)) {
				// the error state EMPTY_COLUMN is ignored
				if (parser.getErrorState() != FieldParser.ParseErrorState.EMPTY_COLUMN) {
					// decode the row with the configured charset, like the "Row too short" message above
					throw new ParseException(String.format("Parsing error for column %1$s of row '%2$s' originated by %3$s: %4$s.",
						field + 1, new String(bytes, offset, numBytes, getCharset()), parser.getClass().getSimpleName(), parser.getErrorState()));
				}
			}
			holders[holderIndex] = parser.getLastResult();

			// check parse result:
			// the result is null if it is invalid
			// or empty with emptyColumnAsNull enabled
			if (startPos < 0 ||
				(emptyColumnAsNull && (parser.getErrorState().equals(FieldParser.ParseErrorState.EMPTY_COLUMN)))) {
				holders[holderIndex] = null;
				// resume from where this field started and move past it
				startPos = skipFields(bytes, latestValidPos, limit, fieldDelimiter);
			}
			output++;
		} else {
			// skip field
			startPos = skipFields(bytes, startPos, limit, fieldDelimiter);
		}

		// check if something went wrong
		if (startPos < 0) {
			throw new ParseException(String.format("Unexpected parser position for column %1$s of row '%2$s'",
				field + 1, new String(bytes, offset, numBytes, getCharset())));
		}
		else if (startPos == limit
				&& field != fieldIncluded.length - 1
				&& !FieldParser.endsWithDelimiter(bytes, startPos - 1, fieldDelimiter)) {
			// We are at the end of the record, but not all fields have been read
			// and the end is not a field delimiter indicating an empty last field.
			if (isLenient()) {
				return false;
			} else {
				throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
			}
		}

		field++;
	}
	return true;
}
 
Example 5
Source File: RowCsvInputFormat.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Parses one delimited record from {@code bytes} into {@code holders}.
 *
 * <p>Fields are visited in order. Fields flagged in {@code fieldIncluded} are parsed with
 * the field parser selected through {@code fieldPosMap} and their result is stored in
 * {@code holders}; all other fields are skipped with {@code skipFields}.
 *
 * @param holders  receives the parsed values, indexed via {@code fieldPosMap}
 * @param bytes    buffer containing the record
 * @param offset   index of the first byte of the record
 * @param numBytes length of the record in bytes
 * @return {@code true} when the record was parsed; {@code false} when the row is too
 *         short and lenient mode is enabled
 * @throws ParseException when the row is too short or a field fails to parse in
 *                        non-lenient mode, or when the parser position is negative
 *                        after a field has been processed
 */
@Override
protected boolean parseRecord(Object[] holders, byte[] bytes, int offset, int numBytes) throws ParseException {
	byte[] fieldDelimiter = this.getFieldDelimiter();
	boolean[] fieldIncluded = this.fieldIncluded;

	int startPos = offset;
	int limit = offset + numBytes;

	int field = 0;
	int output = 0;
	while (field < fieldIncluded.length) {

		// check valid start position
		if (startPos > limit || (startPos == limit && field != fieldIncluded.length - 1)) {
			if (isLenient()) {
				return false;
			} else {
				throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
			}
		}

		if (fieldIncluded[field]) {
			// parse field
			int holderIndex = fieldPosMap[output];
			FieldParser<Object> parser = (FieldParser<Object>) this.getFieldParsers()[holderIndex];
			int latestValidPos = startPos;
			startPos = parser.resetErrorStateAndParse(
				bytes,
				startPos,
				limit,
				fieldDelimiter,
				holders[holderIndex]);

			if (!isLenient() && (parser.getErrorState() != FieldParser.ParseErrorState.NONE)) {
				// the error state EMPTY_COLUMN is ignored
				if (parser.getErrorState() != FieldParser.ParseErrorState.EMPTY_COLUMN) {
					// decode the row with the configured charset, like the "Row too short" message above
					throw new ParseException(String.format("Parsing error for column %1$s of row '%2$s' originated by %3$s: %4$s.",
						field + 1, new String(bytes, offset, numBytes, getCharset()), parser.getClass().getSimpleName(), parser.getErrorState()));
				}
			}
			holders[holderIndex] = parser.getLastResult();

			// check parse result:
			// the result is null if it is invalid
			// or empty with emptyColumnAsNull enabled
			if (startPos < 0 ||
				(emptyColumnAsNull && (parser.getErrorState().equals(FieldParser.ParseErrorState.EMPTY_COLUMN)))) {
				holders[holderIndex] = null;
				// resume from where this field started and move past it
				startPos = skipFields(bytes, latestValidPos, limit, fieldDelimiter);
			}
			output++;
		} else {
			// skip field
			startPos = skipFields(bytes, startPos, limit, fieldDelimiter);
		}

		// check if something went wrong
		if (startPos < 0) {
			throw new ParseException(String.format("Unexpected parser position for column %1$s of row '%2$s'",
				field + 1, new String(bytes, offset, numBytes, getCharset())));
		}
		else if (startPos == limit
				&& field != fieldIncluded.length - 1
				&& !FieldParser.endsWithDelimiter(bytes, startPos - 1, fieldDelimiter)) {
			// We are at the end of the record, but not all fields have been read
			// and the end is not a field delimiter indicating an empty last field.
			if (isLenient()) {
				return false;
			} else {
				throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
			}
		}

		field++;
	}
	return true;
}
 
Example 6
Source File: RowCsvInputFormat.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Parses one delimited record from {@code bytes} into {@code holders}.
 *
 * <p>Fields are visited in order. Fields flagged in {@code fieldIncluded} are parsed with
 * the field parser selected through {@code fieldPosMap} and their result is stored in
 * {@code holders}; all other fields are skipped with {@code skipFields}.
 *
 * @param holders  receives the parsed values, indexed via {@code fieldPosMap}
 * @param bytes    buffer containing the record
 * @param offset   index of the first byte of the record
 * @param numBytes length of the record in bytes
 * @return {@code true} when the record was parsed; {@code false} when the row is too
 *         short and lenient mode is enabled
 * @throws ParseException when the row is too short or a field fails to parse in
 *                        non-lenient mode, or when the parser position is negative
 *                        after a field has been processed
 */
@Override
protected boolean parseRecord(Object[] holders, byte[] bytes, int offset, int numBytes) throws ParseException {
	byte[] fieldDelimiter = this.getFieldDelimiter();
	boolean[] fieldIncluded = this.fieldIncluded;

	int startPos = offset;
	int limit = offset + numBytes;

	int field = 0;
	int output = 0;
	while (field < fieldIncluded.length) {

		// check valid start position
		if (startPos > limit || (startPos == limit && field != fieldIncluded.length - 1)) {
			if (isLenient()) {
				return false;
			} else {
				throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
			}
		}

		if (fieldIncluded[field]) {
			// parse field
			int holderIndex = fieldPosMap[output];
			FieldParser<Object> parser = (FieldParser<Object>) this.getFieldParsers()[holderIndex];
			int latestValidPos = startPos;
			startPos = parser.resetErrorStateAndParse(
				bytes,
				startPos,
				limit,
				fieldDelimiter,
				holders[holderIndex]);

			if (!isLenient() && (parser.getErrorState() != FieldParser.ParseErrorState.NONE)) {
				// the error state EMPTY_COLUMN is ignored
				if (parser.getErrorState() != FieldParser.ParseErrorState.EMPTY_COLUMN) {
					// decode the row with the configured charset, like the "Row too short" message above
					throw new ParseException(String.format("Parsing error for column %1$s of row '%2$s' originated by %3$s: %4$s.",
						field + 1, new String(bytes, offset, numBytes, getCharset()), parser.getClass().getSimpleName(), parser.getErrorState()));
				}
			}
			holders[holderIndex] = parser.getLastResult();

			// check parse result:
			// the result is null if it is invalid
			// or empty with emptyColumnAsNull enabled
			if (startPos < 0 ||
				(emptyColumnAsNull && (parser.getErrorState().equals(FieldParser.ParseErrorState.EMPTY_COLUMN)))) {
				holders[holderIndex] = null;
				// resume from where this field started and move past it
				startPos = skipFields(bytes, latestValidPos, limit, fieldDelimiter);
			}
			output++;
		} else {
			// skip field
			startPos = skipFields(bytes, startPos, limit, fieldDelimiter);
		}

		// check if something went wrong
		if (startPos < 0) {
			throw new ParseException(String.format("Unexpected parser position for column %1$s of row '%2$s'",
				field + 1, new String(bytes, offset, numBytes, getCharset())));
		}
		else if (startPos == limit
				&& field != fieldIncluded.length - 1
				&& !FieldParser.endsWithDelimiter(bytes, startPos - 1, fieldDelimiter)) {
			// We are at the end of the record, but not all fields have been read
			// and the end is not a field delimiter indicating an empty last field.
			if (isLenient()) {
				return false;
			} else {
				throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
			}
		}

		field++;
	}
	return true;
}