Java Code Examples for org.apache.flink.types.StringValue#setValueAscii()

The following examples show how to use org.apache.flink.types.StringValue#setValueAscii(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TextValueInputFormat.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Fills {@code reuse} with the text found in {@code bytes[offset, offset + numBytes)}.
 *
 * <p>When the {@code ascii} flag is set the bytes are handed to
 * {@code StringValue#setValueAscii} directly; otherwise they are run through
 * {@code this.decoder} first.
 *
 * @param reuse    the object that receives the text and is returned on success
 * @param bytes    the array holding the raw record
 * @param offset   index of the first byte of the record
 * @param numBytes number of bytes belonging to the record
 * @return {@code reuse}, or {@code null} when decoding fails and
 *         {@code skipInvalidLines} is set
 * @throws RuntimeException when decoding fails and {@code skipInvalidLines} is not set;
 *         the {@link CharacterCodingException} is attached as the cause
 */
@Override
public StringValue readRecord(StringValue reuse, byte[] bytes, int offset, int numBytes) {
	// ASCII mode: no decoder involved.
	if (this.ascii) {
		reuse.setValueAscii(bytes, offset, numBytes);
		return reuse;
	}

	// Keep using the cached wrapper as long as it still wraps the same array.
	ByteBuffer window = this.byteWrapper;
	if (window.array() != bytes) {
		window = ByteBuffer.wrap(bytes, 0, bytes.length);
		this.byteWrapper = window;
	}

	// Narrow the buffer to the record; limit goes first so the position is always legal.
	window.limit(offset + numBytes);
	window.position(offset);

	try {
		final CharBuffer decoded = this.decoder.decode(window);
		reuse.setValue(decoded);
		return reuse;
	}
	catch (CharacterCodingException e) {
		if (!skipInvalidLines) {
			// Report exactly the bytes of the offending record.
			final byte[] offending = Arrays.copyOfRange(bytes, offset, offset + numBytes);
			throw new RuntimeException("Line could not be encoded: " + Arrays.toString(offending), e);
		}
		return null;
	}
}
 
Example 2
Source File: TextValueInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Reads one record out of {@code bytes} into the supplied {@code StringValue}.
 *
 * <p>With the {@code ascii} flag set, the byte range is passed to
 * {@code setValueAscii} unchanged. Otherwise the range is decoded with
 * {@code this.decoder} and the resulting characters are stored via {@code setValue}.
 *
 * @param reuse    target object; also the return value on success
 * @param bytes    backing array containing the record
 * @param offset   start of the record inside {@code bytes}
 * @param numBytes length of the record in bytes
 * @return {@code reuse} on success; {@code null} if the bytes cannot be decoded and
 *         {@code skipInvalidLines} is enabled
 * @throws RuntimeException wrapping the {@link CharacterCodingException} if the bytes
 *         cannot be decoded and {@code skipInvalidLines} is disabled
 */
@Override
public StringValue readRecord(StringValue reuse, byte[] bytes, int offset, int numBytes) {
	if (this.ascii) {
		reuse.setValueAscii(bytes, offset, numBytes);
		return reuse;
	}

	// A new wrapper is only created when a different array instance arrives.
	ByteBuffer wrapped = this.byteWrapper;
	final boolean sameArray = bytes == wrapped.array();
	if (!sameArray) {
		wrapped = ByteBuffer.wrap(bytes, 0, bytes.length);
		this.byteWrapper = wrapped;
	}
	wrapped.limit(offset + numBytes);
	wrapped.position(offset);

	try {
		reuse.setValue(this.decoder.decode(wrapped));
		return reuse;
	}
	catch (CharacterCodingException e) {
		if (skipInvalidLines) {
			// Caller asked for undecodable lines to be dropped.
			return null;
		}

		// Copy out just the record so the message does not dump the whole buffer.
		byte[] recordBytes = new byte[numBytes];
		System.arraycopy(bytes, offset, recordBytes, 0, numBytes);
		throw new RuntimeException("Line could not be encoded: " + Arrays.toString(recordBytes), e);
	}
}
 
Example 3
Source File: TextValueInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Populates {@code reuse} from the record stored at
 * {@code bytes[offset .. offset + numBytes - 1]}.
 *
 * <p>Two paths exist: if {@code this.ascii} is true the bytes go straight into
 * {@code setValueAscii}; if not, they are decoded through {@code this.decoder}
 * using a {@link ByteBuffer} view that is cached in {@code this.byteWrapper}.
 *
 * @param reuse    instance to fill and return
 * @param bytes    array that contains the record
 * @param offset   first byte of the record
 * @param numBytes byte length of the record
 * @return the filled {@code reuse}; {@code null} when the record cannot be decoded
 *         and {@code skipInvalidLines} is true
 * @throws RuntimeException (cause: {@link CharacterCodingException}) when the record
 *         cannot be decoded and {@code skipInvalidLines} is false
 */
@Override
public StringValue readRecord(StringValue reuse, byte[] bytes, int offset, int numBytes) {
	if (this.ascii) {
		reuse.setValueAscii(bytes, offset, numBytes);
		return reuse;
	}

	ByteBuffer view = this.byteWrapper;
	if (bytes != view.array()) {
		// Different backing array than last time: wrap it and remember the wrapper.
		view = ByteBuffer.wrap(bytes, 0, bytes.length);
		this.byteWrapper = view;
	}

	final int end = offset + numBytes;
	view.limit(end);
	view.position(offset);

	try {
		CharBuffer chars = this.decoder.decode(view);
		reuse.setValue(chars);
		return reuse;
	}
	catch (CharacterCodingException e) {
		if (skipInvalidLines) {
			return null;
		}
		// Include only the record's own bytes in the failure message.
		throw new RuntimeException(
			"Line could not be encoded: " + Arrays.toString(Arrays.copyOfRange(bytes, offset, end)), e);
	}
}
 
Example 4
Source File: StringValueParser.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Parses one string field from {@code bytes[startPos, limit)} into {@code reusable}.
 *
 * <p>If quoted-string parsing is enabled and the field starts with the quote character,
 * the value is the raw bytes between the opening quote and the first later quote that is
 * not directly preceded by {@code BACKSLASH}. Otherwise the value runs up to the next
 * delimiter, or up to {@code limit} when no delimiter is found.
 *
 * @param bytes     buffer holding the field
 * @param startPos  index of the first byte of the field
 * @param limit     exclusive end of the readable region
 * @param delimiter byte sequence separating fields
 * @param reusable  object that receives the parsed value
 * @return the index just past the delimiter that followed the field, {@code limit} if
 *         the field ran to the end of the region, or {@code -1} on a quoting error
 *         (the error state is set in that case)
 */
@Override
public int parseField(byte[] bytes, int startPos, int limit, byte[] delimiter, StringValue reusable) {

	// Zero-length region: flag it as an empty column and store an empty value.
	if (startPos == limit) {
		setErrorState(ParseErrorState.EMPTY_COLUMN);
		reusable.setValueAscii(bytes, startPos, 0);
		return limit;
	}

	this.result = reusable;

	// Positions at or beyond this index cannot hold a full delimiter before limit.
	final int delimLimit = limit - delimiter.length + 1;

	if (!quotedStringParsing || bytes[startPos] != quoteCharacter) {
		// Unquoted field: advance until a delimiter starts at the current position.
		int pos = startPos;
		while (pos < delimLimit && !delimiterNext(bytes, pos, delimiter)) {
			pos++;
		}

		if (pos >= delimLimit) {
			// No delimiter: everything up to limit is the value.
			reusable.setValueAscii(bytes, startPos, limit - startPos);
			return limit;
		}

		if (pos == startPos) {
			// Delimiter right at the start: mark empty column, still store the empty value.
			setErrorState(ParseErrorState.EMPTY_COLUMN);
		}
		reusable.setValueAscii(bytes, startPos, pos - startPos);
		return pos + delimiter.length;
	}

	// Quoted field: skip the opening quote and look for an unescaped closing quote.
	int pos = startPos + 1;
	while (pos < limit && !(bytes[pos] == quoteCharacter && bytes[pos - 1] != BACKSLASH)) {
		pos++;
	}

	if (pos == limit) {
		setErrorState(ParseErrorState.UNTERMINATED_QUOTED_STRING);
		return -1;
	}

	// Step over the closing quote; the value excludes both quote characters.
	pos++;
	final int valueLength = pos - startPos - 2;

	if (pos == limit) {
		// Closing quote was the last byte of the region.
		reusable.setValueAscii(bytes, startPos + 1, valueLength);
		return limit;
	}
	if (pos < delimLimit && delimiterNext(bytes, pos, delimiter)) {
		// Closing quote is directly followed by the delimiter.
		reusable.setValueAscii(bytes, startPos + 1, valueLength);
		return pos + delimiter.length;
	}

	// Something other than a delimiter follows the closing quote.
	setErrorState(ParseErrorState.UNQUOTED_CHARS_AFTER_QUOTED_STRING);
	return -1;
}
 
Example 5
Source File: StringValueParser.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Extracts a single string field starting at {@code startPos} and stores it in
 * {@code reusable}.
 *
 * <p>A field that begins with the quote character (when quoted-string parsing is on) must
 * be closed by a quote not immediately preceded by {@code BACKSLASH}, and that closing
 * quote must be followed by either {@code limit} or the delimiter. Any other field ends at
 * the next delimiter or at {@code limit}.
 *
 * @param bytes     the byte array containing the field
 * @param startPos  where the field begins
 * @param limit     exclusive upper bound for reading
 * @param delimiter the field delimiter bytes
 * @param reusable  holder for the parsed value
 * @return position after the consumed delimiter, or {@code limit} when the field ended at
 *         the limit, or {@code -1} if the quoted string is unterminated or followed by
 *         unexpected bytes
 */
@Override
public int parseField(byte[] bytes, int startPos, int limit, byte[] delimiter, StringValue reusable) {

	if (startPos == limit) {
		// Nothing to read: empty column with an empty value.
		setErrorState(ParseErrorState.EMPTY_COLUMN);
		reusable.setValueAscii(bytes, startPos, 0);
		return limit;
	}

	this.result = reusable;
	final int delimLimit = limit - delimiter.length + 1;
	int cursor = startPos;

	if (quotedStringParsing && bytes[cursor] == quoteCharacter) {
		// Move past the opening quote, then scan for a quote that is not escaped.
		cursor++;
		while (cursor < limit && (bytes[cursor] != quoteCharacter || bytes[cursor - 1] == BACKSLASH)) {
			cursor++;
		}

		if (cursor == limit) {
			setErrorState(ParseErrorState.UNTERMINATED_QUOTED_STRING);
			return -1;
		}

		// cursor now points one past the closing quote.
		cursor++;
		final boolean endsAtLimit = cursor == limit;
		if (endsAtLimit || (cursor < delimLimit && delimiterNext(bytes, cursor, delimiter))) {
			// Properly terminated: store the bytes between the two quotes.
			reusable.setValueAscii(bytes, startPos + 1, cursor - startPos - 2);
			return endsAtLimit ? limit : cursor + delimiter.length;
		}

		setErrorState(ParseErrorState.UNQUOTED_CHARS_AFTER_QUOTED_STRING);
		return -1;
	}

	// Unquoted field: find the next delimiter, if any.
	while (cursor < delimLimit && !delimiterNext(bytes, cursor, delimiter)) {
		cursor++;
	}

	if (cursor >= delimLimit) {
		// Ran out of room for a delimiter; the value spans to the limit.
		reusable.setValueAscii(bytes, startPos, limit - startPos);
		return limit;
	}

	if (cursor == startPos) {
		setErrorState(ParseErrorState.EMPTY_COLUMN); // delimiter at the very start
	}
	reusable.setValueAscii(bytes, startPos, cursor - startPos);
	return cursor + delimiter.length;
}
 
Example 6
Source File: StringValueParser.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the string field located at {@code startPos} and writes it to {@code reusable}.
 *
 * <p>Handles two layouts: a quoted field (only when quoted-string parsing is enabled and
 * the first byte equals the quote character) and a plain field terminated by the
 * delimiter or by {@code limit}. In the quoted layout a quote directly preceded by
 * {@code BACKSLASH} does not end the field.
 *
 * @param bytes     source bytes
 * @param startPos  offset of the field's first byte
 * @param limit     exclusive end offset of the data to consider
 * @param delimiter bytes that separate fields
 * @param reusable  receives the field value
 * @return the offset following the delimiter after the field; {@code limit} if the field
 *         extends to the limit; {@code -1} after setting an error state for a malformed
 *         quoted field
 */
@Override
public int parseField(byte[] bytes, int startPos, int limit, byte[] delimiter, StringValue reusable) {

	if (startPos == limit) {
		setErrorState(ParseErrorState.EMPTY_COLUMN);
		reusable.setValueAscii(bytes, startPos, 0);
		return limit;
	}

	this.result = reusable;

	// Beyond this index a full delimiter can no longer fit in front of limit.
	final int delimLimit = limit - delimiter.length + 1;

	final boolean quoted = quotedStringParsing && bytes[startPos] == quoteCharacter;
	if (quoted) {
		// Start right after the opening quote and stop at the first unescaped quote.
		int idx = startPos + 1;
		for (; idx < limit; idx++) {
			if (bytes[idx] == quoteCharacter && bytes[idx - 1] != BACKSLASH) {
				break;
			}
		}

		if (idx == limit) {
			// Reached the end without a closing quote.
			setErrorState(ParseErrorState.UNTERMINATED_QUOTED_STRING);
			return -1;
		}

		// Index of the byte after the closing quote.
		final int afterQuote = idx + 1;
		final int innerLength = afterQuote - startPos - 2;

		if (afterQuote == limit) {
			// Field is terminated by the end of the region.
			reusable.setValueAscii(bytes, startPos + 1, innerLength);
			return limit;
		}
		if (afterQuote < delimLimit && delimiterNext(bytes, afterQuote, delimiter)) {
			// Field is terminated by the delimiter.
			reusable.setValueAscii(bytes, startPos + 1, innerLength);
			return afterQuote + delimiter.length;
		}

		// Neither end of region nor delimiter after the closing quote.
		setErrorState(ParseErrorState.UNQUOTED_CHARS_AFTER_QUOTED_STRING);
		return -1;
	}

	// Plain field: walk forward until a delimiter begins at idx.
	int idx = startPos;
	for (; idx < delimLimit; idx++) {
		if (delimiterNext(bytes, idx, delimiter)) {
			break;
		}
	}

	if (idx >= delimLimit) {
		// No delimiter found; take everything up to limit.
		reusable.setValueAscii(bytes, startPos, limit - startPos);
		return limit;
	}

	if (idx == startPos) {
		// Zero-length field in front of a delimiter.
		setErrorState(ParseErrorState.EMPTY_COLUMN);
	}
	reusable.setValueAscii(bytes, startPos, idx - startPos);
	return idx + delimiter.length;
}