org.apache.flink.types.parser.FieldParser Java Examples

The following examples show how to use org.apache.flink.types.parser.FieldParser. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: GenericCsvInputFormat.java    From Alink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates one {@link FieldParser} for every non-null field class and allocates the value holders.
 *
 * <p>Each parser is configured with this format's {@code charset}. The holders are created by
 * calling {@code createValue()} on the parser at the same index.
 *
 * @throws RuntimeException if no parser is registered for one of the field classes
 */
private void initializeParsers() {
    final Class<?>[] fieldClasses = extractTypeClasses(fieldTypes);
    final FieldParser<?>[] created = new FieldParser<?>[fieldClasses.length];

    for (int idx = 0; idx < fieldClasses.length; idx++) {
        final Class<?> fieldClass = fieldClasses[idx];
        if (fieldClass == null) {
            // No class at this position: the parser slot stays null.
            continue;
        }

        final Class<? extends FieldParser<?>> parserType = FieldParser.getParserForType(fieldClass);
        if (parserType == null) {
            throw new RuntimeException("No parser available for type '" + fieldClass.getName() + "'.");
        }

        final FieldParser<?> parser = InstantiationUtil.instantiate(parserType, FieldParser.class);
        parser.setCharset(charset);
        created[idx] = parser;
    }
    this.fieldParsers = created;

    // One holder per declared field type, produced by the parser stored at the same index.
    this.holders = new Object[fieldTypes.length];
    int slot = 0;
    while (slot < fieldTypes.length) {
        holders[slot] = fieldParsers[slot].createValue();
        slot++;
    }
}
 
Example #2
Source File: StringParsers.java    From Alink with Apache License 2.0 6 votes vote down vote up
public KvParser(String[] fieldNames, TypeInformation[] fieldTypes, String colDelimiter, String valDelimiter) {
    this.fieldNames = fieldNames;
    Preconditions.checkArgument(fieldNames.length == fieldTypes.length);
    this.isString = new boolean[fieldNames.length];
    this.parsers = new FieldParser[fieldNames.length];

    for (int i = 0; i < fieldTypes.length; i++) {
        parsers[i] = getFieldParser(fieldTypes[i].getTypeClass());
        isString[i] = fieldTypes[i].equals(Types.STRING);
    }
    this.colDelimiter = colDelimiter;
    this.valDelimiter = valDelimiter;

    keyToFieldIdx = new HashMap<>();
    for (int i = 0; i < fieldNames.length; i++) {
        keyToFieldIdx.put(fieldNames[i], i);
    }
}
 
Example #3
Source File: StringParsers.java    From Alink with Apache License 2.0 6 votes vote down vote up
/**
 * Parses a single textual token into a field value.
 *
 * @param parser        parser used for non-string fields
 * @param token         text to parse
 * @param isStringField whether the field is a string field; such tokens are returned unchanged
 * @return {@code (true, value)} on success; {@code (false, null)} when
 *         {@code StringUtils.isNullOrWhitespaceOnly(token)} holds for a non-string field or the
 *         parser reports an error state other than {@code NONE}
 */
static Tuple2<Boolean, Object> parseField(FieldParser<?> parser, String token, boolean isStringField) {
    if (isStringField) {
        return Tuple2.of(true, token);
    }
    if (StringUtils.isNullOrWhitespaceOnly(token)) {
        return Tuple2.of(false, null);
    }

    // Encode with an explicit charset: the no-arg getBytes() depends on the platform default,
    // so the same token could produce different bytes on different machines.
    // NOTE(review): UTF-8 is assumed to match the parser's charset - confirm no caller changes it.
    byte[] bytes = token.getBytes(java.nio.charset.StandardCharsets.UTF_8);

    // The token is already isolated, so a zero byte is passed as the delimiter.
    parser.resetErrorStateAndParse(bytes, 0, bytes.length, new byte[]{0}, null);
    if (parser.getErrorState() != FieldParser.ParseErrorState.NONE) {
        return Tuple2.of(false, null);
    }
    return Tuple2.of(true, parser.getLastResult());
}
 
Example #4
Source File: ColumnsWriter.java    From Alink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a writer for the columns of the given schema.
 *
 * <p>Stores the column names, resolves one {@link FieldParser} per column type and builds a
 * lookup from column name to column index.
 *
 * @param schema schema whose field names and field types describe the columns
 */
public ColumnsWriter(TableSchema schema) {
	this.nCols = schema.getFieldNames().length;
	this.colNames = schema.getFieldNames();
	this.isString = new boolean[colNames.length];

	final TypeInformation[] fieldTypes = schema.getFieldTypes();
	final int typeCount = fieldTypes.length;
	this.parsers = new FieldParser[typeCount];

	// Resolve a parser for every column and remember which columns are plain strings.
	for (int col = 0; col < typeCount; col++) {
		final TypeInformation type = fieldTypes[col];
		parsers[col] = getFieldParser(type.getTypeClass());
		isString[col] = type.equals(Types.STRING);
	}

	// Map every column name to its position.
	keyToFieldIdx = new HashMap <>();
	int position = 0;
	while (position < colNames.length) {
		keyToFieldIdx.put(colNames[position], position);
		position++;
	}
}
 
Example #5
Source File: ColumnsWriter.java    From Alink with Apache License 2.0 6 votes vote down vote up
/**
 * Parses a single textual token into a field value.
 *
 * @param parser        parser used for non-string fields
 * @param token         text to parse
 * @param isStringField whether the field is a string field; such tokens are returned unchanged
 * @return {@code (true, value)} on success; {@code (false, null)} when
 *         {@code StringUtils.isNullOrWhitespaceOnly(token)} holds for a non-string field or the
 *         parser reports an error state other than {@code NONE}
 */
static Tuple2 <Boolean, Object> parseField(FieldParser <?> parser, String token, boolean isStringField) {
	if (isStringField) {
		return Tuple2.of(true, token);
	}
	if (StringUtils.isNullOrWhitespaceOnly(token)) {
		return Tuple2.of(false, null);
	}

	// Encode with an explicit charset: the no-arg getBytes() depends on the platform default,
	// so the same token could produce different bytes on different machines.
	// NOTE(review): UTF-8 is assumed to match the parser's charset - confirm no caller changes it.
	byte[] bytes = token.getBytes(java.nio.charset.StandardCharsets.UTF_8);

	// The token is already isolated, so a zero byte is passed as the delimiter.
	parser.resetErrorStateAndParse(bytes, 0, bytes.length, new byte[] {0}, null);
	if (parser.getErrorState() != FieldParser.ParseErrorState.NONE) {
		return Tuple2.of(false, null);
	}
	return Tuple2.of(true, parser.getLastResult());
}
 
Example #6
Source File: GenericCsvInputFormat.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Sets the field types to read; a {@code null} entry marks a field that is not included.
 *
 * <p>{@code fieldIncluded} gets one flag per entry of {@code fieldTypes}; {@code this.fieldTypes}
 * keeps only the non-null types, in order.
 *
 * @param fieldTypes the types of the fields, {@code null} for fields to leave out
 * @throws IllegalArgumentException if {@code fieldTypes} is null or a type has no parser
 */
protected void setFieldTypesGeneric(Class<?> ... fieldTypes) {
	if (fieldTypes == null) {
		throw new IllegalArgumentException("Field types must not be null.");
	}

	final int fieldCount = fieldTypes.length;
	this.fieldIncluded = new boolean[fieldCount];
	final ArrayList<Class<?>> supported = new ArrayList<Class<?>>();

	for (int pos = 0; pos < fieldCount; pos++) {
		final Class<?> candidate = fieldTypes[pos];
		if (candidate == null) {
			// Not included: the flag stays false and no type is recorded.
			continue;
		}

		// Reject types for which no parser is registered.
		if (FieldParser.getParserForType(candidate) == null) {
			throw new IllegalArgumentException("The type '" + candidate.getName() + "' is not supported for the CSV input format.");
		}
		supported.add(candidate);
		fieldIncluded[pos] = true;
	}

	this.fieldTypes = supported.toArray(new Class<?>[supported.size()]);
}
 
Example #7
Source File: CsvParser.java    From Alink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a parser for delimited text lines.
 *
 * @param types      Column types; one {@link FieldParser} is instantiated per entry.
 * @param fieldDelim Field delimiter in the text line.
 * @param quoteChar  Quoting character, or {@code null} to disable quoting. Content between a pair of
 *                   quoting chars is treated as one field, even if it contains field delimiters.
 *                   Two consecutive quoting chars represent a literal quoting char.
 * @throws RuntimeException if no parser is available for one of the column types
 */
public CsvParser(TypeInformation[] types, String fieldDelim, @Nullable Character quoteChar) {
    this.fieldDelim = fieldDelim;
    this.lenFieldDelim = this.fieldDelim.length();
    this.quoteChar = quoteChar;
    this.enableQuote = (quoteChar != null);

    final int columnCount = types.length;
    this.parsers = new FieldParser[columnCount];
    this.isString = new boolean[columnCount];

    if (enableQuote) {
        // A doubled quote inside a quoted field stands for one literal quote.
        final String quote = quoteChar.toString();
        this.quoteString = quote;
        this.escapedQuote = this.quoteString + this.quoteString;
    }

    for (int col = 0; col < columnCount; col++) {
        final TypeInformation columnType = types[col];
        final Class typeClazz = columnType.getTypeClass();
        final Class<? extends FieldParser<?>> parserType = FieldParser.getParserForType(typeClazz);
        if (parserType == null) {
            throw new RuntimeException("No parser available for type '" + typeClazz.getName() + "'.");
        }
        parsers[col] = InstantiationUtil.instantiate(parserType, FieldParser.class);
        isString[col] = columnType.equals(Types.STRING);
    }
}
 
Example #8
Source File: RowCsvInputFormatTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that malformed quoted strings make {@link StringParser#parseField} return -1 and
 * leave the expected error state on the parser.
 */
@Test
public void testParseStringErrors() throws Exception {
	final StringParser parser = new StringParser();
	parser.enableQuotedStringParsing((byte) '"');

	// Malformed input -> error state the parser must report.
	final Map<String, StringParser.ParseErrorState> expectedErrors = new HashMap<>();
	expectedErrors.put("\"string\" trailing", FieldParser.ParseErrorState.UNQUOTED_CHARS_AFTER_QUOTED_STRING);
	expectedErrors.put("\"unterminated ", FieldParser.ParseErrorState.UNTERMINATED_QUOTED_STRING);

	for (Map.Entry<String, StringParser.ParseErrorState> entry : expectedErrors.entrySet()) {
		final String input = entry.getKey();
		final byte[] encoded = input.getBytes(ConfigConstants.DEFAULT_CHARSET);

		final int result = parser.parseField(encoded, 0, input.length(), new byte[]{(byte) '|'}, null);

		assertEquals(-1, result);
		assertEquals(entry.getValue(), parser.getErrorState());
	}
}
 
Example #9
Source File: CsvInputFormatTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that malformed quoted strings make {@link StringParser#parseField} return -1 and
 * leave the expected error state on the parser.
 */
@Test
public void testParseStringErrors() throws Exception {
	final StringParser parser = new StringParser();
	parser.enableQuotedStringParsing((byte) '"');

	// Each row: {malformed input, error state the parser must report}.
	final Object[][] cases = {
			{"\"string\" trailing", FieldParser.ParseErrorState.UNQUOTED_CHARS_AFTER_QUOTED_STRING},
			{"\"unterminated ", FieldParser.ParseErrorState.UNTERMINATED_QUOTED_STRING}
	};

	for (int row = 0; row < cases.length; row++) {
		final String input = (String) cases[row][0];
		final Object expectedState = cases[row][1];
		final byte[] encoded = input.getBytes(ConfigConstants.DEFAULT_CHARSET);

		final int result = parser.parseField(encoded, 0, input.length(), new byte[]{'|'}, null);

		assertThat(result, is(-1));
		assertThat(parser.getErrorState(), is(expectedState));
	}

}
 
Example #10
Source File: CsvParser.java    From Alink with Apache License 2.0 6 votes vote down vote up
/**
 * Parses one CSV token into a field value.
 *
 * <p>String fields are returned as text. When quoting is enabled and the token starts with the
 * quote character, the surrounding quotes are removed and doubled quotes are collapsed into one.
 * All other fields are handed to {@code parser}.
 *
 * @param parser        parser used for non-string fields
 * @param token         raw token text
 * @param isStringField whether the target column is a string column
 * @return {@code (true, value)} on success; {@code (true, null)} for a non-string token for which
 *         {@code StringUtils.isNullOrWhitespaceOnly} holds; {@code (false, null)} when the parser
 *         reports an error state other than {@code NONE}
 */
private Tuple2<Boolean, Object> parseField(FieldParser<?> parser, String token, boolean isStringField) {
    if (isStringField) {
        // An empty token cannot start with a quote; testing isEmpty() first keeps charAt(0) from throwing.
        if (!enableQuote || token.isEmpty() || token.charAt(0) != quoteChar) {
            return Tuple2.of(true, token);
        }
        String content;
        // Need at least two characters, otherwise a lone quote would count as both the opening
        // and the closing quote and substring(1, 0) would throw.
        if (token.length() > 1 && token.endsWith(quoteChar.toString())) {
            content = token.substring(1, token.length() - 1);
        } else {
            // No closing quote: drop only the opening one.
            content = token.substring(1);
        }
        return Tuple2.of(true, content.replace(escapedQuote, quoteString));
    }

    if (StringUtils.isNullOrWhitespaceOnly(token)) {
        return Tuple2.of(true, null);
    }

    // Encode with an explicit charset: the no-arg getBytes() depends on the platform default.
    // NOTE(review): UTF-8 is assumed to match the parser's charset - confirm no caller changes it.
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
    byte[] bytes = token.getBytes(utf8);
    parser.resetErrorStateAndParse(bytes, 0, bytes.length, fieldDelim.getBytes(utf8), null);
    if (parser.getErrorState() != FieldParser.ParseErrorState.NONE) {
        return Tuple2.of(false, null);
    }
    return Tuple2.of(true, parser.getLastResult());
}
 
Example #11
Source File: CsvInputFormat.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Opens the given split and resets the per-split state of this format.
 *
 * <p>After delegating to the superclass, one value holder is created per field parser, the
 * line-break marker is set when the delimiter is a single {@code '\n'}, and the comment and
 * invalid-line counters are reset to zero.
 *
 * @param split the split to open
 * @throws IOException declared by the overridden method
 */
@Override
public void open(FileInputSplit split) throws IOException {
	super.open(split);

	@SuppressWarnings("unchecked")
	final FieldParser<Object>[] parsers = (FieldParser<Object>[]) getFieldParsers();

	// One value holder per parser, at the same index.
	this.parsedValues = new Object[parsers.length];
	int slot = 0;
	for (FieldParser<Object> parser : parsers) {
		this.parsedValues[slot++] = parser.createValue();
	}

	// The length is tested first, so reading index 0 is safe.
	// The marker lets readRecord skip re-checking the delimiter on every call.
	final boolean singleByteDelimiter = this.getDelimiter().length == 1;
	if (singleByteDelimiter && this.getDelimiter()[0] == '\n') {
		this.lineDelimiterIsLinebreak = true;
	}

	this.invalidLineCount = 0;
	this.commentCount = 0;
}
 
Example #12
Source File: CsvInputFormatTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that malformed quoted strings make {@link StringParser#parseField} return -1 and
 * leave the expected error state on the parser.
 */
@Test
public void testParseStringErrors() throws Exception {
	final StringParser parser = new StringParser();
	parser.enableQuotedStringParsing((byte) '"');

	// Each row: {malformed input, error state the parser must report}.
	final Object[][] cases = {
			{"\"string\" trailing", FieldParser.ParseErrorState.UNQUOTED_CHARS_AFTER_QUOTED_STRING},
			{"\"unterminated ", FieldParser.ParseErrorState.UNTERMINATED_QUOTED_STRING}
	};

	for (int row = 0; row < cases.length; row++) {
		final String input = (String) cases[row][0];
		final Object expectedState = cases[row][1];
		final byte[] encoded = input.getBytes(ConfigConstants.DEFAULT_CHARSET);

		final int result = parser.parseField(encoded, 0, input.length(), new byte[]{'|'}, null);

		assertThat(result, is(-1));
		assertThat(parser.getErrorState(), is(expectedState));
	}

}
 
Example #13
Source File: RowCsvInputFormatTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that malformed quoted strings make {@link StringParser#parseField} return -1 and
 * leave the expected error state on the parser.
 */
@Test
public void testParseStringErrors() throws Exception {
	final StringParser parser = new StringParser();
	parser.enableQuotedStringParsing((byte) '"');

	// Malformed input -> error state the parser must report.
	final Map<String, StringParser.ParseErrorState> expectedErrors = new HashMap<>();
	expectedErrors.put("\"string\" trailing", FieldParser.ParseErrorState.UNQUOTED_CHARS_AFTER_QUOTED_STRING);
	expectedErrors.put("\"unterminated ", FieldParser.ParseErrorState.UNTERMINATED_QUOTED_STRING);

	for (Map.Entry<String, StringParser.ParseErrorState> entry : expectedErrors.entrySet()) {
		final String input = entry.getKey();
		final byte[] encoded = input.getBytes(ConfigConstants.DEFAULT_CHARSET);

		final int result = parser.parseField(encoded, 0, input.length(), new byte[]{(byte) '|'}, null);

		assertEquals(-1, result);
		assertEquals(entry.getValue(), parser.getErrorState());
	}
}
 
Example #14
Source File: GenericCsvInputFormat.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Sets the field types to read; a {@code null} entry marks a field that is not included.
 *
 * <p>{@code fieldIncluded} gets one flag per entry of {@code fieldTypes}; {@code this.fieldTypes}
 * keeps only the non-null types, in order.
 *
 * @param fieldTypes the types of the fields, {@code null} for fields to leave out
 * @throws IllegalArgumentException if {@code fieldTypes} is null or a type has no parser
 */
protected void setFieldTypesGeneric(Class<?> ... fieldTypes) {
	if (fieldTypes == null) {
		throw new IllegalArgumentException("Field types must not be null.");
	}

	final int fieldCount = fieldTypes.length;
	this.fieldIncluded = new boolean[fieldCount];
	final ArrayList<Class<?>> supported = new ArrayList<Class<?>>();

	for (int pos = 0; pos < fieldCount; pos++) {
		final Class<?> candidate = fieldTypes[pos];
		if (candidate == null) {
			// Not included: the flag stays false and no type is recorded.
			continue;
		}

		// Reject types for which no parser is registered.
		if (FieldParser.getParserForType(candidate) == null) {
			throw new IllegalArgumentException("The type '" + candidate.getName() + "' is not supported for the CSV input format.");
		}
		supported.add(candidate);
		fieldIncluded[pos] = true;
	}

	this.fieldTypes = supported.toArray(new Class<?>[supported.size()]);
}
 
Example #15
Source File: GenericCsvInputFormat.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Sets the field types to read; a {@code null} entry marks a field that is not included.
 *
 * <p>{@code fieldIncluded} gets one flag per entry of {@code fieldTypes}; {@code this.fieldTypes}
 * keeps only the non-null types, in order.
 *
 * @param fieldTypes the types of the fields, {@code null} for fields to leave out
 * @throws IllegalArgumentException if {@code fieldTypes} is null or a type has no parser
 */
protected void setFieldTypesGeneric(Class<?> ... fieldTypes) {
	if (fieldTypes == null) {
		throw new IllegalArgumentException("Field types must not be null.");
	}

	final int fieldCount = fieldTypes.length;
	this.fieldIncluded = new boolean[fieldCount];
	final ArrayList<Class<?>> supported = new ArrayList<Class<?>>();

	for (int pos = 0; pos < fieldCount; pos++) {
		final Class<?> candidate = fieldTypes[pos];
		if (candidate == null) {
			// Not included: the flag stays false and no type is recorded.
			continue;
		}

		// Reject types for which no parser is registered.
		if (FieldParser.getParserForType(candidate) == null) {
			throw new IllegalArgumentException("The type '" + candidate.getName() + "' is not supported for the CSV input format.");
		}
		supported.add(candidate);
		fieldIncluded[pos] = true;
	}

	this.fieldTypes = supported.toArray(new Class<?>[supported.size()]);
}
 
Example #16
Source File: RowCsvInputFormatTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that malformed quoted strings make {@link StringParser#parseField} return -1 and
 * leave the expected error state on the parser.
 */
@Test
public void testParseStringErrors() throws Exception {
	final StringParser parser = new StringParser();
	parser.enableQuotedStringParsing((byte) '"');

	// Malformed input -> error state the parser must report.
	final Map<String, StringParser.ParseErrorState> expectedErrors = new HashMap<>();
	expectedErrors.put("\"string\" trailing", FieldParser.ParseErrorState.UNQUOTED_CHARS_AFTER_QUOTED_STRING);
	expectedErrors.put("\"unterminated ", FieldParser.ParseErrorState.UNTERMINATED_QUOTED_STRING);

	for (Map.Entry<String, StringParser.ParseErrorState> entry : expectedErrors.entrySet()) {
		final String input = entry.getKey();
		final byte[] encoded = input.getBytes(ConfigConstants.DEFAULT_CHARSET);

		final int result = parser.parseField(encoded, 0, input.length(), new byte[]{(byte) '|'}, null);

		assertEquals(-1, result);
		assertEquals(entry.getValue(), parser.getErrorState());
	}
}
 
Example #17
Source File: CsvInputFormatTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that malformed quoted strings make {@link StringParser#parseField} return -1 and
 * leave the expected error state on the parser.
 */
@Test
public void testParseStringErrors() throws Exception {
	final StringParser parser = new StringParser();
	parser.enableQuotedStringParsing((byte) '"');

	// Each row: {malformed input, error state the parser must report}.
	final Object[][] cases = {
			{"\"string\" trailing", FieldParser.ParseErrorState.UNQUOTED_CHARS_AFTER_QUOTED_STRING},
			{"\"unterminated ", FieldParser.ParseErrorState.UNTERMINATED_QUOTED_STRING}
	};

	for (int row = 0; row < cases.length; row++) {
		final String input = (String) cases[row][0];
		final Object expectedState = cases[row][1];
		final byte[] encoded = input.getBytes(ConfigConstants.DEFAULT_CHARSET);

		final int result = parser.parseField(encoded, 0, input.length(), new byte[]{'|'}, null);

		assertThat(result, is(-1));
		assertThat(parser.getErrorState(), is(expectedState));
	}

}
 
Example #18
Source File: PrimitiveInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the given split and instantiates the {@link FieldParser} for {@code primitiveClass}.
 *
 * @param split the split to open
 * @throws IOException declared by the overridden method
 * @throws IllegalArgumentException if no parser is registered for {@code primitiveClass}
 */
@Override
public void open(FileInputSplit split) throws IOException {
	super.open(split);

	final Class<? extends FieldParser<OT>> parserType = FieldParser.getParserForType(primitiveClass);
	if (parserType != null) {
		parser = InstantiationUtil.instantiate(parserType, FieldParser.class);
		return;
	}

	// No parser is registered for the requested type.
	throw new IllegalArgumentException("The type '" + primitiveClass.getName() + "' is not supported for the primitive input format.");
}
 
Example #19
Source File: CsvInputFormat.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the given split and resets the per-split state of this format.
 *
 * <p>After delegating to the superclass, one value holder is created per field parser, the
 * line-break marker is set when the delimiter is a single {@code '\n'}, and the comment and
 * invalid-line counters are reset to zero.
 *
 * @param split the split to open
 * @throws IOException if there are no field parsers, or as declared by the overridden method
 */
@Override
public void open(FileInputSplit split) throws IOException {
	super.open(split);

	@SuppressWarnings("unchecked")
	final FieldParser<Object>[] parsers = (FieldParser<Object>[]) getFieldParsers();

	// Without any parser there is nothing this format could read.
	final int parserCount = parsers.length;
	if (parserCount == 0) {
		throw new IOException("CsvInputFormat.open(FileInputSplit split) - no field parsers to parse input");
	}

	// One value holder per parser, at the same index.
	this.parsedValues = new Object[parserCount];
	int slot = 0;
	for (FieldParser<Object> parser : parsers) {
		this.parsedValues[slot++] = parser.createValue();
	}

	// The length is tested first, so reading index 0 is safe.
	// The marker lets readRecord skip re-checking the delimiter on every call.
	final boolean singleByteDelimiter = this.getDelimiter().length == 1;
	if (singleByteDelimiter && this.getDelimiter()[0] == '\n') {
		this.lineDelimiterIsLinebreak = true;
	}

	this.invalidLineCount = 0;
	this.commentCount = 0;
}
 
Example #20
Source File: GenericCsvInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Selects the fields to read by their index in the source and sets their types.
 *
 * <p>{@code fieldIncluded} is sized to the largest index plus one. A {@code null} type means the
 * field at the corresponding index is not included.
 *
 * @param sourceFieldIndices non-negative indices of the fields in the source
 * @param fieldTypes         types of the fields, index-aligned with {@code sourceFieldIndices}
 * @throws IllegalArgumentException if an index is negative or a type has no parser
 */
protected void setFieldsGeneric(int[] sourceFieldIndices, Class<?>[] fieldTypes) {
	checkNotNull(sourceFieldIndices);
	checkNotNull(fieldTypes);
	checkArgument(sourceFieldIndices.length == fieldTypes.length,
		"Number of field indices and field types must match.");

	for (int pos = 0; pos < sourceFieldIndices.length; pos++) {
		if (sourceFieldIndices[pos] < 0) {
			throw new IllegalArgumentException("Field indices must not be smaller than zero.");
		}
	}

	// The inclusion mask must reach up to the highest referenced source field.
	this.fieldIncluded = new boolean[max(sourceFieldIndices) + 1];
	final ArrayList<Class<?>> supported = new ArrayList<Class<?>>();

	for (int pos = 0; pos < fieldTypes.length; pos++) {
		final Class<?> candidate = fieldTypes[pos];
		if (candidate == null) {
			// Not included: the flag stays false and no type is recorded.
			continue;
		}

		// Reject types for which no parser is registered.
		if (FieldParser.getParserForType(candidate) == null) {
			throw new IllegalArgumentException("The type '" + candidate.getName()
				+ "' is not supported for the CSV input format.");
		}
		supported.add(candidate);
		fieldIncluded[sourceFieldIndices[pos]] = true;
	}

	this.fieldTypes = supported.toArray(new Class<?>[supported.size()]);
}
 
Example #21
Source File: AnyToTripleFlatMapper.java    From Alink with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the format reader and resolves one {@link FieldParser} per field type.
 */
@Override
public void open() {
	this.formatReader = initFormatReader(super.getDataSchema(), params).f0;

	final int fieldCount = fieldTypes.length;
	this.isString = new boolean[fieldCount];
	this.parsers = new FieldParser[fieldCount];

	// Resolve a parser for every field and remember which fields are plain strings.
	int col = 0;
	while (col < fieldTypes.length) {
		parsers[col] = ColumnsWriter.getFieldParser(fieldTypes[col].getTypeClass());
		isString[col] = fieldTypes[col].equals(Types.STRING);
		col++;
	}
}
 
Example #22
Source File: ColumnsWriter.java    From Alink with Apache License 2.0 5 votes vote down vote up
/**
 * Instantiates the {@link FieldParser} registered for the given class.
 *
 * @param typeClazz class of the values to parse
 * @return a new parser instance for {@code typeClazz}
 * @throws RuntimeException if no parser is registered for {@code typeClazz}
 */
static FieldParser <?> getFieldParser(Class typeClazz) {
	final Class <? extends FieldParser <?>> parserType = FieldParser.getParserForType(typeClazz);
	if (parserType != null) {
		return InstantiationUtil.instantiate(parserType, FieldParser.class);
	}
	throw new RuntimeException("No parser available for type '" + typeClazz.getName() + "'.");
}
 
Example #23
Source File: GenericCsvInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Selects the fields to read through an inclusion mask and sets their types.
 *
 * <p>Types are consumed in order: the n-th {@code true} entry of the mask takes the n-th entry
 * of {@code fieldTypes}. The mask array itself is stored, not a copy.
 *
 * @param includedMask one flag per source field, {@code true} for fields to read
 * @param fieldTypes   types of the included fields, in order
 * @throws IllegalArgumentException if a type is missing, null, or has no parser
 */
protected void setFieldsGeneric(boolean[] includedMask, Class<?>[] fieldTypes) {
	checkNotNull(includedMask);
	checkNotNull(fieldTypes);

	final ArrayList<Class<?>> supported = new ArrayList<Class<?>>();

	// Index of the next unused entry in fieldTypes.
	int nextType = 0;
	for (int field = 0; field < includedMask.length; field++) {
		if (!includedMask[field]) {
			continue;
		}

		if (nextType >= fieldTypes.length) {
			throw new IllegalArgumentException("Missing type for included field " + field + ".");
		}
		final Class<?> candidate = fieldTypes[nextType];
		nextType++;

		if (candidate == null) {
			throw new IllegalArgumentException("Type for included field " + field + " should not be null.");
		}
		// Reject types for which no parser is registered.
		if (FieldParser.getParserForType(candidate) == null) {
			throw new IllegalArgumentException("The type '" + candidate.getName() + "' is not supported for the CSV input format.");
		}
		supported.add(candidate);
	}

	this.fieldTypes = supported.toArray(new Class<?>[supported.size()]);
	this.fieldIncluded = includedMask;
}
 
Example #24
Source File: GenericCsvInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the given split and instantiates one {@link FieldParser} per non-null field type.
 *
 * <p>Each parser gets this format's charset. When quoted string parsing is enabled, it is
 * switched on for {@link StringParser} and {@link StringValueParser} instances. If the header
 * option is set and the split starts at offset 0, the first line is read and discarded.
 *
 * @param split the split to open
 * @throws IOException declared by the overridden method
 * @throws RuntimeException if no parser is registered for one of the field types
 */
@Override
public void open(FileInputSplit split) throws IOException {
	super.open(split);

	final int fieldCount = fieldTypes.length;
	final FieldParser<?>[] created = new FieldParser<?>[fieldCount];

	for (int pos = 0; pos < fieldCount; pos++) {
		final Class<?> fieldType = fieldTypes[pos];
		if (fieldType == null) {
			// No type at this position: the parser slot stays null.
			continue;
		}

		final Class<? extends FieldParser<?>> parserType = FieldParser.getParserForType(fieldType);
		if (parserType == null) {
			throw new RuntimeException("No parser available for type '" + fieldType.getName() + "'.");
		}

		final FieldParser<?> parser = InstantiationUtil.instantiate(parserType, FieldParser.class);
		parser.setCharset(getCharset());

		// Only the two string parser kinds support quoted parsing.
		if (this.quotedStringParsing) {
			if (parser instanceof StringParser) {
				((StringParser) parser).enableQuotedStringParsing(this.quoteCharacter);
			} else if (parser instanceof StringValueParser) {
				((StringValueParser) parser).enableQuotedStringParsing(this.quoteCharacter);
			}
		}

		created[pos] = parser;
	}
	this.fieldParsers = created;

	// Skip the header line only when this split starts at the beginning of the file.
	if (this.skipFirstLineAsHeader && this.splitStart == 0) {
		readLine(); // result intentionally ignored
	}
}
 
Example #25
Source File: StringParsers.java    From Alink with Apache License 2.0 5 votes vote down vote up
public JsonParser(String[] fieldNames, TypeInformation[] fieldTypes) {
    this.fieldNames = fieldNames;
    Preconditions.checkArgument(fieldNames.length == fieldTypes.length);
    this.isString = new boolean[fieldNames.length];
    this.parsers = new FieldParser[fieldNames.length];

    for (int i = 0; i < fieldTypes.length; i++) {
        parsers[i] = getFieldParser(fieldTypes[i].getTypeClass());
        isString[i] = fieldTypes[i].equals(Types.STRING);
    }
}
 
Example #26
Source File: StringParsers.java    From Alink with Apache License 2.0 5 votes vote down vote up
/**
 * Instantiates the {@link FieldParser} registered for the given class.
 *
 * @param typeClazz class of the values to parse
 * @return a new parser instance for {@code typeClazz}
 * @throws RuntimeException if no parser is registered for {@code typeClazz}
 */
static FieldParser<?> getFieldParser(Class typeClazz) {
    final Class<? extends FieldParser<?>> parserType = FieldParser.getParserForType(typeClazz);
    if (parserType != null) {
        return InstantiationUtil.instantiate(parserType, FieldParser.class);
    }
    throw new RuntimeException("No parser available for type '" + typeClazz.getName() + "'.");
}
 
Example #27
Source File: GenericCsvInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the given split and instantiates one {@link FieldParser} per non-null field type.
 *
 * <p>Each parser gets this format's charset. When quoted string parsing is enabled, it is
 * switched on for {@link StringParser} and {@link StringValueParser} instances. If the header
 * option is set and the split starts at offset 0, the first line is read and discarded.
 *
 * @param split the split to open
 * @throws IOException declared by the overridden method
 * @throws RuntimeException if no parser is registered for one of the field types
 */
@Override
public void open(FileInputSplit split) throws IOException {
	super.open(split);

	final int fieldCount = fieldTypes.length;
	final FieldParser<?>[] created = new FieldParser<?>[fieldCount];

	for (int pos = 0; pos < fieldCount; pos++) {
		final Class<?> fieldType = fieldTypes[pos];
		if (fieldType == null) {
			// No type at this position: the parser slot stays null.
			continue;
		}

		final Class<? extends FieldParser<?>> parserType = FieldParser.getParserForType(fieldType);
		if (parserType == null) {
			throw new RuntimeException("No parser available for type '" + fieldType.getName() + "'.");
		}

		final FieldParser<?> parser = InstantiationUtil.instantiate(parserType, FieldParser.class);
		parser.setCharset(getCharset());

		// Only the two string parser kinds support quoted parsing.
		if (this.quotedStringParsing) {
			if (parser instanceof StringParser) {
				((StringParser) parser).enableQuotedStringParsing(this.quoteCharacter);
			} else if (parser instanceof StringValueParser) {
				((StringValueParser) parser).enableQuotedStringParsing(this.quoteCharacter);
			}
		}

		created[pos] = parser;
	}
	this.fieldParsers = created;

	// Skip the header line only when this split starts at the beginning of the file.
	if (this.skipFirstLineAsHeader && this.splitStart == 0) {
		readLine(); // result intentionally ignored
	}
}
 
Example #28
Source File: GenericCsvInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Selects the fields to read through an inclusion mask and sets their types.
 *
 * <p>Types are consumed in order: the n-th {@code true} entry of the mask takes the n-th entry
 * of {@code fieldTypes}. The mask array itself is stored, not a copy.
 *
 * @param includedMask one flag per source field, {@code true} for fields to read
 * @param fieldTypes   types of the included fields, in order
 * @throws IllegalArgumentException if a type is missing, null, or has no parser
 */
protected void setFieldsGeneric(boolean[] includedMask, Class<?>[] fieldTypes) {
	checkNotNull(includedMask);
	checkNotNull(fieldTypes);

	final ArrayList<Class<?>> supported = new ArrayList<Class<?>>();

	// Index of the next unused entry in fieldTypes.
	int nextType = 0;
	for (int field = 0; field < includedMask.length; field++) {
		if (!includedMask[field]) {
			continue;
		}

		if (nextType >= fieldTypes.length) {
			throw new IllegalArgumentException("Missing type for included field " + field + ".");
		}
		final Class<?> candidate = fieldTypes[nextType];
		nextType++;

		if (candidate == null) {
			throw new IllegalArgumentException("Type for included field " + field + " should not be null.");
		}
		// Reject types for which no parser is registered.
		if (FieldParser.getParserForType(candidate) == null) {
			throw new IllegalArgumentException("The type '" + candidate.getName() + "' is not supported for the CSV input format.");
		}
		supported.add(candidate);
	}

	this.fieldTypes = supported.toArray(new Class<?>[supported.size()]);
	this.fieldIncluded = includedMask;
}
 
Example #29
Source File: GenericCsvInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Selects the fields to read by their index in the source and sets their types.
 *
 * <p>{@code fieldIncluded} is sized to the largest index plus one. A {@code null} type means the
 * field at the corresponding index is not included.
 *
 * @param sourceFieldIndices non-negative indices of the fields in the source
 * @param fieldTypes         types of the fields, index-aligned with {@code sourceFieldIndices}
 * @throws IllegalArgumentException if an index is negative or a type has no parser
 */
protected void setFieldsGeneric(int[] sourceFieldIndices, Class<?>[] fieldTypes) {
	checkNotNull(sourceFieldIndices);
	checkNotNull(fieldTypes);
	checkArgument(sourceFieldIndices.length == fieldTypes.length,
		"Number of field indices and field types must match.");

	for (int pos = 0; pos < sourceFieldIndices.length; pos++) {
		if (sourceFieldIndices[pos] < 0) {
			throw new IllegalArgumentException("Field indices must not be smaller than zero.");
		}
	}

	// The inclusion mask must reach up to the highest referenced source field.
	this.fieldIncluded = new boolean[max(sourceFieldIndices) + 1];
	final ArrayList<Class<?>> supported = new ArrayList<Class<?>>();

	for (int pos = 0; pos < fieldTypes.length; pos++) {
		final Class<?> candidate = fieldTypes[pos];
		if (candidate == null) {
			// Not included: the flag stays false and no type is recorded.
			continue;
		}

		// Reject types for which no parser is registered.
		if (FieldParser.getParserForType(candidate) == null) {
			throw new IllegalArgumentException("The type '" + candidate.getName()
				+ "' is not supported for the CSV input format.");
		}
		supported.add(candidate);
		fieldIncluded[sourceFieldIndices[pos]] = true;
	}

	this.fieldTypes = supported.toArray(new Class<?>[supported.size()]);
}
 
Example #30
Source File: PrimitiveInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the given split and instantiates the {@link FieldParser} for {@code primitiveClass}.
 *
 * @param split the split to open
 * @throws IOException declared by the overridden method
 * @throws IllegalArgumentException if no parser is registered for {@code primitiveClass}
 */
@Override
public void open(FileInputSplit split) throws IOException {
	super.open(split);

	final Class<? extends FieldParser<OT>> parserType = FieldParser.getParserForType(primitiveClass);
	if (parserType != null) {
		parser = InstantiationUtil.instantiate(parserType, FieldParser.class);
		return;
	}

	// No parser is registered for the requested type.
	throw new IllegalArgumentException("The type '" + primitiveClass.getName() + "' is not supported for the primitive input format.");
}