Java Code Examples for org.apache.flink.api.java.typeutils.RowTypeInfo

The following examples show how to use org.apache.flink.api.java.typeutils.RowTypeInfo. These examples are extracted from open source projects; the source project and file are listed above each example.
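
For orientation (not taken from any of the listed projects): a RowTypeInfo pairs an array of field TypeInformation with an optional array of field names. The sketch below uses assumed field names and types to show the constructor and the accessors (getArity, getFieldNames, getFieldTypes) that the examples below rely on.

import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.RowTypeInfo;

public class RowTypeInfoSketch {

	public static void main(String[] args) {
		// Hypothetical field types and names; any TypeInformation[] and String[] of equal length work.
		TypeInformation<?>[] types = new TypeInformation<?>[] {
			BasicTypeInfo.INT_TYPE_INFO,
			BasicTypeInfo.STRING_TYPE_INFO,
			BasicTypeInfo.DOUBLE_TYPE_INFO
		};
		String[] names = new String[] {"id", "name", "amount"};

		RowTypeInfo rowType = new RowTypeInfo(types, names);

		// Accessors used throughout the examples below.
		System.out.println(rowType.getArity());          // 3
		System.out.println(rowType.getFieldNames()[1]);  // name
		System.out.println(rowType.getFieldTypes()[0]);  // the field's TypeInformation
	}
}
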
Example 1
Source Project: flink   Source File: ParquetSchemaConverter.java    License: Apache License 2.0
private static TypeInformation<?> convertFields(List<Type> parquetFields) {
	List<TypeInformation<?>> types = new ArrayList<>();
	List<String> names = new ArrayList<>();
	for (Type field : parquetFields) {
		TypeInformation<?> subType = convertParquetTypeToTypeInfo(field);
		if (subType != null) {
			types.add(subType);
			names.add(field.getName());
		} else {
			LOGGER.error("Parquet field {} in schema type {} can not be converted to Flink Internal Type",
				field.getName(), field.getOriginalType().name());
		}
	}

	return new RowTypeInfo(types.toArray(new TypeInformation<?>[0]),
		names.toArray(new String[0]));
}
 
Example 2
Source Project: pulsar   Source File: PulsarTableSink.java    License: Apache License 2.0
@Override
public TableSink<Row> configure(String[] fieldNames,
                                TypeInformation<?>[] fieldTypes) {

    PulsarTableSink sink = createSink();

    sink.fieldNames = checkNotNull(fieldNames, "Field names are null");
    sink.fieldTypes = checkNotNull(fieldTypes, "Field types are null");
    checkArgument(fieldNames.length == fieldTypes.length,
            "Number of provided field names and types do not match");

    RowTypeInfo rowSchema = new RowTypeInfo(fieldTypes, fieldNames);
    sink.serializationSchema = createSerializationSchema(rowSchema);
    sink.keyExtractor = new RowKeyExtractor(
            routingKeyFieldName,
            fieldNames,
            fieldTypes);
    sink.propertiesExtractor = PulsarPropertiesExtractor.EMPTY;

    return sink;
}
 
Example 3
/**
 * Specifies schema of the produced table.
 *
 * @return The schema of the produced table.
 */
@Override
public TypeInformation<Row> getReturnType() {

	TypeInformation<?>[] types = new TypeInformation[] {
			Types.LONG,
			Types.LONG,
			Types.LONG,
			Types.STRING,
			Types.FLOAT,
			Types.FLOAT,
			Types.FLOAT
	};

	String[] names = new String[]{
			"rideId",
			"taxiId",
			"driverId",
			"paymentType",
			"tip",
			"tolls",
			"totalFare"
	};

	return new RowTypeInfo(types, names);
}
 
Example 4
Source Project: flink   Source File: ParquetMapInputFormat.java    License: Apache License 2.0
@SuppressWarnings("unchecked")
private void convert(Map<String, Object> target, Map<String, Object> source, MapTypeInfo mapTypeInfo) {
	TypeInformation valueTypeInfo = mapTypeInfo.getValueTypeInfo();

	for (Map.Entry<String, Object> entry : source.entrySet()) {
		String key = entry.getKey();
		Object value = entry.getValue();
		if (valueTypeInfo instanceof RowTypeInfo) {
			Map<String, Object> nestedRow = new HashMap<>();
			convert(nestedRow, (Row) value,
				((RowTypeInfo) valueTypeInfo).getFieldTypes(), ((RowTypeInfo) valueTypeInfo).getFieldNames());
			target.put(key, nestedRow);
		} else if (valueTypeInfo instanceof MapTypeInfo) {
			Map<String, Object> nestedMap = new HashMap<>();
			convert(nestedMap, (Map<String, Object>) value, (MapTypeInfo) valueTypeInfo);
			target.put(key, nestedMap);
		} else if (valueTypeInfo instanceof ObjectArrayTypeInfo) {
			List<Object> nestedObjectList = new ArrayList<>();
			convert(nestedObjectList, (Object[]) value, (ObjectArrayTypeInfo) valueTypeInfo);
			target.put(key, nestedObjectList);
		}
	}
	}
}
 
Example 5
Source Project: Flink-CEPplus   Source File: RowSerializerTest.java    License: Apache License 2.0
@Test
public void testRowSerializer() {
	TypeInformation<Row> typeInfo = new RowTypeInfo(
		BasicTypeInfo.INT_TYPE_INFO,
		BasicTypeInfo.STRING_TYPE_INFO);
	Row row1 = new Row(2);
	row1.setField(0, 1);
	row1.setField(1, "a");

	Row row2 = new Row(2);
	row2.setField(0, 2);
	row2.setField(1, null);

	TypeSerializer<Row> serializer = typeInfo.createSerializer(new ExecutionConfig());
	RowSerializerTestInstance instance = new RowSerializerTestInstance(serializer, row1, row2);
	instance.testAll();
}
 
Example 6
Source Project: flink   Source File: CassandraSink.java    License: Apache License 2.0
/**
 * Writes a DataStream into a Cassandra database.
 *
 * @param input input DataStream
 * @param <IN>  input type
 * @return CassandraSinkBuilder, to further configure the sink
 */
public static <IN> CassandraSinkBuilder<IN> addSink(DataStream<IN> input) {
	TypeInformation<IN> typeInfo = input.getType();
	if (typeInfo instanceof TupleTypeInfo) {
		DataStream<Tuple> tupleInput = (DataStream<Tuple>) input;
		return (CassandraSinkBuilder<IN>) new CassandraTupleSinkBuilder<>(tupleInput, tupleInput.getType(), tupleInput.getType().createSerializer(tupleInput.getExecutionEnvironment().getConfig()));
	}
	if (typeInfo instanceof RowTypeInfo) {
		DataStream<Row> rowInput = (DataStream<Row>) input;
		return (CassandraSinkBuilder<IN>) new CassandraRowSinkBuilder(rowInput, rowInput.getType(), rowInput.getType().createSerializer(rowInput.getExecutionEnvironment().getConfig()));
	}
	if (typeInfo instanceof PojoTypeInfo) {
		return new CassandraPojoSinkBuilder<>(input, input.getType(), input.getType().createSerializer(input.getExecutionEnvironment().getConfig()));
	}
	if (typeInfo instanceof CaseClassTypeInfo) {
		DataStream<Product> productInput = (DataStream<Product>) input;
		return (CassandraSinkBuilder<IN>) new CassandraScalaProductSinkBuilder<>(productInput, productInput.getType(), productInput.getType().createSerializer(input.getExecutionEnvironment().getConfig()));
	}
	throw new IllegalArgumentException("No support for the type of the given DataStream: " + input.getType());
}
 
Example 7
Source Project: flink   Source File: HiveTableSinkTest.java    License: Apache License 2.0
@Test
public void testInsertIntoNonPartitionTable() throws Exception {
	String dbName = "default";
	String tblName = "dest";
	RowTypeInfo rowTypeInfo = createDestTable(dbName, tblName, 0);
	ObjectPath tablePath = new ObjectPath(dbName, tblName);

	TableEnvironment tableEnv = HiveTestUtils.createTableEnv();
	List<Row> toWrite = generateRecords(5);
	Table src = tableEnv.fromTableSource(new CollectionTableSource(toWrite, rowTypeInfo));
	tableEnv.registerTable("src", src);

	tableEnv.registerCatalog("hive", hiveCatalog);
	tableEnv.sqlQuery("select * from src").insertInto("hive", "default", "dest");
	tableEnv.execute("mytest");

	verifyWrittenData(toWrite, hiveShell.executeQuery("select * from " + tblName));

	hiveCatalog.dropTable(tablePath, false);
}
 
Example 8
Source Project: flink   Source File: JDBCInputFormat.java    License: Apache License 2.0
public JDBCInputFormat finish() {
	if (format.username == null) {
		LOG.info("Username was not supplied separately.");
	}
	if (format.password == null) {
		LOG.info("Password was not supplied separately.");
	}
	if (format.dbURL == null) {
		throw new IllegalArgumentException("No database URL supplied");
	}
	if (format.queryTemplate == null) {
		throw new IllegalArgumentException("No query supplied");
	}
	if (format.drivername == null) {
		throw new IllegalArgumentException("No driver supplied");
	}
	if (format.rowTypeInfo == null) {
		throw new IllegalArgumentException("No " + RowTypeInfo.class.getSimpleName() + " supplied");
	}
	if (format.parameterValues == null) {
		LOG.debug("No input splitting configured (data will be read with parallelism 1).");
	}
	return format;
}
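
For context, the finish() method above is the last step of the JDBCInputFormat builder. The following is a minimal usage sketch, assuming the legacy org.apache.flink.api.java.io.jdbc builder API; the driver, URL, and query are placeholders, and the RowTypeInfo must describe the columns returned by the query:

RowTypeInfo rowTypeInfo = new RowTypeInfo(
	new TypeInformation<?>[] {BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO},
	new String[] {"id", "title"});

JDBCInputFormat inputFormat = JDBCInputFormat.buildJDBCInputFormat()
	.setDrivername("org.apache.derby.jdbc.EmbeddedDriver")  // placeholder driver
	.setDBUrl("jdbc:derby:memory:example")                  // placeholder URL
	.setQuery("SELECT id, title FROM books")                 // columns must match rowTypeInfo
	.setRowTypeInfo(rowTypeInfo)                             // required, as checked in finish() above
	.finish();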
 
Example 9
Source Project: flink   Source File: ParquetMapInputFormat.java    License: Apache License 2.0
@SuppressWarnings("unchecked")
private void convert(List<Object> target, Object[] source, ObjectArrayTypeInfo objectArrayTypeInfo) {
	TypeInformation<?> itemType = objectArrayTypeInfo.getComponentInfo();
	for (Object field : source) {
		if (itemType instanceof RowTypeInfo) {
			Map<String, Object> nestedRow = new HashMap<>();
			convert(nestedRow, (Row) field,
				((RowTypeInfo) itemType).getFieldTypes(), ((RowTypeInfo) itemType).getFieldNames());
			target.add(nestedRow);
		} else if (itemType instanceof MapTypeInfo) {
			Map<String, Object> nestedMap = new HashMap<>();
			MapTypeInfo mapTypeInfo = (MapTypeInfo) itemType;
			convert(nestedMap, (Map<String, Object>) field, mapTypeInfo);
			target.add(nestedMap);
		} else if (itemType instanceof ObjectArrayTypeInfo) {
			List<Object> nestedObjectList = new ArrayList<>();
			convert(nestedObjectList, (Row[]) field, (ObjectArrayTypeInfo) itemType);
			target.add(nestedObjectList);
		}
	}

}
 
Example 10
Source Project: flink   Source File: JDBCTableSource.java    License: Apache License 2.0
private JDBCTableSource(
	JDBCOptions options, JDBCReadOptions readOptions, JDBCLookupOptions lookupOptions,
	TableSchema schema, int[] selectFields) {
	this.options = options;
	this.readOptions = readOptions;
	this.lookupOptions = lookupOptions;
	this.schema = schema;

	this.selectFields = selectFields;

	final TypeInformation<?>[] schemaTypeInfos = schema.getFieldTypes();
	final String[] schemaFieldNames = schema.getFieldNames();
	if (selectFields != null) {
		TypeInformation<?>[] typeInfos = new TypeInformation[selectFields.length];
		String[] typeNames = new String[selectFields.length];
		for (int i = 0; i < selectFields.length; i++) {
			typeInfos[i] = schemaTypeInfos[selectFields[i]];
			typeNames[i] = schemaFieldNames[selectFields[i]];
		}
		this.returnType = new RowTypeInfo(typeInfos, typeNames);
	} else {
		this.returnType = new RowTypeInfo(schemaTypeInfos, schemaFieldNames);
	}
}
 
Example 11
Source Project: flink   Source File: OrcRowInputFormat.java    License: Apache License 2.0
/**
 * Creates an OrcRowInputFormat.
 *
 * @param path The path to read ORC files from.
 * @param orcSchema The schema of the ORC files as ORC TypeDescription.
 * @param orcConfig The configuration to read the ORC files with.
 * @param batchSize The number of Row objects to read in a batch.
 */
public OrcRowInputFormat(String path, TypeDescription orcSchema, Configuration orcConfig, int batchSize) {
	super(new Path(path));

	// configure OrcRowInputFormat
	this.schema = orcSchema;
	this.rowType = (RowTypeInfo) OrcBatchReader.schemaToTypeInfo(schema);
	this.conf = orcConfig;
	this.batchSize = batchSize;

	// set default selection mask, i.e., all fields.
	this.selectedFields = new int[this.schema.getChildren().size()];
	for (int i = 0; i < selectedFields.length; i++) {
		this.selectedFields[i] = i;
	}
}
 
Example 12
Source Project: sylph   Source File: StreamSqlBuilder.java    License: Apache License 2.0
public void buildStreamBySql(String sql)
{
    FlinkSqlParser flinkSqlParser = FlinkSqlParser.builder()
            .setTableEnv(tableEnv)
            .setConnectorStore(connectorStore)
            .build();
    Statement statement = sqlParser.createStatement(sql);

    if (statement instanceof CreateStreamAsSelect) {
        CreateStreamAsSelect createStreamAsSelect = (CreateStreamAsSelect) statement;
        Table table = tableEnv.sqlQuery(createStreamAsSelect.getViewSql());
        RowTypeInfo rowTypeInfo = (RowTypeInfo) table.getSchema().toRowType();
        DataStream<Row> stream = tableEnv.toAppendStream(table, Row.class);
        stream.getTransformation().setOutputType(rowTypeInfo);

        registerStreamTable(stream, createStreamAsSelect.getName(), createStreamAsSelect.getWatermark(), ImmutableList.of());
    }
    else if (statement instanceof CreateTable) {
        if (((CreateTable) statement).getType() == CreateTable.Type.BATCH) {
            batchTables.add((CreateTable) statement);
        }
        else {
            createStreamTable((CreateTable) statement);
        }
    }
    else if (statement instanceof CreateFunction) {
        createFunction((CreateFunction) statement);
    }
    else if (statement instanceof InsertInto || statement instanceof SelectQuery) {
        flinkSqlParser.parser(sql, ImmutableList.copyOf(batchTables));
    }
    else {
        throw new IllegalArgumentException("this driver class " + statement.getClass() + " have't support!");
    }
}
 
Example 13
Source Project: flink   Source File: DescriptorProperties.java    License: Apache License 2.0
/**
 * Validates a type property.
 */
public void validateType(String key, boolean isOptional, boolean requireRow) {
	validateOptional(
		key,
		isOptional,
		(value) -> {
			// we don't validate the string but let the parser do the work for us
			// it throws a validation exception
			final TypeInformation<?> typeInfo = TypeStringUtils.readTypeInfo(value);
			if (requireRow && !(typeInfo instanceof RowTypeInfo)) {
				throw new ValidationException(
					"Row type information expected for key '" + key + "' but was: " + value);
			}
		});
}
 
Example 14
Source Project: flink   Source File: JsonRowFormatFactory.java    License: Apache License 2.0
private TypeInformation<Row> createTypeInformation(DescriptorProperties descriptorProperties) {
	if (descriptorProperties.containsKey(JsonValidator.FORMAT_SCHEMA)) {
		return (RowTypeInfo) descriptorProperties.getType(JsonValidator.FORMAT_SCHEMA);
	} else if (descriptorProperties.containsKey(JsonValidator.FORMAT_JSON_SCHEMA)) {
		return JsonRowSchemaConverter.convert(descriptorProperties.getString(JsonValidator.FORMAT_JSON_SCHEMA));
	} else {
		return deriveSchema(descriptorProperties.asMap()).toRowType();
	}
}
 
Example 15
Source Project: alibaba-flink-connectors   Source File: DatahubTableSink.java    License: Apache License 2.0
@Override
public OutputFormat<Row> getOutputFormat() {
	RowTypeInfo flinkRowTypeInfo = new RowTypeInfo(schema.getFieldTypes(), schema.getFieldNames());
	DatahubOutputFormat outputFormat = new DatahubOutputFormat<Row>(
			endpoint,
			project,
			topic,
			accessId,
			accessKey,
			flinkRowTypeInfo);

	if (prop.containsKey(CONNECTOR_BUFFER_SIZE)) {
		outputFormat.setBufferSize(prop.getInt(CONNECTOR_BUFFER_SIZE));
	}

	if (prop.containsKey(CONNECTOR_BATCH_SIZE)) {
		outputFormat.setBatchSize(prop.getInt(CONNECTOR_BATCH_SIZE));
	}

	if (prop.containsKey(CONNECTOR_BATCH_WRITE_TIMEOUT_IN_MILLS)) {
		outputFormat.setBatchWriteTimeout(prop.getLong(CONNECTOR_BATCH_WRITE_TIMEOUT_IN_MILLS));
	}
	if (prop.containsKey(CONNECTOR_RETRY_TIMEOUT_IN_MILLS)) {
		outputFormat.setRetryTimeoutInMills(prop.getInt(CONNECTOR_RETRY_TIMEOUT_IN_MILLS));
	}

	if (prop.containsKey(CONNECTOR_MAX_RETRY_TIMES)) {
		outputFormat.setMaxRetryTimes(prop.getInt(CONNECTOR_MAX_RETRY_TIMES));
	}

	outputFormat.setRecordResolver(
			new DatahubRowRecordResolver(flinkRowTypeInfo, project, topic, accessId, accessKey, endpoint));

	return outputFormat;
}
 
Example 16
Source Project: alchemy   Source File: KafkaBaseSinkDescriptor.java    License: Apache License 2.0
@Override
public <T> T transform(TableSchema param) throws Exception {
    TableSchema tableSchema = createTableSchema();
    if (tableSchema == null) {
        tableSchema = param;
    }
    if (tableSchema == null) {
        throw new IllegalArgumentException("TableSchema must be not null");
    }
    TypeInformation[] fieldTypes = new TypeInformation[tableSchema.getFieldCount()];
    for (int i = 0; i < tableSchema.getFieldCount(); i++) {
        if (FlinkTypeFactory.isTimeIndicatorType(tableSchema.getFieldTypes()[i])) {
            fieldTypes[i] = Types.SQL_TIMESTAMP();
        } else {
            fieldTypes[i] = tableSchema.getFieldTypes()[i];
        }
    }
    TypeInformation typeInformation = new RowTypeInfo(fieldTypes, tableSchema.getFieldNames());
    SerializationSchema<Row> rowSerializationSchema = createSerializationSchema(typeInformation);
    return (T) newTableSink(
        new TableSchema(tableSchema.getFieldNames(), fieldTypes),
        this.topic,
        PropertiesUtil.fromYamlMap(this.getProperties()),
        Optional.empty(),
        rowSerializationSchema == null ? new JsonRowSerializationSchema(typeInformation) : rowSerializationSchema
    );
}
 
Example 17
Source Project: flink   Source File: CsvRowSerializationSchema.java    License: Apache License 2.0
/**
 * Creates a {@link CsvRowSerializationSchema} expecting the given {@link TypeInformation}.
 *
 * @param typeInfo type information used to create schema.
 */
public Builder(TypeInformation<Row> typeInfo) {
	Preconditions.checkNotNull(typeInfo, "Type information must not be null.");

	if (!(typeInfo instanceof RowTypeInfo)) {
		throw new IllegalArgumentException("Row type information expected.");
	}

	this.typeInfo = (RowTypeInfo) typeInfo;
	this.csvSchema = CsvRowSchemaConverter.convert((RowTypeInfo) typeInfo);
}
 
Example 18
Source Project: flink   Source File: TemporalTableFunctionImpl.java    License: Apache License 2.0
public static TemporalTableFunction create(
		QueryOperation operationTree,
		Expression timeAttribute,
		Expression primaryKey) {
	return new TemporalTableFunctionImpl(
		operationTree,
		timeAttribute,
		primaryKey,
		new RowTypeInfo(
			operationTree.getTableSchema().getFieldTypes(),
			operationTree.getTableSchema().getFieldNames()));
}
 
Example 19
Source Project: flink   Source File: CsvRowDeserializationSchema.java    License: Apache License 2.0
private CsvRowDeserializationSchema(
		RowTypeInfo typeInfo,
		CsvSchema csvSchema,
		boolean ignoreParseErrors) {
	this.typeInfo = typeInfo;
	this.runtimeConverter = createRowRuntimeConverter(typeInfo, ignoreParseErrors, true);
	this.csvSchema = csvSchema;
	this.objectReader = new CsvMapper().readerFor(JsonNode.class).with(csvSchema);
	this.ignoreParseErrors = ignoreParseErrors;
}
 
Example 20
Source Project: sylph   Source File: StreamSqlUtil.java    License: Apache License 2.0
public static RowTypeInfo schemaToRowTypeInfo(Schema schema)
{
    TypeInformation<?>[] types = schema.getFieldTypes().stream().map(StreamSqlUtil::getFlinkType)
            .toArray(TypeInformation<?>[]::new);
    String[] names = schema.getFieldNames().toArray(new String[0]);
    return new RowTypeInfo(types, names);
}
 
Example 21
Source Project: flink   Source File: MaterializedCollectStreamResult.java    License: Apache License 2.0
public MaterializedCollectStreamResult(
		RowTypeInfo outputType,
		ExecutionConfig config,
		InetAddress gatewayAddress,
		int gatewayPort,
		int maxRowCount) {

	this(
		outputType,
		config,
		gatewayAddress,
		gatewayPort,
		maxRowCount,
		computeMaterializedTableOvercommit(maxRowCount));
}
 
Example 22
Source Project: Flink-CEPplus   Source File: JavaSqlITCase.java    License: Apache License 2.0
@Test
public void testRowRegisterRowWithNames() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
	StreamITCase.clear();

	List<Row> data = new ArrayList<>();
	data.add(Row.of(1, 1L, "Hi"));
	data.add(Row.of(2, 2L, "Hello"));
	data.add(Row.of(3, 2L, "Hello world"));

	TypeInformation<?>[] types = {
			BasicTypeInfo.INT_TYPE_INFO,
			BasicTypeInfo.LONG_TYPE_INFO,
			BasicTypeInfo.STRING_TYPE_INFO};
	String[] names = {"a", "b", "c"};

	RowTypeInfo typeInfo = new RowTypeInfo(types, names);

	DataStream<Row> ds = env.fromCollection(data).returns(typeInfo);

	Table in = tableEnv.fromDataStream(ds, "a,b,c");
	tableEnv.registerTable("MyTableRow", in);

	String sqlQuery = "SELECT a,c FROM MyTableRow";
	Table result = tableEnv.sqlQuery(sqlQuery);

	DataStream<Row> resultSet = tableEnv.toAppendStream(result, Row.class);
	resultSet.addSink(new StreamITCase.StringSink<Row>());
	env.execute();

	List<String> expected = new ArrayList<>();
	expected.add("1,Hi");
	expected.add("2,Hello");
	expected.add("3,Hello world");

	StreamITCase.compareWithList(expected);
}
 
Example 23
Source Project: flink   Source File: ParquetMapInputFormat.java    License: Apache License 2.0
@SuppressWarnings("unchecked")
private void convert(Map<String, Object> map, Row row, TypeInformation<?>[] fieldTypes, String[] fieldNames) {
	for (int i = 0; i < fieldNames.length; i++) {
		if (row.getField(i) != null) {
			if (fieldTypes[i] instanceof BasicTypeInfo
				|| fieldTypes[i] instanceof PrimitiveArrayTypeInfo
				|| fieldTypes[i] instanceof BasicArrayTypeInfo) {
				map.put(fieldNames[i], row.getField(i));
			} else if (fieldTypes[i] instanceof RowTypeInfo) {
				Map<String, Object> nestedRow = new HashMap<>();
				RowTypeInfo nestedRowTypeInfo = (RowTypeInfo) fieldTypes[i];
				convert(nestedRow, (Row) row.getField(i),
					nestedRowTypeInfo.getFieldTypes(), nestedRowTypeInfo.getFieldNames());
				map.put(fieldNames[i], nestedRow);
			} else if (fieldTypes[i] instanceof MapTypeInfo) {
				Map<String, Object> nestedMap = new HashMap<>();
				MapTypeInfo mapTypeInfo = (MapTypeInfo) fieldTypes[i];
				convert(nestedMap, (Map<String, Object>) row.getField(i), mapTypeInfo);
				map.put(fieldNames[i], nestedMap);
			} else if (fieldTypes[i] instanceof ObjectArrayTypeInfo) {
				List<Object> nestedObjectList = new ArrayList<>();
				ObjectArrayTypeInfo objectArrayTypeInfo = (ObjectArrayTypeInfo) fieldTypes[i];
				convert(nestedObjectList, (Row[]) row.getField(i), objectArrayTypeInfo);
				map.put(fieldNames[i], nestedObjectList);
			}
		}
	}
}
 
Example 24
Source Project: Flink-CEPplus   Source File: CsvRowDeserializationSchema.java    License: Apache License 2.0
private CsvRowDeserializationSchema(
		RowTypeInfo typeInfo,
		CsvSchema csvSchema,
		boolean ignoreParseErrors) {
	this.typeInfo = typeInfo;
	this.runtimeConverter = createRowRuntimeConverter(typeInfo, ignoreParseErrors, true);
	this.csvSchema = csvSchema;
	this.objectReader = new CsvMapper().readerFor(JsonNode.class).with(csvSchema);
	this.ignoreParseErrors = ignoreParseErrors;
}
 
Example 25
Source Project: flink   Source File: LegacyRowSerializerTest.java    License: Apache License 2.0
@Test
public void testRowSerializerWithComplexTypes() {
	RowTypeInfo typeInfo = new RowTypeInfo(
		BasicTypeInfo.INT_TYPE_INFO,
		BasicTypeInfo.DOUBLE_TYPE_INFO,
		BasicTypeInfo.STRING_TYPE_INFO,
		new TupleTypeInfo<Tuple3<Integer, Boolean, Short>>(
			BasicTypeInfo.INT_TYPE_INFO,
			BasicTypeInfo.BOOLEAN_TYPE_INFO,
			BasicTypeInfo.SHORT_TYPE_INFO),
		TypeExtractor.createTypeInfo(MyPojo.class));

	MyPojo testPojo1 = new MyPojo();
	testPojo1.name = null;
	MyPojo testPojo2 = new MyPojo();
	testPojo2.name = "Test1";
	MyPojo testPojo3 = new MyPojo();
	testPojo3.name = "Test2";

	Row[] data = new Row[]{
		createRow(null, null, null, null, null),
		createRow(0, null, null, null, null),
		createRow(0, 0.0, null, null, null),
		createRow(0, 0.0, "a", null, null),
		createRow(1, 0.0, "a", null, null),
		createRow(1, 1.0, "a", null, null),
		createRow(1, 1.0, "b", null, null),
		createRow(1, 1.0, "b", new Tuple3<>(1, false, (short) 2), null),
		createRow(1, 1.0, "b", new Tuple3<>(2, false, (short) 2), null),
		createRow(1, 1.0, "b", new Tuple3<>(2, true, (short) 2), null),
		createRow(1, 1.0, "b", new Tuple3<>(2, true, (short) 3), null),
		createRow(1, 1.0, "b", new Tuple3<>(2, true, (short) 3), testPojo1),
		createRow(1, 1.0, "b", new Tuple3<>(2, true, (short) 3), testPojo2),
		createRow(1, 1.0, "b", new Tuple3<>(2, true, (short) 3), testPojo3)
	};

	TypeSerializer<Row> serializer = typeInfo.createLegacySerializer(new ExecutionConfig());
	RowSerializerTestInstance testInstance = new RowSerializerTestInstance(serializer, data);
	testInstance.testAll();
}
 
Example 26
Source Project: flink   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0
@Test
public void testProducedType() throws IOException {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());

	assertTrue(rowOrcInputFormat.getProducedType() instanceof RowTypeInfo);
	RowTypeInfo producedType = (RowTypeInfo) rowOrcInputFormat.getProducedType();

	assertArrayEquals(
		new TypeInformation[]{
			// primitives
			Types.BOOLEAN, Types.BYTE, Types.SHORT, Types.INT, Types.LONG, Types.FLOAT, Types.DOUBLE,
			// binary
			PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO,
			// string
			Types.STRING,
			// struct
			Types.ROW_NAMED(
				new String[]{"list"},
				ObjectArrayTypeInfo.getInfoFor(
					Types.ROW_NAMED(new String[]{"int1", "string1"}, Types.INT, Types.STRING))),
			// list
			ObjectArrayTypeInfo.getInfoFor(
				Types.ROW_NAMED(new String[]{"int1", "string1"}, Types.INT, Types.STRING)),
			// map
			new MapTypeInfo<>(Types.STRING, Types.ROW_NAMED(new String[]{"int1", "string1"}, Types.INT, Types.STRING))
		},
		producedType.getFieldTypes());
	assertArrayEquals(
		new String[]{"boolean1", "byte1", "short1", "int1", "long1", "float1", "double1", "bytes1", "string1", "middle", "list", "map"},
		producedType.getFieldNames());
}
 
Example 27
Source Project: flink   Source File: AsyncLookupJoinRunner.java    License: Apache License 2.0
@Override
public void open(Configuration parameters) throws Exception {
	super.open(parameters);
	this.fetcher = generatedFetcher.newInstance(getRuntimeContext().getUserCodeClassLoader());
	FunctionUtils.setFunctionRuntimeContext(fetcher, getRuntimeContext());
	FunctionUtils.openFunction(fetcher, parameters);

	// try to compile the generated ResultFuture, fail fast if the code is corrupt.
	generatedResultFuture.compile(getRuntimeContext().getUserCodeClassLoader());

	// row converter is stateless which is thread-safe
	DataFormatConverters.RowConverter rowConverter;
	if (fetcherReturnType instanceof RowTypeInfo) {
		rowConverter = (DataFormatConverters.RowConverter) DataFormatConverters.getConverterForDataType(
				fromLegacyInfoToDataType(fetcherReturnType));
	} else if (fetcherReturnType instanceof RowDataTypeInfo) {
		rowConverter = null;
	} else {
		throw new IllegalStateException("This should never happen, " +
			"currently fetcherReturnType can only be RowDataTypeInfo or RowTypeInfo");
	}

	// asyncBufferCapacity + 1 as the queue size in order to avoid
	// blocking on the queue when taking a collector.
	this.resultFutureBuffer = new ArrayBlockingQueue<>(asyncBufferCapacity + 1);
	this.allResultFutures = new ArrayList<>();
	for (int i = 0; i < asyncBufferCapacity + 1; i++) {
		JoinedRowResultFuture rf = new JoinedRowResultFuture(
			resultFutureBuffer,
			createFetcherResultFuture(parameters),
			rowConverter,
			isLeftOuterJoin,
			rightRowTypeInfo.getArity());
		// add will throw exception immediately if the queue is full which should never happen
		resultFutureBuffer.add(rf);
		allResultFutures.add(rf);
	}
}
 
Example 28
/**
 * Creates a JSON deserialization schema for the given fields and types.
 *
 * @param typeInfo   Type information describing the result type. The field names are used
 *                   to parse the JSON file and so are the types.
 * @param columnNames
 */
public FlexQETLRowDeserializationSchema(TypeInformation<Row> typeInfo, String[] columnNames) {
    Preconditions.checkNotNull(typeInfo, "Type information");
    this.typeInfo = typeInfo;

    this.fieldNames = ((RowTypeInfo) typeInfo).getFieldNames();
    this.fieldTypes = ((RowTypeInfo) typeInfo).getFieldTypes();

    this.columnNames = columnNames;
}
 
Example 29
Source Project: flink   Source File: MaterializedCollectStreamResult.java    License: Apache License 2.0
@VisibleForTesting
public MaterializedCollectStreamResult(
		RowTypeInfo outputType,
		ExecutionConfig config,
		InetAddress gatewayAddress,
		int gatewayPort,
		int maxRowCount,
		int overcommitThreshold) {
	super(outputType, config, gatewayAddress, gatewayPort);

	if (maxRowCount <= 0) {
		this.maxRowCount = Integer.MAX_VALUE;
	} else {
		this.maxRowCount = maxRowCount;
	}

	this.overcommitThreshold = overcommitThreshold;

	// prepare for materialization
	final int initialCapacity = computeMaterializedTableCapacity(maxRowCount); // avoid frequent resizing
	materializedTable = new ArrayList<>(initialCapacity);
	rowPositionCache = new HashMap<>(initialCapacity);
	snapshot = new ArrayList<>();
	validRowPosition = 0;
	isLastSnapshot = false;
	pageCount = 0;
}
 
Example 30
Source Project: flink   Source File: RowCsvInputFormatSplitTest.java    License: Apache License 2.0
private void test(
		String content,
		long offset, long length,
		char escapeChar,
		List<Row> expected,
		TypeInformation[] fieldTypes) throws Exception {
	FileInputSplit split = createTempFile(content, offset, length);

	RowCsvInputFormat.Builder builder = RowCsvInputFormat.builder(new RowTypeInfo(fieldTypes), PATH)
			.setEscapeCharacter(escapeChar);

	RowCsvInputFormat format = builder.build();
	format.configure(new Configuration());
	format.open(split);

	List<Row> rows = new ArrayList<>();
	while (!format.reachedEnd()) {
		Row result = new Row(3);
		result = format.nextRecord(result);
		if (result == null) {
			break;
		} else {
			rows.add(result);
		}
	}

	assertEquals(expected, rows);
}