org.apache.flink.api.java.typeutils.RowTypeInfo Java Examples

The following examples show how to use org.apache.flink.api.java.typeutils.RowTypeInfo. They are drawn from open-source projects; the originating project and source file are noted above each example.
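As a quick orientation, here is a minimal, self-contained sketch of the pattern most examples below build on: constructing a RowTypeInfo from field types and names, then querying it. The field names "id" and "name" are illustrative.

import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.RowTypeInfo;

// A two-field row type with explicit field names.
RowTypeInfo rowType = new RowTypeInfo(
	new TypeInformation<?>[]{BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO},
	new String[]{"id", "name"});

rowType.getArity();            // 2
rowType.getFieldNames();       // ["id", "name"]
rowType.getFieldIndex("name"); // 1 (-1 if the field does not exist)

Omitting the names array falls back to the positional defaults f0, f1, and so on.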
Example #1
Source File: OrcRowInputFormat.java    From flink with Apache License 2.0
/**
 * Creates an OrcRowInputFormat.
 *
 * @param path The path to read ORC files from.
 * @param orcSchema The schema of the ORC files as ORC TypeDescription.
 * @param orcConfig The configuration to read the ORC files with.
 * @param batchSize The number of Row objects to read in a batch.
 */
public OrcRowInputFormat(String path, TypeDescription orcSchema, Configuration orcConfig, int batchSize) {
	super(new Path(path));

	// configure OrcRowInputFormat
	this.schema = orcSchema;
	this.rowType = (RowTypeInfo) OrcBatchReader.schemaToTypeInfo(schema);
	this.conf = orcConfig;
	this.batchSize = batchSize;

	// set default selection mask, i.e., all fields.
	this.selectedFields = new int[this.schema.getChildren().size()];
	for (int i = 0; i < selectedFields.length; i++) {
		this.selectedFields[i] = i;
	}
}
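For context, a hypothetical usage sketch of this constructor; the schema string, path, and batch size are placeholders, and TypeDescription comes from org.apache.orc. selectFields narrows the all-fields default mask set above:

TypeDescription schema = TypeDescription.fromString("struct<id:int,name:string>");
OrcRowInputFormat format = new OrcRowInputFormat(
	"/path/to/orc-files", schema, new org.apache.hadoop.conf.Configuration(), 1000);
format.selectFields(1); // project to "name" only; without this call all fields are read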
 
Example #2
Source File: JDBCTableSource.java    From flink with Apache License 2.0
private JDBCTableSource(
	JDBCOptions options, JDBCReadOptions readOptions, JDBCLookupOptions lookupOptions,
	TableSchema schema, int[] selectFields) {
	this.options = options;
	this.readOptions = readOptions;
	this.lookupOptions = lookupOptions;
	this.schema = schema;

	this.selectFields = selectFields;

	final TypeInformation<?>[] schemaTypeInfos = schema.getFieldTypes();
	final String[] schemaFieldNames = schema.getFieldNames();
	if (selectFields != null) {
		TypeInformation<?>[] typeInfos = new TypeInformation[selectFields.length];
		String[] fieldNames = new String[selectFields.length];
		for (int i = 0; i < selectFields.length; i++) {
			typeInfos[i] = schemaTypeInfos[selectFields[i]];
			fieldNames[i] = schemaFieldNames[selectFields[i]];
		}
		this.returnType = new RowTypeInfo(typeInfos, fieldNames);
	} else {
		this.returnType = new RowTypeInfo(schemaTypeInfos, schemaFieldNames);
	}
}
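The projection loop above can also be written with RowTypeInfo's static helper, assuming RowTypeInfo.projectFields is available in the Flink version in use. A sketch using the variables from the example:

RowTypeInfo fullType = new RowTypeInfo(schemaTypeInfos, schemaFieldNames);
// Picks both field types and field names by index, like the loop above.
RowTypeInfo projected = RowTypeInfo.projectFields(fullType, selectFields);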
 
Example #3
Source File: ParquetMapInputFormat.java    From flink with Apache License 2.0
@SuppressWarnings("unchecked")
private void convert(Map<String, Object> target, Map<String, Object> source, MapTypeInfo mapTypeInfo) {
	// Recursively convert nested rows, maps, and object arrays among the map's values.
	TypeInformation valueTypeInfo = mapTypeInfo.getValueTypeInfo();

	for (Map.Entry<String, Object> entry : source.entrySet()) {
		String key = entry.getKey();
		Object value = entry.getValue();
		if (valueTypeInfo instanceof RowTypeInfo) {
			Map<String, Object> nestedRow = new HashMap<>();
			convert(nestedRow, (Row) value,
				((RowTypeInfo) valueTypeInfo).getFieldTypes(), ((RowTypeInfo) valueTypeInfo).getFieldNames());
			target.put(key, nestedRow);
		} else if (valueTypeInfo instanceof MapTypeInfo) {
			Map<String, Object> nestedMap = new HashMap<>();
			convert(nestedMap, (Map<String, Object>) value, (MapTypeInfo) valueTypeInfo);
			target.put(key, nestedMap);
		} else if (valueTypeInfo instanceof ObjectArrayTypeInfo) {
			List<Object> nestedObjectList = new ArrayList<>();
			convert(nestedObjectList, (Object[]) value, (ObjectArrayTypeInfo) valueTypeInfo);
			target.put(key, nestedObjectList);
		}
	}
}
 
Example #4
Source File: ParquetMapInputFormat.java    From flink with Apache License 2.0
@SuppressWarnings("unchecked")
private void convert(List<Object> target, Object[] source, ObjectArrayTypeInfo objectArrayTypeInfo) {
	TypeInformation<?> itemType = objectArrayTypeInfo.getComponentInfo();
	for (Object field : source) {
		if (itemType instanceof RowTypeInfo) {
			Map<String, Object> nestedRow = new HashMap<>();
			convert(nestedRow, (Row) field,
				((RowTypeInfo) itemType).getFieldTypes(), ((RowTypeInfo) itemType).getFieldNames());
			target.add(nestedRow);
		} else if (itemType instanceof MapTypeInfo) {
			Map<String, Object> nestedMap = new HashMap<>();
			MapTypeInfo mapTypeInfo = (MapTypeInfo) itemType;
			convert(nestedMap, (Map<String, Object>) field, mapTypeInfo);
			target.add(nestedMap);
		} else if (itemType instanceof ObjectArrayTypeInfo) {
			List<Object> nestedObjectList = new ArrayList<>();
			convert(nestedObjectList, (Object[]) field, (ObjectArrayTypeInfo) itemType); // cast to Object[]; the component type need not be Row
			target.add(nestedObjectList);
		}
	}
}
 
Example #5
Source File: PulsarTableSink.java    From pulsar with Apache License 2.0
@Override
public TableSink<Row> configure(String[] fieldNames,
                                TypeInformation<?>[] fieldTypes) {

    PulsarTableSink sink = createSink();

    sink.fieldNames = checkNotNull(fieldNames, "Field names are null");
    sink.fieldTypes = checkNotNull(fieldTypes, "Field types are null");
    checkArgument(fieldNames.length == fieldTypes.length,
            "Number of provided field names and types do not match");

    RowTypeInfo rowSchema = new RowTypeInfo(fieldTypes, fieldNames);
    sink.serializationSchema = createSerializationSchema(rowSchema);
    sink.keyExtractor = new RowKeyExtractor(
            routingKeyFieldName,
            fieldNames,
            fieldTypes);
    sink.propertiesExtractor = PulsarPropertiesExtractor.EMPTY;

    return sink;
}
 
Example #6
Source File: JDBCInputFormat.java    From flink with Apache License 2.0
public JDBCInputFormat finish() {
	if (format.username == null) {
		LOG.info("Username was not supplied separately.");
	}
	if (format.password == null) {
		LOG.info("Password was not supplied separately.");
	}
	if (format.dbURL == null) {
		throw new IllegalArgumentException("No database URL supplied");
	}
	if (format.queryTemplate == null) {
		throw new IllegalArgumentException("No query supplied");
	}
	if (format.drivername == null) {
		throw new IllegalArgumentException("No driver supplied");
	}
	if (format.rowTypeInfo == null) {
		throw new IllegalArgumentException("No " + RowTypeInfo.class.getSimpleName() + " supplied");
	}
	if (format.parameterValues == null) {
		LOG.debug("No input splitting configured (data will be read with parallelism 1).");
	}
	return format;
}
 
Example #7
Source File: HiveTableSinkTest.java    From flink with Apache License 2.0
@Test
public void testInsertIntoNonPartitionTable() throws Exception {
	String dbName = "default";
	String tblName = "dest";
	RowTypeInfo rowTypeInfo = createDestTable(dbName, tblName, 0);
	ObjectPath tablePath = new ObjectPath(dbName, tblName);

	TableEnvironment tableEnv = HiveTestUtils.createTableEnv();
	List<Row> toWrite = generateRecords(5);
	Table src = tableEnv.fromTableSource(new CollectionTableSource(toWrite, rowTypeInfo));
	tableEnv.registerTable("src", src);

	tableEnv.registerCatalog("hive", hiveCatalog);
	tableEnv.sqlQuery("select * from src").insertInto("hive", "default", "dest");
	tableEnv.execute("mytest");

	verifyWrittenData(toWrite, hiveShell.executeQuery("select * from " + tblName));

	hiveCatalog.dropTable(tablePath, false);
}
 
Example #8
Source File: RowSerializerTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testRowSerializer() {
	TypeInformation<Row> typeInfo = new RowTypeInfo(
		BasicTypeInfo.INT_TYPE_INFO,
		BasicTypeInfo.STRING_TYPE_INFO);
	Row row1 = new Row(2);
	row1.setField(0, 1);
	row1.setField(1, "a");

	Row row2 = new Row(2);
	row2.setField(0, 2);
	row2.setField(1, null);

	TypeSerializer<Row> serializer = typeInfo.createSerializer(new ExecutionConfig());
	RowSerializerTestInstance instance = new RowSerializerTestInstance(serializer, row1, row2);
	instance.testAll();
}
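A round-trip sketch of what testAll() exercises internally, using Flink's in-memory views org.apache.flink.core.memory.DataOutputSerializer and DataInputDeserializer (assumes the serializer and row1 from the test above; serialize and deserialize throw IOException):

DataOutputSerializer out = new DataOutputSerializer(64);
serializer.serialize(row1, out);
DataInputDeserializer in = new DataInputDeserializer(out.getCopyOfBuffer());
Row copy = serializer.deserialize(in); // equals row1 field by field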
 
Example #9
Source File: CassandraSink.java    From flink with Apache License 2.0
/**
 * Writes a DataStream into a Cassandra database.
 *
 * @param input input DataStream
 * @param <IN>  input type
 * @return CassandraSinkBuilder, to further configure the sink
 */
public static <IN> CassandraSinkBuilder<IN> addSink(DataStream<IN> input) {
	TypeInformation<IN> typeInfo = input.getType();
	if (typeInfo instanceof TupleTypeInfo) {
		DataStream<Tuple> tupleInput = (DataStream<Tuple>) input;
		return (CassandraSinkBuilder<IN>) new CassandraTupleSinkBuilder<>(tupleInput, tupleInput.getType(), tupleInput.getType().createSerializer(tupleInput.getExecutionEnvironment().getConfig()));
	}
	if (typeInfo instanceof RowTypeInfo) {
		DataStream<Row> rowInput = (DataStream<Row>) input;
		return (CassandraSinkBuilder<IN>) new CassandraRowSinkBuilder(rowInput, rowInput.getType(), rowInput.getType().createSerializer(rowInput.getExecutionEnvironment().getConfig()));
	}
	if (typeInfo instanceof PojoTypeInfo) {
		return new CassandraPojoSinkBuilder<>(input, input.getType(), input.getType().createSerializer(input.getExecutionEnvironment().getConfig()));
	}
	if (typeInfo instanceof CaseClassTypeInfo) {
		DataStream<Product> productInput = (DataStream<Product>) input;
		return (CassandraSinkBuilder<IN>) new CassandraScalaProductSinkBuilder<>(productInput, productInput.getType(), productInput.getType().createSerializer(input.getExecutionEnvironment().getConfig()));
	}
	throw new IllegalArgumentException("No support for the type of the given DataStream: " + input.getType());
}
 
Example #10
Source File: TaxiFareTableSource.java    From flink-training-exercises with Apache License 2.0
/**
 * Specifies schema of the produced table.
 *
 * @return The schema of the produced table.
 */
@Override
public TypeInformation<Row> getReturnType() {

	TypeInformation<?>[] types = new TypeInformation[] {
			Types.LONG,
			Types.LONG,
			Types.LONG,
			Types.STRING,
			Types.FLOAT,
			Types.FLOAT,
			Types.FLOAT
	};

	String[] names = new String[]{
			"rideId",
			"taxiId",
			"driverId",
			"paymentType",
			"tip",
			"tolls",
			"totalFare"
	};

	return new RowTypeInfo(types, names);
}
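Assuming these constants come from org.apache.flink.api.common.typeinfo.Types, the same schema can be expressed more compactly with the ROW_NAMED shorthand, which returns a TypeInformation<Row> backed by a RowTypeInfo:

return Types.ROW_NAMED(
	new String[]{"rideId", "taxiId", "driverId", "paymentType", "tip", "tolls", "totalFare"},
	Types.LONG, Types.LONG, Types.LONG, Types.STRING, Types.FLOAT, Types.FLOAT, Types.FLOAT);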
 
Example #11
Source File: ParquetSchemaConverter.java    From flink with Apache License 2.0
private static TypeInformation<?> convertFields(List<Type> parquetFields) {
	List<TypeInformation<?>> types = new ArrayList<>();
	List<String> names = new ArrayList<>();
	for (Type field : parquetFields) {
		TypeInformation<?> subType = convertParquetTypeToTypeInfo(field);
		if (subType != null) {
			types.add(subType);
			names.add(field.getName());
		} else {
			LOGGER.error("Parquet field {} in schema type {} can not be converted to Flink Internal Type",
				field.getName(), field.getOriginalType().name());
		}
	}

	return new RowTypeInfo(types.toArray(new TypeInformation<?>[0]),
		names.toArray(new String[0]));
}
 
Example #12
Source File: MysqlSideFunction.java    From alchemy with Apache License 2.0
private Map<Integer, String> getIndexFields(RowTypeInfo sideType) {
    Map<Integer, String> indexFields = new HashMap<>(sideType.getArity());
    String[] fieldNames = sideType.getFieldNames();
    for (String field : fieldNames) {
        indexFields.put(sideType.getFieldIndex(field), field);
    }
    return indexFields;
}
 
Example #13
Source File: AvroRowDeserializationSchema.java    From flink with Apache License 2.0
/**
 * Creates an Avro deserialization schema for the given Avro schema string.
 *
 * @param avroSchemaString Avro schema string used to deserialize Avro records into Flink rows
 */
public AvroRowDeserializationSchema(String avroSchemaString) {
	Preconditions.checkNotNull(avroSchemaString, "Avro schema must not be null.");
	recordClazz = null;
	final TypeInformation<?> typeInfo = AvroSchemaConverter.convertToTypeInfo(avroSchemaString);
	Preconditions.checkArgument(typeInfo instanceof RowTypeInfo, "Row type information expected.");
	this.typeInfo = (RowTypeInfo) typeInfo;
	schemaString = avroSchemaString;
	schema = new Schema.Parser().parse(avroSchemaString);
	record = new GenericData.Record(schema);
	datumReader = new GenericDatumReader<>(schema);
	inputStream = new MutableByteArrayInputStream();
	decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
}
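A sketch of deriving the same row type up front, before constructing the schema; the inline Avro schema literal is illustrative:

String avroSchemaString =
	"{\"type\":\"record\",\"name\":\"User\",\"fields\":["
	+ "{\"name\":\"id\",\"type\":\"int\"},{\"name\":\"name\",\"type\":\"string\"}]}";
TypeInformation<?> typeInfo = AvroSchemaConverter.convertToTypeInfo(avroSchemaString);
RowTypeInfo rowType = (RowTypeInfo) typeInfo; // ["id", "name"], matching the schema's produced type
AvroRowDeserializationSchema deserializer = new AvroRowDeserializationSchema(avroSchemaString);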
 
Example #14
Source File: LegacyRowSerializerTest.java    From flink with Apache License 2.0
@Test
public void testLargeRowSerializer() {
	RowTypeInfo typeInfo = new RowTypeInfo(
		BasicTypeInfo.INT_TYPE_INFO,
		BasicTypeInfo.INT_TYPE_INFO,
		BasicTypeInfo.INT_TYPE_INFO,
		BasicTypeInfo.INT_TYPE_INFO,
		BasicTypeInfo.INT_TYPE_INFO,
		BasicTypeInfo.INT_TYPE_INFO,
		BasicTypeInfo.INT_TYPE_INFO,
		BasicTypeInfo.INT_TYPE_INFO,
		BasicTypeInfo.INT_TYPE_INFO,
		BasicTypeInfo.INT_TYPE_INFO,
		BasicTypeInfo.INT_TYPE_INFO,
		BasicTypeInfo.INT_TYPE_INFO,
		BasicTypeInfo.STRING_TYPE_INFO);

	Row row = new Row(13);
	row.setField(0, 2);
	row.setField(1, null);
	row.setField(3, null);
	row.setField(4, null);
	row.setField(5, null);
	row.setField(6, null);
	row.setField(7, null);
	row.setField(8, null);
	row.setField(9, null);
	row.setField(10, null);
	row.setField(11, null);
	row.setField(12, "Test");

	TypeSerializer<Row> serializer = typeInfo.createLegacySerializer(new ExecutionConfig());
	RowSerializerTestInstance testInstance = new RowSerializerTestInstance(serializer, row);
	testInstance.testAll();
}
 
Example #15
Source File: HiveTableSinkTest.java    From flink with Apache License 2.0
@Test
public void testWriteNestedComplexType() throws Exception {
	String dbName = "default";
	String tblName = "dest";
	ObjectPath tablePath = new ObjectPath(dbName, tblName);

	// nested complex types
	TableSchema.Builder builder = new TableSchema.Builder();
	// array of rows
	builder.fields(new String[]{"a"}, new DataType[]{DataTypes.ARRAY(
			DataTypes.ROW(DataTypes.FIELD("f1", DataTypes.INT()), DataTypes.FIELD("f2", DataTypes.STRING())))});
	RowTypeInfo rowTypeInfo = createDestTable(dbName, tblName, builder.build(), 0);
	Row row = new Row(rowTypeInfo.getArity());
	Object[] array = new Object[3];
	row.setField(0, array);
	for (int i = 0; i < array.length; i++) {
		Row struct = new Row(2);
		struct.setField(0, 1 + i);
		struct.setField(1, String.valueOf((char) ('a' + i)));
		array[i] = struct;
	}
	List<Row> toWrite = new ArrayList<>();
	toWrite.add(row);

	TableEnvironment tableEnv = HiveTestUtils.createTableEnv();

	Table src = tableEnv.fromTableSource(new CollectionTableSource(toWrite, rowTypeInfo));
	tableEnv.registerTable("nestedSrc", src);
	tableEnv.registerCatalog("hive", hiveCatalog);
	tableEnv.sqlQuery("select * from nestedSrc").insertInto("hive", "default", "dest");
	tableEnv.execute("mytest");

	List<String> result = hiveShell.executeQuery("select * from " + tblName);
	assertEquals(1, result.size());
	assertEquals("[{\"f1\":1,\"f2\":\"a\"},{\"f1\":2,\"f2\":\"b\"},{\"f1\":3,\"f2\":\"c\"}]", result.get(0));
	hiveCatalog.dropTable(tablePath, false);
}
 
Example #16
Source File: CsvRowDeserializationSchema.java    From flink with Apache License 2.0
private CsvRowDeserializationSchema(
		RowTypeInfo typeInfo,
		CsvSchema csvSchema,
		boolean ignoreParseErrors) {
	this.typeInfo = typeInfo;
	this.runtimeConverter = createRowRuntimeConverter(typeInfo, ignoreParseErrors, true);
	this.csvSchema = csvSchema;
	this.objectReader = new CsvMapper().readerFor(JsonNode.class).with(csvSchema);
	this.ignoreParseErrors = ignoreParseErrors;
}
 
Example #17
Source File: RowComparatorWithManyFieldsTests.java    From Flink-CEPplus with Apache License 2.0
@BeforeClass
public static void setUp() throws Exception {
	TypeInformation<?>[] fieldTypes = new TypeInformation[numberOfFields];
	for (int i = 0; i < numberOfFields; i++) {
		fieldTypes[i] = BasicTypeInfo.STRING_TYPE_INFO;
	}
	typeInfo = new RowTypeInfo(fieldTypes);
}
 
Example #18
Source File: AvroRowDeserializationSchema.java    From flink with Apache License 2.0
private Row convertAvroRecordToRow(Schema schema, RowTypeInfo typeInfo, IndexedRecord record) {
	final List<Schema.Field> fields = schema.getFields();
	final TypeInformation<?>[] fieldInfo = typeInfo.getFieldTypes();
	final int length = fields.size();
	final Row row = new Row(length);
	for (int i = 0; i < length; i++) {
		final Schema.Field field = fields.get(i);
		row.setField(i, convertAvroType(field.schema(), fieldInfo[i], record.get(i)));
	}
	return row;
}
 
Example #19
Source File: DescriptorProperties.java    From flink with Apache License 2.0
/**
 * Validates a type property.
 */
public void validateType(String key, boolean isOptional, boolean requireRow) {
	validateOptional(
		key,
		isOptional,
		(value) -> {
			// we don't validate the string but let the parser do the work for us
			// it throws a validation exception
			final TypeInformation<?> typeInfo = TypeStringUtils.readTypeInfo(value);
			if (requireRow && !(typeInfo instanceof RowTypeInfo)) {
				throw new ValidationException(
					"Row type information expected for key '" + key + "' but was: " + value);
			}
		});
}
 
Example #20
Source File: CsvRowSerializationSchema.java    From flink with Apache License 2.0
/**
 * Creates a {@link CsvRowSerializationSchema} expecting the given {@link TypeInformation}.
 *
 * @param typeInfo type information used to create schema.
 */
public Builder(TypeInformation<Row> typeInfo) {
	Preconditions.checkNotNull(typeInfo, "Type information must not be null.");

	if (!(typeInfo instanceof RowTypeInfo)) {
		throw new IllegalArgumentException("Row type information expected.");
	}

	this.typeInfo = (RowTypeInfo) typeInfo;
	this.csvSchema = CsvRowSchemaConverter.convert((RowTypeInfo) typeInfo);
}
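A usage sketch for this builder; the row type and delimiter are illustrative, and setFieldDelimiter is optional:

TypeInformation<Row> rowType = Types.ROW_NAMED(
	new String[]{"id", "name"}, Types.INT, Types.STRING);
CsvRowSerializationSchema schema = new CsvRowSerializationSchema.Builder(rowType)
	.setFieldDelimiter(';')
	.build();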
 
Example #21
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0
@Test
public void testProducedTypeWithProjection() throws IOException {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());

	rowOrcInputFormat.selectFields(9, 3, 7, 10);

	assertTrue(rowOrcInputFormat.getProducedType() instanceof RowTypeInfo);
	RowTypeInfo producedType = (RowTypeInfo) rowOrcInputFormat.getProducedType();

	assertArrayEquals(
		new TypeInformation[]{
			// struct
			Types.ROW_NAMED(
				new String[]{"list"},
				ObjectArrayTypeInfo.getInfoFor(
					Types.ROW_NAMED(new String[]{"int1", "string1"}, Types.INT, Types.STRING))),
			// int
			Types.INT,
			// binary
			PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO,
			// list
			ObjectArrayTypeInfo.getInfoFor(
				Types.ROW_NAMED(new String[]{"int1", "string1"}, Types.INT, Types.STRING))
		},
		producedType.getFieldTypes());
	assertArrayEquals(
		new String[]{"middle", "int1", "bytes1", "list"},
		producedType.getFieldNames());
}
 
Example #22
Source File: ParquetTableSourceTest.java    From flink with Apache License 2.0
@Test
public void testGetReturnType() {
	MessageType nestedSchema = SCHEMA_CONVERTER.convert(TestUtil.NESTED_SCHEMA);
	ParquetTableSource parquetTableSource = ParquetTableSource.builder()
		.path("dummy-path")
		.forParquetSchema(nestedSchema)
		.build();

	TypeInformation<Row> returnType = parquetTableSource.getReturnType();
	assertNotNull(returnType);
	assertTrue(returnType instanceof RowTypeInfo);
	RowTypeInfo rowType = (RowTypeInfo) returnType;
	assertEquals(NESTED_ROW_TYPE, rowType);
}
 
Example #23
Source File: CsvRowSchemaConverter.java    From flink with Apache License 2.0
/**
 * Convert {@link TypeInformation} to {@link CsvSchema.ColumnType} based on Jackson's categories.
 */
private static CsvSchema.ColumnType convertType(String fieldName, TypeInformation<?> info) {
	if (STRING_TYPES.contains(info)) {
		return CsvSchema.ColumnType.STRING;
	} else if (NUMBER_TYPES.contains(info)) {
		return CsvSchema.ColumnType.NUMBER;
	} else if (BOOLEAN_TYPES.contains(info)) {
		return CsvSchema.ColumnType.BOOLEAN;
	} else if (info instanceof ObjectArrayTypeInfo) {
		validateNestedField(fieldName, ((ObjectArrayTypeInfo) info).getComponentInfo());
		return CsvSchema.ColumnType.ARRAY;
	} else if (info instanceof BasicArrayTypeInfo) {
		validateNestedField(fieldName, ((BasicArrayTypeInfo) info).getComponentInfo());
		return CsvSchema.ColumnType.ARRAY;
	} else if (info instanceof RowTypeInfo) {
		final TypeInformation<?>[] types = ((RowTypeInfo) info).getFieldTypes();
		for (TypeInformation<?> type : types) {
			validateNestedField(fieldName, type);
		}
		return CsvSchema.ColumnType.ARRAY;
	} else if (info instanceof PrimitiveArrayTypeInfo &&
			((PrimitiveArrayTypeInfo) info).getComponentType() == Types.BYTE) {
		return CsvSchema.ColumnType.STRING;
	} else {
		throw new IllegalArgumentException(
			"Unsupported type information '" + info.toString() + "' for field '" + fieldName + "'.");
	}
}
 
Example #24
Source File: JsonRowDeserializationSchema.java    From Flink-CEPplus with Apache License 2.0
/**
 * Creates a JSON deserialization schema for the given type information.
 *
 * @param typeInfo Type information describing the result type. The field names of {@link Row}
 *                 are used to parse the JSON properties.
 */
public JsonRowDeserializationSchema(TypeInformation<Row> typeInfo) {
	Preconditions.checkNotNull(typeInfo, "Type information");
	this.typeInfo = typeInfo;

	if (!(typeInfo instanceof RowTypeInfo)) {
		throw new IllegalArgumentException("Row type information expected.");
	}
}
 
Example #25
Source File: RowCsvInputFormatSplitTest.java    From flink with Apache License 2.0
private void test(
		String content,
		long offset, long length,
		char escapeChar,
		List<Row> expected,
		TypeInformation[] fieldTypes) throws Exception {
	FileInputSplit split = createTempFile(content, offset, length);

	RowCsvInputFormat.Builder builder = RowCsvInputFormat.builder(new RowTypeInfo(fieldTypes), PATH)
			.setEscapeCharacter(escapeChar);

	RowCsvInputFormat format = builder.build();
	format.configure(new Configuration());
	format.open(split);

	List<Row> rows = new ArrayList<>();
	while (!format.reachedEnd()) {
		Row result = new Row(fieldTypes.length); // arity must match the configured field types
		result = format.nextRecord(result);
		if (result == null) {
			break;
		} else {
			rows.add(result);
		}
	}

	assertEquals(expected, rows);
}
 
Example #26
Source File: MaterializedCollectStreamResult.java    From flink with Apache License 2.0
@VisibleForTesting
public MaterializedCollectStreamResult(
		RowTypeInfo outputType,
		ExecutionConfig config,
		InetAddress gatewayAddress,
		int gatewayPort,
		int maxRowCount,
		int overcommitThreshold) {
	super(outputType, config, gatewayAddress, gatewayPort);

	if (maxRowCount <= 0) {
		this.maxRowCount = Integer.MAX_VALUE;
	} else {
		this.maxRowCount = maxRowCount;
	}

	this.overcommitThreshold = overcommitThreshold;

	// prepare for materialization
	final int initialCapacity = computeMaterializedTableCapacity(maxRowCount); // avoid frequent resizing
	materializedTable = new ArrayList<>(initialCapacity);
	rowPositionCache = new HashMap<>(initialCapacity);
	snapshot = new ArrayList<>();
	validRowPosition = 0;
	isLastSnapshot = false;
	pageCount = 0;
}
 
Example #27
Source File: FlexQETLRowDeserializationSchema.java    From PoseidonX with Apache License 2.0
/**
 * Creates a JSON deserialization schema for the given fields and types.
 *
 * @param typeInfo   Type information describing the result type. Both the field names and
 *                   the field types are used to parse the JSON records.
 * @param columnNames
 */
public FlexQETLRowDeserializationSchema(TypeInformation<Row> typeInfo, String[] columnNames) {
    Preconditions.checkNotNull(typeInfo, "Type information");
    this.typeInfo = typeInfo;

    this.fieldNames = ((RowTypeInfo) typeInfo).getFieldNames();
    this.fieldTypes = ((RowTypeInfo) typeInfo).getFieldTypes();

    this.columnNames = columnNames;
}
 
Example #28
Source File: AsyncLookupJoinRunner.java    From flink with Apache License 2.0
@Override
public void open(Configuration parameters) throws Exception {
	super.open(parameters);
	this.fetcher = generatedFetcher.newInstance(getRuntimeContext().getUserCodeClassLoader());
	FunctionUtils.setFunctionRuntimeContext(fetcher, getRuntimeContext());
	FunctionUtils.openFunction(fetcher, parameters);

	// try to compile the generated ResultFuture, fail fast if the code is corrupt.
	generatedResultFuture.compile(getRuntimeContext().getUserCodeClassLoader());

	// row converter is stateless which is thread-safe
	DataFormatConverters.RowConverter rowConverter;
	if (fetcherReturnType instanceof RowTypeInfo) {
		rowConverter = (DataFormatConverters.RowConverter) DataFormatConverters.getConverterForDataType(
				fromLegacyInfoToDataType(fetcherReturnType));
	} else if (fetcherReturnType instanceof RowDataTypeInfo) {
		rowConverter = null;
	} else {
		throw new IllegalStateException("This should never happen, " +
			"currently fetcherReturnType can only be RowDataTypeInfo or RowTypeInfo");
	}

	// asyncBufferCapacity + 1 as the queue size in order to avoid
	// blocking on the queue when taking a collector.
	this.resultFutureBuffer = new ArrayBlockingQueue<>(asyncBufferCapacity + 1);
	this.allResultFutures = new ArrayList<>();
	for (int i = 0; i < asyncBufferCapacity + 1; i++) {
		JoinedRowResultFuture rf = new JoinedRowResultFuture(
			resultFutureBuffer,
			createFetcherResultFuture(parameters),
			rowConverter,
			isLeftOuterJoin,
			rightRowTypeInfo.getArity());
		// add will throw exception immediately if the queue is full which should never happen
		resultFutureBuffer.add(rf);
		allResultFutures.add(rf);
	}
}
 
Example #29
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0
@Test
public void testProducedType() throws IOException {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());

	assertTrue(rowOrcInputFormat.getProducedType() instanceof RowTypeInfo);
	RowTypeInfo producedType = (RowTypeInfo) rowOrcInputFormat.getProducedType();

	assertArrayEquals(
		new TypeInformation[]{
			// primitives
			Types.BOOLEAN, Types.BYTE, Types.SHORT, Types.INT, Types.LONG, Types.FLOAT, Types.DOUBLE,
			// binary
			PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO,
			// string
			Types.STRING,
			// struct
			Types.ROW_NAMED(
				new String[]{"list"},
				ObjectArrayTypeInfo.getInfoFor(
					Types.ROW_NAMED(new String[]{"int1", "string1"}, Types.INT, Types.STRING))),
			// list
			ObjectArrayTypeInfo.getInfoFor(
				Types.ROW_NAMED(new String[]{"int1", "string1"}, Types.INT, Types.STRING)),
			// map
			new MapTypeInfo<>(Types.STRING, Types.ROW_NAMED(new String[]{"int1", "string1"}, Types.INT, Types.STRING))
		},
		producedType.getFieldTypes());
	assertArrayEquals(
		new String[]{"boolean1", "byte1", "short1", "int1", "long1", "float1", "double1", "bytes1", "string1", "middle", "list", "map"},
		producedType.getFieldNames());
}
 
Example #30
Source File: LegacyRowSerializerTest.java    From flink with Apache License 2.0
@Test
public void testRowSerializerWithComplexTypes() {
	RowTypeInfo typeInfo = new RowTypeInfo(
		BasicTypeInfo.INT_TYPE_INFO,
		BasicTypeInfo.DOUBLE_TYPE_INFO,
		BasicTypeInfo.STRING_TYPE_INFO,
		new TupleTypeInfo<Tuple3<Integer, Boolean, Short>>(
			BasicTypeInfo.INT_TYPE_INFO,
			BasicTypeInfo.BOOLEAN_TYPE_INFO,
			BasicTypeInfo.SHORT_TYPE_INFO),
		TypeExtractor.createTypeInfo(MyPojo.class));

	MyPojo testPojo1 = new MyPojo();
	testPojo1.name = null;
	MyPojo testPojo2 = new MyPojo();
	testPojo2.name = "Test1";
	MyPojo testPojo3 = new MyPojo();
	testPojo3.name = "Test2";

	Row[] data = new Row[]{
		createRow(null, null, null, null, null),
		createRow(0, null, null, null, null),
		createRow(0, 0.0, null, null, null),
		createRow(0, 0.0, "a", null, null),
		createRow(1, 0.0, "a", null, null),
		createRow(1, 1.0, "a", null, null),
		createRow(1, 1.0, "b", null, null),
		createRow(1, 1.0, "b", new Tuple3<>(1, false, (short) 2), null),
		createRow(1, 1.0, "b", new Tuple3<>(2, false, (short) 2), null),
		createRow(1, 1.0, "b", new Tuple3<>(2, true, (short) 2), null),
		createRow(1, 1.0, "b", new Tuple3<>(2, true, (short) 3), null),
		createRow(1, 1.0, "b", new Tuple3<>(2, true, (short) 3), testPojo1),
		createRow(1, 1.0, "b", new Tuple3<>(2, true, (short) 3), testPojo2),
		createRow(1, 1.0, "b", new Tuple3<>(2, true, (short) 3), testPojo3)
	};

	TypeSerializer<Row> serializer = typeInfo.createLegacySerializer(new ExecutionConfig());
	RowSerializerTestInstance testInstance = new RowSerializerTestInstance(serializer, data);
	testInstance.testAll();
}