org.apache.flink.api.common.io.InputFormat Java Examples

The following examples show how to use org.apache.flink.api.common.io.InputFormat. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AvroInputFormatTypeExtractionTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that type extraction yields a {@code PojoTypeInfo} for {@code MyAvroType}, both when
 * the format is handed to the {@code TypeExtractor} directly and when a {@code DataSet} is
 * created from the same format.
 */
@Test
public void testTypeExtraction() {
	try {
		Path ignoredPath = new Path("file:///ignore/this/file");
		InputFormat<MyAvroType, ?> avroFormat = new AvroInputFormat<MyAvroType>(ignoredPath, MyAvroType.class);

		// type as reported by the extractor itself
		TypeInformation<?> extractedType = TypeExtractor.getInputFormatTypes(avroFormat);

		// type as reported by a data set built on top of the format
		DataSet<MyAvroType> dataSet = ExecutionEnvironment.getExecutionEnvironment().createInput(avroFormat);
		TypeInformation<?> dataSetType = dataSet.getType();

		Assert.assertTrue(extractedType instanceof PojoTypeInfo);
		Assert.assertTrue(dataSetType instanceof PojoTypeInfo);

		Assert.assertEquals(MyAvroType.class, extractedType.getTypeClass());
		Assert.assertEquals(MyAvroType.class, dataSetType.getTypeClass());
	} catch (Exception failure) {
		failure.printStackTrace();
		Assert.fail(failure.getMessage());
	}
}
 
Example #2
Source File: InputOutputFormatContainerTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a container holding a single input format and one parameter into a {@code TaskConfig},
 * loads a second container from that config, and checks that exactly that format and parameter
 * are found again (and no output formats).
 */
@Test
public void testOnlyInputFormat() {
	// populate a container and write it into a task config
	InputOutputFormatContainer written = new InputOutputFormatContainer(Thread.currentThread().getContextClassLoader());

	OperatorID id = new OperatorID();
	written.addInputFormat(id, new TestInputFormat("test input format"));
	written.addParameters(id, "parameter1", "abc123");

	TaskConfig holder = new TaskConfig(new Configuration());
	written.write(holder);

	// load a new container from the same task config
	InputOutputFormatContainer restored = new InputOutputFormatContainer(holder, getClass().getClassLoader());

	Map<OperatorID, UserCodeWrapper<? extends InputFormat<?, ?>>> restoredInputs = restored.getInputFormats();
	assertEquals(1, restoredInputs.size());
	assertEquals(0, restored.getOutputFormats().size());

	UserCodeWrapper<? extends InputFormat<?, ?>> restoredWrapper = restoredInputs.get(id);
	TestInputFormat restoredFormat = (TestInputFormat) restoredWrapper.getUserCodeObject();
	assertEquals("test input format", restoredFormat.getName());

	Configuration restoredParams = restored.getParameters(id);
	assertEquals(1, restoredParams.keySet().size());
	assertEquals("abc123", restoredParams.getString("parameter1", null));
}
 
Example #3
Source File: TypeExtractorInputFormatsTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the type extracted from a {@code FinalRelativeInputFormat} is a tuple type with
 * three fields of type String, Integer and Double (in that order).
 */
@Test
public void testMultiLevelDerivedInputFormatType() {
	try {
		// composite type
		InputFormat<?, ?> derivedFormat = new FinalRelativeInputFormat();
		TypeInformation<?> extracted = TypeExtractor.getInputFormatTypes(derivedFormat);

		assertTrue(extracted.isTupleType());
		assertTrue(extracted instanceof TupleTypeInfo);

		@SuppressWarnings("unchecked")
		TupleTypeInfo<Tuple3<String, Integer, Double>> asTuple = (TupleTypeInfo<Tuple3<String, Integer, Double>>) extracted;

		assertEquals(3, asTuple.getArity());
		assertEquals(BasicTypeInfo.STRING_TYPE_INFO, asTuple.getTypeAt(0));
		assertEquals(BasicTypeInfo.INT_TYPE_INFO, asTuple.getTypeAt(1));
		assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, asTuple.getTypeAt(2));
	}
	catch (Exception failure) {
		failure.printStackTrace();
		fail(failure.getMessage());
	}
}
 
Example #4
Source File: AvroInputFormatTypeExtractionTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that type extraction yields a {@code PojoTypeInfo} for {@code MyAvroType}, both when
 * the format is handed to the {@code TypeExtractor} directly and when a {@code DataSet} is
 * created from the same format.
 */
@Test
public void testTypeExtraction() {
	try {
		Path ignoredPath = new Path("file:///ignore/this/file");
		InputFormat<MyAvroType, ?> avroFormat = new AvroInputFormat<MyAvroType>(ignoredPath, MyAvroType.class);

		// type as reported by the extractor itself
		TypeInformation<?> extractedType = TypeExtractor.getInputFormatTypes(avroFormat);

		// type as reported by a data set built on top of the format
		DataSet<MyAvroType> dataSet = ExecutionEnvironment.getExecutionEnvironment().createInput(avroFormat);
		TypeInformation<?> dataSetType = dataSet.getType();

		Assert.assertTrue(extractedType instanceof PojoTypeInfo);
		Assert.assertTrue(dataSetType instanceof PojoTypeInfo);

		Assert.assertEquals(MyAvroType.class, extractedType.getTypeClass());
		Assert.assertEquals(MyAvroType.class, dataSetType.getTypeClass());
	} catch (Exception failure) {
		failure.printStackTrace();
		Assert.fail(failure.getMessage());
	}
}
 
Example #5
Source File: JobGraphGenerator.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the job vertex for a data source plan node.
 *
 * <p>The vertex is set up to invoke {@code DataSourceTask}. The node's user code wrapper and
 * operator parameters are registered under a freshly created operator ID and written into the
 * vertex's task configuration; the node's serializer becomes the output serializer.
 *
 * @param node the source plan node to translate
 * @return the configured vertex
 * @throws CompilerException declared on the signature (there is no explicit throw in this body)
 */
private JobVertex createDataSourceVertex(SourcePlanNode node) throws CompilerException {
	final InputOutputFormatVertex sourceVertex = new InputOutputFormatVertex(node.getNodeName());
	final TaskConfig taskConfig = new TaskConfig(sourceVertex.getConfiguration());

	final OperatorID sourceOperatorId = new OperatorID();

	sourceVertex.setResources(node.getMinResources(), node.getPreferredResources());
	sourceVertex.setInvokableClass(DataSourceTask.class);
	sourceVertex.setFormatDescription(
		sourceOperatorId,
		getDescriptionForUserCode(node.getProgramOperator().getUserCodeWrapper()));

	// set user code
	final InputOutputFormatContainer formatContainer =
		new InputOutputFormatContainer(Thread.currentThread().getContextClassLoader());
	formatContainer
		.addInputFormat(sourceOperatorId, (UserCodeWrapper<? extends InputFormat<?, ?>>) node.getProgramOperator().getUserCodeWrapper())
		.addParameters(sourceOperatorId, node.getProgramOperator().getParameters())
		.write(taskConfig);

	taskConfig.setOutputSerializer(node.getSerializer());
	return sourceVertex;
}
 
Example #6
Source File: DataSource.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a new data source.
 *
 * @param context The environment in which the data source gets executed.
 * @param inputFormat The input format that the data source executes; must not be {@code null}.
 * @param type The type of the elements produced by this input format.
 * @param dataSourceLocationName The value stored as this data source's location name.
 * @throws IllegalArgumentException If {@code inputFormat} is {@code null}.
 */
public DataSource(ExecutionEnvironment context, InputFormat<OUT, ?> inputFormat, TypeInformation<OUT> type, String dataSourceLocationName) {
	super(context, type);

	if (inputFormat == null) {
		throw new IllegalArgumentException("The input format may not be null.");
	}

	this.inputFormat = inputFormat;
	this.dataSourceLocationName = dataSourceLocationName;

	// a format marked as NonParallelInput pins the parallelism to 1
	if (inputFormat instanceof NonParallelInput) {
		this.parallelism = 1;
	}
}
 
Example #7
Source File: JobGraphGenerator.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the job vertex for a data source plan node.
 *
 * <p>The vertex is set up to invoke {@code DataSourceTask}. The node's user code wrapper and
 * operator parameters are registered under a freshly created operator ID and written into the
 * vertex's task configuration; the node's serializer becomes the output serializer.
 *
 * @param node the source plan node to translate
 * @return the configured vertex
 * @throws CompilerException declared on the signature (there is no explicit throw in this body)
 */
private JobVertex createDataSourceVertex(SourcePlanNode node) throws CompilerException {
	final InputOutputFormatVertex sourceVertex = new InputOutputFormatVertex(node.getNodeName());
	final TaskConfig taskConfig = new TaskConfig(sourceVertex.getConfiguration());

	final OperatorID sourceOperatorId = new OperatorID();

	sourceVertex.setResources(node.getMinResources(), node.getPreferredResources());
	sourceVertex.setInvokableClass(DataSourceTask.class);
	sourceVertex.setFormatDescription(
		sourceOperatorId,
		getDescriptionForUserCode(node.getProgramOperator().getUserCodeWrapper()));

	// set user code
	final InputOutputFormatContainer formatContainer =
		new InputOutputFormatContainer(Thread.currentThread().getContextClassLoader());
	formatContainer
		.addInputFormat(sourceOperatorId, (UserCodeWrapper<? extends InputFormat<?, ?>>) node.getProgramOperator().getUserCodeWrapper())
		.addParameters(sourceOperatorId, node.getProgramOperator().getParameters())
		.write(taskConfig);

	taskConfig.setOutputSerializer(node.getSerializer());
	return sourceVertex;
}
 
Example #8
Source File: AvroInputFormatTypeExtractionTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that type extraction yields a {@code PojoTypeInfo} for {@code MyAvroType}, both when
 * the format is handed to the {@code TypeExtractor} directly and when a {@code DataSet} is
 * created from the same format.
 */
@Test
public void testTypeExtraction() {
	try {
		Path ignoredPath = new Path("file:///ignore/this/file");
		InputFormat<MyAvroType, ?> avroFormat = new AvroInputFormat<MyAvroType>(ignoredPath, MyAvroType.class);

		// type as reported by the extractor itself
		TypeInformation<?> extractedType = TypeExtractor.getInputFormatTypes(avroFormat);

		// type as reported by a data set built on top of the format
		DataSet<MyAvroType> dataSet = ExecutionEnvironment.getExecutionEnvironment().createInput(avroFormat);
		TypeInformation<?> dataSetType = dataSet.getType();

		Assert.assertTrue(extractedType instanceof PojoTypeInfo);
		Assert.assertTrue(dataSetType instanceof PojoTypeInfo);

		Assert.assertEquals(MyAvroType.class, extractedType.getTypeClass());
		Assert.assertEquals(MyAvroType.class, dataSetType.getTypeClass());
	} catch (Exception failure) {
		failure.printStackTrace();
		Assert.fail(failure.getMessage());
	}
}
 
Example #9
Source File: TypeExtractorInputFormatsTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the type extracted from a {@code FinalRelativeInputFormat} is a tuple type with
 * three fields of type String, Integer and Double (in that order).
 */
@Test
public void testMultiLevelDerivedInputFormatType() {
	try {
		// composite type
		InputFormat<?, ?> derivedFormat = new FinalRelativeInputFormat();
		TypeInformation<?> extracted = TypeExtractor.getInputFormatTypes(derivedFormat);

		assertTrue(extracted.isTupleType());
		assertTrue(extracted instanceof TupleTypeInfo);

		@SuppressWarnings("unchecked")
		TupleTypeInfo<Tuple3<String, Integer, Double>> asTuple = (TupleTypeInfo<Tuple3<String, Integer, Double>>) extracted;

		assertEquals(3, asTuple.getArity());
		assertEquals(BasicTypeInfo.STRING_TYPE_INFO, asTuple.getTypeAt(0));
		assertEquals(BasicTypeInfo.INT_TYPE_INFO, asTuple.getTypeAt(1));
		assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, asTuple.getTypeAt(2));
	}
	catch (Exception failure) {
		failure.printStackTrace();
		fail(failure.getMessage());
	}
}
 
Example #10
Source File: DataSource.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a new data source.
 *
 * @param context The environment in which the data source gets executed.
 * @param inputFormat The input format that the data source executes; must not be {@code null}.
 * @param type The type of the elements produced by this input format.
 * @param dataSourceLocationName The value stored as this data source's location name.
 * @throws IllegalArgumentException If {@code inputFormat} is {@code null}.
 */
public DataSource(ExecutionEnvironment context, InputFormat<OUT, ?> inputFormat, TypeInformation<OUT> type, String dataSourceLocationName) {
	super(context, type);

	if (inputFormat == null) {
		throw new IllegalArgumentException("The input format may not be null.");
	}

	this.inputFormat = inputFormat;
	this.dataSourceLocationName = dataSourceLocationName;

	// a format marked as NonParallelInput pins the parallelism to 1
	if (inputFormat instanceof NonParallelInput) {
		this.parallelism = 1;
	}
}
 
Example #11
Source File: StreamExecutionEnvironment.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the given input format in an {@code InputFormatSourceFunction} and hands it to
 * {@code addSource}.
 *
 * @param inputFormat the input format passed to the source function
 * @param typeInfo type information passed to both the source function and {@code addSource}
 * @param sourceName the name passed to {@code addSource}
 * @param <OUT> the element type of the input format and of the resulting stream source
 * @return the stream source returned by {@code addSource}
 */
private <OUT> DataStreamSource<OUT> createInput(InputFormat<OUT, ?> inputFormat,
												TypeInformation<OUT> typeInfo,
												String sourceName) {

	return addSource(new InputFormatSourceFunction<OUT>(inputFormat, typeInfo), sourceName, typeInfo);
}
 
Example #12
Source File: GenericDataSourceBase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Reads every record of the wrapped input format into a list.
 *
 * <p>The format is configured with this operator's parameters, asked for its input splits
 * (with a minimum of one split), and each split is opened, drained and closed in turn. Every
 * non-null record is copied with the output type's serializer before it is added to the result.
 *
 * <p>Cleanup runs in {@code finally} blocks: a split that was opened successfully is closed even
 * when reading it fails, and a {@code RichInputFormat} whose {@code openInputFormat()} succeeded
 * always gets its {@code closeInputFormat()} call.
 *
 * @param ctx the runtime context set on the format if it is a {@code RichInputFormat}
 * @param executionConfig the config used to create the serializer for the output type
 * @return the copied records of all splits, in read order
 * @throws Exception if the input format or the serializer fails
 */
protected List<OUT> executeOnCollections(RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
	@SuppressWarnings("unchecked")
	InputFormat<OUT, InputSplit> inputFormat = (InputFormat<OUT, InputSplit>) this.formatWrapper.getUserCodeObject();
	//configure the input format
	inputFormat.configure(this.parameters);

	//open the input format
	if (inputFormat instanceof RichInputFormat) {
		((RichInputFormat) inputFormat).setRuntimeContext(ctx);
		((RichInputFormat) inputFormat).openInputFormat();
	}

	List<OUT> result = new ArrayList<OUT>();

	try {
		// splits
		InputSplit[] splits = inputFormat.createInputSplits(1);
		TypeSerializer<OUT> serializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);

		for (InputSplit split : splits) {
			inputFormat.open(split);

			try {
				while (!inputFormat.reachedEnd()) {
					OUT next = inputFormat.nextRecord(serializer.createInstance());
					if (next != null) {
						result.add(serializer.copy(next));
					}
				}
			} finally {
				// release the split even when reading it failed
				inputFormat.close();
			}
		}
	} finally {
		//close the input format, also on failure
		if (inputFormat instanceof RichInputFormat) {
			((RichInputFormat) inputFormat).closeInputFormat();
		}
	}

	return result;
}
 
Example #13
Source File: InputFormatProvider.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Helper method for creating a static provider.
 *
 * <p>The returned provider hands out the given {@code inputFormat} instance on every
 * {@code createInputFormat()} call and reports itself as bounded.
 *
 * @param inputFormat the format returned by the provider
 * @return a provider wrapping {@code inputFormat}
 */
static InputFormatProvider of(InputFormat<RowData, ?> inputFormat) {
	final InputFormatProvider provider = new InputFormatProvider() {
		@Override
		public boolean isBounded() {
			return true;
		}

		@Override
		public InputFormat<RowData, ?> createInputFormat() {
			return inputFormat;
		}
	};
	return provider;
}
 
Example #14
Source File: TypeExtractor.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Determines the type information of the records produced by an input format.
 *
 * <p>A format that implements {@code ResultTypeQueryable} is asked for its produced type;
 * for any other format the type is derived from the format's class via
 * {@code privateCreateTypeInfo}.
 *
 * @param inputFormatInterface the input format to inspect
 * @param <IN> the record type of the input format
 * @return the type information for {@code IN}
 */
@SuppressWarnings("unchecked")
@PublicEvolving
public static <IN> TypeInformation<IN> getInputFormatTypes(InputFormat<IN, ?> inputFormatInterface) {
	if (!(inputFormatInterface instanceof ResultTypeQueryable)) {
		// no self-description available: derive the type from the class hierarchy
		return new TypeExtractor().privateCreateTypeInfo(InputFormat.class, inputFormatInterface.getClass(), 0, null, null);
	}
	final ResultTypeQueryable<IN> queryable = (ResultTypeQueryable<IN>) inputFormatInterface;
	return queryable.getProducedType();
}
 
Example #15
Source File: TypeExtractor.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Determines the type information of the records produced by an input format.
 *
 * <p>A format that implements {@code ResultTypeQueryable} is asked for its produced type;
 * for any other format the type is derived from the format's class via
 * {@code privateCreateTypeInfo}.
 *
 * @param inputFormatInterface the input format to inspect
 * @param <IN> the record type of the input format
 * @return the type information for {@code IN}
 */
@SuppressWarnings("unchecked")
@PublicEvolving
public static <IN> TypeInformation<IN> getInputFormatTypes(InputFormat<IN, ?> inputFormatInterface) {
	if (!(inputFormatInterface instanceof ResultTypeQueryable)) {
		// no self-description available: derive the type from the class hierarchy
		return new TypeExtractor().privateCreateTypeInfo(InputFormat.class, inputFormatInterface.getClass(), 0, null, null);
	}
	final ResultTypeQueryable<IN> queryable = (ResultTypeQueryable<IN>) inputFormatInterface;
	return queryable.getProducedType();
}
 
Example #16
Source File: GenericDataSourceBase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Reads every record of the wrapped input format into a list.
 *
 * <p>The format is configured with this operator's parameters, asked for its input splits
 * (with a minimum of one split), and each split is opened, drained and closed in turn. Every
 * non-null record is copied with the output type's serializer before it is added to the result.
 *
 * <p>Cleanup runs in {@code finally} blocks: a split that was opened successfully is closed even
 * when reading it fails, and a {@code RichInputFormat} whose {@code openInputFormat()} succeeded
 * always gets its {@code closeInputFormat()} call.
 *
 * @param ctx the runtime context set on the format if it is a {@code RichInputFormat}
 * @param executionConfig the config used to create the serializer for the output type
 * @return the copied records of all splits, in read order
 * @throws Exception if the input format or the serializer fails
 */
protected List<OUT> executeOnCollections(RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
	@SuppressWarnings("unchecked")
	InputFormat<OUT, InputSplit> inputFormat = (InputFormat<OUT, InputSplit>) this.formatWrapper.getUserCodeObject();
	//configure the input format
	inputFormat.configure(this.parameters);

	//open the input format
	if (inputFormat instanceof RichInputFormat) {
		((RichInputFormat) inputFormat).setRuntimeContext(ctx);
		((RichInputFormat) inputFormat).openInputFormat();
	}

	List<OUT> result = new ArrayList<OUT>();

	try {
		// splits
		InputSplit[] splits = inputFormat.createInputSplits(1);
		TypeSerializer<OUT> serializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);

		for (InputSplit split : splits) {
			inputFormat.open(split);

			try {
				while (!inputFormat.reachedEnd()) {
					OUT next = inputFormat.nextRecord(serializer.createInstance());
					if (next != null) {
						result.add(serializer.copy(next));
					}
				}
			} finally {
				// release the split even when reading it failed
				inputFormat.close();
			}
		}
	} finally {
		//close the input format, also on failure
		if (inputFormat instanceof RichInputFormat) {
			((RichInputFormat) inputFormat).closeInputFormat();
		}
	}

	return result;
}
 
Example #17
Source File: TypeExtractorInputFormatsTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the type extracted from a {@code QueryableInputFormat} equals
 * {@code BasicTypeInfo.DOUBLE_TYPE_INFO}.
 */
@Test
public void testQueryableFormatType() {
	try {
		InputFormat<?, ?> queryableFormat = new QueryableInputFormat();
		TypeInformation<?> extracted = TypeExtractor.getInputFormatTypes(queryableFormat);

		assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, extracted);
	}
	catch (Exception failure) {
		failure.printStackTrace();
		fail(failure.getMessage());
	}
}
 
Example #18
Source File: ParquetFileSystemFormatFactory.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code ParquetInputFormat} from the values exposed by the reader context: paths,
 * schema field names and data types, projected fields, default partition name, pushed-down
 * limit, and the format options.
 *
 * @param context the reader context supplying all constructor arguments
 * @return the new Parquet input format
 */
@Override
public InputFormat<RowData, ?> createReader(ReaderContext context) {
	return new ParquetInputFormat(
			context.getPaths(),
			context.getSchema().getFieldNames(),
			context.getSchema().getFieldDataTypes(),
			context.getProjectFields(),
			context.getDefaultPartName(),
			context.getPushedDownLimit(),
			// the format options feed both the Parquet configuration and the UTC_TIMEZONE lookup
			getParquetConfiguration(context.getFormatOptions()),
			context.getFormatOptions().get(UTC_TIMEZONE));
}
 
Example #19
Source File: CsvFileSystemFormatFactory.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code CsvInputFormat} for the given reader context.
 *
 * <p>After validating the format options, this computes, for every field returned by
 * {@code getFormatProjectFields()}, its position among the projected field names and its
 * position among the schema fields that are not partition keys. Both index arrays are passed
 * to the input format together with the CSV schema built from the format row type.
 *
 * @param context the reader context supplying schema, projection, partition and option data
 * @return the new CSV input format
 */
@Override
public InputFormat<RowData, ?> createReader(ReaderContext context) {
	ReadableConfig formatOptions = context.getFormatOptions();
	validateFormatOptions(formatOptions);

	RowType rowType = context.getFormatRowType();

	String[] allFieldNames = context.getSchema().getFieldNames();

	// names of the projected fields, in projection order
	List<String> projectedNames = Arrays.stream(context.getProjectFields())
		.mapToObj(index -> allFieldNames[index])
		.collect(Collectors.toList());

	// names of all schema fields that are not partition keys
	List<String> nonPartitionNames = Arrays.stream(allFieldNames)
		.filter(name -> !context.getPartitionKeys().contains(name))
		.collect(Collectors.toList());

	// position of each format-projected field within the two lists above
	int[] selectToProjectIndex = context.getFormatProjectFields().stream()
		.mapToInt(name -> projectedNames.indexOf(name))
		.toArray();
	int[] selectToCsvIndex = context.getFormatProjectFields().stream()
		.mapToInt(name -> nonPartitionNames.indexOf(name))
		.toArray();

	CsvSchema schema = buildCsvSchema(rowType, formatOptions);

	boolean skipParseErrors = formatOptions.get(IGNORE_PARSE_ERRORS);

	return new CsvInputFormat(
		context.getPaths(),
		context.getSchema().getFieldDataTypes(),
		context.getSchema().getFieldNames(),
		schema,
		rowType,
		context.getProjectFields(),
		context.getPartitionKeys(),
		context.getDefaultPartName(),
		context.getPushedDownLimit(),
		selectToProjectIndex,
		selectToCsvIndex,
		skipParseErrors);
}
 
Example #20
Source File: TestCsvFileSystemFormatFactory.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code TestRowDataCsvInputFormat} from the values exposed by the reader context:
 * paths, schema, partition keys, default partition name, projected fields and pushed-down limit.
 *
 * @param context the reader context supplying all constructor arguments
 * @return the new test CSV input format
 */
@Override
public InputFormat<RowData, ?> createReader(ReaderContext context) {
	return new TestRowDataCsvInputFormat(
			context.getPaths(),
			context.getSchema(),
			context.getPartitionKeys(),
			context.getDefaultPartName(),
			context.getProjectFields(),
			context.getPushedDownLimit());
}
 
Example #21
Source File: HiveTableSource.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a new {@code HiveTableInputFormat} built from the job configuration, the catalog
 * table and the Hive partitions held by this source.
 *
 * <p>If the {@code initAllPartitions} flag is not yet set, {@code initAllPartitions()} is
 * called before the format is created.
 *
 * @return a newly created input format
 */
@Override
public InputFormat getInputFormat() {
	if (!initAllPartitions) {
		initAllPartitions();
	}

	final HiveTableInputFormat hiveFormat = new HiveTableInputFormat(jobConf, catalogTable, allHivePartitions);
	return hiveFormat;
}
 
Example #22
Source File: InputOutputFormatContainer.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Registers the input format wrapper for the given operator.
 *
 * <p>Both arguments are passed through {@code checkNotNull}. An operator can only be
 * registered once.
 *
 * @param operatorId the operator the input format belongs to
 * @param wrapper the wrapper holding the input format
 * @throws IllegalStateException if an input format is already registered for {@code operatorId}
 */
public void addInputFormat(OperatorID operatorId, UserCodeWrapper<? extends InputFormat<?, ?>> wrapper) {
	checkNotNull(operatorId);

	final boolean alreadyRegistered = inputFormats.containsKey(operatorId);
	if (alreadyRegistered) {
		throw new IllegalStateException("The input format has been set for the operator: " + operatorId);
	}

	// the wrapper is validated only after the duplicate check, as before
	inputFormats.put(operatorId, checkNotNull(wrapper));
}
 
Example #23
Source File: ParquetTableSourceTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Projects a nested Parquet table source onto fields 2, 4 and 6 and checks that a new source
 * is returned with an unchanged table schema, a different description, a return type reduced
 * to the selected fields, and a {@code ParquetRowInputFormat} configured with those fields.
 */
@Test
public void testFieldsProjection() throws Exception {
	ParquetTableSource fullSource = createNestedTestParquetTableSource(testPath);
	ParquetTableSource projected = (ParquetTableSource) fullSource.projectFields(new int[] {2, 4, 6});

	// ensure a new reference is returned
	assertNotSame(projected, fullSource);

	// ensure table schema is the same
	assertEquals(fullSource.getTableSchema(), projected.getTableSchema());

	// ensure that table source description differs
	assertNotEquals(fullSource.explainSource(), projected.explainSource());

	String[] allNames = ((RowTypeInfo) NESTED_ROW_TYPE).getFieldNames();
	TypeInformation[] allTypes =  ((RowTypeInfo) NESTED_ROW_TYPE).getFieldTypes();

	// names and types of the three selected fields
	String[] selectedNames = new String[] {allNames[2], allNames[4], allNames[6]};
	TypeInformation<?>[] selectedTypes = new TypeInformation<?>[] {allTypes[2], allTypes[4], allTypes[6]};

	assertEquals(Types.ROW_NAMED(selectedNames, selectedTypes), projected.getReturnType());

	// ensure ParquetInputFormat is configured with selected fields
	DataSet<Row> dataSet = projected.getDataSet(ExecutionEnvironment.createLocalEnvironment());
	InputFormat<Row, ?> sourceFormat = ((DataSource<Row>) dataSet).getInputFormat();
	assertTrue(sourceFormat instanceof ParquetRowInputFormat);

	ParquetRowInputFormat parquetFormat = (ParquetRowInputFormat) sourceFormat;
	assertArrayEquals(selectedNames, parquetFormat.getFieldNames());
	assertArrayEquals(selectedTypes, parquetFormat.getFieldTypes());
}
 
Example #24
Source File: InputOutputFormatContainer.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the single registered input format together with its operator ID.
 *
 * <p>The user code object is obtained from the wrapper with {@code userCodeClassLoader}.
 *
 * @param <OT> the record type the caller expects from the input format
 * @param <T> the input split type the caller expects
 * @return a pair of the operator ID and its input format
 * @throws IllegalStateException presumably raised by {@code Preconditions.checkState} when the
 *     number of registered input formats is not exactly one (the helper is not shown here)
 */
@SuppressWarnings("unchecked")
public <OT, T extends InputSplit> Pair<OperatorID, InputFormat<OT, T>> getUniqueInputFormat() {
	Map<OperatorID, UserCodeWrapper<? extends InputFormat<?, ?>>> formatsByOperator = formats.getInputFormats();
	Preconditions.checkState(formatsByOperator.size() == 1);

	Map.Entry<OperatorID, UserCodeWrapper<? extends InputFormat<?, ?>>> onlyEntry =
		formatsByOperator.entrySet().iterator().next();

	OperatorID operatorId = onlyEntry.getKey();
	InputFormat<OT, T> format =
		(InputFormat<OT, T>) onlyEntry.getValue().getUserCodeObject(InputFormat.class, userCodeClassLoader);

	return new ImmutablePair<>(operatorId, format);
}
 
Example #25
Source File: ParquetTableSourceTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Projects a nested Parquet table source onto fields 2, 4 and 6 and checks that a new source
 * is returned with an unchanged table schema, a different description, a return type reduced
 * to the selected fields, and a {@code ParquetRowInputFormat} configured with those fields.
 */
@Test
public void testFieldsProjection() throws Exception {
	ParquetTableSource fullSource = createNestedTestParquetTableSource(testPath);
	ParquetTableSource projected = (ParquetTableSource) fullSource.projectFields(new int[] {2, 4, 6});

	// ensure a new reference is returned
	assertNotSame(projected, fullSource);

	// ensure table schema is the same
	assertEquals(fullSource.getTableSchema(), projected.getTableSchema());

	// ensure that table source description differs
	assertNotEquals(fullSource.explainSource(), projected.explainSource());

	String[] allNames = ((RowTypeInfo) NESTED_ROW_TYPE).getFieldNames();
	TypeInformation[] allTypes =  ((RowTypeInfo) NESTED_ROW_TYPE).getFieldTypes();

	// names and types of the three selected fields
	String[] selectedNames = new String[] {allNames[2], allNames[4], allNames[6]};
	TypeInformation<?>[] selectedTypes = new TypeInformation<?>[] {allTypes[2], allTypes[4], allTypes[6]};

	assertEquals(Types.ROW_NAMED(selectedNames, selectedTypes), projected.getReturnType());

	// ensure ParquetInputFormat is configured with selected fields
	DataSet<Row> dataSet = projected.getDataSet(ExecutionEnvironment.createLocalEnvironment());
	InputFormat<Row, ?> sourceFormat = ((DataSource<Row>) dataSet).getInputFormat();
	assertTrue(sourceFormat instanceof ParquetRowInputFormat);

	ParquetRowInputFormat parquetFormat = (ParquetRowInputFormat) sourceFormat;
	assertArrayEquals(selectedNames, parquetFormat.getFieldNames());
	assertArrayEquals(selectedTypes, parquetFormat.getFieldTypes());
}
 
Example #26
Source File: GenericDataSourceBase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Reads every record of the wrapped input format into a list.
 *
 * <p>The format is configured with this operator's parameters, asked for its input splits
 * (with a minimum of one split), and each split is opened, drained and closed in turn. Every
 * non-null record is copied with the output type's serializer before it is added to the result.
 *
 * <p>Cleanup runs in {@code finally} blocks: a split that was opened successfully is closed even
 * when reading it fails, and a {@code RichInputFormat} whose {@code openInputFormat()} succeeded
 * always gets its {@code closeInputFormat()} call.
 *
 * @param ctx the runtime context set on the format if it is a {@code RichInputFormat}
 * @param executionConfig the config used to create the serializer for the output type
 * @return the copied records of all splits, in read order
 * @throws Exception if the input format or the serializer fails
 */
protected List<OUT> executeOnCollections(RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
	@SuppressWarnings("unchecked")
	InputFormat<OUT, InputSplit> inputFormat = (InputFormat<OUT, InputSplit>) this.formatWrapper.getUserCodeObject();
	//configure the input format
	inputFormat.configure(this.parameters);

	//open the input format
	if (inputFormat instanceof RichInputFormat) {
		((RichInputFormat) inputFormat).setRuntimeContext(ctx);
		((RichInputFormat) inputFormat).openInputFormat();
	}

	List<OUT> result = new ArrayList<OUT>();

	try {
		// splits
		InputSplit[] splits = inputFormat.createInputSplits(1);
		TypeSerializer<OUT> serializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);

		for (InputSplit split : splits) {
			inputFormat.open(split);

			try {
				while (!inputFormat.reachedEnd()) {
					OUT next = inputFormat.nextRecord(serializer.createInstance());
					if (next != null) {
						result.add(serializer.copy(next));
					}
				}
			} finally {
				// release the split even when reading it failed
				inputFormat.close();
			}
		}
	} finally {
		//close the input format, also on failure
		if (inputFormat instanceof RichInputFormat) {
			((RichInputFormat) inputFormat).closeInputFormat();
		}
	}

	return result;
}
 
Example #27
Source File: TypeExtractor.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Determines the type information of the records produced by an input format.
 *
 * <p>A format that implements {@code ResultTypeQueryable} is asked for its produced type;
 * for any other format the type is derived from the format's class via
 * {@code privateCreateTypeInfo}.
 *
 * @param inputFormatInterface the input format to inspect
 * @param <IN> the record type of the input format
 * @return the type information for {@code IN}
 */
@SuppressWarnings("unchecked")
@PublicEvolving
public static <IN> TypeInformation<IN> getInputFormatTypes(InputFormat<IN, ?> inputFormatInterface) {
	if (!(inputFormatInterface instanceof ResultTypeQueryable)) {
		// no self-description available: derive the type from the class hierarchy
		return new TypeExtractor().privateCreateTypeInfo(InputFormat.class, inputFormatInterface.getClass(), 0, null, null);
	}
	final ResultTypeQueryable<IN> queryable = (ResultTypeQueryable<IN>) inputFormatInterface;
	return queryable.getProducedType();
}
 
Example #28
Source File: OrcTableSourceTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Projects an ORC table source onto fields 3, 5, 1 and 0 and checks that a copy is returned
 * with an identical table schema, a return type adapted to the selected fields, and that
 * {@code selectFields(3, 5, 1, 0)} is invoked on the input format when the data set is built.
 */
@Test
@SuppressWarnings("unchecked")
public void testProjectFields() throws Exception {

	OrcTableSource fullSource = OrcTableSource.builder()
		.path(getPath(TEST_FILE_NESTED))
		.forOrcSchema(TEST_SCHEMA_NESTED)
		.build();

	OrcTableSource projected = (OrcTableSource) fullSource.projectFields(new int[]{3, 5, 1, 0});

	// ensure copy is returned
	assertTrue(fullSource != projected);

	// ensure table schema is identical
	assertEquals(fullSource.getTableSchema(), projected.getTableSchema());

	// ensure return type was adapted
	String[] allNames = getNestedFieldNames();
	TypeInformation[] allTypes = getNestedFieldTypes();
	String[] selectedNames = new String[] {allNames[3], allNames[5], allNames[1], allNames[0]};
	TypeInformation[] selectedTypes = new TypeInformation[] {allTypes[3], allTypes[5], allTypes[1], allTypes[0]};
	assertEquals(Types.ROW_NAMED(selectedNames, selectedTypes), projected.getReturnType());

	// ensure IF is configured with selected fields
	OrcTableSource spiedSource = spy(projected);
	OrcRowInputFormat mockedFormat = mock(OrcRowInputFormat.class);
	doReturn(mockedFormat).when(spiedSource).buildOrcInputFormat();

	ExecutionEnvironment mockedEnv = mock(ExecutionEnvironment.class);
	when(mockedEnv.createInput(any(InputFormat.class))).thenReturn(mock(DataSource.class));

	spiedSource.getDataSet(mockedEnv);
	verify(mockedFormat).selectFields(eq(3), eq(5), eq(1), eq(0));
}
 
Example #29
Source File: InputOutputFormatContainer.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the input format wrappers keyed by operator ID, as provided by
 * {@code formats.getInputFormats()}.
 *
 * @return the map obtained from {@code formats}
 */
public Map<OperatorID, UserCodeWrapper<? extends InputFormat<?, ?>>> getInputFormats() {
	final Map<OperatorID, UserCodeWrapper<? extends InputFormat<?, ?>>> byOperator = formats.getInputFormats();
	return byOperator;
}
 
Example #30
Source File: InputFormatSourceFunction.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a source function that stores the given input format and type information.
 *
 * @param format the input format; kept as an {@code InputFormat<OUT, InputSplit>}
 * @param typeInfo the type information for {@code OUT}
 */
@SuppressWarnings("unchecked")
public InputFormatSourceFunction(InputFormat<OUT, ?> format, TypeInformation<OUT> typeInfo) {
	this.typeInfo = typeInfo;
	// the concrete split type is a wildcard here, so the format is viewed as working on InputSplit
	this.format = (InputFormat<OUT, InputSplit>) format;
}