org.apache.flink.api.common.io.InputFormat Java Examples

The following examples show how to use org.apache.flink.api.common.io.InputFormat. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source Project: Flink-CEPplus   Author: ljygz   File: AvroInputFormatTypeExtractionTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the element type of an {@link AvroInputFormat} is extracted as a POJO type,
 * both when the {@link TypeExtractor} is asked directly and when the format is turned into
 * a {@link DataSet} by the execution environment.
 *
 * @throws Exception propagated unchanged so the test runner reports the original failure
 *         with its full stack trace and cause, instead of only the exception message
 */
@Test
public void testTypeExtraction() throws Exception {
	// The path is a placeholder (as its name says); it is only used to construct the format.
	InputFormat<MyAvroType, ?> format = new AvroInputFormat<MyAvroType>(new Path("file:///ignore/this/file"), MyAvroType.class);

	// Type information as reported by the extractor itself.
	TypeInformation<?> typeInfoDirect = TypeExtractor.getInputFormatTypes(format);

	// Type information as attached to a data set created from the same format.
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<MyAvroType> input = env.createInput(format);
	TypeInformation<?> typeInfoDataSet = input.getType();

	Assert.assertTrue(typeInfoDirect instanceof PojoTypeInfo);
	Assert.assertTrue(typeInfoDataSet instanceof PojoTypeInfo);

	Assert.assertEquals(MyAvroType.class, typeInfoDirect.getTypeClass());
	Assert.assertEquals(MyAvroType.class, typeInfoDataSet.getTypeClass());
}
 
Example #2
Source Project: Flink-CEPplus   Author: ljygz   File: DataSource.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a new data source.
 *
 * @param context The environment in which the data source gets executed.
 * @param inputFormat The input format that the data source executes.
 * @param type The type of the elements produced by this input format.
 */
public DataSource(ExecutionEnvironment context, InputFormat<OUT, ?> inputFormat, TypeInformation<OUT> type, String dataSourceLocationName) {
	super(context, type);

	this.dataSourceLocationName = dataSourceLocationName;

	if (inputFormat == null) {
		throw new IllegalArgumentException("The input format may not be null.");
	}

	this.inputFormat = inputFormat;

	if (inputFormat instanceof NonParallelInput) {
		this.parallelism = 1;
	}
}
 
Example #3
Source Project: flink   Author: apache   File: TypeExtractorInputFormatsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the type extractor resolves the produced type of
 * {@code FinalRelativeInputFormat} to a three-field tuple of
 * {@code (String, Integer, Double)}.
 *
 * @throws Exception propagated unchanged so the test runner reports the original failure
 *         with its full stack trace and cause, instead of only the exception message
 */
@Test
public void testMultiLevelDerivedInputFormatType() throws Exception {
	// composite type
	InputFormat<?, ?> format = new FinalRelativeInputFormat();
	TypeInformation<?> typeInfo = TypeExtractor.getInputFormatTypes(format);

	assertTrue(typeInfo.isTupleType());
	assertTrue(typeInfo instanceof TupleTypeInfo);

	// The cast is guarded by the instanceof assertion above; the field types are verified below.
	@SuppressWarnings("unchecked")
	TupleTypeInfo<Tuple3<String, Integer, Double>> tupleInfo = (TupleTypeInfo<Tuple3<String, Integer, Double>>) typeInfo;

	assertEquals(3, tupleInfo.getArity());
	assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tupleInfo.getTypeAt(0));
	assertEquals(BasicTypeInfo.INT_TYPE_INFO, tupleInfo.getTypeAt(1));
	assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tupleInfo.getTypeAt(2));
}
 
Example #4
Source Project: flink   Author: apache   File: JobGraphGenerator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the job vertex for a data source plan node.
 *
 * <p>The vertex is named after the node, receives the node's resources, runs
 * {@code DataSourceTask}, and carries the node's input format and parameters, which are
 * written into the vertex's task configuration under a freshly created operator id.
 *
 * @param node the source plan node to translate
 * @return the configured vertex
 * @throws CompilerException declared by the signature; not thrown directly in this method
 */
private JobVertex createDataSourceVertex(SourcePlanNode node) throws CompilerException {
	final InputOutputFormatVertex sourceVertex = new InputOutputFormatVertex(node.getNodeName());
	final TaskConfig taskConfig = new TaskConfig(sourceVertex.getConfiguration());

	final OperatorID formatId = new OperatorID();

	sourceVertex.setResources(node.getMinResources(), node.getPreferredResources());
	sourceVertex.setInvokableClass(DataSourceTask.class);
	sourceVertex.setFormatDescription(formatId, getDescriptionForUserCode(node.getProgramOperator().getUserCodeWrapper()));

	// Register the user code (format and its parameters) and persist it into the task config.
	final InputOutputFormatContainer formatContainer =
		new InputOutputFormatContainer(Thread.currentThread().getContextClassLoader());
	formatContainer.addInputFormat(formatId, (UserCodeWrapper<? extends InputFormat<?, ?>>) node.getProgramOperator().getUserCodeWrapper());
	formatContainer.addParameters(formatId, node.getProgramOperator().getParameters());
	formatContainer.write(taskConfig);

	taskConfig.setOutputSerializer(node.getSerializer());
	return sourceVertex;
}
 
Example #5
Source Project: flink   Author: flink-tpc-ds   File: AvroInputFormatTypeExtractionTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the element type of an {@link AvroInputFormat} is extracted as a POJO type,
 * both when the {@link TypeExtractor} is asked directly and when the format is turned into
 * a {@link DataSet} by the execution environment.
 *
 * @throws Exception propagated unchanged so the test runner reports the original failure
 *         with its full stack trace and cause, instead of only the exception message
 */
@Test
public void testTypeExtraction() throws Exception {
	// The path is a placeholder (as its name says); it is only used to construct the format.
	InputFormat<MyAvroType, ?> format = new AvroInputFormat<MyAvroType>(new Path("file:///ignore/this/file"), MyAvroType.class);

	// Type information as reported by the extractor itself.
	TypeInformation<?> typeInfoDirect = TypeExtractor.getInputFormatTypes(format);

	// Type information as attached to a data set created from the same format.
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<MyAvroType> input = env.createInput(format);
	TypeInformation<?> typeInfoDataSet = input.getType();

	Assert.assertTrue(typeInfoDirect instanceof PojoTypeInfo);
	Assert.assertTrue(typeInfoDataSet instanceof PojoTypeInfo);

	Assert.assertEquals(MyAvroType.class, typeInfoDirect.getTypeClass());
	Assert.assertEquals(MyAvroType.class, typeInfoDataSet.getTypeClass());
}
 
Example #6
Source Project: flink   Author: flink-tpc-ds   File: DataSource.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a new data source.
 *
 * @param context The environment in which the data source gets executed.
 * @param inputFormat The input format that the data source executes.
 * @param type The type of the elements produced by this input format.
 */
public DataSource(ExecutionEnvironment context, InputFormat<OUT, ?> inputFormat, TypeInformation<OUT> type, String dataSourceLocationName) {
	super(context, type);

	this.dataSourceLocationName = dataSourceLocationName;

	if (inputFormat == null) {
		throw new IllegalArgumentException("The input format may not be null.");
	}

	this.inputFormat = inputFormat;

	if (inputFormat instanceof NonParallelInput) {
		this.parallelism = 1;
	}
}
 
Example #7
Source Project: flink   Author: flink-tpc-ds   File: TypeExtractorInputFormatsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the type extractor resolves the produced type of
 * {@code FinalRelativeInputFormat} to a three-field tuple of
 * {@code (String, Integer, Double)}.
 *
 * @throws Exception propagated unchanged so the test runner reports the original failure
 *         with its full stack trace and cause, instead of only the exception message
 */
@Test
public void testMultiLevelDerivedInputFormatType() throws Exception {
	// composite type
	InputFormat<?, ?> format = new FinalRelativeInputFormat();
	TypeInformation<?> typeInfo = TypeExtractor.getInputFormatTypes(format);

	assertTrue(typeInfo.isTupleType());
	assertTrue(typeInfo instanceof TupleTypeInfo);

	// The cast is guarded by the instanceof assertion above; the field types are verified below.
	@SuppressWarnings("unchecked")
	TupleTypeInfo<Tuple3<String, Integer, Double>> tupleInfo = (TupleTypeInfo<Tuple3<String, Integer, Double>>) typeInfo;

	assertEquals(3, tupleInfo.getArity());
	assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tupleInfo.getTypeAt(0));
	assertEquals(BasicTypeInfo.INT_TYPE_INFO, tupleInfo.getTypeAt(1));
	assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tupleInfo.getTypeAt(2));
}
 
Example #8
Source Project: flink   Author: flink-tpc-ds   File: InputOutputFormatContainerTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips a container holding a single input format (and one parameter for it) through
 * a {@code TaskConfig} and verifies that the reloaded container exposes exactly that input
 * format, no output formats, and the same parameter.
 */
@Test
public void testOnlyInputFormat() {
	// --- write side ---
	final ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
	InputOutputFormatContainer written = new InputOutputFormatContainer(contextLoader);

	OperatorID id = new OperatorID();
	written.addInputFormat(id, new TestInputFormat("test input format"));
	written.addParameters(id, "parameter1", "abc123");

	TaskConfig config = new TaskConfig(new Configuration());
	written.write(config);

	// --- read side ---
	InputOutputFormatContainer restored = new InputOutputFormatContainer(config, getClass().getClassLoader());

	Map<OperatorID, UserCodeWrapper<? extends InputFormat<?, ?>>> restoredInputs = restored.getInputFormats();
	assertEquals(1, restoredInputs.size());
	assertEquals(0, restored.getOutputFormats().size());

	TestInputFormat restoredFormat = (TestInputFormat) restoredInputs.get(id).getUserCodeObject();
	assertEquals("test input format", restoredFormat.getName());

	Configuration restoredParams = restored.getParameters(id);
	assertEquals(1, restoredParams.keySet().size());
	assertEquals("abc123", restoredParams.getString("parameter1", null));
}
 
Example #9
Source Project: flink   Author: apache   File: AvroInputFormatTypeExtractionTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the element type of an {@link AvroInputFormat} is extracted as a POJO type,
 * both when the {@link TypeExtractor} is asked directly and when the format is turned into
 * a {@link DataSet} by the execution environment.
 *
 * @throws Exception propagated unchanged so the test runner reports the original failure
 *         with its full stack trace and cause, instead of only the exception message
 */
@Test
public void testTypeExtraction() throws Exception {
	// The path is a placeholder (as its name says); it is only used to construct the format.
	InputFormat<MyAvroType, ?> format = new AvroInputFormat<MyAvroType>(new Path("file:///ignore/this/file"), MyAvroType.class);

	// Type information as reported by the extractor itself.
	TypeInformation<?> typeInfoDirect = TypeExtractor.getInputFormatTypes(format);

	// Type information as attached to a data set created from the same format.
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<MyAvroType> input = env.createInput(format);
	TypeInformation<?> typeInfoDataSet = input.getType();

	Assert.assertTrue(typeInfoDirect instanceof PojoTypeInfo);
	Assert.assertTrue(typeInfoDataSet instanceof PojoTypeInfo);

	Assert.assertEquals(MyAvroType.class, typeInfoDirect.getTypeClass());
	Assert.assertEquals(MyAvroType.class, typeInfoDataSet.getTypeClass());
}
 
Example #10
Source Project: flink   Author: flink-tpc-ds   File: JobGraphGenerator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the job vertex for a data source plan node.
 *
 * <p>The vertex is named after the node, receives the node's resources, runs
 * {@code DataSourceTask}, and carries the node's input format and parameters, which are
 * written into the vertex's task configuration under a freshly created operator id.
 *
 * @param node the source plan node to translate
 * @return the configured vertex
 * @throws CompilerException declared by the signature; not thrown directly in this method
 */
private JobVertex createDataSourceVertex(SourcePlanNode node) throws CompilerException {
	final InputOutputFormatVertex sourceVertex = new InputOutputFormatVertex(node.getNodeName());
	final TaskConfig taskConfig = new TaskConfig(sourceVertex.getConfiguration());

	final OperatorID formatId = new OperatorID();

	sourceVertex.setResources(node.getMinResources(), node.getPreferredResources());
	sourceVertex.setInvokableClass(DataSourceTask.class);
	sourceVertex.setFormatDescription(formatId, getDescriptionForUserCode(node.getProgramOperator().getUserCodeWrapper()));

	// Register the user code (format and its parameters) and persist it into the task config.
	final InputOutputFormatContainer formatContainer =
		new InputOutputFormatContainer(Thread.currentThread().getContextClassLoader());
	formatContainer.addInputFormat(formatId, (UserCodeWrapper<? extends InputFormat<?, ?>>) node.getProgramOperator().getUserCodeWrapper());
	formatContainer.addParameters(formatId, node.getProgramOperator().getParameters());
	formatContainer.write(taskConfig);

	taskConfig.setOutputSerializer(node.getSerializer());
	return sourceVertex;
}
 
Example #11
Source Project: flink   Author: apache   File: TypeExtractor.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Determines the type information of the records produced by an input format.
 *
 * <p>A format that implements {@code ResultTypeQueryable} is asked for its produced type
 * directly; for every other format the type is derived from the format's class relative to
 * the {@code InputFormat} interface (type parameter index 0).
 *
 * @param inputFormatInterface the input format to inspect
 * @param <IN> the record type produced by the format
 * @return the type information for the format's records
 */
@SuppressWarnings("unchecked")
@PublicEvolving
public static <IN> TypeInformation<IN> getInputFormatTypes(InputFormat<IN, ?> inputFormatInterface) {
	if (!(inputFormatInterface instanceof ResultTypeQueryable)) {
		// No self-description available: extract from the class' generic signature.
		return new TypeExtractor().privateCreateTypeInfo(InputFormat.class, inputFormatInterface.getClass(), 0, null, null);
	}

	final ResultTypeQueryable<IN> selfDescribing = (ResultTypeQueryable<IN>) inputFormatInterface;
	return selfDescribing.getProducedType();
}
 
Example #12
Source Project: flink   Author: apache   File: StreamExecutionEnvironment.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the input format in an {@code InputFormatSourceFunction} and registers that function
 * as a source under the given name.
 *
 * @param inputFormat the format to read from
 * @param typeInfo the type information of the produced records
 * @param sourceName the name passed on to {@code addSource}
 * @param <OUT> the record type
 * @return the stream source returned by {@code addSource}
 */
private <OUT> DataStreamSource<OUT> createInput(InputFormat<OUT, ?> inputFormat,
												TypeInformation<OUT> typeInfo,
												String sourceName) {

	return addSource(new InputFormatSourceFunction<OUT>(inputFormat, typeInfo), sourceName, typeInfo);
}
 
Example #13
Source Project: Flink-CEPplus   Author: ljygz   File: TypeExtractor.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Determines the type information of the records produced by an input format.
 *
 * <p>A format that implements {@code ResultTypeQueryable} is asked for its produced type
 * directly; for every other format the type is derived from the format's class relative to
 * the {@code InputFormat} interface (type parameter index 0).
 *
 * @param inputFormatInterface the input format to inspect
 * @param <IN> the record type produced by the format
 * @return the type information for the format's records
 */
@SuppressWarnings("unchecked")
@PublicEvolving
public static <IN> TypeInformation<IN> getInputFormatTypes(InputFormat<IN, ?> inputFormatInterface) {
	if (!(inputFormatInterface instanceof ResultTypeQueryable)) {
		// No self-description available: extract from the class' generic signature.
		return new TypeExtractor().privateCreateTypeInfo(InputFormat.class, inputFormatInterface.getClass(), 0, null, null);
	}

	final ResultTypeQueryable<IN> selfDescribing = (ResultTypeQueryable<IN>) inputFormatInterface;
	return selfDescribing.getProducedType();
}
 
Example #14
Source Project: Flink-CEPplus   Author: ljygz   File: GenericDataSourceBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs this data source in the calling thread and collects everything it produces.
 *
 * <p>The input format is configured with this operator's parameters, asked for its input
 * splits (with a minimum of one split), and then every split is read to the end. Each
 * non-null record is copied with the output type's serializer before it is added to the
 * result.
 *
 * <p>Cleanup is guaranteed: a split that was opened is closed even if reading it fails, and
 * a rich input format that was opened via {@code openInputFormat()} always receives
 * {@code closeInputFormat()}.
 *
 * @param ctx the runtime context handed to rich input formats
 * @param executionConfig the config used to create the record serializer
 * @return the copied records of all splits, in reading order
 * @throws Exception if configuring, opening, reading, or closing the input format fails
 */
protected List<OUT> executeOnCollections(RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
	@SuppressWarnings("unchecked")
	InputFormat<OUT, InputSplit> inputFormat = (InputFormat<OUT, InputSplit>) this.formatWrapper.getUserCodeObject();
	//configure the input format
	inputFormat.configure(this.parameters);

	// Rich formats have an additional open/close pair that brackets the processing of all splits.
	final RichInputFormat<?, ?> richFormat =
		inputFormat instanceof RichInputFormat ? (RichInputFormat<?, ?>) inputFormat : null;

	//open the input format
	if (richFormat != null) {
		richFormat.setRuntimeContext(ctx);
		richFormat.openInputFormat();
	}

	List<OUT> result = new ArrayList<OUT>();

	try {
		// splits
		InputSplit[] splits = inputFormat.createInputSplits(1);
		TypeSerializer<OUT> serializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);

		for (InputSplit split : splits) {
			inputFormat.open(split);

			try {
				while (!inputFormat.reachedEnd()) {
					OUT next = inputFormat.nextRecord(serializer.createInstance());
					if (next != null) {
						result.add(serializer.copy(next));
					}
				}
			} finally {
				// release the split even when reading it failed
				inputFormat.close();
			}
		}
	} finally {
		//close the input format
		if (richFormat != null) {
			richFormat.closeInputFormat();
		}
	}

	return result;
}
 
Example #15
Source Project: Flink-CEPplus   Author: ljygz   File: TypeExtractorInputFormatsTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that for {@code QueryableInputFormat} the type extractor returns
 * {@code BasicTypeInfo.DOUBLE_TYPE_INFO}.
 *
 * @throws Exception propagated unchanged so the test runner reports the original failure
 *         with its full stack trace and cause, instead of only the exception message
 */
@Test
public void testQueryableFormatType() throws Exception {
	InputFormat<?, ?> format = new QueryableInputFormat();
	TypeInformation<?> typeInfo = TypeExtractor.getInputFormatTypes(format);
	assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, typeInfo);
}
 
Example #16
Source Project: flink   Author: apache   File: ParquetFileSystemFormatFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code ParquetInputFormat} from the values exposed by the reader context.
 *
 * @param context supplies the paths, schema (field names and data types), projected fields,
 *        default partition name, pushed-down limit, and format options
 * @return the new Parquet input format
 */
@Override
public InputFormat<RowData, ?> createReader(ReaderContext context) {
	return new ParquetInputFormat(
			context.getPaths(),
			context.getSchema().getFieldNames(),
			context.getSchema().getFieldDataTypes(),
			context.getProjectFields(),
			context.getDefaultPartName(),
			context.getPushedDownLimit(),
			// the format options are consumed twice: converted into a Parquet configuration ...
			getParquetConfiguration(context.getFormatOptions()),
			// ... and read directly for the UTC_TIMEZONE option
			context.getFormatOptions().get(UTC_TIMEZONE));
}
 
Example #17
Source Project: flink   Author: apache   File: CsvFileSystemFormatFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code CsvInputFormat} for the given reader context.
 *
 * <p>The format options are validated first. Two index mappings are then computed for the
 * fields the format itself has to produce ({@code context.getFormatProjectFields()}): the
 * position of each such field within the projected fields, and its position within the CSV
 * fields (all schema fields that are not partition keys).
 *
 * @param context supplies the options, schema, projection and partition information
 * @return the new CSV input format
 */
@Override
public InputFormat<RowData, ?> createReader(ReaderContext context) {
	ReadableConfig formatOptions = context.getFormatOptions();
	validateFormatOptions(formatOptions);

	RowType rowType = context.getFormatRowType();

	String[] schemaFieldNames = context.getSchema().getFieldNames();

	// names of the projected fields, in projection order
	List<String> projectedNames = Arrays.stream(context.getProjectFields())
		.mapToObj(fieldIndex -> schemaFieldNames[fieldIndex])
		.collect(Collectors.toList());

	// names of the fields stored in the CSV data, i.e. everything but the partition keys
	List<String> nonPartitionNames = Arrays.stream(schemaFieldNames)
		.filter(name -> !context.getPartitionKeys().contains(name))
		.collect(Collectors.toList());

	int[] csvSelectFieldToProjectFieldMapping = context.getFormatProjectFields().stream()
		.mapToInt(name -> projectedNames.indexOf(name))
		.toArray();
	int[] csvSelectFieldToCsvFieldMapping = context.getFormatProjectFields().stream()
		.mapToInt(name -> nonPartitionNames.indexOf(name))
		.toArray();

	CsvSchema schema = buildCsvSchema(rowType, formatOptions);

	boolean skipParseErrors = formatOptions.get(IGNORE_PARSE_ERRORS);

	return new CsvInputFormat(
		context.getPaths(),
		context.getSchema().getFieldDataTypes(),
		context.getSchema().getFieldNames(),
		schema,
		rowType,
		context.getProjectFields(),
		context.getPartitionKeys(),
		context.getDefaultPartName(),
		context.getPushedDownLimit(),
		csvSelectFieldToProjectFieldMapping,
		csvSelectFieldToCsvFieldMapping,
		skipParseErrors);
}
 
Example #18
Source Project: flink   Author: flink-tpc-ds   File: HiveTableSource.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns a new {@code HiveTableInputFormat} built from this source's job configuration,
 * catalog table, and Hive partitions.
 *
 * @return a new input format instance on every call
 */
@Override
public InputFormat getInputFormat() {
	// Run the partition initialization only while the initAllPartitions flag is unset.
	// NOTE(review): presumably initAllPartitions() sets the flag and fills
	// allHivePartitions — confirm against the method's definition.
	if (!initAllPartitions) {
		initAllPartitions();
	}
	return new HiveTableInputFormat(jobConf, catalogTable, allHivePartitions);
}
 
Example #19
Source Project: flink   Author: apache   File: ParquetTableSourceTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Projects a nested Parquet table source onto fields 2, 4 and 6 and verifies that the
 * projection yields a new source with an unchanged table schema, a different description,
 * a narrowed return type, and an input format configured with exactly the selected fields.
 */
@Test
public void testFieldsProjection() throws Exception {
	ParquetTableSource unprojected = createNestedTestParquetTableSource(testPath);
	ParquetTableSource projected = (ParquetTableSource) unprojected.projectFields(new int[] {2, 4, 6});

	// projection must not return the original instance
	assertNotSame(projected, unprojected);

	// the table schema is unaffected by the projection
	assertEquals(unprojected.getTableSchema(), projected.getTableSchema());

	// the explain string reflects the projection
	assertNotEquals(unprojected.explainSource(), projected.explainSource());

	RowTypeInfo nestedRowType = (RowTypeInfo) NESTED_ROW_TYPE;
	String[] allNames = nestedRowType.getFieldNames();
	TypeInformation[] allTypes = nestedRowType.getFieldTypes();

	String[] selectedNames = new String[] {allNames[2], allNames[4], allNames[6]};
	assertEquals(
		Types.ROW_NAMED(selectedNames, allTypes[2], allTypes[4], allTypes[6]),
		projected.getReturnType()
	);

	// the input format behind the data set must carry the selected fields only
	DataSet<Row> dataSet = projected.getDataSet(ExecutionEnvironment.createLocalEnvironment());
	InputFormat<Row, ?> format = ((DataSource<Row>) dataSet).getInputFormat();
	assertTrue(format instanceof ParquetRowInputFormat);
	ParquetRowInputFormat parquetFormat = (ParquetRowInputFormat) format;
	assertArrayEquals(selectedNames, parquetFormat.getFieldNames());
	assertArrayEquals(new TypeInformation<?>[]{allTypes[2], allTypes[4], allTypes[6]}, parquetFormat.getFieldTypes());
}
 
Example #20
Source Project: flink   Author: flink-tpc-ds   File: ParquetTableSourceTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Projects a nested Parquet table source onto fields 2, 4 and 6 and verifies that the
 * projection yields a new source with an unchanged table schema, a different description,
 * a narrowed return type, and an input format configured with exactly the selected fields.
 */
@Test
public void testFieldsProjection() throws Exception {
	ParquetTableSource unprojected = createNestedTestParquetTableSource(testPath);
	ParquetTableSource projected = (ParquetTableSource) unprojected.projectFields(new int[] {2, 4, 6});

	// projection must not return the original instance
	assertNotSame(projected, unprojected);

	// the table schema is unaffected by the projection
	assertEquals(unprojected.getTableSchema(), projected.getTableSchema());

	// the explain string reflects the projection
	assertNotEquals(unprojected.explainSource(), projected.explainSource());

	RowTypeInfo nestedRowType = (RowTypeInfo) NESTED_ROW_TYPE;
	String[] allNames = nestedRowType.getFieldNames();
	TypeInformation[] allTypes = nestedRowType.getFieldTypes();

	String[] selectedNames = new String[] {allNames[2], allNames[4], allNames[6]};
	assertEquals(
		Types.ROW_NAMED(selectedNames, allTypes[2], allTypes[4], allTypes[6]),
		projected.getReturnType()
	);

	// the input format behind the data set must carry the selected fields only
	DataSet<Row> dataSet = projected.getDataSet(ExecutionEnvironment.createLocalEnvironment());
	InputFormat<Row, ?> format = ((DataSource<Row>) dataSet).getInputFormat();
	assertTrue(format instanceof ParquetRowInputFormat);
	ParquetRowInputFormat parquetFormat = (ParquetRowInputFormat) format;
	assertArrayEquals(selectedNames, parquetFormat.getFieldNames());
	assertArrayEquals(new TypeInformation<?>[]{allTypes[2], allTypes[4], allTypes[6]}, parquetFormat.getFieldTypes());
}
 
Example #21
Source Project: flink   Author: apache   File: GenericDataSourceBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs this data source in the calling thread and collects everything it produces.
 *
 * <p>The input format is configured with this operator's parameters, asked for its input
 * splits (with a minimum of one split), and then every split is read to the end. Each
 * non-null record is copied with the output type's serializer before it is added to the
 * result.
 *
 * <p>Cleanup is guaranteed: a split that was opened is closed even if reading it fails, and
 * a rich input format that was opened via {@code openInputFormat()} always receives
 * {@code closeInputFormat()}.
 *
 * @param ctx the runtime context handed to rich input formats
 * @param executionConfig the config used to create the record serializer
 * @return the copied records of all splits, in reading order
 * @throws Exception if configuring, opening, reading, or closing the input format fails
 */
protected List<OUT> executeOnCollections(RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
	@SuppressWarnings("unchecked")
	InputFormat<OUT, InputSplit> inputFormat = (InputFormat<OUT, InputSplit>) this.formatWrapper.getUserCodeObject();
	//configure the input format
	inputFormat.configure(this.parameters);

	// Rich formats have an additional open/close pair that brackets the processing of all splits.
	final RichInputFormat<?, ?> richFormat =
		inputFormat instanceof RichInputFormat ? (RichInputFormat<?, ?>) inputFormat : null;

	//open the input format
	if (richFormat != null) {
		richFormat.setRuntimeContext(ctx);
		richFormat.openInputFormat();
	}

	List<OUT> result = new ArrayList<OUT>();

	try {
		// splits
		InputSplit[] splits = inputFormat.createInputSplits(1);
		TypeSerializer<OUT> serializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);

		for (InputSplit split : splits) {
			inputFormat.open(split);

			try {
				while (!inputFormat.reachedEnd()) {
					OUT next = inputFormat.nextRecord(serializer.createInstance());
					if (next != null) {
						result.add(serializer.copy(next));
					}
				}
			} finally {
				// release the split even when reading it failed
				inputFormat.close();
			}
		}
	} finally {
		//close the input format
		if (richFormat != null) {
			richFormat.closeInputFormat();
		}
	}

	return result;
}
 
Example #22
Source Project: flink   Author: flink-tpc-ds   File: TypeExtractor.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Determines the type information of the records produced by an input format.
 *
 * <p>A format that implements {@code ResultTypeQueryable} is asked for its produced type
 * directly; for every other format the type is derived from the format's class relative to
 * the {@code InputFormat} interface (type parameter index 0).
 *
 * @param inputFormatInterface the input format to inspect
 * @param <IN> the record type produced by the format
 * @return the type information for the format's records
 */
@SuppressWarnings("unchecked")
@PublicEvolving
public static <IN> TypeInformation<IN> getInputFormatTypes(InputFormat<IN, ?> inputFormatInterface) {
	if (!(inputFormatInterface instanceof ResultTypeQueryable)) {
		// No self-description available: extract from the class' generic signature.
		return new TypeExtractor().privateCreateTypeInfo(InputFormat.class, inputFormatInterface.getClass(), 0, null, null);
	}

	final ResultTypeQueryable<IN> selfDescribing = (ResultTypeQueryable<IN>) inputFormatInterface;
	return selfDescribing.getProducedType();
}
 
Example #23
Source Project: flink   Author: flink-tpc-ds   File: GenericDataSourceBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs this data source in the calling thread and collects everything it produces.
 *
 * <p>The input format is configured with this operator's parameters, asked for its input
 * splits (with a minimum of one split), and then every split is read to the end. Each
 * non-null record is copied with the output type's serializer before it is added to the
 * result.
 *
 * <p>Cleanup is guaranteed: a split that was opened is closed even if reading it fails, and
 * a rich input format that was opened via {@code openInputFormat()} always receives
 * {@code closeInputFormat()}.
 *
 * @param ctx the runtime context handed to rich input formats
 * @param executionConfig the config used to create the record serializer
 * @return the copied records of all splits, in reading order
 * @throws Exception if configuring, opening, reading, or closing the input format fails
 */
protected List<OUT> executeOnCollections(RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
	@SuppressWarnings("unchecked")
	InputFormat<OUT, InputSplit> inputFormat = (InputFormat<OUT, InputSplit>) this.formatWrapper.getUserCodeObject();
	//configure the input format
	inputFormat.configure(this.parameters);

	// Rich formats have an additional open/close pair that brackets the processing of all splits.
	final RichInputFormat<?, ?> richFormat =
		inputFormat instanceof RichInputFormat ? (RichInputFormat<?, ?>) inputFormat : null;

	//open the input format
	if (richFormat != null) {
		richFormat.setRuntimeContext(ctx);
		richFormat.openInputFormat();
	}

	List<OUT> result = new ArrayList<OUT>();

	try {
		// splits
		InputSplit[] splits = inputFormat.createInputSplits(1);
		TypeSerializer<OUT> serializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);

		for (InputSplit split : splits) {
			inputFormat.open(split);

			try {
				while (!inputFormat.reachedEnd()) {
					OUT next = inputFormat.nextRecord(serializer.createInstance());
					if (next != null) {
						result.add(serializer.copy(next));
					}
				}
			} finally {
				// release the split even when reading it failed
				inputFormat.close();
			}
		}
	} finally {
		//close the input format
		if (richFormat != null) {
			richFormat.closeInputFormat();
		}
	}

	return result;
}
 
Example #24
Source Project: flink   Author: flink-tpc-ds   File: InputOutputFormatContainer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the single input format held by this container together with its operator id.
 *
 * @param <OT> the record type the caller expects the format to produce
 * @param <T> the input split type the caller expects the format to use
 * @return a pair of the operator id and the input format instantiated with the user code class loader
 * @throws IllegalStateException if the container does not hold exactly one input format
 *         (raised by {@code Preconditions.checkState})
 */
@SuppressWarnings("unchecked")
public <OT, T extends InputSplit> Pair<OperatorID, InputFormat<OT, T>> getUniqueInputFormat() {
	final Map<OperatorID, UserCodeWrapper<? extends InputFormat<?, ?>>> registered = formats.getInputFormats();
	Preconditions.checkState(registered.size() == 1);

	// exactly one entry exists, so the first one is the unique one
	final Map.Entry<OperatorID, UserCodeWrapper<? extends InputFormat<?, ?>>> only = registered.entrySet().iterator().next();

	final OperatorID operatorId = only.getKey();
	final InputFormat<OT, T> format =
		(InputFormat<OT, T>) only.getValue().getUserCodeObject(InputFormat.class, userCodeClassLoader);

	return new ImmutablePair<>(operatorId, format);
}
 
Example #25
Source Project: flink   Author: apache   File: InputOutputFormatContainer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Registers the input format wrapper for an operator.
 *
 * <p>The operator id is null-checked first, then the duplicate check runs, and only then is
 * the wrapper null-checked as part of storing it.
 *
 * @param operatorId the operator the format belongs to; must not be null
 * @param wrapper the wrapped input format; must not be null
 * @throws IllegalStateException if an input format is already registered for {@code operatorId}
 */
public void addInputFormat(OperatorID operatorId, UserCodeWrapper<? extends InputFormat<?, ?>> wrapper) {
	final boolean alreadyRegistered = inputFormats.containsKey(checkNotNull(operatorId));
	if (alreadyRegistered) {
		throw new IllegalStateException("The input format has been set for the operator: " + operatorId);
	}

	inputFormats.put(operatorId, checkNotNull(wrapper));
}
 
Example #26
Source Project: flink   Author: apache   File: TestCsvFileSystemFormatFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code TestRowDataCsvInputFormat} from the values exposed by the reader context.
 *
 * @param context supplies the paths, schema, partition keys, default partition name,
 *        projected fields, and pushed-down limit
 * @return the new test CSV input format
 */
@Override
public InputFormat<RowData, ?> createReader(ReaderContext context) {
	return new TestRowDataCsvInputFormat(
			context.getPaths(),
			context.getSchema(),
			context.getPartitionKeys(),
			context.getDefaultPartName(),
			context.getProjectFields(),
			context.getPushedDownLimit());
}
 
Example #27
Source Project: flink   Author: apache   File: OrcTableSourceTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Projects a nested ORC table source onto fields 3, 5, 1 and 0 and verifies that a copy is
 * returned, the table schema is unchanged, the return type follows the projection, and the
 * ORC input format is asked to select exactly those fields in that order.
 */
@Test
@SuppressWarnings("unchecked")
public void testProjectFields() throws Exception {

	OrcTableSource unprojected = OrcTableSource.builder()
		.path(getPath(TEST_FILE_NESTED))
		.forOrcSchema(TEST_SCHEMA_NESTED)
		.build();

	OrcTableSource projected = (OrcTableSource) unprojected.projectFields(new int[]{3, 5, 1, 0});

	// projection must hand back a different instance
	assertTrue(unprojected != projected);

	// the table schema is not affected by the projection
	assertEquals(unprojected.getTableSchema(), projected.getTableSchema());

	// the return type lists the selected fields in projection order
	String[] allNames = getNestedFieldNames();
	TypeInformation[] allTypes = getNestedFieldTypes();
	String[] selectedNames = new String[] {allNames[3], allNames[5], allNames[1], allNames[0]};
	TypeInformation[] selectedTypes = new TypeInformation[] {allTypes[3], allTypes[5], allTypes[1], allTypes[0]};
	assertEquals(
		Types.ROW_NAMED(selectedNames, selectedTypes),
		projected.getReturnType());

	// swap in a mocked input format and environment, then check the field selection call
	OrcTableSource spiedSource = spy(projected);
	OrcRowInputFormat mockedFormat = mock(OrcRowInputFormat.class);
	doReturn(mockedFormat).when(spiedSource).buildOrcInputFormat();
	ExecutionEnvironment mockedEnv = mock(ExecutionEnvironment.class);
	when(mockedEnv.createInput(any(InputFormat.class))).thenReturn(mock(DataSource.class));
	spiedSource.getDataSet(mockedEnv);
	verify(mockedFormat).selectFields(eq(3), eq(5), eq(1), eq(0));
}
 
Example #28
Source Project: flink   Author: apache   File: InputFormatProvider.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a provider around an already constructed input format.
 *
 * <p>The returned provider hands out the given instance from {@code createInputFormat()}
 * and always reports itself as bounded.
 *
 * @param inputFormat the input format the provider returns
 * @return a provider backed by {@code inputFormat}
 */
static InputFormatProvider of(InputFormat<RowData, ?> inputFormat) {
	return new InputFormatProvider() {
		@Override
		public boolean isBounded() {
			return true;
		}

		@Override
		public InputFormat<RowData, ?> createInputFormat() {
			return inputFormat;
		}
	};
}
 
Example #29
Source Project: alibaba-flink-connectors   Author: alibaba   File: AbstractLocatableParallelSource.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns a new {@code LocatableParallelSourceInputFormatWrapper} around this source.
 *
 * @return a new wrapper instance on every call
 */
@Override
public InputFormat<T, InputSplit> getFormat() {
	return new LocatableParallelSourceInputFormatWrapper<>(this);
}
 
Example #30
Source Project: Flink-CEPplus   Author: ljygz   File: CassandraConnectorITCase.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Writes twenty annotated POJOs through a {@code CassandraPojoOutputFormat}, checks that
 * twenty rows arrived, reads them back through a {@code CassandraPojoInputFormat}, and
 * compares the read POJOs (ordered by counter) with the written ones.
 */
@Test
public void testCassandraBatchPojoFormat() throws Exception {

	final String createTable = CREATE_TABLE_QUERY.replace(TABLE_NAME_VARIABLE, CustomCassandraAnnotatedPojo.TABLE_NAME);
	session.execute(createTable);

	OutputFormat<CustomCassandraAnnotatedPojo> pojoSink = new CassandraPojoOutputFormat<>(builder, CustomCassandraAnnotatedPojo.class, () -> new Mapper.Option[]{Mapper.Option.saveNullFields(true)});

	List<CustomCassandraAnnotatedPojo> written = IntStream.range(0, 20)
		.mapToObj(counter -> new CustomCassandraAnnotatedPojo(UUID.randomUUID().toString(), counter, 0))
		.collect(Collectors.toList());

	// --- write phase ---
	try {
		pojoSink.configure(new Configuration());
		pojoSink.open(0, 1);
		for (CustomCassandraAnnotatedPojo pojo : written) {
			pojoSink.writeRecord(pojo);
		}
	} finally {
		pojoSink.close();
	}

	final String selectWritten = SELECT_DATA_QUERY.replace(TABLE_NAME_VARIABLE, CustomCassandraAnnotatedPojo.TABLE_NAME);
	ResultSet rows = session.execute(selectWritten);
	Assert.assertEquals(20, rows.all().size());

	// --- read phase ---
	final String selectForSource = SELECT_DATA_QUERY.replace(TABLE_NAME_VARIABLE, "batches");
	InputFormat<CustomCassandraAnnotatedPojo, InputSplit> pojoSource = new CassandraPojoInputFormat<>(selectForSource, builder, CustomCassandraAnnotatedPojo.class);
	List<CustomCassandraAnnotatedPojo> readBack = new ArrayList<>();

	try {
		pojoSource.configure(new Configuration());
		pojoSource.open(null);
		while (!pojoSource.reachedEnd()) {
			readBack.add(pojoSource.nextRecord(null));
		}
	} finally {
		pojoSource.close();
	}

	Assert.assertEquals(20, readBack.size());

	// bring both lists into the same order before comparing them
	final Comparator<CustomCassandraAnnotatedPojo> byCounter = Comparator.comparingInt(CustomCassandraAnnotatedPojo::getCounter);
	readBack.sort(byCounter);
	written.sort(byCounter);

	assertThat(readBack, samePropertyValuesAs(written));
}