Java Code Examples for org.apache.flink.api.java.operators.DataSource

The following examples show how to use org.apache.flink.api.java.operators.DataSource. These examples are extracted from open source projects; the originating project and source file are listed above each example where available.
Example 1
Source Project: flink   Source File: CSVReaderTest.java    License: Apache License 2.0
@Test
public void testSubClassWithPartialsInHierarchie() throws Exception {
	CsvReader reader = getCsvReader();
	DataSource<FinalItem> sitems = reader.tupleType(FinalItem.class);
	TypeInformation<?> info = sitems.getType();

	Assert.assertEquals(true, info.isTupleType());
	Assert.assertEquals(FinalItem.class, info.getTypeClass());

	@SuppressWarnings("unchecked")
	TupleTypeInfo<SubItem> tinfo = (TupleTypeInfo<SubItem>) info;

	Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
	Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
	Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
	Assert.assertEquals(ValueTypeInfo.class, tinfo.getTypeAt(3).getClass());
	Assert.assertEquals(ValueTypeInfo.class, tinfo.getTypeAt(4).getClass());
	Assert.assertEquals(StringValue.class, ((ValueTypeInfo<?>) tinfo.getTypeAt(3)).getTypeClass());
	Assert.assertEquals(LongValue.class, ((ValueTypeInfo<?>) tinfo.getTypeAt(4)).getTypeClass());

	CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) sitems.getInputFormat();
	Assert.assertArrayEquals(new Class<?>[] {Integer.class, String.class, Double.class, StringValue.class, LongValue.class}, inputFormat.getFieldTypes());
}
 
Example 2
Source Project: Flink-CEPplus   Source File: ExecutionEnvironment.java    License: Apache License 2.0
/**
 * Creates a new data set that contains the given elements. The framework will determine the type according to the
 * base type supplied by the user. The elements must be of the base type itself or a subclass of it.
 * The sequence of elements must not be empty.
 * Note that this operation will result in a non-parallel data source, i.e. a data source with
 * a parallelism of one.
 *
 * @param type The base class type for every element in the collection.
 * @param data The elements to make up the data set.
 * @return A DataSet representing the given list of elements.
 */
@SafeVarargs
public final <X> DataSource<X> fromElements(Class<X> type, X... data) {
	if (data == null) {
		throw new IllegalArgumentException("The data must not be null.");
	}
	if (data.length == 0) {
		throw new IllegalArgumentException("The number of elements must not be zero.");
	}

	TypeInformation<X> typeInfo;
	try {
		typeInfo = TypeExtractor.getForClass(type);
	}
	catch (Exception e) {
		throw new RuntimeException("Could not create TypeInformation for type " + type.getName()
				+ "; please specify the TypeInformation manually via "
				+ "ExecutionEnvironment#fromElements(Collection, TypeInformation)", e);
	}

	return fromCollection(Arrays.asList(data), typeInfo, Utils.getCallLocationName());
}
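
For reference, a minimal usage sketch of this overload (the class and variable names below are illustrative and not taken from the projects above): passing the base class explicitly lets elements of different subclasses share a single DataSource.

// Hypothetical usage sketch of fromElements(Class, ...): the explicit base class lets
// Integer, Double, and Long elements share one DataSource<Number>.
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DataSource;

public class FromElementsWithBaseTypeSketch {
	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		DataSource<Number> numbers = env.fromElements(Number.class, 1, 2.5, 3L);
		numbers.print();
	}
}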
 
Example 3
Source Project: flink   Source File: ExecutionEnvironment.java    License: Apache License 2.0
/**
 * Creates a new data set that contains the given elements. The elements must all be of the same type,
 * for example, all {@link String} or all {@link Integer}. The sequence of elements must not be empty.
 *
 * <p>The framework will try and determine the exact type from the collection elements.
 * In case of generic elements, it may be necessary to manually supply the type information
 * via {@link #fromCollection(Collection, TypeInformation)}.
 *
 * <p>Note that this operation will result in a non-parallel data source, i.e. a data source with
 * a parallelism of one.
 *
 * @param data The elements to make up the data set.
 * @return A DataSet representing the given list of elements.
 */
@SafeVarargs
public final <X> DataSource<X> fromElements(X... data) {
	if (data == null) {
		throw new IllegalArgumentException("The data must not be null.");
	}
	if (data.length == 0) {
		throw new IllegalArgumentException("The number of elements must not be zero.");
	}

	TypeInformation<X> typeInfo;
	try {
		typeInfo = TypeExtractor.getForObject(data[0]);
	}
	catch (Exception e) {
		throw new RuntimeException("Could not create TypeInformation for type " + data[0].getClass().getName()
				+ "; please specify the TypeInformation manually via "
				+ "ExecutionEnvironment#fromElements(Collection, TypeInformation)", e);
	}

	return fromCollection(Arrays.asList(data), typeInfo, Utils.getCallLocationName());
}
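
The fallback mentioned in the Javadoc, supplying the TypeInformation manually for generic element types, could look roughly like the following sketch (names and values are illustrative):

// Hypothetical sketch: for generic elements such as Tuple2<String, Integer>, the type
// information is supplied explicitly instead of being extracted from the first element.
import java.util.Arrays;

import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;

public class FromCollectionWithTypeInfoSketch {
	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		DataSet<Tuple2<String, Integer>> pairs = env.fromCollection(
				Arrays.asList(new Tuple2<>("a", 1), new Tuple2<>("b", 2)),
				TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {}));
		pairs.print();
	}
}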
 
Example 4
Source Project: Flink-CEPplus   Source File: CsvReader.java    License: Apache License 2.0
/**
 * Configures the reader to read the CSV data and parse it to the given type. The type must be a subclass of
 * {@link Tuple}. The type information for the fields is obtained from the type class. The type
 * consequently needs to specify all generic field types of the tuple.
 *
 * @param targetType The class of the target type, needs to be a subclass of Tuple.
 * @return The DataSet representing the parsed CSV data.
 */
public <T extends Tuple> DataSource<T> tupleType(Class<T> targetType) {
	Preconditions.checkNotNull(targetType, "The target type class must not be null.");
	if (!Tuple.class.isAssignableFrom(targetType)) {
		throw new IllegalArgumentException("The target type must be a subclass of " + Tuple.class.getName());
	}

	@SuppressWarnings("unchecked")
	TupleTypeInfo<T> typeInfo = (TupleTypeInfo<T>) TypeExtractor.createTypeInfo(targetType);
	CsvInputFormat<T> inputFormat = new TupleCsvInputFormat<T>(path, this.lineDelimiter, this.fieldDelimiter, typeInfo, this.includedMask);

	Class<?>[] classes = new Class<?>[typeInfo.getArity()];
	for (int i = 0; i < typeInfo.getArity(); i++) {
		classes[i] = typeInfo.getTypeAt(i).getTypeClass();
	}

	configureInputFormat(inputFormat);
	return new DataSource<T>(executionContext, inputFormat, typeInfo, Utils.getCallLocationName());
}
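
A minimal usage sketch of tupleType(...) follows (the file path and the Item class are illustrative): the target type is a concrete Tuple subclass that fixes all generic field types, as the Javadoc requires.

// Hypothetical sketch: a concrete Tuple3 subclass fixes the field types, so
// tupleType(Item.class) can derive Integer, String, Double from the class alone.
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.tuple.Tuple3;

public class CsvTupleTypeSketch {

	public static class Item extends Tuple3<Integer, String, Double> {}

	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		DataSource<Item> items = env.readCsvFile("/tmp/items.csv").tupleType(Item.class);
		items.print();
	}
}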
 
Example 5
Source Project: Flink-CEPplus   Source File: CSVReaderTest.java    License: Apache License 2.0
@Test
public void testFieldTypes() throws Exception {
	CsvReader reader = getCsvReader();
	DataSource<Item> items = reader.tupleType(Item.class);

	TypeInformation<?> info = items.getType();
	if (!info.isTupleType()) {
		Assert.fail();
	} else {
		TupleTypeInfo<?> tinfo = (TupleTypeInfo<?>) info;
		Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
		Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
		Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
		Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(3));

	}

	CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) items.getInputFormat();
	Assert.assertArrayEquals(new Class<?>[]{Integer.class, String.class, Double.class, String.class}, inputFormat.getFieldTypes());
}
 
Example 6
Source Project: Flink-CEPplus   Source File: CSVReaderTest.java    License: Apache License 2.0
@Test
public void testSubClass() throws Exception {
	CsvReader reader = getCsvReader();
	DataSource<SubItem> sitems = reader.tupleType(SubItem.class);
	TypeInformation<?> info = sitems.getType();

	Assert.assertEquals(true, info.isTupleType());
	Assert.assertEquals(SubItem.class, info.getTypeClass());

	@SuppressWarnings("unchecked")
	TupleTypeInfo<SubItem> tinfo = (TupleTypeInfo<SubItem>) info;

	Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
	Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
	Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
	Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(3));

	CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) sitems.getInputFormat();
	Assert.assertArrayEquals(new Class<?>[]{Integer.class, String.class, Double.class, String.class}, inputFormat.getFieldTypes());
}
 
Example 7
Source Project: Flink-CEPplus   Source File: GraphGeneratorUtils.java    License: Apache License 2.0
/**
 * Generates {@link Vertex Vertices} with sequential, numerical labels.
 *
 * @param env the Flink execution environment.
 * @param parallelism operator parallelism
 * @param vertexCount number of sequential vertex labels
 * @return {@link DataSet} of sequentially labeled {@link Vertex vertices}
 */
public static DataSet<Vertex<LongValue, NullValue>> vertexSequence(ExecutionEnvironment env, int parallelism, long vertexCount) {
	Preconditions.checkArgument(vertexCount >= 0, "Vertex count must be non-negative");

	if (vertexCount == 0) {
		return env
			.fromCollection(Collections.emptyList(), TypeInformation.of(new TypeHint<Vertex<LongValue, NullValue>>(){}))
				.setParallelism(parallelism)
				.name("Empty vertex set");
	} else {
		LongValueSequenceIterator iterator = new LongValueSequenceIterator(0, vertexCount - 1);

		DataSource<LongValue> vertexLabels = env
			.fromParallelCollection(iterator, LongValue.class)
				.setParallelism(parallelism)
				.name("Vertex indices");

		return vertexLabels
			.map(new CreateVertex())
				.setParallelism(parallelism)
				.name("Vertex sequence");
	}
}
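
A short usage sketch of the utility above (the import path is assumed to be the Gelly generator package, and the parallelism and vertex count are arbitrary values):

// Hypothetical sketch: generate vertices labeled 0..9 with the helper shown above.
// The package for GraphGeneratorUtils is assumed to be org.apache.flink.graph.generator.
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.graph.Vertex;
import org.apache.flink.graph.generator.GraphGeneratorUtils;
import org.apache.flink.types.LongValue;
import org.apache.flink.types.NullValue;

public class VertexSequenceSketch {
	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		DataSet<Vertex<LongValue, NullValue>> vertices =
				GraphGeneratorUtils.vertexSequence(env, 2, 10);
		vertices.print();
	}
}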
 
Example 8
Source Project: flink   Source File: ExecutionEnvironment.java    License: Apache License 2.0
/**
 * Creates a new data set that contains the given elements. The framework will determine the type according to the
 * base type supplied by the user. The elements must be of the base type itself or a subclass of it.
 * The sequence of elements must not be empty.
 * Note that this operation will result in a non-parallel data source, i.e. a data source with
 * a parallelism of one.
 *
 * @param type The base class type for every element in the collection.
 * @param data The elements to make up the data set.
 * @return A DataSet representing the given list of elements.
 */
@SafeVarargs
public final <X> DataSource<X> fromElements(Class<X> type, X... data) {
	if (data == null) {
		throw new IllegalArgumentException("The data must not be null.");
	}
	if (data.length == 0) {
		throw new IllegalArgumentException("The number of elements must not be zero.");
	}

	TypeInformation<X> typeInfo;
	try {
		typeInfo = TypeExtractor.getForClass(type);
	}
	catch (Exception e) {
		throw new RuntimeException("Could not create TypeInformation for type " + type.getName()
				+ "; please specify the TypeInformation manually via "
				+ "ExecutionEnvironment#fromElements(Collection, TypeInformation)", e);
	}

	return fromCollection(Arrays.asList(data), typeInfo, Utils.getCallLocationName());
}
 
Example 9
Source Project: flink   Source File: BootstrapTransformationTest.java    License: Apache License 2.0
@Test
public void testOperatorSpecificMaxParallelismRespected() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	DataSource<Integer> input = env.fromElements(0);

	BootstrapTransformation<Integer> transformation = OperatorTransformation
		.bootstrapWith(input)
		.setMaxParallelism(1)
		.transform(new ExampleStateBootstrapFunction());

	int maxParallelism = transformation.getMaxParallelism(4);
	DataSet<TaggedOperatorSubtaskState> result = transformation.writeOperatorSubtaskStates(
		OperatorIDGenerator.fromUid("uid"),
		new MemoryStateBackend(),
		new Path(),
		maxParallelism
	);

	Assert.assertEquals("The parallelism of a data set should be constrained my the savepoint max parallelism", 1, getParallelism(result));
}
 
Example 10
public static void main(String[] args) throws Exception {
    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    String filePath = "file:\\D:\\imooc\\新一代大数据计算引擎 Flink从入门到实战-v\\input\\hello.txt";

    // 1. Register a local file in the distributed cache
    environment.registerCachedFile(filePath, "java-cf");
    DataSource<String> data = environment.fromElements("hadoop", "spark", "flink", "pyspark", "storm");

    data.map(new RichMapFunction<String, String>() {
        List<String> list = new ArrayList<>();
        @Override
        public void open(Configuration parameters) throws Exception {
            File file = getRuntimeContext().getDistributedCache().getFile("java-cf");
            List<String> lines = FileUtils.readLines(file);
            for (String line : lines) {
                System.out.println("line: " + line);
            }

        }

        @Override
        public String map(String value) throws Exception {
            return value;
        }
    }).print();
}
 
Example 11
Source Project: flink-learning   Source File: Main.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    DataSource<String> dataSource = env.fromElements(WORDS);

    dataSource.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
        @Override
        public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
            String[] words = line.split("\\W+");
            for (String word : words) {
                out.collect(new Tuple2<>(word, 1));
            }
        }
    })
            .groupBy(0)
            .sum(1)
            .print();

    long count = dataSource.count();
    System.out.println(count);
}
 
Example 12
public static void main(String[] args) throws Exception {
    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    BatchTableEnvironment tableEnvironment = BatchTableEnvironment.getTableEnvironment(environment);
    String filepath = "file:\\D:\\imooc\\新一代大数据计算引擎 Flink从入门到实战-v\\input\\sales.csv";
    //csv => DataSet
    DataSource<Sales> csv = environment.readCsvFile(filepath)
            .ignoreFirstLine()
            .pojoType(Sales.class, "transactionId", "customerId", "itemId", "amountPaid");
    //csv.print();

    Table sales = tableEnvironment.fromDataSet(csv);
    tableEnvironment.registerTable("sales", sales);
    Table resultTable = tableEnvironment.sqlQuery("select customerId, sum(amountPaid) money from sales group by customerId");

    DataSet<Row> result = tableEnvironment.toDataSet(resultTable, Row.class);
    result.print();
}
 
Example 13
Source Project: flink   Source File: CSVReaderTest.java    License: Apache License 2.0
@Test
public void testSubClass() throws Exception {
	CsvReader reader = getCsvReader();
	DataSource<SubItem> sitems = reader.tupleType(SubItem.class);
	TypeInformation<?> info = sitems.getType();

	Assert.assertEquals(true, info.isTupleType());
	Assert.assertEquals(SubItem.class, info.getTypeClass());

	@SuppressWarnings("unchecked")
	TupleTypeInfo<SubItem> tinfo = (TupleTypeInfo<SubItem>) info;

	Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
	Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
	Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
	Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(3));

	CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) sitems.getInputFormat();
	Assert.assertArrayEquals(new Class<?>[]{Integer.class, String.class, Double.class, String.class}, inputFormat.getFieldTypes());
}
 
Example 14
Source Project: toolbox   Source File: DataFlinkWriter.java    License: Apache License 2.0
public static <T extends DataInstance> void writeHeader(ExecutionEnvironment env, DataFlink<T> data, String path,
                                                         boolean includeRanges) {

    DataSource<String> name = env.fromElements("@relation " + data.getName());
    name.writeAsText(path + "/name.txt", FileSystem.WriteMode.OVERWRITE);

    DataSource<Attribute> attData = env.fromCollection(data.getAttributes().getFullListOfAttributes());

    attData.writeAsFormattedText(path + "/attributes.txt", FileSystem.WriteMode.OVERWRITE, new TextOutputFormat.TextFormatter<Attribute>() {
        @Override
        public String format(Attribute att) {
            return ARFFDataWriter.attributeToARFFStringWithIndex(att,includeRanges);
        }
    });
}
 
Example 15
Source Project: flink   Source File: PartitionOperatorTest.java    License: Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testRangePartitionByComplexKeyWithTooManyOrders() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSource<Tuple2<Tuple2<Integer, Integer>, Integer>> ds = env.fromElements(
		new Tuple2<>(new Tuple2<>(1, 1), 1),
		new Tuple2<>(new Tuple2<>(2, 2), 2),
		new Tuple2<>(new Tuple2<>(2, 2), 2)
	);
	ds.partitionByRange(0).withOrders(Order.ASCENDING, Order.DESCENDING);
}
 
Example 16
Source Project: beam   Source File: FlinkBatchTransformTranslators.java    License: Apache License 2.0
@Override
public void translateNode(
    PTransform<PBegin, PCollection<byte[]>> transform, FlinkBatchTranslationContext context) {
  String name = transform.getName();
  PCollection<byte[]> output = context.getOutput(transform);

  TypeInformation<WindowedValue<byte[]>> typeInformation = context.getTypeInfo(output);
  DataSource<WindowedValue<byte[]>> dataSource =
      new DataSource<>(
          context.getExecutionEnvironment(), new ImpulseInputFormat(), typeInformation, name);

  context.setOutputDataSet(output, dataSource);
}
 
Example 17
Source Project: flink   Source File: PropertyDataSourceTest.java    License: Apache License 2.0
@Test
public void checkSinglePartitionedOrderedSource3() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(0)
			.splitsOrderedBy(new int[]{1}, new Order[]{Order.ASCENDING});

	data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 18
public static void mapFunction(ExecutionEnvironment env) throws Exception {
    List<Integer> list = new ArrayList<Integer>();
    for (int i = 0; i <= 10; i++) {
        list.add(i);
    }
    DataSource<Integer> dataSource = env.fromCollection(list);
    dataSource.map(new MapFunction<Integer, Integer>() {
        @Override
        public Integer map(Integer value) throws Exception {
            return value + 1;
        }
    }).print();
}
 
Example 19
Source Project: Flink-CEPplus   Source File: PartitionOperatorTest.java    License: Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testRangePartitionWithEmptyIndicesKey() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSource<Tuple2<Tuple2<Integer, Integer>, Integer>> ds = env.fromElements(
		new Tuple2<>(new Tuple2<>(1, 1), 1),
		new Tuple2<>(new Tuple2<>(2, 2), 2),
		new Tuple2<>(new Tuple2<>(2, 2), 2)
	);
	ds.partitionByRange(new int[]{});
}
 
Example 20
Source Project: Flink-CEPplus   Source File: PartitionOperatorTest.java    License: Apache License 2.0
@Test
public void testRangePartitionByComplexKeyWithOrders() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSource<Tuple2<Tuple2<Integer, Integer>, Integer>> ds = env.fromElements(
		new Tuple2<>(new Tuple2<>(1, 1), 1),
		new Tuple2<>(new Tuple2<>(2, 2), 2),
		new Tuple2<>(new Tuple2<>(2, 2), 2)
	);
	ds.partitionByRange(0, 1).withOrders(Order.ASCENDING, Order.DESCENDING);
}
 
Example 21
Source Project: Flink-CEPplus   Source File: PartitionOperatorTest.java    License: Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testRangePartitionByComplexKeyWithTooManyOrders() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSource<Tuple2<Tuple2<Integer, Integer>, Integer>> ds = env.fromElements(
		new Tuple2<>(new Tuple2<>(1, 1), 1),
		new Tuple2<>(new Tuple2<>(2, 2), 2),
		new Tuple2<>(new Tuple2<>(2, 2), 2)
	);
	ds.partitionByRange(0).withOrders(Order.ASCENDING, Order.DESCENDING);
}
 
Example 22
Source Project: flink   Source File: PropertyDataSourceTest.java    License: Apache License 2.0
@Test
public void checkSinglePartitionedOrderedSource6() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f1.intField")
			.splitsOrderedBy("f1", new Order[]{Order.DESCENDING});

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(2)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(1,2,3)));
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 23
Source Project: flink   Source File: ParquetTableSourceTest.java    License: Apache License 2.0
@Test
public void testFieldsProjection() throws Exception {
	ParquetTableSource parquetTableSource = createNestedTestParquetTableSource(testPath);
	ParquetTableSource projected = (ParquetTableSource) parquetTableSource.projectFields(new int[] {2, 4, 6});

	// ensure a new reference is returned
	assertNotSame(projected, parquetTableSource);

	// ensure table schema is the same
	assertEquals(parquetTableSource.getTableSchema(), projected.getTableSchema());

	// ensure that table source description differs
	assertNotEquals(parquetTableSource.explainSource(), projected.explainSource());

	String[] fieldNames = ((RowTypeInfo) NESTED_ROW_TYPE).getFieldNames();
	TypeInformation[] fieldTypes =  ((RowTypeInfo) NESTED_ROW_TYPE).getFieldTypes();
	assertEquals(
		Types.ROW_NAMED(
			new String[] {fieldNames[2], fieldNames[4], fieldNames[6]},
			fieldTypes[2], fieldTypes[4], fieldTypes[6]
		),
		projected.getReturnType()
	);

	// ensure ParquetInputFormat is configured with selected fields
	DataSet<Row> data = projected.getDataSet(ExecutionEnvironment.createLocalEnvironment());
	InputFormat<Row, ?> inputFormat = ((DataSource<Row>) data).getInputFormat();
	assertTrue(inputFormat instanceof ParquetRowInputFormat);
	ParquetRowInputFormat parquetIF = (ParquetRowInputFormat) inputFormat;
	assertArrayEquals(new String[]{fieldNames[2], fieldNames[4], fieldNames[6]}, parquetIF.getFieldNames());
	assertArrayEquals(new TypeInformation<?>[]{fieldTypes[2], fieldTypes[4], fieldTypes[6]}, parquetIF.getFieldTypes());
}
 
Example 24
Source Project: Flink-CEPplus   Source File: PropertyDataSourceTest.java    License: Apache License 2.0
@Test
public void checkSinglePartitionedSource2() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(1, 0);

	data.output(new DiscardingOutputFormat<Tuple2<Long,String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0, 1)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 25
Source Project: Flink-CEPplus   Source File: PropertyDataSourceTest.java    License: Apache License 2.0
@Test
public void checkSinglePartitionedGroupedSource1() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(0)
			.splitsGroupedBy(0);

	data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(0)));
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 26
Source Project: Flink-CEPplus   Source File: PropertyDataSourceTest.java    License: Apache License 2.0
@Test
public void checkSinglePartitionedGroupedSource2() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(0)
			.splitsGroupedBy(1, 0);

	data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(0, 1)));
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 27
Source Project: Flink-CEPplus   Source File: PropertyDataSourceTest.java    License: Apache License 2.0
@Test
public void checkSinglePartitionedGroupedSource3() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(1)
			.splitsGroupedBy(0);

	data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 28
Source Project: Flink-CEPplus   Source File: PropertyDataSourceTest.java    License: Apache License 2.0
@Test
public void checkSinglePartitionedGroupedSource5() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f2")
			.splitsGroupedBy("f2");

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(4)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(4)));
	Assert.assertTrue(lprops.getOrdering() == null);

}