org.apache.flink.api.java.operators.DataSource Java Examples

The following examples show how to use org.apache.flink.api.java.operators.DataSource. Each example comes from an open-source project; the source file, project, and license are noted above the snippet.
Example #1
Source File: JavaDistributeApp.java    From 163-bigdate-note with GNU General Public License v3.0
public static void main(String[] args) throws Exception {
    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    String filePath = "file:\\D:\\imooc\\新一代大数据计算引擎 Flink从入门到实战-v\\input\\hello.txt";

    //1. Register a local file with the distributed cache
    environment.registerCachedFile(filePath, "java-cf");
    DataSource<String> data = environment.fromElements("hadoop", "spark", "flink", "pyspark", "storm");

    data.map(new RichMapFunction<String, String>() {
        @Override
        public void open(Configuration parameters) throws Exception {
            // Runs once per task before any map() call: fetch the registered
            // file from the distributed cache and print its lines.
            File file = getRuntimeContext().getDistributedCache().getFile("java-cf");
            List<String> lines = FileUtils.readLines(file);
            for (String line : lines) {
                System.out.println("line: " + line);
            }

        }

        @Override
        public String map(String value) throws Exception {
            return value;
        }
    }).print();
}
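
A side note on the read in open(): FileUtils.readLines(File) is deprecated in newer commons-io releases in favor of a charset-explicit overload. A minimal sketch of the same read with the charset spelled out (assuming commons-io 2.3 or newer on the classpath, plus an import of java.nio.charset.StandardCharsets):

File file = getRuntimeContext().getDistributedCache().getFile("java-cf");
// Explicit charset avoids decoding the cached file with the platform default.
List<String> lines = FileUtils.readLines(file, StandardCharsets.UTF_8);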
 
Example #2
Source File: CSVReaderTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testFieldTypes() throws Exception {
	CsvReader reader = getCsvReader();
	DataSource<Item> items = reader.tupleType(Item.class);

	TypeInformation<?> info = items.getType();
	if (!info.isTupleType()) {
		Assert.fail();
	} else {
		TupleTypeInfo<?> tinfo = (TupleTypeInfo<?>) info;
		Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
		Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
		Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
		Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(3));
	}

	CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) items.getInputFormat();
	Assert.assertArrayEquals(new Class<?>[]{Integer.class, String.class, Double.class, String.class}, inputFormat.getFieldTypes());
}
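
The Item type is local to the test class and not shown on this page; judging from the asserted field types (INT, STRING, DOUBLE, STRING), it is presumably a Tuple4 subclass along these lines:

// Presumed shape of the test-local Item type, inferred from the assertions above.
public static class Item extends Tuple4<Integer, String, Double, String> {
}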
 
Example #3
Source File: ExecutionEnvironment.java    From Flink-CEPplus with Apache License 2.0
/**
 * Creates a new data set that contains the given elements. The framework will determine the type
 * from the base type supplied by the user. Each element must be of that base type or a subclass of it.
 * The sequence of elements must not be empty.
 * Note that this operation will result in a non-parallel data source, i.e. a data source with
 * a parallelism of one.
 *
 * @param type The base class type for every element in the collection.
 * @param data The elements to make up the data set.
 * @return A DataSet representing the given list of elements.
 */
@SafeVarargs
public final <X> DataSource<X> fromElements(Class<X> type, X... data) {
	if (data == null) {
		throw new IllegalArgumentException("The data must not be null.");
	}
	if (data.length == 0) {
		throw new IllegalArgumentException("The number of elements must not be zero.");
	}

	TypeInformation<X> typeInfo;
	try {
		typeInfo = TypeExtractor.getForClass(type);
	}
	catch (Exception e) {
		throw new RuntimeException("Could not create TypeInformation for type " + type.getName()
				+ "; please specify the TypeInformation manually via "
				+ "ExecutionEnvironment#fromElements(Collection, TypeInformation)", e);
	}

	return fromCollection(Arrays.asList(data), typeInfo, Utils.getCallLocationName());
}
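
A brief usage sketch for this overload (as it would appear inside a main method), using a hypothetical Event hierarchy to show why the explicit base class matters when the elements are of mixed subtypes:

// Hypothetical POJO hierarchy, for illustration only.
public static class Event { public long timestamp; }
public static class ClickEvent extends Event { }

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// The explicit base class types the source as DataSource<Event>; inference
// from the first element alone would settle on the concrete ClickEvent type.
DataSource<Event> events = env.fromElements(Event.class, new ClickEvent(), new Event());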
 
Example #4
Source File: CSVReaderTest.java    From flink with Apache License 2.0
@Test
public void testSubClassWithPartialsInHierarchie() throws Exception {
	CsvReader reader = getCsvReader();
	DataSource<FinalItem> sitems = reader.tupleType(FinalItem.class);
	TypeInformation<?> info = sitems.getType();

	Assert.assertTrue(info.isTupleType());
	Assert.assertEquals(FinalItem.class, info.getTypeClass());

	@SuppressWarnings("unchecked")
	TupleTypeInfo<SubItem> tinfo = (TupleTypeInfo<SubItem>) info;

	Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
	Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
	Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
	Assert.assertEquals(ValueTypeInfo.class, tinfo.getTypeAt(3).getClass());
	Assert.assertEquals(ValueTypeInfo.class, tinfo.getTypeAt(4).getClass());
	Assert.assertEquals(StringValue.class, ((ValueTypeInfo<?>) tinfo.getTypeAt(3)).getTypeClass());
	Assert.assertEquals(LongValue.class, ((ValueTypeInfo<?>) tinfo.getTypeAt(4)).getTypeClass());

	CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) sitems.getInputFormat();
	Assert.assertArrayEquals(new Class<?>[] {Integer.class, String.class, Double.class, StringValue.class, LongValue.class}, inputFormat.getFieldTypes());
}
 
Example #5
Source File: Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    DataSource<String> dataSource = env.fromElements(WORDS);

    dataSource.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
        @Override
        public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
            String[] words = line.split("\\W+");
            for (String word : words) {
                out.collect(new Tuple2<>(word, 1));
            }
        }
    })
            .groupBy(0)
            .sum(1)
            .print();

    // Note: count() is an eager operation that triggers a second job execution.
    long count = dataSource.count();
    System.out.println(count);
}
 
Example #6
Source File: JavaTableSQLAPI.java    From 163-bigdate-note with GNU General Public License v3.0
public static void main(String[] args) throws Exception {
    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    BatchTableEnvironment tableEnvironment = BatchTableEnvironment.getTableEnvironment(environment);
    String filepath = "file:\\D:\\imooc\\新一代大数据计算引擎 Flink从入门到实战-v\\input\\sales.csv";
    //csv => DataSet
    DataSource<Sales> csv = environment.readCsvFile(filepath)
            .ignoreFirstLine()
            .pojoType(Sales.class, "transactionId", "customerId", "itemId", "amountPaid");
    //csv.print();

    Table sales = tableEnvironment.fromDataSet(csv);
    tableEnvironment.registerTable("sales", sales);
    Table resultTable = tableEnvironment.sqlQuery("select customerId, sum(amountPaid) money from sales group by customerId");

    DataSet<Row> result = tableEnvironment.toDataSet(resultTable, Row.class);
    result.print();
}
 
Example #7
Source File: CSVReaderTest.java    From flink with Apache License 2.0
@Test
public void testSubClass() throws Exception {
	CsvReader reader = getCsvReader();
	DataSource<SubItem> sitems = reader.tupleType(SubItem.class);
	TypeInformation<?> info = sitems.getType();

	Assert.assertTrue(info.isTupleType());
	Assert.assertEquals(SubItem.class, info.getTypeClass());

	@SuppressWarnings("unchecked")
	TupleTypeInfo<SubItem> tinfo = (TupleTypeInfo<SubItem>) info;

	Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
	Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
	Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
	Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(3));

	CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) sitems.getInputFormat();
	Assert.assertArrayEquals(new Class<?>[]{Integer.class, String.class, Double.class, String.class}, inputFormat.getFieldTypes());
}
 
Example #8
Source File: CsvReader.java    From Flink-CEPplus with Apache License 2.0
/**
 * Configures the reader to read the CSV data and parse it to the given type. The type must be a subclass of
 * {@link Tuple}. The type information for the fields is obtained from the type class. The type
 * consequently needs to specify all generic field types of the tuple.
 *
 * @param targetType The class of the target type, needs to be a subclass of Tuple.
 * @return The DataSet representing the parsed CSV data.
 */
public <T extends Tuple> DataSource<T> tupleType(Class<T> targetType) {
	Preconditions.checkNotNull(targetType, "The target type class must not be null.");
	if (!Tuple.class.isAssignableFrom(targetType)) {
		throw new IllegalArgumentException("The target type must be a subclass of " + Tuple.class.getName());
	}

	@SuppressWarnings("unchecked")
	TupleTypeInfo<T> typeInfo = (TupleTypeInfo<T>) TypeExtractor.createTypeInfo(targetType);
	CsvInputFormat<T> inputFormat = new TupleCsvInputFormat<T>(path, this.lineDelimiter, this.fieldDelimiter, typeInfo, this.includedMask);

	Class<?>[] classes = new Class<?>[typeInfo.getArity()];
	for (int i = 0; i < typeInfo.getArity(); i++) {
		classes[i] = typeInfo.getTypeAt(i).getTypeClass();
	}

	configureInputFormat(inputFormat);
	return new DataSource<T>(executionContext, inputFormat, typeInfo, Utils.getCallLocationName());
}
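
A short usage sketch for tupleType, assuming a hypothetical two-column CSV of words and counts:

// Tuple subclass that pins down the generic field types, as tupleType requires.
public static class WordCount extends Tuple2<String, Integer> {
}

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSource<WordCount> counts = env
        .readCsvFile("/tmp/wordcounts.csv")   // hypothetical path
        .tupleType(WordCount.class);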
 
Example #9
Source File: GraphGeneratorUtils.java    From Flink-CEPplus with Apache License 2.0
/**
 * Generates {@link Vertex Vertices} with sequential, numerical labels.
 *
 * @param env the Flink execution environment.
 * @param parallelism operator parallelism
 * @param vertexCount number of sequential vertex labels
 * @return {@link DataSet} of sequentially labeled {@link Vertex vertices}
 */
public static DataSet<Vertex<LongValue, NullValue>> vertexSequence(ExecutionEnvironment env, int parallelism, long vertexCount) {
	Preconditions.checkArgument(vertexCount >= 0, "Vertex count must be non-negative");

	if (vertexCount == 0) {
		return env
			.fromCollection(Collections.emptyList(), TypeInformation.of(new TypeHint<Vertex<LongValue, NullValue>>(){}))
				.setParallelism(parallelism)
				.name("Empty vertex set");
	} else {
		LongValueSequenceIterator iterator = new LongValueSequenceIterator(0, vertexCount - 1);

		DataSource<LongValue> vertexLabels = env
			.fromParallelCollection(iterator, LongValue.class)
				.setParallelism(parallelism)
				.name("Vertex indices");

		return vertexLabels
			.map(new CreateVertex())
				.setParallelism(parallelism)
				.name("Vertex sequence");
	}
}
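
For reference, a minimal invocation of the generator above (the parallelism and vertex count are arbitrary):

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// 100 vertices labeled 0..99, generated across 4 parallel subtasks.
DataSet<Vertex<LongValue, NullValue>> vertices =
        GraphGeneratorUtils.vertexSequence(env, 4, 100);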
 
Example #10
Source File: ExecutionEnvironment.java    From flink with Apache License 2.0
/**
 * Creates a new data set that contains the given elements. The elements must all be of the same type,
 * for example, all {@link String} or all {@link Integer}. The sequence of elements must not be empty.
 *
 * <p>The framework will try and determine the exact type from the collection elements.
 * In case of generic elements, it may be necessary to manually supply the type information
 * via {@link #fromCollection(Collection, TypeInformation)}.
 *
 * <p>Note that this operation will result in a non-parallel data source, i.e. a data source with
 * a parallelism of one.
 *
 * @param data The elements to make up the data set.
 * @return A DataSet representing the given list of elements.
 */
@SafeVarargs
public final <X> DataSource<X> fromElements(X... data) {
	if (data == null) {
		throw new IllegalArgumentException("The data must not be null.");
	}
	if (data.length == 0) {
		throw new IllegalArgumentException("The number of elements must not be zero.");
	}

	TypeInformation<X> typeInfo;
	try {
		typeInfo = TypeExtractor.getForObject(data[0]);
	}
	catch (Exception e) {
		throw new RuntimeException("Could not create TypeInformation for type " + data[0].getClass().getName()
				+ "; please specify the TypeInformation manually via "
				+ "ExecutionEnvironment#fromElements(Collection, TypeInformation)", e);
	}

	return fromCollection(Arrays.asList(data), typeInfo, Utils.getCallLocationName());
}
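
Because this overload derives the TypeInformation from data[0] alone, heterogeneous elements are a classic pitfall. A sketch contrasting the two fromElements variants:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

// Fine: homogeneous elements, type inferred from the first one (Integer).
DataSource<Integer> nums = env.fromElements(1, 2, 3);

// Risky with mixed subtypes: the inferred type is that of the first element,
// so later elements may not conform. Prefer the overload that takes an
// explicit base class, e.g. env.fromElements(Number.class, 1, 2.0, 3L).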
 
Example #11
Source File: BootstrapTransformationTest.java    From flink with Apache License 2.0
@Test
public void testOperatorSpecificMaxParallelismRespected() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	DataSource<Integer> input = env.fromElements(0);

	BootstrapTransformation<Integer> transformation = OperatorTransformation
		.bootstrapWith(input)
		.setMaxParallelism(1)
		.transform(new ExampleStateBootstrapFunction());

	int maxParallelism = transformation.getMaxParallelism(4);
	DataSet<TaggedOperatorSubtaskState> result = transformation.writeOperatorSubtaskStates(
		OperatorIDGenerator.fromUid("uid"),
		new MemoryStateBackend(),
		new Path(),
		maxParallelism
	);

	Assert.assertEquals("The parallelism of a data set should be constrained my the savepoint max parallelism", 1, getParallelism(result));
}
 
Example #12
Source File: PropertyDataSourceTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void checkSinglePartitionedGroupedSource2() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(0)
			.splitsGroupedBy(1, 0);

	data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(0, 1)));
	Assert.assertTrue(lprops.getOrdering() == null);

}
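
The PropertyDataSourceTest cases exercise the batch optimizer: SplitDataProperties declare, as an unchecked promise, that a source's input splits already arrive partitioned, grouped, or ordered, which lets the planner drop otherwise-needed shuffles and sorts. A minimal end-to-end sketch, assuming the file at the hypothetical path really is partitioned by its first field:

ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();

DataSource<Tuple2<Long, Long>> data =
        env.readCsvFile("/some/path").types(Long.class, Long.class);

// Unchecked promise to the optimizer; a wrong declaration yields wrong results.
data.getSplitDataProperties().splitsPartitionedBy(0);

// The optimizer may now plan this aggregation without a re-partitioning shuffle.
data.groupBy(0).sum(1).print();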
 
Example #13
Source File: PropertyDataSourceTest.java    From flink with Apache License 2.0
@Test
public void checkSinglePartitionedOrderedSource6() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f1.intField")
			.splitsOrderedBy("f1", new Order[]{Order.DESCENDING});

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(2)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(1,2,3)));
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example #14
Source File: PartitionOperatorTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testRangePartitionByComplexKeyWithOrders() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSource<Tuple2<Tuple2<Integer, Integer>, Integer>> ds = env.fromElements(
		new Tuple2<>(new Tuple2<>(1, 1), 1),
		new Tuple2<>(new Tuple2<>(2, 2), 2),
		new Tuple2<>(new Tuple2<>(2, 2), 2)
	);
	ds.partitionByRange(0, 1).withOrders(Order.ASCENDING, Order.DESCENDING);
}
 
Example #15
Source File: PropertyDataSourceTest.java    From flink with Apache License 2.0
@Test
public void checkSinglePartitionedGroupedSource5() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f2")
			.splitsGroupedBy("f2");

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(4)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(4)));
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example #16
Source File: PropertyDataSourceTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void checkSinglePartitionedGroupedSource1() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(0)
			.splitsGroupedBy(0);

	data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(0)));
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example #17
Source File: PropertyDataSourceTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void checkSinglePartitionedGroupedSource6() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f1.intField")
			.splitsGroupedBy("f0; f1.intField");

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(2)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(0,2)));
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example #18
Source File: PropertyDataSourceTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void checkSinglePartitionedGroupedSource7() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f1.intField")
			.splitsGroupedBy("f1");

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(2)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(1,2,3)));
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example #19
Source File: PropertyDataSourceTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void checkSinglePartitionedGroupedSource8() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f1")
			.splitsGroupedBy("f1.stringField");

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1,2,3)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example #20
Source File: PropertyDataSourceTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void checkSinglePartitionedOrderedSource2() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(1)
			.splitsOrderedBy(new int[]{1, 0}, new Order[]{Order.ASCENDING, Order.DESCENDING});

	data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue((new FieldSet(lprops.getGroupedFields().toArray())).equals(new FieldSet(1, 0)));
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example #21
Source File: PropertyDataSourceTest.java    From flink with Apache License 2.0
@Test
public void checkSinglePartitionedOrderedSource4() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(0, 1)
			.splitsOrderedBy(new int[]{1}, new Order[]{Order.DESCENDING});

	data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0, 1)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example #22
Source File: PropertyDataSourceTest.java    From flink with Apache License 2.0
@Test
public void checkSinglePartitionedSource2() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(1, 0);

	data.output(new DiscardingOutputFormat<Tuple2<Long,String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0, 1)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example #23
Source File: PropertyDataSourceTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void checkSinglePartitionedOrderedSource7() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f1")
			.splitsOrderedBy("f1.stringField", new Order[]{Order.ASCENDING});

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1,2,3)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example #24
Source File: ParquetTableSourceTest.java    From flink with Apache License 2.0
@Test
public void testFieldsProjection() throws Exception {
	ParquetTableSource parquetTableSource = createNestedTestParquetTableSource(testPath);
	ParquetTableSource projected = (ParquetTableSource) parquetTableSource.projectFields(new int[] {2, 4, 6});

	// ensure a new reference is returned
	assertNotSame(projected, parquetTableSource);

	// ensure table schema is the same
	assertEquals(parquetTableSource.getTableSchema(), projected.getTableSchema());

	// ensure that table source description differs
	assertNotEquals(parquetTableSource.explainSource(), projected.explainSource());

	String[] fieldNames = ((RowTypeInfo) NESTED_ROW_TYPE).getFieldNames();
	TypeInformation[] fieldTypes = ((RowTypeInfo) NESTED_ROW_TYPE).getFieldTypes();
	assertEquals(
		Types.ROW_NAMED(
			new String[] {fieldNames[2], fieldNames[4], fieldNames[6]},
			fieldTypes[2], fieldTypes[4], fieldTypes[6]
		),
		projected.getReturnType()
	);

	// ensure ParquetInputFormat is configured with selected fields
	DataSet<Row> data = projected.getDataSet(ExecutionEnvironment.createLocalEnvironment());
	InputFormat<Row, ?> inputFormat = ((DataSource<Row>) data).getInputFormat();
	assertTrue(inputFormat instanceof ParquetRowInputFormat);
	ParquetRowInputFormat parquetIF = (ParquetRowInputFormat) inputFormat;
	assertArrayEquals(new String[]{fieldNames[2], fieldNames[4], fieldNames[6]}, parquetIF.getFieldNames());
	assertArrayEquals(new TypeInformation<?>[]{fieldTypes[2], fieldTypes[4], fieldTypes[6]}, parquetIF.getFieldTypes());
}