Java Code Examples for org.apache.flink.api.common.operators.GenericDataSourceBase

The following examples show how to use org.apache.flink.api.common.operators.GenericDataSourceBase. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Flink-CEPplus   Source File: DataSource.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the common-API data source operator for this data source.
 *
 * <p>The operator name is the explicitly set name if there is one; otherwise it is
 * assembled from the source location name and the input format's class name. In both
 * cases the name is cut to at most 150 characters. The parallelism is always copied
 * to the operator, while parameters and split data properties are only transferred
 * when they are non-null.
 *
 * @return the newly created data source operator
 */
protected GenericDataSourceBase<OUT, ?> translateToDataFlow() {
	String operatorName;
	if (this.name == null) {
		// no explicit name: describe the source by its location and input format class
		operatorName = "at " + dataSourceLocationName + " (" + inputFormat.getClass().getName() + ")";
	} else {
		operatorName = this.name;
	}
	if (operatorName.length() > 150) {
		operatorName = operatorName.substring(0, 150);
	}

	final OperatorInformation<OUT> operatorInfo = new OperatorInformation<OUT>(getType());

	// the raw constructor call is intentional, hence the suppressed warnings
	@SuppressWarnings({"unchecked", "rawtypes"})
	GenericDataSourceBase<OUT, ?> source =
		new GenericDataSourceBase(this.inputFormat, operatorInfo, operatorName);

	source.setParallelism(parallelism);

	if (this.parameters != null) {
		source.getParameters().addAll(this.parameters);
	}

	if (this.splitDataProperties != null) {
		source.setSplitDataProperties(this.splitDataProperties);
	}

	return source;
}
 
Example 2
Source Project: flink   Source File: DataSource.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the common-API operator that represents this data source.
 *
 * <p>If no name was set explicitly, a name is generated from the source location name
 * and the class name of the input format. The resulting name is limited to its first
 * 150 characters. Parameters and split data properties are handed to the operator
 * only when they are present (non-null); the parallelism is always set.
 *
 * @return the newly created data source operator
 */
protected GenericDataSourceBase<OUT, ?> translateToDataFlow() {
	String operatorName;
	if (this.name == null) {
		// generated fallback name: location plus input format class
		operatorName = "at " + dataSourceLocationName + " (" + inputFormat.getClass().getName() + ")";
	} else {
		operatorName = this.name;
	}
	if (operatorName.length() > 150) {
		operatorName = operatorName.substring(0, 150);
	}

	final OperatorInformation<OUT> operatorInfo = new OperatorInformation<OUT>(getType());

	// the raw constructor call is intentional, hence the suppressed warnings
	@SuppressWarnings({"unchecked", "rawtypes"})
	GenericDataSourceBase<OUT, ?> source =
		new GenericDataSourceBase(this.inputFormat, operatorInfo, operatorName);

	source.setParallelism(parallelism);

	if (this.parameters != null) {
		source.getParameters().addAll(this.parameters);
	}

	if (this.splitDataProperties != null) {
		source.setSplitDataProperties(this.splitDataProperties);
	}

	return source;
}
 
Example 3
Source Project: flink   Source File: DataSource.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Translates this data source into its common-API operator.
 *
 * <p>The operator is named after the explicitly configured name, or, when none is
 * set, after the source location name and the input format's class name. Names longer
 * than 150 characters are truncated. The parallelism is always applied; parameters
 * and split data properties are applied only if they are non-null.
 *
 * @return the newly created data source operator
 */
protected GenericDataSourceBase<OUT, ?> translateToDataFlow() {
	String operatorName;
	if (this.name == null) {
		// fall back to a description built from location and input format class
		operatorName = "at " + dataSourceLocationName + " (" + inputFormat.getClass().getName() + ")";
	} else {
		operatorName = this.name;
	}
	if (operatorName.length() > 150) {
		operatorName = operatorName.substring(0, 150);
	}

	final OperatorInformation<OUT> operatorInfo = new OperatorInformation<OUT>(getType());

	// the raw constructor call is intentional, hence the suppressed warnings
	@SuppressWarnings({"unchecked", "rawtypes"})
	GenericDataSourceBase<OUT, ?> source =
		new GenericDataSourceBase(this.inputFormat, operatorInfo, operatorName);

	source.setParallelism(parallelism);

	if (this.parameters != null) {
		source.getParameters().addAll(this.parameters);
	}

	if (this.splitDataProperties != null) {
		source.setSplitDataProperties(this.splitDataProperties);
	}

	return source;
}
 
Example 4
Source Project: Flink-CEPplus   Source File: KMeansSingleStepTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Compiles the KMeans plan with statistics attached to its two data sources and
 * hands the optimized plan to {@code checkPlan} for validation.
 */
@Test
public void testCompileKMeansSingleStepWithStats() {
	final Plan kMeansPlan = getKMeansPlan();
	kMeansPlan.setExecutionConfig(new ExecutionConfig());

	// resolve the two sources via the DATAPOINTS and CENTERS keys
	final OperatorResolver resolver = getContractResolver(kMeansPlan);
	final GenericDataSourceBase<?, ?> points = resolver.getNode(DATAPOINTS);
	final GenericDataSourceBase<?, ?> centers = resolver.getNode(CENTERS);

	// attach the statistics: the points source is given a far larger size than the centers source
	setSourceStatistics(points, 100L * 1024 * 1024 * 1024, 32f);
	setSourceStatistics(centers, 1024 * 1024, 32f);

	checkPlan(compileWithStats(kMeansPlan));
}
 
Example 5
Source Project: Flink-CEPplus   Source File: ReduceTranslationTests.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies the translation of a non-grouped reduce: the sink's input must be a reduce
 * operator whose input and output types equal the data set type, that has no key
 * columns, that has no explicitly configured parallelism, and that reads directly
 * from a data source.
 *
 * @throws Exception if building or translating the program fails; the exception is
 *         left to JUnit so that the test report keeps the original stack trace
 */
@Test
public void translateNonGroupedReduce() throws Exception {
	final int parallelism = 8;
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

	DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);

	initialData.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() {
		@Override
		public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) {
			return value1;
		}
	}).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

	Plan p = env.createProgramPlan();

	GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

	ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

	// check types
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

	// check keys: a non-grouped reduce must not carry any key columns
	final int[] keyColumns = reducer.getKeyColumns(0);
	assertTrue(keyColumns == null || keyColumns.length == 0);

	// parallelism was not configured on the operator
	final int reducerParallelism = reducer.getParallelism();
	assertTrue(reducerParallelism == 1 || reducerParallelism == ExecutionConfig.PARALLELISM_DEFAULT);

	assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
 
Example 6
Source Project: Flink-CEPplus   Source File: AggregateTranslationTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies the translation of a grouped aggregation (MIN on field 1 and SUM on
 * field 2, grouped by field 0): the sink's input must be a combinable group-reduce
 * operator keyed on column 0, with parallelism -1, reading directly from a data source.
 *
 * @throws Exception if building or translating the program fails; the exception is
 *         left to JUnit so that the test report keeps the original stack trace
 */
@Test
public void translateAggregate() throws Exception {
	final int parallelism = 8;
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

	@SuppressWarnings("unchecked")
	DataSet<Tuple3<Double, StringValue, Long>> initialData =
			env.fromElements(new Tuple3<Double, StringValue, Long>(3.141592, new StringValue("foobar"), Long.valueOf(77)));

	initialData.groupBy(0).aggregate(Aggregations.MIN, 1).and(Aggregations.SUM, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, Long>>());

	Plan p = env.createProgramPlan();

	GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

	GroupReduceOperatorBase<?, ?, ?> reducer = (GroupReduceOperatorBase<?, ?, ?>) sink.getInput();

	// check keys: exactly one key column, namely the grouping field 0
	assertEquals(1, reducer.getKeyColumns(0).length);
	assertEquals(0, reducer.getKeyColumns(0)[0]);

	assertEquals(-1, reducer.getParallelism());
	assertTrue(reducer.isCombinable());

	assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
 
Example 7
Source Project: Flink-CEPplus   Source File: DistinctTranslationTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies the translation of {@code distinct()} on a three-field tuple data set: the
 * sink's input must be a reduce operator whose input and output types equal the data
 * set type, that is keyed on columns 0, 1 and 2, that has no explicitly configured
 * parallelism, and that reads directly from a data source.
 *
 * @throws Exception if building or translating the program fails; the exception is
 *         left to JUnit so that the test report keeps the original stack trace
 */
@Test
public void translateDistinctPlain() throws Exception {
	final int parallelism = 8;
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

	DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);

	initialData.distinct().output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

	Plan p = env.createProgramPlan();

	GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

	// currently distinct is translated to a Reduce
	ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

	// check types
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

	// check keys
	assertArrayEquals(new int[] {0, 1, 2}, reducer.getKeyColumns(0));

	// parallelism was not configured on the operator
	final int reducerParallelism = reducer.getParallelism();
	assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

	assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
 
Example 8
Source Project: Flink-CEPplus   Source File: DistinctTranslationTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies the translation of {@code distinct()} on a {@code CustomType} data set: the
 * sink's input must be a reduce operator whose input and output types equal the data
 * set type, that is keyed on column 0, that has no explicitly configured parallelism,
 * and that reads directly from a data source.
 *
 * @throws Exception if building or translating the program fails; the exception is
 *         left to JUnit so that the test report keeps the original stack trace
 */
@Test
public void translateDistinctPlain2() throws Exception {
	final int parallelism = 8;
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

	DataSet<CustomType> initialData = getSourcePojoDataSet(env);

	initialData.distinct().output(new DiscardingOutputFormat<CustomType>());

	Plan p = env.createProgramPlan();

	GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

	// currently distinct is translated to a Reduce
	ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

	// check types
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

	// check keys
	assertArrayEquals(new int[] {0}, reducer.getKeyColumns(0));

	// parallelism was not configured on the operator
	final int reducerParallelism = reducer.getParallelism();
	assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

	assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
 
Example 9
Source Project: Flink-CEPplus   Source File: DistinctTranslationTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies the translation of {@code distinct(1, 2)} on a three-field tuple data set:
 * the sink's input must be a reduce operator whose input and output types equal the
 * data set type, that is keyed on columns 1 and 2, that has no explicitly configured
 * parallelism, and that reads directly from a data source.
 *
 * @throws Exception if building or translating the program fails; the exception is
 *         left to JUnit so that the test report keeps the original stack trace
 */
@Test
public void translateDistinctPosition() throws Exception {
	final int parallelism = 8;
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

	DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);

	initialData.distinct(1, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

	Plan p = env.createProgramPlan();

	GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

	// currently distinct is translated to a Reduce
	ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

	// check types
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

	// check keys
	assertArrayEquals(new int[] {1, 2}, reducer.getKeyColumns(0));

	// parallelism was not configured on the operator
	final int reducerParallelism = reducer.getParallelism();
	assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

	assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
 
Example 10
Source Project: Flink-CEPplus   Source File: DistinctTranslationTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies the translation of {@code distinct("myInt")} on a {@code CustomType} data
 * set: the sink's input must be a reduce operator whose input and output types equal
 * the data set type, that is keyed on column 0, that has no explicitly configured
 * parallelism, and that reads directly from a data source.
 *
 * @throws Exception if building or translating the program fails; the exception is
 *         left to JUnit so that the test report keeps the original stack trace
 */
@Test
public void translateDistinctExpressionKey() throws Exception {
	final int parallelism = 8;
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

	DataSet<CustomType> initialData = getSourcePojoDataSet(env);

	initialData.distinct("myInt").output(new DiscardingOutputFormat<CustomType>());

	Plan p = env.createProgramPlan();

	GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

	// currently distinct is translated to a Reduce
	ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

	// check types
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

	// check keys
	assertArrayEquals(new int[] {0}, reducer.getKeyColumns(0));

	// parallelism was not configured on the operator
	final int reducerParallelism = reducer.getParallelism();
	assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

	assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
 
Example 11
Source Project: Flink-CEPplus   Source File: JavaApiPostPass.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the output type information from the operator behind a source plan node.
 *
 * @param node the source plan node whose operator is inspected
 * @param <T> the element type expected by the caller; the cast to it is unchecked
 * @return the output type of the node's data source operator
 * @throws RuntimeException if the operator is not a {@code GenericDataSourceBase}
 */
@SuppressWarnings("unchecked")
private static <T> TypeInformation<T> getTypeInfoFromSource(SourcePlanNode node) {
	final Operator<?> operator = node.getOptimizerNode().getOperator();

	// anything other than a data source operator is rejected up front
	if (!(operator instanceof GenericDataSourceBase)) {
		throw new RuntimeException("Wrong operator type found in post pass.");
	}

	final GenericDataSourceBase<T, ?> source = (GenericDataSourceBase<T, ?>) operator;
	return source.getOperatorInfo().getOutputType();
}
 
Example 12
Source Project: Flink-CEPplus   Source File: DataSourceNode.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new DataSourceNode for the given contract.
 *
 * <p>The user code class of the contract decides two flags: the input is treated as
 * sequential (and the parallelism is set to 1) if the class is a
 * {@code NonParallelInput}, and as replicated if it is a
 * {@code ReplicatingInputFormat}. Global and local properties start out empty; a
 * replicated input is marked fully replicated, otherwise split data properties, if
 * present, are applied.
 *
 * @param pactContract
 *        The data source contract object.
 * @throws IllegalArgumentException
 *         If the contract's user code class is {@code null}.
 */
public DataSourceNode(GenericDataSourceBase<?, ?> pactContract) {
	super(pactContract);

	final Class<?> userCodeClass = pactContract.getUserCodeWrapper().getUserCodeClass();
	if (userCodeClass == null) {
		throw new IllegalArgumentException("Input format has not been set.");
	}

	// non-parallel inputs are pinned to a parallelism of 1
	final boolean nonParallel = NonParallelInput.class.isAssignableFrom(userCodeClass);
	if (nonParallel) {
		setParallelism(1);
	}
	this.sequentialInput = nonParallel;

	this.replicatedInput = ReplicatingInputFormat.class.isAssignableFrom(userCodeClass);

	this.gprops = new GlobalProperties();
	this.lprops = new LocalProperties();

	final SplitDataProperties<?> splitProperties = pactContract.getSplitDataProperties();

	if (replicatedInput) {
		this.gprops.setFullyReplicated();
		this.lprops = new LocalProperties();
	} else if (splitProperties != null) {
		// configure data properties of data source using split properties
		setDataPropertiesFromSplitProperties(splitProperties);
	}
}
 
Example 13
Source Project: Flink-CEPplus   Source File: CompilerTestBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Records every data source operator that is visited. For a bulk iteration, this
 * visitor is additionally sent into the iteration's next partial solution.
 *
 * @param visitable the operator currently being visited
 * @return always {@code true}
 */
@Override
public boolean preVisit(Operator<?> visitable) {
	if (visitable instanceof GenericDataSourceBase) {
		final GenericDataSourceBase<?, ?> source = (GenericDataSourceBase<?, ?>) visitable;
		sources.add(source);
		return true;
	}

	if (visitable instanceof BulkIterationBase) {
		final BulkIterationBase<?> iteration = (BulkIterationBase<?>) visitable;
		iteration.getNextPartialSolution().accept(this);
	}

	return true;
}
 
Example 14
/**
 * This test simulates a join of a big left side with a small right side inside of an iteration, where the small side is on a static path.
 * Currently the best execution plan is a HYBRIDHASH_BUILD_SECOND_CACHED, where the small side is hashed and cached.
 * This test also makes sure that all relevant plans are correctly enumerated by the optimizer.
 *
 * @throws Exception if compiling the plan or generating the job graph fails; the exception is
 *         left to JUnit so that the test report keeps the original stack trace
 */
@Test
public void testCorrectChoosing() throws Exception {
	Plan plan = getTestPlanRightStatic("");

	SourceCollectorVisitor sourceCollector = new SourceCollectorVisitor();
	plan.accept(sourceCollector);

	// constant-first equals: a source without a name is skipped instead of causing an NPE
	for (GenericDataSourceBase<?, ?> s : sourceCollector.getSources()) {
		if ("bigFile".equals(s.getName())) {
			this.setSourceStatistics(s, 10000000, 1000);
		}
		else if ("smallFile".equals(s.getName())) {
			this.setSourceStatistics(s, 100, 100);
		}
	}

	OptimizedPlan oPlan = compileNoStats(plan);

	OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
	DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");

	// verify correct join strategy
	assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND_CACHED, innerJoin.getDriverStrategy());
	assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode());
	assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode());

	// the optimized plan must also be translatable into a job graph
	new JobGraphGenerator().compileJobGraph(oPlan);
}
 
Example 15
Source Project: flink   Source File: KMeansSingleStepTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Attaches statistics to the two data sources of the KMeans plan, compiles the plan
 * with statistics, and validates the result through {@code checkPlan}.
 */
@Test
public void testCompileKMeansSingleStepWithStats() {
	final Plan kMeansPlan = getKMeansPlan();
	kMeansPlan.setExecutionConfig(new ExecutionConfig());

	// resolve the two sources via the DATAPOINTS and CENTERS keys
	final OperatorResolver resolver = getContractResolver(kMeansPlan);
	final GenericDataSourceBase<?, ?> points = resolver.getNode(DATAPOINTS);
	final GenericDataSourceBase<?, ?> centers = resolver.getNode(CENTERS);

	// attach the statistics: the points source is given a far larger size than the centers source
	setSourceStatistics(points, 100L * 1024 * 1024 * 1024, 32f);
	setSourceStatistics(centers, 1024 * 1024, 32f);

	checkPlan(compileWithStats(kMeansPlan));
}
 
Example 16
Source Project: flink   Source File: ReduceTranslationTests.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks how a reduce without grouping is translated: the operator feeding the sink
 * must be a reduce operator with input and output types equal to the data set type,
 * without key columns, without an explicitly configured parallelism, and with a data
 * source as its input.
 *
 * @throws Exception if building or translating the program fails; the exception is
 *         left to JUnit so that the test report keeps the original stack trace
 */
@Test
public void translateNonGroupedReduce() throws Exception {
	final int parallelism = 8;
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

	DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);

	initialData.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() {
		@Override
		public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) {
			return value1;
		}
	}).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

	Plan p = env.createProgramPlan();

	GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

	ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

	// check types
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

	// check keys: a non-grouped reduce must not carry any key columns
	final int[] keyColumns = reducer.getKeyColumns(0);
	assertTrue(keyColumns == null || keyColumns.length == 0);

	// parallelism was not configured on the operator
	final int reducerParallelism = reducer.getParallelism();
	assertTrue(reducerParallelism == 1 || reducerParallelism == ExecutionConfig.PARALLELISM_DEFAULT);

	assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
 
Example 17
Source Project: flink   Source File: AggregateTranslationTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks how a grouped aggregation (MIN on field 1 and SUM on field 2, grouped by
 * field 0) is translated: the operator feeding the sink must be a combinable
 * group-reduce operator keyed on column 0, with parallelism -1, and with a data
 * source as its input.
 *
 * @throws Exception if building or translating the program fails; the exception is
 *         left to JUnit so that the test report keeps the original stack trace
 */
@Test
public void translateAggregate() throws Exception {
	final int parallelism = 8;
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

	@SuppressWarnings("unchecked")
	DataSet<Tuple3<Double, StringValue, Long>> initialData =
			env.fromElements(new Tuple3<Double, StringValue, Long>(3.141592, new StringValue("foobar"), Long.valueOf(77)));

	initialData.groupBy(0).aggregate(Aggregations.MIN, 1).and(Aggregations.SUM, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, Long>>());

	Plan p = env.createProgramPlan();

	GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

	GroupReduceOperatorBase<?, ?, ?> reducer = (GroupReduceOperatorBase<?, ?, ?>) sink.getInput();

	// check keys: exactly one key column, namely the grouping field 0
	assertEquals(1, reducer.getKeyColumns(0).length);
	assertEquals(0, reducer.getKeyColumns(0)[0]);

	assertEquals(-1, reducer.getParallelism());
	assertTrue(reducer.isCombinable());

	assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
 
Example 18
Source Project: flink   Source File: DistinctTranslationTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks how {@code distinct()} on a three-field tuple data set is translated: the
 * operator feeding the sink must be a reduce operator with input and output types
 * equal to the data set type, keyed on columns 0, 1 and 2, without an explicitly
 * configured parallelism, and with a data source as its input.
 *
 * @throws Exception if building or translating the program fails; the exception is
 *         left to JUnit so that the test report keeps the original stack trace
 */
@Test
public void translateDistinctPlain() throws Exception {
	final int parallelism = 8;
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

	DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);

	initialData.distinct().output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

	Plan p = env.createProgramPlan();

	GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

	// currently distinct is translated to a Reduce
	ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

	// check types
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

	// check keys
	assertArrayEquals(new int[] {0, 1, 2}, reducer.getKeyColumns(0));

	// parallelism was not configured on the operator
	final int reducerParallelism = reducer.getParallelism();
	assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

	assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
 
Example 19
Source Project: flink   Source File: DistinctTranslationTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks how {@code distinct()} on a {@code CustomType} data set is translated: the
 * operator feeding the sink must be a reduce operator with input and output types
 * equal to the data set type, keyed on column 0, without an explicitly configured
 * parallelism, and with a data source as its input.
 *
 * @throws Exception if building or translating the program fails; the exception is
 *         left to JUnit so that the test report keeps the original stack trace
 */
@Test
public void translateDistinctPlain2() throws Exception {
	final int parallelism = 8;
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

	DataSet<CustomType> initialData = getSourcePojoDataSet(env);

	initialData.distinct().output(new DiscardingOutputFormat<CustomType>());

	Plan p = env.createProgramPlan();

	GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

	// currently distinct is translated to a Reduce
	ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

	// check types
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

	// check keys
	assertArrayEquals(new int[] {0}, reducer.getKeyColumns(0));

	// parallelism was not configured on the operator
	final int reducerParallelism = reducer.getParallelism();
	assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

	assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
 
Example 20
Source Project: flink   Source File: DistinctTranslationTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks how {@code distinct(1, 2)} on a three-field tuple data set is translated: the
 * operator feeding the sink must be a reduce operator with input and output types
 * equal to the data set type, keyed on columns 1 and 2, without an explicitly
 * configured parallelism, and with a data source as its input.
 *
 * @throws Exception if building or translating the program fails; the exception is
 *         left to JUnit so that the test report keeps the original stack trace
 */
@Test
public void translateDistinctPosition() throws Exception {
	final int parallelism = 8;
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

	DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);

	initialData.distinct(1, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

	Plan p = env.createProgramPlan();

	GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

	// currently distinct is translated to a Reduce
	ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

	// check types
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

	// check keys
	assertArrayEquals(new int[] {1, 2}, reducer.getKeyColumns(0));

	// parallelism was not configured on the operator
	final int reducerParallelism = reducer.getParallelism();
	assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

	assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
 
Example 21
Source Project: flink   Source File: DistinctTranslationTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks how {@code distinct("myInt")} on a {@code CustomType} data set is translated:
 * the operator feeding the sink must be a reduce operator with input and output types
 * equal to the data set type, keyed on column 0, without an explicitly configured
 * parallelism, and with a data source as its input.
 *
 * @throws Exception if building or translating the program fails; the exception is
 *         left to JUnit so that the test report keeps the original stack trace
 */
@Test
public void translateDistinctExpressionKey() throws Exception {
	final int parallelism = 8;
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

	DataSet<CustomType> initialData = getSourcePojoDataSet(env);

	initialData.distinct("myInt").output(new DiscardingOutputFormat<CustomType>());

	Plan p = env.createProgramPlan();

	GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

	// currently distinct is translated to a Reduce
	ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

	// check types
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

	// check keys
	assertArrayEquals(new int[] {0}, reducer.getKeyColumns(0));

	// parallelism was not configured on the operator
	final int reducerParallelism = reducer.getParallelism();
	assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

	assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
 
Example 22
Source Project: flink   Source File: JavaApiPostPass.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the output type information of the data source operator that backs the
 * given source plan node.
 *
 * @param node the source plan node whose operator is inspected
 * @param <T> the element type expected by the caller; the cast to it is unchecked
 * @return the output type of the node's data source operator
 * @throws RuntimeException if the operator is not a {@code GenericDataSourceBase}
 */
@SuppressWarnings("unchecked")
private static <T> TypeInformation<T> getTypeInfoFromSource(SourcePlanNode node) {
	final Operator<?> operator = node.getOptimizerNode().getOperator();

	// anything other than a data source operator is rejected up front
	if (!(operator instanceof GenericDataSourceBase)) {
		throw new RuntimeException("Wrong operator type found in post pass.");
	}

	final GenericDataSourceBase<T, ?> source = (GenericDataSourceBase<T, ?>) operator;
	return source.getOperatorInfo().getOutputType();
}
 
Example 23
Source Project: flink   Source File: DataSourceNode.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new DataSourceNode for the given contract.
 *
 * <p>Based on the contract's user code class, the node is flagged as sequential input
 * (with the parallelism set to 1) when the class is a {@code NonParallelInput}, and
 * as replicated input when it is a {@code ReplicatingInputFormat}. Global and local
 * properties are initialized empty; replicated inputs are marked fully replicated,
 * and otherwise any available split data properties are applied.
 *
 * @param pactContract
 *        The data source contract object.
 * @throws IllegalArgumentException
 *         If the contract's user code class is {@code null}.
 */
public DataSourceNode(GenericDataSourceBase<?, ?> pactContract) {
	super(pactContract);

	final Class<?> userCodeClass = pactContract.getUserCodeWrapper().getUserCodeClass();
	if (userCodeClass == null) {
		throw new IllegalArgumentException("Input format has not been set.");
	}

	// non-parallel inputs are pinned to a parallelism of 1
	final boolean nonParallel = NonParallelInput.class.isAssignableFrom(userCodeClass);
	if (nonParallel) {
		setParallelism(1);
	}
	this.sequentialInput = nonParallel;

	this.replicatedInput = ReplicatingInputFormat.class.isAssignableFrom(userCodeClass);

	this.gprops = new GlobalProperties();
	this.lprops = new LocalProperties();

	final SplitDataProperties<?> splitProperties = pactContract.getSplitDataProperties();

	if (replicatedInput) {
		this.gprops.setFullyReplicated();
		this.lprops = new LocalProperties();
	} else if (splitProperties != null) {
		// configure data properties of data source using split properties
		setDataPropertiesFromSplitProperties(splitProperties);
	}
}
 
Example 24
Source Project: flink   Source File: CompilerTestBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adds each visited data source operator to the collected sources. When a bulk
 * iteration is visited, this visitor is also applied to the iteration's next
 * partial solution.
 *
 * @param visitable the operator currently being visited
 * @return always {@code true}
 */
@Override
public boolean preVisit(Operator<?> visitable) {
	if (visitable instanceof GenericDataSourceBase) {
		final GenericDataSourceBase<?, ?> source = (GenericDataSourceBase<?, ?>) visitable;
		sources.add(source);
		return true;
	}

	if (visitable instanceof BulkIterationBase) {
		final BulkIterationBase<?> iteration = (BulkIterationBase<?>) visitable;
		iteration.getNextPartialSolution().accept(this);
	}

	return true;
}
 
Example 25
Source Project: flink   Source File: CachedMatchStrategyCompilerTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * This test simulates a join of a big left side with a small right side inside of an iteration, where the small side is on a static path.
 * Currently the best execution plan is a HYBRIDHASH_BUILD_SECOND_CACHED, where the small side is hashed and cached.
 * This test also makes sure that all relevant plans are correctly enumerated by the optimizer.
 *
 * @throws Exception if compiling the plan or generating the job graph fails; the exception is
 *         left to JUnit so that the test report keeps the original stack trace
 */
@Test
public void testCorrectChoosing() throws Exception {
	Plan plan = getTestPlanRightStatic("");

	SourceCollectorVisitor sourceCollector = new SourceCollectorVisitor();
	plan.accept(sourceCollector);

	// constant-first equals: a source without a name is skipped instead of causing an NPE
	for (GenericDataSourceBase<?, ?> s : sourceCollector.getSources()) {
		if ("bigFile".equals(s.getName())) {
			this.setSourceStatistics(s, 10000000, 1000);
		}
		else if ("smallFile".equals(s.getName())) {
			this.setSourceStatistics(s, 100, 100);
		}
	}

	OptimizedPlan oPlan = compileNoStats(plan);

	OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
	DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");

	// verify correct join strategy
	assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND_CACHED, innerJoin.getDriverStrategy());
	assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode());
	assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode());

	// the optimized plan must also be translatable into a job graph
	new JobGraphGenerator().compileJobGraph(oPlan);
}
 
Example 26
Source Project: flink   Source File: KMeansSingleStepTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Compiles the KMeans plan after statistics have been set on its two data sources
 * and passes the optimized plan to {@code checkPlan}.
 *
 * @throws Exception propagated unchanged from any of the invoked helpers
 */
@Test
public void testCompileKMeansSingleStepWithStats() throws Exception {
	final Plan kMeansPlan = getKMeansPlan();
	kMeansPlan.setExecutionConfig(new ExecutionConfig());

	// resolve the two sources via the DATAPOINTS and CENTERS keys
	final OperatorResolver resolver = getContractResolver(kMeansPlan);
	final GenericDataSourceBase<?, ?> points = resolver.getNode(DATAPOINTS);
	final GenericDataSourceBase<?, ?> centers = resolver.getNode(CENTERS);

	// attach the statistics: the points source is given a far larger size than the centers source
	setSourceStatistics(points, 100L * 1024 * 1024 * 1024, 32f);
	setSourceStatistics(centers, 1024 * 1024, 32f);

	checkPlan(compileWithStats(kMeansPlan));
}
 
Example 27
Source Project: flink   Source File: ReduceTranslationTests.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tests the translation of a reduce that is applied without grouping: the sink must
 * be fed by a reduce operator whose input and output types equal the data set type,
 * which has no key columns and no explicitly configured parallelism, and whose own
 * input is a data source.
 *
 * @throws Exception if building or translating the program fails; the exception is
 *         left to JUnit so that the test report keeps the original stack trace
 */
@Test
public void translateNonGroupedReduce() throws Exception {
	final int parallelism = 8;
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

	DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);

	initialData.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() {
		@Override
		public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) {
			return value1;
		}
	}).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

	Plan p = env.createProgramPlan();

	GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

	ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

	// check types
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

	// check keys: a non-grouped reduce must not carry any key columns
	final int[] keyColumns = reducer.getKeyColumns(0);
	assertTrue(keyColumns == null || keyColumns.length == 0);

	// parallelism was not configured on the operator
	final int reducerParallelism = reducer.getParallelism();
	assertTrue(reducerParallelism == 1 || reducerParallelism == ExecutionConfig.PARALLELISM_DEFAULT);

	assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
 
Example 28
Source Project: flink   Source File: AggregateTranslationTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tests the translation of a grouped aggregation (MIN on field 1 and SUM on field 2,
 * grouped by field 0): the sink must be fed by a combinable group-reduce operator
 * that is keyed on column 0, has parallelism -1, and whose own input is a data source.
 *
 * @throws Exception if building or translating the program fails; the exception is
 *         left to JUnit so that the test report keeps the original stack trace
 */
@Test
public void translateAggregate() throws Exception {
	final int parallelism = 8;
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

	@SuppressWarnings("unchecked")
	DataSet<Tuple3<Double, StringValue, Long>> initialData =
			env.fromElements(new Tuple3<Double, StringValue, Long>(3.141592, new StringValue("foobar"), Long.valueOf(77)));

	initialData.groupBy(0).aggregate(Aggregations.MIN, 1).and(Aggregations.SUM, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, Long>>());

	Plan p = env.createProgramPlan();

	GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

	GroupReduceOperatorBase<?, ?, ?> reducer = (GroupReduceOperatorBase<?, ?, ?>) sink.getInput();

	// check keys: exactly one key column, namely the grouping field 0
	assertEquals(1, reducer.getKeyColumns(0).length);
	assertEquals(0, reducer.getKeyColumns(0)[0]);

	assertEquals(-1, reducer.getParallelism());
	assertTrue(reducer.isCombinable());

	assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
 
Example 29
Source Project: flink   Source File: DistinctTranslationTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tests the translation of {@code distinct()} on a three-field tuple data set: the
 * sink must be fed by a reduce operator whose input and output types equal the data
 * set type, which is keyed on columns 0, 1 and 2, has no explicitly configured
 * parallelism, and whose own input is a data source.
 *
 * @throws Exception if building or translating the program fails; the exception is
 *         left to JUnit so that the test report keeps the original stack trace
 */
@Test
public void translateDistinctPlain() throws Exception {
	final int parallelism = 8;
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

	DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);

	initialData.distinct().output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

	Plan p = env.createProgramPlan();

	GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

	// currently distinct is translated to a Reduce
	ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

	// check types
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

	// check keys
	assertArrayEquals(new int[] {0, 1, 2}, reducer.getKeyColumns(0));

	// parallelism was not configured on the operator
	final int reducerParallelism = reducer.getParallelism();
	assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

	assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
 
Example 30
Source Project: flink   Source File: DistinctTranslationTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tests the translation of {@code distinct()} on a {@code CustomType} data set: the
 * sink must be fed by a reduce operator whose input and output types equal the data
 * set type, which is keyed on column 0, has no explicitly configured parallelism,
 * and whose own input is a data source.
 *
 * @throws Exception if building or translating the program fails; the exception is
 *         left to JUnit so that the test report keeps the original stack trace
 */
@Test
public void translateDistinctPlain2() throws Exception {
	final int parallelism = 8;
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

	DataSet<CustomType> initialData = getSourcePojoDataSet(env);

	initialData.distinct().output(new DiscardingOutputFormat<CustomType>());

	Plan p = env.createProgramPlan();

	GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

	// currently distinct is translated to a Reduce
	ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

	// check types
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
	assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

	// check keys
	assertArrayEquals(new int[] {0}, reducer.getKeyColumns(0));

	// parallelism was not configured on the operator
	final int reducerParallelism = reducer.getParallelism();
	assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

	assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}