org.apache.flink.api.common.operators.GenericDataSourceBase Java Examples

The following examples show how to use org.apache.flink.api.common.operators.GenericDataSourceBase. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DataSource.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the common-API data source operator that corresponds to this data source.
 *
 * <p>The operator name is the name set on this object or, when none is set, a description
 * assembled from the source location name and the input format class name. Names longer
 * than 150 characters are cut off. Parameters and split data properties are only forwarded
 * when they are present.
 *
 * @return the configured {@code GenericDataSourceBase}
 */
protected GenericDataSourceBase<OUT, ?> translateToDataFlow() {
	String operatorName;
	if (this.name == null) {
		operatorName = "at " + dataSourceLocationName + " (" + inputFormat.getClass().getName() + ")";
	} else {
		operatorName = this.name;
	}
	if (operatorName.length() > 150) {
		operatorName = operatorName.substring(0, 150);
	}

	final OperatorInformation<OUT> operatorInfo = new OperatorInformation<OUT>(getType());

	@SuppressWarnings({"unchecked", "rawtypes"})
	GenericDataSourceBase<OUT, ?> translated =
		new GenericDataSourceBase(this.inputFormat, operatorInfo, operatorName);

	translated.setParallelism(parallelism);

	if (this.parameters != null) {
		translated.getParameters().addAll(this.parameters);
	}

	if (this.splitDataProperties != null) {
		translated.setSplitDataProperties(this.splitDataProperties);
	}

	return translated;
}
 
Example #2
Source File: DataSource.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the common-API data source operator that corresponds to this data source.
 *
 * <p>The operator name is the name set on this object or, when none is set, a description
 * assembled from the source location name and the input format class name. Names longer
 * than 150 characters are cut off. Parameters and split data properties are only forwarded
 * when they are present.
 *
 * @return the configured {@code GenericDataSourceBase}
 */
protected GenericDataSourceBase<OUT, ?> translateToDataFlow() {
	String operatorName;
	if (this.name == null) {
		operatorName = "at " + dataSourceLocationName + " (" + inputFormat.getClass().getName() + ")";
	} else {
		operatorName = this.name;
	}
	if (operatorName.length() > 150) {
		operatorName = operatorName.substring(0, 150);
	}

	final OperatorInformation<OUT> operatorInfo = new OperatorInformation<OUT>(getType());

	@SuppressWarnings({"unchecked", "rawtypes"})
	GenericDataSourceBase<OUT, ?> translated =
		new GenericDataSourceBase(this.inputFormat, operatorInfo, operatorName);

	translated.setParallelism(parallelism);

	if (this.parameters != null) {
		translated.getParameters().addAll(this.parameters);
	}

	if (this.splitDataProperties != null) {
		translated.setSplitDataProperties(this.splitDataProperties);
	}

	return translated;
}
 
Example #3
Source File: DataSource.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the common-API data source operator that corresponds to this data source.
 *
 * <p>The operator name is the name set on this object or, when none is set, a description
 * assembled from the source location name and the input format class name. Names longer
 * than 150 characters are cut off. Parameters and split data properties are only forwarded
 * when they are present.
 *
 * @return the configured {@code GenericDataSourceBase}
 */
protected GenericDataSourceBase<OUT, ?> translateToDataFlow() {
	String operatorName;
	if (this.name == null) {
		operatorName = "at " + dataSourceLocationName + " (" + inputFormat.getClass().getName() + ")";
	} else {
		operatorName = this.name;
	}
	if (operatorName.length() > 150) {
		operatorName = operatorName.substring(0, 150);
	}

	final OperatorInformation<OUT> operatorInfo = new OperatorInformation<OUT>(getType());

	@SuppressWarnings({"unchecked", "rawtypes"})
	GenericDataSourceBase<OUT, ?> translated =
		new GenericDataSourceBase(this.inputFormat, operatorInfo, operatorName);

	translated.setParallelism(parallelism);

	if (this.parameters != null) {
		translated.getParameters().addAll(this.parameters);
	}

	if (this.splitDataProperties != null) {
		translated.setSplitDataProperties(this.splitDataProperties);
	}

	return translated;
}
 
Example #4
Source File: AggregateTranslationTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the plan created for a grouped aggregation: grouping on field 0 with MIN on
 * field 1 and SUM on field 2. The sink's input must be a combinable group-reduce keyed
 * on column 0, with parallelism -1, whose own input is a data source.
 */
@Test
public void translateAggregate() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment(parallelism);

		@SuppressWarnings("unchecked")
		DataSet<Tuple3<Double, StringValue, Long>> input =
				environment.fromElements(new Tuple3<Double, StringValue, Long>(3.141592, new StringValue("foobar"), 77L));

		input
				.groupBy(0)
				.aggregate(Aggregations.MIN, 1)
				.and(Aggregations.SUM, 2)
				.output(new DiscardingOutputFormat<Tuple3<Double, StringValue, Long>>());

		Plan plan = environment.createProgramPlan();

		GenericDataSinkBase<?> dataSink = plan.getDataSinks().iterator().next();

		GroupReduceOperatorBase<?, ?, ?> groupReduce = (GroupReduceOperatorBase<?, ?, ?>) dataSink.getInput();

		// exactly one key column, and it is column 0
		int[] keyColumns = groupReduce.getKeyColumns(0);
		assertEquals(1, keyColumns.length);
		assertEquals(0, keyColumns[0]);

		assertEquals(-1, groupReduce.getParallelism());
		assertTrue(groupReduce.isCombinable());

		assertTrue(groupReduce.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception ex) {
		final String message = ex.getMessage();
		System.err.println(message);
		ex.printStackTrace();
		fail("Test caused an error: " + message);
	}
}
 
Example #5
Source File: DistinctTranslationTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the plan created for {@code distinct()} without arguments on a Tuple3 data set:
 * the sink's input must be a reduce over key columns 0, 1 and 2 whose input and output
 * types equal the data set type, and whose own input is a data source.
 */
@Test
public void translateDistinctPlain() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<Tuple3<Double, StringValue, LongValue>> input = getSourceDataSet(environment);

		input.distinct().output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

		Plan plan = environment.createProgramPlan();

		GenericDataSinkBase<?> dataSink = plan.getDataSinks().iterator().next();

		// distinct is currently translated to a Reduce, hence the cast
		ReduceOperatorBase<?, ?> reduceOp = (ReduceOperatorBase<?, ?>) dataSink.getInput();

		// the reducer must consume and produce the data set type
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getInputType());
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getOutputType());

		// all three tuple fields act as keys
		assertArrayEquals(new int[] {0, 1, 2}, reduceOp.getKeyColumns(0));

		// parallelism was not configured on the operator
		final int reducerParallelism = reduceOp.getParallelism();
		assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

		assertTrue(reduceOp.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception ex) {
		final String message = ex.getMessage();
		System.err.println(message);
		ex.printStackTrace();
		fail("Test caused an error: " + message);
	}
}
 
Example #6
Source File: DataSourceNode.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new DataSourceNode for the given contract.
 *
 * <p>The node is marked as sequential (with parallelism set to 1) when the contract's user
 * code class is a {@code NonParallelInput}, and as replicated when it is a
 * {@code ReplicatingInputFormat}. Replicated inputs get fully replicated global properties;
 * otherwise, split data properties of the contract (if any) are applied.
 *
 * @param pactContract
 *        The data source contract object.
 * @throws IllegalArgumentException
 *         If the contract's user code wrapper reports no user code class.
 */
public DataSourceNode(GenericDataSourceBase<?, ?> pactContract) {
	super(pactContract);

	// Look the user code class up once; every decision below is based on it.
	final Class<?> userCodeClass = pactContract.getUserCodeWrapper().getUserCodeClass();

	if (userCodeClass == null) {
		throw new IllegalArgumentException("Input format has not been set.");
	}

	// Keep this order: setParallelism(1) runs before sequentialInput is assigned,
	// exactly as in the original constructor.
	if (NonParallelInput.class.isAssignableFrom(userCodeClass)) {
		setParallelism(1);
		this.sequentialInput = true;
	} else {
		this.sequentialInput = false;
	}

	this.replicatedInput = ReplicatingInputFormat.class.isAssignableFrom(userCodeClass);

	this.gprops = new GlobalProperties();
	this.lprops = new LocalProperties();

	SplitDataProperties<?> splitProps = pactContract.getSplitDataProperties();

	if (replicatedInput) {
		// lprops already holds a fresh LocalProperties instance, so only the
		// global properties need to be adjusted here.
		this.gprops.setFullyReplicated();
	} else if (splitProps != null) {
		// configure data properties of data source using split properties
		setDataPropertiesFromSplitProperties(splitProps);
	}

}
 
Example #7
Source File: CompilerTestBase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Records every data source that is visited. For a bulk iteration, the visitor is
 * additionally sent down the iteration's next partial solution.
 *
 * @param visitable the operator being visited
 * @return always {@code true}
 */
@Override
public boolean preVisit(Operator<?> visitable) {

	if (visitable instanceof GenericDataSourceBase) {
		GenericDataSourceBase<?, ?> source = (GenericDataSourceBase<?, ?>) visitable;
		sources.add(source);
	} else if (visitable instanceof BulkIterationBase) {
		BulkIterationBase<?> iteration = (BulkIterationBase<?>) visitable;
		iteration.getNextPartialSolution().accept(this);
	}

	return true;
}
 
Example #8
Source File: AggregateTranslationTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the plan created for a grouped aggregation: grouping on field 0 with MIN on
 * field 1 and SUM on field 2. The sink's input must be a combinable group-reduce keyed
 * on column 0, with parallelism -1, whose own input is a data source.
 */
@Test
public void translateAggregate() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment(parallelism);

		@SuppressWarnings("unchecked")
		DataSet<Tuple3<Double, StringValue, Long>> input =
				environment.fromElements(new Tuple3<Double, StringValue, Long>(3.141592, new StringValue("foobar"), 77L));

		input
				.groupBy(0)
				.aggregate(Aggregations.MIN, 1)
				.and(Aggregations.SUM, 2)
				.output(new DiscardingOutputFormat<Tuple3<Double, StringValue, Long>>());

		Plan plan = environment.createProgramPlan();

		GenericDataSinkBase<?> dataSink = plan.getDataSinks().iterator().next();

		GroupReduceOperatorBase<?, ?, ?> groupReduce = (GroupReduceOperatorBase<?, ?, ?>) dataSink.getInput();

		// exactly one key column, and it is column 0
		int[] keyColumns = groupReduce.getKeyColumns(0);
		assertEquals(1, keyColumns.length);
		assertEquals(0, keyColumns[0]);

		assertEquals(-1, groupReduce.getParallelism());
		assertTrue(groupReduce.isCombinable());

		assertTrue(groupReduce.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception ex) {
		final String message = ex.getMessage();
		System.err.println(message);
		ex.printStackTrace();
		fail("Test caused an error: " + message);
	}
}
 
Example #9
Source File: DistinctTranslationTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the plan created for {@code distinct(1, 2)} on a Tuple3 data set: the sink's
 * input must be a reduce over key columns 1 and 2 whose input and output types equal
 * the data set type, and whose own input is a data source.
 */
@Test
public void translateDistinctPosition() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<Tuple3<Double, StringValue, LongValue>> input = getSourceDataSet(environment);

		input.distinct(1, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

		Plan plan = environment.createProgramPlan();

		GenericDataSinkBase<?> dataSink = plan.getDataSinks().iterator().next();

		// distinct is currently translated to a Reduce, hence the cast
		ReduceOperatorBase<?, ?> reduceOp = (ReduceOperatorBase<?, ?>) dataSink.getInput();

		// the reducer must consume and produce the data set type
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getInputType());
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getOutputType());

		// only the two requested positions act as keys
		assertArrayEquals(new int[] {1, 2}, reduceOp.getKeyColumns(0));

		// parallelism was not configured on the operator
		final int reducerParallelism = reduceOp.getParallelism();
		assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

		assertTrue(reduceOp.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception ex) {
		final String message = ex.getMessage();
		System.err.println(message);
		ex.printStackTrace();
		fail("Test caused an error: " + message);
	}
}
 
Example #10
Source File: ReduceTranslationTests.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the plan created for a reduce without grouping: the sink's input must be a
 * reduce with no key columns whose input and output types equal the data set type,
 * and whose own input is a data source.
 */
@Test
public void translateNonGroupedReduce() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<Tuple3<Double, StringValue, LongValue>> input = getSourceDataSet(environment);

		input.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() {
			@Override
			public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> first, Tuple3<Double, StringValue, LongValue> second) {
				// always keeps the first value
				return first;
			}
		}).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

		Plan plan = environment.createProgramPlan();

		GenericDataSinkBase<?> dataSink = plan.getDataSinks().iterator().next();

		ReduceOperatorBase<?, ?> reduceOp = (ReduceOperatorBase<?, ?>) dataSink.getInput();

		// the reducer must consume and produce the data set type
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getInputType());
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getOutputType());

		// a non-grouped reduce has no key columns (null or empty)
		int[] keyColumns = reduceOp.getKeyColumns(0);
		assertTrue(keyColumns == null || keyColumns.length == 0);

		// parallelism was not configured on the operator
		final int reducerParallelism = reduceOp.getParallelism();
		assertTrue(reducerParallelism == 1 || reducerParallelism == ExecutionConfig.PARALLELISM_DEFAULT);

		assertTrue(reduceOp.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception ex) {
		final String message = ex.getMessage();
		System.err.println(message);
		ex.printStackTrace();
		fail("Test caused an error: " + message);
	}
}
 
Example #11
Source File: DistinctTranslationTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the plan created for {@code distinct("myInt")} on a {@code CustomType} data set:
 * the sink's input must be a reduce over key column 0 whose input and output types equal
 * the data set type, and whose own input is a data source.
 */
@Test
public void translateDistinctExpressionKey() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<CustomType> input = getSourcePojoDataSet(environment);

		input.distinct("myInt").output(new DiscardingOutputFormat<CustomType>());

		Plan plan = environment.createProgramPlan();

		GenericDataSinkBase<?> dataSink = plan.getDataSinks().iterator().next();

		// distinct is currently translated to a Reduce, hence the cast
		ReduceOperatorBase<?, ?> reduceOp = (ReduceOperatorBase<?, ?>) dataSink.getInput();

		// the reducer must consume and produce the data set type
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getInputType());
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getOutputType());

		// the key expression is expected to map to column 0
		assertArrayEquals(new int[] {0}, reduceOp.getKeyColumns(0));

		// parallelism was not configured on the operator
		final int reducerParallelism = reduceOp.getParallelism();
		assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

		assertTrue(reduceOp.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception ex) {
		final String message = ex.getMessage();
		System.err.println(message);
		ex.printStackTrace();
		fail("Test caused an error: " + message);
	}
}
 
Example #12
Source File: JavaApiPostPass.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the output type information of the data source operator behind the given
 * source plan node.
 *
 * @param node the source plan node to inspect
 * @return the output type registered in the operator's operator info
 * @throws RuntimeException if the node's operator is not a {@code GenericDataSourceBase}
 */
@SuppressWarnings("unchecked")
private static <T> TypeInformation<T> getTypeInfoFromSource(SourcePlanNode node) {
	final Operator<?> operator = node.getOptimizerNode().getOperator();

	if (!(operator instanceof GenericDataSourceBase)) {
		throw new RuntimeException("Wrong operator type found in post pass.");
	}

	GenericDataSourceBase<T, ?> source = (GenericDataSourceBase<T, ?>) operator;
	return source.getOperatorInfo().getOutputType();
}
 
Example #13
Source File: DistinctTranslationTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the plan created for {@code distinct()} without arguments on a {@code CustomType}
 * data set: the sink's input must be a reduce over key column 0 whose input and output
 * types equal the data set type, and whose own input is a data source.
 */
@Test
public void translateDistinctPlain2() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<CustomType> input = getSourcePojoDataSet(environment);

		input.distinct().output(new DiscardingOutputFormat<CustomType>());

		Plan plan = environment.createProgramPlan();

		GenericDataSinkBase<?> dataSink = plan.getDataSinks().iterator().next();

		// distinct is currently translated to a Reduce, hence the cast
		ReduceOperatorBase<?, ?> reduceOp = (ReduceOperatorBase<?, ?>) dataSink.getInput();

		// the reducer must consume and produce the data set type
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getInputType());
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getOutputType());

		// a single key column, column 0, is expected
		assertArrayEquals(new int[] {0}, reduceOp.getKeyColumns(0));

		// parallelism was not configured on the operator
		final int reducerParallelism = reduceOp.getParallelism();
		assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

		assertTrue(reduceOp.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception ex) {
		final String message = ex.getMessage();
		System.err.println(message);
		ex.printStackTrace();
		fail("Test caused an error: " + message);
	}
}
 
Example #14
Source File: DistinctTranslationTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the plan created for {@code distinct(1, 2)} on a Tuple3 data set: the sink's
 * input must be a reduce over key columns 1 and 2 whose input and output types equal
 * the data set type, and whose own input is a data source.
 */
@Test
public void translateDistinctPosition() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<Tuple3<Double, StringValue, LongValue>> input = getSourceDataSet(environment);

		input.distinct(1, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

		Plan plan = environment.createProgramPlan();

		GenericDataSinkBase<?> dataSink = plan.getDataSinks().iterator().next();

		// distinct is currently translated to a Reduce, hence the cast
		ReduceOperatorBase<?, ?> reduceOp = (ReduceOperatorBase<?, ?>) dataSink.getInput();

		// the reducer must consume and produce the data set type
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getInputType());
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getOutputType());

		// only the two requested positions act as keys
		assertArrayEquals(new int[] {1, 2}, reduceOp.getKeyColumns(0));

		// parallelism was not configured on the operator
		final int reducerParallelism = reduceOp.getParallelism();
		assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

		assertTrue(reduceOp.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception ex) {
		final String message = ex.getMessage();
		System.err.println(message);
		ex.printStackTrace();
		fail("Test caused an error: " + message);
	}
}
 
Example #15
Source File: DistinctTranslationTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the plan created for {@code distinct()} without arguments on a {@code CustomType}
 * data set: the sink's input must be a reduce over key column 0 whose input and output
 * types equal the data set type, and whose own input is a data source.
 */
@Test
public void translateDistinctPlain2() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<CustomType> input = getSourcePojoDataSet(environment);

		input.distinct().output(new DiscardingOutputFormat<CustomType>());

		Plan plan = environment.createProgramPlan();

		GenericDataSinkBase<?> dataSink = plan.getDataSinks().iterator().next();

		// distinct is currently translated to a Reduce, hence the cast
		ReduceOperatorBase<?, ?> reduceOp = (ReduceOperatorBase<?, ?>) dataSink.getInput();

		// the reducer must consume and produce the data set type
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getInputType());
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getOutputType());

		// a single key column, column 0, is expected
		assertArrayEquals(new int[] {0}, reduceOp.getKeyColumns(0));

		// parallelism was not configured on the operator
		final int reducerParallelism = reduceOp.getParallelism();
		assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

		assertTrue(reduceOp.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception ex) {
		final String message = ex.getMessage();
		System.err.println(message);
		ex.printStackTrace();
		fail("Test caused an error: " + message);
	}
}
 
Example #16
Source File: ReduceTranslationTests.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the plan created for a reduce without grouping: the sink's input must be a
 * reduce with no key columns whose input and output types equal the data set type,
 * and whose own input is a data source.
 */
@Test
public void translateNonGroupedReduce() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<Tuple3<Double, StringValue, LongValue>> input = getSourceDataSet(environment);

		input.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() {
			@Override
			public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> first, Tuple3<Double, StringValue, LongValue> second) {
				// always keeps the first value
				return first;
			}
		}).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

		Plan plan = environment.createProgramPlan();

		GenericDataSinkBase<?> dataSink = plan.getDataSinks().iterator().next();

		ReduceOperatorBase<?, ?> reduceOp = (ReduceOperatorBase<?, ?>) dataSink.getInput();

		// the reducer must consume and produce the data set type
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getInputType());
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getOutputType());

		// a non-grouped reduce has no key columns (null or empty)
		int[] keyColumns = reduceOp.getKeyColumns(0);
		assertTrue(keyColumns == null || keyColumns.length == 0);

		// parallelism was not configured on the operator
		final int reducerParallelism = reduceOp.getParallelism();
		assertTrue(reducerParallelism == 1 || reducerParallelism == ExecutionConfig.PARALLELISM_DEFAULT);

		assertTrue(reduceOp.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception ex) {
		final String message = ex.getMessage();
		System.err.println(message);
		ex.printStackTrace();
		fail("Test caused an error: " + message);
	}
}
 
Example #17
Source File: DistinctTranslationTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the plan created for {@code distinct()} without arguments on a Tuple3 data set:
 * the sink's input must be a reduce over key columns 0, 1 and 2 whose input and output
 * types equal the data set type, and whose own input is a data source.
 */
@Test
public void translateDistinctPlain() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<Tuple3<Double, StringValue, LongValue>> input = getSourceDataSet(environment);

		input.distinct().output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

		Plan plan = environment.createProgramPlan();

		GenericDataSinkBase<?> dataSink = plan.getDataSinks().iterator().next();

		// distinct is currently translated to a Reduce, hence the cast
		ReduceOperatorBase<?, ?> reduceOp = (ReduceOperatorBase<?, ?>) dataSink.getInput();

		// the reducer must consume and produce the data set type
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getInputType());
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getOutputType());

		// all three tuple fields act as keys
		assertArrayEquals(new int[] {0, 1, 2}, reduceOp.getKeyColumns(0));

		// parallelism was not configured on the operator
		final int reducerParallelism = reduceOp.getParallelism();
		assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

		assertTrue(reduceOp.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception ex) {
		final String message = ex.getMessage();
		System.err.println(message);
		ex.printStackTrace();
		fail("Test caused an error: " + message);
	}
}
 
Example #18
Source File: KMeansSingleStepTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Compiles the K-Means plan with source statistics set on both the data points source
 * and the centers source, then runs the shared plan checks on the optimized result.
 */
@Test
public void testCompileKMeansSingleStepWithStats() {

	Plan kMeansPlan = getKMeansPlan();
	kMeansPlan.setExecutionConfig(new ExecutionConfig());

	// look up both sources and attach statistics before optimizing
	OperatorResolver resolver = getContractResolver(kMeansPlan);
	GenericDataSourceBase<?, ?> points = resolver.getNode(DATAPOINTS);
	GenericDataSourceBase<?, ?> centers = resolver.getNode(CENTERS);
	setSourceStatistics(points, 100L * 1024 * 1024 * 1024, 32f);
	setSourceStatistics(centers, 1024 * 1024, 32f);

	OptimizedPlan optimized = compileWithStats(kMeansPlan);
	checkPlan(optimized);
}
 
Example #19
Source File: AggregateTranslationTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the plan created for a grouped aggregation: grouping on field 0 with MIN on
 * field 1 and SUM on field 2. The sink's input must be a combinable group-reduce keyed
 * on column 0, with parallelism -1, whose own input is a data source.
 */
@Test
public void translateAggregate() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment(parallelism);

		@SuppressWarnings("unchecked")
		DataSet<Tuple3<Double, StringValue, Long>> input =
				environment.fromElements(new Tuple3<Double, StringValue, Long>(3.141592, new StringValue("foobar"), 77L));

		input
				.groupBy(0)
				.aggregate(Aggregations.MIN, 1)
				.and(Aggregations.SUM, 2)
				.output(new DiscardingOutputFormat<Tuple3<Double, StringValue, Long>>());

		Plan plan = environment.createProgramPlan();

		GenericDataSinkBase<?> dataSink = plan.getDataSinks().iterator().next();

		GroupReduceOperatorBase<?, ?, ?> groupReduce = (GroupReduceOperatorBase<?, ?, ?>) dataSink.getInput();

		// exactly one key column, and it is column 0
		int[] keyColumns = groupReduce.getKeyColumns(0);
		assertEquals(1, keyColumns.length);
		assertEquals(0, keyColumns[0]);

		assertEquals(-1, groupReduce.getParallelism());
		assertTrue(groupReduce.isCombinable());

		assertTrue(groupReduce.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception ex) {
		final String message = ex.getMessage();
		System.err.println(message);
		ex.printStackTrace();
		fail("Test caused an error: " + message);
	}
}
 
Example #20
Source File: DistinctTranslationTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the plan created for {@code distinct()} without arguments on a Tuple3 data set:
 * the sink's input must be a reduce over key columns 0, 1 and 2 whose input and output
 * types equal the data set type, and whose own input is a data source.
 */
@Test
public void translateDistinctPlain() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<Tuple3<Double, StringValue, LongValue>> input = getSourceDataSet(environment);

		input.distinct().output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

		Plan plan = environment.createProgramPlan();

		GenericDataSinkBase<?> dataSink = plan.getDataSinks().iterator().next();

		// distinct is currently translated to a Reduce, hence the cast
		ReduceOperatorBase<?, ?> reduceOp = (ReduceOperatorBase<?, ?>) dataSink.getInput();

		// the reducer must consume and produce the data set type
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getInputType());
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getOutputType());

		// all three tuple fields act as keys
		assertArrayEquals(new int[] {0, 1, 2}, reduceOp.getKeyColumns(0));

		// parallelism was not configured on the operator
		final int reducerParallelism = reduceOp.getParallelism();
		assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

		assertTrue(reduceOp.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception ex) {
		final String message = ex.getMessage();
		System.err.println(message);
		ex.printStackTrace();
		fail("Test caused an error: " + message);
	}
}
 
Example #21
Source File: DistinctTranslationTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the plan created for {@code distinct()} without arguments on a {@code CustomType}
 * data set: the sink's input must be a reduce over key column 0 whose input and output
 * types equal the data set type, and whose own input is a data source.
 */
@Test
public void translateDistinctPlain2() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<CustomType> input = getSourcePojoDataSet(environment);

		input.distinct().output(new DiscardingOutputFormat<CustomType>());

		Plan plan = environment.createProgramPlan();

		GenericDataSinkBase<?> dataSink = plan.getDataSinks().iterator().next();

		// distinct is currently translated to a Reduce, hence the cast
		ReduceOperatorBase<?, ?> reduceOp = (ReduceOperatorBase<?, ?>) dataSink.getInput();

		// the reducer must consume and produce the data set type
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getInputType());
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getOutputType());

		// a single key column, column 0, is expected
		assertArrayEquals(new int[] {0}, reduceOp.getKeyColumns(0));

		// parallelism was not configured on the operator
		final int reducerParallelism = reduceOp.getParallelism();
		assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

		assertTrue(reduceOp.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception ex) {
		final String message = ex.getMessage();
		System.err.println(message);
		ex.printStackTrace();
		fail("Test caused an error: " + message);
	}
}
 
Example #22
Source File: DistinctTranslationTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the plan created for {@code distinct(1, 2)} on a Tuple3 data set: the sink's
 * input must be a reduce over key columns 1 and 2 whose input and output types equal
 * the data set type, and whose own input is a data source.
 */
@Test
public void translateDistinctPosition() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<Tuple3<Double, StringValue, LongValue>> input = getSourceDataSet(environment);

		input.distinct(1, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

		Plan plan = environment.createProgramPlan();

		GenericDataSinkBase<?> dataSink = plan.getDataSinks().iterator().next();

		// distinct is currently translated to a Reduce, hence the cast
		ReduceOperatorBase<?, ?> reduceOp = (ReduceOperatorBase<?, ?>) dataSink.getInput();

		// the reducer must consume and produce the data set type
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getInputType());
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getOutputType());

		// only the two requested positions act as keys
		assertArrayEquals(new int[] {1, 2}, reduceOp.getKeyColumns(0));

		// parallelism was not configured on the operator
		final int reducerParallelism = reduceOp.getParallelism();
		assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

		assertTrue(reduceOp.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception ex) {
		final String message = ex.getMessage();
		System.err.println(message);
		ex.printStackTrace();
		fail("Test caused an error: " + message);
	}
}
 
Example #23
Source File: KMeansSingleStepTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Compiles the K-Means plan with source statistics set on both the data points source
 * and the centers source, then runs the shared plan checks on the optimized result.
 */
@Test
public void testCompileKMeansSingleStepWithStats() {

	Plan kMeansPlan = getKMeansPlan();
	kMeansPlan.setExecutionConfig(new ExecutionConfig());

	// look up both sources and attach statistics before optimizing
	OperatorResolver resolver = getContractResolver(kMeansPlan);
	GenericDataSourceBase<?, ?> points = resolver.getNode(DATAPOINTS);
	GenericDataSourceBase<?, ?> centers = resolver.getNode(CENTERS);
	setSourceStatistics(points, 100L * 1024 * 1024 * 1024, 32f);
	setSourceStatistics(centers, 1024 * 1024, 32f);

	OptimizedPlan optimized = compileWithStats(kMeansPlan);
	checkPlan(optimized);
}
 
Example #24
Source File: DistinctTranslationTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the plan created for {@code distinct("myInt")} on a {@code CustomType} data set:
 * the sink's input must be a reduce over key column 0 whose input and output types equal
 * the data set type, and whose own input is a data source.
 */
@Test
public void translateDistinctExpressionKey() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<CustomType> input = getSourcePojoDataSet(environment);

		input.distinct("myInt").output(new DiscardingOutputFormat<CustomType>());

		Plan plan = environment.createProgramPlan();

		GenericDataSinkBase<?> dataSink = plan.getDataSinks().iterator().next();

		// distinct is currently translated to a Reduce, hence the cast
		ReduceOperatorBase<?, ?> reduceOp = (ReduceOperatorBase<?, ?>) dataSink.getInput();

		// the reducer must consume and produce the data set type
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getInputType());
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getOutputType());

		// the key expression is expected to map to column 0
		assertArrayEquals(new int[] {0}, reduceOp.getKeyColumns(0));

		// parallelism was not configured on the operator
		final int reducerParallelism = reduceOp.getParallelism();
		assertTrue(reducerParallelism == 1 || reducerParallelism == -1);

		assertTrue(reduceOp.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception ex) {
		final String message = ex.getMessage();
		System.err.println(message);
		ex.printStackTrace();
		fail("Test caused an error: " + message);
	}
}
 
Example #25
Source File: JavaApiPostPass.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the output type information of the data source operator behind the given
 * source plan node.
 *
 * @param node the source plan node to inspect
 * @return the output type registered in the operator's operator info
 * @throws RuntimeException if the node's operator is not a {@code GenericDataSourceBase}
 */
@SuppressWarnings("unchecked")
private static <T> TypeInformation<T> getTypeInfoFromSource(SourcePlanNode node) {
	final Operator<?> operator = node.getOptimizerNode().getOperator();

	if (!(operator instanceof GenericDataSourceBase)) {
		throw new RuntimeException("Wrong operator type found in post pass.");
	}

	GenericDataSourceBase<T, ?> source = (GenericDataSourceBase<T, ?>) operator;
	return source.getOperatorInfo().getOutputType();
}
 
Example #26
Source File: DataSourceNode.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new DataSourceNode for the given contract.
 *
 * <p>The node is marked as sequential (with parallelism set to 1) when the contract's user
 * code class is a {@code NonParallelInput}, and as replicated when it is a
 * {@code ReplicatingInputFormat}. Replicated inputs get fully replicated global properties;
 * otherwise, split data properties of the contract (if any) are applied.
 *
 * @param pactContract
 *        The data source contract object.
 * @throws IllegalArgumentException
 *         If the contract's user code wrapper reports no user code class.
 */
public DataSourceNode(GenericDataSourceBase<?, ?> pactContract) {
	super(pactContract);

	// Look the user code class up once; every decision below is based on it.
	final Class<?> userCodeClass = pactContract.getUserCodeWrapper().getUserCodeClass();

	if (userCodeClass == null) {
		throw new IllegalArgumentException("Input format has not been set.");
	}

	// Keep this order: setParallelism(1) runs before sequentialInput is assigned,
	// exactly as in the original constructor.
	if (NonParallelInput.class.isAssignableFrom(userCodeClass)) {
		setParallelism(1);
		this.sequentialInput = true;
	} else {
		this.sequentialInput = false;
	}

	this.replicatedInput = ReplicatingInputFormat.class.isAssignableFrom(userCodeClass);

	this.gprops = new GlobalProperties();
	this.lprops = new LocalProperties();

	SplitDataProperties<?> splitProps = pactContract.getSplitDataProperties();

	if (replicatedInput) {
		// lprops already holds a fresh LocalProperties instance, so only the
		// global properties need to be adjusted here.
		this.gprops.setFullyReplicated();
	} else if (splitProps != null) {
		// configure data properties of data source using split properties
		setDataPropertiesFromSplitProperties(splitProps);
	}

}
 
Example #27
Source File: ReduceTranslationTests.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the plan created for a reduce without grouping: the sink's input must be a
 * reduce with no key columns whose input and output types equal the data set type,
 * and whose own input is a data source.
 */
@Test
public void translateNonGroupedReduce() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<Tuple3<Double, StringValue, LongValue>> input = getSourceDataSet(environment);

		input.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() {
			@Override
			public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> first, Tuple3<Double, StringValue, LongValue> second) {
				// always keeps the first value
				return first;
			}
		}).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

		Plan plan = environment.createProgramPlan();

		GenericDataSinkBase<?> dataSink = plan.getDataSinks().iterator().next();

		ReduceOperatorBase<?, ?> reduceOp = (ReduceOperatorBase<?, ?>) dataSink.getInput();

		// the reducer must consume and produce the data set type
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getInputType());
		assertEquals(input.getType(), reduceOp.getOperatorInfo().getOutputType());

		// a non-grouped reduce has no key columns (null or empty)
		int[] keyColumns = reduceOp.getKeyColumns(0);
		assertTrue(keyColumns == null || keyColumns.length == 0);

		// parallelism was not configured on the operator
		final int reducerParallelism = reduceOp.getParallelism();
		assertTrue(reducerParallelism == 1 || reducerParallelism == ExecutionConfig.PARALLELISM_DEFAULT);

		assertTrue(reduceOp.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception ex) {
		final String message = ex.getMessage();
		System.err.println(message);
		ex.printStackTrace();
		fail("Test caused an error: " + message);
	}
}
 
Example #28
Source File: CompilerTestBase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Records every data source that is visited. For a bulk iteration, the visitor is
 * additionally sent down the iteration's next partial solution.
 *
 * @param visitable the operator being visited
 * @return always {@code true}
 */
@Override
public boolean preVisit(Operator<?> visitable) {

	if (visitable instanceof GenericDataSourceBase) {
		GenericDataSourceBase<?, ?> source = (GenericDataSourceBase<?, ?>) visitable;
		sources.add(source);
	} else if (visitable instanceof BulkIterationBase) {
		BulkIterationBase<?> iteration = (BulkIterationBase<?>) visitable;
		iteration.getNextPartialSolution().accept(this);
	}

	return true;
}
 
Example #29
Source File: CachedMatchStrategyCompilerTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * This test simulates a join of a big left side with a small right side inside of an iteration, where the small side is on a static path.
 * Currently the best execution plan is a HYBRIDHASH_BUILD_SECOND_CACHED, where the small side is hashed and cached.
 * This test also makes sure that all relevant plans are correctly enumerated by the optimizer.
 *
 * <p>Source statistics are attached to the sources named "bigFile" and "smallFile"; sources
 * with any other name (or no name) are left untouched.
 */
@Test
public void testCorrectChoosing() {
	try {

		Plan plan = getTestPlanRightStatic("");

		SourceCollectorVisitor sourceCollector = new SourceCollectorVisitor();
		plan.accept(sourceCollector);

		for (GenericDataSourceBase<?, ?> s : sourceCollector.getSources()) {
			// Constant-first equals: a source without a name is skipped instead of
			// aborting the test with a NullPointerException.
			final String sourceName = s.getName();
			if ("bigFile".equals(sourceName)) {
				this.setSourceStatistics(s, 10000000, 1000);
			}
			else if ("smallFile".equals(sourceName)) {
				this.setSourceStatistics(s, 100, 100);
			}
		}


		OptimizedPlan oPlan = compileNoStats(plan);

		OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
		DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");

		// verify correct join strategy
		assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND_CACHED, innerJoin.getDriverStrategy());
		assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode());
		assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode());

		// the optimized plan must also translate into a job graph without errors
		new JobGraphGenerator().compileJobGraph(oPlan);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test errored: " + e.getMessage());
	}
}
 
Example #30
Source File: KMeansSingleStepTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Compiles the K-Means plan with source statistics set on both the data points source
 * and the centers source, then runs the shared plan checks on the optimized result.
 *
 * @throws Exception propagated from the helpers called by this test
 */
@Test
public void testCompileKMeansSingleStepWithStats() throws Exception {

	Plan kMeansPlan = getKMeansPlan();
	kMeansPlan.setExecutionConfig(new ExecutionConfig());

	// look up both sources and attach statistics before optimizing
	OperatorResolver resolver = getContractResolver(kMeansPlan);
	GenericDataSourceBase<?, ?> points = resolver.getNode(DATAPOINTS);
	GenericDataSourceBase<?, ?> centers = resolver.getNode(CENTERS);
	setSourceStatistics(points, 100L * 1024 * 1024 * 1024, 32f);
	setSourceStatistics(centers, 1024 * 1024, 32f);

	OptimizedPlan optimized = compileWithStats(kMeansPlan);
	checkPlan(optimized);
}