Java Code Examples for org.apache.flink.api.java.tuple.Tuple1

The following examples show how to use org.apache.flink.api.java.tuple.Tuple1. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Flink-CEPplus   Source File: HBaseConnectorITCase.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testTableInputFormat() throws Exception {
	// Sum all Tuple1<Integer> records produced by the test-table input format
	// and verify the aggregate against the known fixture total (360).
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	DataSet<Tuple1<Integer>> summed = env
		.createInput(new InputFormatForTestTable())
		.reduce(new ReduceFunction<Tuple1<Integer>>(){

			@Override
			public Tuple1<Integer> reduce(Tuple1<Integer> left, Tuple1<Integer> right) throws Exception {
				return Tuple1.of(left.f0 + right.f0);
			}
		});

	List<Tuple1<Integer>> collected = summed.collect();

	// a global reduce yields exactly one record
	assertEquals(1, collected.size());
	assertEquals(360, (int) collected.get(0).f0);
}
 
Example 2
Source Project: flink   Source File: SortPartitionITCase.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testSortPartitionByKeyField() throws Exception {
	/*
	 * Test sort partition on key field
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.get3TupleDataSet(env);
	List<Tuple1<Boolean>> orderFlags = tuples
			.map(new IdMapper<Tuple3<Integer, Long, String>>()).setParallelism(4) // parallelize input
			.sortPartition(1, Order.DESCENDING)
			.mapPartition(new OrderCheckMapper<>(new Tuple3Checker()))
			.distinct().collect();

	// every partition must report correct ordering
	String expected = "(true)\n";

	compareResultAsText(orderFlags, expected);
}
 
Example 3
Source Project: flink   Source File: ReplicatingDataSourceTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Tests compiler fail for join program with replicated data source and changing parallelism.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputChangingparallelism() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> stringTupleInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> replicatingFormat =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), stringTupleInfo));

	DataSet<Tuple1<String>> replicatedSide = env.createInput(replicatingFormat, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> plainSide = env.readCsvFile("/some/otherpath").types(String.class);

	// the join runs at a parallelism different from the replicated source
	DataSet<Tuple2<Tuple1<String>, Tuple1<String>>> joined = replicatedSide
			.join(plainSide).where("*").equalTo("*").setParallelism(DEFAULT_PARALLELISM+2);
	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> sink = joined.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// compiling the plan is what throws the expected CompilerException
	OptimizedPlan optimized = compileNoStats(plan);
}
 
Example 4
Source Project: flink   Source File: FirstNITCase.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testFirstNOnUngroupedDS() throws Exception {
	/*
	 * First-n on ungrouped data set
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple1<Integer>> seven = ds.first(7).map(new OneMapper()).sum(0);

	List<Tuple1<Integer>> result = seven.collect();

	String expected = "(7)\n";

	compareResultAsText(result, expected);
}
 
Example 5
Source Project: flink   Source File: TupleSerializerTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testTuple1StringArray() {
	Random rnd = new Random(289347567856686223L);
	
	String[] arr1 = new String[] {"abc", "",
			StringUtils.getRandomString(rnd, 10, 100),
			StringUtils.getRandomString(rnd, 15, 50),
			StringUtils.getRandomString(rnd, 30, 170),
			StringUtils.getRandomString(rnd, 14, 15),
			""};
	
	String[] arr2 = new String[] {"foo", "",
			StringUtils.getRandomString(rnd, 10, 100),
			StringUtils.getRandomString(rnd, 1000, 5000),
			StringUtils.getRandomString(rnd, 30000, 35000),
			StringUtils.getRandomString(rnd, 100*1024, 105*1024),
			"bar"};
	
	@SuppressWarnings("unchecked")
	Tuple1<String[]>[] testTuples = new Tuple1[] {
		new Tuple1<String[]>(arr1),
		new Tuple1<String[]>(arr2)
	};
	
	runTests(-1, testTuples);
}
 
Example 6
Source Project: flink   Source File: JoinDeadlockITCase.java    License: Apache License 2.0 6 votes vote down vote up
@Override
protected void testProgram() throws Exception {
	// Two chained self-joins over the same wrapped sequence; the test checks
	// that the resulting plan executes without deadlocking.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> sequence = env.generateSequence(0, 100000);

	DataSet<Tuple1<Long>> wrappedA = sequence.map(new TupleWrapper());
	DataSet<Tuple1<Long>> projected = wrappedA.project(0);
	DataSet<Tuple1<Long>> wrappedB = sequence.map(new TupleWrapper());

	projected.join(wrappedB).where(0).equalTo(0).projectFirst(0)
			.join(wrappedA).where(0).equalTo(0).projectFirst(0)
			.writeAsText(resultPath);

	env.execute();
}
 
Example 7
Source Project: flink   Source File: AggregateITCase.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testNestedAggregate() throws Exception {
	/*
	 * Nested Aggregate
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple1<Integer>> aggregateDs = ds.groupBy(1)
			.aggregate(Aggregations.MIN, 0)
			.aggregate(Aggregations.MIN, 0)
			.project(0);

	List<Tuple1<Integer>> result = aggregateDs.collect();

	String expected = "1\n";

	compareResultAsTuples(result, expected);
}
 
Example 8
Source Project: Flink-CEPplus   Source File: SortPartitionITCase.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testSortPartitionWithKeySelector2() throws Exception {
	/*
	 * Test sort partition on an extracted key
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	List<Tuple1<Boolean>> result = ds
		.map(new IdMapper<Tuple3<Integer, Long, String>>()).setParallelism(4) // parallelize input
		.sortPartition(new KeySelector<Tuple3<Integer, Long, String>, Tuple2<Integer, Long>>() {
			@Override
			public Tuple2<Integer, Long> getKey(Tuple3<Integer, Long, String> value) throws Exception {
				return new Tuple2<>(value.f0, value.f1);
			}
		}, Order.DESCENDING)
		.mapPartition(new OrderCheckMapper<>(new Tuple3Checker()))
		.distinct().collect();

	String expected = "(true)\n";

	compareResultAsText(result, expected);
}
 
Example 9
Source Project: Flink-CEPplus   Source File: ReplicatingDataSourceITCase.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testReplicatedSourceToJoin() throws Exception {
	/*
	 * Test replicated source going into join
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple1<Long>> source1 = env.createInput(new ReplicatingInputFormat<Long, GenericInputSplit>
			(new ParallelIteratorInputFormat<Long>(new NumberSequenceIterator(0L, 1000L))), BasicTypeInfo.LONG_TYPE_INFO)
			.map(new ToTuple());
	DataSet<Tuple1<Long>> source2 = env.generateSequence(0L, 1000L).map(new ToTuple());

	DataSet<Tuple> pairs = source1.join(source2).where(0).equalTo(0)
			.projectFirst(0)
			.sum(0);

	List<Tuple> result = pairs.collect();

	String expectedResult = "(500500)";

	compareResultAsText(result, expectedResult);
}
 
Example 10
Source Project: Flink-CEPplus   Source File: JoinDeadlockITCase.java    License: Apache License 2.0 6 votes vote down vote up
@Override
protected void testProgram() throws Exception {
	// Two chained self-joins over the same wrapped sequence; the test checks
	// that the resulting plan executes without deadlocking.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> sequence = env.generateSequence(0, 100000);

	DataSet<Tuple1<Long>> wrappedA = sequence.map(new TupleWrapper());
	DataSet<Tuple1<Long>> projected = wrappedA.project(0);
	DataSet<Tuple1<Long>> wrappedB = sequence.map(new TupleWrapper());

	projected.join(wrappedB).where(0).equalTo(0).projectFirst(0)
			.join(wrappedA).where(0).equalTo(0).projectFirst(0)
			.writeAsText(resultPath);

	env.execute();
}
 
Example 11
Source Project: flink   Source File: SortPartitionITCase.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testSortPartitionByTwoFieldExpressions() throws Exception {
	/*
	 * Test sort partition on two field expressions
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(2);

	DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
	List<Tuple1<Boolean>> result = ds
			.map(new IdMapper<Tuple5<Integer, Long, Integer, String, Long>>()).setParallelism(2) // parallelize input
			.sortPartition("f4", Order.ASCENDING)
			.sortPartition("f2", Order.DESCENDING)
			.mapPartition(new OrderCheckMapper<>(new Tuple5Checker()))
			.distinct().collect();

	String expected = "(true)\n";

	compareResultAsText(result, expected);
}
 
Example 12
Source Project: flink   Source File: SortPartitionITCase.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testSortPartitionByFieldExpression() throws Exception {
	/*
	 * Test sort partition on field expression
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	List<Tuple1<Boolean>> result = ds
			// Parameterized mapper avoids the raw type and makes the
			// method-level rawtypes/unchecked suppression unnecessary;
			// this also matches the sibling sort-partition tests.
			.map(new IdMapper<Tuple3<Integer, Long, String>>()).setParallelism(4) // parallelize input
			.sortPartition("f1", Order.DESCENDING)
			.mapPartition(new OrderCheckMapper<>(new Tuple3Checker()))
			.distinct().collect();

	String expected = "(true)\n";

	compareResultAsText(result, expected);
}
 
Example 13
Source Project: gelly-streaming   Source File: TestNumberOfEntities.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testNumberOfEdges() throws Exception {
	/*
	 * Test numberOfEdges() with the sample graph
	 */
	final String resultPath = getTempDirPath("result");
	// one output line per ingested edge: a running count 1..7
	final String expectedResult = "1\n" + "2\n" + "3\n" + "4\n" + "5\n" + "6\n" + "7\n";

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	GraphStream<Long, NullValue, Long> graph = new SimpleEdgeStream<>(GraphStreamTestUtils.getLongLongEdgeDataStream(env), env);
	graph.numberOfEdges().map(new MapFunction<Long, Tuple1<Long>>() {
		@Override
		public Tuple1<Long> map(Long count) throws Exception {
			// wrap the plain count so it can be written as CSV
			return new Tuple1<>(count);
		}
	}).writeAsCsv(resultPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	compareResultsByLinesInMemory(expectedResult, resultPath);
}
 
Example 14
Source Project: flink   Source File: ReplicatingDataSourceTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Tests compiler fail for join program with replicated data source behind reduce.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindReduce() {
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> stringTupleInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> replicatingFormat =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), stringTupleInfo));

	DataSet<Tuple1<String>> replicatedSide = env.createInput(replicatingFormat, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> plainSide = env.readCsvFile("/some/otherpath").types(String.class);

	// a reduce between the replicated source and the join
	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> sink = replicatedSide
			.reduce(new LastReduce())
			.join(plainSide).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// compiling the plan is what throws the expected CompilerException
	OptimizedPlan optimized = compileNoStats(plan);
}
 
Example 15
Source Project: Flink-CEPplus   Source File: ReplicatingDataSourceTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Tests compiler fail for join program with replicated data source behind map and changing parallelism.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindMapChangingparallelism() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> stringTupleInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> replicatingFormat =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), stringTupleInfo));

	DataSet<Tuple1<String>> replicatedSide = env.createInput(replicatingFormat, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> plainSide = env.readCsvFile("/some/otherpath").types(String.class);

	// the map after the replicated source changes the parallelism
	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> sink = replicatedSide
			.map(new IdMap()).setParallelism(DEFAULT_PARALLELISM+1)
			.join(plainSide).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// compiling the plan is what throws the expected CompilerException
	OptimizedPlan optimized = compileNoStats(plan);

}
 
Example 16
Source Project: flink   Source File: TupleSerializerTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testTuple1StringArray() {
	// fixed seed keeps the generated strings reproducible across runs
	Random random = new Random(289347567856686223L);
	
	String[] smallArray = new String[] {"abc", "",
			StringUtils.getRandomString(random, 10, 100),
			StringUtils.getRandomString(random, 15, 50),
			StringUtils.getRandomString(random, 30, 170),
			StringUtils.getRandomString(random, 14, 15),
			""};
	
	String[] largeArray = new String[] {"foo", "",
			StringUtils.getRandomString(random, 10, 100),
			StringUtils.getRandomString(random, 1000, 5000),
			StringUtils.getRandomString(random, 30000, 35000),
			StringUtils.getRandomString(random, 100*1024, 105*1024),
			"bar"};
	
	// generic array creation requires the raw Tuple1[]; safe because only
	// Tuple1<String[]> instances are stored
	@SuppressWarnings("unchecked")
	Tuple1<String[]>[] testTuples = new Tuple1[] {
		new Tuple1<String[]>(smallArray),
		new Tuple1<String[]>(largeArray)
	};
	
	runTests(-1, testTuples);
}
 
Example 17
Source Project: flink   Source File: MapVerticesITCase.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testWithtuple1Value() throws Exception {
	/*
	 * Test mapVertices() and change the value type to a Tuple1
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	Graph<Long, Long, Long> inputGraph = Graph.fromDataSet(TestGraphUtils.getLongLongVertexData(env),
		TestGraphUtils.getLongLongEdgeData(env), env);

	// map every vertex value from Long to Tuple1<Long>
	DataSet<Vertex<Long, Tuple1<Long>>> tupleVertices = inputGraph.mapVertices(new ToTuple1Mapper()).getVertices();
	List<Vertex<Long, Tuple1<Long>>> collected = tupleVertices.collect();

	expectedResult = "1,(1)\n" +
		"2,(2)\n" +
		"3,(3)\n" +
		"4,(4)\n" +
		"5,(5)\n";

	compareResultAsTuples(collected, expectedResult);
}
 
Example 18
Source Project: Alink   Source File: BisectingKMeansTrainBatchOp.java    License: Apache License 2.0 5 votes vote down vote up
@Override
public void open(Configuration parameters) {
    // Load the per-step broadcast state this rich function depends on.
    List<Long> bcDivisibleIndices = getRuntimeContext().getBroadcastVariable(DIVISIBLE_INDICES);
    divisibleIndices = new HashSet<>(bcDivisibleIndices);
    // A single IterInfo element describes the current position in the iteration.
    List<Tuple1<IterInfo>> bcIterInfo = getRuntimeContext().getBroadcastVariable(ITER_INFO);
    shouldUpdateState = bcIterInfo.get(0).f0.atLastInnerIterStep();
    // State is (re-)initialized only on the first superstep.
    shouldInitState = getIterationRuntimeContext().getSuperstepNumber() == 1;
    List<Tuple2<Long, DenseVector>> bcNewClusterCenters = getRuntimeContext().getBroadcastVariable(
        NEW_CLUSTER_CENTERS);
    newClusterCenters = new HashMap<>(0);
    bcNewClusterCenters.forEach(t -> newClusterCenters.put(t.f0, t.f1));
    if (distance instanceof EuclideanDistance) {
        // For Euclidean distance, precompute for each divisible node the plane
        // separating its two child centers: direction v = right - left, and
        // offset length = dot(midpoint, v).
        middlePlanes = new HashMap<>(0);
        divisibleIndices.forEach(parentIndex -> {
            long lchild = leftChildIndex(parentIndex);
            long rchild = rightChildIndex(parentIndex);
            DenseVector m = newClusterCenters.get(rchild).plus(newClusterCenters.get(lchild));
            DenseVector v = newClusterCenters.get(rchild).minus(newClusterCenters.get(lchild));
            // scaling the sum by 0.5 turns m into the midpoint of the two centers
            BLAS.scal(0.5, m);
            double length = BLAS.dot(m, v);
            middlePlanes.put(parentIndex, Tuple2.of(v, length));
        });
    }
    if (shouldInitState) {
        assignmentInState = new ArrayList<>();
    }
}
 
Example 19
Source Project: Flink-CEPplus   Source File: Graph.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a graph from a DataSet of edges.
 * Vertices are created automatically and their values are set
 * by applying the provided map function to the vertex IDs.
 *
 * @param edges a DataSet of edges.
 * @param vertexValueInitializer the mapper function that initializes the vertex values.
 * It allows to apply a map transformation on the vertex ID to produce an initial vertex value.
 * @param context the flink execution environment.
 * @return the newly created graph.
 */
public static <K, VV, EV> Graph<K, VV, EV> fromDataSet(DataSet<Edge<K, EV>> edges,
		final MapFunction<K, VV> vertexValueInitializer, ExecutionEnvironment context) {

	// The key type is the first field of the edge tuple type.
	TypeInformation<K> keyType = ((TupleTypeInfo<?>) edges.getType()).getTypeAt(0);

	// Derive the vertex value type from the initializer's declared output type.
	TypeInformation<VV> valueType = TypeExtractor.createTypeInfo(
			MapFunction.class, vertexValueInitializer.getClass(), 1, keyType, null);

	@SuppressWarnings({ "unchecked", "rawtypes" })
	TypeInformation<Vertex<K, VV>> returnType = (TypeInformation<Vertex<K, VV>>) new TupleTypeInfo(
			Vertex.class, keyType, valueType);

	// Build the vertex set: emit the endpoint ids of every edge, deduplicate,
	// then initialize each vertex value via the user-provided mapper.
	DataSet<Vertex<K, VV>> vertices = edges
		.flatMap(new EmitSrcAndTargetAsTuple1<>())
			.name("Source and target IDs")
		.distinct()
			.name("IDs")
		.map(new MapFunction<Tuple1<K>, Vertex<K, VV>>() {
			// reused output instance to avoid per-record allocation
			private Vertex<K, VV> output = new Vertex<>();

			public Vertex<K, VV> map(Tuple1<K> value) throws Exception {
				output.f0 = value.f0;
				output.f1 = vertexValueInitializer.map(value.f0);
				return output;
			}
		}).returns(returnType).withForwardedFields("f0").name("Initialize vertex values");

	return new Graph<>(vertices, edges, context);
}
 
Example 20
@Override
protected void testProgram() throws Exception {
	// Connected components via a delta iteration, joining the solution set
	// with a mirrored join order.
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// load vertex ids and undirected edges from CSV input
	DataSet<Tuple1<Long>> vertexIds = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> undirectedEdges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
			.flatMap(new ConnectedComponents.UndirectEdge());

	// every vertex starts in its own component (component id == vertex id)
	DataSet<Tuple2<Long, Long>> initialAssignment = vertexIds.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			initialAssignment.iterateDelta(initialAssignment, 100, 0);

	// propagate candidate component ids along edges and keep the minimum per vertex
	DataSet<Tuple2<Long, Long>> candidates = iteration.getWorkset()
			.join(undirectedEdges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1);

	// update assignments against the solution set (mirrored join order)
	DataSet<Tuple2<Long, Long>> improved = iteration.getSolutionSet()
			.join(candidates).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored());

	// delta and new workset are identical
	DataSet<Tuple2<Long, Long>> components = iteration.closeWith(improved, improved);

	components.writeAsCsv(resultPath, "\n", " ");

	env.execute("Connected Components Example");
}
 
Example 21
Source Project: flink   Source File: TupleTypeInfoTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testTupleTypeInfoSymmetricEqualityRelation() {
	// equals() between a concrete TupleTypeInfo and an anonymous
	// TupleTypeInfoBase subclass must agree in both directions.
	TupleTypeInfo<Tuple1<Integer>> concreteInfo = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO);

	TupleTypeInfoBase<Tuple1> anonymousInfo = new TupleTypeInfoBase<Tuple1>(
		Tuple1.class,
		(TypeInformation<?>)BasicTypeInfo.INT_TYPE_INFO) {

		private static final long serialVersionUID = -7985593598027660836L;

		@Override
		public TypeSerializer<Tuple1> createSerializer(ExecutionConfig config) {
			return null;
		}

		@Override
		protected TypeComparatorBuilder<Tuple1> createTypeComparatorBuilder() {
			return null;
		}

		@Override
		public String[] getFieldNames() {
			return new String[0];
		}

		@Override
		public int getFieldIndex(String fieldName) {
			return 0;
		}
	};

	boolean forward = concreteInfo.equals(anonymousInfo);
	boolean backward = anonymousInfo.equals(concreteInfo);

	assertTrue("Equality relation should be symmetric", forward == backward);
}
 
Example 22
@Override
protected void testProgram() throws Exception {
	// Connected components via a delta iteration, joining the solution set
	// with a mirrored join order.
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// load vertex ids and undirected edges from CSV input
	DataSet<Tuple1<Long>> vertexIds = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> undirectedEdges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
			.flatMap(new ConnectedComponents.UndirectEdge());

	// every vertex starts in its own component (component id == vertex id)
	DataSet<Tuple2<Long, Long>> initialAssignment = vertexIds.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			initialAssignment.iterateDelta(initialAssignment, 100, 0);

	// propagate candidate component ids along edges and keep the minimum per vertex
	DataSet<Tuple2<Long, Long>> candidates = iteration.getWorkset()
			.join(undirectedEdges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1);

	// update assignments against the solution set (mirrored join order)
	DataSet<Tuple2<Long, Long>> improved = iteration.getSolutionSet()
			.join(candidates).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored());

	// delta and new workset are identical
	DataSet<Tuple2<Long, Long>> components = iteration.closeWith(improved, improved);

	components.writeAsCsv(resultPath, "\n", " ");

	env.execute("Connected Components Example");
}
 
Example 23
Source Project: Flink-CEPplus   Source File: ReplicatingDataSourceTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tests join program with replicated data source behind multiple map ops.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMultiMaps() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> stringTupleInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> replicatingFormat =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), stringTupleInfo));

	DataSet<Tuple1<String>> replicatedSide = env.createInput(replicatingFormat, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> plainSide = env.readCsvFile("/some/otherpath").types(String.class);

	// a chain of identity-style operators between the replicated source and the join
	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> sink = replicatedSide
			.filter(new NoFilter())
			.mapPartition(new IdPMap())
			.flatMap(new IdFlatMap())
			.map(new IdMap())
			.join(plainSide).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan optimized = compileNoStats(plan);

	// check the optimized plan: the join should have FORWARD strategy on both inputs
	SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType firstInputStrategy = joinNode.getInput1().getShipStrategy();
	ShipStrategyType secondInputStrategy = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, firstInputStrategy);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, secondInputStrategy);
}
 
Example 24
Source Project: flink   Source File: StreamProjection.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Projects a {@link Tuple} {@link DataStream} to the previously selected fields.
 *
 * @return The projected DataStream.
 * @see Tuple
 * @see DataStream
 */
public <T0> SingleOutputStreamOperator<Tuple1<T0>> projectTuple1() {
	// resolve the concrete type of the selected field and build the
	// Tuple1 type information for the projected stream
	TypeInformation<?>[] fieldTypes = extractFieldTypes(fieldIndexes, dataStream.getType());
	TupleTypeInfo<Tuple1<T0>> outType = new TupleTypeInfo<Tuple1<T0>>(fieldTypes);

	StreamProject<IN, Tuple1<T0>> projectOperator = new StreamProject<IN, Tuple1<T0>>(
			fieldIndexes, outType.createSerializer(dataStream.getExecutionConfig()));
	return dataStream.transform("Projection", outType, projectOperator);
}
 
Example 25
Source Project: Flink-CEPplus   Source File: MusicProfiles.java    License: Apache License 2.0 5 votes vote down vote up
public void coGroup(Iterable<Tuple3<String, String, Integer>> triplets,
		Iterable<Tuple1<String>> invalidSongs, Collector<Tuple3<String, String, Integer>> out) {

	// emit nothing when the song is flagged invalid
	if (invalidSongs.iterator().hasNext()) {
		return;
	}
	// otherwise forward every triplet unchanged
	for (Tuple3<String, String, Integer> triplet : triplets) {
		out.collect(triplet);
	}
}
 
Example 26
@Override
public Tuple1<Long> reduce(Tuple1<Long> value1, Tuple1<Long> value2) {
	// Count processed records; once subtask 0 reaches the configured failure
	// position, log a marker line. NOTE(review): the actual failure appears to
	// be triggered elsewhere in the enclosing test — confirm against it.
	count++;
	if (count >= failurePos && getRuntimeContext().getIndexOfThisSubtask() == 0) {
		LOG.info(">>>>>>>>>>>>>>>>> Reached failing position <<<<<<<<<<<<<<<<<<<<<");
	}

	// sum in place, reusing value1 as the accumulator
	value1.f0 += value2.f0;
	return value1;
}
 
Example 27
Source Project: flink   Source File: InvalidVertexIdsValidator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the edge set input contains valid vertex Ids, i.e. that they
 * also exist in the vertex input set.
 *
 * @return a boolean stating whether a graph is valid
 *         with respect to its vertex ids.
 */
@Override
public boolean validate(Graph<K, VV, EV> graph) throws Exception {
	// collect the distinct vertex ids referenced by any edge
	DataSet<Tuple1<K>> edgeIds = graph.getEdges()
			.flatMap(new MapEdgeIds<>()).distinct();
	// coGroup with the vertex set; GroupInvalidIds presumably emits ids that
	// have no matching vertex — first(1) suffices since one offender invalidates
	DataSet<K> invalidIds = graph.getVertices().coGroup(edgeIds).where(0)
			.equalTo(0).with(new GroupInvalidIds<>()).first(1);

	// valid iff no invalid id was found
	return invalidIds.map(new KToTupleMap<>()).count() == 0;
}
 
Example 28
@Override
protected void testProgram() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	// a source that produces its single record immediately
	DataSet<Tuple1<Long>> fastSource = env.fromElements(new Tuple1<Long>(1L));

	// an artificially slow source feeding a blocking (batch) result
	DataSet<Tuple1<Long>> slowSource = env.generateSequence(0, 10).map(
			new MapFunction<Long, Tuple1<Long>>() {
				@Override
				public Tuple1<Long> map(Long value) throws Exception {
					Thread.sleep(200);

					return new Tuple1<Long>(value);
				}
			}
	);

	slowSource.join(slowSource)
			.where(0).equalTo(0).output(new DiscardingOutputFormat<Tuple2<Tuple1<Long>, Tuple1<Long>>>());

	// Join the slow blocking and the pipelined source. This test should verify that this works
	// w/o problems and the blocking result is not requested too early.
	fastSource.join(slowSource)
			.where(0).equalTo(0)
			.output(new DiscardingOutputFormat<Tuple2<Tuple1<Long>, Tuple1<Long>>>());

	env.execute("Consume one pipelined and one blocking result test job");
}
 
Example 29
Source Project: flink   Source File: GenericWriteAheadSinkTest.java    License: Apache License 2.0 5 votes vote down vote up
@Override
protected boolean sendValues(Iterable<Tuple1<Integer>> values, long checkpointId, long timestamp) throws Exception {
	// drain every committed tuple's payload into the sink's value list
	values.forEach(tuple -> this.values.add(tuple.f0));
	return true;
}
 
Example 30
Source Project: Flink-CEPplus   Source File: LambdaExtractionTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testLambdaWithNonGenericResultType() {
	// the lambda's declared result type (Boolean) is non-generic, so the
	// extractor should resolve it straight to a BasicTypeInfo
	MapFunction<Tuple2<Tuple1<Integer>, Boolean>, Boolean> mapper = (i) -> null;

	TypeInformation<?> resultType = TypeExtractor.getMapReturnTypes(mapper, NESTED_TUPLE_BOOLEAN_TYPE, null, true);
	assertTrue(resultType instanceof BasicTypeInfo);
	assertEquals(BasicTypeInfo.BOOLEAN_TYPE_INFO, resultType);
}