org.apache.flink.api.java.tuple.Tuple1 Java Examples

The following examples show how to use org.apache.flink.api.java.tuple.Tuple1. Each example names its original project and source file.
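For orientation before the project snippets: Tuple1 is the one-field member of Flink's tuple family, exposing a single public field f0 plus the generic tuple accessors. A minimal standalone sketch (illustrative only, not taken from any of the projects below):

import org.apache.flink.api.java.tuple.Tuple1;

public class Tuple1Demo {
	public static void main(String[] args) {
		// Construct directly or via the static factory.
		Tuple1<String> a = new Tuple1<>("hello");
		Tuple1<String> b = Tuple1.of("world");

		// The single field is the public member f0; it can also be
		// read and written positionally.
		System.out.println(a.f0);      // hello
		b.setField("world!", 0);
		String s = b.getField(0);
		System.out.println(s);         // world!
	}
}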
Example #1
Source File: MapVerticesITCase.java    From flink with Apache License 2.0
@Test
public void testWithtuple1Value() throws Exception {
	/*
	 * Test mapVertices() and change the value type to a Tuple1
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	Graph<Long, Long, Long> graph = Graph.fromDataSet(TestGraphUtils.getLongLongVertexData(env),
		TestGraphUtils.getLongLongEdgeData(env), env);

	DataSet<Vertex<Long, Tuple1<Long>>> mappedVertices = graph.mapVertices(new ToTuple1Mapper()).getVertices();
	List<Vertex<Long, Tuple1<Long>>> result = mappedVertices.collect();

	expectedResult = "1,(1)\n" +
		"2,(2)\n" +
		"3,(3)\n" +
		"4,(4)\n" +
		"5,(5)\n";

	compareResultAsTuples(result, expectedResult);
}
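The ToTuple1Mapper above is a helper of the test class that the snippet does not include. Since mapVertices() takes a MapFunction from the vertex to the new value type, a plausible sketch (an assumption, not the original code) is:

private static final class ToTuple1Mapper implements MapFunction<Vertex<Long, Long>, Tuple1<Long>> {
	@Override
	public Tuple1<Long> map(Vertex<Long, Long> vertex) {
		// Wrap the plain Long vertex value in a Tuple1<Long>.
		return new Tuple1<>(vertex.getValue());
	}
}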
 
Example #2
Source File: FirstNITCase.java    From flink with Apache License 2.0
@Test
public void testFirstNOnUngroupedDS() throws Exception {
	/*
	 * First-n on ungrouped data set
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple1<Integer>> seven = ds.first(7).map(new OneMapper()).sum(0);

	List<Tuple1<Integer>> result = seven.collect();

	String expected = "(7)\n";

	compareResultAsText(result, expected);
}
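OneMapper is likewise not included in the snippet. Given that first(7) followed by sum(0) yields (7), it presumably maps every record to a constant 1; a sketch under that assumption:

private static final class OneMapper implements MapFunction<Tuple3<Integer, Long, String>, Tuple1<Integer>> {
	@Override
	public Tuple1<Integer> map(Tuple3<Integer, Long, String> value) {
		// Emit a constant 1 per record, so sum(0) counts the records.
		return Tuple1.of(1);
	}
}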
 
Example #3
Source File: SortPartitionITCase.java    From flink with Apache License 2.0
@Test
public void testSortPartitionByKeyField() throws Exception {
	/*
	 * Test sort partition on key field
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	List<Tuple1<Boolean>> result = ds
			.map(new IdMapper<Tuple3<Integer, Long, String>>()).setParallelism(4) // parallelize input
			.sortPartition(1, Order.DESCENDING)
			.mapPartition(new OrderCheckMapper<>(new Tuple3Checker()))
			.distinct().collect();

	String expected = "(true)\n";

	compareResultAsText(result, expected);
}
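OrderCheckMapper and Tuple3Checker are helpers of the test class. A simplified sketch of the mechanism (the names and the checker interface are assumptions, not the original code):

// needs org.apache.flink.api.common.functions.MapPartitionFunction,
// org.apache.flink.util.Collector, java.util.Iterator, java.io.Serializable

interface OrderChecker<T> extends Serializable {
	boolean inOrder(T first, T second);
}

class OrderCheckMapper<T> implements MapPartitionFunction<T, Tuple1<Boolean>> {
	private final OrderChecker<T> checker;

	OrderCheckMapper(OrderChecker<T> checker) {
		this.checker = checker;
	}

	@Override
	public void mapPartition(Iterable<T> values, Collector<Tuple1<Boolean>> out) {
		// Compare consecutive elements and emit one flag per partition.
		Iterator<T> it = values.iterator();
		if (!it.hasNext()) {
			return;
		}
		T previous = it.next();
		while (it.hasNext()) {
			T current = it.next();
			if (!checker.inOrder(previous, current)) {
				out.collect(new Tuple1<>(false));
				return;
			}
			previous = current;
		}
		out.collect(new Tuple1<>(true));
	}
}

The trailing distinct() collapses the per-partition flags, so the job produces the single row (true) only if every partition was sorted as requested.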
 
Example #4
Source File: ReplicatingDataSourceTest.java    From flink with Apache License 2.0
/**
 * Tests compiler fail for join program with replicated data source and changing parallelism.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputChangingparallelism() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.join(source2).where("*").equalTo("*").setParallelism(DEFAULT_PARALLELISM+2)
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
}
 
Example #5
Source File: TupleSerializerTest.java    From flink with Apache License 2.0
@Test
public void testTuple1StringArray() {
	Random rnd = new Random(289347567856686223L);
	
	String[] arr1 = new String[] {"abc", "",
			StringUtils.getRandomString(rnd, 10, 100),
			StringUtils.getRandomString(rnd, 15, 50),
			StringUtils.getRandomString(rnd, 30, 170),
			StringUtils.getRandomString(rnd, 14, 15),
			""};
	
	String[] arr2 = new String[] {"foo", "",
			StringUtils.getRandomString(rnd, 10, 100),
			StringUtils.getRandomString(rnd, 1000, 5000),
			StringUtils.getRandomString(rnd, 30000, 35000),
			StringUtils.getRandomString(rnd, 100*1024, 105*1024),
			"bar"};
	
	@SuppressWarnings("unchecked")
	Tuple1<String[]>[] testTuples = new Tuple1[] {
		new Tuple1<String[]>(arr1),
		new Tuple1<String[]>(arr2)
	};
	
	runTests(-1, testTuples);
}
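The @SuppressWarnings("unchecked") and the raw new Tuple1[] are unavoidable here: Java does not allow creating arrays of a parameterized type, so new Tuple1<String[]>[2] would not compile.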
 
Example #6
Source File: JoinDeadlockITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> longs = env.generateSequence(0, 100000);

	DataSet<Tuple1<Long>> longT1 = longs.map(new TupleWrapper());
	DataSet<Tuple1<Long>> longT2 = longT1.project(0);
	DataSet<Tuple1<Long>> longT3 = longs.map(new TupleWrapper());

	longT2.join(longT3).where(0).equalTo(0).projectFirst(0)
			.join(longT1).where(0).equalTo(0).projectFirst(0)
			.writeAsText(resultPath);

	env.execute();
}
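TupleWrapper is a helper of the test class; a plausible sketch, assuming it simply lifts each Long into a Tuple1 so that the positional keys where(0)/equalTo(0) apply:

private static final class TupleWrapper implements MapFunction<Long, Tuple1<Long>> {
	@Override
	public Tuple1<Long> map(Long value) {
		return new Tuple1<>(value);
	}
}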
 
Example #7
Source File: AggregateITCase.java    From flink with Apache License 2.0
@Test
public void testNestedAggregate() throws Exception {
	/*
	 * Nested Aggregate
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple1<Integer>> aggregateDs = ds.groupBy(1)
			.aggregate(Aggregations.MIN, 0)
			.aggregate(Aggregations.MIN, 0)
			.project(0);

	List<Tuple1<Integer>> result = aggregateDs.collect();

	String expected = "1\n";

	compareResultAsTuples(result, expected);
}
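Note that the second aggregate runs on the ungrouped result of the first: it reduces the per-group minima of field 0 to the single global minimum, which is 1 for this data set.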
 
Example #8
Source File: SortPartitionITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testSortPartitionWithKeySelector2() throws Exception {
	/*
	 * Test sort partition on an extracted key
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	List<Tuple1<Boolean>> result = ds
		.map(new IdMapper<Tuple3<Integer, Long, String>>()).setParallelism(4) // parallelize input
		.sortPartition(new KeySelector<Tuple3<Integer, Long, String>, Tuple2<Integer, Long>>() {
			@Override
			public Tuple2<Integer, Long> getKey(Tuple3<Integer, Long, String> value) throws Exception {
				return new Tuple2<>(value.f0, value.f1);
			}
		}, Order.DESCENDING)
		.mapPartition(new OrderCheckMapper<>(new Tuple3Checker()))
		.distinct().collect();

	String expected = "(true)\n";

	compareResultAsText(result, expected);
}
 
Example #9
Source File: ReplicatingDataSourceITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testReplicatedSourceToJoin() throws Exception {
	/*
	 * Test replicated source going into join
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple1<Long>> source1 = env.createInput(new ReplicatingInputFormat<Long, GenericInputSplit>
			(new ParallelIteratorInputFormat<Long>(new NumberSequenceIterator(0L, 1000L))), BasicTypeInfo.LONG_TYPE_INFO)
			.map(new ToTuple());
	DataSet<Tuple1<Long>> source2 = env.generateSequence(0L, 1000L).map(new ToTuple());

	DataSet<Tuple> pairs = source1.join(source2).where(0).equalTo(0)
			.projectFirst(0)
			.sum(0);

	List<Tuple> result = pairs.collect();

	String expectedResult = "(500500)";

	compareResultAsText(result, expectedResult);
}
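The expected value 500500 is simply 0 + 1 + ... + 1000. Replicating source1 lets every parallel instance read the full sequence, so the join can execute without repartitioning that input.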
 
Example #10
Source File: SortPartitionITCase.java    From flink with Apache License 2.0
@Test
public void testSortPartitionByTwoFieldExpressions() throws Exception {
	/*
	 * Test sort partition on two field expressions
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(2);

	DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
	List<Tuple1<Boolean>> result = ds
			.map(new IdMapper<Tuple5<Integer, Long, Integer, String, Long>>()).setParallelism(2) // parallelize input
			.sortPartition("f4", Order.ASCENDING)
			.sortPartition("f2", Order.DESCENDING)
			.mapPartition(new OrderCheckMapper<>(new Tuple5Checker()))
			.distinct().collect();

	String expected = "(true)\n";

	compareResultAsText(result, expected);
}
 
Example #11
Source File: HBaseConnectorITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testTableInputFormat() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	DataSet<Tuple1<Integer>> result = env
		.createInput(new InputFormatForTestTable())
		.reduce(new ReduceFunction<Tuple1<Integer>>(){

			@Override
			public Tuple1<Integer> reduce(Tuple1<Integer> v1, Tuple1<Integer> v2) throws Exception {
				return Tuple1.of(v1.f0 + v2.f0);
			}
		});

	List<Tuple1<Integer>> resultSet = result.collect();

	assertEquals(1, resultSet.size());
	assertEquals(360, (int) resultSet.get(0).f0);
}
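Since reduce() preserves the element type, the same reduction can be written as a lambda without extra type hints (a sketch, not from the original source):

DataSet<Tuple1<Integer>> result = env
	.createInput(new InputFormatForTestTable())
	.reduce((v1, v2) -> Tuple1.of(v1.f0 + v2.f0));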
 
Example #12
Source File: ReplicatingDataSourceTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests compiler fail for join program with replicated data source behind map and changing parallelism.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindMapChangingparallelism() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.map(new IdMap()).setParallelism(DEFAULT_PARALLELISM+1)
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

}
 
Example #13
Source File: TestNumberOfEntities.java    From gelly-streaming with Apache License 2.0
@Test
public void testNumberOfEdges() throws Exception {
	/*
	 * Test numberOfEdges() with the sample graph
	 */
	final String resultPath = getTempDirPath("result");
	final String expectedResult = "1\n" +
			"2\n" +
			"3\n" +
			"4\n" +
			"5\n" +
			"6\n" +
			"7\n";

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	GraphStream<Long, NullValue, Long> graph = new SimpleEdgeStream<>(GraphStreamTestUtils.getLongLongEdgeDataStream(env), env);
	graph.numberOfEdges().map(new MapFunction<Long, Tuple1<Long>>() {
		@Override
		public Tuple1<Long> map(Long value) throws Exception {
			return new Tuple1<>(value);
		}
	}).writeAsCsv(resultPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();
	compareResultsByLinesInMemory(expectedResult, resultPath);
}
 
Example #14
Source File: SortPartitionITCase.java    From flink with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testSortPartitionByFieldExpression() throws Exception {
	/*
	 * Test sort partition on field expression
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	List<Tuple1<Boolean>> result = ds
			.map(new IdMapper()).setParallelism(4) // parallelize input
			.sortPartition("f1", Order.DESCENDING)
			.mapPartition(new OrderCheckMapper<>(new Tuple3Checker()))
			.distinct().collect();

	String expected = "(true)\n";

	compareResultAsText(result, expected);
}
 
Example #15
Source File: ReplicatingDataSourceTest.java    From flink with Apache License 2.0
/**
 * Tests compiler fail for join program with replicated data source behind reduce.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindReduce() {
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.reduce(new LastReduce())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
}
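LastReduce is not shown; judging by its name, it keeps the later of its two arguments, collapsing the data set to one element. A plausible sketch:

private static final class LastReduce implements ReduceFunction<Tuple1<String>> {
	@Override
	public Tuple1<String> reduce(Tuple1<String> value1, Tuple1<String> value2) {
		// Discard the earlier value; the final survivor is the last element.
		return value2;
	}
}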
 
Example #16
Source File: JoinDeadlockITCase.java    From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> longs = env.generateSequence(0, 100000);

	DataSet<Tuple1<Long>> longT1 = longs.map(new TupleWrapper());
	DataSet<Tuple1<Long>> longT2 = longT1.project(0);
	DataSet<Tuple1<Long>> longT3 = longs.map(new TupleWrapper());

	longT2.join(longT3).where(0).equalTo(0).projectFirst(0)
			.join(longT1).where(0).equalTo(0).projectFirst(0)
			.writeAsText(resultPath);

	env.execute();
}
 
Example #17
Source File: ConnectedComponentsWithSolutionSetFirstITCase.java    From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
			.join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1);

	DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
			.join(minNeighbor).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds);

	result.writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
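DuplicateValue comes from ConnectedComponentsITCase; a plausible sketch, assuming it initializes each vertex's component with its own id:

public static final class DuplicateValue<T> implements MapFunction<Tuple1<T>, Tuple2<T, T>> {
	@Override
	public Tuple2<T, T> map(Tuple1<T> vertex) {
		// (vertex id, component id), with the component initialized to the id itself.
		return new Tuple2<>(vertex.f0, vertex.f0);
	}
}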
 
Example #18
Source File: InvalidVertexIdsValidator.java    From flink with Apache License 2.0
/**
 * Checks that the edge set input contains valid vertex Ids, i.e. that they
 * also exist in the vertex input set.
 *
 * @return a boolean stating whether a graph is valid
 *         with respect to its vertex ids.
 */
@Override
public boolean validate(Graph<K, VV, EV> graph) throws Exception {
	DataSet<Tuple1<K>> edgeIds = graph.getEdges()
			.flatMap(new MapEdgeIds<>()).distinct();
	DataSet<K> invalidIds = graph.getVertices().coGroup(edgeIds).where(0)
			.equalTo(0).with(new GroupInvalidIds<>()).first(1);

	return invalidIds.map(new KToTupleMap<>()).count() == 0;
}
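MapEdgeIds is a helper of the validator; a plausible sketch, assuming it emits both endpoints of every edge as Tuple1 records for distinct() to deduplicate:

private static final class MapEdgeIds<K, EV> implements FlatMapFunction<Edge<K, EV>, Tuple1<K>> {
	@Override
	public void flatMap(Edge<K, EV> edge, Collector<Tuple1<K>> out) {
		// Edge extends Tuple3<K, K, EV>: f0 is the source id, f1 the target id.
		out.collect(new Tuple1<>(edge.f0));
		out.collect(new Tuple1<>(edge.f1));
	}
}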
 
Example #19
Source File: BisectingKMeansTrainBatchOp.java    From Alink with Apache License 2.0
@Override
public void open(Configuration parameters) {
    List<Long> bcDivisibleIndices = getRuntimeContext().getBroadcastVariable(DIVISIBLE_INDICES);
    divisibleIndices = new HashSet<>(bcDivisibleIndices);
    List<Tuple1<IterInfo>> bcIterInfo = getRuntimeContext().getBroadcastVariable(ITER_INFO);
    shouldUpdateState = bcIterInfo.get(0).f0.atLastInnerIterStep();
    shouldInitState = getIterationRuntimeContext().getSuperstepNumber() == 1;
    List<Tuple2<Long, DenseVector>> bcNewClusterCenters = getRuntimeContext().getBroadcastVariable(
        NEW_CLUSTER_CENTERS);
    newClusterCenters = new HashMap<>(0);
    bcNewClusterCenters.forEach(t -> newClusterCenters.put(t.f0, t.f1));
    if (distance instanceof EuclideanDistance) {
        middlePlanes = new HashMap<>(0);
        divisibleIndices.forEach(parentIndex -> {
            long lchild = leftChildIndex(parentIndex);
            long rchild = rightChildIndex(parentIndex);
            DenseVector m = newClusterCenters.get(rchild).plus(newClusterCenters.get(lchild));
            DenseVector v = newClusterCenters.get(rchild).minus(newClusterCenters.get(lchild));
            BLAS.scal(0.5, m);
            double length = BLAS.dot(m, v);
            middlePlanes.put(parentIndex, Tuple2.of(v, length));
        });
    }
    if (shouldInitState) {
        assignmentInState = new ArrayList<>();
    }
}
 
Example #20
Source File: ReplicatingDataSourceTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests cross program with replicated data source behind map and filter.
 */
@Test
public void checkCrossWithReplicatedSourceInputBehindMap() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.map(new IdMap())
			.filter(new NoFilter())
			.cross(source2)
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// when cross should have forward strategy on both sides
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode crossNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType crossIn1 = crossNode.getInput1().getShipStrategy();
	ShipStrategyType crossIn2 = crossNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn2);
}
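IdMap and NoFilter are not shown; plausible sketches are an identity map and an all-pass filter whose only purpose is to place operators between the replicated source and the cross without disturbing the data:

private static final class IdMap implements MapFunction<Tuple1<String>, Tuple1<String>> {
	@Override
	public Tuple1<String> map(Tuple1<String> value) {
		return value;
	}
}

private static final class NoFilter implements FilterFunction<Tuple1<String>> {
	@Override
	public boolean filter(Tuple1<String> value) {
		return true;
	}
}

Because neither operator changes the partitioning, the optimizer can keep the FORWARD ship strategy that the test asserts on both cross inputs.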
 
Example #21
Source File: MaximumDegree.java    From Flink-CEPplus with Apache License 2.0
@Override
public void join(Edge<T, ET> edge, Tuple1<T> id, Collector<Edge<T, ET>> out)
		throws Exception {
	if (id == null) {
		out.collect(edge);
	}
}
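The null check indicates that this FlatJoinFunction is intended for an outer join: id is null exactly when the edge's endpoint found no match in the Tuple1 id set, and only those unmatched edges are forwarded.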
 
Example #22
Source File: JavaSerializerTest.java    From Flink-CEPplus with Apache License 2.0
@Override
protected Serializable[] getTestData() {
	return new Serializable[] {
			new Integer(42),
			new File("/some/path/that/I/made/up"),

			// an object that is not in the classpath
			OBJECT_OUT_OF_CLASSPATH,

			// an object that is in the classpath with a nested object not in the classpath
			new Tuple1<>(OBJECT_OUT_OF_CLASSPATH)
	};
}
 
Example #23
Source File: WebLogAnalysis.java    From flink with Apache License 2.0
/**
 * If the visit iterator is empty, all pairs of the rank iterator are emitted.
 * Otherwise, no pair is emitted.
 *
 * <p>Output Format:
 * 0: RANK
 * 1: URL
 * 2: AVG_DURATION
 */
@Override
public void coGroup(Iterable<Tuple3<Integer, String, Integer>> ranks, Iterable<Tuple1<String>> visits, Collector<Tuple3<Integer, String, Integer>> out) {
	// Check if there is an entry in the visits relation
	if (!visits.iterator().hasNext()) {
		for (Tuple3<Integer, String, Integer> next : ranks) {
			// Emit all rank pairs
			out.collect(next);
		}
	}
}
 
Example #24
Source File: UdfAnalyzerExamplesTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testLogisticRegressionExamplesSumGradient() {
	compareAnalyzerResultWithAnnotationsSingleInputWithKeys(ReduceFunction.class, SumGradient.class,
			TypeInformation.of(new TypeHint<Tuple1<Double>>(){}),
			TypeInformation.of(new TypeHint<Tuple1<Double>>(){}),
			new String[] { "0" });
}
 
Example #25
Source File: LambdaExtractionTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testLambdaWithNonGenericResultType() {
	MapFunction<Tuple2<Tuple1<Integer>, Boolean>, Boolean> f = (i) -> null;

	TypeInformation<?> ti = TypeExtractor.getMapReturnTypes(f, NESTED_TUPLE_BOOLEAN_TYPE, null, true);
	assertTrue(ti instanceof BasicTypeInfo);
	assertEquals(BasicTypeInfo.BOOLEAN_TYPE_INFO, ti);
}
 
Example #26
Source File: GenericWriteAheadSinkTest.java    From flink with Apache License 2.0
@Override
protected boolean sendValues(Iterable<Tuple1<Integer>> values, long checkpointId, long timestamp) throws Exception {
	for (Tuple1<Integer> value : values) {
		this.values.add(value.f0);
	}
	return true;
}
 
Example #27
Source File: ConsumePipelinedAndBlockingResultITCase.java    From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	DataSet<Tuple1<Long>> pipelinedSource = env.fromElements(new Tuple1<Long>(1L));

	DataSet<Tuple1<Long>> slowBlockingSource = env.generateSequence(0, 10).map(
			new MapFunction<Long, Tuple1<Long>>() {
				@Override
				public Tuple1<Long> map(Long value) throws Exception {
					Thread.sleep(200);

					return new Tuple1<Long>(value);
				}
			}
	);

	slowBlockingSource.join(slowBlockingSource)
			.where(0).equalTo(0).output(new DiscardingOutputFormat<Tuple2<Tuple1<Long>, Tuple1<Long>>>());

	// Join the slow blocking and the pipelined source. This test should verify that this works
	// w/o problems and the blocking result is not requested too early.
	pipelinedSource.join(slowBlockingSource)
			.where(0).equalTo(0)
			.output(new DiscardingOutputFormat<Tuple2<Tuple1<Long>, Tuple1<Long>>>());

	env.execute("Consume one pipelined and one blocking result test job");
}
 
Example #28
Source File: Graph.java    From Flink-CEPplus with Apache License 2.0
/**
 * Creates a graph from a DataSet of edges.
 * Vertices are created automatically and their values are set
 * by applying the provided map function to the vertex IDs.
 *
 * @param edges a DataSet of edges.
 * @param vertexValueInitializer the mapper function that initializes the vertex values.
 * It allows to apply a map transformation on the vertex ID to produce an initial vertex value.
 * @param context the flink execution environment.
 * @return the newly created graph.
 */
public static <K, VV, EV> Graph<K, VV, EV> fromDataSet(DataSet<Edge<K, EV>> edges,
		final MapFunction<K, VV> vertexValueInitializer, ExecutionEnvironment context) {

	TypeInformation<K> keyType = ((TupleTypeInfo<?>) edges.getType()).getTypeAt(0);

	TypeInformation<VV> valueType = TypeExtractor.createTypeInfo(
			MapFunction.class, vertexValueInitializer.getClass(), 1, keyType, null);

	@SuppressWarnings({ "unchecked", "rawtypes" })
	TypeInformation<Vertex<K, VV>> returnType = (TypeInformation<Vertex<K, VV>>) new TupleTypeInfo(
			Vertex.class, keyType, valueType);

	DataSet<Vertex<K, VV>> vertices = edges
		.flatMap(new EmitSrcAndTargetAsTuple1<>())
			.name("Source and target IDs")
		.distinct()
			.name("IDs")
		.map(new MapFunction<Tuple1<K>, Vertex<K, VV>>() {
			private Vertex<K, VV> output = new Vertex<>();

			public Vertex<K, VV> map(Tuple1<K> value) throws Exception {
				output.f0 = value.f0;
				output.f1 = vertexValueInitializer.map(value.f0);
				return output;
			}
		}).returns(returnType).withForwardedFields("f0").name("Initialize vertex values");

	return new Graph<>(vertices, edges, context);
}
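EmitSrcAndTargetAsTuple1 is not shown here; it plausibly has the same shape as the MapEdgeIds sketch under Example #18: a FlatMapFunction<Edge<K, EV>, Tuple1<K>> that collects new Tuple1<>(edge.f0) and new Tuple1<>(edge.f1) for every edge, which distinct() then reduces to the unique vertex IDs.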