org.apache.flink.api.common.functions.FlatJoinFunction Java Examples

The following examples show how to use org.apache.flink.api.common.functions.FlatJoinFunction. Each snippet is taken from an open-source project; the source file, project, and license are listed above each example.
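Before the framework-internal examples below, here is a minimal, self-contained sketch of how a FlatJoinFunction is typically implemented and used with the DataSet API. The data sets, field layout, and class name are illustrative assumptions rather than code from the projects referenced on this page; unlike a JoinFunction, a FlatJoinFunction may emit zero, one, or several records per matching pair through its Collector.

import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public class FlatJoinFunctionSketch {

	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// Hypothetical inputs: (userId, name) and (userId, score).
		DataSet<Tuple2<Integer, String>> users = env.fromElements(
				Tuple2.of(1, "alice"), Tuple2.of(2, "bob"));
		DataSet<Tuple2<Integer, Integer>> scores = env.fromElements(
				Tuple2.of(1, 42), Tuple2.of(1, 7), Tuple2.of(2, 3));

		DataSet<String> highScores = users.join(scores)
				.where(0)
				.equalTo(0)
				.with(new FlatJoinFunction<Tuple2<Integer, String>, Tuple2<Integer, Integer>, String>() {
					@Override
					public void join(Tuple2<Integer, String> user,
							Tuple2<Integer, Integer> score,
							Collector<String> out) {
						// Emit only pairs whose score exceeds 10; other pairs produce no output.
						if (score.f1 > 10) {
							out.collect(user.f1 + ":" + score.f1);
						}
					}
				});

		highScores.print();
	}
}
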
Example #1
Source File: JoinedStreams.java    From Flink-CEPplus with Apache License 2.0
/**
 * Completes the join operation with the user function that is executed
 * for each combination of elements with the same key in a window.
 *
 * <p>Note: This method's return type does not support setting an operator-specific parallelism.
 * Due to binary backwards compatibility, this cannot be altered. Use the
 * {@link #with(FlatJoinFunction, TypeInformation)} method to set an operator-specific parallelism.
 */
public <T> DataStream<T> apply(FlatJoinFunction<T1, T2, T> function, TypeInformation<T> resultType) {
	//clean the closure
	function = input1.getExecutionEnvironment().clean(function);

	coGroupedWindowedStream = input1.coGroup(input2)
		.where(keySelector1)
		.equalTo(keySelector2)
		.window(windowAssigner)
		.trigger(trigger)
		.evictor(evictor)
		.allowedLateness(allowedLateness);

	return coGroupedWindowedStream
			.apply(new FlatJoinCoGroupFunction<>(function), resultType);
}
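For context, a hedged sketch of how user code reaches the apply variant above via DataStream.join; the stream names, tuple layout, and window size are assumptions for illustration only.

import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;

public class WindowedFlatJoinSketch {

	/** Joins hypothetical (orderId, amount) orders with (orderId, paidAmount) payments per 10-second window. */
	static DataStream<String> fullyPaidOrders(
			DataStream<Tuple2<String, Integer>> orders,
			DataStream<Tuple2<String, Integer>> payments) {

		return orders.join(payments)
				.where(new KeySelector<Tuple2<String, Integer>, String>() {
					@Override
					public String getKey(Tuple2<String, Integer> order) {
						return order.f0;
					}
				})
				.equalTo(new KeySelector<Tuple2<String, Integer>, String>() {
					@Override
					public String getKey(Tuple2<String, Integer> payment) {
						return payment.f0;
					}
				})
				.window(TumblingProcessingTimeWindows.of(Time.seconds(10)))
				// The two-argument apply shown above: FlatJoinFunction plus explicit result TypeInformation.
				.apply(new FlatJoinFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String>() {
					@Override
					public void join(Tuple2<String, Integer> order,
							Tuple2<String, Integer> payment,
							Collector<String> out) {
						// Emit only orders whose payment covers the amount; others produce no output.
						if (payment.f1 >= order.f1) {
							out.collect(order.f0);
						}
					}
				}, TypeInformation.of(String.class));
	}
}
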
 
Example #2
Source File: TimeBoundedStreamJoin.java    From flink with Apache License 2.0
TimeBoundedStreamJoin(
		FlinkJoinType joinType,
		long leftLowerBound,
		long leftUpperBound,
		long allowedLateness,
		BaseRowTypeInfo leftType,
		BaseRowTypeInfo rightType,
		GeneratedFunction<FlatJoinFunction<BaseRow, BaseRow, BaseRow>> genJoinFunc) {
	this.joinType = joinType;
	this.leftRelativeSize = -leftLowerBound;
	this.rightRelativeSize = leftUpperBound;
	minCleanUpInterval = (leftRelativeSize + rightRelativeSize) / 2;
	if (allowedLateness < 0) {
		throw new IllegalArgumentException("The allowed lateness must be non-negative.");
	}
	this.allowedLateness = allowedLateness;
	this.leftType = leftType;
	this.rightType = rightType;
	this.genJoinFunc = genJoinFunc;
}
 
Example #3
Source File: JoinedStreams.java    From flink with Apache License 2.0
/**
 * Completes the join operation with the user function that is executed
 * for each combination of elements with the same key in a window.
 *
 * <p>Note: This method's return type does not support setting an operator-specific parallelism.
 * Due to binary backwards compatibility, this cannot be altered. Use the
 * {@link #with(FlatJoinFunction, TypeInformation)} method to set an operator-specific parallelism.
 */
public <T> DataStream<T> apply(FlatJoinFunction<T1, T2, T> function, TypeInformation<T> resultType) {
	//clean the closure
	function = input1.getExecutionEnvironment().clean(function);

	coGroupedWindowedStream = input1.coGroup(input2)
		.where(keySelector1)
		.equalTo(keySelector2)
		.window(windowAssigner)
		.trigger(trigger)
		.evictor(evictor)
		.allowedLateness(allowedLateness);

	return coGroupedWindowedStream
			.apply(new FlatJoinCoGroupFunction<>(function), resultType);
}
 
Example #4
Source File: JoinedStreams.java    From Flink-CEPplus with Apache License 2.0
/**
 * Completes the join operation with the user function that is executed
 * for each combination of elements with the same key in a window.
 *
 * <p>Note: This method's return type does not support setting an operator-specific parallelism.
 * Due to binary backwards compatibility, this cannot be altered. Use the
 * {@link #with(FlatJoinFunction)} method to set an operator-specific parallelism.
 */
public <T> DataStream<T> apply(FlatJoinFunction<T1, T2, T> function) {
	TypeInformation<T> resultType = TypeExtractor.getBinaryOperatorReturnType(
		function,
		FlatJoinFunction.class,
		0,
		1,
		2,
		new int[]{2, 0},
		input1.getType(),
		input2.getType(),
		"Join",
		false);

	return apply(function, resultType);
}
 
Example #5
Source File: AbstractMergeIterator.java    From flink with Apache License 2.0
/**
 * Crosses a single value from the second side with N values, all sharing a common key.
 * Effectively realizes an <i>N:1</i> join.
 *
 * @param val1      The value from the <i>1</i> side.
 * @param firstValN The first of the values from the <i>N</i> side.
 * @param valsN     Iterator over remaining <i>N</i> side values.
 * @throws Exception Forwards all exceptions thrown by the stub.
 */
private void crossSecond1withNValues(T2 val1, T1 firstValN,
									Iterator<T1> valsN, FlatJoinFunction<T1, T2, O> joinFunction, Collector<O> collector) throws Exception {
	T2 copy2 = createCopy(serializer2, val1, this.copy2);
	joinFunction.join(firstValN, copy2, collector);

	// set copy and join first element
	boolean more = true;
	do {
		final T1 nRec = valsN.next();

		if (valsN.hasNext()) {
			copy2 = createCopy(serializer2, val1, this.copy2);
			joinFunction.join(nRec, copy2, collector);
		} else {
			joinFunction.join(nRec, val1, collector);
			more = false;
		}
	}
	while (more);
}
 
Example #6
Source File: TypeExtractor.java    From flink with Apache License 2.0
@PublicEvolving
public static <IN1, IN2, OUT> TypeInformation<OUT> getFlatJoinReturnTypes(FlatJoinFunction<IN1, IN2, OUT> joinInterface,
		TypeInformation<IN1> in1Type, TypeInformation<IN2> in2Type, String functionName, boolean allowMissing)
{
	return getBinaryOperatorReturnType(
		(Function) joinInterface,
		FlatJoinFunction.class,
		0,
		1,
		2,
		new int[]{2, 0},
		in1Type,
		in2Type,
		functionName,
		allowMissing);
}
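A hedged sketch of calling the extractor above directly, for instance to obtain the produced TypeInformation before constructing an operator; the anonymous joiner and the function name are illustrative assumptions.

import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.util.Collector;

public class FlatJoinReturnTypeSketch {

	static TypeInformation<Integer> extractResultType() {
		FlatJoinFunction<String, String, Integer> joiner =
				new FlatJoinFunction<String, String, Integer>() {
					@Override
					public void join(String first, String second, Collector<Integer> out) {
						out.collect(first.length() + second.length());
					}
				};

		// Same parameter order as Example #6: function, input types, function name, allowMissing.
		// With allowMissing set to true, a failed extraction surfaces later instead of throwing here.
		return TypeExtractor.getFlatJoinReturnTypes(
				joiner,
				BasicTypeInfo.STRING_TYPE_INFO,
				BasicTypeInfo.STRING_TYPE_INFO,
				"IllustrativeJoiner",
				true);
	}
}
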
 
Example #7
Source File: AbstractMergeIterator.java    From Flink-CEPplus with Apache License 2.0
/**
 * Crosses a single value from the first input with N values, all sharing a common key.
 * Effectively realizes a <i>1:N</i> join.
 *
 * @param val1      The value from the <i>1</i> side.
 * @param firstValN The first of the values from the <i>N</i> side.
 * @param valsN     Iterator over remaining <i>N</i> side values.
 * @throws Exception Forwards all exceptions thrown by the stub.
 */
private void crossFirst1withNValues(final T1 val1, final T2 firstValN,
									final Iterator<T2> valsN, final FlatJoinFunction<T1, T2, O> joinFunction, final Collector<O> collector)
		throws Exception {
	T1 copy1 = createCopy(serializer1, val1, this.copy1);
	joinFunction.join(copy1, firstValN, collector);

	// set copy and join first element
	boolean more = true;
	do {
		final T2 nRec = valsN.next();

		if (valsN.hasNext()) {
			copy1 = createCopy(serializer1, val1, this.copy1);
			joinFunction.join(copy1, nRec, collector);
		} else {
			joinFunction.join(val1, nRec, collector);
			more = false;
		}
	}
	while (more);
}
 
Example #8
Source File: AbstractMergeIterator.java    From Flink-CEPplus with Apache License 2.0
/**
 * Crosses a single value from the second side with N values, all sharing a common key.
 * Effectively realizes an <i>N:1</i> join.
 *
 * @param val1      The value from the <i>1</i> side.
 * @param firstValN The first of the values from the <i>N</i> side.
 * @param valsN     Iterator over remaining <i>N</i> side values.
 * @throws Exception Forwards all exceptions thrown by the stub.
 */
private void crossSecond1withNValues(T2 val1, T1 firstValN,
									Iterator<T1> valsN, FlatJoinFunction<T1, T2, O> joinFunction, Collector<O> collector) throws Exception {
	T2 copy2 = createCopy(serializer2, val1, this.copy2);
	joinFunction.join(firstValN, copy2, collector);

	// set copy and join first element
	boolean more = true;
	do {
		final T1 nRec = valsN.next();

		if (valsN.hasNext()) {
			copy2 = createCopy(serializer2, val1, this.copy2);
			joinFunction.join(nRec, copy2, collector);
		} else {
			joinFunction.join(nRec, val1, collector);
			more = false;
		}
	}
	while (more);
}
 
Example #9
Source File: JoinOperator.java    From flink with Apache License 2.0
public EquiJoin(DataSet<I1> input1, DataSet<I2> input2,
		Keys<I1> keys1, Keys<I2> keys2, FlatJoinFunction<I1, I2, OUT> function,
		TypeInformation<OUT> returnType, JoinHint hint, String joinLocationName, JoinType type) {
	super(input1, input2, keys1, keys2, returnType, hint, type);

	if (function == null) {
		throw new NullPointerException();
	}

	this.function = function;
	this.joinLocationName = joinLocationName;
}
 
Example #10
Source File: InnerJoinOperatorBaseTest.java    From flink with Apache License 2.0
@Test
public void testJoinPlain(){
	final FlatJoinFunction<String, String, Integer> joiner = new FlatJoinFunction<String, String, Integer>() {

		@Override
		public void join(String first, String second, Collector<Integer> out) throws Exception {
			out.collect(first.length());
			out.collect(second.length());
		}
	};

	@SuppressWarnings({ "rawtypes", "unchecked" })
	InnerJoinOperatorBase<String, String, Integer,
					FlatJoinFunction<String, String, Integer>> base = new InnerJoinOperatorBase(joiner,
			new BinaryOperatorInformation(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO,
					BasicTypeInfo.INT_TYPE_INFO), new int[0], new int[0], "TestJoiner");

	List<String> inputData1 = new ArrayList<String>(Arrays.asList("foo", "bar", "foobar"));
	List<String> inputData2 = new ArrayList<String>(Arrays.asList("foobar", "foo"));
	List<Integer> expected = new ArrayList<Integer>(Arrays.asList(3, 3, 6, 6));

	try {
		ExecutionConfig executionConfig = new ExecutionConfig();
		executionConfig.disableObjectReuse();
		List<Integer> resultSafe = base.executeOnCollections(inputData1, inputData2, null, executionConfig);
		executionConfig.enableObjectReuse();
		List<Integer> resultRegular = base.executeOnCollections(inputData1, inputData2, null, executionConfig);

		assertEquals(expected, resultSafe);
		assertEquals(expected, resultRegular);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #11
Source File: AbstractOuterJoinDriver.java    From Flink-CEPplus with Apache License 2.0
@Override
public void run() throws Exception {
	final Counter numRecordsOut = this.taskContext.getMetricGroup().getIOMetricGroup().getNumRecordsOutCounter();
	
	final FlatJoinFunction<IT1, IT2, OT> joinStub = this.taskContext.getStub();
	final Collector<OT> collector = new CountingCollector<>(this.taskContext.getOutputCollector(), numRecordsOut);
	final JoinTaskIterator<IT1, IT2, OT> outerJoinIterator = this.outerJoinIterator;
	
	while (this.running && outerJoinIterator.callWithNextKey(joinStub, collector)) {
	}
}
 
Example #12
Source File: JoinDriver.java    From Flink-CEPplus with Apache License 2.0
@Override
public void run() throws Exception {
	final Counter numRecordsOut = this.taskContext.getMetricGroup().getIOMetricGroup().getNumRecordsOutCounter();
	final FlatJoinFunction<IT1, IT2, OT> joinStub = this.taskContext.getStub();
	final Collector<OT> collector = new CountingCollector<>(this.taskContext.getOutputCollector(), numRecordsOut);
	final JoinTaskIterator<IT1, IT2, OT> joinIterator = this.joinIterator;
	
	while (this.running && joinIterator.callWithNextKey(joinStub, collector)) {
	}
}
 
Example #13
Source File: JoinOperator.java    From flink with Apache License 2.0
public EquiJoin(DataSet<I1> input1, DataSet<I2> input2,
		Keys<I1> keys1, Keys<I2> keys2, FlatJoinFunction<I1, I2, OUT> generatedFunction, JoinFunction<I1, I2, OUT> function,
		TypeInformation<OUT> returnType, JoinHint hint, String joinLocationName, JoinType type) {
	super(input1, input2, keys1, keys2, returnType, hint, type);

	this.joinLocationName = joinLocationName;

	if (function == null) {
		throw new NullPointerException();
	}

	this.function = generatedFunction;
}
 
Example #14
Source File: JoinOperator.java    From Flink-CEPplus with Apache License 2.0
public <R> EquiJoin<I1, I2, R> with(JoinFunction<I1, I2, R> function) {
	if (function == null) {
		throw new NullPointerException("Join function must not be null.");
	}
	FlatJoinFunction<I1, I2, R> generatedFunction = new WrappingFlatJoinFunction<>(clean(function));
	TypeInformation<R> returnType = TypeExtractor.getJoinReturnTypes(function, getInput1Type(), getInput2Type(), Utils.getCallLocationName(), true);
	return new EquiJoin<>(getInput1(), getInput2(), getKeys1(), getKeys2(), generatedFunction, function, returnType, getJoinHint(), Utils.getCallLocationName(), joinType);
}
 
Example #15
Source File: JoinOperator.java    From flink with Apache License 2.0
public <R> EquiJoin<I1, I2, R> with(JoinFunction<I1, I2, R> function) {
	if (function == null) {
		throw new NullPointerException("Join function must not be null.");
	}
	FlatJoinFunction<I1, I2, R> generatedFunction = new WrappingFlatJoinFunction<>(clean(function));
	TypeInformation<R> returnType = TypeExtractor.getJoinReturnTypes(function, getInput1Type(), getInput2Type(), Utils.getCallLocationName(), true);
	return new EquiJoin<>(getInput1(), getInput2(), getKeys1(), getKeys2(), generatedFunction, function, returnType, getJoinHint(), Utils.getCallLocationName(), joinType);
}
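Examples #14 and #15 hand the wrapped JoinFunction to the EquiJoin as a FlatJoinFunction. A rough, simplified sketch of what such an adapter looks like follows; it is not the actual WrappingFlatJoinFunction, which also extends Flink's WrappingFunction to forward open/close calls and the runtime context.

import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.util.Collector;

// Simplified illustration only; see the caveat in the paragraph above.
public class SimpleJoinFunctionAdapter<I1, I2, OUT> implements FlatJoinFunction<I1, I2, OUT> {

	private final JoinFunction<I1, I2, OUT> wrapped;

	public SimpleJoinFunctionAdapter(JoinFunction<I1, I2, OUT> wrapped) {
		this.wrapped = wrapped;
	}

	@Override
	public void join(I1 first, I2 second, Collector<OUT> out) throws Exception {
		// A JoinFunction returns exactly one value per pair; emit it through the collector.
		out.collect(wrapped.join(first, second));
	}
}
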
 
Example #16
Source File: JoinOperator.java    From Flink-CEPplus with Apache License 2.0
public EquiJoin(DataSet<I1> input1, DataSet<I2> input2,
		Keys<I1> keys1, Keys<I2> keys2, FlatJoinFunction<I1, I2, OUT> generatedFunction, JoinFunction<I1, I2, OUT> function,
		TypeInformation<OUT> returnType, JoinHint hint, String joinLocationName, JoinType type) {
	super(input1, input2, keys1, keys2, returnType, hint, type);

	this.joinLocationName = joinLocationName;

	if (function == null) {
		throw new NullPointerException();
	}

	this.function = generatedFunction;

	UdfOperatorUtils.analyzeDualInputUdf(this, JoinFunction.class, joinLocationName, function, keys1, keys2);
}
 
Example #17
Source File: AbstractMergeOuterJoinIterator.java    From flink with Apache License 2.0
private void joinLeftKeyValuesWithNull(Iterator<T1> values, FlatJoinFunction<T1, T2, O> joinFunction, Collector<O> collector) throws Exception {
	while (values.hasNext()) {
		T1 next = values.next();
		this.copy1 = createCopy(serializer1, next, copy1);
		joinFunction.join(copy1, null, collector);
	}
}
 
Example #18
Source File: ProcTimeBoundedStreamJoin.java    From flink with Apache License 2.0
public ProcTimeBoundedStreamJoin(
		FlinkJoinType joinType,
		long leftLowerBound,
		long leftUpperBound,
		BaseRowTypeInfo leftType,
		BaseRowTypeInfo rightType,
		GeneratedFunction<FlatJoinFunction<BaseRow, BaseRow, BaseRow>> genJoinFunc) {
	super(joinType, leftLowerBound, leftUpperBound, 0L, leftType, rightType, genJoinFunc);
}
 
Example #19
Source File: UdfAnalyzerExamplesTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testConnectedComponentsExamplesComponentIdFilter() {
	compareAnalyzerResultWithAnnotationsDualInput(FlatJoinFunction.class, ComponentIdFilter.class,
			TypeInformation.of(new TypeHint<Tuple2<Long, Long>>(){}),
			TypeInformation.of(new TypeHint<Tuple2<Long, Long>>(){}),
			TypeInformation.of(new TypeHint<Tuple2<Long, Long>>(){}));
}
 
Example #20
Source File: RowTimeBoundedStreamJoin.java    From flink with Apache License 2.0
public RowTimeBoundedStreamJoin(
		FlinkJoinType joinType,
		long leftLowerBound,
		long leftUpperBound,
		long allowedLateness,
		BaseRowTypeInfo leftType,
		BaseRowTypeInfo rightType,
		GeneratedFunction<FlatJoinFunction<BaseRow, BaseRow, BaseRow>> genJoinFunc,
		int leftTimeIdx,
		int rightTimeIdx) {
	super(joinType, leftLowerBound, leftUpperBound, allowedLateness, leftType, rightType, genJoinFunc);
	this.leftTimeIdx = leftTimeIdx;
	this.rightTimeIdx = rightTimeIdx;
}
 
Example #21
Source File: OuterJoinOperatorBase.java    From Flink-CEPplus with Apache License 2.0
@Override
protected List<OUT> executeOnCollections(List<IN1> leftInput, List<IN2> rightInput, RuntimeContext runtimeContext, ExecutionConfig executionConfig) throws Exception {
	TypeInformation<IN1> leftInformation = getOperatorInfo().getFirstInputType();
	TypeInformation<IN2> rightInformation = getOperatorInfo().getSecondInputType();
	TypeInformation<OUT> outInformation = getOperatorInfo().getOutputType();

	TypeComparator<IN1> leftComparator = buildComparatorFor(0, executionConfig, leftInformation);
	TypeComparator<IN2> rightComparator = buildComparatorFor(1, executionConfig, rightInformation);

	TypeSerializer<IN1> leftSerializer = leftInformation.createSerializer(executionConfig);
	TypeSerializer<IN2> rightSerializer = rightInformation.createSerializer(executionConfig);

	OuterJoinListIterator<IN1, IN2> outerJoinIterator =
			new OuterJoinListIterator<>(leftInput, leftSerializer, leftComparator,
					rightInput, rightSerializer, rightComparator, outerJoinType);

	// --------------------------------------------------------------------
	// Run UDF
	// --------------------------------------------------------------------
	FlatJoinFunction<IN1, IN2, OUT> function = userFunction.getUserCodeObject();

	FunctionUtils.setFunctionRuntimeContext(function, runtimeContext);
	FunctionUtils.openFunction(function, this.parameters);

	List<OUT> result = new ArrayList<>();
	Collector<OUT> collector = new CopyingListCollector<>(result, outInformation.createSerializer(executionConfig));

	while (outerJoinIterator.next()) {
		IN1 left = outerJoinIterator.getLeft();
		IN2 right = outerJoinIterator.getRight();
		function.join(left == null ? null : leftSerializer.copy(left), right == null ? null : rightSerializer.copy(right), collector);
	}

	FunctionUtils.closeFunction(function);

	return result;
}
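Because the outer-join path above passes null for the side that has no match, a FlatJoinFunction used in an outer join has to tolerate null inputs. A hedged user-level sketch follows; the data sets, field layout, and names are illustrative assumptions.

import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public class OuterFlatJoinSketch {

	/** Left outer join: every user appears in the output, with -1 when no score matched. */
	static DataSet<Tuple2<String, Integer>> usersWithScores(
			DataSet<Tuple2<Integer, String>> users,
			DataSet<Tuple2<Integer, Integer>> scores) {

		return users.leftOuterJoin(scores)
				.where(0)
				.equalTo(0)
				.with(new FlatJoinFunction<Tuple2<Integer, String>, Tuple2<Integer, Integer>, Tuple2<String, Integer>>() {
					@Override
					public void join(Tuple2<Integer, String> user,
							Tuple2<Integer, Integer> score,
							Collector<Tuple2<String, Integer>> out) {
						// For a left outer join, score is null when the user has no match.
						out.collect(Tuple2.of(user.f1, score == null ? -1 : score.f1));
					}
				});
	}
}
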
 
Example #22
Source File: InnerJoinOperatorBaseTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testJoinPlain(){
	final FlatJoinFunction<String, String, Integer> joiner = new FlatJoinFunction<String, String, Integer>() {

		@Override
		public void join(String first, String second, Collector<Integer> out) throws Exception {
			out.collect(first.length());
			out.collect(second.length());
		}
	};

	@SuppressWarnings({ "rawtypes", "unchecked" })
	InnerJoinOperatorBase<String, String, Integer,
					FlatJoinFunction<String, String, Integer>> base = new InnerJoinOperatorBase(joiner,
			new BinaryOperatorInformation(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO,
					BasicTypeInfo.INT_TYPE_INFO), new int[0], new int[0], "TestJoiner");

	List<String> inputData1 = new ArrayList<String>(Arrays.asList("foo", "bar", "foobar"));
	List<String> inputData2 = new ArrayList<String>(Arrays.asList("foobar", "foo"));
	List<Integer> expected = new ArrayList<Integer>(Arrays.asList(3, 3, 6, 6));

	try {
		ExecutionConfig executionConfig = new ExecutionConfig();
		executionConfig.disableObjectReuse();
		List<Integer> resultSafe = base.executeOnCollections(inputData1, inputData2, null, executionConfig);
		executionConfig.enableObjectReuse();
		List<Integer> resultRegular = base.executeOnCollections(inputData1, inputData2, null, executionConfig);

		assertEquals(expected, resultSafe);
		assertEquals(expected, resultRegular);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #23
Source File: AbstractCachedBuildSideJoinDriver.java    From flink with Apache License 2.0
@Override
public void run() throws Exception {
	final Counter numRecordsOut = taskContext.getMetricGroup().getIOMetricGroup().getNumRecordsOutCounter();
	final FlatJoinFunction<IT1, IT2, OT> matchStub = this.taskContext.getStub();
	final Collector<OT> collector = new CountingCollector<>(this.taskContext.getOutputCollector(), numRecordsOut);
	
	while (this.running && matchIterator != null && matchIterator.callWithNextKey(matchStub, collector)) {
	}
}
 
Example #24
Source File: JoinDriver.java    From flink with Apache License 2.0
@Override
public void run() throws Exception {
	final Counter numRecordsOut = this.taskContext.getMetricGroup().getIOMetricGroup().getNumRecordsOutCounter();
	final FlatJoinFunction<IT1, IT2, OT> joinStub = this.taskContext.getStub();
	final Collector<OT> collector = new CountingCollector<>(this.taskContext.getOutputCollector(), numRecordsOut);
	final JoinTaskIterator<IT1, IT2, OT> joinIterator = this.joinIterator;
	
	while (this.running && joinIterator.callWithNextKey(joinStub, collector)) {
	}
}
 
Example #25
Source File: AbstractMergeOuterJoinIterator.java    From Flink-CEPplus with Apache License 2.0
private void joinRightKeyValuesWithNull(Iterator<T2> values, FlatJoinFunction<T1, T2, O> joinFunction, Collector<O> collector) throws Exception {
	while (values.hasNext()) {
		T2 next = values.next();
		this.copy2 = createCopy(serializer2, next, copy2);
		joinFunction.join(null, copy2, collector);
	}
}
 
Example #26
Source File: AbstractMergeIterator.java    From flink with Apache License 2.0
protected void crossMatchingGroup(Iterator<T1> values1, Iterator<T2> values2, FlatJoinFunction<T1, T2, O> joinFunction, Collector<O> collector) throws Exception {
	final T1 firstV1 = values1.next();
	final T2 firstV2 = values2.next();

	final boolean v1HasNext = values1.hasNext();
	final boolean v2HasNext = values2.hasNext();

	// check if one side is already empty
	// this check could be omitted if we put this in MatchTask.
	// then we can derive the local strategy (with build side).

	if (v1HasNext) {
		if (v2HasNext) {
			// both sides contain more than one value
			// TODO: Decide which side to spill and which to block!
			crossMwithNValues(firstV1, values1, firstV2, values2, joinFunction, collector);
		} else {
			crossSecond1withNValues(firstV2, firstV1, values1, joinFunction, collector);
		}
	} else {
		if (v2HasNext) {
			crossFirst1withNValues(firstV1, firstV2, values2, joinFunction, collector);
		} else {
			// both sides contain only one value
			joinFunction.join(firstV1, firstV2, collector);
		}
	}
}
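Conceptually, crossing two matching groups means invoking the FlatJoinFunction once for every pair of values from the two sides. Ignoring the serializer copies, object reuse, and spilling that the merge iterator above has to deal with, a naive in-memory equivalent of the general M:N case might look like the sketch below; the class, method, and the List-based signature are assumptions for illustration.

import java.util.List;

import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.util.Collector;

public class CrossMatchingGroupSketch {

	// Naive illustration of the M:N case: join every left value with every right value.
	static <T1, T2, O> void crossAllPairs(
			List<T1> leftGroup,
			List<T2> rightGroup,
			FlatJoinFunction<T1, T2, O> joinFunction,
			Collector<O> collector) throws Exception {

		for (T1 left : leftGroup) {
			for (T2 right : rightGroup) {
				joinFunction.join(left, right, collector);
			}
		}
	}
}
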
 
Example #27
Source File: HashVsSortMiniBenchmark.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testBuildFirst() {
	try {
		TestData.TupleGenerator generator1 = new TestData.TupleGenerator(SEED1, INPUT_1_SIZE / 10, 100, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TestData.TupleGenerator generator2 = new TestData.TupleGenerator(SEED2, INPUT_2_SIZE, 100, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator input1 = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator input2 = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final FlatJoinFunction matcher = new NoOpMatcher();
		
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();
		
		long start = System.nanoTime();
		
		// compare with iterator values
		final ReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
				new ReusingBuildFirstHashJoinIterator<>(
					input1, input2, this.serializer1.getSerializer(), this.comparator1, 
						this.serializer2.getSerializer(), this.comparator2, this.pairComparator11,
						this.memoryManager, this.ioManager, this.parentTask, 1, false, false, true);
		
		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector));
		
		iterator.close();
		
		long elapsed = System.nanoTime() - start;
		double msecs = elapsed / (1000.0 * 1000.0);
		
		System.out.println("Hash Build First Took " + msecs + " msecs.");
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example #28
Source File: AbstractMergeOuterJoinIterator.java    From flink with Apache License 2.0
private void joinRightKeyValuesWithNull(Iterator<T2> values, FlatJoinFunction<T1, T2, O> joinFunction, Collector<O> collector) throws Exception {
	while (values.hasNext()) {
		T2 next = values.next();
		this.copy2 = createCopy(serializer2, next, copy2);
		joinFunction.join(null, copy2, collector);
	}
}
 
Example #29
Source File: TupleUnwrappingJoiner.java    From flink with Apache License 2.0
public TupleUnwrappingJoiner(FlatJoinFunction<I1, I2, OUT> wrapped) {
	super(wrapped);
}
 
Example #30
Source File: ReusingHashJoinIteratorITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testBuildSecondWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TestData.TupleGenerator generator1 = new TestData.TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TestData.TupleGenerator generator2 = new TestData.TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		final FlatJoinFunction matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		ReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new ReusingBuildSecondHashJoinIterator<>(
				input1, input2, this.recordSerializer, this.record1Comparator, 
				this.recordSerializer, this.record2Comparator, this.recordPairComparator,
				this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);
		
		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector));
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}