org.apache.flink.util.Collector Java Examples

The following examples show how to use org.apache.flink.util.Collector. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: UrlDBFunction.java    From flink-crawler with Apache License 2.0 6 votes vote down vote up
/**
 * Records the score reported for a PLD and refreshes the running average kept in
 * {@code _averageDomainScore}.
 *
 * <p>The previous total is reconstructed from the current average and the number of scored
 * domains; if this PLD was already scored, its stored score is removed from the total before
 * the new one is added.
 *
 * @param domainScore the PLD and its newly reported score
 * @param context     the function context (not used here)
 * @param out         the output collector (nothing is emitted here)
 * @throws Exception if reading or updating {@code _domainScore} fails
 */
@Override
public void processElement2(DomainScore domainScore, Context context, Collector<FetchUrl> out)
        throws Exception {

    // Clamp to a small positive floor so we don't wind up with DBZ problems.
    final float clampedScore = Math.max(0.01f, domainScore.getScore());
    final String pld = domainScore.getPld();
    LOGGER.debug("UrlDBFunction ({}/{}) setting '{}' average score to {}",
            _partition, _parallelism, pld, clampedScore);

    // The PLD may be brand new here, may have been seen by an earlier call of this method,
    // or may have been seen via the onTimer call. So state may or may not exist for it, and
    // it may or may not already be tracked in the (non-state) set of scored domains.
    final int previousCount = _scoredDomains.size();
    float scoreTotal = _averageDomainScore * previousCount;
    final boolean alreadyScored = _scoredDomains.contains(pld);
    if (alreadyScored) {
        // Take the old contribution out before adding the new one.
        scoreTotal -= _domainScore.value();
    }
    scoreTotal += clampedScore;

    _domainScore.update(clampedScore);
    _scoredDomains.add(pld);
    _averageDomainScore = scoreTotal / _scoredDomains.size();
}
 
Example #2
Source File: ScatterGatherIteration.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Prepares the scatter function for the vertex found in {@code state} and lets it send
 * messages along {@code edges}. Does nothing when {@code state} is empty.
 *
 * @param edges the edges co-grouped with the vertex
 * @param state the vertex together with a (value, degree, degree) triple; only the first
 *              element of the iterable is read
 * @param out   collector handed to the scatter function
 * @throws Exception forwarded from the scatter function
 */
@Override
public void coGroup(Iterable<Edge<K, EV>> edges, Iterable<Vertex<K, Tuple3<VV, LongValue, LongValue>>> state,
					Collector<Tuple2<K, Message>> out) throws Exception {

	final Iterator<Vertex<K, Tuple3<VV, LongValue, LongValue>>> vertices = state.iterator();
	if (!vertices.hasNext()) {
		// no vertex on the state side for this key
		return;
	}

	final Vertex<K, Tuple3<VV, LongValue, LongValue>> current = vertices.next();
	final Tuple3<VV, LongValue, LongValue> valueWithDegrees = current.f1;

	// copy id and plain value into the reusable vertex that is passed to the scatter function
	nextVertex.f0 = current.f0;
	nextVertex.f1 = valueWithDegrees.f0;

	scatterFunction.setInDegree(valueWithDegrees.f1.getValue());
	scatterFunction.setOutDegree(valueWithDegrees.f2.getValue());

	scatterFunction.set(edges.iterator(), out, current.getId());
	scatterFunction.sendMessages(nextVertex);
}
 
Example #3
Source File: LocalClusteringCoefficient.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Emits one record per triangle vertex. For each vertex a two-bit group of the result's
 * bitmask is inspected: {@code two} is emitted when both bits of the group are set,
 * {@code one} otherwise.
 *
 * @param value the triangle listing result
 * @param out   collector receiving (vertex id, count) records; the same {@code output}
 *              instance is reused for all three emissions
 * @throws Exception declared by the interface
 */
@Override
public void flatMap(TriangleListing.Result<T> value, Collector<Tuple2<T, LongValue>> out)
		throws Exception {
	final byte flags = value.getBitmask().getValue();

	// one two-bit mask per triangle vertex
	final int mask0 = 0b000011;
	final int mask1 = 0b001100;
	final int mask2 = 0b110000;

	output.f0 = value.getVertexId0();
	output.f1 = ((flags & mask0) != mask0) ? one : two;
	out.collect(output);

	output.f0 = value.getVertexId1();
	output.f1 = ((flags & mask1) != mask1) ? one : two;
	out.collect(output);

	output.f0 = value.getVertexId2();
	output.f1 = ((flags & mask2) != mask2) ? one : two;
	out.collect(output);
}
 
Example #4
Source File: CoBroadcastWithKeyedOperatorTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the keyed list state of every key against {@code expectedKeyedStates}.
 * Nothing is emitted and the broadcast value itself is not inspected.
 *
 * @param value the broadcast element (unused)
 * @param ctx   context used to visit the keyed state
 * @param out   the output collector (unused)
 * @throws Exception forwarded from the state access
 */
@Override
public void processBroadcastElement(Integer value, Context ctx, Collector<String> out) throws Exception {
	final KeyedStateFunction<String, ListState<String>> verifier =
			new KeyedStateFunction<String, ListState<String>>() {
				@Override
				public void process(String key, ListState<String> state) throws Exception {
					// materialize the state contents so they can be compared as a list
					final List<String> actual = new ArrayList<>();
					for (String element : state.get()) {
						actual.add(element);
					}
					assertEquals(expectedKeyedStates.get(key), actual);
				}
			};

	ctx.applyToKeyedState(listStateDesc, verifier);
}
 
Example #5
Source File: ProcTimeRangeBoundedPrecedingFunction.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Buffers the incoming row in {@code inputState} under the current processing time.
 *
 * <p>A cleanup timer is registered for every element. When the row is the first one stored
 * for the current timestamp, an additional processing-time timer is registered for
 * {@code currentTime + 1}.
 *
 * @param input the row to buffer
 * @param ctx   context giving access to the timer service
 * @param out   the output collector (nothing is emitted here)
 * @throws Exception forwarded from state access
 */
@Override
public void processElement(
		BaseRow input,
		KeyedProcessFunction<K, BaseRow, BaseRow>.Context ctx,
		Collector<BaseRow> out) throws Exception {
	final long now = ctx.timerService().currentProcessingTime();
	// register state-cleanup timer
	registerProcessingCleanupTimer(ctx, now);

	// buffer the incoming event: append it to the rows already stored for this timestamp
	List<BaseRow> rowsAtNow = inputState.get(now);
	if (rowsAtNow == null) {
		// a null value means that this is the first event received for this timestamp
		rowsAtNow = new ArrayList<>();
		// register timer to process the event once the current millisecond has passed
		ctx.timerService().registerProcessingTimeTimer(now + 1);
	}
	rowsAtNow.add(input);
	inputState.put(now, rowsAtNow);
}
 
Example #6
Source File: AlsPredictBatchOp.java    From Alink with Apache License 2.0 6 votes vote down vote up
/**
 * Extracts (vertex id, factors) pairs from the model rows of one side.
 *
 * @param model    the operator whose data set holds the model rows
 * @param identity which side to keep: rows for which
 *                 {@code AlsModelDataConverter.getIsUser} is true map to 0, all others to 1
 * @return a data set with one (id, factors) tuple per matching row
 */
private static DataSet<Tuple2<Long, float[]>> getFactors(BatchOperator<?> model, final int identity) {
    final FlatMapFunction<Row, Tuple2<Long, float[]>> sideSelector =
        new FlatMapFunction<Row, Tuple2<Long, float[]>>() {
            @Override
            public void flatMap(Row value, Collector<Tuple2<Long, float[]>> out) throws Exception {
                final int rowIdentity = AlsModelDataConverter.getIsUser(value) ? 0 : 1;
                if (rowIdentity == identity) {
                    long idx = AlsModelDataConverter.getVertexId(value);
                    float[] factors = AlsModelDataConverter.getFactors(value);
                    out.collect(Tuple2.of(idx, factors));
                }
            }
        };

    return model.getDataSet().flatMap(sideSelector);
}
 
Example #7
Source File: CoGroupConnectedComponentsSecondITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Lowers the component id of the current record to the smallest candidate id, and emits the
 * record only when that actually lowered it.
 *
 * @param candidates candidate (id, component id) pairs
 * @param current    the current (id, component id) record; only the first element is read
 * @param out        collector receiving the updated record
 * @throws RuntimeException if {@code current} is empty
 */
@Override
public void coGroup(Iterable<Tuple2<Long, Long>> candidates, Iterable<Tuple2<Long, Long>> current, Collector<Tuple2<Long, Long>> out) {
	final Iterator<Tuple2<Long, Long>> currentIter = current.iterator();
	if (!currentIter.hasNext()) {
		throw new RuntimeException("Error: Id not encountered before.");
	}
	final Tuple2<Long, Long> existing = currentIter.next();

	// find the smallest component id among all candidates
	long smallest = Long.MAX_VALUE;
	for (Tuple2<Long, Long> candidate : candidates) {
		smallest = Math.min(smallest, candidate.f1);
	}

	if (smallest < existing.f1) {
		existing.f1 = smallest;
		out.collect(existing);
	}
}
 
Example #8
Source File: Main.java    From flink-learning with Apache License 2.0 6 votes vote down vote up
/**
 * Counts the words in {@code WORDS}, prints the per-word counts, and then prints the number
 * of input elements.
 *
 * @param args command line arguments, registered as global job parameters
 * @throws Exception if the job execution fails
 */
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    DataSource<String> dataSource = env.fromElements(WORDS);

    dataSource.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
        @Override
        public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
            for (String word : line.split("\\W+")) {
                // split() yields a leading "" when the line starts with a non-word
                // character; that is not a word and must not be counted
                if (!word.isEmpty()) {
                    out.collect(new Tuple2<>(word, 1));
                }
            }
        }
    })
            .groupBy(0)
            .sum(1)
            .print();

    long count = dataSource.count();
    System.out.println(count);
}
 
Example #9
Source File: CoBroadcastWithKeyedOperatorTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that the broadcast state holds exactly the entries of
 * {@code expectedBroadcastState}, then registers an event-time timer for {@code timerTs}.
 *
 * @param value the incoming element (unused)
 * @param ctx   read-only context giving access to broadcast state and timers
 * @param out   the output collector (unused)
 * @throws Exception forwarded from state access
 */
@Override
public void processElement(String value, ReadOnlyContext ctx, Collector<String> out) throws Exception {
	final Iterator<Map.Entry<String, Integer>> actualEntries =
			ctx.getBroadcastState(STATE_DESCRIPTOR).immutableEntries().iterator();

	// there must be one matching actual entry per expected entry ...
	final int expectedCount = expectedBroadcastState.size();
	for (int checked = 0; checked < expectedCount; checked++) {
		assertTrue(actualEntries.hasNext());

		final Map.Entry<String, Integer> actual = actualEntries.next();
		final String key = actual.getKey();
		assertTrue(expectedBroadcastState.containsKey(key));
		assertEquals(expectedBroadcastState.get(key), actual.getValue());
	}

	// ... and nothing beyond that
	assertFalse(actualEntries.hasNext());

	ctx.timerService().registerEventTimeTimer(timerTs);
}
 
Example #10
Source File: PathCoordGroupReduce.java    From OSTMap with Apache License 2.0 6 votes vote down vote up
/**
 * Joins the coordinates of one group into a single '|'-separated string and emits
 * (user, coordinates) when the group contains more than one distinct coordinate.
 *
 * <p>{@code coordSet} is reset at the start of every call. Previously it was only cleared
 * after an emit, so a group with a single distinct coordinate leaked its entry into the next
 * group, which then kept the stale {@code user} and produced a string starting with '|'.
 *
 * @param values the (user, coordinate) pairs of one group
 * @param out    collector receiving the (user, joined coordinates) tuple
 * @throws Exception declared by the interface
 */
@Override
public void reduce(Iterable<Tuple2<String, String>> values, Collector<Tuple2<String, /*TODO POJO*/String>> out) throws Exception {
    // never carry coordinates over from a previous group
    coordSet.clear();
    coords = "";
    for (Tuple2<String, String> entry : values) {
        final String coord = entry.f1.toString();
        if (coordSet.isEmpty()) {
            // first element of the group decides the user
            user = entry.f0;
            coords = coord;
        } else {
            coords += "|" + coord;
        }
        coordSet.add(coord);
    }
    if (coordSet.size() > 1) {
        out.collect(new Tuple2<>(user, coords));
    }
    coordSet.clear();
}
 
Example #11
Source File: ReduceApplyProcessWindowFunction.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
@Override
public void process(K k, final Context context, Iterable<T> input, Collector<R> out) throws Exception {

	T curr = null;
	for (T val: input) {
		if (curr == null) {
			curr = val;
		} else {
			curr = reduceFunction.reduce(curr, val);
		}
	}

	this.ctx.window = context.window();
	this.ctx.context = context;
	windowFunction.process(k, ctx, Collections.singletonList(curr), out);
}
 
Example #12
Source File: GroupReduceITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Groups a data set of plain integers by the wildcard expression and keeps the first element
 * of each group; expects one record per distinct value.
 */
@Test
public void testGroupReduceWithAtomicValue() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// forwards only the first element of every group
	final GroupReduceFunction<Integer, Integer> firstOfGroup = new GroupReduceFunction<Integer, Integer>() {
		@Override
		public void reduce(Iterable<Integer> values, Collector<Integer> out) throws Exception {
			out.collect(values.iterator().next());
		}
	};

	final DataSet<Integer> input = env.fromElements(1, 1, 2, 3, 4);
	final List<Integer> actual = input
			.groupBy("*")
			.reduceGroup(firstOfGroup)
			.collect();

	final String expected = "1\n2\n3\n4";
	compareResultAsText(actual, expected);
}
 
Example #13
Source File: ParseRowModel.java    From Alink with Apache License 2.0 6 votes vote down vote up
@Override
public void mapPartition(Iterable<Row> iterable,
                         Collector<Tuple2<DenseVector, double[]>> collector) throws Exception {
    DenseVector coefVector = null;
    double[] lossCurve = null;
    int taskId = getRuntimeContext().getIndexOfThisSubtask();
    if (taskId == 0) {
        for (Row row : iterable) {
            Params params = Params.fromJson((String)row.getField(0));
            coefVector = params.get(ModelParamName.COEF);
            lossCurve = params.get(ModelParamName.LOSS_CURVE);
        }

        if (coefVector != null) {
            collector.collect(Tuple2.of(coefVector, lossCurve));
        }
    }
}
 
Example #14
Source File: CepOperatorBuilder.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a builder whose NFA factory always returns the given {@code nfa} and whose pattern
 * process function forwards every match unchanged.
 *
 * @param nfa the NFA instance the factory hands out
 * @return the configured builder
 */
public static CepOperatorBuilder<Map<String, List<Event>>> createOperatorForNFA(NFA<Event> nfa) {
	// factory that always returns the supplied NFA instance
	final NFACompiler.NFAFactory<Event> fixedNfaFactory = new NFACompiler.NFAFactory<Event>() {
		@Override
		public NFA<Event> createNFA() {
			return nfa;
		}
	};

	// emits each match as-is
	final PatternProcessFunction<Event, Map<String, List<Event>>> forwardMatches =
		new PatternProcessFunction<Event, Map<String, List<Event>>>() {
			private static final long serialVersionUID = -7143807777582726991L;

			@Override
			public void processMatch(
				Map<String, List<Event>> match,
				Context ctx,
				Collector<Map<String, List<Event>>> out) throws Exception {
				out.collect(match);
			}
		};

	return new CepOperatorBuilder<>(
		true,
		fixedNfaFactory,
		null,
		null,
		forwardMatches,
		null);
}
 
Example #15
Source File: Summarization.java    From flink with Apache License 2.0 6 votes vote down vote up
@Override
public void reduce(Iterable<Vertex<K, VV>> values, Collector<VertexGroupItem<K, VV>> out) throws Exception {
	K vertexGroupRepresentativeID = null;
	long vertexGroupCount = 0L;
	VV vertexGroupValue = null;
	boolean isFirstElement = true;

	for (Vertex<K, VV> vertex : values) {
		if (isFirstElement) {
			// take final group representative vertex id from first tuple
			vertexGroupRepresentativeID = vertex.getId();
			vertexGroupValue = vertex.getValue();
			isFirstElement = false;
		}
		// no need to set group value for those tuples
		reuseVertexGroupItem.setVertexId(vertex.getId());
		reuseVertexGroupItem.setGroupRepresentativeId(vertexGroupRepresentativeID);
		out.collect(reuseVertexGroupItem);
		vertexGroupCount++;
	}

	createGroupRepresentativeTuple(vertexGroupRepresentativeID, vertexGroupValue, vertexGroupCount);
	out.collect(reuseVertexGroupItem);
	reuseVertexGroupItem.reset();
}
 
Example #16
Source File: AbstractMergeIterator.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Crosses a single value from the second side with N values, all sharing a common key.
 * Effectively realizes a <i>N:1</i> join.
 *
 * <p>Every join call except the last one receives a copy of {@code val1}; the last call
 * receives {@code val1} itself.
 *
 * @param val1         The value from the <i>1</i> side.
 * @param firstValN    The first of the values from the <i>N</i> side.
 * @param valsN        Iterator over remaining <i>N</i> side values; {@code next()} is called
 *                     without a preceding {@code hasNext()}, so it must hold at least one value.
 * @param joinFunction The join function invoked for every pair.
 * @param collector    The collector handed to the join function.
 * @throws Exception Forwards all exceptions thrown by the stub.
 */
private void crossSecond1withNValues(T2 val1, T1 firstValN,
									Iterator<T1> valsN, FlatJoinFunction<T1, T2, O> joinFunction, Collector<O> collector) throws Exception {
	// join the first N-side value against a copy of the 1-side value
	joinFunction.join(firstValN, createCopy(serializer2, val1, this.copy2), collector);

	while (true) {
		final T1 nextN = valsN.next();

		if (!valsN.hasNext()) {
			// last N-side value: no further use of val1, so no copy is needed
			joinFunction.join(nextN, val1, collector);
			return;
		}

		joinFunction.join(nextN, createCopy(serializer2, val1, this.copy2), collector);
	}
}
 
Example #17
Source File: TopTweet.java    From flink-examples with MIT License 6 votes vote down vote up
@Override
public void flatMap(String tweetJsonStr, Collector<Tuple2<String, Integer>> collector) throws Exception {
    JsonNode tweetJson = mapper.readTree(tweetJsonStr);
    JsonNode entities = tweetJson.get("entities");
    if (entities == null) return;

    JsonNode hashtags = entities.get("hashtags");
    if (hashtags == null) return;

    for (Iterator<JsonNode> iter = hashtags.getElements(); iter.hasNext();) {
        JsonNode node = iter.next();
        String hashtag = node.get("text").getTextValue();

        if (hashtag.matches("\\w+")) {
            collector.collect(new Tuple2<>(hashtag, 1));
        }
    }
}
 
Example #18
Source File: ExactlyOnceValidatingConsumerThread.java    From flink with Apache License 2.0 6 votes vote down vote up
@Override
public void flatMap(String value, Collector<String> out) throws Exception {
	LOG.info("Consumed {}", value);

	int id = Integer.parseInt(value.split("-")[0]);
	if (validator.get(id)) {
		throw new RuntimeException("Saw id " + id + " twice!");
	}
	validator.set(id);
	if (id > totalEventCount - 1) {
		throw new RuntimeException("Out of bounds ID observed");
	}

	if (validator.nextClearBit(0) == totalEventCount) {
		throw new SuccessException();
	}
}
 
Example #19
Source File: BaseComQueue.java    From Alink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the data set named "iterInitialize": a rebalanced single-element source followed by
 * a map-partition step that emits nothing. When {@code cacheDataRel} is set, it is attached
 * as broadcast set "rel".
 *
 * @param env the execution environment used to create the source
 * @return the resulting data set
 */
private DataSet<byte[]> loopStartDataSet(ExecutionEnvironment env) {
	final MapPartitionFunction<Integer, byte[]> emitNothing = new MapPartitionFunction<Integer, byte[]>() {
		@Override
		public void mapPartition(Iterable<Integer> values, Collector<byte[]> out) {
			//pass
		}
	};

	final MapPartitionOperator<Integer, byte[]> start = env
		.fromElements(1)
		.rebalance()
		.mapPartition(emitNothing)
		.name("iterInitialize");

	return (cacheDataRel == null) ? start : start.withBroadcastSet(cacheDataRel, "rel");
}
 
Example #20
Source File: SemanticsCheckMapper.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
public void flatMap(Event event, Collector<String> out) throws Exception {

	Long currentValue = sequenceValue.value();
	if (currentValue == null) {
		currentValue = 0L;
	}

	long nextValue = event.getSequenceNumber();

	sequenceValue.update(nextValue);
	if (!validator.check(currentValue, nextValue)) {
		out.collect("Alert: " + currentValue + " -> " + nextValue + " (" + event.getKey() + ")");
	}
}
 
Example #21
Source File: DateExtraction.java    From OSTMap with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the {@code created_at} value from the raw tweet JSON and emits the tweet together
 * with that time as epoch seconds.
 *
 * <p>The value is located by plain string search: it starts right after
 * {@code "created_at":"} and ends before the next {@code ","}. Tweets in which either
 * marker cannot be found are skipped instead of failing with an index error.
 *
 * @param tweetJson tweet as json-string
 * @param out Tuple of timestamp (epoch seconds) and the unmodified tweet
 * @throws Exception if the extracted text cannot be parsed with {@code formatterExtract}
 */
@Override
public void flatMap(String tweetJson, Collector<Tuple2<Long, String>> out) throws Exception {

    final String marker = "\"created_at\":\"";
    final int markerPos = tweetJson.indexOf(marker);
    if (markerPos == -1) {
        // no created_at field
        return;
    }

    final int valueStart = markerPos + marker.length();
    final int valueEnd = tweetJson.indexOf("\",\"", valueStart);
    if (valueEnd == -1) {
        // value is not terminated; substring() would throw on a negative end index
        return;
    }

    String rawTime = tweetJson.substring(valueStart, valueEnd);
    ZonedDateTime time = ZonedDateTime.parse(rawTime, formatterExtract);
    long ts = time.toEpochSecond();
    out.collect(new Tuple2<>(ts, tweetJson));
}
 
Example #22
Source File: FirstReducer.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Forwards the elements of the group in iteration order and stops as soon as the number of
 * forwarded elements equals {@code count}.
 *
 * @param values the elements of one group
 * @param out    collector receiving the forwarded elements
 * @throws Exception declared by the interface
 */
@Override
public void reduce(Iterable<T> values, Collector<T> out) throws Exception {

	int emitted = 0;
	for (T element : values) {
		out.collect(element);

		if (++emitted == count) {
			return;
		}
	}
}
 
Example #23
Source File: LegacyKeyedCoProcessOperatorTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Toggles the keyed value state. With no value stored, emits "INPUT&lt;channel&gt;:&lt;value&gt;",
 * stores the value and registers a processing-time timer 5 ahead of the current processing
 * time. Otherwise clears the state and deletes the processing-time timer 4 ahead.
 *
 * @param value        the incoming value
 * @param out          collector receiving the output string
 * @param timerService timer service used to register or delete timers
 * @param channel      number included in the emitted string
 * @throws IOException if accessing the state fails
 */
private void handleValue(
	Object value,
	Collector<String> out,
	TimerService timerService,
	int channel) throws IOException {
	final ValueState<String> seen = getRuntimeContext().getState(this.state);

	if (seen.value() != null) {
		seen.clear();
		timerService.deleteProcessingTimeTimer(timerService.currentProcessingTime() + 4);
		return;
	}

	out.collect("INPUT" + channel + ":" + value);
	seen.update(String.valueOf(value));
	timerService.registerProcessingTimeTimer(timerService.currentProcessingTime() + 5);
}
 
Example #24
Source File: StreamFlatMapTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Forwards the value unchanged, failing the test first if {@code openCalled} is not set.
 *
 * @param value the incoming element
 * @param out   collector receiving the same element
 * @throws Exception declared by the interface
 */
@Override
public void flatMap(String value, Collector<String> out) throws Exception {
	final boolean opened = openCalled;
	if (!opened) {
		Assert.fail("Open was not called before run.");
	}

	out.collect(value);
}
 
Example #25
Source File: WindowTriangles.java    From gelly-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Emits the edges of a vertex followed by the candidate edges derived from its neighbors.
 *
 * <p>First, one (vertexID, neighbor, false) record is emitted per incoming neighbor tuple.
 * Then, for every pair of <em>distinct</em> neighbors whose ids are both larger than
 * {@code vertexID}, one (neighborA, neighborB, true) candidate record is emitted. Pairing a
 * neighbor with itself is never a useful candidate, so the inner loop starts after the outer
 * index.
 *
 * @param vertexID  the id of the vertex being processed
 * @param neighbors the neighbors of the vertex; only field {@code f0} is read
 * @param out       collector receiving (source, target, isCandidate) records; a single tuple
 *                  instance is reused for every emission
 * @throws Exception declared by the interface
 */
@Override
public void applyOnEdges(Long vertexID,
		Iterable<Tuple2<Long, NullValue>> neighbors,
		Collector<Tuple3<Long, Long, Boolean>> out) throws Exception {

	Tuple3<Long, Long, Boolean> outT = new Tuple3<>();
	outT.setField(vertexID, 0);
	outT.setField(false, 2); //isCandidate=false

	Set<Long> neighborIdsSet = new HashSet<>();
	for (Tuple2<Long, NullValue> t : neighbors) {
		outT.setField(t.f0, 1);
		out.collect(outT);
		neighborIdsSet.add(t.f0);
	}

	Long[] neighborIds = neighborIdsSet.toArray(new Long[0]);
	neighborIdsSet.clear();

	outT.setField(true, 2); //isCandidate=true
	for (int i = 0; i < neighborIds.length - 1; i++) {
		// only emit the candidates with IDs larger than the vertex ID
		if (neighborIds[i] <= vertexID) {
			continue;
		}
		for (int j = i + 1; j < neighborIds.length; j++) {
			if (neighborIds[j] > vertexID) {
				outT.setField(neighborIds[i], 0);
				outT.setField(neighborIds[j], 1);
				out.collect(outT);
			}
		}
	}
}
 
Example #26
Source File: OuterJoinITCase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Emits the string field of each side ({@code first.f2}, {@code second.f3}), using
 * {@code null} for a side that is absent.
 *
 * @param first  the left record, or {@code null}
 * @param second the right record, or {@code null}
 * @param out    collector receiving the pair of strings
 */
@Override
public void join(Tuple3<Integer, Long, String> first,
		Tuple5<Integer, Long, Integer, String, Long> second,
		Collector<Tuple2<String, String>> out) {

	final String left = (first != null) ? first.f2 : null;
	final String right = (second != null) ? second.f3 : null;
	out.collect(new Tuple2<>(left, right));
}
 
Example #27
Source File: TypeExtractorTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the return type extracted from a co-group function over {@code String[]} is an
 * array type with String components.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testBasicArray() {
	// use getCoGroupReturnTypes()
	RichCoGroupFunction<?, ?, ?> function = new RichCoGroupFunction<String[], String[], String[]>() {
		private static final long serialVersionUID = 1L;

		@Override
		public void coGroup(Iterable<String[]> first, Iterable<String[]> second, Collector<String[]> out) throws Exception {
			// nothing to do
		}
	};

	final TypeInformation firstInput = TypeInformation.of(new TypeHint<String[]>(){});
	final TypeInformation secondInput = TypeInformation.of(new TypeHint<String[]>(){});
	TypeInformation<?> ti = TypeExtractor.getCoGroupReturnTypes(function, firstInput, secondInput);

	Assert.assertFalse(ti.isBasicType());
	Assert.assertFalse(ti.isTupleType());

	// Due to a Java 6 bug the classification can be slightly wrong
	final boolean isBasicArray = ti instanceof BasicArrayTypeInfo<?,?>;
	Assert.assertTrue(isBasicArray || ti instanceof ObjectArrayTypeInfo<?,?>);

	if (!isBasicArray) {
		Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, ((ObjectArrayTypeInfo<?,?>) ti).getComponentInfo());
	}
	else {
		Assert.assertEquals(BasicArrayTypeInfo.STRING_ARRAY_TYPE_INFO, ti);
	}
}
 
Example #28
Source File: WordCount.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Lower-cases the text of the record, splits it on runs of non-word characters and emits
 * (token, 1) for every non-empty token.
 *
 * @param value the input record; only the text in {@code f1} is used
 * @param out   collector receiving one tuple per token
 */
@Override
public void flatMap(Tuple2<LongWritable, Text> value, Collector<Tuple2<String, Integer>> out) {
	// normalize the line
	final String normalized = value.f1.toString().toLowerCase();

	// split it and emit the pairs
	for (String token : normalized.split("\\W+")) {
		if (token.isEmpty()) {
			continue;
		}
		out.collect(new Tuple2<>(token, 1));
	}
}
 
Example #29
Source File: ParallelMaximumLikelihood2.java    From toolbox with Apache License 2.0 5 votes vote down vote up
/**
 * Sums the sufficient statistics of all data instances in the partition and emits the total.
 * Each instance also adds 1.0 to {@code counterInstances}.
 *
 * <p>An empty partition emits nothing; previously it emitted a {@code null} record.
 *
 * @param values the data instances of this partition
 * @param out    collector receiving at most one accumulated statistics object
 * @throws Exception declared by the interface
 */
@Override
public void mapPartition(Iterable<DataInstance> values, Collector<SufficientStatistics> out) throws Exception {
    SufficientStatistics accumulator = null;
    for (DataInstance value : values) {
        this.counterInstances.add(1.0);
        final SufficientStatistics stats = this.ef_bayesianNetwork.getSufficientStatistics(value);
        if (accumulator == null) {
            // the first instance seeds the accumulator
            accumulator = stats;
        } else {
            accumulator.sum(stats);
        }
    }
    if (accumulator != null) {
        out.collect(accumulator);
    }
}
 
Example #30
Source File: LegacyKeyedProcessOperatorTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
@Override
public void processElement(Integer value, Context ctx, Collector<Integer> out) throws Exception {
	out.collect(value);
	if (timeDomain.equals(TimeDomain.EVENT_TIME)) {
		ctx.timerService().registerEventTimeTimer(ctx.timerService().currentWatermark() + 5);
	} else {
		ctx.timerService().registerProcessingTimeTimer(ctx.timerService().currentProcessingTime() + 5);
	}
}