Java Code Examples for org.apache.flink.api.common.functions.FlatMapFunction

The following examples show how to use org.apache.flink.api.common.functions.FlatMapFunction. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Translates an unbounded read into a Flink {@code DataStream} and registers that stream as the
 * output of the transform.
 *
 * <p>When the source's class is exactly {@code UnboundedFlinkSource}, its underlying Flink source
 * is added directly and every emitted {@code String} is wrapped into a {@code WindowedValue} in
 * the global window, stamped with {@code Instant.now()} at wrapping time. Every other source is
 * added through an {@code UnboundedSourceWrapper}.
 *
 * @param transform the unbounded read being translated
 * @param context   supplies the execution environment, the pipeline options and the output registry
 */
@Override
public void translateNode(Read.Unbounded<T> transform, FlinkStreamingTranslationContext context) {
	PCollection<T> output = context.getOutput(transform);
	boolean isNativeFlinkSource = UnboundedFlinkSource.class.equals(transform.getSource().getClass());

	DataStream<WindowedValue<T>> source;
	if (!isNativeFlinkSource) {
		// Generic path: let the wrapper drive the unbounded source.
		source = context.getExecutionEnvironment()
				.addSource(new UnboundedSourceWrapper<>(context.getPipelineOptions(), transform));
	} else {
		UnboundedFlinkSource flinkSource = (UnboundedFlinkSource) transform.getSource();

		// Wraps each raw string into a globally-windowed value that carries the current time.
		FlatMapFunction<String, WindowedValue<String>> wrapInGlobalWindow =
				new FlatMapFunction<String, WindowedValue<String>>() {
					@Override
					public void flatMap(String s, Collector<WindowedValue<String>> collector) throws Exception {
						collector.collect(WindowedValue.<String>of(s, Instant.now(), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING));
					}
				};

		source = context.getExecutionEnvironment()
				.addSource(flinkSource.getFlinkSource())
				.flatMap(wrapInGlobalWindow)
				.assignTimestampsAndWatermarks(new IngestionTimeExtractor());
	}
	context.setOutputDataStream(output, source);
}
 
Example 2
Source Project: Flink-CEPplus   Source File: FlatMapOperator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the {@code FlatMapOperatorBase} that stands for this flat-map in the data flow.
 *
 * <p>The operator is named {@code getName()} when that is non-null, otherwise
 * {@code "FlatMap at " + defaultName}. Its parallelism is this operator's own parallelism when
 * that is positive, and the parallelism of {@code input} otherwise.
 *
 * @param input the operator feeding this flat-map
 * @return the configured flat-map operator, wired to {@code input}
 */
@Override
protected FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>> translateToDataFlow(Operator<IN> input) {
	final String operatorName = getName() != null ? getName() : "FlatMap at " + defaultName;

	final FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>> translated =
		new FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>>(
			function,
			new UnaryOperatorInformation<IN, OUT>(getInputType(), getResultType()),
			operatorName);
	translated.setInput(input);

	// An explicit parallelism wins; otherwise inherit the input's parallelism to enable chaining.
	translated.setParallelism(this.getParallelism() > 0 ? this.getParallelism() : input.getParallelism());

	return translated;
}
 
Example 3
Source Project: Flink-CEPplus   Source File: FilterOperator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the {@code PlanFilterOperator} that stands for this filter in the data flow.
 *
 * <p>The operator is named {@code getName()} when that is non-null, otherwise
 * {@code "Filter at " + defaultName}. Its parallelism is this operator's own parallelism when
 * that is positive, and the parallelism of {@code input} otherwise.
 *
 * @param input the operator feeding this filter
 * @return the configured filter operator, wired to {@code input}
 */
@Override
protected org.apache.flink.api.common.operators.base.FilterOperatorBase<T, FlatMapFunction<T, T>> translateToDataFlow(Operator<T> input) {
	final String operatorName = getName() != null ? getName() : "Filter at " + defaultName;

	final PlanFilterOperator<T> translated = new PlanFilterOperator<T>(function, operatorName, getInputType());
	translated.setInput(input);

	// An explicit parallelism wins; otherwise inherit the input's parallelism to enable chaining.
	translated.setParallelism(getParallelism() > 0 ? getParallelism() : input.getParallelism());

	return translated;
}
 
Example 4
Source Project: flink   Source File: FilterOperator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the plan-level filter operator for this filter and connects it to its input.
 *
 * <p>Name: {@code getName()} if set, else {@code "Filter at " + defaultName}. Parallelism: the
 * value of {@code getParallelism()} if positive, else the parallelism of {@code input}.
 *
 * @param input the upstream operator
 * @return the new {@code PlanFilterOperator}
 */
@Override
protected org.apache.flink.api.common.operators.base.FilterOperatorBase<T, FlatMapFunction<T, T>> translateToDataFlow(Operator<T> input) {
	String filterName = getName() != null ? getName() : "Filter at " + defaultName;

	PlanFilterOperator<T> filterOperator = new PlanFilterOperator<T>(function, filterName, getInputType());
	filterOperator.setInput(input);

	if (getParallelism() <= 0) {
		// nothing specified: follow the input operator so that both can be chained
		filterOperator.setParallelism(input.getParallelism());
	} else {
		// honour the explicitly specified parallelism
		filterOperator.setParallelism(getParallelism());
	}

	return filterOperator;
}
 
Example 5
Source Project: flink   Source File: FlatMapOperator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the plan-level flat-map operator for this flat-map and connects it to its input.
 *
 * <p>Name: {@code getName()} if set, else {@code "FlatMap at " + defaultName}. Parallelism: the
 * value of {@code getParallelism()} if positive, else the parallelism of {@code input}.
 *
 * @param input the upstream operator
 * @return the new {@code FlatMapOperatorBase}
 */
@Override
protected FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>> translateToDataFlow(Operator<IN> input) {
	String flatMapName = getName() != null ? getName() : "FlatMap at " + defaultName;

	FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>> flatMapOperator =
		new FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>>(function,
			new UnaryOperatorInformation<IN, OUT>(getInputType(), getResultType()), flatMapName);
	flatMapOperator.setInput(input);

	if (this.getParallelism() <= 0) {
		// nothing specified: follow the input operator so that both can be chained
		flatMapOperator.setParallelism(input.getParallelism());
	} else {
		// honour the explicitly specified parallelism
		flatMapOperator.setParallelism(this.getParallelism());
	}

	return flatMapOperator;
}
 
Example 6
Source Project: flink-learning   Source File: Main.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Counts the words in {@code WORDS}, prints the per-word totals, then prints the number of
 * input elements.
 *
 * <p>Each element is split on runs of non-word characters ({@code \W+}). {@code String.split}
 * returns an empty first token when the text starts with a separator (and a single empty token
 * for an empty string); such empty tokens are skipped so they are not counted as a word.
 *
 * @param args command-line arguments; parsed with {@code ParameterTool} and registered as
 *             global job parameters
 * @throws Exception if building or running the job fails
 */
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    DataSource<String> dataSource = env.fromElements(WORDS);

    dataSource.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
        @Override
        public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
            String[] words = line.split("\\W+");
            for (String word : words) {
                // split() can produce "" (leading separator / empty line); that is not a word
                if (!word.isEmpty()) {
                    out.collect(new Tuple2<>(word, 1));
                }
            }
        }
    })
            .groupBy(0)
            .sum(1)
            .print();

    long count = dataSource.count();
    System.out.println(count);
}
 
Example 7
Source Project: OSTMap   Source File: GraphLoader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Derives the set of user nodes from JSON records.
 *
 * <p>For every record, one node is emitted for the object under {@code "user"} and one for each
 * entry of {@code "entities" -> "user_mentions"}; a node is keyed by the {@code "id_str"} field and
 * carries that id together with the {@code "name"} field. The result is made distinct on the
 * key (tuple field 0).
 *
 * @param jsonData the JSON records to scan
 * @return distinct {@code (userId, UserNodeValues)} pairs
 */
private DataSet<Tuple2<String, UserNodeValues>> getUserNodes(DataSet<JSONObject> jsonData) {
    return jsonData
        .flatMap(new FlatMapFunction<JSONObject, Tuple2<String, UserNodeValues>>() {
            @Override
            public void flatMap(JSONObject jsonObject, Collector<Tuple2<String, UserNodeValues>> out) throws Exception {
                // the user the record belongs to
                JSONObject author = jsonObject.getJSONObject("user");
                String authorId = author.getString("id_str");
                String authorName = author.getString("name");
                out.collect(new Tuple2<String, UserNodeValues>(authorId, new UserNodeValues(authorId, authorName)));

                // every user mentioned in the record
                JSONArray mentions = jsonObject.getJSONObject("entities").getJSONArray("user_mentions");
                for (int idx = 0; idx < mentions.length(); idx++) {
                    JSONObject mention = mentions.getJSONObject(idx);
                    String mentionedId = mention.getString("id_str");
                    String mentionedName = mention.getString("name");
                    out.collect(new Tuple2<String, UserNodeValues>(mentionedId, new UserNodeValues(mentionedId, mentionedName)));
                }
            }
        })
        .distinct(0);
}
 
Example 8
/**
 * Reads records from the Kafka topic {@code "zhisheng"}, converts the text of each record's
 * {@code "value"} node into a {@code MetricEvent} and prints the events.
 *
 * <p>A record that cannot be converted is logged and dropped; it does not fail the job.
 *
 * @param args command-line arguments, handed to {@code ExecutionEnvUtil.createParameterTool}
 * @throws Exception if setting up or executing the job fails
 */
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties kafkaProps = buildKafkaProps(parameterTool);

    FlinkKafkaConsumer011<ObjectNode> metricSource =
            new FlinkKafkaConsumer011<>("zhisheng", new KafkaMetricSchema(true), kafkaProps);

    // Best-effort conversion: failures are logged per record and the record is skipped.
    FlatMapFunction<ObjectNode, MetricEvent> toMetricEvent = new FlatMapFunction<ObjectNode, MetricEvent>() {
        @Override
        public void flatMap(ObjectNode jsonNodes, Collector<MetricEvent> collector) throws Exception {
            try {
                collector.collect(GsonUtil.fromJson(jsonNodes.get("value").asText(), MetricEvent.class));
            } catch (Exception e) {
                log.error("jsonNodes = {} convert to MetricEvent has an error", jsonNodes, e);
            }
        }
    };

    env.addSource(metricSource)
            .flatMap(toMetricEvent)
            .print();
    env.execute();
}
 
Example 9
Source Project: flink-learning   Source File: Main.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Batch word count over the {@code WORDS} elements: prints {@code (word, count)} totals and
 * afterwards the number of input elements.
 *
 * <p>Input is tokenized with {@code split("\\W+")}. That call yields an empty string as first
 * token when the text begins with a non-word character, and {@code [""]} for an empty element,
 * so empty tokens are filtered out instead of being counted as a word.
 *
 * @param args command-line arguments; parsed with {@code ParameterTool} and registered as
 *             global job parameters
 * @throws Exception if building or running the job fails
 */
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    DataSource<String> dataSource = env.fromElements(WORDS);

    dataSource.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
        @Override
        public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
            for (String word : line.split("\\W+")) {
                if (word.isEmpty()) {
                    // artifact of split(): leading separator or empty line
                    continue;
                }
                out.collect(new Tuple2<>(word, 1));
            }
        }
    })
            .groupBy(0)
            .sum(1)
            .print();

    long count = dataSource.count();
    System.out.println(count);
}
 
Example 10
/**
 * Streams records from the Kafka topic {@code "zhisheng"}, parses the text of each record's
 * {@code "value"} node as a {@code MetricEvent} and prints the result.
 *
 * <p>Parsing errors are logged together with the offending record; the record is then skipped.
 *
 * @param args command-line arguments, handed to {@code ExecutionEnvUtil.createParameterTool}
 * @throws Exception if setting up or executing the job fails
 */
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties consumerProps = buildKafkaProps(parameterTool);

    FlinkKafkaConsumer011<ObjectNode> consumer = new FlinkKafkaConsumer011<>(
            "zhisheng",
            new KafkaMetricSchema(true),
            consumerProps);

    env.addSource(consumer)
            .flatMap(new FlatMapFunction<ObjectNode, MetricEvent>() {
                @Override
                public void flatMap(ObjectNode jsonNodes, Collector<MetricEvent> collector) throws Exception {
                    try {
                        String payload = jsonNodes.get("value").asText();
                        MetricEvent event = GsonUtil.fromJson(payload, MetricEvent.class);
                        collector.collect(event);
                    } catch (Exception e) {
                        // keep the stream alive: report the bad record and move on
                        log.error("jsonNodes = {} convert to MetricEvent has an error", jsonNodes, e);
                    }
                }
            })
            .print();
    env.execute();
}
 
Example 11
Source Project: flink   Source File: DataStreamAllroundTestJobFactory.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the {@code SemanticsCheckMapper} matching the semantics configured under
 * {@code TEST_SEMANTICS}.
 *
 * <p>The configured value is compared case-insensitively: {@code "exactly-once"} selects the
 * exactly-once validator and {@code "at-least-once"} the at-least-once validator.
 *
 * @param pt the parameters to read the semantics from (falls back to the option's default value)
 * @return a mapper using the selected validator
 * @throws IllegalArgumentException if the configured value is neither of the two known semantics
 */
static FlatMapFunction<Event, String> createSemanticsCheckMapper(ParameterTool pt) {
	final String requested = pt.get(TEST_SEMANTICS.key(), TEST_SEMANTICS.defaultValue());

	if (requested.equalsIgnoreCase("exactly-once")) {
		return new SemanticsCheckMapper(SemanticsCheckMapper.ValidatorFunction.exactlyOnce());
	}
	if (requested.equalsIgnoreCase("at-least-once")) {
		return new SemanticsCheckMapper(SemanticsCheckMapper.ValidatorFunction.atLeastOnce());
	}
	throw new IllegalArgumentException("Unknown semantics requested: " + requested);
}
 
Example 12
Source Project: Alink   Source File: AlsPredictBatchOp.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Extracts {@code (vertexId, factors)} pairs for one side of the model.
 *
 * <p>A model row is kept when its side matches {@code identity}: rows for which
 * {@code AlsModelDataConverter.getIsUser} is true count as side {@code 0}, all others as side
 * {@code 1}.
 *
 * @param model    the operator holding the model rows
 * @param identity the side to extract ({@code 0} or {@code 1} as described above)
 * @return the vertex ids and factor arrays of the matching rows
 */
private static DataSet<Tuple2<Long, float[]>> getFactors(BatchOperator<?> model, final int identity) {
    return model.getDataSet()
        .flatMap(new FlatMapFunction<Row, Tuple2<Long, float[]>>() {
            @Override
            public void flatMap(Row value, Collector<Tuple2<Long, float[]>> out) throws Exception {
                final int side = AlsModelDataConverter.getIsUser(value) ? 0 : 1;
                if (side == identity) {
                    long vertexId = AlsModelDataConverter.getVertexId(value);
                    float[] vertexFactors = AlsModelDataConverter.getFactors(value);
                    out.collect(Tuple2.of(vertexId, vertexFactors));
                }
            }
        });
}
 
Example 13
Source Project: flink   Source File: FlatMapOperator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Translates this flat-map into a {@code FlatMapOperatorBase} connected to {@code input}.
 *
 * <p>The operator takes {@code getName()} as its name, or {@code "FlatMap at " + defaultName}
 * when no name is set. It runs with this operator's parallelism when that is positive and with
 * the input's parallelism otherwise.
 *
 * @param input the operator whose output is flat-mapped
 * @return the translated operator
 */
@Override
protected FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>> translateToDataFlow(Operator<IN> input) {
	final String displayName = getName() != null ? getName() : "FlatMap at " + defaultName;

	final FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>> result =
		new FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>>(function,
			new UnaryOperatorInformation<IN, OUT>(getInputType(), getResultType()), displayName);
	result.setInput(input);

	// Without an explicit setting, adopt the input's parallelism so the two operators can chain.
	final int parallelism = this.getParallelism() > 0 ? this.getParallelism() : input.getParallelism();
	result.setParallelism(parallelism);

	return result;
}
 
Example 14
Source Project: flink   Source File: FlatMapOperatorBase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Applies the user's flat-map function to an in-memory list.
 *
 * <p>The function receives the runtime context, is opened with {@code parameters}, is invoked
 * once per input element on a serializer-made copy of that element, and is closed afterwards.
 * Emitted records are gathered through a {@code CopyingListCollector} built on the output
 * serializer.
 *
 * @param input           the elements to process
 * @param ctx             the runtime context handed to the function
 * @param executionConfig the configuration used to create the input and output serializers
 * @return all records emitted by the function
 * @throws Exception if opening, invoking or closing the function fails
 */
@Override
protected List<OUT> executeOnCollections(List<IN> input, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
	final FlatMapFunction<IN, OUT> udf = userFunction.getUserCodeObject();

	FunctionUtils.setFunctionRuntimeContext(udf, ctx);
	FunctionUtils.openFunction(udf, parameters);

	final ArrayList<OUT> emitted = new ArrayList<OUT>(input.size());

	final TypeSerializer<IN> inputSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig);
	final TypeSerializer<OUT> outputSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);

	final CopyingListCollector<OUT> collector = new CopyingListCollector<OUT>(emitted, outputSerializer);

	for (IN record : input) {
		// hand the function a copy of the element rather than the caller's instance
		udf.flatMap(inputSerializer.copy(record), collector);
	}

	FunctionUtils.closeFunction(udf);

	return emitted;
}
 
Example 15
Source Project: Alink   Source File: AssociationRule.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Generates association rules from frequent patterns.
 *
 * <p>The work is dispatched on {@code maxConsequentLength}: a value of {@code 1} uses the
 * single-consequent extraction, a larger value uses the multi-consequent extraction, and a
 * non-positive value yields a DataSet that contains no rules.
 *
 * @param patterns            A DataSet of frequent patterns and their supports.
 * @param transactionsCnt     The number of transactions in the original dataset.
 * @param itemCounts          A DataSet of items and their supports.
 * @param minConfidence       Minimum confidence.
 * @param minLift             Minimum lift.
 * @param maxConsequentLength Maximum length of a consequent.
 * @return The association rules with fields: antecedent (left hand side), consequent (right hand side),
 * support count, [lift, support, confidence].
 */
public static DataSet<Tuple4<int[], int[], Integer, double[]>> extractRules(
    DataSet<Tuple2<int[], Integer>> patterns,
    DataSet<Long> transactionsCnt,
    DataSet<Tuple2<Integer, Integer>> itemCounts,
    final double minConfidence,
    final double minLift,
    final int maxConsequentLength) {

    if (maxConsequentLength == 1) {
        return extractSingleConsequentRules(patterns, transactionsCnt, itemCounts, minConfidence, minLift);
    }

    if (maxConsequentLength > 1) {
        return extractMultiConsequentRules(patterns, transactionsCnt, minConfidence, minLift,
            maxConsequentLength);
    }

    // Non-positive length: flat-map a single dummy element to nothing to obtain an empty DataSet.
    return patterns.getExecutionEnvironment().fromElements(0)
        .flatMap(new FlatMapFunction<Integer, Tuple4<int[], int[], Integer, double[]>>() {
            @Override
            public void flatMap(Integer value, Collector<Tuple4<int[], int[], Integer, double[]>> out) throws Exception {
            }
        });
}
 
Example 16
/**
 * Runs a streaming job with parallelism 1 that reads record batches from a
 * {@code DatahubSourceFunction}, converts every {@code RecordEntry} with
 * {@code getStringLongTuple2} and prints the resulting tuples.
 *
 * @throws Exception if setting up or executing the job fails
 */
public void runExample() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	DatahubSourceFunction datahubSource =
			new DatahubSourceFunction(endPoint, projectName, topicName, accessId, accessKey, 0,
									Long.MAX_VALUE, 1, 1, 1);

	// Flattens each batch of entries into individual tuples.
	FlatMapFunction<List<RecordEntry>, Tuple2<String, Long>> flattenBatch = (recordEntries, collector) -> {
		for (RecordEntry entry : recordEntries) {
			collector.collect(getStringLongTuple2(entry));
		}
	};

	env.addSource(datahubSource)
			.flatMap(flattenBatch)
			// the lambda's output type is declared explicitly
			.returns(new TypeHint<Tuple2<String, Long>>() {})
			.print();
	env.execute();
}
 
Example 17
Source Project: Flink-CEPplus   Source File: KafkaConsumerTestBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Test that ensures that DeserializationSchema.isEndOfStream() is properly evaluated.
 *
 * <p>A sequence of 300 elements is written to a topic, which is then consumed with a
 * {@code FixedNumberDeserializationSchema} created for that same element count. The job runs
 * with parallelism 1 and without restarts, and the topic is deleted afterwards.
 *
 * @throws Exception if writing the sequence, running the job or deleting the topic fails
 */
public void runEndOfStreamTest() throws Exception {

	final int elementCount = 300;
	final String topic = writeSequence("testEndOfStream", elementCount, 1, 1);

	// read using custom schema
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();

	// secure properties are applied last, so they override the standard ones on key clashes
	final Properties consumerProps = new Properties();
	consumerProps.putAll(standardProps);
	consumerProps.putAll(secureProps);

	final DataStream<Tuple2<Integer, Integer>> kafkaStream = env.addSource(
		kafkaServer.getConsumer(topic, new FixedNumberDeserializationSchema(elementCount), consumerProps));

	// the records only need to be consumed, so the function emits nothing
	kafkaStream.flatMap(new FlatMapFunction<Tuple2<Integer, Integer>, Void>() {
		@Override
		public void flatMap(Tuple2<Integer, Integer> value, Collector<Void> out) throws Exception {
			// noop ;)
		}
	});

	tryExecute(env, "Consume " + elementCount + " elements from Kafka");

	deleteTestTopic(topic);
}
 
Example 18
Source Project: Flink-CEPplus   Source File: SampleITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the source data set: the third field ({@code f2}) of every tuple of
 * {@code CollectionDataSets.get3TupleDataSet}.
 *
 * @param env the environment in which the tuple data set is created
 * @return a flat-map operator emitting one string per input tuple
 */
private FlatMapOperator<Tuple3<Integer, Long, String>, String> getSourceDataSet(ExecutionEnvironment env) {
	FlatMapFunction<Tuple3<Integer, Long, String>, String> pickThirdField =
		new FlatMapFunction<Tuple3<Integer, Long, String>, String>() {
			@Override
			public void flatMap(Tuple3<Integer, Long, String> tuple, Collector<String> out) throws Exception {
				out.collect(tuple.f2);
			}
		};

	return CollectionDataSets.get3TupleDataSet(env).flatMap(pickThirdField);
}
 
Example 19
/**
 * Assembles the test job: a continuously monitored text-file source under {@code localFsURI}
 * whose lines are forwarded unchanged to a {@code TestingSinkFunction} running with
 * parallelism 1.
 *
 * <p>Before the job is assembled, a fixed-delay restart strategy ({@code NO_OF_RETRIES} attempts,
 * delay 0) and checkpointing (interval argument 10) are configured and the file-creating thread
 * is started.
 *
 * @param env the environment on which the job is defined
 */
@Override
public void testProgram(StreamExecutionEnvironment env) {

	// restart strategy and checkpointing
	env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(NO_OF_RETRIES, 0));
	env.enableCheckpointing(10);

	// start producing files in the background
	fc = new FileCreator();
	fc.start();

	// monitoring source plus the readers it needs
	TextInputFormat textFormat = new TextInputFormat(new org.apache.flink.core.fs.Path(localFsURI));
	textFormat.setFilesFilter(FilePathFilter.createDefaultFilter());

	DataStream<String> lines = env.readFile(
		textFormat, localFsURI, FileProcessingMode.PROCESS_CONTINUOUSLY, INTERVAL);

	TestingSinkFunction testingSink = new TestingSinkFunction();

	// identity flat-map between source and sink
	FlatMapFunction<String, String> passThrough = new FlatMapFunction<String, String>() {
		@Override
		public void flatMap(String value, Collector<String> out) throws Exception {
			out.collect(value);
		}
	};

	lines.flatMap(passThrough).addSink(testingSink).setParallelism(1);
}
 
Example 20
Source Project: Flink-CEPplus   Source File: IPv6HostnamesITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs a word count over {@code WordCountData.TEXT} with parallelism 4 and compares the
 * collected result with {@code WordCountData.COUNTS_AS_TUPLES}.
 *
 * <p>Any exception is printed and turned into a test failure carrying the exception message.
 */
@Test
public void testClusterWithIPv6host() {
	try {

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(4);
		env.getConfig().disableSysoutLogging();

		// one input element per line of the text
		DataSet<String> lines = env.fromElements(WordCountData.TEXT.split("\n"));

		DataSet<Tuple2<String, Integer>> wordCounts = lines
				.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
					@Override
					public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
						String[] tokens = value.toLowerCase().split("\\W+");
						for (String token : tokens) {
							if (token.isEmpty()) {
								continue;
							}
							out.collect(new Tuple2<String, Integer>(token, 1));
						}
					}
				})
				.groupBy(0).sum(1);

		List<Tuple2<String, Integer>> actual = wordCounts.collect();

		TestBaseUtils.compareResultAsText(actual, WordCountData.COUNTS_AS_TUPLES);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 21
Source Project: Flink-CEPplus   Source File: StreamingOperatorsITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Executes a job with object reuse enabled whose only operator is a flat-map that doubles the
 * elements 1, 2, 3 and whose output is not consumed by any further operator.
 *
 * <p>The test makes no assertions; it passes when execution completes without an exception.
 *
 * @throws Exception if executing the job fails
 */
@Test
public void testOperatorChainWithObjectReuseAndNoOutputOperators() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().enableObjectReuse();

	DataStream<Integer> numbers = env.fromElements(1, 2, 3);

	FlatMapFunction<Integer, Integer> doubler = new FlatMapFunction<Integer, Integer>() {
		@Override
		public void flatMap(Integer value, Collector<Integer> out) throws Exception {
			out.collect(value * 2);
		}
	};
	numbers.flatMap(doubler);

	env.execute();
}
 
Example 22
Source Project: flink   Source File: FlatMapOperatorCollectionTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps a string-to-string flat-map function into a {@code FlatMapOperatorBase} named
 * {@code "flatMap on Collections"}, with string type information for input and output.
 *
 * @param udf the function to wrap
 * @return the operator around {@code udf}
 */
private FlatMapOperatorBase<String, String, FlatMapFunction<String, String>> getTestFlatMapOperator(
		FlatMapFunction<String, String> udf) {

	final UnaryOperatorInformation<String, String> stringToString =
			new UnaryOperatorInformation<String, String>(
					BasicTypeInfo.STRING_TYPE_INFO,
					BasicTypeInfo.STRING_TYPE_INFO);

	final FlatMapOperatorBase<String, String, FlatMapFunction<String, String>> operator =
			new FlatMapOperatorBase<String, String, FlatMapFunction<String, String>>(
					udf, stringToString, "flatMap on Collections");
	return operator;
}
 
Example 23
Source Project: gelly-streaming   Source File: ExactTriangleCount.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the edge stream to process.
 *
 * <p>When {@code fileOutput} is set, edges are parsed from the text file at
 * {@code edgeInputPath}: each line is split on single whitespace characters, lines whose first
 * field is {@code "%"} are skipped, and the first two fields are parsed as source and target
 * ids. Otherwise a fixed set of nine edges is used.
 *
 * @param env the streaming environment the stream is created in
 * @return the edge stream, with {@code NullValue} as edge value
 */
@SuppressWarnings("serial")
private static SimpleEdgeStream<Integer, NullValue> getGraphStream(StreamExecutionEnvironment env) {

	if (!fileOutput) {
		// built-in sample graph
		return new SimpleEdgeStream<>(env.fromElements(
				new Edge<>(1, 2, NullValue.getInstance()),
				new Edge<>(2, 3, NullValue.getInstance()),
				new Edge<>(2, 6, NullValue.getInstance()),
				new Edge<>(5, 6, NullValue.getInstance()),
				new Edge<>(1, 4, NullValue.getInstance()),
				new Edge<>(5, 3, NullValue.getInstance()),
				new Edge<>(3, 4, NullValue.getInstance()),
				new Edge<>(3, 6, NullValue.getInstance()),
				new Edge<>(1, 3, NullValue.getInstance())), env);
	}

	return new SimpleEdgeStream<>(env.readTextFile(edgeInputPath)
			.flatMap(new FlatMapFunction<String, Edge<Integer, NullValue>>() {
				@Override
				public void flatMap(String s, Collector<Edge<Integer, NullValue>> out) {
					String[] columns = s.split("\\s");
					if (columns[0].equals("%")) {
						// lines starting with a "%" field carry no edge
						return;
					}
					int source = Integer.parseInt(columns[0]);
					int target = Integer.parseInt(columns[1]);
					out.collect(new Edge<>(source, target, NullValue.getInstance()));
				}
			}), env);
}
 
Example 24
Source Project: flink   Source File: WordCountWithAnonymousClass.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Word count over the default text lines, implemented with an anonymous
 * {@code FlatMapFunction}.
 *
 * <p>Lines are lower-cased and split on runs of non-word characters; every non-empty token is
 * emitted as {@code (token, 1)}, and the pairs are grouped by token and summed. The result is
 * printed, after which {@code env.execute("WordCount Example")} is called.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if building or running the program fails
 */
public static void main(String[] args) throws Exception {
	// execution environment and input
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<String> lines = StaticData.getDefaultTextLineDataSet(env);

	// tokenizer: line -> (word, 1) for every non-empty, lower-cased token
	FlatMapFunction<String, Tuple2<String, Integer>> tokenizer =
		new FlatMapFunction<String, Tuple2<String, Integer>>() {
			@Override
			public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
				for (String word : value.toLowerCase().split("\\W+")) {
					if (word.isEmpty()) {
						continue;
					}
					out.collect(new Tuple2<String, Integer>(word, 1));
				}
			}
		};

	// group by the word (field 0) and add up the ones (field 1)
	DataSet<Tuple2<String, Integer>> counts = lines
		.flatMap(tokenizer)
		.groupBy(0)
		.sum(1);

	// emit result
	counts.print();

	// execute program
	env.execute("WordCount Example");
}
 
Example 25
Source Project: flink   Source File: LookupJoinRunner.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a runner that keeps the given generated code and join settings; the constructor only
 * stores its arguments.
 *
 * @param generatedFetcher   generated {@code FlatMapFunction} over {@code RowData}
 * @param generatedCollector generated {@code TableFunctionCollector} over {@code RowData}
 * @param isLeftOuterJoin    whether the join is a left outer join
 * @param tableFieldsCount   the table fields count to keep
 */
public LookupJoinRunner(
		GeneratedFunction<FlatMapFunction<RowData, RowData>> generatedFetcher,
		GeneratedCollector<TableFunctionCollector<RowData>> generatedCollector,
		boolean isLeftOuterJoin,
		int tableFieldsCount) {
	// generated code
	this.generatedFetcher = generatedFetcher;
	this.generatedCollector = generatedCollector;

	// join settings
	this.tableFieldsCount = tableFieldsCount;
	this.isLeftOuterJoin = isLeftOuterJoin;
}
 
Example 26
Source Project: flink   Source File: ChainTaskTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs a batch task with a flat-map driver followed by ten chained flat-map drivers that use
 * {@code MockDuplicateLastValueMapFunction}, and checks the size of the output list.
 *
 * <p>The expected size is {@code keyCnt * valCnt + numChainedTasks}, i.e. one record more than
 * the generated input per chained task.
 */
@Test
public void testBatchTaskOutputInCloseMethod() {
	final int numChainedTasks = 10;
	final int keyCnt = 100;
	final int valCnt = 10;
	final int expectedRecords = keyCnt * valCnt + numChainedTasks;
	try {
		initEnvironment(MEMORY_MANAGER_SIZE, NETWORK_BUFFER_SIZE);
		addInput(new UniformRecordGenerator(keyCnt, valCnt, false), 0);
		addOutput(outList);
		registerTask(FlatMapDriver.class, MockMapStub.class);

		// append the chained tasks, each forwarding to the next
		for (int chainIndex = 0; chainIndex < numChainedTasks; chainIndex++) {
			final TaskConfig chainedConfig = new TaskConfig(new Configuration());
			chainedConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
			chainedConfig.setOutputSerializer(serFact);
			chainedConfig.setStubWrapper(
				new UserCodeClassWrapper<>(MockDuplicateLastValueMapFunction.class));
			getTaskConfig().addChainedTask(
				ChainedFlatMapDriver.class, chainedConfig, "chained-" + chainIndex);
		}

		final BatchTask<FlatMapFunction<Record, Record>, Record> task =
			new BatchTask<>(mockEnv);
		task.invoke();

		Assert.assertEquals(expectedRecords, outList.size());
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example 27
Source Project: Flink-CEPplus   Source File: LambdaExtractionTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks the return type extracted for a flat-map lambda over nested tuples.
 *
 * <p>When the extractor yields something other than a {@code MissingTypeInfo}, the type must be
 * a tuple of arity 2 whose first field is itself a tuple and whose second field is the string
 * type. When a {@code MissingTypeInfo} is returned, nothing is asserted.
 */
@Test
public void testFlatMapLambda() {
	FlatMapFunction<Tuple2<Tuple1<Integer>, Boolean>, Tuple2<Tuple1<Integer>, String>> f = (i, out) -> out.collect(null);

	TypeInformation<?> ti = TypeExtractor.getFlatMapReturnTypes(f, NESTED_TUPLE_BOOLEAN_TYPE, null, true);
	if (!(ti instanceof MissingTypeInfo)) {
		assertTrue(ti.isTupleType());
		assertEquals(2, ti.getArity());
		assertTrue(((TupleTypeInfo<?>) ti).getTypeAt(0).isTupleType());
		// expected value first, so that a failure message reports expected/actual the right way round
		assertEquals(BasicTypeInfo.STRING_TYPE_INFO, ((TupleTypeInfo<?>) ti).getTypeAt(1));
	}
}
 
Example 28
/**
 * Translates a text write: every input value is converted with {@code toString()} and the
 * resulting strings are written as text under the transform's filename prefix, overwriting
 * existing output.
 *
 * <p>The validation flag, the filename suffix and the shard name template are not translated;
 * their values are only reported through warnings. A positive shard count becomes the
 * parallelism of the sink.
 *
 * @param transform the write transform being translated
 * @param context   supplies the input of the transform and its data stream
 */
@Override
public void translateNode(TextIO.Write.Bound<T> transform, FlinkStreamingTranslationContext context) {
	PValue input = context.getInput(transform);
	DataStream<WindowedValue<T>> windowedInput = context.getInputDataStream(input);

	String prefix = transform.getFilenamePrefix();
	String suffix = transform.getFilenameSuffix();
	boolean validationRequested = transform.needsValidation();
	int shardCount = transform.getNumShards();
	String shardTemplate = transform.getShardNameTemplate();

	// TODO: Implement these. We need Flink support for this.
	LOG.warn("Translation of TextIO.Write.needsValidation not yet supported. Is: {}.", validationRequested);
	LOG.warn("Translation of TextIO.Write.filenameSuffix not yet supported. Is: {}.", suffix);
	LOG.warn("Translation of TextIO.Write.shardNameTemplate not yet supported. Is: {}.", shardTemplate);

	// drop the windowing wrapper and keep the textual form of the payload
	FlatMapFunction<WindowedValue<T>, String> toText = new FlatMapFunction<WindowedValue<T>, String>() {
		@Override
		public void flatMap(WindowedValue<T> value, Collector<String> out) throws Exception {
			out.collect(value.getValue().toString());
		}
	};
	DataStream<String> textLines = windowedInput.flatMap(toText);
	DataStreamSink<String> textSink = textLines.writeAsText(prefix, FileSystem.WriteMode.OVERWRITE);

	if (shardCount > 0) {
		textSink.setParallelism(shardCount);
	}
}
 
Example 29
/**
 * Creates the operator under test: a {@code FlatMapOperatorBase} around {@code udf}, typed
 * string-to-string and named {@code "flatMap on Collections"}.
 *
 * @param udf the flat-map function the operator should run
 * @return the operator around {@code udf}
 */
private FlatMapOperatorBase<String, String, FlatMapFunction<String, String>> getTestFlatMapOperator(
		FlatMapFunction<String, String> udf) {

	return new FlatMapOperatorBase<String, String, FlatMapFunction<String, String>>(
			udf,
			new UnaryOperatorInformation<String, String>(
					BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO),
			"flatMap on Collections");
}
 
Example 30
Source Project: Flink-CEPplus   Source File: ChainedFlatMapDriver.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Instantiates the user's flat-map function from this driver's configuration, stores it as
 * this driver's mapper and hands it the UDF runtime context.
 *
 * @param parent the invokable this driver belongs to (not read by this method)
 */
@Override
public void setup(AbstractInvokable parent) {
	// the raw FlatMapFunction.class token makes the assignment unchecked
	@SuppressWarnings("unchecked")
	final FlatMapFunction<IT, OT> userFunction =
		BatchTask.instantiateUserCode(this.config, userCodeClassLoader, FlatMapFunction.class);

	this.mapper = userFunction;
	FunctionUtils.setFunctionRuntimeContext(userFunction, getUdfRuntimeContext());
}