org.apache.flink.api.java.operators.MapOperator Java Examples

The following examples show how to use org.apache.flink.api.java.operators.MapOperator. Each example is taken from an open-source project; the source file, project, and license are noted above it.
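As a quick orientation: a MapOperator is what DataSet#map returns, and since MapOperator extends DataSet it can be configured with operator-level settings and then used like any other data set. A minimal, self-contained sketch (class name and element values are invented for illustration):

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.MapOperator;

public class MapOperatorSketch {
	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// map() returns a MapOperator, which accepts operator-level settings
		// such as name() and setParallelism() before further use.
		MapOperator<Integer, String> mapped = env.fromElements(1, 2, 3)
			.map(i -> "value-" + i)
			.returns(Types.STRING); // declare the lambda's result type explicitly

		mapped.name("int-to-string").setParallelism(2);
		mapped.print(); // print() triggers execution for this sketch
	}
}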
Example #1
Source File: GroupingSetsITCase.java    From Flink-CEPplus with Apache License 2.0
@Before
public void setupTables() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	tableEnv = BatchTableEnvironment.create(env, new TableConfig());

	DataSet<Tuple3<Integer, Long, String>> dataSet = CollectionDataSets.get3TupleDataSet(env);
	tableEnv.registerDataSet(TABLE_NAME, dataSet);

	MapOperator<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>> dataSetWithNulls =
		dataSet.map(new MapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>() {

			@Override
			public Tuple3<Integer, Long, String> map(Tuple3<Integer, Long, String> value) throws Exception {
				// Null out the string field for "world" rows so the registered table also contains NULL values.
				if (value.f2.toLowerCase().contains("world")) {
					value.f2 = null;
				}
				return value;
			}
		});
	tableEnv.registerDataSet(TABLE_WITH_NULLS_NAME, dataSetWithNulls);
}
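
Note that the result of map() is passed straight to registerDataSet: MapOperator extends DataSet, so the transformed data set containing NULL values needs no extra conversion before being registered as a table.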
 
Example #2
Source File: CsvTableSink.java    From flink with Apache License 2.0
@Override
public void emitDataSet(DataSet<Row> dataSet) {
	MapOperator<Row, String> csvRows =
		dataSet.map(new CsvFormatter(fieldDelim == null ? "," : fieldDelim));

	DataSink<String> sink;
	if (writeMode != null) {
		sink = csvRows.writeAsText(path, writeMode);
	} else {
		sink = csvRows.writeAsText(path);
	}

	if (numFiles > 0) {
		csvRows.setParallelism(numFiles);
		sink.setParallelism(numFiles);
	}

	sink.name(TableConnectorUtils.generateRuntimeName(CsvTableSink.class, fieldNames));
}
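
The write-and-parallelism pattern above can be reproduced on a plain DataSet outside the sink class. A minimal sketch, with the output path and row contents invented for illustration:

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DataSink;
import org.apache.flink.core.fs.FileSystem.WriteMode;

public class WriteAsTextSketch {
	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		DataSet<String> rows = env.fromElements("a,1", "b,2");

		// As in emitDataSet above: write as text, optionally overwriting,
		// and pin the parallelism to control the number of output files.
		DataSink<String> sink = rows.writeAsText("/tmp/csv-out", WriteMode.OVERWRITE);
		sink.setParallelism(1); // a single output file

		env.execute("csv write sketch");
	}
}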
 
Example #3
Source File: GroupingSetsITCase.java    From flink with Apache License 2.0
@Before
public void setupTables() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	tableEnv = BatchTableEnvironment.create(env, new TableConfig());

	DataSet<Tuple3<Integer, Long, String>> dataSet = CollectionDataSets.get3TupleDataSet(env);
	tableEnv.createTemporaryView(TABLE_NAME, dataSet);

	MapOperator<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>> dataSetWithNulls =
		dataSet.map(new MapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>() {

			@Override
			public Tuple3<Integer, Long, String> map(Tuple3<Integer, Long, String> value) throws Exception {
				if (value.f2.toLowerCase().contains("world")) {
					value.f2 = null;
				}
				return value;
			}
		});
	tableEnv.createTemporaryView(TABLE_WITH_NULLS_NAME, dataSetWithNulls);
}
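
The class name suggests these tables feed GROUPING SETS queries. A hypothetical follow-up query, not taken from the test itself (the tuple fields of the registered data set default to f0, f1 and f2):

// Hypothetical: aggregate over grouping sets of the second and third fields.
Table result = tableEnv.sqlQuery(
	"SELECT f1, f2, COUNT(f0) AS cnt FROM " + TABLE_WITH_NULLS_NAME +
	" GROUP BY GROUPING SETS ((f1), (f2))");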
 
Example #4
Source File: CsvTableSink.java    From flink with Apache License 2.0
@Override
public DataSink<?> consumeDataSet(DataSet<Row> dataSet) {
	MapOperator<Row, String> csvRows =
		dataSet.map(new CsvFormatter(fieldDelim == null ? "," : fieldDelim));

	DataSink<String> sink;
	if (writeMode != null) {
		sink = csvRows.writeAsText(path, writeMode);
	} else {
		sink = csvRows.writeAsText(path);
	}

	if (numFiles > 0) {
		csvRows.setParallelism(numFiles);
		sink.setParallelism(numFiles);
	}

	return sink.name(TableConnectorUtils.generateRuntimeName(CsvTableSink.class, fieldNames));
}
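
Unlike the void emitDataSet in Example #2, this later revision of the sink interface returns the created DataSink, so the caller receives a handle to the already-named sink instead of the method registering it purely as a side effect.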
 
Example #5
Source File: FlinkBatchTransformTranslators.java    From beam with Apache License 2.0
@Override
public void translateNode(
    Reshuffle<K, InputT> transform, FlinkBatchTranslationContext context) {
  final DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
      context.getInputDataSet(context.getInput(transform));
  // Construct an instance of CoderTypeInformation which contains the pipeline options.
  // This will be used to initialize FileSystems.
  final CoderTypeInformation<WindowedValue<KV<K, InputT>>> outputType =
      ((CoderTypeInformation<WindowedValue<KV<K, InputT>>>) inputDataSet.getType())
          .withPipelineOptions(context.getPipelineOptions());
  // We insert a NOOP here to initialize the FileSystems via the above CoderTypeInformation.
  // The output type coder may be relying on file system access. The shuffled data may have to
  // be deserialized on a different machine using this coder where FileSystems has not been
  // initialized.
  final DataSet<WindowedValue<KV<K, InputT>>> retypedDataSet =
      new MapOperator<>(
          inputDataSet,
          outputType,
          FlinkIdentityFunction.of(),
          getCurrentTransformName(context));
  final Configuration partitionOptions = new Configuration();
  partitionOptions.setString(
      Optimizer.HINT_SHIP_STRATEGY, Optimizer.HINT_SHIP_STRATEGY_REPARTITION);
  context.setOutputDataSet(
      context.getOutput(transform),
      retypedDataSet.map(FlinkIdentityFunction.of()).withParameters(partitionOptions));
}
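
The repartition hint used here is plain Flink batch API and can be sketched without Beam. A hedged sketch (element values invented; the Optimizer constants live in the flink-optimizer module, and this assumes the DataSet optimizer honors ship-strategy hints passed via withParameters, which is exactly what the translator above relies on):

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.Optimizer;

public class RepartitionHintSketch {
	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		DataSet<Integer> input = env.fromElements(1, 2, 3);

		// Ask the optimizer to ship data with a repartition strategy
		// before the identity map, mirroring the translator above.
		Configuration partitionOptions = new Configuration();
		partitionOptions.setString(
			Optimizer.HINT_SHIP_STRATEGY, Optimizer.HINT_SHIP_STRATEGY_REPARTITION);

		input.map(x -> x)
			.returns(Types.INT)
			.withParameters(partitionOptions)
			.print();
	}
}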
 
Example #6
Source File: DataSet.java    From Flink-CEPplus with Apache License 2.0
/**
 * Applies a Map transformation on this DataSet.
 *
 * <p>The transformation calls a {@link org.apache.flink.api.common.functions.MapFunction} for each element of the DataSet.
 * Each MapFunction call returns exactly one element.
 *
 * @param mapper The MapFunction that is called for each element of the DataSet.
 * @return A MapOperator that represents the transformed DataSet.
 *
 * @see org.apache.flink.api.common.functions.MapFunction
 * @see org.apache.flink.api.common.functions.RichMapFunction
 * @see MapOperator
 */
public <R> MapOperator<T, R> map(MapFunction<T, R> mapper) {
	if (mapper == null) {
		throw new NullPointerException("Map function must not be null.");
	}

	String callLocation = Utils.getCallLocationName();
	TypeInformation<R> resultType = TypeExtractor.getMapReturnTypes(mapper, getType(), callLocation, true);
	return new MapOperator<>(this, resultType, clean(mapper), callLocation);
}
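
Two details are worth noting here: the null check fails at graph-construction time rather than at execution, and the trailing true argument passed to TypeExtractor.getMapReturnTypes allows the extracted type to be missing, which is what makes the returns(...) escape hatch usable for type-erased lambdas. A brief sketch of that escape hatch, assuming words is a DataSet<String>:

// The MapOperator returned by map() can be given its result type explicitly
// when extraction cannot determine it from a lambda.
MapOperator<String, Integer> lengths = words
	.map(w -> w.length())
	.returns(Types.INT);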
 