org.apache.flink.api.java.utils.DataSetUtils Java Examples

The following examples show how to use org.apache.flink.api.java.utils.DataSetUtils. Each example is taken from an open source project; the source file and originating project are noted above the code.
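Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) contrasting the two id-assignment helpers that several of the examples exercise. The class name DataSetUtilsSketch is ours; everything else is the public Flink DataSet API.

import java.util.List;

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.utils.DataSetUtils;

public class DataSetUtilsSketch {
	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		DataSet<String> words = env.fromElements("a", "b", "c");

		// zipWithIndex assigns consecutive indices 0..n-1; it needs an extra
		// pass to count the elements in each partition before assigning.
		List<Tuple2<Long, String>> indexed = DataSetUtils.zipWithIndex(words).collect();

		// zipWithUniqueId works in a single pass; the ids are unique but
		// not necessarily consecutive.
		List<Tuple2<Long, String>> tagged = DataSetUtils.zipWithUniqueId(words).collect();

		System.out.println(indexed);
		System.out.println(tagged);
	}
}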
Example #1
Source File: DataSetUtilsITCase.java    From flink with Apache License 2.0
@Test
public void testZipWithUniqueId() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	long expectedSize = 100L;
	DataSet<Long> numbers = env.generateSequence(1L, expectedSize);

	DataSet<Long> ids = DataSetUtils.zipWithUniqueId(numbers).map(new MapFunction<Tuple2<Long, Long>, Long>() {
		@Override
		public Long map(Tuple2<Long, Long> value) throws Exception {
			return value.f0;
		}
	});

	Set<Long> result = new HashSet<>(ids.collect());

	Assert.assertEquals(expectedSize, result.size());
}
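Note that zipWithUniqueId assigns its ids in a single pass: they are guaranteed unique but not necessarily consecutive, which is why the test above only checks that the number of distinct ids matches the input size.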
 
Example #2
Source File: DataSetUtilsITCase.java    From flink with Apache License 2.0
@Test
public void testZipWithIndex() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	long expectedSize = 100L;
	DataSet<Long> numbers = env.generateSequence(0, expectedSize - 1);

	List<Tuple2<Long, Long>> result = new ArrayList<>(DataSetUtils.zipWithIndex(numbers).collect());

	Assert.assertEquals(expectedSize, result.size());
	// sort result by created index
	Collections.sort(result, new Comparator<Tuple2<Long, Long>>() {
		@Override
		public int compare(Tuple2<Long, Long> o1, Tuple2<Long, Long> o2) {
			return o1.f0.compareTo(o2.f0);
		}
	});
	// test if index is consecutive
	for (int i = 0; i < expectedSize; i++) {
		Assert.assertEquals(i, result.get(i).f0.longValue());
	}
}
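In contrast to zipWithUniqueId, zipWithIndex produces consecutive indices starting at 0, at the cost of a first phase that counts the elements per partition; the test therefore sorts by index and verifies contiguity.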
 
Example #3
Source File: SplitBatchOp.java    From Alink with Apache License 2.0
@Override
public SplitBatchOp linkFrom(BatchOperator<?>... inputs) {
    BatchOperator<?> in = checkAndGetFirst(inputs);
    final double fraction = getFraction();
    if (fraction < 0. || fraction > 1.0) {
        throw new RuntimeException("invalid fraction " + fraction);
    }

    DataSet<Row> rows = in.getDataSet();

    DataSet<Tuple2<Integer, Long>> countsPerPartition = DataSetUtils.countElementsPerPartition(rows);
    DataSet<long[]> numPickedPerPartition = countsPerPartition
        .mapPartition(new CountInPartition(fraction))
        .setParallelism(1)
        .name("decide_count_of_each_partition");

    DataSet<Row> out = rows
        .mapPartition(new PickInPartition())
        .withBroadcastSet(numPickedPerPartition, "counts")
        .name("pick_in_each_partition");

    this.setOutput(out, in.getSchema());
    this.setSideOutputTables(new Table[]{in.getOutputTable().minusAll(this.getOutputTable())});
    return this;
}
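The split is done in two phases: DataSetUtils.countElementsPerPartition first reports each partition's size, a parallelism-1 task then decides how many rows each partition should contribute for the given fraction, and that decision is broadcast to the per-partition pickers. The side output is the complement of the selected rows.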
 
Example #4
Source File: BaseComQueue.java    From Alink with Apache License 2.0
private <T> void createRelationshipAndCachedData(DataSet<T> data, final String key) {
	final int localSessionId = sessionId;
	if (cacheDataRel == null) {
		cacheDataRel = clearObjs(
			BatchOperator
				.getExecutionEnvironmentFromDataSets(data)
				.fromElements(new byte[0])
				.mapPartition(new MapPartitionFunction<byte[], byte[]>() {
					@Override
					public void mapPartition(Iterable<byte[]> values, Collector<byte[]> out) throws Exception {
						//pass
					}
				})
		);
	}

	DataSet<Tuple2<Integer, Long>> rowCount = DataSetUtils.countElementsPerPartition(data);

	cacheDataRel = data.mapPartition(new PutCachedData<T>(key, localSessionId))
		.withBroadcastSet(cacheDataRel, "rel")
		.withBroadcastSet(rowCount, "rowCount")
		.name("cachedDataRel@" + key);

	cacheDataObjNames.add(key);
}
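Here countElementsPerPartition is broadcast alongside the cache relation, presumably so that each task caching a slice of the data also knows the per-partition row counts.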
 
Example #5
Source File: SampleITCase.java    From flink with Apache License 2.0
private void verifySamplerWithFraction(boolean withReplacement, double fraction, long seed) throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	FlatMapOperator<Tuple3<Integer, Long, String>, String> ds = getSourceDataSet(env);
	MapPartitionOperator<String, String> sampled = DataSetUtils.sample(ds, withReplacement, fraction, seed);
	List<String> result = sampled.collect();
	containsResultAsText(result, getSourceStrings());
}
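With fraction-based sampling the output size is only approximately fraction * n (Bernoulli sampling without replacement, Poisson with replacement), so the test asserts containment in the source data rather than an exact size.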
 
Example #6
Source File: SampleWithSizeBatchOp.java    From Alink with Apache License 2.0
@Override
public SampleWithSizeBatchOp linkFrom(BatchOperator<?>... inputs) {
    BatchOperator<?> in = checkAndGetFirst(inputs);
    boolean withReplacement = getWithReplacement();
    int numSamples = getSize();
    DataSet<Row> rows = DataSetUtils.sampleWithSize(in.getDataSet(), withReplacement, numSamples);
    this.setOutput(rows, in.getSchema());
    return this;
}
 
Example #7
Source File: AppendIdBatchOp.java    From Alink with Apache License 2.0
public static Table appendId(
	DataSet <Row> dataSet,
	TableSchema schema,
	String appendIdColName,
	AppendType appendType,
	Long sessionId) {
	String[] rawColNames = schema.getFieldNames();
	TypeInformation[] rawColTypes = schema.getFieldTypes();

	String[] colNames = ArrayUtils.add(rawColNames, appendIdColName);
	TypeInformation[] colTypes = ArrayUtils.add(rawColTypes, appendIdColType);

	DataSet <Row> ret = null;

	switch (appendType) {
		case DENSE:
			ret = DataSetUtils.zipWithIndex(dataSet)
				.map(new TransTupleToRowMapper());
			break;
		case UNIQUE:
			// unique but not necessarily consecutive ids
			ret = DataSetUtils.zipWithUniqueId(dataSet)
				.map(new TransTupleToRowMapper());
			break;
		default:
			throw new IllegalArgumentException("Error append type.");
	}

	return DataSetConversionUtil.toTable(sessionId, ret, colNames, colTypes);
}
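The two append modes map directly onto the two DataSetUtils helpers: DENSE uses zipWithIndex for consecutive ids, UNIQUE uses zipWithUniqueId for ids that are unique but may have gaps.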
 
Example #8
Source File: MultilayerPerceptronTrainBatchOp.java    From Alink with Apache License 2.0
/**
 * Get distinct labels and assign each label an index.
 */
private static DataSet<Tuple2<Long, Object>> getDistinctLabels(BatchOperator data, final String labelColName) {
    data = data.select("`" + labelColName + "`").distinct();
    DataSet<Row> labelRows = data.getDataSet();
    return DataSetUtils.zipWithIndex(labelRows)
        .map(new MapFunction<Tuple2<Long, Row>, Tuple2<Long, Object>>() {
            @Override
            public Tuple2<Long, Object> map(Tuple2<Long, Row> value) throws Exception {
                return Tuple2.of(value.f0, value.f1.getField(0));
            }
        })
        .name("get_labels");
}
 
Example #9
Source File: JoinITCase.java    From flink with Apache License 2.0
@Test
public void testJoinWithRangePartitioning() throws Exception {
	/*
	 * Test Join on tuples with multiple key field positions and same customized distribution
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);

	env.setParallelism(4);
	TestDistribution testDis = new TestDistribution();
	DataSet<Tuple2<String, String>> joinDs =
			DataSetUtils.partitionByRange(ds1, testDis, 0, 1)
					.join(DataSetUtils.partitionByRange(ds2, testDis, 0, 4))
					.where(0, 1)
					.equalTo(0, 4)
					.with(new T3T5FlatJoin());

	List<Tuple2<String, String>> result = joinDs.collect();

	String expected = "Hi,Hallo\n" +
			"Hello,Hallo Welt\n" +
			"Hello world,Hallo Welt wie gehts?\n" +
			"Hello world,ABC\n" +
			"I am fine.,HIJ\n" +
			"I am fine.,IJK\n";

	compareResultAsTuples(result, expected);
}
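Range-partitioning both inputs with the same custom DataDistribution ensures that matching key ranges land in the same partitions, so the join (and the analogous coGroup in the next example) runs on identically distributed inputs; the trailing int arguments of partitionByRange are the key field positions.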
 
Example #10
Source File: CoGroupITCase.java    From flink with Apache License 2.0
@Test
public void testCoGroupWithRangePartitioning() throws Exception {
	/*
	 * Test coGroup on tuples with multiple key field positions and same customized distribution
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds1 = CollectionDataSets.get5TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.get3TupleDataSet(env);

	env.setParallelism(4);
	TestDistribution testDis = new TestDistribution();
	DataSet<Tuple3<Integer, Long, String>> coGrouped =
			DataSetUtils.partitionByRange(ds1, testDis, 0, 4)
					.coGroup(DataSetUtils.partitionByRange(ds2, testDis, 0, 1))
					.where(0, 4)
					.equalTo(0, 1)
					.with(new Tuple5Tuple3CoGroup());

	List<Tuple3<Integer, Long, String>> result = coGrouped.collect();

	String expected = "1,1,Hallo\n" +
			"2,2,Hallo Welt\n" +
			"3,2,Hallo Welt wie gehts?\n" +
			"3,2,ABC\n" +
			"5,3,HIJ\n" +
			"5,3,IJK\n";

	compareResultAsTuples(result, expected);
}
 
Example #11
Source File: SampleITCase.java    From flink with Apache License 2.0
private void verifySamplerWithFixedSize(boolean withReplacement, int numSamples, long seed) throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	FlatMapOperator<Tuple3<Integer, Long, String>, String> ds = getSourceDataSet(env);
	DataSet<String> sampled = DataSetUtils.sampleWithSize(ds, withReplacement, numSamples, seed);
	List<String> result = sampled.collect();
	assertEquals(numSamples, result.size());
	containsResultAsText(result, getSourceStrings());
}
 
Example #12
Source File: CustomDistributionITCase.java    From flink with Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testPartitionMoreThanDistribution() throws Exception {
	final TestDataDist2 dist = new TestDataDist2();

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
	DataSetUtils.partitionByRange(input, dist, 0, 1, 2);
}
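The expected IllegalArgumentException comes from requesting three partition key fields (0, 1, 2) against a distribution that, judging by its name, is defined over only two dimensions.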
 
Example #13
Source File: DataSetUtilsITCase.java    From flink with Apache License 2.0
@Test
public void testCountElementsPerPartition() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	long expectedSize = 100L;
	DataSet<Long> numbers = env.generateSequence(0, expectedSize - 1);

	DataSet<Tuple2<Integer, Long>> ds = DataSetUtils.countElementsPerPartition(numbers);

	Assert.assertEquals(env.getParallelism(), ds.count());
	Assert.assertEquals(expectedSize, ds.sum(1).collect().get(0).f1.longValue());
}
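countElementsPerPartition emits one Tuple2 of (subtask index, element count) per partition, so the number of result tuples equals the parallelism and the counts sum to the dataset size, which is exactly what the two assertions check.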
 
Example #14
Source File: DataSetUtilsITCase.java    From flink with Apache License 2.0
@Test
public void testIntegerDataSetChecksumHashCode() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Integer> ds = CollectionDataSets.getIntegerDataSet(env);

	Utils.ChecksumHashCode checksum = DataSetUtils.checksumHashCode(ds);
	Assert.assertEquals(15, checksum.getCount());
	Assert.assertEquals(55, checksum.getChecksum());
}
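Utils.ChecksumHashCode tracks an element count and the sum of the elements' hash codes; since Integer.hashCode() returns the value itself, the checksum here is simply the sum of the integers in the test dataset.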
 