Java Code Examples for org.apache.flink.api.java.utils.DataSetUtils

The following examples show how to use org.apache.flink.api.java.utils.DataSetUtils. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Flink-CEPplus   Source File: DataSetUtilsITCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Zips a generated sequence of {@code expectedSize} numbers with an index and checks that,
 * once ordered by that index (field {@code f0}), the indices run consecutively from zero.
 */
@Test
public void testZipWithIndex() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	final long expectedSize = 100L;
	DataSet<Long> numbers = env.generateSequence(0, expectedSize - 1);
	DataSet<Tuple2<Long, Long>> indexed = DataSetUtils.zipWithIndex(numbers);

	// copy the collected pairs into a list of our own, because it is sorted in place below
	List<Tuple2<Long, Long>> zipped = new ArrayList<>(indexed.collect());
	Assert.assertEquals(expectedSize, zipped.size());

	// order the pairs by the index stored in f0
	zipped.sort(Comparator.comparing((Tuple2<Long, Long> pair) -> pair.f0));

	// after sorting, the n-th pair has to carry index n
	long expectedIndex = 0L;
	for (Tuple2<Long, Long> pair : zipped) {
		Assert.assertEquals(expectedIndex, pair.f0.longValue());
		expectedIndex++;
	}
}
 
Example 2
Source Project: Flink-CEPplus   Source File: DataSetUtilsITCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Zips a generated sequence of {@code expectedSize} numbers with unique ids and checks that
 * the number of distinct ids (field {@code f0}) equals {@code expectedSize}.
 */
@Test
public void testZipWithUniqueId() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	final long expectedSize = 100L;
	DataSet<Long> numbers = env.generateSequence(1L, expectedSize);

	DataSet<Tuple2<Long, Long>> zipped = DataSetUtils.zipWithUniqueId(numbers);

	// keeps only field f0 (the id that is verified below) of every zipped pair
	MapFunction<Tuple2<Long, Long>, Long> idExtractor = new MapFunction<Tuple2<Long, Long>, Long>() {
		@Override
		public Long map(Tuple2<Long, Long> pair) throws Exception {
			return pair.f0;
		}
	};
	DataSet<Long> ids = zipped.map(idExtractor);

	// equal ids collapse inside the set, so its size is the number of distinct ids
	Set<Long> distinctIds = new HashSet<>(ids.collect());

	Assert.assertEquals(expectedSize, distinctIds.size());
}
 
Example 3
Source Project: flink   Source File: DataSetUtilsITCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Zips a generated sequence of {@code expectedSize} numbers with an index and checks that,
 * once ordered by that index (field {@code f0}), the indices run consecutively from zero.
 */
@Test
public void testZipWithIndex() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	final long expectedSize = 100L;
	DataSet<Long> numbers = env.generateSequence(0, expectedSize - 1);
	DataSet<Tuple2<Long, Long>> indexed = DataSetUtils.zipWithIndex(numbers);

	// copy the collected pairs into a list of our own, because it is sorted in place below
	List<Tuple2<Long, Long>> zipped = new ArrayList<>(indexed.collect());
	Assert.assertEquals(expectedSize, zipped.size());

	// order the pairs by the index stored in f0
	zipped.sort(Comparator.comparing((Tuple2<Long, Long> pair) -> pair.f0));

	// after sorting, the n-th pair has to carry index n
	long expectedIndex = 0L;
	for (Tuple2<Long, Long> pair : zipped) {
		Assert.assertEquals(expectedIndex, pair.f0.longValue());
		expectedIndex++;
	}
}
 
Example 4
Source Project: flink   Source File: DataSetUtilsITCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Zips a generated sequence of {@code expectedSize} numbers with unique ids and checks that
 * the number of distinct ids (field {@code f0}) equals {@code expectedSize}.
 */
@Test
public void testZipWithUniqueId() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	final long expectedSize = 100L;
	DataSet<Long> numbers = env.generateSequence(1L, expectedSize);

	DataSet<Tuple2<Long, Long>> zipped = DataSetUtils.zipWithUniqueId(numbers);

	// keeps only field f0 (the id that is verified below) of every zipped pair
	MapFunction<Tuple2<Long, Long>, Long> idExtractor = new MapFunction<Tuple2<Long, Long>, Long>() {
		@Override
		public Long map(Tuple2<Long, Long> pair) throws Exception {
			return pair.f0;
		}
	};
	DataSet<Long> ids = zipped.map(idExtractor);

	// equal ids collapse inside the set, so its size is the number of distinct ids
	Set<Long> distinctIds = new HashSet<>(ids.collect());

	Assert.assertEquals(expectedSize, distinctIds.size());
}
 
Example 5
Source Project: Alink   Source File: SplitBatchOp.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Links this operator to its first input and splits that input's rows into a main output
 * and a side output.
 *
 * <p>The per-partition element counts of the input are passed to {@code CountInPartition}
 * (run with parallelism 1) and the resulting array is broadcast as {@code "counts"} to
 * {@code PickInPartition}, which produces the main output. The main output keeps the input
 * schema; the side output table is the input table {@code minusAll} the main output table.
 *
 * @param inputs upstream operators; only the one returned by {@code checkAndGetFirst} is used
 * @return this operator
 * @throws IllegalArgumentException if the configured fraction is NaN or outside {@code [0, 1]}
 */
@Override
public SplitBatchOp linkFrom(BatchOperator<?>... inputs) {
    BatchOperator<?> in = checkAndGetFirst(inputs);
    final double fraction = getFraction();
    // Negated range check: every comparison with NaN is false, so NaN is rejected here as well.
    if (!(fraction >= 0.0 && fraction <= 1.0)) {
        throw new IllegalArgumentException("invalid fraction " + fraction);
    }

    DataSet<Row> rows = in.getDataSet();

    DataSet<Tuple2<Integer, Long>> countsPerPartition = DataSetUtils.countElementsPerPartition(rows);
    // Single task, so one instance sees the counts of all partitions.
    DataSet<long[]> numPickedPerPartition = countsPerPartition
        .mapPartition(new CountInPartition(fraction))
        .setParallelism(1)
        .name("decide_count_of_each_partition");

    DataSet<Row> out = rows
        .mapPartition(new PickInPartition())
        .withBroadcastSet(numPickedPerPartition, "counts")
        .name("pick_in_each_partition");

    this.setOutput(out, in.getSchema());
    // Side output: whatever is in the input table but not in the main output.
    this.setSideOutputTables(new Table[]{in.getOutputTable().minusAll(this.getOutputTable())});
    return this;
}
 
Example 6
Source Project: Alink   Source File: BaseComQueue.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Chains a {@code PutCachedData} step for {@code data} onto {@code cacheDataRel} and records
 * {@code key} in {@code cacheDataObjNames}.
 *
 * @param data the data set handed to {@code PutCachedData}
 * @param key  the name the data is registered under
 */
private <T> void createRelationshipAndCachedData(DataSet<T> data, final String key) {
	final int currentSession = sessionId;

	// First call only: seed cacheDataRel with a one-element data set whose mapPartition emits nothing.
	if (null == cacheDataRel) {
		MapPartitionFunction<byte[], byte[]> emitNothing = new MapPartitionFunction<byte[], byte[]>() {
			@Override
			public void mapPartition(Iterable<byte[]> values, Collector<byte[]> out) throws Exception {
				// intentionally empty
			}
		};
		cacheDataRel = clearObjs(
			BatchOperator
				.getExecutionEnvironmentFromDataSets(data)
				.fromElements(new byte[0])
				.mapPartition(emitNothing)
		);
	}

	DataSet<Tuple2<Integer, Long>> partitionCounts = DataSetUtils.countElementsPerPartition(data);

	// The previous relation and the per-partition counts are both made available as broadcast sets.
	// NOTE(review): the "[email protected]" name prefix looks like e-mail-obfuscation residue from
	// a web scrape rather than the intended operator name -- confirm against the upstream source.
	cacheDataRel = data
		.mapPartition(new PutCachedData<T>(key, currentSession))
		.withBroadcastSet(cacheDataRel, "rel")
		.withBroadcastSet(partitionCounts, "rowCount")
		.name("[email protected]" + key);

	cacheDataObjNames.add(key);
}
 
Example 7
Source Project: flink   Source File: DataSetUtilsITCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Zips a generated sequence of {@code expectedSize} numbers with an index and checks that,
 * once ordered by that index (field {@code f0}), the indices run consecutively from zero.
 */
@Test
public void testZipWithIndex() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	final long expectedSize = 100L;
	DataSet<Long> numbers = env.generateSequence(0, expectedSize - 1);
	DataSet<Tuple2<Long, Long>> indexed = DataSetUtils.zipWithIndex(numbers);

	// copy the collected pairs into a list of our own, because it is sorted in place below
	List<Tuple2<Long, Long>> zipped = new ArrayList<>(indexed.collect());
	Assert.assertEquals(expectedSize, zipped.size());

	// order the pairs by the index stored in f0
	zipped.sort(Comparator.comparing((Tuple2<Long, Long> pair) -> pair.f0));

	// after sorting, the n-th pair has to carry index n
	long expectedIndex = 0L;
	for (Tuple2<Long, Long> pair : zipped) {
		Assert.assertEquals(expectedIndex, pair.f0.longValue());
		expectedIndex++;
	}
}
 
Example 8
Source Project: flink   Source File: DataSetUtilsITCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Zips a generated sequence of {@code expectedSize} numbers with unique ids and checks that
 * the number of distinct ids (field {@code f0}) equals {@code expectedSize}.
 */
@Test
public void testZipWithUniqueId() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	final long expectedSize = 100L;
	DataSet<Long> numbers = env.generateSequence(1L, expectedSize);

	DataSet<Tuple2<Long, Long>> zipped = DataSetUtils.zipWithUniqueId(numbers);

	// keeps only field f0 (the id that is verified below) of every zipped pair
	MapFunction<Tuple2<Long, Long>, Long> idExtractor = new MapFunction<Tuple2<Long, Long>, Long>() {
		@Override
		public Long map(Tuple2<Long, Long> pair) throws Exception {
			return pair.f0;
		}
	};
	DataSet<Long> ids = zipped.map(idExtractor);

	// equal ids collapse inside the set, so its size is the number of distinct ids
	Set<Long> distinctIds = new HashSet<>(ids.collect());

	Assert.assertEquals(expectedSize, distinctIds.size());
}
 
Example 9
Source Project: Flink-CEPplus   Source File: JoinITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Joins a 3-tuple and a 5-tuple data set on two key fields after range-partitioning both
 * inputs on those same fields with one shared custom distribution, and compares the joined
 * pairs with the expected rows.
 */
@Test
public void testJoinWithRangePartitioning() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> left = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> right = CollectionDataSets.get5TupleDataSet(env);

	env.setParallelism(4);
	TestDistribution distribution = new TestDistribution();

	// each side is partitioned on exactly the fields it is joined on below
	DataSet<Tuple3<Integer, Long, String>> leftPartitioned =
			DataSetUtils.partitionByRange(left, distribution, 0, 1);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> rightPartitioned =
			DataSetUtils.partitionByRange(right, distribution, 0, 4);

	DataSet<Tuple2<String, String>> joined = leftPartitioned
			.join(rightPartitioned)
			.where(0, 1)
			.equalTo(0, 4)
			.with(new T3T5FlatJoin());

	List<Tuple2<String, String>> actual = joined.collect();

	String expected =
			"Hi,Hallo\n"
			+ "Hello,Hallo Welt\n"
			+ "Hello world,Hallo Welt wie gehts?\n"
			+ "Hello world,ABC\n"
			+ "I am fine.,HIJ\n"
			+ "I am fine.,IJK\n";

	compareResultAsTuples(actual, expected);
}
 
Example 10
Source Project: Flink-CEPplus   Source File: CoGroupITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Co-groups a 5-tuple and a 3-tuple data set on two key fields after range-partitioning both
 * inputs on those same fields with one shared custom distribution, and compares the result
 * with the expected rows.
 */
@Test
public void testCoGroupWithRangePartitioning() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple5<Integer, Long, Integer, String, Long>> first = CollectionDataSets.get5TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> second = CollectionDataSets.get3TupleDataSet(env);

	env.setParallelism(4);
	TestDistribution distribution = new TestDistribution();

	// each side is partitioned on exactly the fields it is co-grouped on below
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> firstPartitioned =
			DataSetUtils.partitionByRange(first, distribution, 0, 4);
	DataSet<Tuple3<Integer, Long, String>> secondPartitioned =
			DataSetUtils.partitionByRange(second, distribution, 0, 1);

	DataSet<Tuple3<Integer, Long, String>> coGrouped = firstPartitioned
			.coGroup(secondPartitioned)
			.where(0, 4)
			.equalTo(0, 1)
			.with(new Tuple5Tuple3CoGroup());

	List<Tuple3<Integer, Long, String>> actual = coGrouped.collect();

	String expected =
			"1,1,Hallo\n"
			+ "2,2,Hallo Welt\n"
			+ "3,2,Hallo Welt wie gehts?\n"
			+ "3,2,ABC\n"
			+ "5,3,HIJ\n"
			+ "5,3,IJK\n";

	compareResultAsTuples(actual, expected);
}
 
Example 11
Source Project: Flink-CEPplus   Source File: SampleITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Samples the source data set with {@code DataSetUtils.sample} and passes the collected
 * sample, together with the source strings, to {@code containsResultAsText}.
 *
 * @param withReplacement forwarded to the sampler
 * @param fraction        forwarded to the sampler
 * @param seed            forwarded to the sampler
 */
private void verifySamplerWithFraction(boolean withReplacement, double fraction, long seed) throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	FlatMapOperator<Tuple3<Integer, Long, String>, String> source = getSourceDataSet(env);

	List<String> sampledElements = DataSetUtils
		.sample(source, withReplacement, fraction, seed)
		.collect();

	containsResultAsText(sampledElements, getSourceStrings());
}
 
Example 12
Source Project: Flink-CEPplus   Source File: SampleITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Draws a fixed-size sample from the source data set, asserts that exactly
 * {@code numSamples} elements come back, and passes the sample together with the source
 * strings to {@code containsResultAsText}.
 *
 * @param withReplacement forwarded to the sampler
 * @param numSamples      requested and expected sample size
 * @param seed            forwarded to the sampler
 */
private void verifySamplerWithFixedSize(boolean withReplacement, int numSamples, long seed) throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	FlatMapOperator<Tuple3<Integer, Long, String>, String> source = getSourceDataSet(env);

	List<String> sampledElements = DataSetUtils
		.sampleWithSize(source, withReplacement, numSamples, seed)
		.collect();

	assertEquals(numSamples, sampledElements.size());
	containsResultAsText(sampledElements, getSourceStrings());
}
 
Example 13
Source Project: Flink-CEPplus   Source File: CustomDistributionITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Expects {@code DataSetUtils.partitionByRange} to throw an {@link IllegalArgumentException}
 * when a 3-tuple data set is partitioned on fields 0, 1 and 2 with a {@code TestDataDist2}.
 */
@Test(expected = IllegalArgumentException.class)
public void testPartitionMoreThanDistribution() throws Exception {
	final TestDataDist2 distribution = new TestDataDist2();
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	final DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.get3TupleDataSet(env);

	// the call itself has to fail; nothing in this test collects or executes a job
	DataSetUtils.partitionByRange(tuples, distribution, 0, 1, 2);
}
 
Example 14
Source Project: Flink-CEPplus   Source File: DataSetUtilsITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Counts the elements per partition of a generated sequence and checks that there is one
 * count record per unit of parallelism and that the counts add up to {@code expectedSize}.
 */
@Test
public void testCountElementsPerPartition() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	final long expectedSize = 100L;
	DataSet<Long> numbers = env.generateSequence(0, expectedSize - 1);

	DataSet<Tuple2<Integer, Long>> countsPerPartition = DataSetUtils.countElementsPerPartition(numbers);

	// number of count records has to match the environment's parallelism
	final int parallelism = env.getParallelism();
	final long countRecords = countsPerPartition.count();
	Assert.assertEquals(parallelism, countRecords);

	// summing field 1 over all records yields the total number of elements
	List<Tuple2<Integer, Long>> summed = countsPerPartition.sum(1).collect();
	Assert.assertEquals(expectedSize, summed.get(0).f1.longValue());
}
 
Example 15
Source Project: Flink-CEPplus   Source File: DataSetUtilsITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Computes {@code DataSetUtils.checksumHashCode} for the integer test data set and checks
 * that it reports a count of 15 and a checksum of 55.
 */
@Test
public void testIntegerDataSetChecksumHashCode() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Integer> ds = CollectionDataSets.getIntegerDataSet(env);

	Utils.ChecksumHashCode checksum = DataSetUtils.checksumHashCode(ds);
	// JUnit's contract is assertEquals(expected, actual); the expected constant goes first so
	// that a failure reports "expected:<15> but was:<...>" rather than the reverse.
	Assert.assertEquals(15, checksum.getCount());
	Assert.assertEquals(55, checksum.getChecksum());
}
 
Example 16
Source Project: flink   Source File: JoinITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Joins a 3-tuple and a 5-tuple data set on two key fields after range-partitioning both
 * inputs on those same fields with one shared custom distribution, and compares the joined
 * pairs with the expected rows.
 */
@Test
public void testJoinWithRangePartitioning() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> left = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> right = CollectionDataSets.get5TupleDataSet(env);

	env.setParallelism(4);
	TestDistribution distribution = new TestDistribution();

	// each side is partitioned on exactly the fields it is joined on below
	DataSet<Tuple3<Integer, Long, String>> leftPartitioned =
			DataSetUtils.partitionByRange(left, distribution, 0, 1);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> rightPartitioned =
			DataSetUtils.partitionByRange(right, distribution, 0, 4);

	DataSet<Tuple2<String, String>> joined = leftPartitioned
			.join(rightPartitioned)
			.where(0, 1)
			.equalTo(0, 4)
			.with(new T3T5FlatJoin());

	List<Tuple2<String, String>> actual = joined.collect();

	String expected =
			"Hi,Hallo\n"
			+ "Hello,Hallo Welt\n"
			+ "Hello world,Hallo Welt wie gehts?\n"
			+ "Hello world,ABC\n"
			+ "I am fine.,HIJ\n"
			+ "I am fine.,IJK\n";

	compareResultAsTuples(actual, expected);
}
 
Example 17
Source Project: flink   Source File: CoGroupITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Co-groups a 5-tuple and a 3-tuple data set on two key fields after range-partitioning both
 * inputs on those same fields with one shared custom distribution, and compares the result
 * with the expected rows.
 */
@Test
public void testCoGroupWithRangePartitioning() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple5<Integer, Long, Integer, String, Long>> first = CollectionDataSets.get5TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> second = CollectionDataSets.get3TupleDataSet(env);

	env.setParallelism(4);
	TestDistribution distribution = new TestDistribution();

	// each side is partitioned on exactly the fields it is co-grouped on below
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> firstPartitioned =
			DataSetUtils.partitionByRange(first, distribution, 0, 4);
	DataSet<Tuple3<Integer, Long, String>> secondPartitioned =
			DataSetUtils.partitionByRange(second, distribution, 0, 1);

	DataSet<Tuple3<Integer, Long, String>> coGrouped = firstPartitioned
			.coGroup(secondPartitioned)
			.where(0, 4)
			.equalTo(0, 1)
			.with(new Tuple5Tuple3CoGroup());

	List<Tuple3<Integer, Long, String>> actual = coGrouped.collect();

	String expected =
			"1,1,Hallo\n"
			+ "2,2,Hallo Welt\n"
			+ "3,2,Hallo Welt wie gehts?\n"
			+ "3,2,ABC\n"
			+ "5,3,HIJ\n"
			+ "5,3,IJK\n";

	compareResultAsTuples(actual, expected);
}
 
Example 18
Source Project: flink   Source File: SampleITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Samples the source data set with {@code DataSetUtils.sample} and passes the collected
 * sample, together with the source strings, to {@code containsResultAsText}.
 *
 * @param withReplacement forwarded to the sampler
 * @param fraction        forwarded to the sampler
 * @param seed            forwarded to the sampler
 */
private void verifySamplerWithFraction(boolean withReplacement, double fraction, long seed) throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	FlatMapOperator<Tuple3<Integer, Long, String>, String> source = getSourceDataSet(env);

	List<String> sampledElements = DataSetUtils
		.sample(source, withReplacement, fraction, seed)
		.collect();

	containsResultAsText(sampledElements, getSourceStrings());
}
 
Example 19
Source Project: flink   Source File: SampleITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Draws a fixed-size sample from the source data set, asserts that exactly
 * {@code numSamples} elements come back, and passes the sample together with the source
 * strings to {@code containsResultAsText}.
 *
 * @param withReplacement forwarded to the sampler
 * @param numSamples      requested and expected sample size
 * @param seed            forwarded to the sampler
 */
private void verifySamplerWithFixedSize(boolean withReplacement, int numSamples, long seed) throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	FlatMapOperator<Tuple3<Integer, Long, String>, String> source = getSourceDataSet(env);

	List<String> sampledElements = DataSetUtils
		.sampleWithSize(source, withReplacement, numSamples, seed)
		.collect();

	assertEquals(numSamples, sampledElements.size());
	containsResultAsText(sampledElements, getSourceStrings());
}
 
Example 20
Source Project: flink   Source File: CustomDistributionITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Expects {@code DataSetUtils.partitionByRange} to throw an {@link IllegalArgumentException}
 * when a 3-tuple data set is partitioned on fields 0, 1 and 2 with a {@code TestDataDist2}.
 */
@Test(expected = IllegalArgumentException.class)
public void testPartitionMoreThanDistribution() throws Exception {
	final TestDataDist2 distribution = new TestDataDist2();
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	final DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.get3TupleDataSet(env);

	// the call itself has to fail; nothing in this test collects or executes a job
	DataSetUtils.partitionByRange(tuples, distribution, 0, 1, 2);
}
 
Example 21
Source Project: flink   Source File: DataSetUtilsITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Counts the elements per partition of a generated sequence and checks that there is one
 * count record per unit of parallelism and that the counts add up to {@code expectedSize}.
 */
@Test
public void testCountElementsPerPartition() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	final long expectedSize = 100L;
	DataSet<Long> numbers = env.generateSequence(0, expectedSize - 1);

	DataSet<Tuple2<Integer, Long>> countsPerPartition = DataSetUtils.countElementsPerPartition(numbers);

	// number of count records has to match the environment's parallelism
	final int parallelism = env.getParallelism();
	final long countRecords = countsPerPartition.count();
	Assert.assertEquals(parallelism, countRecords);

	// summing field 1 over all records yields the total number of elements
	List<Tuple2<Integer, Long>> summed = countsPerPartition.sum(1).collect();
	Assert.assertEquals(expectedSize, summed.get(0).f1.longValue());
}
 
Example 22
Source Project: flink   Source File: DataSetUtilsITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Computes {@code DataSetUtils.checksumHashCode} for the integer test data set and checks
 * that it reports a count of 15 and a checksum of 55.
 */
@Test
public void testIntegerDataSetChecksumHashCode() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Integer> ds = CollectionDataSets.getIntegerDataSet(env);

	Utils.ChecksumHashCode checksum = DataSetUtils.checksumHashCode(ds);
	// JUnit's contract is assertEquals(expected, actual); the expected constant goes first so
	// that a failure reports "expected:<15> but was:<...>" rather than the reverse.
	Assert.assertEquals(15, checksum.getCount());
	Assert.assertEquals(55, checksum.getChecksum());
}
 
Example 23
Source Project: Alink   Source File: SampleWithSizeBatchOp.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Links this operator to its first input and sets a fixed-size sample of that input's rows
 * as the output, keeping the input schema.
 *
 * @param inputs upstream operators; only the one returned by {@code checkAndGetFirst} is used
 * @return this operator
 */
@Override
public SampleWithSizeBatchOp linkFrom(BatchOperator<?>... inputs) {
    final BatchOperator<?> source = checkAndGetFirst(inputs);

    // read the sampling parameters before touching the input data set
    final boolean replace = getWithReplacement();
    final int sampleSize = getSize();

    final DataSet<Row> sampledRows =
        DataSetUtils.sampleWithSize(source.getDataSet(), replace, sampleSize);

    this.setOutput(sampledRows, source.getSchema());
    return this;
}
 
Example 24
Source Project: Alink   Source File: AppendIdBatchOp.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Appends an id column to {@code dataSet} and converts the result to a table.
 *
 * <p>The output schema is the input schema plus one extra column named
 * {@code appendIdColName} of type {@code appendIdColType}.
 *
 * @param dataSet         rows to extend
 * @param schema          schema of {@code dataSet}
 * @param appendIdColName name of the appended id column
 * @param appendType      {@code DENSE} uses {@code DataSetUtils.zipWithIndex};
 *                        {@code UNIQUE} uses {@code AppendIdMapper}
 * @param sessionId       session passed on to {@code DataSetConversionUtil.toTable}
 * @return the table built from the extended rows
 * @throws IllegalArgumentException if {@code appendType} is neither {@code DENSE} nor {@code UNIQUE}
 */
public static Table appendId(
	DataSet <Row> dataSet,
	TableSchema schema,
	String appendIdColName,
	AppendType appendType,
	Long sessionId) {
	String[] rawColNames = schema.getFieldNames();
	TypeInformation[] rawColTypes = schema.getFieldTypes();

	String[] colNames = ArrayUtils.add(rawColNames, appendIdColName);
	TypeInformation[] colTypes = ArrayUtils.add(rawColTypes, appendIdColType);

	// Assigned exactly once on every path that leaves the switch normally.
	final DataSet <Row> ret;

	switch (appendType) {
		case DENSE:
			ret = DataSetUtils.zipWithIndex(dataSet)
				.map(new TransTupleToRowMapper());
			break;
		case UNIQUE:
			// The earlier code first built a zipWithUniqueId(...) pipeline here and then
			// immediately overwrote it with the AppendIdMapper result. That dead store is
			// removed; the effective behaviour (AppendIdMapper) is unchanged.
			// NOTE(review): confirm AppendIdMapper, not zipWithUniqueId, is the intended source of ids.
			ret = dataSet.map(new AppendIdMapper());
			break;
		default:
			throw new IllegalArgumentException("Error append type.");
	}

	return DataSetConversionUtil.toTable(sessionId, ret, colNames, colTypes);
}
 
Example 25
Source Project: Alink   Source File: MultilayerPerceptronTrainBatchOp.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Collects the distinct values of the label column and pairs each one with an index.
 *
 * @param data         operator holding the label column
 * @param labelColName name of the label column; it is back-quoted in the select clause
 * @return data set of (index, label value) pairs, where the index comes from
 *         {@code DataSetUtils.zipWithIndex}
 */
private static DataSet<Tuple2<Long, Object>> getDistinctLabels(BatchOperator data, final String labelColName) {
    final String quotedLabelCol = "`" + labelColName + "`";
    BatchOperator distinctLabels = data.select(quotedLabelCol).distinct();
    DataSet<Row> labelRows = distinctLabels.getDataSet();

    // Kept as an anonymous class (not a lambda) so the generic result type stays visible on the class.
    MapFunction<Tuple2<Long, Row>, Tuple2<Long, Object>> unwrapLabel =
        new MapFunction<Tuple2<Long, Row>, Tuple2<Long, Object>>() {
            @Override
            public Tuple2<Long, Object> map(Tuple2<Long, Row> indexedRow) throws Exception {
                // each row has the label as its first (and only selected) field
                Object label = indexedRow.f1.getField(0);
                return new Tuple2<>(indexedRow.f0, label);
            }
        };

    return DataSetUtils.zipWithIndex(labelRows)
        .map(unwrapLabel)
        .name("get_labels");
}
 
Example 26
Source Project: flink   Source File: JoinITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Joins a 3-tuple and a 5-tuple data set on two key fields after range-partitioning both
 * inputs on those same fields with one shared custom distribution, and compares the joined
 * pairs with the expected rows.
 */
@Test
public void testJoinWithRangePartitioning() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> left = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> right = CollectionDataSets.get5TupleDataSet(env);

	env.setParallelism(4);
	TestDistribution distribution = new TestDistribution();

	// each side is partitioned on exactly the fields it is joined on below
	DataSet<Tuple3<Integer, Long, String>> leftPartitioned =
			DataSetUtils.partitionByRange(left, distribution, 0, 1);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> rightPartitioned =
			DataSetUtils.partitionByRange(right, distribution, 0, 4);

	DataSet<Tuple2<String, String>> joined = leftPartitioned
			.join(rightPartitioned)
			.where(0, 1)
			.equalTo(0, 4)
			.with(new T3T5FlatJoin());

	List<Tuple2<String, String>> actual = joined.collect();

	String expected =
			"Hi,Hallo\n"
			+ "Hello,Hallo Welt\n"
			+ "Hello world,Hallo Welt wie gehts?\n"
			+ "Hello world,ABC\n"
			+ "I am fine.,HIJ\n"
			+ "I am fine.,IJK\n";

	compareResultAsTuples(actual, expected);
}
 
Example 27
Source Project: flink   Source File: CoGroupITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Co-groups a 5-tuple and a 3-tuple data set on two key fields after range-partitioning both
 * inputs on those same fields with one shared custom distribution, and compares the result
 * with the expected rows.
 */
@Test
public void testCoGroupWithRangePartitioning() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple5<Integer, Long, Integer, String, Long>> first = CollectionDataSets.get5TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> second = CollectionDataSets.get3TupleDataSet(env);

	env.setParallelism(4);
	TestDistribution distribution = new TestDistribution();

	// each side is partitioned on exactly the fields it is co-grouped on below
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> firstPartitioned =
			DataSetUtils.partitionByRange(first, distribution, 0, 4);
	DataSet<Tuple3<Integer, Long, String>> secondPartitioned =
			DataSetUtils.partitionByRange(second, distribution, 0, 1);

	DataSet<Tuple3<Integer, Long, String>> coGrouped = firstPartitioned
			.coGroup(secondPartitioned)
			.where(0, 4)
			.equalTo(0, 1)
			.with(new Tuple5Tuple3CoGroup());

	List<Tuple3<Integer, Long, String>> actual = coGrouped.collect();

	String expected =
			"1,1,Hallo\n"
			+ "2,2,Hallo Welt\n"
			+ "3,2,Hallo Welt wie gehts?\n"
			+ "3,2,ABC\n"
			+ "5,3,HIJ\n"
			+ "5,3,IJK\n";

	compareResultAsTuples(actual, expected);
}
 
Example 28
Source Project: flink   Source File: SampleITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Samples the source data set with {@code DataSetUtils.sample} and passes the collected
 * sample, together with the source strings, to {@code containsResultAsText}.
 *
 * @param withReplacement forwarded to the sampler
 * @param fraction        forwarded to the sampler
 * @param seed            forwarded to the sampler
 */
private void verifySamplerWithFraction(boolean withReplacement, double fraction, long seed) throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	FlatMapOperator<Tuple3<Integer, Long, String>, String> source = getSourceDataSet(env);

	List<String> sampledElements = DataSetUtils
		.sample(source, withReplacement, fraction, seed)
		.collect();

	containsResultAsText(sampledElements, getSourceStrings());
}
 
Example 29
Source Project: flink   Source File: SampleITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Draws a fixed-size sample from the source data set, asserts that exactly
 * {@code numSamples} elements come back, and passes the sample together with the source
 * strings to {@code containsResultAsText}.
 *
 * @param withReplacement forwarded to the sampler
 * @param numSamples      requested and expected sample size
 * @param seed            forwarded to the sampler
 */
private void verifySamplerWithFixedSize(boolean withReplacement, int numSamples, long seed) throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	FlatMapOperator<Tuple3<Integer, Long, String>, String> source = getSourceDataSet(env);

	List<String> sampledElements = DataSetUtils
		.sampleWithSize(source, withReplacement, numSamples, seed)
		.collect();

	assertEquals(numSamples, sampledElements.size());
	containsResultAsText(sampledElements, getSourceStrings());
}
 
Example 30
Source Project: flink   Source File: CustomDistributionITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Expects {@code DataSetUtils.partitionByRange} to throw an {@link IllegalArgumentException}
 * when a 3-tuple data set is partitioned on fields 0, 1 and 2 with a {@code TestDataDist2}.
 */
@Test(expected = IllegalArgumentException.class)
public void testPartitionMoreThanDistribution() throws Exception {
	final TestDataDist2 distribution = new TestDataDist2();
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	final DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.get3TupleDataSet(env);

	// the call itself has to fail; nothing in this test collects or executes a job
	DataSetUtils.partitionByRange(tuples, distribution, 0, 1, 2);
}