Java Code Examples for org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy

The following examples show how to use org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Flink-CEPplus   Source File: BucketAssignerITCases.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that an element assigned by a {@code BasePathBucketAssigner} lands in a
 * bucket whose path equals the base path handed to {@code Buckets}.
 */
@Test
public void testAssembleBucketPath() throws Exception {
	final long instant = 1000L;
	final File tempDir = TEMP_FOLDER.newFolder();
	final Path root = new Path(tempDir.toURI());

	// Only the maximum part size is set on the policy builder.
	final RollingPolicy<String, String> sizeCappedPolicy =
		DefaultRollingPolicy.create().withMaxPartSize(7L).build();

	final Buckets<String, String> sink = new Buckets<>(
		root,
		new BasePathBucketAssigner<>(),
		new DefaultBucketFactoryImpl<>(),
		new RowWisePartWriter.Factory<>(new SimpleStringEncoder<>()),
		sizeCappedPolicy,
		0);

	// One value is used for all three time arguments of the mock context.
	final TestUtils.MockSinkContext context =
		new TestUtils.MockSinkContext(instant, instant, instant);
	final Bucket<String, String> target = sink.onElement("abc", context);

	final Path expectedPath = new Path(root.toUri());
	Assert.assertEquals(expectedPath, target.getBucketPath());
}
 
Example 2
Source Project: Flink-CEPplus   Source File: BucketsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Sends one element through {@code Buckets} with a sink context built from the given
 * values, while a {@code VerifyingBucketAssigner} built from the same values acts as
 * the bucket assigner.
 *
 * @param timestamp      value passed as first argument to both the assigner and the context
 * @param watermark      value passed as second argument to both the assigner and the context
 * @param processingTime value passed as third argument to both the assigner and the context
 * @throws Exception if creating the temporary folder or processing the element fails
 */
private void testCorrectTimestampPassingInContext(Long timestamp, long watermark, long processingTime) throws Exception {
	final File tempDir = TEMP_FOLDER.newFolder();
	final Path basePath = new Path(tempDir.toURI());

	final VerifyingBucketAssigner checkingAssigner =
			new VerifyingBucketAssigner(timestamp, watermark, processingTime);

	final Buckets<String, String> sink = new Buckets<>(
			basePath,
			checkingAssigner,
			new DefaultBucketFactoryImpl<>(),
			new RowWisePartWriter.Factory<>(new SimpleStringEncoder<>()),
			DefaultRollingPolicy.create().build(),
			2);

	final TestUtils.MockSinkContext context =
			new TestUtils.MockSinkContext(timestamp, watermark, processingTime);
	sink.onElement("test", context);
}
 
Example 3
Source Project: flink   Source File: BucketAssignerITCases.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that an element assigned by a {@code BasePathBucketAssigner} lands in a
 * bucket whose path equals the base path handed to {@code Buckets}.
 */
@Test
public void testAssembleBucketPath() throws Exception {
	final long instant = 1000L;
	final File tempDir = TEMP_FOLDER.newFolder();
	final Path root = new Path(tempDir.toURI());

	// Only the maximum part size is set on the policy builder.
	final RollingPolicy<String, String> sizeCappedPolicy =
		DefaultRollingPolicy.create().withMaxPartSize(7L).build();

	final Buckets<String, String> sink = new Buckets<>(
		root,
		new BasePathBucketAssigner<>(),
		new DefaultBucketFactoryImpl<>(),
		new RowWisePartWriter.Factory<>(new SimpleStringEncoder<>()),
		sizeCappedPolicy,
		0,
		new PartFileConfig());

	// One value is used for all three time arguments of the mock context.
	final TestUtils.MockSinkContext context =
		new TestUtils.MockSinkContext(instant, instant, instant);
	final Bucket<String, String> target = sink.onElement("abc", context);

	final Path expectedPath = new Path(root.toUri());
	Assert.assertEquals(expectedPath, target.getBucketPath());
}
 
Example 4
Source Project: flink   Source File: BucketsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Sends one element through {@code Buckets} with a sink context built from the given
 * values, while a {@code VerifyingBucketAssigner} built from the same values acts as
 * the bucket assigner.
 *
 * @param timestamp      value passed as first argument to both the assigner and the context
 * @param watermark      value passed as second argument to both the assigner and the context
 * @param processingTime value passed as third argument to both the assigner and the context
 * @throws Exception if creating the temporary folder or processing the element fails
 */
private void testCorrectTimestampPassingInContext(Long timestamp, long watermark, long processingTime) throws Exception {
	final File tempDir = TEMP_FOLDER.newFolder();
	final Path basePath = new Path(tempDir.toURI());

	final VerifyingBucketAssigner checkingAssigner =
			new VerifyingBucketAssigner(timestamp, watermark, processingTime);

	final Buckets<String, String> sink = new Buckets<>(
			basePath,
			checkingAssigner,
			new DefaultBucketFactoryImpl<>(),
			new RowWisePartWriter.Factory<>(new SimpleStringEncoder<>()),
			DefaultRollingPolicy.create().build(),
			2,
			new PartFileConfig());

	final TestUtils.MockSinkContext context =
			new TestUtils.MockSinkContext(timestamp, watermark, processingTime);
	sink.onElement("test", context);
}
 
Example 5
Source Project: flink   Source File: TestUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Convenience overload that builds a {@code DefaultRollingPolicy} from the given limits and
 * delegates to the fully parameterised {@code createRescalingTestSink} overload with a
 * {@code TupleToStringBucketer}, a {@code Tuple2Encoder} and a {@code DefaultBucketFactoryImpl}.
 *
 * @param outDir             output directory forwarded to the delegate
 * @param totalParallelism   forwarded unchanged to the delegate
 * @param taskIdx            forwarded unchanged to the delegate
 * @param inactivityInterval used for both the rollover and the inactivity interval of the policy
 * @param partMaxSize        maximum part size of the policy
 * @return the test harness produced by the delegate
 * @throws Exception if the delegate fails
 */
static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createRescalingTestSink(
		File outDir,
		int totalParallelism,
		int taskIdx,
		long inactivityInterval,
		long partMaxSize) throws Exception {

	final RollingPolicy<Tuple2<String, Integer>, String> policy =
			DefaultRollingPolicy.builder()
					.withMaxPartSize(partMaxSize)
					.withRolloverInterval(inactivityInterval)
					.withInactivityInterval(inactivityInterval)
					.build();

	final TupleToStringBucketer assigner = new TupleToStringBucketer();
	final Tuple2Encoder tupleEncoder = new Tuple2Encoder();

	// NOTE(review): the literal 10L is presumably the bucket check interval - confirm against the delegate.
	return createRescalingTestSink(
			outDir, totalParallelism, taskIdx, 10L,
			assigner, tupleEncoder, policy,
			new DefaultBucketFactoryImpl<>());
}
 
Example 6
Source Project: flink   Source File: BucketAssignerITCases.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that an element assigned by a {@code BasePathBucketAssigner} lands in a
 * bucket whose path equals the base path handed to {@code Buckets}.
 */
@Test
public void testAssembleBucketPath() throws Exception {
	final long instant = 1000L;
	final File tempDir = TEMP_FOLDER.newFolder();
	final Path root = new Path(tempDir.toURI());

	// Only the maximum part size is set on the policy builder.
	final RollingPolicy<String, String> sizeCappedPolicy =
		DefaultRollingPolicy.builder().withMaxPartSize(7L).build();

	final Buckets<String, String> sink = new Buckets<>(
		root,
		new BasePathBucketAssigner<>(),
		new DefaultBucketFactoryImpl<>(),
		new RowWiseBucketWriter<>(FileSystem.get(root.toUri()).createRecoverableWriter(), new SimpleStringEncoder<>()),
		sizeCappedPolicy,
		0,
		OutputFileConfig.builder().build());

	// One value is used for all three time arguments of the mock context.
	final TestUtils.MockSinkContext context =
		new TestUtils.MockSinkContext(instant, instant, instant);
	final Bucket<String, String> target = sink.onElement("abc", context);

	final Path expectedPath = new Path(root.toUri());
	Assert.assertEquals(expectedPath, target.getBucketPath());
}
 
Example 7
Source Project: flink   Source File: BucketsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Sends one element through {@code Buckets} with a sink context built from the given
 * values, while a {@code VerifyingBucketAssigner} built from the same values acts as
 * the bucket assigner.
 *
 * @param timestamp      value passed as first argument to both the assigner and the context
 * @param watermark      value passed as second argument to both the assigner and the context
 * @param processingTime value passed as third argument to both the assigner and the context
 * @throws Exception if creating the temporary folder, the writer or processing the element fails
 */
private void testCorrectTimestampPassingInContext(Long timestamp, long watermark, long processingTime) throws Exception {
	final File tempDir = TEMP_FOLDER.newFolder();
	final Path basePath = new Path(tempDir.toURI());

	final VerifyingBucketAssigner checkingAssigner =
			new VerifyingBucketAssigner(timestamp, watermark, processingTime);

	final Buckets<String, String> sink = new Buckets<>(
			basePath,
			checkingAssigner,
			new DefaultBucketFactoryImpl<>(),
			new RowWiseBucketWriter<>(FileSystem.get(basePath.toUri()).createRecoverableWriter(), new SimpleStringEncoder<>()),
			DefaultRollingPolicy.builder().build(),
			2,
			OutputFileConfig.builder().build());

	final TestUtils.MockSinkContext context =
			new TestUtils.MockSinkContext(timestamp, watermark, processingTime);
	sink.onElement("test", context);
}
 
Example 8
Source Project: Flink-CEPplus   Source File: TestUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a rescaling test sink whose rolling policy is derived from the given limits and
 * whose encoder writes every tuple as {@code f0@f1} followed by a newline, then delegates
 * to {@code createCustomRescalingTestSink}.
 *
 * @param outDir             output directory forwarded to the delegate
 * @param totalParallelism   forwarded unchanged to the delegate
 * @param taskIdx            forwarded unchanged to the delegate
 * @param inactivityInterval used for both the rollover and the inactivity interval of the policy
 * @param partMaxSize        maximum part size of the policy
 * @return the test harness produced by the delegate
 * @throws Exception if the delegate fails
 */
static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createRescalingTestSink(
		File outDir,
		int totalParallelism,
		int taskIdx,
		long inactivityInterval,
		long partMaxSize) throws Exception {

	final RollingPolicy<Tuple2<String, Integer>, String> policy =
			DefaultRollingPolicy.create()
					.withMaxPartSize(partMaxSize)
					.withRolloverInterval(inactivityInterval)
					.withInactivityInterval(inactivityInterval)
					.build();

	final BucketAssigner<Tuple2<String, Integer>, String> assigner = new TupleToStringBucketer();

	// Payload and line terminator are written with two separate calls.
	final Encoder<Tuple2<String, Integer>> lineEncoder = (record, out) -> {
		final byte[] payload = (record.f0 + '@' + record.f1).getBytes(StandardCharsets.UTF_8);
		out.write(payload);
		out.write('\n');
	};

	// NOTE(review): the literal 10L is presumably the bucket check interval - confirm against the delegate.
	return createCustomRescalingTestSink(
			outDir, totalParallelism, taskIdx, 10L,
			assigner, lineEncoder, policy,
			new DefaultBucketFactoryImpl<>());
}
 
Example 9
Source Project: flink   Source File: TestUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a rescaling test sink whose rolling policy is derived from the given limits and
 * whose encoder writes every tuple as {@code f0@f1} followed by a newline, then delegates
 * to {@code createCustomRescalingTestSink}.
 *
 * @param outDir             output directory forwarded to the delegate
 * @param totalParallelism   forwarded unchanged to the delegate
 * @param taskIdx            forwarded unchanged to the delegate
 * @param inactivityInterval used for both the rollover and the inactivity interval of the policy
 * @param partMaxSize        maximum part size of the policy
 * @return the test harness produced by the delegate
 * @throws Exception if the delegate fails
 */
static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createRescalingTestSink(
		File outDir,
		int totalParallelism,
		int taskIdx,
		long inactivityInterval,
		long partMaxSize) throws Exception {

	final RollingPolicy<Tuple2<String, Integer>, String> policy =
			DefaultRollingPolicy.create()
					.withMaxPartSize(partMaxSize)
					.withRolloverInterval(inactivityInterval)
					.withInactivityInterval(inactivityInterval)
					.build();

	final BucketAssigner<Tuple2<String, Integer>, String> assigner = new TupleToStringBucketer();

	// Payload and line terminator are written with two separate calls.
	final Encoder<Tuple2<String, Integer>> lineEncoder = (record, out) -> {
		final byte[] payload = (record.f0 + '@' + record.f1).getBytes(StandardCharsets.UTF_8);
		out.write(payload);
		out.write('\n');
	};

	// NOTE(review): the literal 10L is presumably the bucket check interval - confirm against the delegate.
	return createCustomRescalingTestSink(
			outDir, totalParallelism, taskIdx, 10L,
			assigner, lineEncoder, policy,
			new DefaultBucketFactoryImpl<>());
}
 
Example 10
Source Project: flink   Source File: RollingPolicyTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a policy through {@code DefaultRollingPolicy.create()} and checks that each
 * value given to the builder is reported back by the matching getter.
 */
@Test
public void testDefaultRollingPolicyDeprecatedCreate() throws Exception {
	final long expectedInactivity = 10;
	final long expectedPartSize = 20;
	final long expectedRollover = 30;

	final DefaultRollingPolicy legacyPolicy = DefaultRollingPolicy.create()
		.withInactivityInterval(expectedInactivity)
		.withMaxPartSize(expectedPartSize)
		.withRolloverInterval(expectedRollover)
		.build();

	Assert.assertEquals(expectedInactivity, legacyPolicy.getInactivityInterval());
	Assert.assertEquals(expectedPartSize, legacyPolicy.getMaxPartSize());
	Assert.assertEquals(expectedRollover, legacyPolicy.getRolloverInterval());
}
 
Example 11
Source Project: flink   Source File: BucketStateSerializerTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a fresh bucket at the given path via {@code Bucket.getNew}, using a rolling
 * policy capped at a part size of 10 and a default-built {@code OutputFileConfig}.
 *
 * @param bucketPath path handed to {@code Bucket.getNew}
 * @return the newly created bucket
 * @throws IOException declared by this helper; its source is not visible in this method
 */
private static Bucket<String, String> createNewBucket(final Path bucketPath) throws IOException {
	final DefaultRollingPolicy<String, String> sizeCappedPolicy =
		DefaultRollingPolicy.builder().withMaxPartSize(10).build();
	final OutputFileConfig fileConfig = OutputFileConfig.builder().build();

	return Bucket.getNew(0, BUCKET_ID, bucketPath, 0, createBucketWriter(), sizeCappedPolicy, fileConfig);
}
 
Example 12
Source Project: flink   Source File: BucketStateSerializerTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Restores a bucket from the given state via {@code Bucket.restore}, using a rolling
 * policy capped at a part size of 10 and a default-built {@code OutputFileConfig}.
 *
 * @param initialPartCounter part counter handed to {@code Bucket.restore}
 * @param bucketState        state the bucket is restored from
 * @return the restored bucket
 * @throws IOException declared by this helper; its source is not visible in this method
 */
private static Bucket<String, String> restoreBucket(final int initialPartCounter, final BucketState<String> bucketState) throws IOException {
	final DefaultRollingPolicy<String, String> sizeCappedPolicy =
		DefaultRollingPolicy.builder().withMaxPartSize(10).build();
	final OutputFileConfig fileConfig = OutputFileConfig.builder().build();

	return Bucket.restore(0, initialPartCounter, createBucketWriter(), sizeCappedPolicy, bucketState, fileConfig);
}
 
Example 13
Source Project: Flink-CEPplus   Source File: StreamingFileSink.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a builder from the three mandatory settings and fills in the rest: a
 * default-built {@code DefaultRollingPolicy}, the value {@code 60L * 1000L} and a
 * {@code DefaultBucketFactoryImpl}.
 *
 * @param basePath       forwarded to the full constructor
 * @param encoder        forwarded to the full constructor
 * @param bucketAssigner forwarded to the full constructor
 */
RowFormatBuilder(Path basePath, Encoder<IN> encoder, BucketAssigner<IN, BucketID> bucketAssigner) {
	this(
		basePath,
		encoder,
		bucketAssigner,
		DefaultRollingPolicy.create().build(),
		// NOTE(review): presumably the bucket check interval, one minute in milliseconds - confirm.
		60L * 1000L,
		new DefaultBucketFactoryImpl<>());
}
 
Example 14
Source Project: Flink-CEPplus   Source File: RollingPolicyTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Drives a {@code DefaultRollingPolicy} (max part size 10, inactivity interval 4, rollover
 * interval 11) through {@code Buckets} and verifies, after each step, the call counters
 * recorded by the {@code MethodCallCountingPolicyWrapper} placed around it.
 */
@Test
public void testDefaultRollingPolicy() throws Exception {
	final String element = "test1";
	final File tempDir = TEMP_FOLDER.newFolder();
	final Path basePath = new Path(tempDir.toURI());

	final RollingPolicy<String, String> delegate =
			DefaultRollingPolicy.create()
					.withMaxPartSize(10L)
					.withInactivityInterval(4L)
					.withRolloverInterval(11L)
					.build();

	final MethodCallCountingPolicyWrapper<String, String> countingPolicy =
			new MethodCallCountingPolicyWrapper<>(delegate);

	final Buckets<String, String> sink = createBuckets(basePath, countingPolicy);

	// before any element arrives all counters are zero
	countingPolicy.verifyCallCounters(0L, 0L, 0L, 0L, 0L, 0L);

	// the first two elements fill up the initial in-progress file; the third one makes it roll
	sink.onElement(element, new TestUtils.MockSinkContext(1L, 1L, 1L));
	sink.onElement(element, new TestUtils.MockSinkContext(2L, 1L, 2L));
	countingPolicy.verifyCallCounters(0L, 0L, 1L, 0L, 0L, 0L);

	sink.onElement(element, new TestUtils.MockSinkContext(3L, 1L, 3L));
	countingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 0L, 0L);

	// not yet time to roll
	sink.onProcessingTime(5L);
	countingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 1L, 0L);

	// inactivity triggers a roll
	sink.onProcessingTime(7L);
	countingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 2L, 1L);

	sink.onElement(element, new TestUtils.MockSinkContext(3L, 1L, 3L));

	// the rollover interval triggers a roll
	sink.onProcessingTime(20L);
	countingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 3L, 2L);

	// taking a checkpoint must not cause a roll
	sink.snapshotState(1L, new TestUtils.MockListState<>(), new TestUtils.MockListState<>());
	countingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 3L, 2L);
}
 
Example 15
Source Project: Flink-CEPplus   Source File: BucketsTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Snapshots two {@code Buckets} instances (subtask 0 and 1) that write to the same bucket,
 * merges their state into a single restore and checks the restored part counter, the
 * active bucket and its in-progress and pending part files.
 */
@Test
public void testMergeAtScaleInAndMaxCounterAtRecovery() throws Exception {
	final File tempDir = TEMP_FOLDER.newFolder();
	final Path basePath = new Path(tempDir.toURI());

	// with a maximum part size of 7 a part file rolls with 2 elements
	final RollingPolicy<String, String> sizeCappedPolicy =
			DefaultRollingPolicy.create().withMaxPartSize(7L).build();

	final MockListState<byte[]> firstBucketState = new MockListState<>();
	final MockListState<byte[]> secondBucketState = new MockListState<>();

	final MockListState<Long> firstPartCounter = new MockListState<>();
	final MockListState<Long> secondPartCounter = new MockListState<>();

	final Buckets<String, String> firstTask = createBuckets(basePath, sizeCappedPolicy, 0);
	final Buckets<String, String> secondTask = createBuckets(basePath, sizeCappedPolicy, 1);

	firstTask.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));
	firstTask.snapshotState(0L, firstBucketState, firstPartCounter);

	Assert.assertEquals(1L, firstTask.getMaxPartCounter());

	// the first task must hold one in-progress file at this point
	Assert.assertNotNull(firstTask.getActiveBuckets().get("test1").getInProgressPart());

	// write several elements on the second task so that its part counter increases
	secondTask.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));
	secondTask.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));
	secondTask.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));

	secondTask.snapshotState(0L, secondBucketState, secondPartCounter);

	Assert.assertEquals(2L, secondTask.getMaxPartCounter());

	// the second task must hold one pending and one in-progress file
	Assert.assertEquals(1L, secondTask.getActiveBuckets().get("test1").getPendingPartsPerCheckpoint().size());
	Assert.assertNotNull(secondTask.getActiveBuckets().get("test1").getInProgressPart());

	// combine the state of both tasks, first task first
	final ListState<byte[]> mergedBucketState = new MockListState<>();
	mergedBucketState.addAll(firstBucketState.getBackingList());
	mergedBucketState.addAll(secondBucketState.getBackingList());

	final ListState<Long> mergedPartCounter = new MockListState<>();
	mergedPartCounter.addAll(firstPartCounter.getBackingList());
	mergedPartCounter.addAll(secondPartCounter.getBackingList());

	final Buckets<String, String> restored =
			restoreBuckets(basePath, sizeCappedPolicy, 0, mergedBucketState, mergedPartCounter);

	// the restored counter is the maximum over the previous tasks
	Assert.assertEquals(2L, restored.getMaxPartCounter());

	final Map<String, Bucket<String, String>> active = restored.getActiveBuckets();
	Assert.assertEquals(1L, active.size());
	Assert.assertTrue(active.containsKey("test1"));

	final Bucket<String, String> restoredBucket = active.get("test1");
	Assert.assertEquals("test1", restoredBucket.getBucketId());
	Assert.assertEquals(new Path(basePath, "test1"), restoredBucket.getBucketPath());

	// the restored part file
	Assert.assertNotNull(restoredBucket.getInProgressPart());

	// caused by Bucket#merge(): the in-progress file of one of the
	// previous tasks is moved to the list of pending files.
	Assert.assertEquals(1L, restoredBucket.getPendingPartsForCurrentCheckpoint().size());

	// pending files of previous checkpoints have been committed
	Assert.assertTrue(restoredBucket.getPendingPartsPerCheckpoint().isEmpty());
}
 
Example 16
Source Project: flink   Source File: StreamingFileSink.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a builder from the three mandatory settings and fills in the rest: a
 * default-built {@code DefaultRollingPolicy}, the value {@code 60L * 1000L}, a
 * {@code DefaultBucketFactoryImpl} and the default part prefix and suffix from
 * {@code PartFileConfig}.
 *
 * @param basePath       forwarded to the full constructor
 * @param encoder        forwarded to the full constructor
 * @param bucketAssigner forwarded to the full constructor
 */
RowFormatBuilder(Path basePath, Encoder<IN> encoder, BucketAssigner<IN, BucketID> bucketAssigner) {
	this(
		basePath,
		encoder,
		bucketAssigner,
		DefaultRollingPolicy.create().build(),
		// NOTE(review): presumably the bucket check interval, one minute in milliseconds - confirm.
		60L * 1000L,
		new DefaultBucketFactoryImpl<>(),
		PartFileConfig.DEFAULT_PART_PREFIX,
		PartFileConfig.DEFAULT_PART_SUFFIX);
}
 
Example 17
Source Project: flink   Source File: RollingPolicyTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Drives a {@code DefaultRollingPolicy} (max part size 10, inactivity interval 4, rollover
 * interval 11) through {@code Buckets} and verifies, after each step, the call counters
 * recorded by the {@code MethodCallCountingPolicyWrapper} placed around it.
 */
@Test
public void testDefaultRollingPolicy() throws Exception {
	final String element = "test1";
	final File tempDir = TEMP_FOLDER.newFolder();
	final Path basePath = new Path(tempDir.toURI());

	final RollingPolicy<String, String> delegate =
			DefaultRollingPolicy.create()
					.withMaxPartSize(10L)
					.withInactivityInterval(4L)
					.withRolloverInterval(11L)
					.build();

	final MethodCallCountingPolicyWrapper<String, String> countingPolicy =
			new MethodCallCountingPolicyWrapper<>(delegate);

	final Buckets<String, String> sink = createBuckets(basePath, countingPolicy);

	// before any element arrives all counters are zero
	countingPolicy.verifyCallCounters(0L, 0L, 0L, 0L, 0L, 0L);

	// the first two elements fill up the initial in-progress file; the third one makes it roll
	sink.onElement(element, new TestUtils.MockSinkContext(1L, 1L, 1L));
	sink.onElement(element, new TestUtils.MockSinkContext(2L, 1L, 2L));
	countingPolicy.verifyCallCounters(0L, 0L, 1L, 0L, 0L, 0L);

	sink.onElement(element, new TestUtils.MockSinkContext(3L, 1L, 3L));
	countingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 0L, 0L);

	// not yet time to roll
	sink.onProcessingTime(5L);
	countingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 1L, 0L);

	// inactivity triggers a roll
	sink.onProcessingTime(7L);
	countingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 2L, 1L);

	sink.onElement(element, new TestUtils.MockSinkContext(3L, 1L, 3L));

	// the rollover interval triggers a roll
	sink.onProcessingTime(20L);
	countingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 3L, 2L);

	// taking a checkpoint must not cause a roll
	sink.snapshotState(1L, new TestUtils.MockListState<>(), new TestUtils.MockListState<>());
	countingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 3L, 2L);
}
 
Example 18
Source Project: flink   Source File: BucketsTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Snapshots two {@code Buckets} instances (subtask 0 and 1) that write to the same bucket,
 * merges their state into a single restore and checks the restored part counter, the
 * active bucket and its in-progress and pending part files.
 */
@Test
public void testMergeAtScaleInAndMaxCounterAtRecovery() throws Exception {
	final File tempDir = TEMP_FOLDER.newFolder();
	final Path basePath = new Path(tempDir.toURI());

	// with a maximum part size of 7 a part file rolls with 2 elements
	final RollingPolicy<String, String> sizeCappedPolicy =
			DefaultRollingPolicy.create().withMaxPartSize(7L).build();

	final MockListState<byte[]> firstBucketState = new MockListState<>();
	final MockListState<byte[]> secondBucketState = new MockListState<>();

	final MockListState<Long> firstPartCounter = new MockListState<>();
	final MockListState<Long> secondPartCounter = new MockListState<>();

	final Buckets<String, String> firstTask = createBuckets(basePath, sizeCappedPolicy, 0);
	final Buckets<String, String> secondTask = createBuckets(basePath, sizeCappedPolicy, 1);

	firstTask.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));
	firstTask.snapshotState(0L, firstBucketState, firstPartCounter);

	Assert.assertEquals(1L, firstTask.getMaxPartCounter());

	// the first task must hold one in-progress file at this point
	Assert.assertNotNull(firstTask.getActiveBuckets().get("test1").getInProgressPart());

	// write several elements on the second task so that its part counter increases
	secondTask.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));
	secondTask.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));
	secondTask.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));

	secondTask.snapshotState(0L, secondBucketState, secondPartCounter);

	Assert.assertEquals(2L, secondTask.getMaxPartCounter());

	// the second task must hold one pending and one in-progress file
	Assert.assertEquals(1L, secondTask.getActiveBuckets().get("test1").getPendingPartsPerCheckpoint().size());
	Assert.assertNotNull(secondTask.getActiveBuckets().get("test1").getInProgressPart());

	// combine the state of both tasks, first task first
	final ListState<byte[]> mergedBucketState = new MockListState<>();
	mergedBucketState.addAll(firstBucketState.getBackingList());
	mergedBucketState.addAll(secondBucketState.getBackingList());

	final ListState<Long> mergedPartCounter = new MockListState<>();
	mergedPartCounter.addAll(firstPartCounter.getBackingList());
	mergedPartCounter.addAll(secondPartCounter.getBackingList());

	final Buckets<String, String> restored =
			restoreBuckets(basePath, sizeCappedPolicy, 0, mergedBucketState, mergedPartCounter);

	// the restored counter is the maximum over the previous tasks
	Assert.assertEquals(2L, restored.getMaxPartCounter());

	final Map<String, Bucket<String, String>> active = restored.getActiveBuckets();
	Assert.assertEquals(1L, active.size());
	Assert.assertTrue(active.containsKey("test1"));

	final Bucket<String, String> restoredBucket = active.get("test1");
	Assert.assertEquals("test1", restoredBucket.getBucketId());
	Assert.assertEquals(new Path(basePath, "test1"), restoredBucket.getBucketPath());

	// the restored part file
	Assert.assertNotNull(restoredBucket.getInProgressPart());

	// caused by Bucket#merge(): the in-progress file of one of the
	// previous tasks is moved to the list of pending files.
	Assert.assertEquals(1L, restoredBucket.getPendingPartsForCurrentCheckpoint().size());

	// pending files of previous checkpoints have been committed
	Assert.assertTrue(restoredBucket.getPendingPartsPerCheckpoint().isEmpty());
}
 
Example 19
Source Project: flink   Source File: StreamingFileSink.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a builder from the three mandatory settings and fills in the rest: a
 * default-built {@code DefaultRollingPolicy}, {@code DEFAULT_BUCKET_CHECK_INTERVAL}, a
 * {@code DefaultBucketFactoryImpl} and a default-built {@code OutputFileConfig}.
 *
 * @param basePath       forwarded to the full constructor
 * @param encoder        forwarded to the full constructor
 * @param bucketAssigner forwarded to the full constructor
 */
protected RowFormatBuilder(Path basePath, Encoder<IN> encoder, BucketAssigner<IN, BucketID> bucketAssigner) {
	this(
		basePath,
		encoder,
		bucketAssigner,
		DefaultRollingPolicy.builder().build(),
		DEFAULT_BUCKET_CHECK_INTERVAL,
		new DefaultBucketFactoryImpl<>(),
		OutputFileConfig.builder().build());
}
 
Example 20
Source Project: flink   Source File: RollingPolicyTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Drives a {@code DefaultRollingPolicy} (max part size 10, inactivity interval 4, rollover
 * interval 11) through {@code Buckets} and verifies, after each step, the call counters
 * recorded by the {@code MethodCallCountingPolicyWrapper} placed around it.
 */
@Test
public void testDefaultRollingPolicy() throws Exception {
	final String element = "test1";
	final File tempDir = TEMP_FOLDER.newFolder();
	final Path basePath = new Path(tempDir.toURI());

	final RollingPolicy<String, String> delegate =
			DefaultRollingPolicy.builder()
					.withMaxPartSize(10L)
					.withInactivityInterval(4L)
					.withRolloverInterval(11L)
					.build();

	final MethodCallCountingPolicyWrapper<String, String> countingPolicy =
			new MethodCallCountingPolicyWrapper<>(delegate);

	final Buckets<String, String> sink = createBuckets(basePath, countingPolicy);

	// before any element arrives all counters are zero
	countingPolicy.verifyCallCounters(0L, 0L, 0L, 0L, 0L, 0L);

	// the first two elements fill up the initial in-progress file; the third one makes it roll
	sink.onElement(element, new TestUtils.MockSinkContext(1L, 1L, 1L));
	sink.onElement(element, new TestUtils.MockSinkContext(2L, 1L, 2L));
	countingPolicy.verifyCallCounters(0L, 0L, 1L, 0L, 0L, 0L);

	sink.onElement(element, new TestUtils.MockSinkContext(3L, 1L, 3L));
	countingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 0L, 0L);

	// not yet time to roll
	sink.onProcessingTime(5L);
	countingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 1L, 0L);

	// inactivity triggers a roll
	sink.onProcessingTime(7L);
	countingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 2L, 1L);

	sink.onElement(element, new TestUtils.MockSinkContext(3L, 1L, 3L));

	// the rollover interval triggers a roll
	sink.onProcessingTime(20L);
	countingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 3L, 2L);

	// taking a checkpoint must not cause a roll
	sink.snapshotState(1L, new TestUtils.MockListState<>(), new TestUtils.MockListState<>());
	countingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 3L, 2L);
}
 
Example 21
Source Project: flink   Source File: LocalStreamingFileSinkTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Runs a sink test harness that buckets by a {@code TupleToIntegerBucketer}, feeds it four
 * elements across two checkpoints, and checks the local file-system state after each step.
 * After the harness is closed, every part file must sit in a directory named after its
 * integer bucket id (1 to 4) and contain exactly one line of the form {@code test<id>@<id>}.
 */
@Test
public void testClosingWithCustomizedBucketer() throws Exception {
	final File outDir = TEMP_FOLDER.newFolder();
	final long partMaxSize = 2L;
	final long inactivityInterval = 100L;
	// the same interval is used for both rollover and inactivity
	final RollingPolicy<Tuple2<String, Integer>, Integer> rollingPolicy =
			DefaultRollingPolicy
					.builder()
					.withMaxPartSize(partMaxSize)
					.withRolloverInterval(inactivityInterval)
					.withInactivityInterval(inactivityInterval)
					.build();

	try (
			OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> testHarness =
					TestUtils.createCustomizedRescalingTestSink(outDir, 1, 0, 100L, new TupleToIntegerBucketer(), new Tuple2Encoder(), rollingPolicy, new DefaultBucketFactoryImpl<>());
	) {
		testHarness.setup();
		testHarness.open();

		testHarness.setProcessingTime(0L);

		testHarness.processElement(new StreamRecord<>(Tuple2.of("test1", 1), 1L));
		testHarness.processElement(new StreamRecord<>(Tuple2.of("test2", 2), 1L));
		TestUtils.checkLocalFs(outDir, 2, 0);

		// this is to check the inactivity threshold
		testHarness.setProcessingTime(101L);
		TestUtils.checkLocalFs(outDir, 2, 0);

		testHarness.processElement(new StreamRecord<>(Tuple2.of("test3", 3), 1L));
		TestUtils.checkLocalFs(outDir, 3, 0);

		testHarness.snapshot(0L, 1L);
		TestUtils.checkLocalFs(outDir, 3, 0);

		testHarness.notifyOfCompletedCheckpoint(0L);
		TestUtils.checkLocalFs(outDir, 0, 3);

		testHarness.processElement(new StreamRecord<>(Tuple2.of("test4", 4), 10L));
		TestUtils.checkLocalFs(outDir, 1, 3);

		testHarness.snapshot(1L, 0L);
		testHarness.notifyOfCompletedCheckpoint(1L);
	}

	// at close all files moved to final.
	TestUtils.checkLocalFs(outDir, 0, 4);

	// check file content and bucket ID.
	Map<File, String> contents = TestUtils.getFileContentByPath(outDir);
	for (Map.Entry<File, String> fileContents : contents.entrySet()) {
		// the parent directory name is the integer bucket id
		Integer bucketId = Integer.parseInt(fileContents.getKey().getParentFile().getName());

		Assert.assertTrue(bucketId >= 1 && bucketId <= 4);
		// Element N was sent as ("testN", N); the expected line joins the two fields with '@'.
		// The separator must be a literal '@' between the two %d conversions, otherwise
		// String.format rejects the pattern.
		Assert.assertEquals(String.format("test%d@%d\n", bucketId, bucketId), fileContents.getValue());
	}
}
 
Example 22
Source Project: flink   Source File: BucketsTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Snapshots two {@code Buckets} instances (subtask 0 and 1) that write to the same bucket,
 * merges their state into a single restore and checks the restored part counter, the
 * active bucket and its in-progress and pending file recoverables.
 */
@Test
public void testMergeAtScaleInAndMaxCounterAtRecovery() throws Exception {
	final File tempDir = TEMP_FOLDER.newFolder();
	final Path basePath = new Path(tempDir.toURI());

	// with a maximum part size of 7 a part file rolls with 2 elements
	final RollingPolicy<String, String> sizeCappedPolicy =
			DefaultRollingPolicy.builder().withMaxPartSize(7L).build();

	final MockListState<byte[]> firstBucketState = new MockListState<>();
	final MockListState<byte[]> secondBucketState = new MockListState<>();

	final MockListState<Long> firstPartCounter = new MockListState<>();
	final MockListState<Long> secondPartCounter = new MockListState<>();

	final Buckets<String, String> firstTask = createBuckets(basePath, sizeCappedPolicy, 0);
	final Buckets<String, String> secondTask = createBuckets(basePath, sizeCappedPolicy, 1);

	firstTask.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));
	firstTask.snapshotState(0L, firstBucketState, firstPartCounter);

	Assert.assertEquals(1L, firstTask.getMaxPartCounter());

	// the first task must hold one in-progress file at this point
	Assert.assertNotNull(firstTask.getActiveBuckets().get("test1").getInProgressPart());

	// write several elements on the second task so that its part counter increases
	secondTask.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));
	secondTask.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));
	secondTask.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));

	secondTask.snapshotState(0L, secondBucketState, secondPartCounter);

	Assert.assertEquals(2L, secondTask.getMaxPartCounter());

	// the second task must hold one pending and one in-progress file
	Assert.assertEquals(1L, secondTask.getActiveBuckets().get("test1").getPendingFileRecoverablesPerCheckpoint().size());
	Assert.assertNotNull(secondTask.getActiveBuckets().get("test1").getInProgressPart());

	// combine the state of both tasks, first task first
	final ListState<byte[]> mergedBucketState = new MockListState<>();
	mergedBucketState.addAll(firstBucketState.getBackingList());
	mergedBucketState.addAll(secondBucketState.getBackingList());

	final ListState<Long> mergedPartCounter = new MockListState<>();
	mergedPartCounter.addAll(firstPartCounter.getBackingList());
	mergedPartCounter.addAll(secondPartCounter.getBackingList());

	final Buckets<String, String> restored =
			restoreBuckets(basePath, sizeCappedPolicy, 0, mergedBucketState, mergedPartCounter);

	// the restored counter is the maximum over the previous tasks
	Assert.assertEquals(2L, restored.getMaxPartCounter());

	final Map<String, Bucket<String, String>> active = restored.getActiveBuckets();
	Assert.assertEquals(1L, active.size());
	Assert.assertTrue(active.containsKey("test1"));

	final Bucket<String, String> restoredBucket = active.get("test1");
	Assert.assertEquals("test1", restoredBucket.getBucketId());
	Assert.assertEquals(new Path(basePath, "test1"), restoredBucket.getBucketPath());

	// the restored part file
	Assert.assertNotNull(restoredBucket.getInProgressPart());

	// caused by Bucket#merge(): the in-progress file of one of the
	// previous tasks is moved to the list of pending files.
	Assert.assertEquals(1L, restoredBucket.getPendingFileRecoverablesForCurrentCheckpoint().size());

	// pending files of previous checkpoints have been committed
	Assert.assertTrue(restoredBucket.getPendingFileRecoverablesPerCheckpoint().isEmpty());
}