org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy Java Examples

The following examples show how to use org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BucketsTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Sends one element through a freshly created {@code Buckets} instance whose assigner is a
 * {@code VerifyingBucketAssigner} built from the same three values that are handed to the
 * mock sink context.
 *
 * <p>NOTE(review): this method holds no assertions of its own; the checking is presumably
 * done inside {@code VerifyingBucketAssigner} - confirm against its definition.
 *
 * @param timestamp value passed as first argument to both the assigner and the context
 * @param watermark value passed as second argument to both the assigner and the context
 * @param processingTime value passed as third argument to both the assigner and the context
 * @throws Exception propagated from folder creation, bucket construction or element processing
 */
private void testCorrectTimestampPassingInContext(Long timestamp, long watermark, long processingTime) throws Exception {
	final Path basePath = new Path(TEMP_FOLDER.newFolder().toURI());
	final VerifyingBucketAssigner assigner = new VerifyingBucketAssigner(timestamp, watermark, processingTime);

	final Buckets<String, String> buckets = new Buckets<>(
			basePath,
			assigner,
			new DefaultBucketFactoryImpl<>(),
			new RowWisePartWriter.Factory<>(new SimpleStringEncoder<>()),
			DefaultRollingPolicy.create().build(),
			2,
			new PartFileConfig()
	);

	// The context carries exactly the values the assigner was constructed with.
	final TestUtils.MockSinkContext context =
			new TestUtils.MockSinkContext(timestamp, watermark, processingTime);
	buckets.onElement("test", context);
}
 
Example #2
Source File: BucketsTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Sends one element through a freshly created {@code Buckets} instance (backed by a
 * {@code RowWiseBucketWriter} on the temp folder's file system) whose assigner is a
 * {@code VerifyingBucketAssigner} built from the same three values that are handed to the
 * mock sink context.
 *
 * <p>NOTE(review): this method holds no assertions of its own; the checking is presumably
 * done inside {@code VerifyingBucketAssigner} - confirm against its definition.
 *
 * @param timestamp value passed as first argument to both the assigner and the context
 * @param watermark value passed as second argument to both the assigner and the context
 * @param processingTime value passed as third argument to both the assigner and the context
 * @throws Exception propagated from folder creation, bucket construction or element processing
 */
private void testCorrectTimestampPassingInContext(Long timestamp, long watermark, long processingTime) throws Exception {
	final Path basePath = new Path(TEMP_FOLDER.newFolder().toURI());
	final VerifyingBucketAssigner assigner = new VerifyingBucketAssigner(timestamp, watermark, processingTime);

	final Buckets<String, String> buckets = new Buckets<>(
			basePath,
			assigner,
			new DefaultBucketFactoryImpl<>(),
			new RowWiseBucketWriter<>(FileSystem.get(basePath.toUri()).createRecoverableWriter(), new SimpleStringEncoder<>()),
			DefaultRollingPolicy.builder().build(),
			2,
			OutputFileConfig.builder().build()
	);

	// The context carries exactly the values the assigner was constructed with.
	final TestUtils.MockSinkContext context =
			new TestUtils.MockSinkContext(timestamp, watermark, processingTime);
	buckets.onElement("test", context);
}
 
Example #3
Source File: BucketAssignerITCases.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Writes one element through a {@code Buckets} instance configured with a
 * {@code BasePathBucketAssigner} and asserts that the bucket returned for that element
 * has a path equal to the base path.
 *
 * @throws Exception propagated from folder creation, bucket construction or element processing
 */
@Test
public void testAssembleBucketPath() throws Exception {
	final Path basePath = new Path(TEMP_FOLDER.newFolder().toURI());
	final long ts = 1000L;

	// Only the maximum part size is overridden on the policy builder.
	final RollingPolicy<String, String> sizeCappedPolicy =
		DefaultRollingPolicy.builder().withMaxPartSize(7L).build();

	final Buckets<String, String> buckets = new Buckets<>(
		basePath,
		new BasePathBucketAssigner<>(),
		new DefaultBucketFactoryImpl<>(),
		new RowWiseBucketWriter<>(FileSystem.get(basePath.toUri()).createRecoverableWriter(), new SimpleStringEncoder<>()),
		sizeCappedPolicy,
		0,
		OutputFileConfig.builder().build()
	);

	// One value is reused for all three arguments of the mock context.
	final TestUtils.MockSinkContext context = new TestUtils.MockSinkContext(ts, ts, ts);
	final Bucket<String, String> target = buckets.onElement("abc", context);

	final Path expectedBucketPath = new Path(basePath.toUri());
	Assert.assertEquals(expectedBucketPath, target.getBucketPath());
}
 
Example #4
Source File: TestUtils.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Convenience overload that builds a {@code DefaultRollingPolicy} from the given limits and
 * delegates to the fully parameterized {@code createRescalingTestSink} overload with a
 * {@code TupleToStringBucketer}, a {@code Tuple2Encoder} and a {@code DefaultBucketFactoryImpl}.
 *
 * <p>Note that {@code inactivityInterval} is applied to both the rollover interval and the
 * inactivity interval of the policy.
 *
 * @param outDir forwarded unchanged to the delegate
 * @param totalParallelism forwarded unchanged to the delegate
 * @param taskIdx forwarded unchanged to the delegate
 * @param inactivityInterval used for both time-based limits of the rolling policy
 * @param partMaxSize maximum part size of the rolling policy
 * @return the harness produced by the delegate overload
 * @throws Exception propagated from the delegate
 */
static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createRescalingTestSink(
		File outDir,
		int totalParallelism,
		int taskIdx,
		long inactivityInterval,
		long partMaxSize) throws Exception {

	final RollingPolicy<Tuple2<String, Integer>, String> policy = DefaultRollingPolicy.builder()
			.withMaxPartSize(partMaxSize)
			.withRolloverInterval(inactivityInterval)
			.withInactivityInterval(inactivityInterval)
			.build();

	final TupleToStringBucketer bucketer = new TupleToStringBucketer();
	final Tuple2Encoder encoder = new Tuple2Encoder();

	// NOTE(review): the fixed 10L is presumably the bucket check interval - confirm against the delegate.
	return createRescalingTestSink(
			outDir, totalParallelism, taskIdx, 10L,
			bucketer, encoder, policy,
			new DefaultBucketFactoryImpl<>());
}
 
Example #5
Source File: BucketAssignerITCases.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Writes one element through a {@code Buckets} instance configured with a
 * {@code BasePathBucketAssigner} and asserts that the bucket returned for that element
 * has a path equal to the base path.
 *
 * @throws Exception propagated from folder creation, bucket construction or element processing
 */
@Test
public void testAssembleBucketPath() throws Exception {
	final Path basePath = new Path(TEMP_FOLDER.newFolder().toURI());
	final long ts = 1000L;

	// Only the maximum part size is overridden on the policy builder.
	final RollingPolicy<String, String> sizeCappedPolicy =
		DefaultRollingPolicy.create().withMaxPartSize(7L).build();

	final Buckets<String, String> buckets = new Buckets<>(
		basePath,
		new BasePathBucketAssigner<>(),
		new DefaultBucketFactoryImpl<>(),
		new RowWisePartWriter.Factory<>(new SimpleStringEncoder<>()),
		sizeCappedPolicy,
		0,
		new PartFileConfig()
	);

	// One value is reused for all three arguments of the mock context.
	final TestUtils.MockSinkContext context = new TestUtils.MockSinkContext(ts, ts, ts);
	final Bucket<String, String> target = buckets.onElement("abc", context);

	final Path expectedBucketPath = new Path(basePath.toUri());
	Assert.assertEquals(expectedBucketPath, target.getBucketPath());
}
 
Example #6
Source File: BucketAssignerITCases.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Writes one element through a {@code Buckets} instance configured with a
 * {@code BasePathBucketAssigner} and asserts that the bucket returned for that element
 * has a path equal to the base path.
 *
 * @throws Exception propagated from folder creation, bucket construction or element processing
 */
@Test
public void testAssembleBucketPath() throws Exception {
	final Path basePath = new Path(TEMP_FOLDER.newFolder().toURI());
	final long ts = 1000L;

	// Only the maximum part size is overridden on the policy builder.
	final RollingPolicy<String, String> sizeCappedPolicy =
		DefaultRollingPolicy.create().withMaxPartSize(7L).build();

	final Buckets<String, String> buckets = new Buckets<>(
		basePath,
		new BasePathBucketAssigner<>(),
		new DefaultBucketFactoryImpl<>(),
		new RowWisePartWriter.Factory<>(new SimpleStringEncoder<>()),
		sizeCappedPolicy,
		0
	);

	// One value is reused for all three arguments of the mock context.
	final TestUtils.MockSinkContext context = new TestUtils.MockSinkContext(ts, ts, ts);
	final Bucket<String, String> target = buckets.onElement("abc", context);

	final Path expectedBucketPath = new Path(basePath.toUri());
	Assert.assertEquals(expectedBucketPath, target.getBucketPath());
}
 
Example #7
Source File: BucketsTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Sends one element through a freshly created {@code Buckets} instance whose assigner is a
 * {@code VerifyingBucketAssigner} built from the same three values that are handed to the
 * mock sink context.
 *
 * <p>NOTE(review): this method holds no assertions of its own; the checking is presumably
 * done inside {@code VerifyingBucketAssigner} - confirm against its definition.
 *
 * @param timestamp value passed as first argument to both the assigner and the context
 * @param watermark value passed as second argument to both the assigner and the context
 * @param processingTime value passed as third argument to both the assigner and the context
 * @throws Exception propagated from folder creation, bucket construction or element processing
 */
private void testCorrectTimestampPassingInContext(Long timestamp, long watermark, long processingTime) throws Exception {
	final Path basePath = new Path(TEMP_FOLDER.newFolder().toURI());
	final VerifyingBucketAssigner assigner = new VerifyingBucketAssigner(timestamp, watermark, processingTime);

	final Buckets<String, String> buckets = new Buckets<>(
			basePath,
			assigner,
			new DefaultBucketFactoryImpl<>(),
			new RowWisePartWriter.Factory<>(new SimpleStringEncoder<>()),
			DefaultRollingPolicy.create().build(),
			2
	);

	// The context carries exactly the values the assigner was constructed with.
	final TestUtils.MockSinkContext context =
			new TestUtils.MockSinkContext(timestamp, watermark, processingTime);
	buckets.onElement("test", context);
}
 
Example #8
Source File: TestUtils.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a rescaling test sink harness by delegating to {@code createCustomRescalingTestSink}
 * with a {@code DefaultRollingPolicy} derived from the given limits, a
 * {@code TupleToStringBucketer} and an encoder that writes each tuple as
 * {@code f0@f1} followed by a newline, encoded as UTF-8.
 *
 * <p>Note that {@code inactivityInterval} is applied to both the rollover interval and the
 * inactivity interval of the policy.
 *
 * @param outDir forwarded unchanged to the delegate
 * @param totalParallelism forwarded unchanged to the delegate
 * @param taskIdx forwarded unchanged to the delegate
 * @param inactivityInterval used for both time-based limits of the rolling policy
 * @param partMaxSize maximum part size of the rolling policy
 * @return the harness produced by {@code createCustomRescalingTestSink}
 * @throws Exception propagated from the delegate
 */
static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createRescalingTestSink(
		File outDir,
		int totalParallelism,
		int taskIdx,
		long inactivityInterval,
		long partMaxSize) throws Exception {

	final RollingPolicy<Tuple2<String, Integer>, String> policy = DefaultRollingPolicy.create()
			.withMaxPartSize(partMaxSize)
			.withRolloverInterval(inactivityInterval)
			.withInactivityInterval(inactivityInterval)
			.build();

	final BucketAssigner<Tuple2<String, Integer>, String> assigner = new TupleToStringBucketer();

	// One line per tuple: "<f0>@<f1>" terminated by '\n'.
	final Encoder<Tuple2<String, Integer>> lineEncoder = (tuple, out) -> {
		final String line = tuple.f0 + '@' + tuple.f1;
		out.write(line.getBytes(StandardCharsets.UTF_8));
		out.write('\n');
	};

	// NOTE(review): the fixed 10L is presumably the bucket check interval - confirm against the delegate.
	return createCustomRescalingTestSink(
			outDir, totalParallelism, taskIdx, 10L,
			assigner, lineEncoder, policy,
			new DefaultBucketFactoryImpl<>());
}
 
Example #9
Source File: TestUtils.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a rescaling test sink harness by delegating to {@code createCustomRescalingTestSink}
 * with a {@code DefaultRollingPolicy} derived from the given limits, a
 * {@code TupleToStringBucketer} and an encoder that writes each tuple as
 * {@code f0@f1} followed by a newline, encoded as UTF-8.
 *
 * <p>Note that {@code inactivityInterval} is applied to both the rollover interval and the
 * inactivity interval of the policy.
 *
 * @param outDir forwarded unchanged to the delegate
 * @param totalParallelism forwarded unchanged to the delegate
 * @param taskIdx forwarded unchanged to the delegate
 * @param inactivityInterval used for both time-based limits of the rolling policy
 * @param partMaxSize maximum part size of the rolling policy
 * @return the harness produced by {@code createCustomRescalingTestSink}
 * @throws Exception propagated from the delegate
 */
static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createRescalingTestSink(
		File outDir,
		int totalParallelism,
		int taskIdx,
		long inactivityInterval,
		long partMaxSize) throws Exception {

	final RollingPolicy<Tuple2<String, Integer>, String> policy = DefaultRollingPolicy.create()
			.withMaxPartSize(partMaxSize)
			.withRolloverInterval(inactivityInterval)
			.withInactivityInterval(inactivityInterval)
			.build();

	final BucketAssigner<Tuple2<String, Integer>, String> assigner = new TupleToStringBucketer();

	// One line per tuple: "<f0>@<f1>" terminated by '\n'.
	final Encoder<Tuple2<String, Integer>> lineEncoder = (tuple, out) -> {
		final String line = tuple.f0 + '@' + tuple.f1;
		out.write(line.getBytes(StandardCharsets.UTF_8));
		out.write('\n');
	};

	// NOTE(review): the fixed 10L is presumably the bucket check interval - confirm against the delegate.
	return createCustomRescalingTestSink(
			outDir, totalParallelism, taskIdx, 10L,
			assigner, lineEncoder, policy,
			new DefaultBucketFactoryImpl<>());
}
 
Example #10
Source File: BucketStateSerializerTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Restores a {@code Bucket} from the given state via {@code Bucket.restore}, using the
 * writer from {@code createBucketWriter()}, a {@code DefaultRollingPolicy} with a max part
 * size of 10 and a default-built {@code OutputFileConfig}.
 *
 * @param initialPartCounter passed as the second argument to {@code Bucket.restore}
 * @param bucketState the state to restore the bucket from
 * @return the bucket returned by {@code Bucket.restore}
 * @throws IOException declared by this method; presumably raised by the restore call
 */
private static Bucket<String, String> restoreBucket(final int initialPartCounter, final BucketState<String> bucketState) throws IOException {
	return Bucket.restore(
		0, // NOTE(review): presumably the subtask index - confirm against Bucket.restore
		initialPartCounter,
		createBucketWriter(),
		DefaultRollingPolicy.builder().withMaxPartSize(10).build(),
		bucketState,
		OutputFileConfig.builder().build());
}
 
Example #11
Source File: BucketStateSerializerTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new {@code Bucket} for {@code BUCKET_ID} at the given path via
 * {@code Bucket.getNew}, using the writer from {@code createBucketWriter()}, a
 * {@code DefaultRollingPolicy} with a max part size of 10 and a default-built
 * {@code OutputFileConfig}.
 *
 * @param bucketPath the path handed to {@code Bucket.getNew} for the new bucket
 * @return the bucket returned by {@code Bucket.getNew}
 * @throws IOException declared by this method; presumably raised by the factory call
 */
private static Bucket<String, String> createNewBucket(final Path bucketPath) throws IOException {
	return Bucket.getNew(
		0, // NOTE(review): presumably the subtask index - confirm against Bucket.getNew
		BUCKET_ID,
		bucketPath,
		0, // NOTE(review): presumably the initial part counter - confirm against Bucket.getNew
		createBucketWriter(),
		DefaultRollingPolicy.builder().withMaxPartSize(10).build(),
		OutputFileConfig.builder().build());
}
 
Example #12
Source File: RollingPolicyTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a policy through the {@code DefaultRollingPolicy.create()} entry point (the one
 * this test's name labels as deprecated) and asserts that each configured limit is
 * reported back unchanged by the corresponding getter.
 *
 * @throws Exception declared for consistency with the other tests; nothing here is expected to throw
 */
@Test
public void testDefaultRollingPolicyDeprecatedCreate() throws Exception {
	// Declared with type arguments instead of as a raw type; build() infers them from
	// this target type, so no unchecked/rawtypes warning is produced.
	final DefaultRollingPolicy<String, String> policy = DefaultRollingPolicy.create()
		.withInactivityInterval(10)
		.withMaxPartSize(20)
		.withRolloverInterval(30)
		.build();

	Assert.assertEquals(10, policy.getInactivityInterval());
	Assert.assertEquals(20, policy.getMaxPartSize());
	Assert.assertEquals(30, policy.getRolloverInterval());
}
 
Example #13
Source File: RollingPolicyTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Drives a {@code Buckets} instance with a {@code DefaultRollingPolicy} (max part size 10,
 * inactivity interval 4, rollover interval 11) wrapped in a
 * {@code MethodCallCountingPolicyWrapper}, and checks the wrapper's call counters after
 * each element, processing-time tick and snapshot.
 *
 * <p>NOTE(review): the six arguments of {@code verifyCallCounters} are expected counter
 * values; which counter each position stands for is defined by
 * {@code MethodCallCountingPolicyWrapper}, which is not visible here.
 *
 * @throws Exception propagated from folder creation, bucket construction or bucket operations
 */
@Test
public void testDefaultRollingPolicy() throws Exception {
	final File outDir = TEMP_FOLDER.newFolder();
	final Path path = new Path(outDir.toURI());

	final RollingPolicy<String, String> originalRollingPolicy =
			DefaultRollingPolicy
					.builder()
					.withMaxPartSize(10L)
					.withInactivityInterval(4L)
					.withRolloverInterval(11L)
					.build();

	// The wrapper delegates to the real policy and records how it was called.
	final MethodCallCountingPolicyWrapper<String, String> rollingPolicy =
			new MethodCallCountingPolicyWrapper<>(originalRollingPolicy);

	final Buckets<String, String> buckets = createBuckets(path, rollingPolicy);

	// Nothing has been processed yet, so every counter is expected to be zero.
	rollingPolicy.verifyCallCounters(0L, 0L, 0L, 0L, 0L, 0L);

	// these two will fill up the first in-progress file and at the third it will roll ...
	buckets.onElement("test1", new TestUtils.MockSinkContext(1L, 1L, 1L));
	buckets.onElement("test1", new TestUtils.MockSinkContext(2L, 1L, 2L));
	rollingPolicy.verifyCallCounters(0L, 0L, 1L, 0L, 0L, 0L);

	buckets.onElement("test1", new TestUtils.MockSinkContext(3L, 1L, 3L));
	rollingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 0L, 0L);

	// still no time to roll
	buckets.onProcessingTime(5L);
	rollingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 1L, 0L);

	// roll due to inactivity
	buckets.onProcessingTime(7L);
	rollingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 2L, 1L);

	// Start a new part so the next processing-time tick has something to roll.
	buckets.onElement("test1", new TestUtils.MockSinkContext(3L, 1L, 3L));

	// roll due to rollover interval
	buckets.onProcessingTime(20L);
	rollingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 3L, 2L);

	// we take a checkpoint but we should not roll.
	buckets.snapshotState(1L, new TestUtils.MockListState<>(), new TestUtils.MockListState<>());
	rollingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 3L, 2L);
}
 
Example #14
Source File: RollingPolicyTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Drives a {@code Buckets} instance with a {@code DefaultRollingPolicy} (max part size 10,
 * inactivity interval 4, rollover interval 11) wrapped in a
 * {@code MethodCallCountingPolicyWrapper}, and checks the wrapper's call counters after
 * each element, processing-time tick and snapshot.
 *
 * <p>NOTE(review): the six arguments of {@code verifyCallCounters} are expected counter
 * values; which counter each position stands for is defined by
 * {@code MethodCallCountingPolicyWrapper}, which is not visible here.
 *
 * @throws Exception propagated from folder creation, bucket construction or bucket operations
 */
@Test
public void testDefaultRollingPolicy() throws Exception {
	final File outDir = TEMP_FOLDER.newFolder();
	final Path path = new Path(outDir.toURI());

	final RollingPolicy<String, String> originalRollingPolicy =
			DefaultRollingPolicy
					.create()
					.withMaxPartSize(10L)
					.withInactivityInterval(4L)
					.withRolloverInterval(11L)
					.build();

	// The wrapper delegates to the real policy and records how it was called.
	final MethodCallCountingPolicyWrapper<String, String> rollingPolicy =
			new MethodCallCountingPolicyWrapper<>(originalRollingPolicy);

	final Buckets<String, String> buckets = createBuckets(path, rollingPolicy);

	// Nothing has been processed yet, so every counter is expected to be zero.
	rollingPolicy.verifyCallCounters(0L, 0L, 0L, 0L, 0L, 0L);

	// these two will fill up the first in-progress file and at the third it will roll ...
	buckets.onElement("test1", new TestUtils.MockSinkContext(1L, 1L, 1L));
	buckets.onElement("test1", new TestUtils.MockSinkContext(2L, 1L, 2L));
	rollingPolicy.verifyCallCounters(0L, 0L, 1L, 0L, 0L, 0L);

	buckets.onElement("test1", new TestUtils.MockSinkContext(3L, 1L, 3L));
	rollingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 0L, 0L);

	// still no time to roll
	buckets.onProcessingTime(5L);
	rollingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 1L, 0L);

	// roll due to inactivity
	buckets.onProcessingTime(7L);
	rollingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 2L, 1L);

	// Start a new part so the next processing-time tick has something to roll.
	buckets.onElement("test1", new TestUtils.MockSinkContext(3L, 1L, 3L));

	// roll due to rollover interval
	buckets.onProcessingTime(20L);
	rollingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 3L, 2L);

	// we take a checkpoint but we should not roll.
	buckets.snapshotState(1L, new TestUtils.MockListState<>(), new TestUtils.MockListState<>());
	rollingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 3L, 2L);
}
 
Example #15
Source File: BucketsTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Simulates a scale-in: two {@code Buckets} instances (created with indices 0 and 1) write
 * to the same bucket id and snapshot their state; the two snapshots are then merged into
 * single state containers and restored into one {@code Buckets} instance.
 *
 * <p>The test asserts that the restored instance reports the larger of the two part
 * counters, holds exactly one active bucket ("test1") with an in-progress part, has one
 * pending file for the current checkpoint and none for previous checkpoints.
 *
 * @throws Exception propagated from folder creation or any bucket operation
 */
@Test
public void testMergeAtScaleInAndMaxCounterAtRecovery() throws Exception {
	final File outDir = TEMP_FOLDER.newFolder();
	final Path path = new Path(outDir.toURI());

	// Despite the variable name, this is a DefaultRollingPolicy limited by part size.
	final RollingPolicy<String, String> onCheckpointRP =
			DefaultRollingPolicy
					.builder()
					.withMaxPartSize(7L) // roll with 2 elements
					.build();

	// Separate state containers for each of the two simulated writers.
	final MockListState<byte[]> bucketStateContainerOne = new MockListState<>();
	final MockListState<byte[]> bucketStateContainerTwo = new MockListState<>();

	final MockListState<Long> partCounterContainerOne = new MockListState<>();
	final MockListState<Long> partCounterContainerTwo = new MockListState<>();

	final Buckets<String, String> bucketsOne = createBuckets(path, onCheckpointRP, 0);
	final Buckets<String, String> bucketsTwo = createBuckets(path, onCheckpointRP, 1);

	// First writer: a single element, then snapshot.
	bucketsOne.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));
	bucketsOne.snapshotState(0L, bucketStateContainerOne, partCounterContainerOne);

	Assert.assertEquals(1L, bucketsOne.getMaxPartCounter());

	// make sure we have one in-progress file here
	Assert.assertNotNull(bucketsOne.getActiveBuckets().get("test1").getInProgressPart());

	// add a couple of in-progress files so that the part counter increases.
	bucketsTwo.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));
	bucketsTwo.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));

	bucketsTwo.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));

	bucketsTwo.snapshotState(0L, bucketStateContainerTwo, partCounterContainerTwo);

	Assert.assertEquals(2L, bucketsTwo.getMaxPartCounter());

	// make sure we have one in-progress file here and a pending
	Assert.assertEquals(1L, bucketsTwo.getActiveBuckets().get("test1").getPendingFileRecoverablesPerCheckpoint().size());
	Assert.assertNotNull(bucketsTwo.getActiveBuckets().get("test1").getInProgressPart());

	// Merge both writers' snapshots, as a single restoring task would receive them.
	final ListState<byte[]> mergedBucketStateContainer = new MockListState<>();
	final ListState<Long> mergedPartCounterContainer = new MockListState<>();

	mergedBucketStateContainer.addAll(bucketStateContainerOne.getBackingList());
	mergedBucketStateContainer.addAll(bucketStateContainerTwo.getBackingList());

	mergedPartCounterContainer.addAll(partCounterContainerOne.getBackingList());
	mergedPartCounterContainer.addAll(partCounterContainerTwo.getBackingList());

	final Buckets<String, String> restoredBuckets =
			restoreBuckets(path, onCheckpointRP, 0, mergedBucketStateContainer, mergedPartCounterContainer);

	// we get the maximum of the previous tasks
	Assert.assertEquals(2L, restoredBuckets.getMaxPartCounter());

	final Map<String, Bucket<String, String>> activeBuckets = restoredBuckets.getActiveBuckets();
	Assert.assertEquals(1L, activeBuckets.size());
	Assert.assertTrue(activeBuckets.keySet().contains("test1"));

	final Bucket<String, String> bucket = activeBuckets.get("test1");
	Assert.assertEquals("test1", bucket.getBucketId());
	Assert.assertEquals(new Path(path, "test1"), bucket.getBucketPath());

	Assert.assertNotNull(bucket.getInProgressPart()); // the restored part file

	// this is due to the Bucket#merge(). The in progress file of one
	// of the previous tasks is put in the list of pending files.
	Assert.assertEquals(1L, bucket.getPendingFileRecoverablesForCurrentCheckpoint().size());

	// we commit the pending for previous checkpoints
	Assert.assertTrue(bucket.getPendingFileRecoverablesPerCheckpoint().isEmpty());
}
 
Example #16
Source File: LocalStreamingFileSinkTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a sink test harness built with a {@code TupleToIntegerBucketer} through a sequence
 * of elements, processing-time changes, snapshots and checkpoint notifications, checking
 * the file counts in the output directory after each step, and finally verifies the
 * content and parent directory name of every produced file.
 *
 * <p>NOTE(review): the two numeric arguments of {@code TestUtils.checkLocalFs} are
 * presumably the expected numbers of non-final and final files, in that order - confirm
 * against its definition.
 *
 * @throws Exception propagated from the harness or the file system checks
 */
@Test
public void testClosingWithCustomizedBucketer() throws Exception {
	final File outDir = TEMP_FOLDER.newFolder();
	final long partMaxSize = 2L;
	final long inactivityInterval = 100L;
	// The same interval is used for both the rollover and the inactivity limit.
	final RollingPolicy<Tuple2<String, Integer>, Integer> rollingPolicy =
			DefaultRollingPolicy
					.builder()
					.withMaxPartSize(partMaxSize)
					.withRolloverInterval(inactivityInterval)
					.withInactivityInterval(inactivityInterval)
					.build();

	// try-with-resources closes the harness before the final directory check below.
	try (
			OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> testHarness =
					TestUtils.createCustomizedRescalingTestSink(outDir, 1, 0, 100L, new TupleToIntegerBucketer(), new Tuple2Encoder(), rollingPolicy, new DefaultBucketFactoryImpl<>());
	) {
		testHarness.setup();
		testHarness.open();

		testHarness.setProcessingTime(0L);

		testHarness.processElement(new StreamRecord<>(Tuple2.of("test1", 1), 1L));
		testHarness.processElement(new StreamRecord<>(Tuple2.of("test2", 2), 1L));
		TestUtils.checkLocalFs(outDir, 2, 0);

		// this is to check the inactivity threshold
		testHarness.setProcessingTime(101L);
		TestUtils.checkLocalFs(outDir, 2, 0);

		testHarness.processElement(new StreamRecord<>(Tuple2.of("test3", 3), 1L));
		TestUtils.checkLocalFs(outDir, 3, 0);

		// Taking the snapshot alone leaves the file counts unchanged ...
		testHarness.snapshot(0L, 1L);
		TestUtils.checkLocalFs(outDir, 3, 0);

		// ... the counts only flip once the checkpoint is reported as complete.
		testHarness.notifyOfCompletedCheckpoint(0L);
		TestUtils.checkLocalFs(outDir, 0, 3);

		testHarness.processElement(new StreamRecord<>(Tuple2.of("test4", 4), 10L));
		TestUtils.checkLocalFs(outDir, 1, 3);

		testHarness.snapshot(1L, 0L);
		testHarness.notifyOfCompletedCheckpoint(1L);
	}

	// at close all files moved to final.
	TestUtils.checkLocalFs(outDir, 0, 4);

	// check file content and bucket ID.
	Map<File, String> contents = TestUtils.getFileContentByPath(outDir);
	for (Map.Entry<File, String> fileContents : contents.entrySet()) {
		// The parent directory name is parsed as the integer bucket id.
		Integer bucketId = Integer.parseInt(fileContents.getKey().getParentFile().getName());

		Assert.assertTrue(bucketId >= 1 && bucketId <= 4);
		// Each file is expected to hold exactly one line of the form "test<id>@<id>".
		Assert.assertEquals(String.format("test%d@%d\n", bucketId, bucketId), fileContents.getValue());
	}
}
 
Example #17
Source File: StreamingFileSink.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a row-format builder from the three required settings and delegates to the full
 * constructor with defaults for the rest: a default-built {@code DefaultRollingPolicy},
 * {@code DEFAULT_BUCKET_CHECK_INTERVAL}, a {@code DefaultBucketFactoryImpl} and a
 * default-built {@code OutputFileConfig}.
 *
 * @param basePath forwarded unchanged to the full constructor
 * @param encoder forwarded unchanged to the full constructor
 * @param bucketAssigner forwarded unchanged to the full constructor
 */
protected RowFormatBuilder(Path basePath, Encoder<IN> encoder, BucketAssigner<IN, BucketID> bucketAssigner) {
	this(basePath, encoder, bucketAssigner, DefaultRollingPolicy.builder().build(), DEFAULT_BUCKET_CHECK_INTERVAL, new DefaultBucketFactoryImpl<>(), OutputFileConfig.builder().build());
}
 
Example #18
Source File: StreamingFileSink.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a row-format builder from the three required settings and delegates to the full
 * constructor with defaults for the rest: a default-built {@code DefaultRollingPolicy},
 * the value {@code 60L * 1000L} and a {@code DefaultBucketFactoryImpl}.
 *
 * <p>NOTE(review): {@code 60L * 1000L} is presumably the bucket check interval in
 * milliseconds (one minute) - confirm against the full constructor.
 *
 * @param basePath forwarded unchanged to the full constructor
 * @param encoder forwarded unchanged to the full constructor
 * @param bucketAssigner forwarded unchanged to the full constructor
 */
RowFormatBuilder(Path basePath, Encoder<IN> encoder, BucketAssigner<IN, BucketID> bucketAssigner) {
	this(basePath, encoder, bucketAssigner, DefaultRollingPolicy.create().build(), 60L * 1000L, new DefaultBucketFactoryImpl<>());
}
 
Example #19
Source File: BucketsTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Simulates a scale-in: two {@code Buckets} instances (created with indices 0 and 1) write
 * to the same bucket id and snapshot their state; the two snapshots are then merged into
 * single state containers and restored into one {@code Buckets} instance.
 *
 * <p>The test asserts that the restored instance reports the larger of the two part
 * counters, holds exactly one active bucket ("test1") with an in-progress part, has one
 * pending part for the current checkpoint and none for previous checkpoints.
 *
 * @throws Exception propagated from folder creation or any bucket operation
 */
@Test
public void testMergeAtScaleInAndMaxCounterAtRecovery() throws Exception {
	final File outDir = TEMP_FOLDER.newFolder();
	final Path path = new Path(outDir.toURI());

	// Despite the variable name, this is a DefaultRollingPolicy limited by part size.
	final RollingPolicy<String, String> onCheckpointRP =
			DefaultRollingPolicy
					.create()
					.withMaxPartSize(7L) // roll with 2 elements
					.build();

	// Separate state containers for each of the two simulated writers.
	final MockListState<byte[]> bucketStateContainerOne = new MockListState<>();
	final MockListState<byte[]> bucketStateContainerTwo = new MockListState<>();

	final MockListState<Long> partCounterContainerOne = new MockListState<>();
	final MockListState<Long> partCounterContainerTwo = new MockListState<>();

	final Buckets<String, String> bucketsOne = createBuckets(path, onCheckpointRP, 0);
	final Buckets<String, String> bucketsTwo = createBuckets(path, onCheckpointRP, 1);

	// First writer: a single element, then snapshot.
	bucketsOne.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));
	bucketsOne.snapshotState(0L, bucketStateContainerOne, partCounterContainerOne);

	Assert.assertEquals(1L, bucketsOne.getMaxPartCounter());

	// make sure we have one in-progress file here
	Assert.assertNotNull(bucketsOne.getActiveBuckets().get("test1").getInProgressPart());

	// add a couple of in-progress files so that the part counter increases.
	bucketsTwo.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));
	bucketsTwo.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));

	bucketsTwo.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));

	bucketsTwo.snapshotState(0L, bucketStateContainerTwo, partCounterContainerTwo);

	Assert.assertEquals(2L, bucketsTwo.getMaxPartCounter());

	// make sure we have one in-progress file here and a pending
	Assert.assertEquals(1L, bucketsTwo.getActiveBuckets().get("test1").getPendingPartsPerCheckpoint().size());
	Assert.assertNotNull(bucketsTwo.getActiveBuckets().get("test1").getInProgressPart());

	// Merge both writers' snapshots, as a single restoring task would receive them.
	final ListState<byte[]> mergedBucketStateContainer = new MockListState<>();
	final ListState<Long> mergedPartCounterContainer = new MockListState<>();

	mergedBucketStateContainer.addAll(bucketStateContainerOne.getBackingList());
	mergedBucketStateContainer.addAll(bucketStateContainerTwo.getBackingList());

	mergedPartCounterContainer.addAll(partCounterContainerOne.getBackingList());
	mergedPartCounterContainer.addAll(partCounterContainerTwo.getBackingList());

	final Buckets<String, String> restoredBuckets =
			restoreBuckets(path, onCheckpointRP, 0, mergedBucketStateContainer, mergedPartCounterContainer);

	// we get the maximum of the previous tasks
	Assert.assertEquals(2L, restoredBuckets.getMaxPartCounter());

	final Map<String, Bucket<String, String>> activeBuckets = restoredBuckets.getActiveBuckets();
	Assert.assertEquals(1L, activeBuckets.size());
	Assert.assertTrue(activeBuckets.keySet().contains("test1"));

	final Bucket<String, String> bucket = activeBuckets.get("test1");
	Assert.assertEquals("test1", bucket.getBucketId());
	Assert.assertEquals(new Path(path, "test1"), bucket.getBucketPath());

	Assert.assertNotNull(bucket.getInProgressPart()); // the restored part file

	// this is due to the Bucket#merge(). The in progress file of one
	// of the previous tasks is put in the list of pending files.
	Assert.assertEquals(1L, bucket.getPendingPartsForCurrentCheckpoint().size());

	// we commit the pending for previous checkpoints
	Assert.assertTrue(bucket.getPendingPartsPerCheckpoint().isEmpty());
}
 
Example #20
Source File: BucketsTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Simulates a scale-in: two {@code Buckets} instances (created with indices 0 and 1) write
 * to the same bucket id and snapshot their state; the two snapshots are then merged into
 * single state containers and restored into one {@code Buckets} instance.
 *
 * <p>The test asserts that the restored instance reports the larger of the two part
 * counters, holds exactly one active bucket ("test1") with an in-progress part, has one
 * pending part for the current checkpoint and none for previous checkpoints.
 *
 * @throws Exception propagated from folder creation or any bucket operation
 */
@Test
public void testMergeAtScaleInAndMaxCounterAtRecovery() throws Exception {
	final File outDir = TEMP_FOLDER.newFolder();
	final Path path = new Path(outDir.toURI());

	// Despite the variable name, this is a DefaultRollingPolicy limited by part size.
	final RollingPolicy<String, String> onCheckpointRP =
			DefaultRollingPolicy
					.create()
					.withMaxPartSize(7L) // roll with 2 elements
					.build();

	// Separate state containers for each of the two simulated writers.
	final MockListState<byte[]> bucketStateContainerOne = new MockListState<>();
	final MockListState<byte[]> bucketStateContainerTwo = new MockListState<>();

	final MockListState<Long> partCounterContainerOne = new MockListState<>();
	final MockListState<Long> partCounterContainerTwo = new MockListState<>();

	final Buckets<String, String> bucketsOne = createBuckets(path, onCheckpointRP, 0);
	final Buckets<String, String> bucketsTwo = createBuckets(path, onCheckpointRP, 1);

	// First writer: a single element, then snapshot.
	bucketsOne.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));
	bucketsOne.snapshotState(0L, bucketStateContainerOne, partCounterContainerOne);

	Assert.assertEquals(1L, bucketsOne.getMaxPartCounter());

	// make sure we have one in-progress file here
	Assert.assertNotNull(bucketsOne.getActiveBuckets().get("test1").getInProgressPart());

	// add a couple of in-progress files so that the part counter increases.
	bucketsTwo.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));
	bucketsTwo.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));

	bucketsTwo.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L));

	bucketsTwo.snapshotState(0L, bucketStateContainerTwo, partCounterContainerTwo);

	Assert.assertEquals(2L, bucketsTwo.getMaxPartCounter());

	// make sure we have one in-progress file here and a pending
	Assert.assertEquals(1L, bucketsTwo.getActiveBuckets().get("test1").getPendingPartsPerCheckpoint().size());
	Assert.assertNotNull(bucketsTwo.getActiveBuckets().get("test1").getInProgressPart());

	// Merge both writers' snapshots, as a single restoring task would receive them.
	final ListState<byte[]> mergedBucketStateContainer = new MockListState<>();
	final ListState<Long> mergedPartCounterContainer = new MockListState<>();

	mergedBucketStateContainer.addAll(bucketStateContainerOne.getBackingList());
	mergedBucketStateContainer.addAll(bucketStateContainerTwo.getBackingList());

	mergedPartCounterContainer.addAll(partCounterContainerOne.getBackingList());
	mergedPartCounterContainer.addAll(partCounterContainerTwo.getBackingList());

	final Buckets<String, String> restoredBuckets =
			restoreBuckets(path, onCheckpointRP, 0, mergedBucketStateContainer, mergedPartCounterContainer);

	// we get the maximum of the previous tasks
	Assert.assertEquals(2L, restoredBuckets.getMaxPartCounter());

	final Map<String, Bucket<String, String>> activeBuckets = restoredBuckets.getActiveBuckets();
	Assert.assertEquals(1L, activeBuckets.size());
	Assert.assertTrue(activeBuckets.keySet().contains("test1"));

	final Bucket<String, String> bucket = activeBuckets.get("test1");
	Assert.assertEquals("test1", bucket.getBucketId());
	Assert.assertEquals(new Path(path, "test1"), bucket.getBucketPath());

	Assert.assertNotNull(bucket.getInProgressPart()); // the restored part file

	// this is due to the Bucket#merge(). The in progress file of one
	// of the previous tasks is put in the list of pending files.
	Assert.assertEquals(1L, bucket.getPendingPartsForCurrentCheckpoint().size());

	// we commit the pending for previous checkpoints
	Assert.assertTrue(bucket.getPendingPartsPerCheckpoint().isEmpty());
}
 
Example #21
Source File: RollingPolicyTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Drives a {@code Buckets} instance with a {@code DefaultRollingPolicy} (max part size 10,
 * inactivity interval 4, rollover interval 11) wrapped in a
 * {@code MethodCallCountingPolicyWrapper}, and checks the wrapper's call counters after
 * each element, processing-time tick and snapshot.
 *
 * <p>NOTE(review): the six arguments of {@code verifyCallCounters} are expected counter
 * values; which counter each position stands for is defined by
 * {@code MethodCallCountingPolicyWrapper}, which is not visible here.
 *
 * @throws Exception propagated from folder creation, bucket construction or bucket operations
 */
@Test
public void testDefaultRollingPolicy() throws Exception {
	final File outDir = TEMP_FOLDER.newFolder();
	final Path path = new Path(outDir.toURI());

	final RollingPolicy<String, String> originalRollingPolicy =
			DefaultRollingPolicy
					.create()
					.withMaxPartSize(10L)
					.withInactivityInterval(4L)
					.withRolloverInterval(11L)
					.build();

	// The wrapper delegates to the real policy and records how it was called.
	final MethodCallCountingPolicyWrapper<String, String> rollingPolicy =
			new MethodCallCountingPolicyWrapper<>(originalRollingPolicy);

	final Buckets<String, String> buckets = createBuckets(path, rollingPolicy);

	// Nothing has been processed yet, so every counter is expected to be zero.
	rollingPolicy.verifyCallCounters(0L, 0L, 0L, 0L, 0L, 0L);

	// these two will fill up the first in-progress file and at the third it will roll ...
	buckets.onElement("test1", new TestUtils.MockSinkContext(1L, 1L, 1L));
	buckets.onElement("test1", new TestUtils.MockSinkContext(2L, 1L, 2L));
	rollingPolicy.verifyCallCounters(0L, 0L, 1L, 0L, 0L, 0L);

	buckets.onElement("test1", new TestUtils.MockSinkContext(3L, 1L, 3L));
	rollingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 0L, 0L);

	// still no time to roll
	buckets.onProcessingTime(5L);
	rollingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 1L, 0L);

	// roll due to inactivity
	buckets.onProcessingTime(7L);
	rollingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 2L, 1L);

	// Start a new part so the next processing-time tick has something to roll.
	buckets.onElement("test1", new TestUtils.MockSinkContext(3L, 1L, 3L));

	// roll due to rollover interval
	buckets.onProcessingTime(20L);
	rollingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 3L, 2L);

	// we take a checkpoint but we should not roll.
	buckets.snapshotState(1L, new TestUtils.MockListState<>(), new TestUtils.MockListState<>());
	rollingPolicy.verifyCallCounters(0L, 0L, 2L, 1L, 3L, 2L);
}
 
Example #22
Source File: StreamingFileSink.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a row-format builder from the three required settings and delegates to the full
 * constructor with defaults for the rest: a default-built {@code DefaultRollingPolicy},
 * the value {@code 60L * 1000L}, a {@code DefaultBucketFactoryImpl}, and the default part
 * prefix and suffix from {@code PartFileConfig}.
 *
 * <p>NOTE(review): {@code 60L * 1000L} is presumably the bucket check interval in
 * milliseconds (one minute) - confirm against the full constructor.
 *
 * @param basePath forwarded unchanged to the full constructor
 * @param encoder forwarded unchanged to the full constructor
 * @param bucketAssigner forwarded unchanged to the full constructor
 */
RowFormatBuilder(Path basePath, Encoder<IN> encoder, BucketAssigner<IN, BucketID> bucketAssigner) {
	this(basePath, encoder, bucketAssigner, DefaultRollingPolicy.create().build(), 60L * 1000L, new DefaultBucketFactoryImpl<>(), PartFileConfig.DEFAULT_PART_PREFIX, PartFileConfig.DEFAULT_PART_SUFFIX);
}