org.apache.flink.core.fs.RecoverableWriter Java Examples

Source File: S3RecoverableFsDataOutputStreamTest.java From flink with Apache License 2.0

6 votes

/**
 * Records the current size of {@code uploadedContent} as the last persisted index and
 * returns a committer.
 *
 * <p>On {@code commit()} the committer stores the result of {@code getPublishedContents()}
 * in {@code published}, clears {@code uploadedContent} and resets {@code lastPersistedIndex}.
 * On {@code commitAfterRecovery()} it does the same, but only while {@code published} is empty.
 */
@Override
public RecoverableFsDataOutputStream.Committer snapshotAndGetCommitter() throws IOException {
	lastPersistedIndex = uploadedContent.size();

	final RecoverableFsDataOutputStream.Committer committer = new RecoverableFsDataOutputStream.Committer() {
		@Override
		public RecoverableWriter.CommitRecoverable getRecoverable() {
			// this committer does not hand out a recoverable
			return null;
		}

		@Override
		public void commitAfterRecovery() throws IOException {
			if (published.length != 0) {
				// published is already non-empty, so there is nothing left to commit
				return;
			}
			commit();
		}

		@Override
		public void commit() throws IOException {
			published = getPublishedContents();
			uploadedContent.clear();
			lastPersistedIndex = 0;
		}
	};

	return committer;
}

Source File: S3RecoverableFsDataOutputStreamTest.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Records the current size of {@code uploadedContent} as the last persisted index and
 * returns a committer.
 *
 * <p>On {@code commit()} the committer stores the result of {@code getPublishedContents()}
 * in {@code published}, clears {@code uploadedContent} and resets {@code lastPersistedIndex}.
 * On {@code commitAfterRecovery()} it does the same, but only while {@code published} is empty.
 */
@Override
public RecoverableFsDataOutputStream.Committer snapshotAndGetCommitter() throws IOException {
	lastPersistedIndex = uploadedContent.size();

	final RecoverableFsDataOutputStream.Committer committer = new RecoverableFsDataOutputStream.Committer() {
		@Override
		public RecoverableWriter.CommitRecoverable getRecoverable() {
			// this committer does not hand out a recoverable
			return null;
		}

		@Override
		public void commitAfterRecovery() throws IOException {
			if (published.length != 0) {
				// published is already non-empty, so there is nothing left to commit
				return;
			}
			commit();
		}

		@Override
		public void commit() throws IOException {
			published = getPublishedContents();
			uploadedContent.clear();
			lastPersistedIndex = 0;
		}
	};

	return committer;
}

Source File: HadoopS3RecoverableWriterITCase.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Writes and commits a part file, verifies that the temporary (incomplete) object is
 * still readable, cleans up the recoverable state and then reads the temporary object
 * once more. The test passes when a {@link FileNotFoundException} is thrown.
 */
@Test(expected = FileNotFoundException.class)
public void testCleanupRecoverableState() throws Exception {
	final RecoverableWriter recoverableWriter = getRecoverableWriter();

	final RecoverableFsDataOutputStream out = recoverableWriter.open(new Path(basePathForTest, "part-0"));
	out.write(bytesOf(testData1));
	final S3Recoverable persisted = (S3Recoverable) out.persist();

	out.closeForCommit().commit();

	// the temporary object has not been deleted yet, so its data must still be readable
	final Path tmpObjectBeforeCleanup = new Path('/' + persisted.incompleteObjectName());
	Assert.assertEquals(testData1, getContentsOfFile(tmpObjectBeforeCleanup));

	final boolean stateDeleted = recoverableWriter.cleanupRecoverableState(persisted);
	Assert.assertTrue(stateDeleted);

	// the temporary object was deleted above, so this read is expected to throw
	final Path tmpObjectAfterCleanup = new Path('/' + persisted.incompleteObjectName());
	getContentsOfFile(tmpObjectAfterCleanup);
}

Source File: BucketTest.java From flink with Apache License 2.0

6 votes

/**
 * Creates a new bucket through {@code Bucket.getNew}, combining the given arguments with
 * the {@code bucketId}, {@code partFileFactory} and {@code rollingPolicy} of this test class.
 */
private static Bucket<String, String> createBucket(
		final RecoverableWriter writer,
		final Path bucketPath,
		final int subtaskIdx,
		final int initialPartCounter,
		final PartFileConfig partFileConfig) {

	final Bucket<String, String> freshBucket = Bucket.getNew(
			writer, subtaskIdx, bucketId, bucketPath,
			initialPartCounter, partFileFactory, rollingPolicy, partFileConfig);

	return freshBucket;
}

Source File: HadoopS3RecoverableWriterExceptionITCase.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Persists a stream, writes more data and commits it, then recovers from the earlier
 * persist point and commits again. The test expects an {@link IOException}.
 */
@Test(expected = IOException.class)
public void testResumeAfterCommit() throws Exception {
	final RecoverableWriter recoverableWriter = getFileSystem().createRecoverableWriter();

	final RecoverableFsDataOutputStream out = recoverableWriter.open(new Path(basePathForTest, "part-0"));
	final byte[] firstChunk = testData1.getBytes(StandardCharsets.UTF_8);
	out.write(firstChunk);

	final RecoverableWriter.ResumeRecoverable resumePoint = out.persist();
	final byte[] secondChunk = testData2.getBytes(StandardCharsets.UTF_8);
	out.write(secondChunk);

	out.closeForCommit().commit();

	// resume from a persist point that belongs to the file committed above
	recoverableWriter.recover(resumePoint).closeForCommit().commit();
}

Source File: HadoopS3RecoverableWriterExceptionITCase.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Takes two persist points on the same stream, recovers and commits from the first one,
 * then recovers and commits from the second one. The test expects an {@link IOException}.
 */
@Test(expected = IOException.class)
public void testResumeWithWrongOffset() throws Exception {
	// A rather unrealistic scenario: it exists to trigger truncation of the file
	// and then attempt to resume while data is missing.

	final RecoverableWriter recoverableWriter = getFileSystem().createRecoverableWriter();

	final RecoverableFsDataOutputStream out = recoverableWriter.open(new Path(basePathForTest, "part-0"));
	out.write(testData1.getBytes(StandardCharsets.UTF_8));
	final RecoverableWriter.ResumeRecoverable firstPoint = out.persist();

	out.write(testData2.getBytes(StandardCharsets.UTF_8));
	final RecoverableWriter.ResumeRecoverable secondPoint = out.persist();

	out.write(testData3.getBytes(StandardCharsets.UTF_8));

	recoverableWriter.recover(firstPoint).closeForCommit().commit();

	// this should throw an exception
	recoverableWriter.recover(secondPoint).closeForCommit().commit();
}

Source File: HadoopS3RecoverableWriterITCase.java From flink with Apache License 2.0

6 votes

/**
 * Cleans up the same recoverable state twice: the first call must report {@code true},
 * the second call must report {@code false} without throwing.
 */
@Test
public void testCallingDeleteObjectTwiceDoesNotThroughException() throws Exception {
	final RecoverableWriter recoverableWriter = getRecoverableWriter();

	final RecoverableFsDataOutputStream out = recoverableWriter.open(new Path(basePathForTest, "part-0"));
	out.write(bytesOf(testData1));
	final S3Recoverable persisted = (S3Recoverable) out.persist();

	out.closeForCommit().commit();

	// the temporary object has not been deleted yet, so its data must still be readable
	final Path tmpObject = new Path('/' + persisted.incompleteObjectName());
	Assert.assertEquals(testData1, getContentsOfFile(tmpObject));

	final boolean firstCleanup = recoverableWriter.cleanupRecoverableState(persisted);
	Assert.assertTrue(firstCleanup);

	final boolean secondCleanup = recoverableWriter.cleanupRecoverableState(persisted);
	Assert.assertFalse(secondCleanup);
}

Source File: HadoopS3RecoverableWriterExceptionITCase.java From flink with Apache License 2.0

6 votes

/**
 * Takes two persist points on the same stream, recovers and commits from the first one,
 * then recovers and commits from the second one. The test expects an {@link IOException}.
 */
@Test(expected = IOException.class)
public void testResumeWithWrongOffset() throws Exception {
	// A rather unrealistic scenario: it exists to trigger truncation of the file
	// and then attempt to resume while data is missing.

	final RecoverableWriter recoverableWriter = getFileSystem().createRecoverableWriter();

	final RecoverableFsDataOutputStream out = recoverableWriter.open(new Path(basePathForTest, "part-0"));
	out.write(testData1.getBytes(StandardCharsets.UTF_8));
	final RecoverableWriter.ResumeRecoverable firstPoint = out.persist();

	out.write(testData2.getBytes(StandardCharsets.UTF_8));
	final RecoverableWriter.ResumeRecoverable secondPoint = out.persist();

	out.write(testData3.getBytes(StandardCharsets.UTF_8));

	recoverableWriter.recover(firstPoint).closeForCommit().commit();

	// this should throw an exception
	recoverableWriter.recover(secondPoint).closeForCommit().commit();
}

Source File: Bucket.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Constructor to restore a bucket from checkpointed state.
 *
 * <p>Delegates to the other constructor of this class, taking the bucket id and bucket
 * path from {@code bucketState}, and afterwards calls {@code restoreInProgressFile} and
 * {@code commitRecoveredPendingFiles} with that same state.
 *
 * @param fsWriter recoverable writer passed on to the delegated constructor
 * @param subtaskIndex subtask index passed on to the delegated constructor
 * @param initialPartCounter part counter passed on to the delegated constructor
 * @param partFileFactory part file factory passed on to the delegated constructor
 * @param rollingPolicy rolling policy passed on to the delegated constructor
 * @param bucketState checkpointed state that supplies the bucket id and path and is
 *                    handed to the two restore steps
 * @throws IOException declared by this constructor; presumably raised by the restore
 *                     steps (their bodies are not visible here)
 */
private Bucket(
		final RecoverableWriter fsWriter,
		final int subtaskIndex,
		final long initialPartCounter,
		final PartFileWriter.PartFileFactory<IN, BucketID> partFileFactory,
		final RollingPolicy<IN, BucketID> rollingPolicy,
		final BucketState<BucketID> bucketState) throws IOException {

	// set up the bucket fields first, using id and path stored in the state
	this(
			fsWriter,
			subtaskIndex,
			bucketState.getBucketId(),
			bucketState.getBucketPath(),
			initialPartCounter,
			partFileFactory,
			rollingPolicy);

	// then apply the restore steps to the initialized bucket
	restoreInProgressFile(bucketState);
	commitRecoveredPendingFiles(bucketState);
}

Source File: HadoopRecoverableWriterOldHadoopWithNoTruncateSupportTest.java From flink with Apache License 2.0

6 votes

/**
 * Closes a stream for commit, recovers a committer from the resulting recoverable,
 * commits after recovery and verifies the content of the target file.
 */
@Test
public void testRecoveryAfterClosingForCommitWorks() throws IOException {
	final Path targetFile = new Path(basePath, "test-1");
	final String content = "test_line";

	final RecoverableWriter recoverableWriter = fileSystem.createRecoverableWriter();
	final RecoverableFsDataOutputStream out =
			getOpenStreamToFileWithContent(recoverableWriter, targetFile, content);

	// close for commit, but commit only through a committer recovered from the recoverable
	recoverableWriter
			.recoverForCommit(out.closeForCommit().getRecoverable())
			.commitAfterRecovery();

	verifyFileContent(targetFile, content);
}

Source File: S3RecoverableFsDataOutputStream.java From flink with Apache License 2.0

6 votes

/**
 * Flushes the current file stream, opens a new part if necessary and returns the
 * recoverable produced by the upload snapshot. All steps run between {@code lock()}
 * and {@code unlock()}.
 */
@Override
public RecoverableWriter.ResumeRecoverable persist() throws IOException {
	lock();
	try {
		fileStream.flush();
		openNewPartIfNecessary(userDefinedMinPartSize);

		// Writing to the current file continues; only the upload is limited
		// to the first n bytes of the current file.
		final RecoverableWriter.ResumeRecoverable snapshot = upload.snapshotAndGetRecoverable(fileStream);
		return snapshot;
	}
	finally {
		unlock();
	}
}

Source File: S3RecoverableFsDataOutputStreamTest.java From flink with Apache License 2.0

6 votes

/**
 * Records the current size of {@code uploadedContent} as the last persisted index and
 * returns a committer.
 *
 * <p>On {@code commit()} the committer stores the result of {@code getPublishedContents()}
 * in {@code published}, clears {@code uploadedContent} and resets {@code lastPersistedIndex}.
 * On {@code commitAfterRecovery()} it does the same, but only while {@code published} is empty.
 */
@Override
public RecoverableFsDataOutputStream.Committer snapshotAndGetCommitter() throws IOException {
	lastPersistedIndex = uploadedContent.size();

	final RecoverableFsDataOutputStream.Committer committer = new RecoverableFsDataOutputStream.Committer() {
		@Override
		public RecoverableWriter.CommitRecoverable getRecoverable() {
			// this committer does not hand out a recoverable
			return null;
		}

		@Override
		public void commitAfterRecovery() throws IOException {
			if (published.length != 0) {
				// published is already non-empty, so there is nothing left to commit
				return;
			}
			commit();
		}

		@Override
		public void commit() throws IOException {
			published = getPublishedContents();
			uploadedContent.clear();
			lastPersistedIndex = 0;
		}
	};

	return committer;
}

Source File: HadoopS3RecoverableWriterExceptionITCase.java From flink with Apache License 2.0

6 votes

/**
 * Takes two persist points on the same stream, recovers and commits from the first one,
 * then recovers and commits from the second one. The test expects an {@link IOException}.
 */
@Test(expected = IOException.class)
public void testResumeWithWrongOffset() throws Exception {
	// A rather unrealistic scenario: it exists to trigger truncation of the file
	// and then attempt to resume while data is missing.

	final RecoverableWriter recoverableWriter = getFileSystem().createRecoverableWriter();

	final RecoverableFsDataOutputStream out = recoverableWriter.open(new Path(basePathForTest, "part-0"));
	out.write(testData1.getBytes(StandardCharsets.UTF_8));
	final RecoverableWriter.ResumeRecoverable firstPoint = out.persist();

	out.write(testData2.getBytes(StandardCharsets.UTF_8));
	final RecoverableWriter.ResumeRecoverable secondPoint = out.persist();

	out.write(testData3.getBytes(StandardCharsets.UTF_8));

	recoverableWriter.recover(firstPoint).closeForCommit().commit();

	// this should throw an exception
	recoverableWriter.recover(secondPoint).closeForCommit().commit();
}

Source File: DefaultBucketFactoryImpl.java From flink with Apache License 2.0

6 votes

/**
 * Restores a bucket by forwarding all arguments, unchanged and in the same order,
 * to {@code Bucket.restore}.
 */
@Override
public Bucket<IN, BucketID> restoreBucket(
		final RecoverableWriter fsWriter,
		final int subtaskIndex,
		final long initialPartCounter,
		final PartFileWriter.PartFileFactory<IN, BucketID> partFileWriterFactory,
		final RollingPolicy<IN, BucketID> rollingPolicy,
		final BucketState<BucketID> bucketState,
		final PartFileConfig partFileConfig) throws IOException {

	final Bucket<IN, BucketID> restored = Bucket.restore(
			fsWriter, subtaskIndex, initialPartCounter,
			partFileWriterFactory, rollingPolicy, bucketState, partFileConfig);

	return restored;
}

Source File: Bucket.java From flink with Apache License 2.0

6 votes

/**
 * Constructor to create a new empty bucket.
 *
 * <p>Stores the given arguments in the corresponding fields and initializes the
 * bookkeeping collections as empty. All reference-typed arguments are passed through
 * {@code checkNotNull}; the two primitive arguments are stored as-is.
 *
 * @param fsWriter recoverable writer stored in this bucket
 * @param subtaskIndex subtask index stored in this bucket
 * @param bucketId id stored in this bucket
 * @param bucketPath path stored in this bucket
 * @param initialPartCounter initial value of the part counter
 * @param partFileFactory part file factory stored in this bucket
 * @param rollingPolicy rolling policy stored in this bucket
 * @param partFileConfig part file config stored in this bucket
 */
private Bucket(
		final RecoverableWriter fsWriter,
		final int subtaskIndex,
		final BucketID bucketId,
		final Path bucketPath,
		final long initialPartCounter,
		final PartFileWriter.PartFileFactory<IN, BucketID> partFileFactory,
		final RollingPolicy<IN, BucketID> rollingPolicy,
		final PartFileConfig partFileConfig) {
	this.fsWriter = checkNotNull(fsWriter);
	this.subtaskIndex = subtaskIndex;
	this.bucketId = checkNotNull(bucketId);
	this.bucketPath = checkNotNull(bucketPath);
	this.partCounter = initialPartCounter;
	this.partFileFactory = checkNotNull(partFileFactory);
	this.rollingPolicy = checkNotNull(rollingPolicy);

	// bookkeeping collections start out empty for a new bucket
	this.pendingPartsForCurrentCheckpoint = new ArrayList<>();
	this.pendingPartsPerCheckpoint = new TreeMap<>();
	this.resumablesPerCheckpoint = new TreeMap<>();

	// note: this null check runs last, after the collections above were created
	this.partFileConfig = checkNotNull(partFileConfig);
}

Source File: BucketStateSerializerTest.java From flink with Apache License 2.0

6 votes

/**
 * Serializes and deserializes a bucket state that has no in-progress file and an empty
 * map of committable files, then checks that the recovered state matches.
 */
@Test
public void testSerializationEmpty() throws IOException {
	final File bucketRoot = tempFolder.newFolder();
	final RecoverableWriter recoverableWriter =
			FileSystem.get(bucketRoot.toURI()).createRecoverableWriter();

	final Path bucketPath = new Path(bucketRoot.getPath(), "test");

	final BucketState<String> originalState = new BucketState<>(
			"test", bucketPath, Long.MAX_VALUE, null, new HashMap<>());

	final SimpleVersionedSerializer<BucketState<String>> stateSerializer =
			new BucketStateSerializer<>(
					recoverableWriter.getResumeRecoverableSerializer(),
					recoverableWriter.getCommitRecoverableSerializer(),
					SimpleVersionedStringSerializer.INSTANCE);

	// round trip through the versioned serialization format
	final byte[] serialized =
			SimpleVersionedSerialization.writeVersionAndSerialize(stateSerializer, originalState);
	final BucketState<String> restoredState =
			SimpleVersionedSerialization.readVersionAndDeSerialize(stateSerializer, serialized);

	Assert.assertEquals(bucketPath, restoredState.getBucketPath());
	Assert.assertNull(restoredState.getInProgressResumableFile());
	Assert.assertTrue(restoredState.getCommittableFilesPerCheckpoint().isEmpty());
}

Source File: HadoopS3RecoverableWriterExceptionITCase.java From flink with Apache License 2.0

6 votes

/**
 * Persists a stream, writes more data and commits it, then recovers from the earlier
 * persist point and commits again. The test expects an {@link IOException}.
 */
@Test(expected = IOException.class)
public void testResumeAfterCommit() throws Exception {
	final RecoverableWriter recoverableWriter = getFileSystem().createRecoverableWriter();

	final RecoverableFsDataOutputStream out = recoverableWriter.open(new Path(basePathForTest, "part-0"));
	final byte[] firstChunk = testData1.getBytes(StandardCharsets.UTF_8);
	out.write(firstChunk);

	final RecoverableWriter.ResumeRecoverable resumePoint = out.persist();
	final byte[] secondChunk = testData2.getBytes(StandardCharsets.UTF_8);
	out.write(secondChunk);

	out.closeForCommit().commit();

	// resume from a persist point that belongs to the file committed above
	recoverableWriter.recover(resumePoint).closeForCommit().commit();
}

Source File: BucketStateSerializerTest.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Serializes and deserializes a bucket state that has no in-progress file and an empty
 * map of committable files, then checks that the recovered state matches.
 */
@Test
public void testSerializationEmpty() throws IOException {
	final File bucketRoot = tempFolder.newFolder();
	final RecoverableWriter recoverableWriter =
			FileSystem.get(bucketRoot.toURI()).createRecoverableWriter();

	final Path bucketPath = new Path(bucketRoot.getPath(), "test");

	final BucketState<String> originalState = new BucketState<>(
			"test", bucketPath, Long.MAX_VALUE, null, new HashMap<>());

	final SimpleVersionedSerializer<BucketState<String>> stateSerializer =
			new BucketStateSerializer<>(
					recoverableWriter.getResumeRecoverableSerializer(),
					recoverableWriter.getCommitRecoverableSerializer(),
					SimpleVersionedStringSerializer.INSTANCE);

	// round trip through the versioned serialization format
	final byte[] serialized =
			SimpleVersionedSerialization.writeVersionAndSerialize(stateSerializer, originalState);
	final BucketState<String> restoredState =
			SimpleVersionedSerialization.readVersionAndDeSerialize(stateSerializer, serialized);

	Assert.assertEquals(bucketPath, restoredState.getBucketPath());
	Assert.assertNull(restoredState.getInProgressResumableFile());
	Assert.assertTrue(restoredState.getCommittableFilesPerCheckpoint().isEmpty());
}

Source File: S3RecoverableFsDataOutputStream.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Flushes the current file stream, opens a new part if necessary and returns the
 * recoverable produced by the upload snapshot. All steps run between {@code lock()}
 * and {@code unlock()}.
 */
@Override
public RecoverableWriter.ResumeRecoverable persist() throws IOException {
	lock();
	try {
		fileStream.flush();
		openNewPartIfNecessary(userDefinedMinPartSize);

		// Writing to the current file continues; only the upload is limited
		// to the first n bytes of the current file.
		final RecoverableWriter.ResumeRecoverable snapshot = upload.snapshotAndGetRecoverable(fileStream);
		return snapshot;
	}
	finally {
		unlock();
	}
}

Source File: HadoopRecoverableWriterOldHadoopWithNoTruncateSupportTest.java From flink with Apache License 2.0

5 votes

/**
 * Writes content through a recoverable stream, commits it directly and verifies
 * the content of the target file.
 */
@Test
public void testWriteAndCommitWorks() throws IOException {
	final Path targetFile = new Path(basePath, "test-0");
	final String content = "test_line";

	final RecoverableWriter recoverableWriter = fileSystem.createRecoverableWriter();
	final RecoverableFsDataOutputStream out =
			getOpenStreamToFileWithContent(recoverableWriter, targetFile, content);

	out.closeForCommit().commit();

	verifyFileContent(targetFile, content);
}

Source File: S3RecoverableFsDataOutputStreamTest.java From flink with Apache License 2.0

5 votes

/**
 * Records the current size of {@code uploadedContent} as the last persisted index and,
 * when the position of the given file is non-negative, stores the file's contents in
 * {@code uncompleted}. Always returns {@code null}.
 */
@Override
public RecoverableWriter.ResumeRecoverable snapshotAndGetRecoverable(RefCountedFSOutputStream incompletePartFile) throws IOException {
	lastPersistedIndex = uploadedContent.size();

	if (incompletePartFile.getPos() < 0L) {
		// negative position: leave the uncompleted part untouched
		return null;
	}

	final byte[] partContents = readFileContents(incompletePartFile);
	uncompleted = Optional.of(partContents);
	return null;
}

Source File: BucketTest.java From flink with Apache License 2.0

5 votes

/**
 * Restores a bucket from a state that carries {@code null} as in-progress file and the
 * pending parts produced by {@code createPendingPartsPerCheckpoint(numberOfPendingParts)}.
 */
private Bucket<String, String> getRestoredBucketWithOnlyPendingParts(final BaseStubWriter writer, final int numberOfPendingParts) throws IOException {
	final Map<Long, List<RecoverableWriter.CommitRecoverable>> pendingParts =
			createPendingPartsPerCheckpoint(numberOfPendingParts);

	// null in-progress argument: the state consists of the pending parts only
	final BucketState<String> stateWithOnlyPendingParts =
			new BucketState<>("test", new Path(), 12345L, null, pendingParts);

	final Bucket<String, String> restored = Bucket.restore(
			writer, 0, 1L, partFileFactory, rollingPolicy, stateWithOnlyPendingParts, new PartFileConfig());
	return restored;
}

Source File: FlinkS3FileSystem.java From flink with Apache License 2.0

5 votes

/**
 * Creates an S3 recoverable writer from this file system's configuration.
 *
 * @throws UnsupportedOperationException if no {@code s3AccessHelper} is available
 */
@Override
public RecoverableWriter createRecoverableWriter() throws IOException {
	if (s3AccessHelper != null) {
		final RecoverableWriter s3Writer = S3RecoverableWriter.writer(
				getHadoopFileSystem(),
				tmpFileCreator,
				s3AccessHelper,
				uploadThreadPool,
				s3uploadPartSize,
				maxConcurrentUploadsPerStream);
		return s3Writer;
	}

	// no access helper: this is the case for Presto
	throw new UnsupportedOperationException("This s3 file system implementation does not support recoverable writers.");
}

Source File: HadoopS3RecoverableWriterITCase.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Writes data, persists, writes more data and commits; the committed file must hold
 * both chunks concatenated.
 */
@Test
public void testCommitAfterPersist() throws Exception {
	final RecoverableWriter recoverableWriter = getRecoverableWriter();
	final Path targetFile = new Path(basePathForTest, "part-0");

	final RecoverableFsDataOutputStream out = recoverableWriter.open(targetFile);

	out.write(bytesOf(testData1));
	out.persist();
	out.write(bytesOf(testData2));

	out.closeForCommit().commit();

	final String committedContent = getContentsOfFile(targetFile);
	Assert.assertEquals(testData1 + testData2, committedContent);
}

Source File: BucketStateSerializerTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Serializes and deserializes a bucket state that carries only an in-progress file and
 * checks that the bucket's parent directory holds exactly one file whose path starts
 * with the {@code .test.inprogress} prefix.
 */
@Test
public void testSerializationOnlyInProgress() throws IOException {
	final File bucketRoot = tempFolder.newFolder();
	final FileSystem localFs = FileSystem.get(bucketRoot.toURI());

	final Path bucketPath = new Path(bucketRoot.getPath(), "test");

	final RecoverableWriter recoverableWriter = localFs.createRecoverableWriter();
	final RecoverableFsDataOutputStream out = recoverableWriter.open(bucketPath);
	out.write(IN_PROGRESS_CONTENT.getBytes(Charset.forName("UTF-8")));

	final RecoverableWriter.ResumeRecoverable inProgress = out.persist();

	final BucketState<String> originalState = new BucketState<>(
			"test", bucketPath, Long.MAX_VALUE, inProgress, new HashMap<>());

	final SimpleVersionedSerializer<BucketState<String>> stateSerializer =
			new BucketStateSerializer<>(
					recoverableWriter.getResumeRecoverableSerializer(),
					recoverableWriter.getCommitRecoverableSerializer(),
					SimpleVersionedStringSerializer.INSTANCE);

	final byte[] serialized =
			SimpleVersionedSerialization.writeVersionAndSerialize(stateSerializer, originalState);

	// to simulate that everything is over for file.
	out.close();

	final BucketState<String> restoredState =
			SimpleVersionedSerialization.readVersionAndDeSerialize(stateSerializer, serialized);

	Assert.assertEquals(bucketPath, restoredState.getBucketPath());

	final FileStatus[] listing = localFs.listStatus(bucketPath.getParent());
	Assert.assertEquals(1L, listing.length);

	// the single remaining file must be the in-progress file of the bucket
	final String onlyFilePath = listing[0].getPath().getPath();
	final String inProgressPrefix = (new Path(bucketPath.getParent(), ".test.inprogress")).toString();
	Assert.assertTrue(onlyFilePath.startsWith(inProgressPrefix));
}

Source File: BucketTest.java From flink with Apache License 2.0

5 votes

/**
 * Restores a bucket through {@code Bucket.restore}, wrapping the given writer and the
 * test's {@code ENCODER} in a {@code RowWiseBucketWriter} and using the test's
 * {@code rollingPolicy}.
 */
private static Bucket<String, String> restoreBucket(
		final RecoverableWriter writer,
		final int subtaskIndex,
		final long initialPartCounter,
		final BucketState<String> bucketState,
		final OutputFileConfig outputFileConfig) throws Exception {

	final Bucket<String, String> restored = Bucket.restore(
			subtaskIndex,
			initialPartCounter,
			new RowWiseBucketWriter<>(writer, ENCODER),
			rollingPolicy,
			bucketState,
			outputFileConfig);

	return restored;
}

Source File: HadoopRecoverableWriterOldHadoopWithNoTruncateSupportTest.java From flink with Apache License 2.0

5 votes

/**
 * Verifies that recovering from a resume point fails with an {@link IOException} whose
 * cause is an {@link IllegalStateException}.
 *
 * <p>The test fails when {@code recover()} returns without throwing, so a missing
 * exception can no longer go unnoticed.
 */
@Test
public void testExceptionThrownWhenRecoveringWithInProgressFile() throws IOException {
	final RecoverableWriter writerUnderTest = fileSystem.createRecoverableWriter();
	final RecoverableFsDataOutputStream stream = writerUnderTest.open(new Path(basePath, "test-2"));
	final RecoverableWriter.ResumeRecoverable recoverable = stream.persist();
	assertNotNull(recoverable);

	boolean recoverFailed = false;
	try {
		writerUnderTest.recover(recoverable);
	} catch (IOException e) {
		recoverFailed = true;
		// this is the expected exception and we check also if the root cause is the hadoop < 2.7 version
		assertTrue(e.getCause() instanceof IllegalStateException);
	}

	// without this check the test would pass even if recover() did not throw at all
	assertTrue("recover() was expected to throw an IOException", recoverFailed);
}

Source File: BucketTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Builds a map with one entry per checkpoint id in {@code [0, noOfCheckpoints)}; each
 * entry maps the id to a fresh list holding a single {@code NoOpRecoverable}.
 */
private Map<Long, List<RecoverableWriter.CommitRecoverable>> createPendingPartsPerCheckpoint(int noOfCheckpoints) {
	final Map<Long, List<RecoverableWriter.CommitRecoverable>> result = new HashMap<>();

	long checkpointId = 0L;
	while (checkpointId < noOfCheckpoints) {
		final List<RecoverableWriter.CommitRecoverable> singlePending = new ArrayList<>();
		singlePending.add(new NoOpRecoverable());
		result.put(checkpointId, singlePending);
		checkpointId++;
	}

	return result;
}

Source File: FlinkS3FileSystem.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Creates an S3 recoverable writer from this file system's configuration.
 *
 * @throws UnsupportedOperationException if no {@code s3AccessHelper} is available
 */
@Override
public RecoverableWriter createRecoverableWriter() throws IOException {
	if (s3AccessHelper != null) {
		final RecoverableWriter s3Writer = S3RecoverableWriter.writer(
				getHadoopFileSystem(),
				tmpFileCreator,
				s3AccessHelper,
				uploadThreadPool,
				s3uploadPartSize,
				maxConcurrentUploadsPerStream);
		return s3Writer;
	}

	// no access helper: this is the case for Presto
	throw new UnsupportedOperationException("This s3 file system implementation does not support recoverable writers.");
}

Source File: HadoopS3RecoverableWriterITCase.java From flink with Apache License 2.0

5 votes

/**
 * Opens a stream and commits it right away without writing any data; the test passes
 * when no exception is thrown.
 */
@Test
public void testCloseWithNoData() throws Exception {
	final RecoverableWriter recoverableWriter = getRecoverableWriter();

	// nothing is written between opening the stream and committing it
	final RecoverableFsDataOutputStream emptyStream =
			recoverableWriter.open(new Path(basePathForTest, "part-0"));

	emptyStream.closeForCommit().commit();
}

org.apache.flink.core.fs.RecoverableWriter Java Examples