org.apache.flink.core.fs.RecoverableWriter Java Examples
The following examples show how to use
org.apache.flink.core.fs.RecoverableWriter.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: S3RecoverableFsDataOutputStreamTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Records the current size of {@code uploadedContent} as the last persisted index and
 * returns a committer bound to the state of the enclosing object.
 *
 * <p>The committer's {@code commit()} stores the result of {@code getPublishedContents()}
 * in {@code published}, clears {@code uploadedContent} and resets the persisted index to 0.
 * Its {@code commitAfterRecovery()} commits only while {@code published} is empty, and its
 * {@code getRecoverable()} always returns {@code null}.
 *
 * @return the committer described above
 * @throws IOException as declared by the overridden method
 */
@Override
public RecoverableFsDataOutputStream.Committer snapshotAndGetCommitter() throws IOException {
    lastPersistedIndex = uploadedContent.size();

    final RecoverableFsDataOutputStream.Committer committer = new RecoverableFsDataOutputStream.Committer() {

        @Override
        public void commit() throws IOException {
            published = getPublishedContents();
            uploadedContent.clear();
            lastPersistedIndex = 0;
        }

        @Override
        public void commitAfterRecovery() throws IOException {
            // Something has already been published: nothing left to do.
            if (published.length != 0) {
                return;
            }
            commit();
        }

        @Override
        public RecoverableWriter.CommitRecoverable getRecoverable() {
            return null;
        }
    };
    return committer;
}
Example #2
Source File: S3RecoverableFsDataOutputStreamTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Records the current size of {@code uploadedContent} as the last persisted index and
 * returns a committer bound to the state of the enclosing object.
 *
 * <p>The committer's {@code commit()} stores the result of {@code getPublishedContents()}
 * in {@code published}, clears {@code uploadedContent} and resets the persisted index to 0.
 * Its {@code commitAfterRecovery()} commits only while {@code published} is empty, and its
 * {@code getRecoverable()} always returns {@code null}.
 *
 * @return the committer described above
 * @throws IOException as declared by the overridden method
 */
@Override
public RecoverableFsDataOutputStream.Committer snapshotAndGetCommitter() throws IOException {
    lastPersistedIndex = uploadedContent.size();

    final RecoverableFsDataOutputStream.Committer committer = new RecoverableFsDataOutputStream.Committer() {

        @Override
        public void commit() throws IOException {
            published = getPublishedContents();
            uploadedContent.clear();
            lastPersistedIndex = 0;
        }

        @Override
        public void commitAfterRecovery() throws IOException {
            // Something has already been published: nothing left to do.
            if (published.length != 0) {
                return;
            }
            commit();
        }

        @Override
        public RecoverableWriter.CommitRecoverable getRecoverable() {
            return null;
        }
    };
    return committer;
}
Example #3
Source File: HadoopS3RecoverableWriterITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test(expected = FileNotFoundException.class) public void testCleanupRecoverableState() throws Exception { final RecoverableWriter writer = getRecoverableWriter(); final Path path = new Path(basePathForTest, "part-0"); final RecoverableFsDataOutputStream stream = writer.open(path); stream.write(bytesOf(testData1)); S3Recoverable recoverable = (S3Recoverable) stream.persist(); stream.closeForCommit().commit(); // still the data is there as we have not deleted them from the tmp object final String content = getContentsOfFile(new Path('/' + recoverable.incompleteObjectName())); Assert.assertEquals(testData1, content); boolean successfullyDeletedState = writer.cleanupRecoverableState(recoverable); Assert.assertTrue(successfullyDeletedState); // this should throw the exception as we deleted the file. getContentsOfFile(new Path('/' + recoverable.incompleteObjectName())); }
Example #4
Source File: BucketTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Creates a new bucket through {@code Bucket.getNew}, combining the given arguments with
 * the {@code bucketId}, {@code partFileFactory} and {@code rollingPolicy} of this test class.
 *
 * @param writer the recoverable writer handed to the bucket
 * @param bucketPath the path handed to the bucket
 * @param subtaskIdx the subtask index handed to the bucket
 * @param initialPartCounter the initial part counter handed to the bucket
 * @param partFileConfig the part file configuration handed to the bucket
 * @return the bucket returned by {@code Bucket.getNew}
 */
private static Bucket<String, String> createBucket(
        final RecoverableWriter writer,
        final Path bucketPath,
        final int subtaskIdx,
        final int initialPartCounter,
        final PartFileConfig partFileConfig) {

    final Bucket<String, String> freshBucket = Bucket.getNew(
            writer,
            subtaskIdx,
            bucketId,
            bucketPath,
            initialPartCounter,
            partFileFactory,
            rollingPolicy,
            partFileConfig);
    return freshBucket;
}
Example #5
Source File: HadoopS3RecoverableWriterExceptionITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Persists a stream, writes more data, commits it, and then tries to recover and commit
 * from the recoverable taken before the commit; the test expects an {@code IOException}.
 */
@Test(expected = IOException.class)
public void testResumeAfterCommit() throws Exception {
    final RecoverableWriter recoverableWriter = getFileSystem().createRecoverableWriter();
    final Path partPath = new Path(basePathForTest, "part-0");

    final RecoverableFsDataOutputStream out = recoverableWriter.open(partPath);

    final byte[] firstChunk = testData1.getBytes(StandardCharsets.UTF_8);
    out.write(firstChunk);

    final RecoverableWriter.ResumeRecoverable resumePoint = out.persist();

    final byte[] secondChunk = testData2.getBytes(StandardCharsets.UTF_8);
    out.write(secondChunk);

    final RecoverableFsDataOutputStream.Committer firstCommitter = out.closeForCommit();
    firstCommitter.commit();

    final RecoverableFsDataOutputStream resumed = recoverableWriter.recover(resumePoint);
    final RecoverableFsDataOutputStream.Committer secondCommitter = resumed.closeForCommit();
    secondCommitter.commit();
}
Example #6
Source File: HadoopS3RecoverableWriterExceptionITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test(expected = IOException.class) public void testResumeWithWrongOffset() throws Exception { // this is a rather unrealistic scenario, but it is to trigger // truncation of the file and try to resume with missing data. final RecoverableWriter writer = getFileSystem().createRecoverableWriter(); final Path path = new Path(basePathForTest, "part-0"); final RecoverableFsDataOutputStream stream = writer.open(path); stream.write(testData1.getBytes(StandardCharsets.UTF_8)); final RecoverableWriter.ResumeRecoverable recoverable1 = stream.persist(); stream.write(testData2.getBytes(StandardCharsets.UTF_8)); final RecoverableWriter.ResumeRecoverable recoverable2 = stream.persist(); stream.write(testData3.getBytes(StandardCharsets.UTF_8)); final RecoverableFsDataOutputStream recoveredStream = writer.recover(recoverable1); recoveredStream.closeForCommit().commit(); // this should throw an exception final RecoverableFsDataOutputStream newRecoveredStream = writer.recover(recoverable2); newRecoveredStream.closeForCommit().commit(); }
Example #7
Source File: HadoopS3RecoverableWriterITCase.java From flink with Apache License 2.0 | 6 votes |
@Test public void testCallingDeleteObjectTwiceDoesNotThroughException() throws Exception { final RecoverableWriter writer = getRecoverableWriter(); final Path path = new Path(basePathForTest, "part-0"); final RecoverableFsDataOutputStream stream = writer.open(path); stream.write(bytesOf(testData1)); S3Recoverable recoverable = (S3Recoverable) stream.persist(); stream.closeForCommit().commit(); // still the data is there as we have not deleted them from the tmp object final String content = getContentsOfFile(new Path('/' + recoverable.incompleteObjectName())); Assert.assertEquals(testData1, content); boolean successfullyDeletedState = writer.cleanupRecoverableState(recoverable); Assert.assertTrue(successfullyDeletedState); boolean unsuccessfulDeletion = writer.cleanupRecoverableState(recoverable); Assert.assertFalse(unsuccessfulDeletion); }
Example #8
Source File: HadoopS3RecoverableWriterExceptionITCase.java From flink with Apache License 2.0 | 6 votes |
@Test(expected = IOException.class) public void testResumeWithWrongOffset() throws Exception { // this is a rather unrealistic scenario, but it is to trigger // truncation of the file and try to resume with missing data. final RecoverableWriter writer = getFileSystem().createRecoverableWriter(); final Path path = new Path(basePathForTest, "part-0"); final RecoverableFsDataOutputStream stream = writer.open(path); stream.write(testData1.getBytes(StandardCharsets.UTF_8)); final RecoverableWriter.ResumeRecoverable recoverable1 = stream.persist(); stream.write(testData2.getBytes(StandardCharsets.UTF_8)); final RecoverableWriter.ResumeRecoverable recoverable2 = stream.persist(); stream.write(testData3.getBytes(StandardCharsets.UTF_8)); final RecoverableFsDataOutputStream recoveredStream = writer.recover(recoverable1); recoveredStream.closeForCommit().commit(); // this should throw an exception final RecoverableFsDataOutputStream newRecoveredStream = writer.recover(recoverable2); newRecoveredStream.closeForCommit().commit(); }
Example #9
Source File: Bucket.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Constructor to restore a bucket from checkpointed state.
 *
 * <p>Delegates to the other constructor using the bucket id and bucket path read from
 * {@code bucketState}, then calls {@code restoreInProgressFile} followed by
 * {@code commitRecoveredPendingFiles} with that same state.
 *
 * @param fsWriter passed through to the delegated constructor
 * @param subtaskIndex passed through to the delegated constructor
 * @param initialPartCounter passed through to the delegated constructor
 * @param partFileFactory passed through to the delegated constructor
 * @param rollingPolicy passed through to the delegated constructor
 * @param bucketState supplies the bucket id and path, and is handed to both restore steps
 * @throws IOException as declared; presumably raised by one of the two restore steps
 */
private Bucket(
        final RecoverableWriter fsWriter,
        final int subtaskIndex,
        final long initialPartCounter,
        final PartFileWriter.PartFileFactory<IN, BucketID> partFileFactory,
        final RollingPolicy<IN, BucketID> rollingPolicy,
        final BucketState<BucketID> bucketState) throws IOException {

    this(
            fsWriter,
            subtaskIndex,
            bucketState.getBucketId(),
            bucketState.getBucketPath(),
            initialPartCounter,
            partFileFactory,
            rollingPolicy);

    restoreInProgressFile(bucketState);
    commitRecoveredPendingFiles(bucketState);
}
Example #10
Source File: HadoopRecoverableWriterOldHadoopWithNoTruncateSupportTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Closes a stream for commit, takes the commit recoverable from the resulting committer,
 * recovers a committer from it, commits after recovery and verifies the file content.
 */
@Test
public void testRecoveryAfterClosingForCommitWorks() throws IOException {
    final Path targetPath = new Path(basePath, "test-1");
    final String expectedContent = "test_line";

    final RecoverableWriter writerUnderTest = fileSystem.createRecoverableWriter();
    final RecoverableFsDataOutputStream streamUnderTest =
            getOpenStreamToFileWithContent(writerUnderTest, targetPath, expectedContent);

    final RecoverableFsDataOutputStream.Committer originalCommitter = streamUnderTest.closeForCommit();
    final RecoverableWriter.CommitRecoverable committable = originalCommitter.getRecoverable();

    final RecoverableFsDataOutputStream.Committer recoveredCommitter =
            writerUnderTest.recoverForCommit(committable);
    recoveredCommitter.commitAfterRecovery();

    verifyFileContent(targetPath, expectedContent);
}
Example #11
Source File: S3RecoverableFsDataOutputStream.java From flink with Apache License 2.0 | 6 votes |
@Override public RecoverableWriter.ResumeRecoverable persist() throws IOException { lock(); try { fileStream.flush(); openNewPartIfNecessary(userDefinedMinPartSize); // We do not stop writing to the current file, we merely limit the upload to the // first n bytes of the current file return upload.snapshotAndGetRecoverable(fileStream); } finally { unlock(); } }
Example #12
Source File: S3RecoverableFsDataOutputStreamTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Records the current size of {@code uploadedContent} as the last persisted index and
 * returns a committer bound to the state of the enclosing object.
 *
 * <p>The committer's {@code commit()} stores the result of {@code getPublishedContents()}
 * in {@code published}, clears {@code uploadedContent} and resets the persisted index to 0.
 * Its {@code commitAfterRecovery()} commits only while {@code published} is empty, and its
 * {@code getRecoverable()} always returns {@code null}.
 *
 * @return the committer described above
 * @throws IOException as declared by the overridden method
 */
@Override
public RecoverableFsDataOutputStream.Committer snapshotAndGetCommitter() throws IOException {
    lastPersistedIndex = uploadedContent.size();

    final RecoverableFsDataOutputStream.Committer committer = new RecoverableFsDataOutputStream.Committer() {

        @Override
        public void commit() throws IOException {
            published = getPublishedContents();
            uploadedContent.clear();
            lastPersistedIndex = 0;
        }

        @Override
        public void commitAfterRecovery() throws IOException {
            // Something has already been published: nothing left to do.
            if (published.length != 0) {
                return;
            }
            commit();
        }

        @Override
        public RecoverableWriter.CommitRecoverable getRecoverable() {
            return null;
        }
    };
    return committer;
}
Example #13
Source File: HadoopS3RecoverableWriterExceptionITCase.java From flink with Apache License 2.0 | 6 votes |
@Test(expected = IOException.class) public void testResumeWithWrongOffset() throws Exception { // this is a rather unrealistic scenario, but it is to trigger // truncation of the file and try to resume with missing data. final RecoverableWriter writer = getFileSystem().createRecoverableWriter(); final Path path = new Path(basePathForTest, "part-0"); final RecoverableFsDataOutputStream stream = writer.open(path); stream.write(testData1.getBytes(StandardCharsets.UTF_8)); final RecoverableWriter.ResumeRecoverable recoverable1 = stream.persist(); stream.write(testData2.getBytes(StandardCharsets.UTF_8)); final RecoverableWriter.ResumeRecoverable recoverable2 = stream.persist(); stream.write(testData3.getBytes(StandardCharsets.UTF_8)); final RecoverableFsDataOutputStream recoveredStream = writer.recover(recoverable1); recoveredStream.closeForCommit().commit(); // this should throw an exception final RecoverableFsDataOutputStream newRecoveredStream = writer.recover(recoverable2); newRecoveredStream.closeForCommit().commit(); }
Example #14
Source File: DefaultBucketFactoryImpl.java From flink with Apache License 2.0 | 6 votes |
/**
 * Restores a bucket by forwarding all arguments, unchanged and in the same order, to
 * {@code Bucket.restore}.
 *
 * @return the bucket returned by {@code Bucket.restore}
 * @throws IOException as declared by the overridden method
 */
@Override
public Bucket<IN, BucketID> restoreBucket(
        final RecoverableWriter fsWriter,
        final int subtaskIndex,
        final long initialPartCounter,
        final PartFileWriter.PartFileFactory<IN, BucketID> partFileWriterFactory,
        final RollingPolicy<IN, BucketID> rollingPolicy,
        final BucketState<BucketID> bucketState,
        final PartFileConfig partFileConfig) throws IOException {

    final Bucket<IN, BucketID> restoredBucket = Bucket.restore(
            fsWriter,
            subtaskIndex,
            initialPartCounter,
            partFileWriterFactory,
            rollingPolicy,
            bucketState,
            partFileConfig);
    return restoredBucket;
}
Example #15
Source File: Bucket.java From flink with Apache License 2.0 | 6 votes |
/**
 * Constructor to create a new empty bucket.
 *
 * <p>All reference arguments are validated with {@code checkNotNull} before being stored.
 * The pending-parts list for the current checkpoint starts as an empty {@code ArrayList};
 * the per-checkpoint pending parts and resumables start as empty {@code TreeMap}s.
 *
 * @param fsWriter the recoverable writer stored in this bucket; must not be null
 * @param subtaskIndex the subtask index stored in this bucket
 * @param bucketId the bucket id stored in this bucket; must not be null
 * @param bucketPath the bucket path stored in this bucket; must not be null
 * @param initialPartCounter the initial value of the part counter
 * @param partFileFactory the part file factory stored in this bucket; must not be null
 * @param rollingPolicy the rolling policy stored in this bucket; must not be null
 * @param partFileConfig the part file configuration stored in this bucket; must not be null
 */
private Bucket(
        final RecoverableWriter fsWriter,
        final int subtaskIndex,
        final BucketID bucketId,
        final Path bucketPath,
        final long initialPartCounter,
        final PartFileWriter.PartFileFactory<IN, BucketID> partFileFactory,
        final RollingPolicy<IN, BucketID> rollingPolicy,
        final PartFileConfig partFileConfig) {

    this.fsWriter = checkNotNull(fsWriter);
    this.subtaskIndex = subtaskIndex;
    this.bucketId = checkNotNull(bucketId);
    this.bucketPath = checkNotNull(bucketPath);
    this.partCounter = initialPartCounter;
    this.partFileFactory = checkNotNull(partFileFactory);
    this.rollingPolicy = checkNotNull(rollingPolicy);

    // Bookkeeping collections all start out empty for a new bucket.
    this.pendingPartsForCurrentCheckpoint = new ArrayList<>();
    this.pendingPartsPerCheckpoint = new TreeMap<>();
    this.resumablesPerCheckpoint = new TreeMap<>();

    this.partFileConfig = checkNotNull(partFileConfig);
}
Example #16
Source File: BucketStateSerializerTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Serializes and deserializes a bucket state that has no in-progress file and no
 * committable files, then checks the bucket path, the absent in-progress file and the
 * empty committables map on the recovered state.
 */
@Test
public void testSerializationEmpty() throws IOException {
    final File folder = tempFolder.newFolder();
    final FileSystem localFs = FileSystem.get(folder.toURI());
    final RecoverableWriter recoverableWriter = localFs.createRecoverableWriter();

    final Path bucketPath = new Path(folder.getPath(), "test");

    final BucketState<String> originalState =
            new BucketState<>("test", bucketPath, Long.MAX_VALUE, null, new HashMap<>());

    final SimpleVersionedSerializer<BucketState<String>> stateSerializer =
            new BucketStateSerializer<>(
                    recoverableWriter.getResumeRecoverableSerializer(),
                    recoverableWriter.getCommitRecoverableSerializer(),
                    SimpleVersionedStringSerializer.INSTANCE
            );

    final byte[] serialized =
            SimpleVersionedSerialization.writeVersionAndSerialize(stateSerializer, originalState);
    final BucketState<String> restoredState =
            SimpleVersionedSerialization.readVersionAndDeSerialize(stateSerializer, serialized);

    Assert.assertEquals(bucketPath, restoredState.getBucketPath());
    Assert.assertNull(restoredState.getInProgressResumableFile());
    Assert.assertTrue(restoredState.getCommittableFilesPerCheckpoint().isEmpty());
}
Example #17
Source File: HadoopS3RecoverableWriterExceptionITCase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Persists a stream, writes more data, commits it, and then tries to recover and commit
 * from the recoverable taken before the commit; the test expects an {@code IOException}.
 */
@Test(expected = IOException.class)
public void testResumeAfterCommit() throws Exception {
    final RecoverableWriter recoverableWriter = getFileSystem().createRecoverableWriter();
    final Path partPath = new Path(basePathForTest, "part-0");

    final RecoverableFsDataOutputStream out = recoverableWriter.open(partPath);

    final byte[] firstChunk = testData1.getBytes(StandardCharsets.UTF_8);
    out.write(firstChunk);

    final RecoverableWriter.ResumeRecoverable resumePoint = out.persist();

    final byte[] secondChunk = testData2.getBytes(StandardCharsets.UTF_8);
    out.write(secondChunk);

    final RecoverableFsDataOutputStream.Committer firstCommitter = out.closeForCommit();
    firstCommitter.commit();

    final RecoverableFsDataOutputStream resumed = recoverableWriter.recover(resumePoint);
    final RecoverableFsDataOutputStream.Committer secondCommitter = resumed.closeForCommit();
    secondCommitter.commit();
}
Example #18
Source File: BucketStateSerializerTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Serializes and deserializes a bucket state that has no in-progress file and no
 * committable files, then checks the bucket path, the absent in-progress file and the
 * empty committables map on the recovered state.
 */
@Test
public void testSerializationEmpty() throws IOException {
    final File folder = tempFolder.newFolder();
    final FileSystem localFs = FileSystem.get(folder.toURI());
    final RecoverableWriter recoverableWriter = localFs.createRecoverableWriter();

    final Path bucketPath = new Path(folder.getPath(), "test");

    final BucketState<String> originalState =
            new BucketState<>("test", bucketPath, Long.MAX_VALUE, null, new HashMap<>());

    final SimpleVersionedSerializer<BucketState<String>> stateSerializer =
            new BucketStateSerializer<>(
                    recoverableWriter.getResumeRecoverableSerializer(),
                    recoverableWriter.getCommitRecoverableSerializer(),
                    SimpleVersionedStringSerializer.INSTANCE
            );

    final byte[] serialized =
            SimpleVersionedSerialization.writeVersionAndSerialize(stateSerializer, originalState);
    final BucketState<String> restoredState =
            SimpleVersionedSerialization.readVersionAndDeSerialize(stateSerializer, serialized);

    Assert.assertEquals(bucketPath, restoredState.getBucketPath());
    Assert.assertNull(restoredState.getInProgressResumableFile());
    Assert.assertTrue(restoredState.getCommittableFilesPerCheckpoint().isEmpty());
}
Example #19
Source File: S3RecoverableFsDataOutputStream.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Override public RecoverableWriter.ResumeRecoverable persist() throws IOException { lock(); try { fileStream.flush(); openNewPartIfNecessary(userDefinedMinPartSize); // We do not stop writing to the current file, we merely limit the upload to the // first n bytes of the current file return upload.snapshotAndGetRecoverable(fileStream); } finally { unlock(); } }
Example #20
Source File: HadoopRecoverableWriterOldHadoopWithNoTruncateSupportTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Opens a stream with known content, closes it for commit, commits, and verifies that the
 * target file holds the expected content.
 */
@Test
public void testWriteAndCommitWorks() throws IOException {
    final Path targetPath = new Path(basePath, "test-0");
    final String expectedContent = "test_line";

    final RecoverableWriter writerUnderTest = fileSystem.createRecoverableWriter();
    final RecoverableFsDataOutputStream streamUnderTest =
            getOpenStreamToFileWithContent(writerUnderTest, targetPath, expectedContent);

    final RecoverableFsDataOutputStream.Committer committer = streamUnderTest.closeForCommit();
    committer.commit();

    verifyFileContent(targetPath, expectedContent);
}
Example #21
Source File: S3RecoverableFsDataOutputStreamTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Records the current size of {@code uploadedContent} as the last persisted index and,
 * when the position of the given file is non-negative, stores the file's contents in
 * {@code uncompleted}.
 *
 * @param incompletePartFile the file whose position is checked and whose contents are read
 * @return always {@code null}
 * @throws IOException as declared by the overridden method
 */
@Override
public RecoverableWriter.ResumeRecoverable snapshotAndGetRecoverable(RefCountedFSOutputStream incompletePartFile) throws IOException {
    lastPersistedIndex = uploadedContent.size();

    final long position = incompletePartFile.getPos();
    if (position >= 0L) {
        final byte[] partContents = readFileContents(incompletePartFile);
        uncompleted = Optional.of(partContents);
    }

    return null;
}
Example #22
Source File: BucketTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Builds a bucket state that has no in-progress file but carries the pending parts created
 * by {@code createPendingPartsPerCheckpoint}, and restores a bucket from it.
 *
 * @param writer the writer handed to {@code Bucket.restore}
 * @param numberOfPendingParts the argument passed to {@code createPendingPartsPerCheckpoint}
 * @return the bucket returned by {@code Bucket.restore}
 * @throws IOException as declared; presumably raised while restoring the bucket
 */
private Bucket<String, String> getRestoredBucketWithOnlyPendingParts(final BaseStubWriter writer, final int numberOfPendingParts) throws IOException {
    final Map<Long, List<RecoverableWriter.CommitRecoverable>> pendingParts =
            createPendingPartsPerCheckpoint(numberOfPendingParts);

    final BucketState<String> stateWithPendingPartsOnly =
            new BucketState<>("test", new Path(), 12345L, null, pendingParts);

    final Bucket<String, String> restoredBucket = Bucket.restore(
            writer,
            0,
            1L,
            partFileFactory,
            rollingPolicy,
            stateWithPendingPartsOnly,
            new PartFileConfig());
    return restoredBucket;
}
Example #23
Source File: FlinkS3FileSystem.java From flink with Apache License 2.0 | 5 votes |
@Override public RecoverableWriter createRecoverableWriter() throws IOException { if (s3AccessHelper == null) { // this is the case for Presto throw new UnsupportedOperationException("This s3 file system implementation does not support recoverable writers."); } return S3RecoverableWriter.writer( getHadoopFileSystem(), tmpFileCreator, s3AccessHelper, uploadThreadPool, s3uploadPartSize, maxConcurrentUploadsPerStream); }
Example #24
Source File: HadoopS3RecoverableWriterITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Writes data, persists, writes more data, commits, and checks that the committed file
 * contains both pieces of data concatenated in order.
 */
@Test
public void testCommitAfterPersist() throws Exception {
    final RecoverableWriter recoverableWriter = getRecoverableWriter();
    final Path partPath = new Path(basePathForTest, "part-0");

    final RecoverableFsDataOutputStream out = recoverableWriter.open(partPath);

    out.write(bytesOf(testData1));
    out.persist();
    out.write(bytesOf(testData2));

    final RecoverableFsDataOutputStream.Committer committer = out.closeForCommit();
    committer.commit();

    final String expectedContent = testData1 + testData2;
    final String actualContent = getContentsOfFile(partPath);
    Assert.assertEquals(expectedContent, actualContent);
}
Example #25
Source File: BucketStateSerializerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testSerializationOnlyInProgress() throws IOException { final File testFolder = tempFolder.newFolder(); final FileSystem fs = FileSystem.get(testFolder.toURI()); final Path testBucket = new Path(testFolder.getPath(), "test"); final RecoverableWriter writer = fs.createRecoverableWriter(); final RecoverableFsDataOutputStream stream = writer.open(testBucket); stream.write(IN_PROGRESS_CONTENT.getBytes(Charset.forName("UTF-8"))); final RecoverableWriter.ResumeRecoverable current = stream.persist(); final BucketState<String> bucketState = new BucketState<>( "test", testBucket, Long.MAX_VALUE, current, new HashMap<>()); final SimpleVersionedSerializer<BucketState<String>> serializer = new BucketStateSerializer<>( writer.getResumeRecoverableSerializer(), writer.getCommitRecoverableSerializer(), SimpleVersionedStringSerializer.INSTANCE ); final byte[] bytes = SimpleVersionedSerialization.writeVersionAndSerialize(serializer, bucketState); // to simulate that everything is over for file. stream.close(); final BucketState<String> recoveredState = SimpleVersionedSerialization.readVersionAndDeSerialize(serializer, bytes); Assert.assertEquals(testBucket, recoveredState.getBucketPath()); FileStatus[] statuses = fs.listStatus(testBucket.getParent()); Assert.assertEquals(1L, statuses.length); Assert.assertTrue( statuses[0].getPath().getPath().startsWith( (new Path(testBucket.getParent(), ".test.inprogress")).toString()) ); }
Example #26
Source File: BucketTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Restores a bucket through {@code Bucket.restore}, wrapping the given writer in a
 * {@code RowWiseBucketWriter} built with this test class's {@code ENCODER} and using its
 * {@code rollingPolicy}.
 *
 * @param writer the recoverable writer wrapped by the row-wise bucket writer
 * @param subtaskIndex the subtask index handed to {@code Bucket.restore}
 * @param initialPartCounter the initial part counter handed to {@code Bucket.restore}
 * @param bucketState the state to restore from
 * @param outputFileConfig the output file configuration handed to {@code Bucket.restore}
 * @return the bucket returned by {@code Bucket.restore}
 * @throws Exception as declared; presumably raised while restoring the bucket
 */
private static Bucket<String, String> restoreBucket(
        final RecoverableWriter writer,
        final int subtaskIndex,
        final long initialPartCounter,
        final BucketState<String> bucketState,
        final OutputFileConfig outputFileConfig) throws Exception {

    final Bucket<String, String> restoredBucket = Bucket.restore(
            subtaskIndex,
            initialPartCounter,
            new RowWiseBucketWriter<>(writer, ENCODER),
            rollingPolicy,
            bucketState,
            outputFileConfig);
    return restoredBucket;
}
Example #27
Source File: HadoopRecoverableWriterOldHadoopWithNoTruncateSupportTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testExceptionThrownWhenRecoveringWithInProgressFile() throws IOException { final RecoverableWriter writerUnderTest = fileSystem.createRecoverableWriter(); final RecoverableFsDataOutputStream stream = writerUnderTest.open(new Path(basePath, "test-2")); final RecoverableWriter.ResumeRecoverable recoverable = stream.persist(); assertNotNull(recoverable); try { writerUnderTest.recover(recoverable); } catch (IOException e) { // this is the expected exception and we check also if the root cause is the hadoop < 2.7 version assertTrue(e.getCause() instanceof IllegalStateException); } }
Example #28
Source File: BucketTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Builds a map with one entry per checkpoint id in {@code [0, noOfCheckpoints)}, each
 * mapped to a fresh list holding a single {@code NoOpRecoverable}.
 *
 * @param noOfCheckpoints the number of checkpoint entries to create
 * @return the populated map; empty when {@code noOfCheckpoints} is not positive
 */
private Map<Long, List<RecoverableWriter.CommitRecoverable>> createPendingPartsPerCheckpoint(int noOfCheckpoints) {
    final Map<Long, List<RecoverableWriter.CommitRecoverable>> pendingByCheckpoint = new HashMap<>();

    long checkpointId = 0L;
    while (checkpointId < noOfCheckpoints) {
        final List<RecoverableWriter.CommitRecoverable> pendingParts = new ArrayList<>();
        pendingParts.add(new NoOpRecoverable());
        pendingByCheckpoint.put(checkpointId, pendingParts);
        checkpointId++;
    }

    return pendingByCheckpoint;
}
Example #29
Source File: FlinkS3FileSystem.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Override public RecoverableWriter createRecoverableWriter() throws IOException { if (s3AccessHelper == null) { // this is the case for Presto throw new UnsupportedOperationException("This s3 file system implementation does not support recoverable writers."); } return S3RecoverableWriter.writer( getHadoopFileSystem(), tmpFileCreator, s3AccessHelper, uploadThreadPool, s3uploadPartSize, maxConcurrentUploadsPerStream); }
Example #30
Source File: HadoopS3RecoverableWriterITCase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Opens a stream, writes nothing, and closes it for commit and commits; the test passes
 * when no exception is thrown.
 */
@Test
public void testCloseWithNoData() throws Exception {
    final RecoverableWriter recoverableWriter = getRecoverableWriter();
    final Path partPath = new Path(basePathForTest, "part-0");

    final RecoverableFsDataOutputStream out = recoverableWriter.open(partPath);

    final RecoverableFsDataOutputStream.Committer committer = out.closeForCommit();
    committer.commit();
}