org.apache.beam.sdk.io.fs.MatchResult.Metadata Java Examples
The following examples show how to use
org.apache.beam.sdk.io.fs.MatchResult.Metadata.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CompressedSource.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds a {@code CompressedSource} that reads one individual file. Invoked from {@link
 * CompressedSource#createForSubrangeOfFile}.
 *
 * <p>A non-zero {@code startOffset} is rejected unless {@code isSplittable()} reports {@code
 * true}.
 */
private CompressedSource(
    FileBasedSource<T> sourceDelegate,
    DecompressingChannelFactory channelFactory,
    Metadata metadata,
    long minBundleSize,
    long startOffset,
    long endOffset) {
  super(metadata, minBundleSize, startOffset, endOffset);
  this.sourceDelegate = sourceDelegate;
  this.channelFactory = channelFactory;

  boolean canSplit;
  try {
    canSplit = isSplittable();
  } catch (Exception e) {
    // Surface any failure of the splittability probe as an unchecked exception.
    throw new RuntimeException("Failed to determine if the source is splittable", e);
  }

  boolean startsAtBeginning = startOffset == 0;
  checkArgument(
      canSplit || startsAtBeginning,
      "CompressedSources must start reading at offset 0. Requested offset: %s",
      startOffset);
}
Example #2
Source File: SplitIntoRangesFn.java From DataflowTemplates with Apache License 2.0 | 6 votes |
@ProcessElement public void processElement(ProcessContext c) throws FileNotFoundException { Map<String, String> filenamesToTableNamesMap = c.sideInput(filenamesToTableNamesMapView); Metadata metadata = c.element().getMetadata(); String filename = metadata.resourceId().toString(); String tableName = filenamesToTableNamesMap.get(filename); if (tableName == null) { throw new FileNotFoundException( "Unknown table name for file:" + filename + " in map " + filenamesToTableNamesMap); } if (!metadata.isReadSeekEfficient()) { // Do not shard the file. c.output(FileShard.create(tableName, c.element(), new OffsetRange(0, metadata.sizeBytes()))); } else { // Create shards. for (OffsetRange range : new OffsetRange(0, metadata.sizeBytes()).split(desiredBundleSize, 0)) { c.output(FileShard.create(tableName, c.element(), range)); } } }
Example #3
Source File: FileBasedSourceTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Reads one file through two sources (offsets 0 to 162, then 162 to {@code Long.MAX_VALUE}) and
 * checks that the combined records equal the written data in any order.
 */
@Test
public void testReadRangeAtEnd() throws IOException {
  PipelineOptions options = PipelineOptionsFactory.create();
  List<String> data = createStringDataset(3, 50);

  File file = createFileWithData("file", data);
  Metadata metadata = FileSystems.matchSingleFileSpec(file.getPath());

  TestFileBasedSource leadingRange = new TestFileBasedSource(metadata, 64, 0, 162, null);
  TestFileBasedSource trailingRange =
      new TestFileBasedSource(metadata, 1024, 162, Long.MAX_VALUE, null);

  List<String> results = new ArrayList<>(readFromSource(leadingRange, options));
  results.addAll(readFromSource(trailingRange, options));

  assertThat(data, containsInAnyOrder(results.toArray()));
}
Example #4
Source File: FileBasedSourceTest.java From beam with Apache License 2.0 | 6 votes |
@Test public void testSplitAtFraction() throws Exception { PipelineOptions options = PipelineOptionsFactory.create(); File file = createFileWithData("file", createStringDataset(3, 100)); Metadata metadata = FileSystems.matchSingleFileSpec(file.getPath()); TestFileBasedSource source = new TestFileBasedSource(metadata, 1, 0, file.length(), null); // Shouldn't be able to split while unstarted. assertSplitAtFractionFails(source, 0, 0.7, options); assertSplitAtFractionSucceedsAndConsistent(source, 1, 0.7, options); assertSplitAtFractionSucceedsAndConsistent(source, 30, 0.7, options); assertSplitAtFractionFails(source, 0, 0.0, options); assertSplitAtFractionFails(source, 70, 0.3, options); assertSplitAtFractionFails(source, 100, 1.0, options); assertSplitAtFractionFails(source, 100, 0.99, options); assertSplitAtFractionSucceedsAndConsistent(source, 100, 0.995, options); }
Example #5
Source File: StreamingDataGenerator.java From DataflowTemplates with Apache License 2.0 | 6 votes |
@Setup public void setup() throws IOException { dataGenerator = new JsonDataGeneratorImpl(); Metadata metadata = FileSystems.matchSingleFileSpec(schemaLocation); // Copy the schema file into a string which can be used for generation. try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) { try (ReadableByteChannel readerChannel = FileSystems.open(metadata.resourceId())) { try (WritableByteChannel writerChannel = Channels.newChannel(byteArrayOutputStream)) { ByteStreams.copy(readerChannel, writerChannel); } } schema = byteArrayOutputStream.toString(); } }
Example #6
Source File: GeoCityLookup.java From gcp-ingestion with Mozilla Public License 2.0 | 6 votes |
/**
 * Returns a singleton object for reading from the GeoCity database.
 *
 * <p>We copy the configured database file to a static temp location so that the MaxMind API can
 * save on heap usage by using memory mapping. The reader is threadsafe and this singleton pattern
 * allows multiple worker threads on the same machine to share a single reader instance.
 *
 * <p>Note that we do not clean up the temp mmdb file, but it's a static path, so running locally
 * will overwrite the existing path every time rather than creating an unbounded number of copies.
 * This also assumes that only one JVM per machine is running this code. In the production case
 * where this is running on Cloud Dataflow, we should always have a clean environment and the temp
 * state will be cleaned up along with the workers once the job finishes. However, behavior is
 * undefined if you run multiple local jobs concurrently.
 *
 * <p>The channel and stream opened on the source database are closed as soon as the copy
 * finishes, whether or not it succeeds.
 *
 * @param geoCityDatabase provider of the file spec of the database to copy
 * @return the shared reader, created on first call
 * @throws IOException if the configured file path is not a valid .mmdb file
 */
private static synchronized DatabaseReader getOrCreateSingletonGeoCityReader(
    ValueProvider<String> geoCityDatabase) throws IOException {
  if (singletonGeoCityReader == null) {
    File mmdb;
    try {
      Metadata metadata = FileSystems.matchSingleFileSpec(geoCityDatabase.get());
      Path mmdbPath = Paths.get(System.getProperty("java.io.tmpdir"), "GeoCityLookup.mmdb");
      // try-with-resources guarantees the remote channel is released even when the copy fails.
      try (ReadableByteChannel channel = FileSystems.open(metadata.resourceId());
          InputStream inputStream = Channels.newInputStream(channel)) {
        Files.copy(inputStream, mmdbPath, StandardCopyOption.REPLACE_EXISTING);
      }
      mmdb = mmdbPath.toFile();
    } catch (IOException e) {
      throw new IOException("Exception thrown while fetching configured geoCityDatabase", e);
    }
    singletonGeoCityReader = new DatabaseReader.Builder(mmdb).withCache(new CHMCache()).build();
  }
  return singletonGeoCityReader;
}
Example #7
Source File: BulkCompressorTest.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/** Tests the {@link BulkCompressor.Compressor} performs compression properly. */ @Test public void testCompressFile() throws Exception { // Setup test final Compression compression = Compression.GZIP; final ValueProvider<String> outputDirectoryProvider = pipeline.newProvider(tempFolderCompressedPath.toString()); final ValueProvider<Compression> compressionProvider = StaticValueProvider.of(compression); final Metadata metadata = FileSystems.matchSingleFileSpec(textFile.toString()); // Execute the compressor PCollection<String> lines = pipeline .apply("Create File Input", Create.of(metadata)) .apply("Compress", ParDo.of(new Compressor(outputDirectoryProvider, compressionProvider))) .apply("Read the Files", TextIO.readAll().withCompression(Compression.AUTO)); // Test the result PAssert.that(lines).containsInAnyOrder(FILE_CONTENT); pipeline.run(); }
Example #8
Source File: HadoopFileSystemTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Matches a directory path directly and expects a single OK result whose only metadata entry
 * describes the directory itself with a size of zero bytes.
 */
@Test
public void testMatchDirectory() throws Exception {
  create("dir/file", "data".getBytes(StandardCharsets.UTF_8));

  final MatchResult actual =
      Iterables.getOnlyElement(
          fileSystem.match(Collections.singletonList(testPath("dir").toString())));

  Metadata directoryMetadata =
      Metadata.builder()
          .setResourceId(testPath("dir"))
          .setIsReadSeekEfficient(true)
          .setSizeBytes(0L)
          .setLastModifiedMillis(lastModified("dir"))
          .build();
  MatchResult expected = MatchResult.create(Status.OK, ImmutableList.of(directoryMetadata));

  assertThat(actual, equalTo(expected));
}
Example #9
Source File: FileBasedSourceTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Reads one file through two sources (offsets 0 to 25, then 25 to {@code Long.MAX_VALUE}) and
 * checks that the combined records equal the written data in any order.
 */
@Test
public void testReadRangeAtStart() throws IOException {
  PipelineOptions options = PipelineOptionsFactory.create();
  List<String> data = createStringDataset(3, 50);

  File file = createFileWithData("file", data);
  Metadata metadata = FileSystems.matchSingleFileSpec(file.getPath());

  TestFileBasedSource firstPart = new TestFileBasedSource(metadata, 64, 0, 25, null);
  TestFileBasedSource remainder =
      new TestFileBasedSource(metadata, 64, 25, Long.MAX_VALUE, null);

  List<String> results = new ArrayList<>(readFromSource(firstPart, options));
  results.addAll(readFromSource(remainder, options));

  assertThat(data, containsInAnyOrder(results.toArray()));
}
Example #10
Source File: FilePatternMatchingShardedFile.java From beam with Apache License 2.0 | 6 votes |
/**
 * Reads every line of each given file and returns all of them in a single list.
 *
 * <p>Only suitable for tests over small data: the total size may be far more than can reasonably
 * be processed serially, in-memory, by a single thread.
 */
@VisibleForTesting
List<String> readLines(Collection<Metadata> files) throws IOException {
  List<String> allLines = Lists.newArrayList();
  int fileNumber = 0;
  for (Metadata file : files) {
    fileNumber++;
    try (Reader reader =
        Channels.newReader(FileSystems.open(file.resourceId()), StandardCharsets.UTF_8.name())) {
      List<String> lines = CharStreams.readLines(reader);
      allLines.addAll(lines);
      LOG.debug(
          "[{} of {}] Read {} lines from file: {}", fileNumber, files.size(), lines.size(), file);
    }
  }
  return allLines;
}
Example #11
Source File: NumberedShardedFile.java From beam with Apache License 2.0 | 6 votes |
/**
 * Collects the lines of all supplied files into one list, reading each file as UTF-8 text.
 *
 * <p>Meant for testing with small data only, since the data size may be far more than can be
 * reasonably processed serially, in-memory, by a single thread.
 */
@VisibleForTesting
List<String> readLines(Collection<Metadata> files) throws IOException {
  String charsetName = StandardCharsets.UTF_8.name();
  List<String> collected = Lists.newArrayList();
  int alreadyRead = 0;
  for (Metadata file : files) {
    try (Reader reader = Channels.newReader(FileSystems.open(file.resourceId()), charsetName)) {
      List<String> fileLines = CharStreams.readLines(reader);
      collected.addAll(fileLines);
      LOG.debug(
          "[{} of {}] Read {} lines from file: {}",
          alreadyRead + 1,
          files.size(),
          fileLines.size(),
          file);
    }
    alreadyRead++;
  }
  return collected;
}
Example #12
Source File: NumberedShardedFile.java From beam with Apache License 2.0 | 6 votes |
/** * Check if total number of files is correct by comparing with the number that is parsed from * shard name using a name template. If no template is specified, "SSSS-of-NNNN" will be used as * default, and "NNNN" will be the expected total number of files. * * @return {@code true} if at least one shard name matches template and total number of given * files equals the number that is parsed from shard name. */ @VisibleForTesting boolean checkTotalNumOfFiles(Collection<Metadata> files) { for (Metadata fileMedadata : files) { String fileName = fileMedadata.resourceId().getFilename(); if (fileName == null) { // this path has zero elements continue; } Matcher matcher = shardTemplate.matcher(fileName); if (!matcher.matches()) { // shard name doesn't match the pattern, check with the next shard continue; } // once match, extract total number of shards and compare to file list return files.size() == Integer.parseInt(matcher.group("numshards")); } return false; }
Example #13
Source File: AvroSourceTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that an {@code AvroSource} created from file {@code Metadata}, and the sources derived
 * from it with a schema and a minimum bundle size, all report single-file mode.
 */
@Test
public void testCreateFromMetadata() throws Exception {
  List<Bird> expected = createRandomRecords(DEFAULT_RECORD_COUNT);
  String codec = DataFileConstants.NULL_CODEC;
  String filename =
      generateTestFile(
          codec, expected, SyncBehavior.SYNC_DEFAULT, 0, AvroCoder.of(Bird.class), codec);
  Metadata fileMeta = FileSystems.matchSingleFileSpec(filename);

  AvroSource<GenericRecord> genericSource = AvroSource.from(fileMeta);
  AvroSource<Bird> typedSource = genericSource.withSchema(Bird.class);
  AvroSource<Bird> typedSourceWithMinBundle = typedSource.withMinBundleSize(1234);

  FileBasedSource.Mode singleFileMode = FileBasedSource.Mode.SINGLE_FILE_OR_SUBRANGE;
  assertEquals(singleFileMode, genericSource.getMode());
  assertEquals(singleFileMode, typedSource.getMode());
  assertEquals(singleFileMode, typedSourceWithMinBundle.getMode());
}
Example #14
Source File: AvroSourceTest.java From beam with Apache License 2.0 | 6 votes |
@Test public void testReadMetadataWithCodecs() throws Exception { // Test reading files generated using all codecs. String[] codecs = { DataFileConstants.NULL_CODEC, DataFileConstants.BZIP2_CODEC, DataFileConstants.DEFLATE_CODEC, DataFileConstants.SNAPPY_CODEC, DataFileConstants.XZ_CODEC }; List<Bird> expected = createRandomRecords(DEFAULT_RECORD_COUNT); for (String codec : codecs) { String filename = generateTestFile( codec, expected, SyncBehavior.SYNC_DEFAULT, 0, AvroCoder.of(Bird.class), codec); Metadata fileMeta = FileSystems.matchSingleFileSpec(filename); AvroMetadata metadata = AvroSource.readMetadataFromFile(fileMeta.resourceId()); assertEquals(codec, metadata.getCodec()); } }
Example #15
Source File: FileSystems.java From beam with Apache License 2.0 | 6 votes |
/**
 * Returns the {@link Metadata} for a single file resource. Expects a resource specification
 * {@code spec} that matches a single result.
 *
 * @param spec a resource specification that matches exactly one result.
 * @return the {@link Metadata} for the specified resource.
 * @throws FileNotFoundException if the file resource is not found.
 * @throws IOException in the event of an error in the inner call to {@link #match}, or if the
 *     given spec does not match exactly 1 result.
 */
public static Metadata matchSingleFileSpec(String spec) throws IOException {
  List<MatchResult> matches = FileSystems.match(Collections.singletonList(spec));
  MatchResult matchResult = Iterables.getOnlyElement(matches);

  // Fail fast on the two error outcomes; only an OK status falls through.
  Status status = matchResult.status();
  if (status == Status.NOT_FOUND) {
    throw new FileNotFoundException(String.format("File spec %s not found", spec));
  }
  if (status != Status.OK) {
    throw new IOException(
        String.format("Error matching file spec %s: status %s", spec, matchResult.status()));
  }

  List<Metadata> metadata = matchResult.metadata();
  if (metadata.size() == 1) {
    return metadata.get(0);
  }
  throw new IOException(
      String.format(
          "Expecting spec %s to match exactly one file, but matched %s: %s",
          spec, metadata.size(), metadata));
}
Example #16
Source File: TFRecordIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks the name of the output of {@code TFRecordIO.readFiles()} when it is applied without
 * and with an explicit transform name.
 */
@Test
public void testReadFilesNamed() {
  readPipeline.enableAbandonedNodeEnforcement(false);

  Metadata metadata =
      Metadata.builder()
          .setResourceId(FileSystems.matchNewResource("file", false /* isDirectory */))
          .setIsReadSeekEfficient(true)
          .setSizeBytes(1024)
          .build();
  Create.Values<ReadableFile> create = Create.of(new ReadableFile(metadata, Compression.AUTO));

  String defaultName = readPipeline.apply(create).apply(TFRecordIO.readFiles()).getName();
  assertEquals(
      "TFRecordIO.ReadFiles/Read all via FileBasedSource/Read ranges/ParMultiDo(ReadFileRanges).output",
      defaultName);

  String customName = readPipeline.apply(create).apply("MyRead", TFRecordIO.readFiles()).getName();
  assertEquals(
      "MyRead/Read all via FileBasedSource/Read ranges/ParMultiDo(ReadFileRanges).output",
      customName);
}
Example #17
Source File: AvroTableFileAsMutationsTest.java From DataflowTemplates with Apache License 2.0 | 6 votes |
private PCollection<FileShard> runFileShardingPipeline(Metadata fileMetadata, int splitSize) { PCollectionView<Map<String, String>> filenamesToTableNamesMapView = p.apply( "Create File/Table names Map", Create.of( ImmutableMap.<String, String>of( fileMetadata.resourceId().toString(), "testtable"))) .apply(View.asMap()); return p.apply("Create Metadata", Create.of(fileMetadata)) .apply(FileIO.readMatches()) // Pcollection<FileIO.ReadableFile> .apply( "Split into ranges", ParDo.of(new SplitIntoRangesFn(splitSize, filenamesToTableNamesMapView)) .withSideInputs(filenamesToTableNamesMapView)) .setCoder(FileShard.Coder.of()); }
Example #18
Source File: HadoopFileSystemTest.java From beam with Apache License 2.0 | 5 votes |
@Test public void testRename() throws Exception { create("testFileA", "testDataA".getBytes(StandardCharsets.UTF_8)); create("testFileB", "testDataB".getBytes(StandardCharsets.UTF_8)); // ensure files exist assertArrayEquals("testDataA".getBytes(StandardCharsets.UTF_8), read("testFileA", 0)); assertArrayEquals("testDataB".getBytes(StandardCharsets.UTF_8), read("testFileB", 0)); fileSystem.rename( ImmutableList.of(testPath("testFileA"), testPath("testFileB")), ImmutableList.of(testPath("renameFileA"), testPath("renameFileB"))); List<MatchResult> results = fileSystem.match(ImmutableList.of(testPath("*").toString())); assertEquals(Status.OK, Iterables.getOnlyElement(results).status()); assertThat( Iterables.getOnlyElement(results).metadata(), containsInAnyOrder( Metadata.builder() .setResourceId(testPath("renameFileA")) .setIsReadSeekEfficient(true) .setSizeBytes("testDataA".getBytes(StandardCharsets.UTF_8).length) .setLastModifiedMillis(lastModified("renameFileA")) .build(), Metadata.builder() .setResourceId(testPath("renameFileB")) .setIsReadSeekEfficient(true) .setSizeBytes("testDataB".getBytes(StandardCharsets.UTF_8).length) .setLastModifiedMillis(lastModified("renameFileB")) .build())); // ensure files exist assertArrayEquals("testDataA".getBytes(StandardCharsets.UTF_8), read("renameFileA", 0)); assertArrayEquals("testDataB".getBytes(StandardCharsets.UTF_8), read("renameFileB", 0)); }
Example #19
Source File: MetadataCoderTest.java From beam with Apache License 2.0 | 5 votes |
@Test(expected = AssertionError.class) public void testEncodeDecodeWithCustomLastModifiedMills() throws Exception { Path filePath = tmpFolder.newFile("somefile").toPath(); Metadata metadata = Metadata.builder() .setResourceId( FileSystems.matchNewResource(filePath.toString(), false /* isDirectory */)) .setIsReadSeekEfficient(true) .setSizeBytes(1024) .setLastModifiedMillis(1541097000L) .build(); // This should throw because the decoded Metadata has default lastModifiedMills. CoderProperties.coderDecodeEncodeEqual(MetadataCoder.of(), metadata); }
Example #20
Source File: MetadataCoderV2Test.java From beam with Apache License 2.0 | 5 votes |
/**
 * Round-trips a {@code Metadata} built without an explicit last-modified time through {@code
 * MetadataCoderV2} and checks that the decoded value equals the original.
 */
@Test
public void testEncodeDecodeWithDefaultLastModifiedMills() throws Exception {
  Path filePath = tmpFolder.newFile("somefile").toPath();
  String fileSpec = filePath.toString();

  Metadata metadata =
      Metadata.builder()
          .setResourceId(FileSystems.matchNewResource(fileSpec, false /* isDirectory */))
          .setIsReadSeekEfficient(true)
          .setSizeBytes(1024)
          .build();

  CoderProperties.coderDecodeEncodeEqual(MetadataCoderV2.of(), metadata);
}
Example #21
Source File: MetadataCoderTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Encodes and decodes a {@code Metadata} that has no explicit last-modified time using {@code
 * MetadataCoder}, and checks that the round trip preserves equality.
 */
@Test
public void testEncodeDecodeWithDefaultLastModifiedMills() throws Exception {
  Path tempFile = tmpFolder.newFile("somefile").toPath();

  Metadata original =
      Metadata.builder()
          .setResourceId(
              FileSystems.matchNewResource(tempFile.toString(), false /* isDirectory */))
          .setIsReadSeekEfficient(true)
          .setSizeBytes(1024)
          .build();

  CoderProperties.coderDecodeEncodeEqual(MetadataCoder.of(), original);
}
Example #22
Source File: Transforms.java From nomulus with Apache License 2.0 | 5 votes |
/**
 * Creates a {@link PTransform} that maps file name patterns to file {@link Metadata Metadata
 * records}. Patterns are matched with {@code FileIO.matchAll()}, and empty matches are
 * disallowed.
 */
public static PTransform<PCollection<String>, PCollection<Metadata>> getFilesByPatterns() {
  PTransform<PCollection<String>, PCollection<Metadata>> matchPatterns =
      new PTransform<PCollection<String>, PCollection<Metadata>>() {
        @Override
        public PCollection<Metadata> expand(PCollection<String> input) {
          PCollection<Metadata> matched =
              input.apply(
                  FileIO.matchAll().withEmptyMatchTreatment(EmptyMatchTreatment.DISALLOW));
          return matched;
        }
      };
  return matchPatterns;
}
Example #23
Source File: FileBasedSource.java From beam with Apache License 2.0 | 5 votes |
/**
 * Returns the size in bytes of the single file this source reads from.
 *
 * @throws IllegalArgumentException if this source is in {@code Mode.FILEPATTERN} mode
 */
@Override
public final long getMaxEndOffset(PipelineOptions options) throws IOException {
  boolean notAFilePattern = mode != Mode.FILEPATTERN;
  checkArgument(notAFilePattern, "Cannot determine the exact end offset of a file pattern");
  return getSingleFileMetadata().sizeBytes();
}
Example #24
Source File: Transforms.java From nomulus with Apache License 2.0 | 5 votes |
/**
 * Returns a {@link PTransform} from file {@link Metadata} to {@link VersionedEntity}.
 *
 * <p>Each file is opened and read through {@code LevelDbLogReader}; every {@code byte[]} record
 * it yields is converted with {@code VersionedEntity.from} using {@code
 * EXPORT_ENTITY_TIME_STAMP}.
 */
public static PTransform<PCollection<Metadata>, PCollection<VersionedEntity>>
    loadExportDataFromFiles() {
  return processFiles(
      new BackupFileReader(
          file ->
              Iterators.transform(
                  LevelDbLogReader.from(file.open()),
                  (byte[] bytes) -> VersionedEntity.from(EXPORT_ENTITY_TIME_STAMP, bytes))));
}
Example #25
Source File: FileBasedSourceTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a test source over the given file metadata and offset range.
 *
 * <p>The metadata, minimum bundle size and offsets are passed straight to the superclass
 * constructor; only {@code splitHeader} is stored on this instance.
 *
 * @param fileOrPattern metadata forwarded to the superclass
 * @param minBundleSize minimum bundle size forwarded to the superclass
 * @param startOffset start offset forwarded to the superclass
 * @param endOffset end offset forwarded to the superclass
 * @param splitHeader value stored in {@code this.splitHeader}; may be {@code null}
 */
public TestFileBasedSource(
    Metadata fileOrPattern,
    long minBundleSize,
    long startOffset,
    long endOffset,
    @Nullable String splitHeader) {
  super(fileOrPattern, minBundleSize, startOffset, endOffset);
  this.splitHeader = splitHeader;
}
Example #26
Source File: FileBasedSource.java From beam with Apache License 2.0 | 5 votes |
/**
 * Provides the metadata of the one file this source reads from.
 *
 * @throws IllegalArgumentException if this source is in {@link Mode#FILEPATTERN} mode.
 * @throws IllegalStateException if the source is in single-file mode but has no metadata.
 */
protected final MatchResult.Metadata getSingleFileMetadata() {
  boolean inSingleFileMode = mode == Mode.SINGLE_FILE_OR_SUBRANGE;
  checkArgument(
      inSingleFileMode,
      "This function should only be called for a single file, not %s",
      this);

  boolean metadataPresent = singleFileMetadata != null;
  checkState(
      metadataPresent,
      "It should not be possible to construct a %s in mode %s with null metadata: %s",
      FileBasedSource.class,
      mode,
      this);

  return singleFileMetadata;
}
Example #27
Source File: CompressedSource.java From beam with Apache License 2.0 | 5 votes |
/**
 * Builds a {@code CompressedSource} covering a subrange of a file. The superclass calls this to
 * create a source for a single file.
 *
 * <p>The wrapped delegate is asked for its own subrange source, which the new {@code
 * CompressedSource} then wraps together with this source's channel factory.
 */
@Override
protected FileBasedSource<T> createForSubrangeOfFile(Metadata metadata, long start, long end) {
  FileBasedSource<T> delegateForSubrange =
      sourceDelegate.createForSubrangeOfFile(metadata, start, end);
  long delegateMinBundleSize = sourceDelegate.getMinBundleSize();
  return new CompressedSource<>(
      delegateForSubrange, channelFactory, metadata, delegateMinBundleSize, start, end);
}
Example #28
Source File: FileBasedSourceTest.java From beam with Apache License 2.0 | 5 votes |
@Test public void testSplitAtFractionExhaustive() throws Exception { PipelineOptions options = PipelineOptionsFactory.create(); // Smaller file for exhaustive testing. File file = createFileWithData("file", createStringDataset(3, 20)); Metadata metadata = FileSystems.matchSingleFileSpec(file.getPath()); TestFileBasedSource source = new TestFileBasedSource(metadata, 1, 0, file.length(), null); assertSplitAtFractionExhaustive(source, options); }
Example #29
Source File: FileBasedSourceTest.java From beam with Apache License 2.0 | 5 votes |
@Test public void testReadRangeFromFileWithSplitsFromStart() throws IOException { PipelineOptions options = PipelineOptionsFactory.create(); String header = "<h>"; List<String> data = new ArrayList<>(); for (int i = 0; i < 10; i++) { data.add(header); data.addAll(createStringDataset(3, 9)); } String fileName = "file"; File file = createFileWithData(fileName, data); Metadata metadata = FileSystems.matchSingleFileSpec(file.getPath()); TestFileBasedSource source1 = new TestFileBasedSource(metadata, 64, 0, 60, header); TestFileBasedSource source2 = new TestFileBasedSource(metadata, 64, 60, Long.MAX_VALUE, header); List<String> expectedResults = new ArrayList<>(); expectedResults.addAll(data); // Remove all occurrences of header from expected results. expectedResults.removeAll(Arrays.asList(header)); List<String> results = new ArrayList<>(); results.addAll(readFromSource(source1, options)); results.addAll(readFromSource(source2, options)); assertThat(expectedResults, containsInAnyOrder(results.toArray())); }
Example #30
Source File: FileBasedSourceTest.java From beam with Apache License 2.0 | 5 votes |
@Test public void testReadRangeFromFileWithSplitsFromMiddle() throws IOException { PipelineOptions options = PipelineOptionsFactory.create(); String header = "<h>"; List<String> data = new ArrayList<>(); for (int i = 0; i < 10; i++) { data.add(header); data.addAll(createStringDataset(3, 9)); } String fileName = "file"; File file = createFileWithData(fileName, data); Metadata metadata = FileSystems.matchSingleFileSpec(file.getPath()); TestFileBasedSource source1 = new TestFileBasedSource(metadata, 64, 0, 42, header); TestFileBasedSource source2 = new TestFileBasedSource(metadata, 64, 42, 112, header); TestFileBasedSource source3 = new TestFileBasedSource(metadata, 64, 112, Long.MAX_VALUE, header); List<String> expectedResults = new ArrayList<>(); expectedResults.addAll(data); // Remove all occurrences of header from expected results. expectedResults.removeAll(Collections.singletonList(header)); List<String> results = new ArrayList<>(); results.addAll(readFromSource(source1, options)); results.addAll(readFromSource(source2, options)); results.addAll(readFromSource(source3, options)); assertThat(expectedResults, containsInAnyOrder(results.toArray())); }