Java Code Examples for htsjdk.tribble.readers.LineIterator#hasNext()

The following examples show how to use htsjdk.tribble.readers.LineIterator#hasNext(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: GenotypeGVCFsIntegrationTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Returns a list of VariantContext records from a VCF file.
 *
 * The header is consumed through the codec first; every remaining line is then
 * decoded and appended to the result in the order it is read. The underlying
 * file stream is closed before this method returns, including when decoding
 * or the non-null assertion fails.
 *
 * @param vcfFile VCF file
 * @return list of VariantContext records
 * @throws IOException if the file does not exist or can not be opened
 */
private static List<VariantContext> getVariantContexts(final File vcfFile) throws IOException {
    final VCFCodec codec = new VCFCodec();
    final List<VariantContext> VCs = new ArrayList<>();

    // try-with-resources: the original never closed this stream, leaking a file handle per call.
    try (final FileInputStream s = new FileInputStream(vcfFile)) {
        final LineIterator lineIteratorVCF = codec.makeSourceFromStream(new PositionalBufferedStream(s));

        // The header must be read before record lines can be decoded.
        codec.readHeader(lineIteratorVCF);

        while (lineIteratorVCF.hasNext()) {
            final String line = lineIteratorVCF.next();
            Assert.assertFalse(line == null);
            VCs.add(codec.decode(line));
        }
    }

    return VCs;
}
 
Example 2
Source File: TableCodec.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Scans the leading lines of the input looking for the table's column header.
 *
 * Each line is inspected with peek() and only consumed with next() once it has
 * been recognised as a comment line or as the header line, so the first line
 * that is neither is left unread for the caller.
 *
 * @param reader line source to scan
 * @return the header collection; filled from the header line when one is found
 */
@Override
public List<String> readActualHeader(final LineIterator reader) {
    boolean onlyCommentsSoFar = true;
    while (reader.hasNext()) {
        // Peek rather than read so that non-header data is not swallowed.
        final String current = reader.peek();
        final boolean isComment = current.startsWith(COMMENT_DELIMITER);
        // With no header delimiter configured, every line qualifies as the header line.
        final boolean isHeaderLine = headerDelimiter == null || current.startsWith(headerDelimiter);

        if (onlyCommentsSoFar && !isComment && !isHeaderLine) {
            throw new UserException.MalformedFile("TableCodec file does not have a header");
        }
        onlyCommentsSoFar = onlyCommentsSoFar && isComment;

        if (isHeaderLine) {
            reader.next(); // consume the line we peeked at
            if (!header.isEmpty()) {
                throw new UserException.MalformedFile("Input table file seems to have two header lines.  The second is = " + current);
            }
            final String[] columns = current.split(delimiter_regex);
            Collections.addAll(header, columns);
            return header;
        }

        if (!isComment) {
            // First data line reached: leave it in the reader and stop scanning.
            return header;
        }
        reader.next(); // consume the comment line
    }
    return header;
}
 
Example 3
Source File: GencodeFuncotationFactoryUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Decodes the features found in the file named by
 * CNTN4_GENCODE_ANNOTATIONS_FILE_NAME using a GencodeGtfCodec.
 *
 * Asserts that the codec can decode the file and that more than one feature
 * was read.
 *
 * @return the decoded features, in the order they were read
 * @throws IOException if the file cannot be opened or read
 */
private List<Feature> getCntn4Features() throws IOException {
    final GencodeGtfCodec codec = new GencodeGtfCodec();
    Assert.assertTrue(codec.canDecode(CNTN4_GENCODE_ANNOTATIONS_FILE_NAME));

    final List<Feature> features = new ArrayList<>();
    try (BufferedInputStream input = new BufferedInputStream(new FileInputStream(CNTN4_GENCODE_ANNOTATIONS_FILE_NAME))) {
        final LineIterator lines = codec.makeSourceFromStream(input);

        // The header has to be consumed before features can be decoded.
        codec.readHeader(lines);

        while (lines.hasNext()) {
            features.add(codec.decode(lines));
        }
        Assert.assertTrue(features.size() > 1);
    }
    return features;
}
 
Example 4
Source File: SimpleCountCodec.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Reads the SAM header lines and the column header line from the start of the input.
 *
 * Lines beginning with the comment prefix are collected as SAM header lines.
 * The first line without that prefix must begin with COLUMN_HEADER_STRING and
 * must come after at least one SAM header line; it is consumed and scanning stops.
 *
 * @param reader line source to scan
 * @return sample/locatable metadata built from the collected SAM header lines
 */
@Override
public SampleLocatableMetadata readActualHeader(final LineIterator reader) {
    final List<String> samHeaderLines = new ArrayList<>(SAM_HEADER_LINES_INITIAL_CAPACITY);

    while (reader.hasNext()) {
        final String peeked = reader.peek();

        if (peeked.startsWith(CopyNumberFormatsUtils.COMMENT_PREFIX)) {
            samHeaderLines.add(peeked);
            reader.next();
            continue;
        }

        // First non-comment line: SAM header lines must already have been seen...
        if (samHeaderLines.isEmpty()) {
            throw new UserException.MalformedFile("SAM header lines must be at the beginning of the file.");
        }
        // ...and this line has to be the column header.
        if (!peeked.startsWith(COLUMN_HEADER_STRING)) {
            throw new UserException.MalformedFile("File does not have a column header.");
        }

        // The column header was only peeked at, so advance past it before stopping.
        reader.next();
        break;
    }

    final String joinedHeader = StringUtils.join(samHeaderLines, System.lineSeparator());
    final SAMFileHeader samFileHeader =
            new SAMTextHeaderCodec().decode(BufferedLineReader.fromString(joinedHeader), null);
    return MetadataUtils.fromHeader(samFileHeader, Metadata.Type.SAMPLE_LOCATABLE);
}
 
Example 5
Source File: EnsemblGtfCodecUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Decodes features from the given file with an EnsemblGtfCodec and compares
 * them, in order, against the expected features.
 *
 * At most expected.size() features are decoded; every sub-feature of each
 * decoded feature must report the expected UCSC genome version.
 */
@Test(dataProvider = "decodeTestProvider")
public void testDecode( final String filePath, final List<GencodeGtfFeature> expected, final String expectedUcscVersion) throws IOException {
    final EnsemblGtfCodec codec = new EnsemblGtfCodec();

    try (final BufferedInputStream input = new BufferedInputStream(new FileInputStream(filePath))) {
        final LineIterator lines = codec.makeSourceFromStream(input);

        // The header has to be consumed before features can be decoded.
        codec.readHeader(lines);

        final Iterator<GencodeGtfFeature> expectedFeatures = expected.iterator();

        // Only decode as many features as we expect to see.
        for ( int decodedCount = 0; lines.hasNext() && (decodedCount < expected.size()); ++decodedCount ) {
            final GencodeGtfFeature actual = codec.decode(lines);

            Assert.assertTrue(expectedFeatures.hasNext());

            for ( final GencodeGtfFeature child : actual.getAllFeatures() ) {
                Assert.assertEquals(child.getUcscGenomeVersion(), expectedUcscVersion);
            }

            // Full equality check against the next expected feature.
            Assert.assertEquals(actual, expectedFeatures.next());
        }
    }
}
 
Example 6
Source File: SimpleReference.java    From varsim with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Sums the number of non-N bases over every region listed in a BED file.
 *
 * All sequences are loaded first; each BED record is then decoded and the
 * non-N base count for its contig and start/end interval is added to the total.
 * The file stream is closed before returning, including when decoding fails.
 *
 * @param regions BED file listing the regions to count over
 * @return total number of non-N bases across all decoded regions
 * @throws IOException if the BED file cannot be opened or read
 */
public long getNumNonNBases(final File regions) throws IOException {
    loadAllSequences();
    long count = 0;

    final FeatureCodec<BEDFeature, LineIterator> bedCodec = new BEDCodec(BEDCodec.StartOffset.ONE);

    // try-with-resources: the original never closed this stream, leaking a file handle per call.
    try (final FileInputStream regionsStream = new FileInputStream(regions)) {
        final LineIterator lineIterator = new AsciiLineReaderIterator(new AsciiLineReader(regionsStream));

        while (lineIterator.hasNext()) {
            final BEDFeature bedFeature = bedCodec.decode(lineIterator);
            if (bedFeature == null) {
                // The Tribble codec contract allows decode() to return null for lines that are not
                // features (e.g. comment / track / browser lines); skip them instead of hitting an NPE.
                continue;
            }
            count += data.get(new ChrString(bedFeature.getContig())).getNumNonNBases(bedFeature.getStart(), bedFeature.getEnd());
        }
    }
    return count;
}
 
Example 7
Source File: XsvLocatableTableCodec.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * {@inheritDoc}
 * Consumes lines until the column header of this xsv is reached.
 *
 * Preamble lines seen before the header are stored with their preamble marker
 * stripped. The first non-preamble line is split on the delimiter to form the
 * header, with each column name prefixed by {@code determinePrefixForHeader()}.
 *
 * Dev note:  The final locatable columns (contig, start, end) are also resolved here.
 *
 * @param reader iterator of the lines in the file.  Never {@code null}.
 * @return a list of strings that are the header columns.  Throws exception if no valid header line is found.
 */
@Override
public List<String> readActualHeader(final LineIterator reader) {

    Utils.nonNull(reader);

    while ( reader.hasNext() ) {

        final String currentText = reader.next();
        ++currentLine;

        // Preamble lines are recorded (minus their marker) and otherwise skipped.
        if ( isPreambleLine(currentText) ) {
            preamble.add(currentText.substring(preambleLineStart.length()));
            continue;
        }

        // The first non-preamble line is the column header:
        // prefix every column name, then publish the result as the header.
        final ArrayList<String> prefixedColumns = new ArrayList<>();
        for ( final String rawColumn : currentText.split(delimiter) ) {
            prefixedColumns.add(determinePrefixForHeader() + rawColumn);
        }
        header = prefixedColumns;

        // Map each header name back to its column position.
        headerToIndex = IntStream.range(0, header.size())
                .boxed()
                .collect(Collectors.toMap(position -> header.get(position), Function.identity()));
        isHeaderInitialized = true;

        // Resolve the locatable columns against the freshly-built header.
        finalContigColumn = determineFinalColumn(inputContigColumn);
        finalStartColumn = determineFinalColumn(inputStartColumn);
        finalEndColumn = determineFinalColumn(inputEndColumn);
        validateFinalColumns();

        locatableColumns = Arrays.asList(finalContigColumn, finalStartColumn, finalEndColumn);
        assertLocatableColumnsInHeaderToIndex(locatableColumns, headerToIndex);

        return header;
    }

    // Ran out of lines without ever seeing a non-preamble line.
    throw new UserException.BadInput("Given file is malformed - does not contain a header!");
}