Java Code Examples for htsjdk.samtools.ValidationStringency#LENIENT

The following examples show how to use htsjdk.samtools.ValidationStringency#LENIENT. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: LoadReadsToBigQuery.java    From dataflow-java with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the collection of reads from the BAM file named in the pipeline options.
 *
 * <p>Reads are opened with {@link ValidationStringency#LENIENT}. When the configured maximum
 * shard size (in MB) is positive, the file is read through the sharded BAM transform using a
 * policy that reports {@code true} once a shard's approximate size exceeds that limit.
 * Otherwise the file is read sequentially for the first requested contig only; that path
 * exists for testing and for comparing sharded against unsharded reading.
 *
 * @return the reads obtained from the BAM file
 * @throws IOException declared by the signature; presumably raised by the BAM/storage access
 * @throws URISyntaxException declared by the signature; presumably raised for a malformed path
 */
private static PCollection<Read> getReadsFromBamFile() throws IOException, URISyntaxException {
  LOG.info("getReadsFromBamFile");

  final Iterable<Contig> contigs =
      Contig.parseContigsFromCommandLine(pipelineOptions.getReferences());
  final ReaderOptions readerOptions = new ReaderOptions(
      ValidationStringency.LENIENT,
      pipelineOptions.isIncludeUnmapped());
  if (pipelineOptions.getMaxShardSizeMB() > 0) {
    LOG.info("Sharded reading of " + pipelineOptions.getBamFilePath());

    ShardingPolicy policy = new ShardingPolicy() {
      // Computed in long arithmetic: an int product overflows for limits of 2048 MB or more,
      // which would turn the limit negative and make every shard appear oversized.
      final long MAX_BYTES_PER_SHARD_IN_BYTES =
          pipelineOptions.getMaxShardSizeMB() * 1024L * 1024L;
      @Override
      public Boolean apply(BAMShard shard) {
        return shard.approximateSizeInBytes() > MAX_BYTES_PER_SHARD_IN_BYTES;
      }
    };

    return ReadBAMTransform.getReadsFromBAMFilesSharded(p,
        pipelineOptions,
        auth,
        Lists.newArrayList(contigs),
        readerOptions,
        pipelineOptions.getBamFilePath(),
        policy);
  } else {  // For testing and comparing sharded vs. not sharded only.
    LOG.info("Unsharded reading of " + pipelineOptions.getBamFilePath());
    return p.apply(
        Create.of(
            Reader.readSequentiallyForTesting(
                GCSOptions.Methods.createStorageClient(pipelineOptions, auth),
                pipelineOptions.getBamFilePath(),
                contigs.iterator().next(),
                readerOptions)));
  }
}
 
Example 2
Source File: CountReads.java    From dataflow-java with Apache License 2.0 5 votes vote down vote up
/**
 * Produces the reads of the configured BAM file as a {@code PCollection}.
 *
 * <p>The file is opened with {@link ValidationStringency#LENIENT}. If sharded BAM reading is
 * switched off, the first requested contig is read sequentially (a path kept for testing and
 * for comparison with the sharded result). Otherwise the sharded BAM transform is used with a
 * policy that answers {@code true} when a shard's approximate size exceeds the configured
 * maximum number of bytes.
 *
 * @return the reads obtained from the BAM file
 * @throws IOException declared by the signature; presumably raised by the BAM/storage access
 * @throws URISyntaxException declared by the signature; presumably raised for a malformed path
 */
private static PCollection<Read> getReadsFromBAMFile() throws IOException, URISyntaxException {
  LOG.info("getReadsFromBAMFile");

  final Iterable<Contig> requestedContigs =
      Contig.parseContigsFromCommandLine(pipelineOptions.getReferences());
  final ReaderOptions bamReaderOptions =
      new ReaderOptions(ValidationStringency.LENIENT, pipelineOptions.isIncludeUnmapped());

  if (!pipelineOptions.isShardBAMReading()) {
    // Unsharded path: only for testing and for comparing sharded vs. not sharded.
    LOG.info("Unsharded reading of " + pipelineOptions.getBAMFilePath());
    return p.apply(Create.of(Reader.readSequentiallyForTesting(
        GCSOptions.Methods.createStorageClient(pipelineOptions, auth),
        pipelineOptions.getBAMFilePath(),
        requestedContigs.iterator().next(),
        bamReaderOptions)));
  }

  LOG.info("Sharded reading of "+ pipelineOptions.getBAMFilePath());

  final ShardingPolicy sizeLimitPolicy = new ShardingPolicy() {
    final int MAX_BYTES_PER_SHARD = pipelineOptions.getMaxShardSizeBytes();

    @Override
    public Boolean apply(BAMShard candidate) {
      return candidate.approximateSizeInBytes() > MAX_BYTES_PER_SHARD;
    }
  };

  return ReadBAMTransform.getReadsFromBAMFilesSharded(
      p,
      pipelineOptions,
      auth,
      Lists.newArrayList(requestedContigs),
      bamReaderOptions,
      pipelineOptions.getBAMFilePath(),
      sizeLimitPolicy);
}
 
Example 3
Source File: TileMetricsUtil.java    From picard with MIT License 4 votes vote down vote up
/**
 * Collects one {@link TilePhasingValue} per template read in the read structure, carrying the
 * phasing and prephasing metric values looked up in {@code codeMetricsMap}.
 *
 * <p>The first template descriptor encountered is reported as {@code TileTemplateRead.FIRST},
 * every later one as {@code TileTemplateRead.SECOND}. When both metric codes are present their
 * sole values are used. When both are absent and the stringency is not STRICT, zeros are used
 * instead (with a warning under LENIENT). In all other incomplete cases a
 * {@link PicardException} is thrown.
 *
 * @param codeMetricsMap       tile metrics keyed by metric code
 * @param readStructure        read structure whose descriptors are scanned for template reads
 * @param validationStringency controls whether fully missing phasing data is tolerated
 * @return the phasing values for the template reads, in descriptor order
 */
private static Collection<TilePhasingValue> getTilePhasingValues(final Map<Integer, ? extends Collection<IlluminaTileMetrics>> codeMetricsMap, final ReadStructure readStructure, final ValidationStringency validationStringency) {
    final Collection<TilePhasingValue> collected = new ArrayList<>();
    boolean templateAlreadySeen = false;

    for (int idx = 0; idx < readStructure.descriptors.size(); idx++) {
        if (readStructure.descriptors.get(idx).type != ReadType.Template) {
            continue;
        }

        final TileTemplateRead templateRead = templateAlreadySeen ? TileTemplateRead.SECOND : TileTemplateRead.FIRST;

        // Metric codes under which the phasing and prephasing values of this descriptor are stored.
        final int phasingCode = IlluminaMetricsCode.getPhasingCode(idx, IlluminaMetricsCode.PHASING_BASE);
        final int prePhasingCode = IlluminaMetricsCode.getPhasingCode(idx, IlluminaMetricsCode.PREPHASING_BASE);

        final boolean hasPhasing = codeMetricsMap.containsKey(phasingCode);
        final boolean hasPrePhasing = codeMetricsMap.containsKey(prePhasingCode);

        // Zero is the fallback used when the missing data is tolerated.
        float phasing = 0;
        float prePhasing = 0;

        if (hasPhasing && hasPrePhasing) {
            phasing = CollectionUtil.getSoleElement(codeMetricsMap.get(phasingCode)).getMetricValue();
            prePhasing = CollectionUtil.getSoleElement(codeMetricsMap.get(prePhasingCode)).getMetricValue();
        } else {
            final String message = String.format(
                    "Don't have both phasing and prephasing values for %s read cycle %s.  Phasing code was %d and prephasing code was %d.",
                    templateRead.toString(), idx + 1, phasingCode, prePhasingCode
            );

            // Both values missing suggests the tile failed to image (e.g. a grain of sand in the
            // light path); exactly one missing suggests corrupt data and is never tolerated.
            final boolean bothMissing = !hasPhasing && !hasPrePhasing;
            if (!bothMissing || validationStringency == ValidationStringency.STRICT) {
                throw new PicardException(message);
            }
            if (validationStringency == ValidationStringency.LENIENT) {
                LOG.warn(message);
            }
        }

        collected.add(new TilePhasingValue(templateRead, phasing, prePhasing));
        templateAlreadySeen = true;
    }

    return collected;
}
 
Example 4
Source File: VCFRecordReader.java    From Hadoop-BAM with MIT License 4 votes vote down vote up
/**
 * Advances to the next usable VCF record and stores it as the current key/value.
 *
 * <p>Lines beginning with {@code #} are skipped. A line that fails to decode is rethrown under
 * STRICT stringency (after logging an error) and skipped otherwise, with a warning only under
 * LENIENT. Decoded records for which {@code overlaps} is false are skipped as well.
 *
 * <p>The key holds the contig index in its upper 32 bits, OR-ed with the record start minus
 * one. If the contig is not in {@code contigDict}, a MurmurHash3 of its name is used instead.
 *
 * @return {@code true} if a record was loaded, {@code false} once the line reader is exhausted
 * @throws IOException declared by the signature; presumably raised by the underlying line reader
 */
@Override
public boolean nextKeyValue() throws IOException {
	for (;;) {
		if (!lineRecordReader.nextKeyValue()) {
			return false;
		}

		final String text = lineRecordReader.getCurrentValue().toString();
		if (text.startsWith("#")) {
			continue;
		}

		final VariantContext variant;
		try {
			variant = codec.decode(text);
		} catch (TribbleException ex) {
			if (stringency == ValidationStringency.STRICT) {
				if (logger.isErrorEnabled()) {
					logger.error("Parsing line {} failed with {}.", text, ex);
				}
				throw ex;
			}
			if (stringency == ValidationStringency.LENIENT && logger.isWarnEnabled()) {
				logger.warn("Parsing line {} failed with {}. Skipping...", text, ex);
			}
			continue;
		}

		if (!overlaps(variant)) {
			continue;
		}

		final Integer knownIdx = contigDict.get(variant.getContig());
		final int contigIdx = (knownIdx != null)
			? knownIdx
			: (int) MurmurHash3.murmurhash3(variant.getContig(), 0);

		key.set((long) contigIdx << 32 | (long) (variant.getStart() - 1));
		vc.set(variant, header);

		return true;
	}
}
 
Example 5
Source File: CalculateCoverage.java    From dataflow-java with Apache License 2.0 4 votes vote down vote up
/**
 * Entry point: builds and runs the coverage pipeline.
 *
 * <p>Reads come either from a BAM input (sharded reading, {@link ValidationStringency#LENIENT},
 * unmapped reads excluded) or from ReadGroupSets given by id or by dataset id. From the reads
 * the pipeline computes coverage means, then quantiles, groups them by position and hands the
 * grouped result to the annotation-creating step.
 *
 * @param args command-line arguments parsed into the pipeline {@code Options}
 * @throws GeneralSecurityException declared by the signature; presumably raised while obtaining credentials
 * @throws IOException declared by the signature; presumably raised by API or storage access
 * @throws URISyntaxException declared by the signature; presumably raised for a malformed input path
 * @throws IllegalArgumentException if no input is specified, if {@code --allReferences} is
 *     combined with BAM input, if there are fewer ReadGroupSets than requested quantiles, or if
 *     the first ReadGroupSet has no ReferenceSet
 */
public static void main(String[] args) throws GeneralSecurityException, IOException, URISyntaxException {
  // Registering the options makes them visible through --help.
  PipelineOptionsFactory.register(Options.class);
  options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

  auth = GenomicsOptions.Methods.getGenomicsAuth(options);

  p = Pipeline.create(options);
  registerPipelineCoders(p);

  final boolean noInputGiven = options.getBamInput().isEmpty()
      && options.getInputDatasetId().isEmpty()
      && options.getReadGroupSetIds().isEmpty();
  if (noInputGiven) {
    throw new IllegalArgumentException("BamInput or InputDatasetId or ReadGroupSetIds must be specified");
  }

  String referenceSetId = options.getReferenceSetId();
  final PCollection<Read> reads;

  if (options.getBamInput().isEmpty()) {

    // No BAM file given: the reads come from ReadGroupSets, listed explicitly or via a dataset.
    final List<String> rgsIds = options.getInputDatasetId().isEmpty()
        ? Lists.newArrayList(options.getReadGroupSetIds().split(","))
        : GenomicsUtils.getReadGroupSetIds(options.getInputDatasetId(), auth);

    if (rgsIds.size() < options.getNumQuantiles()) {
      throw new IllegalArgumentException("Number of ReadGroupSets must be greater than or equal to"
          + " the number of requested quantiles.");
    }

    // One ReferenceSetId is taken from the first ReadGroupSet; the pipeline then checks that
    // every ReadGroupSet is associated with that same ReferenceSet.
    final String firstRgsId = rgsIds.get(0);
    referenceSetId = GenomicsUtils.getReferenceSetId(firstRgsId, auth);
    if (Strings.isNullOrEmpty(referenceSetId)) {
      throw new IllegalArgumentException("No ReferenceSetId associated with ReadGroupSetId "
          + firstRgsId
          + ". All ReadGroupSets in given input must have an associated ReferenceSet.");
    }

    reads = p.begin()
        .apply(Create.of(rgsIds))
        .apply(ParDo.of(new CheckMatchingReferenceSet(referenceSetId, auth)))
        .apply(new ReadGroupStreamer(auth, ShardBoundary.Requirement.STRICT, READ_FIELDS, SexChromosomeFilter.INCLUDE_XY));

  } else {

    if (options.isAllReferences()) {
      throw new IllegalArgumentException("--allReferences is not currently supported for BAM "
      + "file reading. Instead use --references and list all of the genomic regions in the file.");
    }

    final List<Contig> requestedContigs =
        Lists.newArrayList(Contig.parseContigsFromCommandLine(options.getReferences()));
    // The second argument (false) excludes unmapped reads.
    final ReaderOptions bamReaderOptions = new ReaderOptions(ValidationStringency.LENIENT, false);

    final ShardingPolicy sizeLimitPolicy = new ShardingPolicy() {
      final int MAX_BYTES_PER_SHARD = options.getMaxShardSizeBytes();

      @Override
      public Boolean apply(BAMShard candidate) {
        return candidate.approximateSizeInBytes() > MAX_BYTES_PER_SHARD;
      }
    };

    reads = ReadBAMTransform.getReadsFromBAMFilesSharded(
        p,
        options,
        auth,
        requestedContigs,
        bamReaderOptions,
        options.getBamInput(),
        sizeLimitPolicy);
  }

  // Destination AnnotationSet for the associated ReferenceSet.
  final AnnotationSet targetAnnotationSet = createAnnotationSet(referenceSetId);

  final PCollection<KV<PosRgsMq, Double>> meanCoverage =
      reads.apply("CalculateCoverateMean", new CalculateCoverageMean(options.getBucketWidth()));
  final PCollection<KV<Position, KV<PosRgsMq.MappingQuality, List<Double>>>> quantilesByPosition =
      meanCoverage.apply("CalculateQuantiles", new CalculateQuantiles(options.getNumQuantiles()));
  final PCollection<KV<Position, Iterable<KV<PosRgsMq.MappingQuality, List<Double>>>>> grouped =
      quantilesByPosition.apply(
          GroupByKey.<Position, KV<PosRgsMq.MappingQuality, List<Double>>>create());
  grouped.apply("CreateAnnotations",
      ParDo.of(new CreateAnnotations(targetAnnotationSet.getId(), auth, true)));

  p.run();
}