Java Code Examples for org.apache.spark.broadcast.Broadcast#getValue()

The following examples show how to use org.apache.spark.broadcast.Broadcast#getValue(). They are taken from open source projects; the source file, project, and license are noted above each example.
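
Before the project examples, here is a minimal, self-contained sketch of the basic pattern: the driver broadcasts a read-only value with JavaSparkContext#broadcast, and tasks read it back with Broadcast#getValue(). The class name, lookup map, and sample data below are hypothetical and not taken from any of the projects listed.

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.broadcast.Broadcast;

public class BroadcastGetValueSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("broadcast-sketch").setMaster("local[*]");
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            Map<String, Integer> lookup = new HashMap<>();
            lookup.put("a", 1);
            lookup.put("b", 2);

            // Ship the lookup table to the executors once, instead of with every task closure.
            Broadcast<Map<String, Integer>> lookupBroadcast = sc.broadcast(lookup);

            JavaRDD<String> keys = sc.parallelize(Arrays.asList("a", "b", "a"));
            // getValue() resolves the broadcast handle to the broadcast value inside the task.
            JavaRDD<Integer> resolved = keys.map(k -> lookupBroadcast.getValue().getOrDefault(k, 0));

            System.out.println(resolved.collect()); // [1, 2, 1]
        }
    }
}

As in the examples that follow, it is the broadcast handle that gets captured in the task closure; calling getValue() (or value()) inside the task returns the value cached on each executor.
 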
Example 1
Source File: HaplotypeCallerSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static FlatMapFunction<Iterator<AssemblyRegionWalkerContext>, VariantContext> assemblyFunction(final SAMFileHeader header,
                                                                                                       final String referenceFileName,
                                                                                                       final Broadcast<HaplotypeCallerArgumentCollection> hcArgsBroadcast,
                                                                                                       final Broadcast<AssemblyRegionArgumentCollection> assemblyRegionArgsBroadcast,
                                                                                                       final Broadcast<VariantAnnotatorEngine> annotatorEngineBroadcast) {
    return (FlatMapFunction<Iterator<AssemblyRegionWalkerContext>, VariantContext>) contexts -> {
        // HaplotypeCallerEngine isn't serializable but is expensive to instantiate, so construct and reuse one for every partition
        final ReferenceSequenceFile taskReferenceSequenceFile = taskReferenceSequenceFile(referenceFileName);
        final HaplotypeCallerEngine hcEngine = new HaplotypeCallerEngine(hcArgsBroadcast.value(), assemblyRegionArgsBroadcast.value(), false, false, header, taskReferenceSequenceFile, annotatorEngineBroadcast.getValue());
        Iterator<Iterator<VariantContext>> iterators = Utils.stream(contexts).map(context -> {
            AssemblyRegion region = context.getAssemblyRegion();
            FeatureContext featureContext = context.getFeatureContext();
            return hcEngine.callRegion(region, featureContext, context.getReferenceContext()).iterator();
        }).iterator();

        return Iterators.concat(iterators);
    };
}
 
Example 2
Source File: VariantWalkerSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static FlatMapFunction<Shard<VariantContext>, VariantWalkerContext> getVariantsFunction(
        final String referenceFileName,
        final Broadcast<FeatureManager> bFeatureManager) {
    return (FlatMapFunction<Shard<VariantContext>, VariantWalkerContext>) shard -> {
        ReferenceDataSource reference = referenceFileName == null ? null : new ReferenceFileSource(IOUtils.getPath(SparkFiles.get(referenceFileName)));
        FeatureManager features = bFeatureManager == null ? null : bFeatureManager.getValue();

        return StreamSupport.stream(shard.spliterator(), false)
                .filter(v -> v.getStart() >= shard.getStart() && v.getStart() <= shard.getEnd()) // only include variants that start in the shard
                .map(v -> {
                    final SimpleInterval variantInterval = new SimpleInterval(v);
                    return new VariantWalkerContext(v,
                            new ReadsContext(), // empty
                            new ReferenceContext(reference, variantInterval),
                            new FeatureContext(features, variantInterval));
                }).iterator();
    };
}
 
Example 3
Source File: AnnotatedVariantProducer.java    From gatk with BSD 3-Clause "New" or "Revised" License
@VisibleForTesting
static VariantContextBuilder annotateWithExternalCNVCalls(final String recordContig, final int pos, final int end,
                                                          final VariantContextBuilder inputBuilder,
                                                          final Broadcast<SAMSequenceDictionary> broadcastSequenceDictionary,
                                                          final Broadcast<SVIntervalTree<VariantContext>> broadcastCNVCalls,
                                                          final String sampleId) {
    if (broadcastCNVCalls == null)
        return inputBuilder;
    final SVInterval variantInterval = new SVInterval(broadcastSequenceDictionary.getValue().getSequenceIndex(recordContig), pos, end);
    final SVIntervalTree<VariantContext> cnvCallTree = broadcastCNVCalls.getValue();
    final String cnvCallAnnotation =
            Utils.stream(cnvCallTree.overlappers(variantInterval))
                    .map(overlapper -> formatExternalCNVCallAnnotation(overlapper.getValue(), sampleId))
                    .collect(Collectors.joining(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR));
    if (!cnvCallAnnotation.isEmpty()) {
        return inputBuilder.attribute(GATKSVVCFConstants.EXTERNAL_CNV_CALLS, cnvCallAnnotation);
    } else
        return inputBuilder;
}
 
Example 4
Source File: StructuralVariationDiscoveryPipelineSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static JavaRDD<GATKRead> getContigRawAlignments(final JavaSparkContext ctx,
                                                        final FindBreakpointEvidenceSpark.AssembledEvidenceResults assembledEvidenceResults,
                                                        final SvDiscoveryInputMetaData svDiscoveryInputMetaData) {
    final Broadcast<SAMSequenceDictionary> referenceSequenceDictionaryBroadcast =
            svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast();
    final Broadcast<SAMFileHeader> headerBroadcast = svDiscoveryInputMetaData.getSampleSpecificData().getHeaderBroadcast();
    final SAMFileHeader headerForReads = headerBroadcast.getValue();
    final SAMReadGroupRecord contigAlignmentsReadGroup = new SAMReadGroupRecord(SVUtils.GATKSV_CONTIG_ALIGNMENTS_READ_GROUP_ID);
    final List<String> refNames = SequenceDictionaryUtils.getContigNamesList(referenceSequenceDictionaryBroadcast.getValue());

    return ctx.parallelize(
            assembledEvidenceResults
                    .getAlignedAssemblyOrExcuseList().stream()
                    .filter(AlignedAssemblyOrExcuse::isNotFailure)
                    .flatMap(aa -> aa.toSAMStreamForAlignmentsOfThisAssembly(headerForReads, refNames, contigAlignmentsReadGroup))
                    .map(SAMRecordToGATKReadAdapter::new)
                    .collect(Collectors.toList())
    );
}
 
Example 5
Source File: SparkExecutionContext.java    From rheem with Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public <T> Collection<T> getBroadcast(String name) {
    final Broadcast<?> broadcast = this.broadcasts.get(name);
    if (broadcast == null) {
        throw new RheemException("No such broadcast found: " + name);
    }

    return (Collection<T>) broadcast.getValue();
}
 
Example 6
Source File: HaplotypeCallerSpark.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
/**
 * Call variants from Tuples of AssemblyRegion and SimpleInterval.
 * The interval should be the non-padded shard boundary for the shard that the corresponding AssemblyRegion was
 * created in; it is used to eliminate redundant variant calls at the edges of shard boundaries.
 */
private static FlatMapFunction<Iterator<Tuple2<AssemblyRegion, SimpleInterval>>, VariantContext> callVariantsFromAssemblyRegions(
        final AuthHolder authHolder,
        final SAMFileHeader header,
        final Broadcast<ReferenceMultiSource> referenceBroadcast,
        final Broadcast<HaplotypeCallerArgumentCollection> hcArgsBroadcast) {
    return regionAndIntervals -> {
        //HaplotypeCallerEngine isn't serializable but is expensive to instantiate, so construct and reuse one for every partition
        final ReferenceMultiSourceAdapter referenceReader = new ReferenceMultiSourceAdapter(referenceBroadcast.getValue(), authHolder);
        final HaplotypeCallerEngine hcEngine = new HaplotypeCallerEngine(hcArgsBroadcast.value(), header, referenceReader);
        return iteratorToStream(regionAndIntervals).flatMap(regionToVariants(hcEngine)).iterator();
    };
}
 
Example 7
Source File: FirstIterationFunction.java    From deeplearning4j with Apache License 2.0
public FirstIterationFunction(Broadcast<Map<String, Object>> word2vecVarMapBroadcast,
                                     Broadcast<double[]> expTableBroadcast, Broadcast<VocabCache<VocabWord>> vocabCacheBroadcast) {

    Map<String, Object> word2vecVarMap = word2vecVarMapBroadcast.getValue();
    this.expTable = expTableBroadcast.getValue();
    this.vectorLength = (int) word2vecVarMap.get("vectorLength");
    this.useAdaGrad = (boolean) word2vecVarMap.get("useAdaGrad");
    this.negative = (double) word2vecVarMap.get("negative");
    this.window = (int) word2vecVarMap.get("window");
    this.alpha = (double) word2vecVarMap.get("alpha");
    this.minAlpha = (double) word2vecVarMap.get("minAlpha");
    this.totalWordCount = (long) word2vecVarMap.get("totalWordCount");
    this.seed = (long) word2vecVarMap.get("seed");
    this.maxExp = (int) word2vecVarMap.get("maxExp");
    this.iterations = (int) word2vecVarMap.get("iterations");
    this.batchSize = (int) word2vecVarMap.get("batchSize");
    this.indexSyn0VecMap = new HashMap<>();
    this.pointSyn1VecMap = new HashMap<>();
    this.vocab = vocabCacheBroadcast.getValue();

    if (this.vocab == null)
        throw new RuntimeException("VocabCache is null");

    if (negative > 0) {
        negativeHolder = NegativeHolder.getInstance();
        negativeHolder.initHolder(vocab, expTable, this.vectorLength);
    }
}
 
Example 8
Source File: FindAssemblyRegionsSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static FlatMapFunction<Iterator<Shard<GATKRead>>, AssemblyRegionWalkerContext> getAssemblyRegionsFunctionFast(
        final String referenceFileName,
        final Broadcast<FeatureManager> bFeatureManager,
        final SAMFileHeader header,
        final Broadcast<Supplier<AssemblyRegionEvaluator>> supplierBroadcast,
        final AssemblyRegionArgumentCollection assemblyRegionArgs) {
    return (FlatMapFunction<Iterator<Shard<GATKRead>>, AssemblyRegionWalkerContext>) shardedReadIterator -> {
        final ReferenceDataSource reference = referenceFileName == null ? null : new ReferenceFileSource(IOUtils.getPath(SparkFiles.get(referenceFileName)));
        final FeatureManager features = bFeatureManager == null ? null : bFeatureManager.getValue();
        final AssemblyRegionEvaluator assemblyRegionEvaluator = supplierBroadcast.getValue().get(); // one AssemblyRegionEvaluator instance per Spark partition
        final ReadsDownsampler readsDownsampler = assemblyRegionArgs.maxReadsPerAlignmentStart > 0 ?
                new PositionalDownsampler(assemblyRegionArgs.maxReadsPerAlignmentStart, header) : null;

        Iterator<Iterator<AssemblyRegionWalkerContext>> iterators = Utils.stream(shardedReadIterator)
                .map(shardedRead -> new ShardToMultiIntervalShardAdapter<>(
                        new DownsampleableSparkReadShard(
                                new ShardBoundary(shardedRead.getInterval(), shardedRead.getPaddedInterval()), shardedRead, readsDownsampler)))
                .map(downsampledShardedRead -> {
                    final Iterator<AssemblyRegion> assemblyRegionIter = new AssemblyRegionIterator(
                            new ShardToMultiIntervalShardAdapter<>(downsampledShardedRead),
                            header, reference, features, assemblyRegionEvaluator, assemblyRegionArgs);
                    return Utils.stream(assemblyRegionIter).map(assemblyRegion ->
                            new AssemblyRegionWalkerContext(assemblyRegion,
                                    new ReferenceContext(reference, assemblyRegion.getPaddedSpan()),
                                    new FeatureContext(features, assemblyRegion.getPaddedSpan()))).iterator();
                }).iterator();
        return Iterators.concat(iterators);
    };
}
 
Example 9
Source File: FindAssemblyRegionsSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static FlatMapFunction<Iterator<Shard<GATKRead>>, ActivityProfileStateRange> getActivityProfileStatesFunction(
        final String referenceFileName,
        final Broadcast<FeatureManager> bFeatureManager,
        final SAMFileHeader header,
        final Broadcast<Supplier<AssemblyRegionEvaluator>> supplierBroadcast,
        final AssemblyRegionArgumentCollection assemblyRegionArgs) {
    return (FlatMapFunction<Iterator<Shard<GATKRead>>, ActivityProfileStateRange>) shardedReadIterator -> {
        final ReferenceDataSource reference = referenceFileName == null ? null : new ReferenceFileSource(IOUtils.getPath(SparkFiles.get(referenceFileName)));
        final FeatureManager features = bFeatureManager == null ? null : bFeatureManager.getValue();
        final AssemblyRegionEvaluator assemblyRegionEvaluator = supplierBroadcast.getValue().get(); // one AssemblyRegionEvaluator instance per Spark partition
        
        return Utils.stream(shardedReadIterator)
                .map(shardedRead -> {
                    final ReadsDownsampler readsDownsampler = assemblyRegionArgs.maxReadsPerAlignmentStart > 0 ?
                            new PositionalDownsampler(assemblyRegionArgs.maxReadsPerAlignmentStart, header) : null;
                    return new ShardToMultiIntervalShardAdapter<>(
                            new DownsampleableSparkReadShard(
                                    new ShardBoundary(shardedRead.getInterval(), shardedRead.getPaddedInterval()), shardedRead, readsDownsampler));
                })
                .map(shardedRead -> {
                    final Iterator<ActivityProfileState> activityProfileStateIter = new ActivityProfileStateIterator(
                            new ShardToMultiIntervalShardAdapter<>(shardedRead),
                            header, reference, features, assemblyRegionEvaluator
                    );
                    return new ActivityProfileStateRange(shardedRead, activityProfileStateIter);
                }).iterator();
    };
}
 
Example 10
Source File: FindAssemblyRegionsSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static FlatMapFunction<Iterator<AssemblyRegion>, AssemblyRegionWalkerContext> getAssemblyRegionWalkerContextFunction(
        final String referenceFileName,
        final Broadcast<FeatureManager> bFeatureManager) {

    return (FlatMapFunction<Iterator<AssemblyRegion>, AssemblyRegionWalkerContext>) assemblyRegionIter -> {
        final ReferenceDataSource reference = referenceFileName == null ? null : new ReferenceFileSource(IOUtils.getPath(SparkFiles.get(referenceFileName)));
        final FeatureManager features = bFeatureManager == null ? null : bFeatureManager.getValue();
        return Utils.stream(assemblyRegionIter).map(assemblyRegion ->
                new AssemblyRegionWalkerContext(assemblyRegion,
                        new ReferenceContext(reference, assemblyRegion.getPaddedSpan()),
                        new FeatureContext(features, assemblyRegion.getPaddedSpan()))).iterator();
    };
}
 
Example 11
Source File: ReadWalkerSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static FlatMapFunction<Iterator<GATKRead>, ReadWalkerContext> getReadsFunction(
        String referenceFileName, Broadcast<FeatureManager> bFeatureManager) {
    return readIterator -> {
        ReferenceDataSource reference = referenceFileName == null ? null : new ReferenceFileSource(IOUtils.getPath(SparkFiles.get(referenceFileName)));
        FeatureManager features = bFeatureManager == null ? null : bFeatureManager.getValue();
        return Iterators.transform(readIterator, new Function<GATKRead, ReadWalkerContext>() {
            @Nullable
            @Override
            public ReadWalkerContext apply(@Nullable GATKRead r) {
                final SimpleInterval readInterval = getReadInterval(r);
                return new ReadWalkerContext(r, new ReferenceContext(reference, readInterval), new FeatureContext(features, readInterval));
            }
        });
    };
}
 
Example 12
Source File: CollectAllelicCountsSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static FlatMapFunction<Iterator<LocusWalkerContext>, AllelicCountCollector> distributedCount(final Broadcast<SampleLocatableMetadata> sampleMetadataBroadcast,
                                                                                                     final int minimumBaseQuality) {
    return (FlatMapFunction<Iterator<LocusWalkerContext>, AllelicCountCollector>) contextIterator -> {
        final AllelicCountCollector result = new AllelicCountCollector(sampleMetadataBroadcast.getValue());

        contextIterator.forEachRemaining(ctx -> {
            final byte refAsByte = ctx.getReferenceContext().getBase();
            result.collectAtLocus(Nucleotide.decode(refAsByte), ctx.getAlignmentContext().getBasePileup(),
                    ctx.getAlignmentContext().getLocation(), minimumBaseQuality);
        });
        return Collections.singletonList(result).iterator();
    };
}
 
Example 13
Source File: FindBreakpointEvidenceSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static List<SVInterval> findHighCoverageSubintervalsAndLog(
        final FindBreakpointEvidenceSparkArgumentCollection params,
        final JavaSparkContext ctx,
        final Broadcast<ReadMetadata> broadcastMetadata,
        final List<SVInterval> intervals,
        final JavaRDD<GATKRead> unfilteredReads,
        final SVReadFilter filter,
        final Logger logger) {

    final int minFlankingHighCovFactor = params.highDepthCoverageFactor;
    final int minPeakHighCovFactor = params.highDepthCoveragePeakFactor;

    final ReadMetadata shortReadMetadata = broadcastMetadata.getValue();
    int minFlankingHighCoverageValue = (int) (minFlankingHighCovFactor * shortReadMetadata.getCoverage());
    int minPeakHighCoverageValue = (int) (minPeakHighCovFactor * shortReadMetadata.getCoverage());
    final List<SVInterval> result =
            findHighCoverageSubIntervals(ctx, broadcastMetadata, intervals, unfilteredReads,
                    filter,
                    minFlankingHighCoverageValue,
                    minPeakHighCoverageValue);
    log("Found " + result.size() + " sub-intervals with coverage over " + minFlankingHighCoverageValue +
            " and a peak coverage of over " + minPeakHighCoverageValue + ".", logger);

    final String intervalFile = params.highCoverageIntervalsFile;
    if (intervalFile != null) {
        try (final OutputStreamWriter writer =
                     new OutputStreamWriter(new BufferedOutputStream(BucketUtils.createFile(intervalFile)))) {
            for (final SVInterval svInterval : result) {
                final String bedLine = shortReadMetadata.getContigName(svInterval.getContig()) + "\t" + (svInterval.getStart() - 1) + "\t" + svInterval.getEnd() + "\n";
                writer.write(bedLine);
            }
        } catch (final IOException ioe) {
            throw new UserException.CouldNotCreateOutputFile("Can't write high coverage intervals file " + intervalFile, ioe);
        }
    }
    return result;
}
 
Example 14
Source File: KMeansArbitraryDimension.java    From flink-perf with Apache License 2.0
public SelectNearestCentroid(Broadcast<List<Tuple2<Integer, Point>>> brCenters) {
	this.brCenters = brCenters.getValue();
}
 
Example 15
Source File: Reader.java    From iceberg with Apache License 2.0
Reader(Table table, Broadcast<FileIO> io, Broadcast<EncryptionManager> encryptionManager,
    boolean caseSensitive, DataSourceOptions options) {
  this.table = table;
  this.snapshotId = options.get("snapshot-id").map(Long::parseLong).orElse(null);
  this.asOfTimestamp = options.get("as-of-timestamp").map(Long::parseLong).orElse(null);
  if (snapshotId != null && asOfTimestamp != null) {
    throw new IllegalArgumentException(
        "Cannot scan using both snapshot-id and as-of-timestamp to select the table snapshot");
  }

  this.startSnapshotId = options.get("start-snapshot-id").map(Long::parseLong).orElse(null);
  this.endSnapshotId = options.get("end-snapshot-id").map(Long::parseLong).orElse(null);
  if (snapshotId != null || asOfTimestamp != null) {
    if (startSnapshotId != null || endSnapshotId != null) {
      throw new IllegalArgumentException(
          "Cannot specify start-snapshot-id and end-snapshot-id to do incremental scan when either snapshot-id or " +
              "as-of-timestamp is specified");
    }
  } else {
    if (startSnapshotId == null && endSnapshotId != null) {
      throw new IllegalArgumentException("Cannot only specify option end-snapshot-id to do incremental scan");
    }
  }

  // look for split behavior overrides in options
  this.splitSize = options.get("split-size").map(Long::parseLong).orElse(null);
  this.splitLookback = options.get("lookback").map(Integer::parseInt).orElse(null);
  this.splitOpenFileCost = options.get("file-open-cost").map(Long::parseLong).orElse(null);

  if (io.getValue() instanceof HadoopFileIO) {
    String scheme = "no_exist";
    try {
      Configuration conf = new Configuration(SparkSession.active().sparkContext().hadoopConfiguration());
      // merge hadoop config set on table
      mergeIcebergHadoopConfs(conf, table.properties());
      // merge hadoop config passed as options and overwrite the one on table
      mergeIcebergHadoopConfs(conf, options.asMap());
      FileSystem fs = new Path(table.location()).getFileSystem(conf);
      scheme = fs.getScheme().toLowerCase(Locale.ENGLISH);
    } catch (IOException ioe) {
      LOG.warn("Failed to get Hadoop Filesystem", ioe);
    }
    this.localityPreferred = options.get("locality").map(Boolean::parseBoolean)
        .orElse(LOCALITY_WHITELIST_FS.contains(scheme));
  } else {
    this.localityPreferred = false;
  }

  this.schema = table.schema();
  this.io = io;
  this.encryptionManager = encryptionManager;
  this.caseSensitive = caseSensitive;

  this.batchReadsEnabled = options.get("vectorization-enabled").map(Boolean::parseBoolean).orElse(
      PropertyUtil.propertyAsBoolean(table.properties(),
          TableProperties.PARQUET_VECTORIZATION_ENABLED, TableProperties.PARQUET_VECTORIZATION_ENABLED_DEFAULT));
  this.batchSize = options.get("batch-size").map(Integer::parseInt).orElse(
      PropertyUtil.propertyAsInt(table.properties(),
        TableProperties.PARQUET_BATCH_SIZE, TableProperties.PARQUET_BATCH_SIZE_DEFAULT));
}
 
Example 16
Source File: RevertSamSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
protected void runTool(JavaSparkContext ctx) {
    Broadcast<SAMFileHeader> headerBroadcast = ctx.broadcast(getHeaderForReads());
    JavaRDD<GATKRead> reads = getReads();

    ////////////////////////////////////////////////////////////////////////////
    // Grab the input header and remap values where appropriate
    ////////////////////////////////////////////////////////////////////////////
    SAMFileHeader localHeader = headerBroadcast.getValue();
    validateHeaderOverrides(localHeader, sampleAlias, libraryName);
    if (sampleAlias != null) {
        localHeader.getReadGroups().forEach(rg -> rg.setSample(sampleAlias));
    }
    if (libraryName != null) {
        localHeader.getReadGroups().forEach(rg -> rg.setLibrary(libraryName));
    }

    ////////////////////////////////////////////////////////////////////////////
    // Map the readgroups in the header to appropriate
    ////////////////////////////////////////////////////////////////////////////
    Map<String, Path> writerMap = getOutputMap(outputMap,
                                              output,
                                              getDefaultExtension(readArguments.getReadPathSpecifiers().get(0), outputByReadgroupFileFormat),
                                              localHeader.getReadGroups(),
                                              outputByReadGroup);

    ////////////////////////////////////////////////////////////////////////////
    // Construct appropriate headers for the output files
    ////////////////////////////////////////////////////////////////////////////
    final Map<String, SAMFileHeader> headerMap = getReadGroupHeaderMap(localHeader, writerMap);

    // Revert the reads based on the given attributes
    List<String> attributesToRevert = removeDefaults ? DEFAULT_ATTRIBUTES_TO_CLEAR : new ArrayList<>();
    attributesToRevert.addAll(attributesToClear);
    JavaRDD<GATKRead> readsReverted = revertReads(reads, attributesToRevert);

    ////////////////////////////////////////////////////////////////////////////
    // Sanitize the reads, sorting them into appropriate order if necessary
    ////////////////////////////////////////////////////////////////////////////
    if (sanitize) {
        Map<String, FastqQualityFormat> readGroupFormatMap = createReadGroupFormatMap(readsReverted, headerBroadcast, !dontRestoreOriginalQualities);

        readsReverted = sanitize(readGroupFormatMap, readsReverted, localHeader, keepFirstDuplicate);
    }

    // Write the one or many read output files
    for (Map.Entry<String, Path> rmap: writerMap.entrySet()) {
        //TODO what to do if the readgroup isn't present
        final String key = rmap.getKey();
        JavaRDD<GATKRead> filteredreads = rmap.getKey()==null? readsReverted :
                                                                readsReverted.filter(r -> r.getReadGroup().equals(key));
        writeReads(ctx, rmap.getValue().toString(), filteredreads, headerMap.get(rmap.getKey()), false); //TODO proper header map
    }
}
 
Example 17
Source File: ChunkProcessingTask.java    From p3-batchrefine with Apache License 2.0
public ChunkProcessingTask(Broadcast<String> transform,
                           Broadcast<String> header, Broadcast<Properties> exporterProperties) {
    fHeader = header.getValue();
    fTansform = transform.getValue();
    fProperites = exporterProperties.getValue();
}
 
Example 18
Source File: HaplotypeCallerSparkIntegrationTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
@Test
public void testBroadcastHcArgs() {
    Broadcast<HaplotypeCallerArgumentCollection> broadcast = SparkContextFactory.getTestSparkContext().broadcast(new HaplotypeCallerArgumentCollection());
    broadcast.getValue();
}
 
Example 19
Source File: StructureToAtomInteractions.java    From mmtf-spark with Apache License 2.0
/**
 * This constructor specifies that the interaction of a specific
 * group and a specific atom should be calculated, e.g., the atom "O" in
 * the group "HOH".
 * @param bfilter Specifies the conditions for calculating interactions
 * @param pairwise If true, each pairwise interaction is returned as its own row; otherwise
 * the interactions of one atom with all other atoms are returned as a single row.
 */
public StructureToAtomInteractions(Broadcast<InteractionFilter> bfilter, boolean pairwise) {
	this.filter = bfilter.getValue();
	this.pairwise = pairwise;
}