Java Code Examples for org.apache.spark.broadcast.Broadcast#getValue()

The following examples show how to use org.apache.spark.broadcast.Broadcast#getValue(). They are taken from open source projects; the source file, project, and license are noted above each example.
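
Before the project examples, here is a minimal, self-contained sketch of the basic pattern: the driver broadcasts a read-only value with JavaSparkContext#broadcast, and tasks read it back with Broadcast#getValue(). The class name, lookup map, and sample data below are hypothetical and not taken from any of the projects listed.

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.broadcast.Broadcast;

public class BroadcastGetValueSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("broadcast-sketch").setMaster("local[*]");
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            Map<String, Integer> lookup = new HashMap<>();
            lookup.put("a", 1);
            lookup.put("b", 2);

            // Ship the lookup table to the executors once, instead of with every task closure.
            Broadcast<Map<String, Integer>> lookupBroadcast = sc.broadcast(lookup);

            JavaRDD<String> keys = sc.parallelize(Arrays.asList("a", "b", "a"));
            // getValue() resolves the broadcast handle to the broadcast value inside the task.
            JavaRDD<Integer> resolved = keys.map(k -> lookupBroadcast.getValue().getOrDefault(k, 0));

            System.out.println(resolved.collect()); // [1, 2, 1]
        }
    }
}

As in the examples that follow, it is the broadcast handle that gets captured in the task closure; calling getValue() (or value()) inside the task returns the value cached on each executor.
 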
Example 1
Source File: HaplotypeCallerSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static FlatMapFunction<Iterator<AssemblyRegionWalkerContext>, VariantContext> assemblyFunction(final SAMFileHeader header,
                                                                                                       final String referenceFileName,
                                                                                                       final Broadcast<HaplotypeCallerArgumentCollection> hcArgsBroadcast,
                                                                                                       final Broadcast<AssemblyRegionArgumentCollection> assemblyRegionArgsBroadcast,
                                                                                                       final Broadcast<VariantAnnotatorEngine> annotatorEngineBroadcast) {
    return (FlatMapFunction<Iterator<AssemblyRegionWalkerContext>, VariantContext>) contexts -> {
        // HaplotypeCallerEngine isn't serializable but is expensive to instantiate, so construct and reuse one for every partition
        final ReferenceSequenceFile taskReferenceSequenceFile = taskReferenceSequenceFile(referenceFileName);
        final HaplotypeCallerEngine hcEngine = new HaplotypeCallerEngine(hcArgsBroadcast.value(), assemblyRegionArgsBroadcast.value(), false, false, header, taskReferenceSequenceFile, annotatorEngineBroadcast.getValue());
        Iterator<Iterator<VariantContext>> iterators = Utils.stream(contexts).map(context -> {
            AssemblyRegion region = context.getAssemblyRegion();
            FeatureContext featureContext = context.getFeatureContext();
            return hcEngine.callRegion(region, featureContext, context.getReferenceContext()).iterator();
        }).iterator();

        return Iterators.concat(iterators);
    };
}
 
Example 2
Source File: VariantWalkerSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static FlatMapFunction<Shard<VariantContext>, VariantWalkerContext> getVariantsFunction(
        final String referenceFileName,
        final Broadcast<FeatureManager> bFeatureManager) {
    return (FlatMapFunction<Shard<VariantContext>, VariantWalkerContext>) shard -> {
        ReferenceDataSource reference = referenceFileName == null ? null : new ReferenceFileSource(IOUtils.getPath(SparkFiles.get(referenceFileName)));
        FeatureManager features = bFeatureManager == null ? null : bFeatureManager.getValue();

        return StreamSupport.stream(shard.spliterator(), false)
                .filter(v -> v.getStart() >= shard.getStart() && v.getStart() <= shard.getEnd()) // only include variants that start in the shard
                .map(v -> {
                    final SimpleInterval variantInterval = new SimpleInterval(v);
                    return new VariantWalkerContext(v,
                            new ReadsContext(), // empty
                            new ReferenceContext(reference, variantInterval),
                            new FeatureContext(features, variantInterval));
                }).iterator();
    };
}
 
Example 3
Source File: AnnotatedVariantProducer.java    From gatk with BSD 3-Clause "New" or "Revised" License
@VisibleForTesting
static VariantContextBuilder annotateWithExternalCNVCalls(final String recordContig, final int pos, final int end,
                                                          final VariantContextBuilder inputBuilder,
                                                          final Broadcast<SAMSequenceDictionary> broadcastSequenceDictionary,
                                                          final Broadcast<SVIntervalTree<VariantContext>> broadcastCNVCalls,
                                                          final String sampleId) {
    if (broadcastCNVCalls == null)
        return inputBuilder;
    final SVInterval variantInterval = new SVInterval(broadcastSequenceDictionary.getValue().getSequenceIndex(recordContig), pos, end);
    final SVIntervalTree<VariantContext> cnvCallTree = broadcastCNVCalls.getValue();
    final String cnvCallAnnotation =
            Utils.stream(cnvCallTree.overlappers(variantInterval))
                    .map(overlapper -> formatExternalCNVCallAnnotation(overlapper.getValue(), sampleId))
                    .collect(Collectors.joining(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR));
    if (!cnvCallAnnotation.isEmpty()) {
        return inputBuilder.attribute(GATKSVVCFConstants.EXTERNAL_CNV_CALLS, cnvCallAnnotation);
    } else
        return inputBuilder;
}
 
Example 4
Source File: StructuralVariationDiscoveryPipelineSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static JavaRDD<GATKRead> getContigRawAlignments(final JavaSparkContext ctx,
                                                        final FindBreakpointEvidenceSpark.AssembledEvidenceResults assembledEvidenceResults,
                                                        final SvDiscoveryInputMetaData svDiscoveryInputMetaData) {
    final Broadcast<SAMSequenceDictionary> referenceSequenceDictionaryBroadcast =
            svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast();
    final Broadcast<SAMFileHeader> headerBroadcast = svDiscoveryInputMetaData.getSampleSpecificData().getHeaderBroadcast();
    final SAMFileHeader headerForReads = headerBroadcast.getValue();
    final SAMReadGroupRecord contigAlignmentsReadGroup = new SAMReadGroupRecord(SVUtils.GATKSV_CONTIG_ALIGNMENTS_READ_GROUP_ID);
    final List<String> refNames = SequenceDictionaryUtils.getContigNamesList(referenceSequenceDictionaryBroadcast.getValue());

    return ctx.parallelize(
            assembledEvidenceResults
                    .getAlignedAssemblyOrExcuseList().stream()
                    .filter(AlignedAssemblyOrExcuse::isNotFailure)
                    .flatMap(aa -> aa.toSAMStreamForAlignmentsOfThisAssembly(headerForReads, refNames, contigAlignmentsReadGroup))
                    .map(SAMRecordToGATKReadAdapter::new)
                    .collect(Collectors.toList())
    );
}
 
Example 5
Source File: SparkExecutionContext.java    From rheem with Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public <T> Collection<T> getBroadcast(String name) {
    final Broadcast<?> broadcast = this.broadcasts.get(name);
    if (broadcast == null) {
        throw new RheemException("No such broadcast found: " + name);
    }

    return (Collection<T>) broadcast.getValue();
}
 
Example 6
Source File: HaplotypeCallerSpark.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
/**
 * Call variants from Tuples of AssemblyRegion and SimpleInterval.
 * The interval should be the non-padded shard boundary for the shard that the corresponding AssemblyRegion was
 * created in; it is used to eliminate redundant variant calls at the edges of shard boundaries.
 */
private static FlatMapFunction<Iterator<Tuple2<AssemblyRegion, SimpleInterval>>, VariantContext> callVariantsFromAssemblyRegions(
        final AuthHolder authHolder,
        final SAMFileHeader header,
        final Broadcast<ReferenceMultiSource> referenceBroadcast,
        final Broadcast<HaplotypeCallerArgumentCollection> hcArgsBroadcast) {
    return regionAndIntervals -> {
        //HaplotypeCallerEngine isn't serializable but is expensive to instantiate, so construct and reuse one for every partition
        final ReferenceMultiSourceAdapter referenceReader = new ReferenceMultiSourceAdapter(referenceBroadcast.getValue(), authHolder);
        final HaplotypeCallerEngine hcEngine = new HaplotypeCallerEngine(hcArgsBroadcast.value(), header, referenceReader);
        return iteratorToStream(regionAndIntervals).flatMap(regionToVariants(hcEngine)).iterator();
    };
}
 
Example 7
Source File: FirstIterationFunction.java    From deeplearning4j with Apache License 2.0
public FirstIterationFunction(Broadcast<Map<String, Object>> word2vecVarMapBroadcast,
                                     Broadcast<double[]> expTableBroadcast, Broadcast<VocabCache<VocabWord>> vocabCacheBroadcast) {

    Map<String, Object> word2vecVarMap = word2vecVarMapBroadcast.getValue();
    this.expTable = expTableBroadcast.getValue();
    this.vectorLength = (int) word2vecVarMap.get("vectorLength");
    this.useAdaGrad = (boolean) word2vecVarMap.get("useAdaGrad");
    this.negative = (double) word2vecVarMap.get("negative");
    this.window = (int) word2vecVarMap.get("window");
    this.alpha = (double) word2vecVarMap.get("alpha");
    this.minAlpha = (double) word2vecVarMap.get("minAlpha");
    this.totalWordCount = (long) word2vecVarMap.get("totalWordCount");
    this.seed = (long) word2vecVarMap.get("seed");
    this.maxExp = (int) word2vecVarMap.get("maxExp");
    this.iterations = (int) word2vecVarMap.get("iterations");
    this.batchSize = (int) word2vecVarMap.get("batchSize");
    this.indexSyn0VecMap = new HashMap<>();
    this.pointSyn1VecMap = new HashMap<>();
    this.vocab = vocabCacheBroadcast.getValue();

    if (this.vocab == null)
        throw new RuntimeException("VocabCache is null");

    if (negative > 0) {
        negativeHolder = NegativeHolder.getInstance();
        negativeHolder.initHolder(vocab, expTable, this.vectorLength);
    }
}
 
Example 8
Source File: FindAssemblyRegionsSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static FlatMapFunction<Iterator<Shard<GATKRead>>, AssemblyRegionWalkerContext> getAssemblyRegionsFunctionFast(
        final String referenceFileName,
        final Broadcast<FeatureManager> bFeatureManager,
        final SAMFileHeader header,
        final Broadcast<Supplier<AssemblyRegionEvaluator>> supplierBroadcast,
        final AssemblyRegionArgumentCollection assemblyRegionArgs) {
    return (FlatMapFunction<Iterator<Shard<GATKRead>>, AssemblyRegionWalkerContext>) shardedReadIterator -> {
        final ReferenceDataSource reference = referenceFileName == null ? null : new ReferenceFileSource(IOUtils.getPath(SparkFiles.get(referenceFileName)));
        final FeatureManager features = bFeatureManager == null ? null : bFeatureManager.getValue();
        final AssemblyRegionEvaluator assemblyRegionEvaluator = supplierBroadcast.getValue().get(); // one AssemblyRegionEvaluator instance per Spark partition
        final ReadsDownsampler readsDownsampler = assemblyRegionArgs.maxReadsPerAlignmentStart > 0 ?
                new PositionalDownsampler(assemblyRegionArgs.maxReadsPerAlignmentStart, header) : null;

        Iterator<Iterator<AssemblyRegionWalkerContext>> iterators = Utils.stream(shardedReadIterator)
                .map(shardedRead -> new ShardToMultiIntervalShardAdapter<>(
                        new DownsampleableSparkReadShard(
                                new ShardBoundary(shardedRead.getInterval(), shardedRead.getPaddedInterval()), shardedRead, readsDownsampler)))
                .map(downsampledShardedRead -> {
                    final Iterator<AssemblyRegion> assemblyRegionIter = new AssemblyRegionIterator(
                            new ShardToMultiIntervalShardAdapter<>(downsampledShardedRead),
                            header, reference, features, assemblyRegionEvaluator, assemblyRegionArgs);
                    return Utils.stream(assemblyRegionIter).map(assemblyRegion ->
                            new AssemblyRegionWalkerContext(assemblyRegion,
                                    new ReferenceContext(reference, assemblyRegion.getPaddedSpan()),
                                    new FeatureContext(features, assemblyRegion.getPaddedSpan()))).iterator();
                }).iterator();
        return Iterators.concat(iterators);
    };
}
 
Example 9
Source File: FindAssemblyRegionsSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static FlatMapFunction<Iterator<Shard<GATKRead>>, ActivityProfileStateRange> getActivityProfileStatesFunction(
        final String referenceFileName,
        final Broadcast<FeatureManager> bFeatureManager,
        final SAMFileHeader header,
        final Broadcast<Supplier<AssemblyRegionEvaluator>> supplierBroadcast,
        final AssemblyRegionArgumentCollection assemblyRegionArgs) {
    return (FlatMapFunction<Iterator<Shard<GATKRead>>, ActivityProfileStateRange>) shardedReadIterator -> {
        final ReferenceDataSource reference = referenceFileName == null ? null : new ReferenceFileSource(IOUtils.getPath(SparkFiles.get(referenceFileName)));
        final FeatureManager features = bFeatureManager == null ? null : bFeatureManager.getValue();
        final AssemblyRegionEvaluator assemblyRegionEvaluator = supplierBroadcast.getValue().get(); // one AssemblyRegionEvaluator instance per Spark partition
        
        return Utils.stream(shardedReadIterator)
                .map(shardedRead -> {
                    final ReadsDownsampler readsDownsampler = assemblyRegionArgs.maxReadsPerAlignmentStart > 0 ?
                            new PositionalDownsampler(assemblyRegionArgs.maxReadsPerAlignmentStart, header) : null;
                    return new ShardToMultiIntervalShardAdapter<>(
                            new DownsampleableSparkReadShard(
                                    new ShardBoundary(shardedRead.getInterval(), shardedRead.getPaddedInterval()), shardedRead, readsDownsampler));
                })
                .map(shardedRead -> {
                    final Iterator<ActivityProfileState> activityProfileStateIter = new ActivityProfileStateIterator(
                            new ShardToMultiIntervalShardAdapter<>(shardedRead),
                            header, reference, features, assemblyRegionEvaluator
                    );
                    return new ActivityProfileStateRange(shardedRead, activityProfileStateIter);
                }).iterator();
    };
}
 
Example 10
Source File: FindAssemblyRegionsSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static FlatMapFunction<Iterator<AssemblyRegion>, AssemblyRegionWalkerContext> getAssemblyRegionWalkerContextFunction(
        final String referenceFileName,
        final Broadcast<FeatureManager> bFeatureManager) {

    return (FlatMapFunction<Iterator<AssemblyRegion>, AssemblyRegionWalkerContext>) assemblyRegionIter -> {
        final ReferenceDataSource reference = referenceFileName == null ? null : new ReferenceFileSource(IOUtils.getPath(SparkFiles.get(referenceFileName)));
        final FeatureManager features = bFeatureManager == null ? null : bFeatureManager.getValue();
        return Utils.stream(assemblyRegionIter).map(assemblyRegion ->
                new AssemblyRegionWalkerContext(assemblyRegion,
                        new ReferenceContext(reference, assemblyRegion.getPaddedSpan()),
                        new FeatureContext(features, assemblyRegion.getPaddedSpan()))).iterator();
    };
}
 
Example 11
Source File: ReadWalkerSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static FlatMapFunction<Iterator<GATKRead>, ReadWalkerContext> getReadsFunction(
        String referenceFileName, Broadcast<FeatureManager> bFeatureManager) {
    return readIterator -> {
        ReferenceDataSource reference = referenceFileName == null ? null : new ReferenceFileSource(IOUtils.getPath(SparkFiles.get(referenceFileName)));
        FeatureManager features = bFeatureManager == null ? null : bFeatureManager.getValue();
        return Iterators.transform(readIterator, new Function<GATKRead, ReadWalkerContext>() {
            @Nullable
            @Override
            public ReadWalkerContext apply(@Nullable GATKRead r) {
                final SimpleInterval readInterval = getReadInterval(r);
                return new ReadWalkerContext(r, new ReferenceContext(reference, readInterval), new FeatureContext(features, readInterval));
            }
        });
    };
}
 
Example 12
Source File: CollectAllelicCountsSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static FlatMapFunction<Iterator<LocusWalkerContext>, AllelicCountCollector> distributedCount(final Broadcast<SampleLocatableMetadata> sampleMetadataBroadcast,
                                                                                                     final int minimumBaseQuality) {
    return (FlatMapFunction<Iterator<LocusWalkerContext>, AllelicCountCollector>) contextIterator -> {
        final AllelicCountCollector result = new AllelicCountCollector(sampleMetadataBroadcast.getValue());

        contextIterator.forEachRemaining(ctx -> {
            final byte refAsByte = ctx.getReferenceContext().getBase();
            result.collectAtLocus(Nucleotide.decode(refAsByte), ctx.getAlignmentContext().getBasePileup(),
                    ctx.getAlignmentContext().getLocation(), minimumBaseQuality);
        });
        return Collections.singletonList(result).iterator();
    };
}
 
Example 13
Source File: FindBreakpointEvidenceSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static List<SVInterval> findHighCoverageSubintervalsAndLog(
        final FindBreakpointEvidenceSparkArgumentCollection params,
        final JavaSparkContext ctx,
        final Broadcast<ReadMetadata> broadcastMetadata,
        final List<SVInterval> intervals,
        final JavaRDD<GATKRead> unfilteredReads,
        final SVReadFilter filter,
        final Logger logger) {

    final int minFlankingHighCovFactor = params.highDepthCoverageFactor;
    final int minPeakHighCovFactor = params.highDepthCoveragePeakFactor;

    final ReadMetadata shortReadMetadata = broadcastMetadata.getValue();
    int minFlankingHighCoverageValue = (int) (minFlankingHighCovFactor * shortReadMetadata.getCoverage());
    int minPeakHighCoverageValue = (int) (minPeakHighCovFactor * shortReadMetadata.getCoverage());
    final List<SVInterval> result =
            findHighCoverageSubIntervals(ctx, broadcastMetadata, intervals, unfilteredReads,
                    filter,
                    minFlankingHighCoverageValue,
                    minPeakHighCoverageValue);
    log("Found " + result.size() + " sub-intervals with coverage over " + minFlankingHighCoverageValue +
            " and a peak coverage of over " + minPeakHighCoverageValue + ".", logger);

    final String intervalFile = params.highCoverageIntervalsFile;
    if (intervalFile != null) {
        try (final OutputStreamWriter writer =
                     new OutputStreamWriter(new BufferedOutputStream(BucketUtils.createFile(intervalFile)))) {
            for (final SVInterval svInterval : result) {
                final String bedLine = shortReadMetadata.getContigName(svInterval.getContig()) + "\t" + (svInterval.getStart() - 1) + "\t" + svInterval.getEnd() + "\n";
                writer.write(bedLine);
            }
        } catch (final IOException ioe) {
            throw new UserException.CouldNotCreateOutputFile("Can't write high coverage intervals file " + intervalFile, ioe);
        }
    }
    return result;
}
 
Example 14
Source File: KMeansArbitraryDimension.java    From flink-perf with Apache License 2.0
public SelectNearestCentroid(Broadcast<List<Tuple2<Integer, Point>>> brCenters) {
	this.brCenters = brCenters.getValue();
}
 
Example 15
Source File: Reader.java    From iceberg with Apache License 2.0
Reader(Table table, Broadcast<FileIO> io, Broadcast<EncryptionManager> encryptionManager,
    boolean caseSensitive, DataSourceOptions options) {
  this.table = table;
  this.snapshotId = options.get("snapshot-id").map(Long::parseLong).orElse(null);
  this.asOfTimestamp = options.get("as-of-timestamp").map(Long::parseLong).orElse(null);
  if (snapshotId != null && asOfTimestamp != null) {
    throw new IllegalArgumentException(
        "Cannot scan using both snapshot-id and as-of-timestamp to select the table snapshot");
  }

  this.startSnapshotId = options.get("start-snapshot-id").map(Long::parseLong).orElse(null);
  this.endSnapshotId = options.get("end-snapshot-id").map(Long::parseLong).orElse(null);
  if (snapshotId != null || asOfTimestamp != null) {
    if (startSnapshotId != null || endSnapshotId != null) {
      throw new IllegalArgumentException(
          "Cannot specify start-snapshot-id and end-snapshot-id to do incremental scan when either snapshot-id or " +
              "as-of-timestamp is specified");
    }
  } else {
    if (startSnapshotId == null && endSnapshotId != null) {
      throw new IllegalArgumentException("Cannot only specify option end-snapshot-id to do incremental scan");
    }
  }

  // look for split behavior overrides in options
  this.splitSize = options.get("split-size").map(Long::parseLong).orElse(null);
  this.splitLookback = options.get("lookback").map(Integer::parseInt).orElse(null);
  this.splitOpenFileCost = options.get("file-open-cost").map(Long::parseLong).orElse(null);

  if (io.getValue() instanceof HadoopFileIO) {
    String scheme = "no_exist";
    try {
      Configuration conf = new Configuration(SparkSession.active().sparkContext().hadoopConfiguration());
      // merge hadoop config set on table
      mergeIcebergHadoopConfs(conf, table.properties());
      // merge hadoop config passed as options and overwrite the one on table
      mergeIcebergHadoopConfs(conf, options.asMap());
      FileSystem fs = new Path(table.location()).getFileSystem(conf);
      scheme = fs.getScheme().toLowerCase(Locale.ENGLISH);
    } catch (IOException ioe) {
      LOG.warn("Failed to get Hadoop Filesystem", ioe);
    }
    this.localityPreferred = options.get("locality").map(Boolean::parseBoolean)
        .orElse(LOCALITY_WHITELIST_FS.contains(scheme));
  } else {
    this.localityPreferred = false;
  }

  this.schema = table.schema();
  this.io = io;
  this.encryptionManager = encryptionManager;
  this.caseSensitive = caseSensitive;

  this.batchReadsEnabled = options.get("vectorization-enabled").map(Boolean::parseBoolean).orElse(
      PropertyUtil.propertyAsBoolean(table.properties(),
          TableProperties.PARQUET_VECTORIZATION_ENABLED, TableProperties.PARQUET_VECTORIZATION_ENABLED_DEFAULT));
  this.batchSize = options.get("batch-size").map(Integer::parseInt).orElse(
      PropertyUtil.propertyAsInt(table.properties(),
        TableProperties.PARQUET_BATCH_SIZE, TableProperties.PARQUET_BATCH_SIZE_DEFAULT));
}
 
Example 16
Source File: RevertSamSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
protected void runTool(JavaSparkContext ctx) {
    Broadcast<SAMFileHeader> headerBroadcast = ctx.broadcast(getHeaderForReads());
    JavaRDD<GATKRead> reads = getReads();

    ////////////////////////////////////////////////////////////////////////////
    // Grab the input header and remap values where appropriate
    ////////////////////////////////////////////////////////////////////////////
    SAMFileHeader localHeader = headerBroadcast.getValue();
    validateHeaderOverrides(localHeader, sampleAlias, libraryName);
    if (sampleAlias != null) {
        localHeader.getReadGroups().forEach(rg -> rg.setSample(sampleAlias));
    }
    if (libraryName != null) {
        localHeader.getReadGroups().forEach(rg -> rg.setLibrary(libraryName));
    }

    ////////////////////////////////////////////////////////////////////////////
    // Map the readgroups in the header to appropriate
    ////////////////////////////////////////////////////////////////////////////
    Map<String, Path> writerMap = getOutputMap(outputMap,
                                              output,
                                              getDefaultExtension(readArguments.getReadPathSpecifiers().get(0), outputByReadgroupFileFormat),
                                              localHeader.getReadGroups(),
                                              outputByReadGroup);

    ////////////////////////////////////////////////////////////////////////////
    // Construct appropriate headers for the output files
    ////////////////////////////////////////////////////////////////////////////
    final Map<String, SAMFileHeader> headerMap = getReadGroupHeaderMap(localHeader, writerMap);

    // Revert the reads based on the given attributes
    List<String> attributesToRevert = removeDefaults ? DEFAULT_ATTRIBUTES_TO_CLEAR : new ArrayList<>();
    attributesToRevert.addAll(attributesToClear);
    JavaRDD<GATKRead> readsReverted = revertReads(reads, attributesToRevert);

    ////////////////////////////////////////////////////////////////////////////
    // Sanitize the reads, sorting them into appropriate order if necessary
    ////////////////////////////////////////////////////////////////////////////
    if (sanitize) {
        Map<String, FastqQualityFormat> readGroupFormatMap = createReadGroupFormatMap(readsReverted, headerBroadcast, !dontRestoreOriginalQualities);

        readsReverted = sanitize(readGroupFormatMap, readsReverted, localHeader, keepFirstDuplicate);
    }

    // Write the one or many read output files
    for (Map.Entry<String, Path> rmap: writerMap.entrySet()) {
        //TODO what to do if the readgroup isn't present
        final String key = rmap.getKey();
        JavaRDD<GATKRead> filteredreads = rmap.getKey()==null? readsReverted :
                                                                readsReverted.filter(r -> r.getReadGroup().equals(key));
        writeReads(ctx, rmap.getValue().toString(), filteredreads, headerMap.get(rmap.getKey()), false); //TODO proper header map
    }
}
 
Example 17
Source File: ChunkProcessingTask.java    From p3-batchrefine with Apache License 2.0
public ChunkProcessingTask(Broadcast<String> transform,
                           Broadcast<String> header, Broadcast<Properties> exporterProperties) {
    fHeader = header.getValue();
    fTansform = transform.getValue();
    fProperites = exporterProperties.getValue();
}
 
Example 18
Source File: HaplotypeCallerSparkIntegrationTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
@Test
public void testBroadcastHcArgs() {
    Broadcast<HaplotypeCallerArgumentCollection> broadcast = SparkContextFactory.getTestSparkContext().broadcast(new HaplotypeCallerArgumentCollection());
    broadcast.getValue();
}
 
Example 19
Source File: StructureToAtomInteractions.java    From mmtf-spark with Apache License 2.0
/**
 * This constructor specifies that the interaction of a specific
 * group and a specific atom should be calculated, e.g., the atom "O" in
 * the group "HOH".
 * @param bfilter Specifies the conditions for calculating interactions
 * @param pairwise If true, each pairwise interaction is returned as its own row; otherwise
 * the interactions of one atom with all other atoms are returned as a single row.
 */
public StructureToAtomInteractions(Broadcast<InteractionFilter> bfilter, boolean pairwise) {
	this.filter = bfilter.getValue();
	this.pairwise = pairwise;
}