Java Code Examples for htsjdk.samtools.util.StringUtil#join()

The following examples show how to use htsjdk.samtools.util.StringUtil#join(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: IlluminaBasecallsToFastqTest.java    From picard with MIT License 6 votes vote down vote up
/**
 * Copies a tab-separated params file, appending an {@code OUTPUT_PREFIX} column to every row.
 *
 * The header line gets the literal column name; each data row gets the path of a file under
 * {@code outputDir} whose name is the concatenation of the row's first
 * {@code concatNColumnFields} fields. Every such file is also appended to {@code outputPrefixes}.
 *
 * @param libraryParamsFile   name of the input file, resolved against {@code testDataDir}
 * @param concatNColumnFields number of leading columns concatenated to form the prefix file name
 * @param testDataDir         directory containing the input file
 * @param outputDir           directory the generated prefixes are resolved against
 * @param libraryParams       file the rewritten table is written to
 * @param outputPrefixes      list that receives one prefix file per data row
 * @throws FileNotFoundException if the input cannot be opened or the output cannot be created
 */
private void convertParamsFile(String libraryParamsFile, int concatNColumnFields, File testDataDir, File outputDir, File libraryParams, List<File> outputPrefixes) throws FileNotFoundException {
    // Both resources live in the try header so the writer is closed even when reading or
    // splitting a row throws; previously it was only closed on the success path.
    try (LineReader reader = new BufferedLineReader(new FileInputStream(new File(testDataDir, libraryParamsFile)));
         PrintWriter writer = new PrintWriter(libraryParams)) {
        final String header = reader.readLine();
        writer.println(header + "\tOUTPUT_PREFIX");

        String line;
        while ((line = reader.readLine()) != null) {
            final String[] fields = line.split("\t");
            final File outputPrefix = new File(outputDir, StringUtil.join("", Arrays.copyOfRange(fields, 0, concatNColumnFields)));
            outputPrefixes.add(outputPrefix);
            writer.println(line + "\t" + outputPrefix);
        }
    }
}
 
Example 2
Source File: CompareDropSeqAlignments.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Writes a tab-separated contig report to {@code outFile}.
 *
 * The output starts with a {@code #}-prefixed line listing the run parameters, followed by a
 * column-name line and one row per contig result, in the order returned by
 * {@code contigResults.getKeysOrderedByCount(true)}.
 *
 * @param outFile       destination file
 * @param contigResults counter holding the contig results and their counts
 */
private void writeContigReport (final File outFile, final ObjectCounter<ContigResult> contigResults) {
	// try-with-resources closes the stream even if a row fails to format or write.
	try (PrintStream writer = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(outFile))) {
		List<String> header = new ArrayList<>();
		header.add("INPUT_1="+this.INPUT_1.toString());
		header.add("INPUT_2="+this.INPUT_2.toString());
		header.add("READ_QUALITY="+this.READ_QUALITY);
		header.add("TRIM_CONTIG_STRING="+this.TRIM_CONTIG_STRING);
		String h = StringUtils.join(header, "\t");
		writer.print("#");
		writer.println(h);

		String [] colNames = {"ORIGINAL_CONTIG", "NEW_CONTIGS", "NUM_READS", "MAPPED_UNIQUELY"};
		writer.println(StringUtil.join("\t", colNames));

		List<ContigResult> crList = contigResults.getKeysOrderedByCount(true);
		for (ContigResult cr: crList) {
			String [] body = {cr.getOldContig(), StringUtil.join(",", cr.getNewContigs()), Integer.toString(contigResults.getCountForKey(cr)), Boolean.toString(cr.isNewReadMapsUniquely())};
			writer.println(StringUtil.join("\t", body));
		}
	}
}
 
Example 3
Source File: CollapseTagWithContext.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Prints one tab-separated line per edit-distance mapping metric of {@code r} to {@code out}.
 *
 * Each line holds the context, the barcode, the merged-barcode count plus one, the discovered
 * and used edit distances, and the original and total observation counts. When requested, a
 * final column carries the comma-separated edit-distance list, or {@code NA} if that list is empty.
 *
 * @param writeEditDistanceDistribution whether to append the edit-distance list column
 * @param context                       value written in the first column of every line
 * @param r                             result supplying the metrics
 * @param out                           destination stream; when null nothing is written
 */
private void writeMetrics (final boolean writeEditDistanceDistribution, final String context, final AdaptiveMappingResult r, final PrintStream out) {
	if (out==null) {
		return;
	}

	for (final EditDistanceMappingMetric metric: r.getMetricResult()) {
		// Return value is discarded here. NOTE(review): looks like a no-op - confirm the getter has no side effects before removing.
		metric.getOriginalObservations();

		final List<String> columns = new ArrayList<>();
		columns.add(context);
		columns.add(metric.getBarcode());
		// Steve reports the number of barcodes including the one that everything is merged into.
		columns.add(Integer.toString(metric.getNumMergedBarcodes()+1));
		columns.add(Integer.toString(metric.getEditDistanceDiscovered()));
		columns.add(Integer.toString(metric.getEditDistanceUsed()));
		columns.add(Integer.toString(metric.getOriginalObservations()));
		columns.add(Integer.toString(metric.getTotalObservations()));

		if (writeEditDistanceDistribution) {
			final int [] distances = metric.getEdList();
			if (distances.length==0) {
				columns.add("NA");
			} else {
				final Integer[] boxed = Arrays.stream( distances ).boxed().toArray( Integer[]::new );
				columns.add(StringUtil.join(",", boxed));
			}
		}

		out.println(StringUtil.join("\t", columns));
	}
}
 
Example 4
Source File: Adjuster.java    From rtg-tools with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Rewrites a comma-separated FORMAT field value by passing its parts through
 * {@code sumAdjustField} and joining the result with commas.
 *
 * A value equal to {@code VcfUtils.MISSING_FIELD} is returned as the missing-field marker.
 *
 * @param fieldValue the raw field value
 * @param alleleMap  allele mapping; its length must equal the number of comma-separated parts
 * @param numAlleles forwarded to {@code sumAdjustField}
 * @param type       forwarded to {@code sumAdjustField}
 * @return the adjusted field value
 * @throws VcfFormatException if the part count differs from {@code alleleMap.length}, or if a
 *         {@link NumberFormatException} is raised while adjusting
 */
private String sumAdjustFormatField(String fieldValue, int[] alleleMap, int numAlleles, MetaType type) {
  if (VcfUtils.MISSING_FIELD.equals(fieldValue)) {
    return VcfUtils.MISSING_FIELD;
  }

  final String[] values = StringUtils.split(fieldValue, ',');
  if (values.length != alleleMap.length) {
    throw new VcfFormatException("FORMAT field value " + fieldValue + " was expected to contain " + alleleMap.length + " values");
  }

  try {
    return StringUtil.join(",", sumAdjustField(alleleMap, numAlleles, type, values));
  } catch (NumberFormatException e) {
    // Only the message is carried over into the format exception.
    throw new VcfFormatException(e.getMessage());
  }
}
 
Example 5
Source File: IlluminaBasecallsToSam.java    From picard with MIT License 6 votes vote down vote up
/**
 * Validates a set of column labels intended to be used as RG header attributes.
 *
 * Two checks are made, in this order: no label may also be a key of the map returned by
 * {@code buildSamHeaderParameters(null)}, and no label may be longer than two characters.
 *
 * @param rgTagColumns A set of columns that should conform to the rg header attribute format
 * @throws PicardException if either check fails
 */
private void checkRgTagColumns(final Set<String> rgTagColumns) {
    // Reduce the default header keys to just those that clash with the supplied columns.
    final Set<String> clashingTags = buildSamHeaderParameters(null).keySet();
    clashingTags.retainAll(rgTagColumns);

    final boolean hasClash = !clashingTags.isEmpty();
    if (hasClash) {
        final String paramsPath = LIBRARY_PARAMS.getAbsolutePath();
        final String offending = StringUtil.join(", ", clashingTags);
        throw new PicardException("Illegal ReadGroup tags in library params(barcode params) file(" + paramsPath + ") Offending headers = " + offending);
    }

    for (final String label : rgTagColumns) {
        if (label.length() <= 2) {
            continue;
        }
        throw new PicardException("Column label (" + label + ") unrecognized.  Library params(barcode params) can only contain the columns " +
                "(OUTPUT, LIBRARY_NAME, SAMPLE_ALIAS, BARCODE, BARCODE_<X> where X is a positive integer) OR two letter RG tags!");
    }
}
 
Example 6
Source File: PerTileParser.java    From picard with MIT License 5 votes vote down vote up
/**
 * Records {@code oneBasedTileNumber} as the next tile and discards the current iterator.
 *
 * Note that {@code nextTile} is assigned before the tile is validated.
 *
 * @param oneBasedTileNumber tile to seek to; must be a key of {@code tileToFiles}
 * @throws PicardException if {@code tileToFiles} has no entry for the tile
 */
public void seekToTile(int oneBasedTileNumber) {
    nextTile = oneBasedTileNumber;

    final boolean knownTile = tileToFiles.containsKey(oneBasedTileNumber);
    if (!knownTile) {
        final String availableKeys = StringUtil.join(",", new ArrayList<Integer>(tileToFiles.keySet()));
        throw new PicardException("PerTileParser does not contain key(" + oneBasedTileNumber +") keys available (" + availableKeys + ")");
    }

    // Close and drop any iterator that is still open.
    if (currentIterator != null) {
        currentIterator.close();
        currentIterator = null;
    }
}
 
Example 7
Source File: XsvLocatableTableCodec.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Verifies that every locatable column has a non-null entry in {@code headerToIndex}.
 *
 * @param locatableColumns column names that must be present
 * @param headerToIndex    mapping from header name to column index
 * @throws UserException.BadInput listing the missing columns (passed through
 *         {@code getHeaderWithoutPrefix}) when at least one has no index
 */
private void assertLocatableColumnsInHeaderToIndex(final List<String> locatableColumns, final Map<String, Integer> headerToIndex) {
    final List<String> absent = locatableColumns.stream()
            .filter(column -> headerToIndex.get(column) == null)
            .map(column -> getHeaderWithoutPrefix(column))
            .collect(Collectors.toList());

    if (absent.isEmpty()) {
        return;
    }

    final String absentList = StringUtil.join(", ", absent);
    throw new UserException.BadInput("Error in input file: cannot find the locatable column(s): " + absentList
            + ", though these were specified in the parsing configuration."
            + "  Do those columns need to be added to the input file?"
            + "  Do you have a heterogenous preamble (e.g. lines that start with both '#' and '@') before the headers?"
            + "  Does each line of your preamble start with the correct string ('" + preambleLineStart + "')?");
}
 
Example 8
Source File: CollectHsMetrics.java    From picard with MIT License 5 votes vote down vote up
/**
 * Returns {@code BAIT_SET_NAME} when it is set; otherwise derives a name by rendering a probe
 * name for each file in {@code BAIT_INTERVALS} and joining the sorted, de-duplicated names with
 * a period.
 */
@Override
protected String getProbeSetName() {
    if (BAIT_SET_NAME != null) {
        return BAIT_SET_NAME;
    }

    // A sorted set keeps the derived name stable regardless of input file order.
    final SortedSet<String> names = new TreeSet<>();
    for (final File baitFile : BAIT_INTERVALS) {
        final String probeName = CollectTargetedMetrics.renderProbeNameFromFile(baitFile);
        names.add(probeName);
    }
    return StringUtil.join(".", names);
}
 
Example 9
Source File: IlluminaBasecallsToFastq.java    From picard with MIT License 5 votes vote down vote up
/**
 * For each line in the MULTIPLEX_PARAMS file create a FastqRecordsWriter and put it in the sampleBarcodeFastqWriterMap map,
 * where the key to the map is the concatenation of all sampleBarcodes in order for the given line.
 *
 * The key is {@code null} when there are no sample barcode columns or when any barcode value of
 * the row is "N". The parser is closed before this method returns, including when it throws.
 *
 * @throws PicardException if two rows produce the same key, or if the file has no data rows
 */
private void populateWritersFromMultiplexParams() {
    final TabbedTextFileWithHeaderParser libraryParamsParser = new TabbedTextFileWithHeaderParser(MULTIPLEX_PARAMS);

    // try/finally so the parser is released on the exception paths as well as on success.
    try {
        final Set<String> expectedColumnLabels = CollectionUtil.makeSet("OUTPUT_PREFIX");
        final List<String> sampleBarcodeColumnLabels = new ArrayList<>();
        for (int i = 1; i <= readStructure.sampleBarcodes.length(); i++) {
            sampleBarcodeColumnLabels.add("BARCODE_" + i);
        }

        expectedColumnLabels.addAll(sampleBarcodeColumnLabels);
        assertExpectedColumns(libraryParamsParser.columnLabels(), expectedColumnLabels);

        for (final TabbedTextFileWithHeaderParser.Row row : libraryParamsParser) {
            List<String> sampleBarcodeValues = null;

            if (!sampleBarcodeColumnLabels.isEmpty()) {
                sampleBarcodeValues = new ArrayList<>();
                for (final String sampleBarcodeLabel : sampleBarcodeColumnLabels) {
                    sampleBarcodeValues.add(row.getField(sampleBarcodeLabel));
                }
            }

            final String key = (sampleBarcodeValues == null || sampleBarcodeValues.contains("N")) ? null : StringUtil.join("", sampleBarcodeValues);
            if (sampleBarcodeFastqWriterMap.containsKey(key)) {    //This will catch the case of having more than 1 line in a non-barcoded MULTIPLEX_PARAMS file
                throw new PicardException("Row for barcode " + key + " appears more than once in MULTIPLEX_PARAMS file " +
                        MULTIPLEX_PARAMS);
            }

            final FastqRecordsWriter writer = buildWriter(new File(row.getField("OUTPUT_PREFIX")));
            sampleBarcodeFastqWriterMap.put(key, writer);
        }

        if (sampleBarcodeFastqWriterMap.isEmpty()) {
            // Message previously read "does have any data rows", the opposite of what is meant.
            throw new PicardException("MULTIPLEX_PARAMS file " + MULTIPLEX_PARAMS + " does not have any data rows.");
        }
    } finally {
        libraryParamsParser.close();
    }
}
 
Example 10
Source File: PerTileParser.java    From picard with MIT License 5 votes vote down vote up
/**
 * Checks that the tiles known to this parser are exactly the expected {@code tiles}.
 *
 * @param tiles  the expected tile numbers
 * @param cycles not read by this method
 * @throws PicardException if an expected tile is absent from {@code tileToFiles}, or if
 *         {@code tileToFiles} contains a tile that was not expected
 */
public void verifyData(List<Integer> tiles, final int [] cycles) {
    final List<Integer> mapTiles = new ArrayList<Integer>(this.tileToFiles.keySet());
    if(!mapTiles.containsAll(tiles)) {
        throw new PicardException("Missing tiles in PerTileParser expected(" + StringUtil.join(",", tiles) + ") but found (" + StringUtil.join(",", mapTiles) + ")");
    }

    if(!tiles.containsAll(mapTiles)) {
        // Message typo fixed: "where found" -> "were found".
        throw new PicardException("Extra tiles were found in PerTileParser  expected(" + StringUtil.join(",", tiles) + ") but found (" + StringUtil.join(",", mapTiles) + ")");
    }
}
 
Example 11
Source File: PerTileParser.java    From picard with MIT License 5 votes vote down vote up
/**
 * Creates a parser over the given tile-to-file map, positioned so that {@code nextTile} is the
 * next tile to be read.
 *
 * @param tilesToFiles map from tile number to the file(s) for that tile
 * @param nextTile     the first tile to read; must be a key of {@code tilesToFiles}
 * @throws IllegalArgumentException if {@code tilesToFiles} has no entry for {@code nextTile}
 */
public PerTileParser(final IlluminaFileMap tilesToFiles, final int nextTile) {
    this.tileToFiles = tilesToFiles;
    this.currentTile = null;
    this.nextTile = nextTile;

    if(!tilesToFiles.containsKey(nextTile)) {
        // The closing ")" after the key list was missing from the message.
        throw new IllegalArgumentException("NextTile (" + nextTile + ") is not contained by tilesToFiles (" + StringUtil.join(",", new ArrayList<Integer>(tilesToFiles.keySet())) + ")");
    }
}
 
Example 12
Source File: CollapseTagWithContext.java    From Drop-seq with MIT License 5 votes vote down vote up
/**
 * Builds the output writer from the reader's header, after adding a PG record and a comment
 * describing the collapse settings to that header.
 *
 * @param reader the input reader whose file header is reused (and modified) for the output
 * @return a SAM or BAM writer for {@code OUTPUT}, created with presorted = false
 */
private SAMFileWriter getWriter (final SamReader reader) {
	SAMFileHeader header = reader.getFileHeader();
	SamHeaderUtil.addPgRecord(header, this);
	String context = StringUtil.join(" ", this.CONTEXT_TAGS);
	// A space was missing before "using", which glued the edit distance value to the next word.
	header.addComment("Edit distance collapsed tag " +  this.COLLAPSE_TAG + " to new tag " + this.OUT_TAG+ " with edit distance "+ this.EDIT_DISTANCE + " using indels=" + this.FIND_INDELS + " in the context of tags [" + context + "]");
	SAMFileWriter writer= new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, this.OUTPUT);
	return writer;
}
 
Example 13
Source File: IlluminaDataProviderFactory.java    From picard with MIT License 4 votes vote down vote up
/**
 * Create factory with the specified options, one that favors using QSeqs over all other files
 *
 * @param basecallDirectory The baseCalls directory of a complete Illumina directory.  Files are found by searching relative to this folder (some of them higher up in the directory tree).
 * @param barcodesDirectory The barcodesDirectory with barcode files extracted by 'ExtractIlluminaBarcodes' (optional, use basecallDirectory if not specified)
 * @param lane              Which lane to iterate over.
 * @param readStructure     The read structure to which output clusters will conform.  When not using QSeqs, EAMSS masking(see BclParser) is run on individual reads as found in the readStructure, if
 *                          the readStructure specified does not match the readStructure implied by the sequencer's output then the quality scores output may differ from what would be found
 *                          in a run's QSeq files
 * @param bclQualityEvaluationStrategy Strategy stored on this factory; it is not otherwise used inside this constructor
 * @param dataTypesArg      Which data types to read
 * @throws PicardException if no data types are given, if any requested data type has no format
 *                         with available files, or if no tiles are available for the lane
 */
public IlluminaDataProviderFactory(final File basecallDirectory, File barcodesDirectory, final int lane,
                                   final ReadStructure readStructure,
                                   final BclQualityEvaluationStrategy bclQualityEvaluationStrategy, final IlluminaDataType... dataTypesArg) {
    this.basecallDirectory = basecallDirectory;
    this.barcodesDirectory = barcodesDirectory;
    this.bclQualityEvaluationStrategy = bclQualityEvaluationStrategy;

    this.lane = lane;
    /* The types of data that will be returned by any IlluminaDataProviders created by this factory.
      Note: In previous version, data of types not specified might be returned if a data type was specified
      for data residing in QSeqs (since QSeqs span multiple data types).  This is no longer the case, you
      MUST specify all data types that should be returned.*/
    final Set<IlluminaDataType> dataTypes = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(dataTypesArg)));

    if (dataTypes.isEmpty()) {
        throw new PicardException("No data types have been specified for basecall output " + basecallDirectory +
                ", lane " + lane);
    }

    this.fileUtil = new IlluminaFileUtil(basecallDirectory, barcodesDirectory, lane);

    //find what request IlluminaDataTypes we have files for and select the most preferred file format available for that type
    formatToDataTypes = determineFormats(dataTypes, fileUtil);

    //find if we have any IlluminaDataType with NO available file formats and, if any exist, throw an exception
    final Set<IlluminaDataType> unmatchedDataTypes = findUnmatchedTypes(dataTypes, formatToDataTypes);
    if (!unmatchedDataTypes.isEmpty()) {
        throw new PicardException("Could not find a format with available files for the following data types: " + StringUtil.join(", ", new ArrayList<>(unmatchedDataTypes)));
    }

    // The separator and the collection must be separate arguments; concatenating them with '+'
    // made the whole string the separator and joined nothing, so the log listed no formats.
    log.debug("The following file formats will be used by IlluminaDataProvider: " + StringUtil.join(",", formatToDataTypes.keySet()));

    availableTiles = fileUtil.getActualTiles(new ArrayList<>(formatToDataTypes.keySet()));
    if (availableTiles.isEmpty()) {
        throw new PicardException("No available tiles were found, make sure that " + basecallDirectory.getAbsolutePath() + " has a lane " + lane);
    }
    availableTiles.sort(NewIlluminaBasecallsConverter.TILE_NUMBER_COMPARATOR);

    //fill in available tiles for run based files
    formatToDataTypes.keySet().stream().map(fileUtil::getUtil)
            .forEach(util -> util.setTilesForPerRunFile(availableTiles));

    outputMapping = new OutputMapping(readStructure);
}
 
Example 14
Source File: GQuadruplex.java    From Drop-seq with MIT License 4 votes vote down vote up
/**
 * Returns the elements of {@code sequence} concatenated with no separator.
 */
public String getSequence() {
	final String joined = StringUtil.join("", this.sequence);
	return joined;
}
 
Example 15
Source File: GenotypeConcordanceScheme.java    From picard with MIT License 4 votes vote down vote up
/**
 * Renders the contingency states for a truth/call state pair as a parse-able string.
 *
 * @param truthState the truth state to look up
 * @param callState  the call state to look up
 * @return the states joined by commas, or the literal {@code EMPTY} when there are none
 */
public String getContingencyStateString(final TruthState truthState, final CallState callState) {
    final ContingencyState[] states = getConcordanceStateArray(truthState, callState);
    if (states.length == 0) {
        return "EMPTY";
    }
    return StringUtil.join(",", states);
}
 
Example 16
Source File: GeneResult.java    From Drop-seq with MIT License 4 votes vote down vote up
/**
 * Renders the gene, contig and read counts in bracketed form, followed by the comma-separated
 * lists of other unique genes, other non-unique genes and other contigs.
 */
@Override
public String toString () {
	// Bracketed gene, contig and count section.
	final String counts = "Gene [" + this.originalGene +"] contig [" + getOriginalContig()
			+"] original read count  [" + countOriginalReads
			+ "] same mapping [" + this.countSameMapping
			+"] different unique count [" + countDifferentUniqueGene
			+"] non-unique read count [" + getCountSameGeneMapsNonUniqueCount()
			+"] multimap gene counts [" + getCountMultiGeneMappingCount() + "]";

	// Trailing comma-separated lists.
	final String others = " other unique genes " + StringUtil.join(",", getUniqueMapOtherGene())
			+ " other non-unique genes " + StringUtil.join(",", getNonUniqueMapOtherGene())
			+ " other contigs " + StringUtil.join(",", getOtherContigs());

	return counts + others;
}
 
Example 17
Source File: DgeHeaderCodec.java    From Drop-seq with MIT License 4 votes vote down vote up
/**
 * Joins {@code fields} with {@code FIELD_SEPARATOR} and returns the resulting string.
 */
public String build() {
    final String joined = StringUtil.join(FIELD_SEPARATOR, fields);
    return joined;
}