package HMMR_ATAC; /* * Copyright (C) 2019 Evan Tarbell and Tao Liu This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. */ /* import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SAMFormatException; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.util.CloseableIterator; */ import java.io.File; import java.util.ArrayList; import java.util.Random; import Node.TagNode; import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMFormatException; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; public class pullLargeLengths { private File bam; private File index; private int minQ; private double[] lengths; private ArrayList<TagNode> genome; private double[] weights = new double[3]; private double[] means; /** * Constructor for creating pullLargeLengths object, reading the data and setting weights * @param b a File representing the BAM file of reads * @param i a File representing the BAM index file of reads * @param m an integer representing the minimum mapping quality of reads to pull * @param g an ArrayList of TagNode representing the genome * @param mu an Array of doubles representing the means of the fragment length distributions */ public pullLargeLengths(File b, File i,int m,ArrayList<TagNode> g,double[] mu){ bam = b; index = i; minQ = m; genome = g; means = mu; read(); setWeights(); } /** * Access the lengths * @return an Array of doubles representing the lengths of the fragments read */ public double[] getLengths(){return lengths;} /** * Access the weights * @return an Array of doubles representing the proportion of fragments in each length category */ public double[] getWeights(){return weights;} /** * Access the down sampled weights * @param div an integer representing the factor to divide the number of lengths by to downsample * @return an Array of doubles representing the downsampled lengths */ public double[] getSampledLengths(int div){ int len = lengths.length/div; double[] newLengths = new double[len]; shuffle(lengths); for(int i = 0; i < len;i++){ newLengths[i] = lengths[i]; } return newLengths; } /** * Shuffle the length data * @param list an Array of doubles representing the pulled fragment lengths */ private void shuffle(double[] list){ Random rnd = new Random(); for(int i = list.length -1;i > 0;i--){ int index = rnd.nextInt(i+1); double a = list[index]; list[index] = list[i]; list[i] = a; } } /** * Read the data and create a list of lengths */ private void read(){ int counter = 0; SAMFileReader reader = new SAMFileReader(bam,index); ArrayList<Double> temp = new ArrayList<Double>(); for (int i = 0; i < genome.size();i++){ String chr = genome.get(i).getChrom(); int start = genome.get(i).getStart(); int stop = genome.get(i).getStop(); CloseableIterator<SAMRecord> iter = reader.query(chr,start,stop,false); while (iter.hasNext()){ SAMRecord record = null; try{ record = iter.next(); } catch(SAMFormatException ex){ System.out.println("SAM Record is problematic. Has mapQ != 0 for unmapped read. Will continue anyway"); } if(record != null){ if(!record.getReadUnmappedFlag() && !record.getMateUnmappedFlag() && record.getFirstOfPairFlag()) { if (record.getMappingQuality() >= minQ){ if (Math.abs(record.getInferredInsertSize()) > 100 && Math.abs(record.getInferredInsertSize()) < 1000){ counter+=1; temp.add((double)Math.abs(record.getInferredInsertSize())); } } } } } iter.close(); } reader.close(); lengths = new double[counter]; for (int i = 0;i < temp.size();i++){ if (temp.get(i) > 100){ lengths[i] = temp.get(i); } } } /** * Set the weights ie the proportion of fragments in each length category */ private void setWeights(){ double cutone = (means[1] - means[0])/2 + means[0]; double cuttwo = (means[2] - means[1])/2 + means[1]; int counter1=0; int counter2=0; int counter3=0; for (int i = 0;i < lengths.length;i++){ if (lengths[i] < cutone) counter1++; else if(lengths[i] >= cutone && lengths[i] <= cuttwo) counter2++; else counter3++; } weights[0] = (double)counter1/(double)lengths.length; weights[1] = (double)counter2/(double)lengths.length; weights[2] = (double)counter3/(double)lengths.length; } }