Skip to content

Commit

Permalink
Merge pull request #108 from PolinaBevad/unique_mode_by_read_number
Browse files Browse the repository at this point in the history
Added unique mode by read number (option -UN)
  • Loading branch information
pcingola authored Jun 14, 2018
2 parents cc6aba9 + 5a312fb commit 2680d40
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 25 deletions.
5 changes: 4 additions & 1 deletion Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,10 @@ The VarDictJava program follows the workflow:
`SILENT` - Like `LENIENT`, only don't emit warning messages.
Default: `LENIENT`
- `-u`
Indicate unique mode, which when mate pairs overlap, the overlapping part will be counted only once using forward read only.
Indicate unique mode, which when mate pairs overlap, the overlapping part will be counted only once using **forward** read only.
Default: unique mode disabled, all reads are counted.
- `-UN`
Indicate unique mode, which when mate pairs overlap, the overlapping part will be counted only once using **first** read only.
Default: unique mode disabled, all reads are counted.
- `--chimeric`
Indicate to turn off chimeric reads filtering. Chimeric reads are artifacts from library construction,
Expand Down
8 changes: 7 additions & 1 deletion src/main/java/com/astrazeneca/vardict/Configuration.java
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,13 @@ public class Configuration {
* Indicate unique mode, which when mate pairs overlap,
* the overlapping part will be counted only once using forward read only.
*/
boolean uniqueModeOn = false; // -u
boolean uniqueModeAlignmentEnabled = false; // -u

/**
* Indicate unique mode, which when mate pairs overlap,
* the overlapping part will be counted only once using first read only.
*/
boolean uniqueModeSecondInPairEnabled = false; // -UN

/**
* Threads count
Expand Down
8 changes: 6 additions & 2 deletions src/main/java/com/astrazeneca/vardict/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,12 @@ private void run(CommandLine cmd) throws ParseException, IOException {
conf.chimeric = true;
}

if (cmd.hasOption("UN")) {
conf.uniqueModeSecondInPairEnabled = true;
}

if (cmd.hasOption("u")) {
conf.uniqueModeOn = true;
conf.uniqueModeAlignmentEnabled = true;
}

conf.threads = Math.max(readThreadsCount(cmd), 1);
Expand Down Expand Up @@ -190,7 +193,8 @@ private static Options buildOptions() {
options.addOption("t", false, "Indicate to remove duplicated reads. Only one pair with same start positions will be kept");
options.addOption("3", false, "Indicate to move indels to 3-prime if alternative alignment can be achieved.");
options.addOption("K", false, "Include Ns in the total depth calculation");
options.addOption("u", false, "Indicate unique mode, which when mate pairs overlap, the overlapping part will be counted only once using forward read only.");
options.addOption("u", false, "Indicate unique mode, which when mate pairs overlap, the overlapping part will be counted only once using first read only.");
options.addOption("UN", false, "Indicate unique mode, which when mate pairs overlap, the overlapping part will be counted only once using forward read only.");
options.addOption("chimeric", false, "Indicate to turn off chimeric reads filtering.");

options.addOption(OptionBuilder.withArgName("bit")
Expand Down
71 changes: 50 additions & 21 deletions src/main/java/com/astrazeneca/vardict/VarDict.java
Original file line number Diff line number Diff line change
Expand Up @@ -1582,12 +1582,10 @@ static Tuple4<Map<Integer, Map<String, Variation>>, Map<Integer, Map<String, Var

int mateAlignmentStart = record.getMateAlignmentStart();

boolean isPairedAndTheSameChromosome = record.getReadPairedFlag()
&& record.getMateReferenceName().equals(record.getReferenceName());

processCigar:
//Loop over CIGAR records
for (int ci = 0; ci < cigar.numCigarElements(); ci++) {
if (conf.uniqueModeOn && !dir && isPairedAndTheSameChromosome && start >= mateAlignmentStart) {
if (skipOverlappingReads(conf, record, dir, start, mateAlignmentStart)) {
break;
}
//length of segment in CIGAR
Expand Down Expand Up @@ -2474,8 +2472,8 @@ && isNotEquals('N', ref.get(start))) {
p++;
}
// Skip read if it is overlap
if (conf.uniqueModeOn && !dir && isPairedAndTheSameChromosome && start >= mateAlignmentStart) {
break;
if (skipOverlappingReads(conf, record, dir, start, mateAlignmentStart)) {
break processCigar;
}
}
if (moffset != 0) {
Expand All @@ -2500,28 +2498,59 @@ && isNotEquals('N', ref.get(start))) {
}

if (conf.performLocalRealignment) {
if (conf.y)
System.err.println("Start Realigndel");
realigndel(hash, dels5, cov, sclip5, sclip3, ref, region.chr, chrs, rlen, bams, conf);
if (conf.y)
System.err.println("Start Realignins");
realignins(hash, iHash, ins, cov, sclip5, sclip3, ref, region.chr, chrs, conf);
if (conf.y)
System.err.println("Start Realignlgdel");
realignlgdel(hash, cov, sclip5, sclip3, ref, region.chr, chrs, rlen, bams, conf);
if (conf.y)
System.err.println("Start Realignlgins");
realignlgins(hash, iHash, cov, sclip5, sclip3, ref, region.chr, chrs, rlen, bams, conf);
if (conf.y)
System.err.println("Start Realignlgins30");
realignlgins30(hash, iHash, cov, sclip5, sclip3, ref, region.chr, chrs, rlen, bams, conf);
realignIndels(region, chrs, rlen, ref, conf, bams, hash, iHash, cov, sclip3, sclip5, ins, dels5);
}

adjMNP(hash, mnp, cov, ref, sclip3, sclip5, conf);

return tuple(hash, iHash, cov, rlen);
}

private static void realignIndels(Region region, Map<String, Integer> chrs, int rlen, Map<Integer, Character> ref, Configuration conf, String[] bams, Map<Integer, Map<String, Variation>> hash, Map<Integer, Map<String, Variation>> iHash, Map<Integer, Integer> cov, Map<Integer, Sclip> sclip3, Map<Integer, Sclip> sclip5, Map<Integer, Map<String, Integer>> ins, Map<Integer, Map<String, Integer>> dels5) throws IOException {
if (conf.y)
System.err.println("Start Realigndel");
realigndel(hash, dels5, cov, sclip5, sclip3, ref, region.chr, chrs, rlen, bams, conf);
if (conf.y)
System.err.println("Start Realignins");
realignins(hash, iHash, ins, cov, sclip5, sclip3, ref, region.chr, chrs, conf);
if (conf.y)
System.err.println("Start Realignlgdel");
realignlgdel(hash, cov, sclip5, sclip3, ref, region.chr, chrs, rlen, bams, conf);
if (conf.y)
System.err.println("Start Realignlgins");
realignlgins(hash, iHash, cov, sclip5, sclip3, ref, region.chr, chrs, rlen, bams, conf);
if (conf.y)
System.err.println("Start Realignlgins30");
realignlgins30(hash, iHash, cov, sclip5, sclip3, ref, region.chr, chrs, rlen, bams, conf);
}

private static boolean skipOverlappingReads(Configuration conf, SAMRecord record, boolean dir, int start, int mateAlignmentStart) {
if (conf.uniqueModeAlignmentEnabled && isPairedAndSameChromosome(record)
&& !dir && start >= mateAlignmentStart) {
return true;
}
if (conf.uniqueModeSecondInPairEnabled && record.getSecondOfPairFlag() && isPairedAndSameChromosome(record)
&& isReadsOverlap(record, start, mateAlignmentStart)) {
return true;
}
return false;
}

private static boolean isReadsOverlap(SAMRecord record, int start, int mateAlignmentStart){
if (record.getAlignmentStart() >= mateAlignmentStart) {
return start >= mateAlignmentStart
&& start <= (mateAlignmentStart + record.getCigar().getReferenceLength() - 1);
}
else {
return start >= mateAlignmentStart
&& record.getMateAlignmentStart() <= record.getAlignmentEnd();
}
}

private static boolean isPairedAndSameChromosome(SAMRecord record) {
return record.getReadPairedFlag() && record.getMateReferenceName().equals(record.getReferenceName());
}

private static void sclip3HighQualityProcessing(Region region, Configuration conf, Map<Integer, Sclip> sclip3,
String querySequence, int mappingQuality, String queryQuality,
int nm, int n, boolean dir, int start, int m, int q,
Expand Down
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Simple,hg19.fa,L861Q.bam,chr7,55259400,55259600,-UN
L861Q testbed chr7 55259504 55259504 A T 437 8 210 218 0 8 A/T 0.0183 2;0 7.0 0 36.0 0 60.0 16.000 0.0183 0 0 1.000 1 1.0 8 437 CCGCAGCATGTCAAGATCAC GATTTTGGGCTGGCCAAACT chr7:55259401-55259600 SNV
L861Q testbed chr7 55259524 55259524 T A 533 125 198 209 57 68 T/A 0.2345 2;2 21.6 1 35.5 1 60.0 124.000 0.2362 0 0 2.000 3 1.2 124 525 AGATTTTGGGCTGGCCAAAC GCTGGGTGCGGAAGAGAAAG chr7:55259401-55259600 SNV
L861Q testbed chr7 55259544 55259544 G A 371 4 164 202 3 1 G/A 0.0108 2;2 6.8 1 36.0 0 60.0 8.000 0.0110 0 0 2.000 1 1.5 4 365 TGCTGGGTGCGGAAGAGAAA AATACCATGCAGAAGGAGGC chr7:55259401-55259600 SNV

0 comments on commit 2680d40

Please sign in to comment.