Skip to content

Commit

Permalink
implements +tag2tag --gl-to-pl
Browse files Browse the repository at this point in the history
(via @wkretzsch)

Closes samtools#260
  • Loading branch information
mcshane committed Jun 3, 2015
1 parent d30dea2 commit 0bd5461
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 0 deletions.
34 changes: 34 additions & 0 deletions plugins/tag2tag.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@ DEALINGS IN THE SOFTWARE. */


#define GP_TO_GL 1
#define GL_TO_PL 2

static int mode = 0, drop_source_tag = 0;
static bcf_hdr_t *in_hdr, *out_hdr;
static float *farr = NULL;
static int32_t *iarr = NULL;
static int mfarr = 0;

const char *about(void)
Expand All @@ -55,6 +57,7 @@ const char *usage(void)
"Plugin options:\n"
//todo " --gl-to-gp convert FORMAT/GL to FORMAT/GP\n"
" --gp-to-gl convert FORMAT/GP to FORMAT/GL\n"
" --gl-to-pl convert FORMAT/GL to FORMAT/PL\n"
" -r, --replace drop the source tag\n"
"\n"
"Example:\n"
Expand All @@ -77,6 +80,7 @@ int init(int argc, char **argv, bcf_hdr_t *in, bcf_hdr_t *out)
{
{"replace",0,0,'r'},
{"gp-to-gl",0,0,1},
{"gl-to-pl",0,0,2},
{0,0,0,0}
};
char c;
Expand All @@ -85,6 +89,7 @@ int init(int argc, char **argv, bcf_hdr_t *in, bcf_hdr_t *out)
switch (c)
{
case 1 : mode = GP_TO_GL; break;
case 2 : mode = GL_TO_PL; break;
case 'r': drop_source_tag = 1; break;
case 'h':
case '?':
Expand All @@ -98,6 +103,8 @@ int init(int argc, char **argv, bcf_hdr_t *in, bcf_hdr_t *out)

if ( mode==GP_TO_GL )
init_header(out_hdr,drop_source_tag?"GP":NULL,BCF_HL_FMT,"##FORMAT=<ID=GL,Number=G,Type=Float,Description=\"Genotype Likelihoods\">");
else if ( mode==GL_TO_PL )
init_header(out_hdr,drop_source_tag?"GL":NULL,BCF_HL_FMT,"##FORMAT=<ID=PL,Number=G,Type=Integer,Description=\"Phred scaled genotype likelihoods\">");

return 0;
}
Expand All @@ -117,12 +124,39 @@ bcf1_t *process(bcf1_t *rec)
if ( drop_source_tag )
bcf_update_format_float(out_hdr,rec,"GP",NULL,0);
}
else if ( mode==GL_TO_PL )
{
n = bcf_get_format_float(in_hdr,rec,"GL",&farr,&mfarr);
if(n < 0){
fprintf(stderr, "Could not read tag: GL\n");
exit(1);
}


// create extra space to store converted data
iarr = (int32_t*) malloc(n * sizeof(int32_t));
if(!iarr) n = -4;

for (i=0; i<n; i++)
{
if ( bcf_float_is_missing(farr[i]) )
iarr[i] = bcf_int32_missing;
else if ( bcf_float_is_vector_end(farr[i]) )
iarr[i] = bcf_int32_vector_end;
else
iarr[i] = lroundf(-10 * farr[i]);
}
bcf_update_format_int32(out_hdr,rec,"PL",iarr,n);
if ( drop_source_tag )
bcf_update_format_float(out_hdr,rec,"GL",NULL,0);
}
return rec;
}

void destroy(void)
{
free(farr);
free(iarr);
}


1 change: 1 addition & 0 deletions test/test.pl
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@
test_vcf_plugin($opts,in=>'fixploidy',out=>'fixploidy.out',cmd=>'+fixploidy',args=>'-- -s {PATH}/fixploidy.samples -p {PATH}/fixploidy.ploidy');
test_vcf_plugin($opts,in=>'vcf2sex',out=>'vcf2sex.out',cmd=>'+vcf2sex',args=>'-- -n 5');
test_vcf_plugin($opts,in=>'vcf2sex',out=>'vcf2sex.out',cmd=>'+vcf2sex',args=>'-- -gn 5');
test_vcf_plugin($opts,in=>'view.GL',out=>'view.PL.vcf',cmd=>'+tag2tag',args=>'-- -r --gl-to-pl');
test_vcf_concat($opts,in=>['concat.1.a','concat.1.b'],out=>'concat.1.vcf.out',do_bcf=>0,args=>'');
test_vcf_concat($opts,in=>['concat.1.a','concat.1.b'],out=>'concat.1.bcf.out',do_bcf=>1,args=>'');
test_vcf_concat($opts,in=>['concat.2.a','concat.2.b'],out=>'concat.2.vcf.out',do_bcf=>0,args=>'-a');
Expand Down
29 changes: 29 additions & 0 deletions test/view.GL.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
##fileformat=VCFv4.1
##FILTER=<ID=PASS,Description="All filters passed">
##reference=file:///seq/references/1000Genomes-NCBI37.fasta
##contig=<ID=11,length=135006516>
##contig=<ID=20,length=63025520>
##contig=<ID=X,length=155270560>
##contig=<ID=Y,length=59373566>
##FORMAT=<ID=GL,Number=G,Type=Float,Description="List of Phred-scaled genotype likelihoods">
##FILTER=<ID=StrandBias,Description="Min P-value for strand bias (INFO/PV4) [0.0001]">
##FILTER=<ID=BaseQualBias,Description="Min P-value for baseQ bias (INFO/PV4) [1e-100]">
##FILTER=<ID=MapQualBias,Description="Min P-value for mapQ bias (INFO/PV4) [0]">
##FILTER=<ID=EndDistBias,Description="Min P-value for end distance bias (INFO/PV4) [0.0001]">
##FILTER=<ID=MinAB,Description="Minimum number of alternate bases (INFO/DP4) [2]">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003
11 2343543 . A . 999 PASS . GL 0,-25.5,-25.5 0,-25.5,-25.5 0,-25.5,-25.5
11 5464562 . C T 999 PASS . GL 0,0,0 0,0,0 0,0,0
20 76962 rs6111385 T C 999 PASS . GL -25.5,0,-25.5 -25.5,-25.5,0 -25.5,-25.5,0
20 126310 . ACC A 999 StrandBias;EndDistBias . GL -25.5,0,-13.2 -25.5,0,-13.9 -25.5,-21.3,0
20 138125 rs2298108 G T 999 PASS . GL -13.5,0,-16.3 -14,0,-25.5 -25.5,-19.9,0
20 138148 rs2298109 C T 999 PASS . GL -19.5,0,-25.5 -19.2,0,-25.5 -25.5,-23.5,0
20 271225 . T TTTA,TA 999 StrandBias . GL -15.1,-5.3,-20.3,0,-5.2,-15.9 -25.5,0,-21.3,-25.5,-25.5,-25.5 -25.5,-25.5,-25.5,-25.5,0,-24.1
20 304568 . C T 999 PASS . GL -9.5,0,-25.5 -19.2,0,-25.5 -25.5,-9.5,0
20 326891 . A AC 999 PASS . GL -25.5,0,-13.2 -25.5,0,-13.9 .,.,.
X 2928329 rs62584840 C T 999 PASS . GL 0,-5.6 0,-8.1 -7.3,0,-1.9
X 2933066 rs61746890 G C 999 PASS . GL 0,-25.5 0,-25.5 -25.5,-25.5,-25.5
X 2942109 rs5939407 T C 999 PASS . GL 0,-25.5 -25.5,0 -25.5,-15.7,0
X 3048719 . T C 999 PASS . GL 0,-25.5 -25.5,0 -25.5,0,-15.7
Y 8657215 . C A 999 PASS . GL 0,-25.5 -25.5,0 .
Y 10011673 rs78249411 G A 999 MinAB . GL -12.6,-10.1 -9.5,0 .
29 changes: 29 additions & 0 deletions test/view.PL.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
##fileformat=VCFv4.1
##FILTER=<ID=PASS,Description="All filters passed">
##reference=file:///seq/references/1000Genomes-NCBI37.fasta
##contig=<ID=11,length=135006516>
##contig=<ID=20,length=63025520>
##contig=<ID=X,length=155270560>
##contig=<ID=Y,length=59373566>
##FILTER=<ID=StrandBias,Description="Min P-value for strand bias (INFO/PV4) [0.0001]">
##FILTER=<ID=BaseQualBias,Description="Min P-value for baseQ bias (INFO/PV4) [1e-100]">
##FILTER=<ID=MapQualBias,Description="Min P-value for mapQ bias (INFO/PV4) [0]">
##FILTER=<ID=EndDistBias,Description="Min P-value for end distance bias (INFO/PV4) [0.0001]">
##FILTER=<ID=MinAB,Description="Minimum number of alternate bases (INFO/DP4) [2]">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred scaled genotype likelihoods">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003
11 2343543 . A . 999 PASS . PL 0,255,255 0,255,255 0,255,255
11 5464562 . C T 999 PASS . PL 0,0,0 0,0,0 0,0,0
20 76962 rs6111385 T C 999 PASS . PL 255,0,255 255,255,0 255,255,0
20 126310 . ACC A 999 StrandBias;EndDistBias . PL 255,0,132 255,0,139 255,213,0
20 138125 rs2298108 G T 999 PASS . PL 135,0,163 140,0,255 255,199,0
20 138148 rs2298109 C T 999 PASS . PL 195,0,255 192,0,255 255,235,0
20 271225 . T TTTA,TA 999 StrandBias . PL 151,53,203,0,52,159 255,0,213,255,255,255 255,255,255,255,0,241
20 304568 . C T 999 PASS . PL 95,0,255 192,0,255 255,95,0
20 326891 . A AC 999 PASS . PL 255,0,132 255,0,139 .,.,.
X 2928329 rs62584840 C T 999 PASS . PL 0,56 0,81 73,0,19
X 2933066 rs61746890 G C 999 PASS . PL 0,255 0,255 255,255,255
X 2942109 rs5939407 T C 999 PASS . PL 0,255 255,0 255,157,0
X 3048719 . T C 999 PASS . PL 0,255 255,0 255,0,157
Y 8657215 . C A 999 PASS . PL 0,255 255,0 .
Y 10011673 rs78249411 G A 999 MinAB . PL 126,101 95,0 .

0 comments on commit 0bd5461

Please sign in to comment.