Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add -C param for sampe/samse, to copy fastq comments into sam output #203

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions bwa.1
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,12 @@ written. [3]
.TP
.BI -r \ STR
Specify the read group in a format like `@RG\\tID:foo\\tSM:bar'. [null]
.TP
.BI -C
Append FASTA/Q comment to SAM output. This option can be used to
transfer read meta information (e.g. barcode) to the SAM output. Note that the
FASTA/Q comment (the string after a space in the header line) must conform the SAM
spec (e.g. BC:Z:CGTAC). Malformated comments lead to incorrect SAM output.
.RE

.TP
Expand Down Expand Up @@ -553,6 +559,12 @@ XA tag will not be written. [10]
.TP
.BI -r \ STR
Specify the read group in a format like `@RG\\tID:foo\\tSM:bar'. [null]
.TP
.BI -C
Append FASTA/Q comment to SAM output. This option can be used to
transfer read meta information (e.g. barcode) to the SAM output. Note that the
FASTA/Q comment (the string after a space in the header line) must conform the SAM
spec (e.g. BC:Z:CGTAC). Malformated comments lead to incorrect SAM output.
.RE

.TP
Expand Down
5 changes: 4 additions & 1 deletion bwape.c
Original file line number Diff line number Diff line change
Expand Up @@ -735,9 +735,10 @@ int bwa_sai2sam_pe(int argc, char *argv[])
int c;
pe_opt_t *popt;
char *prefix, *rg_line = 0;
extern int copy_comment;

popt = bwa_init_pe_opt();
while ((c = getopt(argc, argv, "a:o:sPn:N:c:f:Ar:")) >= 0) {
while ((c = getopt(argc, argv, "a:o:sPn:N:c:f:Ar:C")) >= 0) {
switch (c) {
case 'r':
if ((rg_line = bwa_set_rg(optarg)) == 0) return 1;
Expand All @@ -751,6 +752,7 @@ int bwa_sai2sam_pe(int argc, char *argv[])
case 'c': popt->ap_prior = atof(optarg); break;
case 'f': xreopen(optarg, "w", stdout); break;
case 'A': popt->force_isize = 1; break;
case 'C': copy_comment = 1; break;
default: return 1;
}
}
Expand All @@ -768,6 +770,7 @@ int bwa_sai2sam_pe(int argc, char *argv[])
fprintf(stderr, " -P preload index into memory (for base-space reads only)\n");
fprintf(stderr, " -s disable Smith-Waterman for the unmapped mate\n");
fprintf(stderr, " -A disable insert size estimate (force -s)\n\n");
fprintf(stderr, " -C append FASTA/FASTQ comment to SAM output\n");
fprintf(stderr, "Notes: 1. For SOLiD reads, <in1.fq> corresponds R3 reads and <in2.fq> to F3.\n");
fprintf(stderr, " 2. For reads shorter than 30bp, applying a smaller -o is recommended to\n");
fprintf(stderr, " to get a sensible speed at the cost of pairing accuracy.\n");
Expand Down
10 changes: 8 additions & 2 deletions bwase.c
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,8 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
}
}
}
if (p->comment)
err_printf("\t%s", p->comment);
err_putchar('\n');
} else { // this read has no match
//ubyte_t *s = p->strand? p->rseq : p->seq;
Expand All @@ -494,6 +496,8 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
if (bwa_rg_id[0]) err_printf("\tRG:Z:%s", bwa_rg_id);
if (p->bc[0]) err_printf("\tBC:Z:%s", p->bc);
if (p->clip_len < p->full_len) err_printf("\tXC:i:%d", p->clip_len);
if (p->comment)
err_printf("\t%s", p->comment);
err_putchar('\n');
}
}
Expand Down Expand Up @@ -580,20 +584,22 @@ int bwa_sai2sam_se(int argc, char *argv[])
{
int c, n_occ = 3;
char *prefix, *rg_line = 0;
while ((c = getopt(argc, argv, "hn:f:r:")) >= 0) {
extern int copy_comment;
while ((c = getopt(argc, argv, "hn:f:r:C")) >= 0) {
switch (c) {
case 'h': break;
case 'r':
if ((rg_line = bwa_set_rg(optarg)) == 0) return 1;
break;
case 'n': n_occ = atoi(optarg); break;
case 'f': xreopen(optarg, "w", stdout); break;
case 'C': copy_comment = 1; break;
default: return 1;
}
}

if (optind + 3 > argc) {
fprintf(stderr, "Usage: bwa samse [-n max_occ] [-f out.sam] [-r RG_line] <prefix> <in.sai> <in.fq>\n");
fprintf(stderr, "Usage: bwa samse [-n max_occ] [-f out.sam] [-r RG_line] [-C] <prefix> <in.sai> <in.fq>\n");
return 1;
}
if ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) {
Expand Down
5 changes: 5 additions & 0 deletions bwaseqio.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ KSEQ_DECLARE(gzFile)

extern unsigned char nst_nt4_table[256];
static char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 };
int copy_comment = 0;

struct __bwa_seqio_t {
// for BAM input
Expand Down Expand Up @@ -199,6 +200,8 @@ bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int mode, int tri
p->qual = (ubyte_t*)strdup((char*)seq->qual.s);
if (trim_qual >= 1) n_trimmed += bwa_trim_read(trim_qual, p);
}
if (copy_comment && seq->comment.l)
p->comment = strdup(seq->comment.s);
p->rseq = (ubyte_t*)calloc(p->full_len, 1);
memcpy(p->rseq, p->seq, p->len);
seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
Expand Down Expand Up @@ -228,6 +231,8 @@ void bwa_free_read_seq(int n_seqs, bwa_seq_t *seqs)
for (j = 0; j < p->n_multi; ++j)
if (p->multi[j].cigar) free(p->multi[j].cigar);
free(p->name);
if (p->comment)
free(p->comment);
free(p->seq); free(p->rseq); free(p->qual); free(p->aln); free(p->md); free(p->multi);
free(p->cigar);
}
Expand Down
2 changes: 1 addition & 1 deletion bwtaln.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ typedef struct {
} bwt_multi1_t;

typedef struct {
char *name;
char *name, *comment;
ubyte_t *seq, *rseq, *qual;
uint32_t len:20, strand:1, type:2, dummy:1, extra_flag:8;
uint32_t n_mm:8, n_gapo:8, n_gape:8, mapQ:8;
Expand Down