-
Notifications
You must be signed in to change notification settings - Fork 0
/
create_bam_from_scratch.d
41 lines (36 loc) · 1.75 KB
/
create_bam_from_scratch.d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import bio.std.hts.bam.read;
import bio.std.hts.bam.referenceinfo;
import bio.std.hts.sam.header;
import bio.std.hts.bam.reader;
import bio.std.hts.bam.writer;
import contrib.undead.stream;
import std.stdio;
import bio.std.hts.bam.cigar;
void main() {
auto header = new SamHeader(); // First, create SAM header
RgLine rg; // and fill it with information.
rg.identifier = "RG007";
rg.platform = "ILLUMINA"; // Of course, you can modify header
header.read_groups.add(rg); // provided by BamReader object.
auto reference = ReferenceSequenceInfo("contig123", 4321);
auto stream = new MemoryStream();
auto writer = new BamWriter(stream);
writer.writeSamHeader(header); // start writing BAM from header
writer.writeReferenceSequenceInfo([reference]); // and reference sequence info
auto read = BamRead("readName001", "ACTGATGAAC",
[CigarOperation(7, 'M'), CigarOperation(1, 'I'), CigarOperation(2, 'S')]);
read.base_qualities = [38, 34, 33, 35, 28, 39, 25, 19, 21, 17];
read.ref_id = 0;
read.mapping_quality = 46;
read.is_unmapped = false;
read["RG"] = "RG007";
// ... set many more fields, flags, and tags
read.position = 2345; // 0-based, in SAM output you will see 2346
read.strand = '-'; // same as read.is_reverse_strand = true
writer.writeRecord(read); // BGZF blocks are seamlessly compressed in parallel
writer.flush(); // in practice, one would call finish() method
stream.seekSet(0); // but here we will read from the stream
auto reader = new BamReader(stream);
write(reader.header.text); // serialized header already contains newline
writefln("%j", reader.reads.front); // prints record in JSON format
}