From 8b8059b414849570870fbd8faffeccd10533e1aa Mon Sep 17 00:00:00 2001 From: Miller Date: Sat, 27 Jan 2024 12:55:34 -0500 Subject: [PATCH] :construction: add description scraper :pencil: fix borrows :bug: fix quote injection --- src/commands/encoder_cmd.rs | 24 +++++++++++++++++++++++- src/lib/echtvar.rs | 6 +++--- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/commands/encoder_cmd.rs b/src/commands/encoder_cmd.rs index ef1f723..31a1e27 100644 --- a/src/commands/encoder_cmd.rs +++ b/src/commands/encoder_cmd.rs @@ -1,6 +1,6 @@ use bincode::Options; use echtvar_lib::{echtvar::bstrip_chr, fields, kmer16, var32, zigzag}; -use rust_htslib::bcf::header::{TagLength, TagType}; +use rust_htslib::bcf::header::{TagLength, TagType, HeaderRecord}; use rust_htslib::bcf::record::{Buffer, Record}; use rust_htslib::bcf::{Read as BCFRead, Reader}; use stream_vbyte::{encode::encode, x86::Sse41}; @@ -160,6 +160,25 @@ fn is_sorted(data: &Vec) -> bool { return true; } +fn hdr_info_id2description( + mut hrecs: Vec, + id: &String, + default: &std::string::String, +) -> std::string::String { + hrecs.retain(|rec| match rec { + HeaderRecord::Info {key: _, values: v} => &v["ID"] == id, + _ => false} + ); + if hrecs.len() != 1 { + panic!("Field {} is either not present in the header or present multiple times!", id); + }; + let description = match hrecs.first().unwrap() { + HeaderRecord::Info {key: _, values: v} => if v.contains_key("Description") { &v["Description"] } else { default }, + _ => default, + }; + return description.to_string(); +} + pub fn encoder_main(vpaths: Vec<&str>, opath: &str, jpath: &str) { let zpath = std::path::Path::new(opath); let jpath = std::path::Path::new(jpath); @@ -228,6 +247,9 @@ pub fn encoder_main(vpaths: Vec<&str>, opath: &str, jpath: &str) { tl, f.field ), }; + println!("Old description for field {}: {}", f.field, f.description); + f.description = hdr_info_id2description(header.header_records(), &f.field, &f.description); + println!("New description for field {}: {}", f.field, f.description); } let zfile = std::fs::File::create(&zpath).unwrap(); diff --git a/src/lib/echtvar.rs b/src/lib/echtvar.rs index 1e5bd00..1825d86 100644 --- a/src/lib/echtvar.rs +++ b/src/lib/echtvar.rs @@ -177,7 +177,7 @@ impl EchtVars { for e in &self.fields { header.push_record( format!( - "##INFO=", + "##INFO=", e.alias, if vec!["A", "R", "G"].iter().any(|n| n == &e.number) { "1" @@ -191,8 +191,8 @@ impl EchtVars { } else { "Float" }, - if &e.description.to_string() == "added by echtvar" { - format!("added by echtvar from {}", path) + if &e.description.to_string() == "added by echtvar"{ + format!("\"added by echtvar from {}\"", path) } else { format!("added by echtvar {}", e.description.to_string()) }