From 6be4754e1c6953e8333415ae977d39bf5ea2dd44 Mon Sep 17 00:00:00 2001 From: PengNi <543943952@qq.com> Date: Thu, 10 Mar 2022 21:16:28 +0800 Subject: [PATCH] enable handling gzip input in call_freq script --- scripts/call_modification_frequency.py | 43 ++++++++++++++------------ 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/scripts/call_modification_frequency.py b/scripts/call_modification_frequency.py index 4c4512c..1a32062 100644 --- a/scripts/call_modification_frequency.py +++ b/scripts/call_modification_frequency.py @@ -6,6 +6,7 @@ import argparse import os import sys +import gzip from txt_formater import ModRecord from txt_formater import SiteStats @@ -18,25 +19,29 @@ def calculate_mods_frequency(mods_files, prob_cf): count, used = 0, 0 for mods_file in mods_files: - with open(mods_file, 'r') as rf: - for line in rf: - words = line.strip().split("\t") - mod_record = ModRecord(words) - if mod_record.is_record_callable(prob_cf): - if mod_record._site_key not in sitekeys: - sitekeys.add(mod_record._site_key) - sitekey2stats[mod_record._site_key] = SiteStats(mod_record._strand, - mod_record._pos_in_strand, - mod_record._kmer) - sitekey2stats[mod_record._site_key]._prob_0 += mod_record._prob_0 - sitekey2stats[mod_record._site_key]._prob_1 += mod_record._prob_1 - sitekey2stats[mod_record._site_key]._coverage += 1 - if mod_record._called_label == 1: - sitekey2stats[mod_record._site_key]._met += 1 - else: - sitekey2stats[mod_record._site_key]._unmet += 1 - used += 1 - count += 1 + if mods_file.endswith(".gz"): + infile = gzip.open(mods_file, 'rt') + else: + infile = open(mods_file, 'r') + for line in infile: + words = line.strip().split("\t") + mod_record = ModRecord(words) + if mod_record.is_record_callable(prob_cf): + if mod_record._site_key not in sitekeys: + sitekeys.add(mod_record._site_key) + sitekey2stats[mod_record._site_key] = SiteStats(mod_record._strand, + mod_record._pos_in_strand, + mod_record._kmer) + sitekey2stats[mod_record._site_key]._prob_0 += mod_record._prob_0 + sitekey2stats[mod_record._site_key]._prob_1 += mod_record._prob_1 + sitekey2stats[mod_record._site_key]._coverage += 1 + if mod_record._called_label == 1: + sitekey2stats[mod_record._site_key]._met += 1 + else: + sitekey2stats[mod_record._site_key]._unmet += 1 + used += 1 + count += 1 + infile.close() print("{:.2f}% ({} of {}) calls used..".format(used/float(count) * 100, used, count)) return sitekey2stats