Skip to content

Commit

Permalink
enable handling gzip input in call_freq script
Browse files Browse the repository at this point in the history
  • Loading branch information
PengNi committed Mar 10, 2022
1 parent 14b6c4a commit 6be4754
Showing 1 changed file with 24 additions and 19 deletions.
43 changes: 24 additions & 19 deletions scripts/call_modification_frequency.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import argparse
import os
import sys
import gzip

from txt_formater import ModRecord
from txt_formater import SiteStats
Expand All @@ -18,25 +19,29 @@ def calculate_mods_frequency(mods_files, prob_cf):

count, used = 0, 0
for mods_file in mods_files:
with open(mods_file, 'r') as rf:
for line in rf:
words = line.strip().split("\t")
mod_record = ModRecord(words)
if mod_record.is_record_callable(prob_cf):
if mod_record._site_key not in sitekeys:
sitekeys.add(mod_record._site_key)
sitekey2stats[mod_record._site_key] = SiteStats(mod_record._strand,
mod_record._pos_in_strand,
mod_record._kmer)
sitekey2stats[mod_record._site_key]._prob_0 += mod_record._prob_0
sitekey2stats[mod_record._site_key]._prob_1 += mod_record._prob_1
sitekey2stats[mod_record._site_key]._coverage += 1
if mod_record._called_label == 1:
sitekey2stats[mod_record._site_key]._met += 1
else:
sitekey2stats[mod_record._site_key]._unmet += 1
used += 1
count += 1
if mods_file.endswith(".gz"):
infile = gzip.open(mods_file, 'rt')
else:
infile = open(mods_file, 'r')
for line in infile:
words = line.strip().split("\t")
mod_record = ModRecord(words)
if mod_record.is_record_callable(prob_cf):
if mod_record._site_key not in sitekeys:
sitekeys.add(mod_record._site_key)
sitekey2stats[mod_record._site_key] = SiteStats(mod_record._strand,
mod_record._pos_in_strand,
mod_record._kmer)
sitekey2stats[mod_record._site_key]._prob_0 += mod_record._prob_0
sitekey2stats[mod_record._site_key]._prob_1 += mod_record._prob_1
sitekey2stats[mod_record._site_key]._coverage += 1
if mod_record._called_label == 1:
sitekey2stats[mod_record._site_key]._met += 1
else:
sitekey2stats[mod_record._site_key]._unmet += 1
used += 1
count += 1
infile.close()
print("{:.2f}% ({} of {}) calls used..".format(used/float(count) * 100, used, count))
return sitekey2stats

Expand Down

0 comments on commit 6be4754

Please sign in to comment.