From f371b6c3f2d8a35fb9a3859fadcb80a6044504c3 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Wed, 18 Sep 2024 22:20:25 -0400 Subject: [PATCH] - responded to review comments. --- fgpyo/sam/__init__.py | 56 +++++++++++++++++++---------------- tests/fgpyo/sam/test_cigar.py | 2 +- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/fgpyo/sam/__init__.py b/fgpyo/sam/__init__.py index 395e842..75d8368 100644 --- a/fgpyo/sam/__init__.py +++ b/fgpyo/sam/__init__.py @@ -549,39 +549,43 @@ def length_on_target(self) -> int: def get_alignment_offsets(self, reverse: bool = False) -> Tuple[int, int]: """ - Get the starting and ending offsets for the alignment based on the CIGAR string. - - Args: - reverse: If True, count from the end of the read sequence. - Otherwise (default behavior), count from the beginning of the read sequence. - - Returns: - A tuple (start, end), defining the start and end offsets of the _aligned part_ of the read. - These offsets are 0-based and open-ended, with respect to the beginning of the read sequence. - (If 'reverse' is True, the offsets are with respect to the end of the read sequence.) - - # TODO: FIGURE OUT WHY the following three lines causes mkdocs to fail - # If the Cigar contains no alignment operators that consume sequence bases, or - # only clipping operators, the start and end offsets will be the same value (indicating - # an empty region). - """ + Get the starting and ending offsets for the alignment based on the CIGAR string. + + Args: + reverse: If True, count from the end of the read sequence. + Otherwise (default behavior), count from the beginning of the read sequence. + + Returns: + A tuple (start, end), defining the start and end offsets of the aligned part + of the read. These offsets are 0-based and open-ended, with respect to the + beginning of the read sequence. (If 'reverse' is True, the offsets are with + respect to the end of the read sequence.) + If the Cigar contains no alignment operators that consume sequence bases, or + only clipping operators, the start and end offsets will be the same value + (indicating an empty region). This shared value will be the offset to the first + base consumed by a non-clipping operator or the length of the read sequence if + there is no such base. + + # + """ # TODO: figure out how to remove the '#' from the documentation above without + # breaking the build start_offset: int = 0 end_offset: int = 0 - cig_el: CigarElement + element: CigarElement alignment_began = False elements = self.elements if not reverse else reversed(self.elements) - for cig_el in elements: - if cig_el.operator.is_clipping and not alignment_began: + for element in elements: + if element.operator.is_clipping and not alignment_began: # We are in the clipping operators preceding the alignment - start_offset += cig_el.length_on_query - end_offset += cig_el.length_on_query - elif cig_el.operator.is_clipping: - # We have exited the alignment and are in the clipping operators after the alignment - break - else: + start_offset += element.length_on_query + end_offset += element.length_on_query + elif not element.operator.is_clipping: # We are within the alignment alignment_began = True - end_offset += cig_el.length_on_query + end_offset += element.length_on_query + else: + # We have exited the alignment and are in the clipping operators after the alignment + break return start_offset, end_offset diff --git a/tests/fgpyo/sam/test_cigar.py b/tests/fgpyo/sam/test_cigar.py index de9c07b..4c8964e 100644 --- a/tests/fgpyo/sam/test_cigar.py +++ b/tests/fgpyo/sam/test_cigar.py @@ -54,7 +54,7 @@ def test_bad_index_raises_type_error(index: int) -> None: ) def test_get_alignments(cigar_string: str, start: int, end: int) -> None: cig = Cigar.from_cigarstring(cigar_string) - assert Cigar.get_alignment_offsets(cig, False) == (start, end) + assert Cigar.get_alignment_offsets(cig, reverse=False) == (start, end) @pytest.mark.parametrize(