[Python] Add a couple quality-of-life improvements to `testing.util.assert_that` (#30771)

* Add tests for qol changes

* implement qols

* revert local start-build-env.sh change

* Fix or skip tests

* undo start-build-env.sh change again

* format

* revert global_aggregate change

* add missing paren

* Update groupby_test.py

* Update groupby_test.py

* address a couple nits/comments

* add in pipeline = actual.pipeline since it is actually used elsewhere

* Update sdks/python/apache_beam/testing/util.py

Note about update compatibility being the reason for not doing this ubiquitously.

* comment out asserts

* use early returns instead of comments

* Use global boolean for early returns in groupby_test

---------

Co-authored-by: Robert Bradshaw <[email protected]>
hjtran and robertwb committed Sep 21, 2024
1 parent 7474e6a commit 6a09545
Showing 8 changed files with 121 additions and 68 deletions.
sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupby_expr.py
@@ -47,9 +47,8 @@ def groupby_expr(test=None):
        | beam.GroupBy(lambda s: s[0])
        | beam.Map(print))
    # [END groupby_expr]
-
-  if test:
-    test(grouped)
+    if test:
+      test(grouped)


if __name__ == '__main__':
sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupby_global_aggregate.py
@@ -60,9 +60,8 @@ def global_aggregate(test=None):
            'unit_price', max, 'max_price')
        | beam.Map(print))
    # [END global_aggregate]
-
-  if test:
-    test(grouped)
+    if test:
+      test(grouped)


if __name__ == '__main__':
sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupby_simple_aggregate.py
@@ -57,9 +57,8 @@ def simple_aggregate(test=None):
            'quantity', sum, 'total_quantity')
        | beam.Map(print))
    # [END simple_aggregate]
-
-  if test:
-    test(grouped)
+    if test:
+      test(grouped)


if __name__ == '__main__':
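The three snippet changes above share one pattern: the `test(grouped)` callback now runs inside the `with beam.Pipeline()` block, because the check functions call `assert_that`, which attaches verification transforms to the pipeline and, after this change, refuses to run on a pipeline that has already executed. A minimal sketch of that pattern, not part of the commit — the data and the `groupby_first_letter` / `check` names are invented stand-ins for the real snippets:

import apache_beam as beam
from apache_beam.testing.util import assert_that, equal_to


def check(grouped):
  # assert_that adds verification transforms to grouped's pipeline, so it
  # must run while that pipeline is still being constructed.
  assert_that(
      grouped | beam.MapTuple(lambda k, vs: (k, sorted(vs))),
      equal_to([('a', ['apple', 'avocado']), ('b', ['banana'])]))


def groupby_first_letter(test=None):
  with beam.Pipeline() as p:
    grouped = (
        p
        | beam.Create(['apple', 'banana', 'avocado'])
        | beam.GroupBy(lambda s: s[0]))
    # The callback is invoked inside the `with` block, mirroring the
    # snippet diffs above.
    if test:
      test(grouped)


groupby_first_letter(check)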
sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupby_test.py
@@ -38,6 +38,11 @@
from .groupby_simple_aggregate import simple_aggregate
from .groupby_two_exprs import groupby_two_exprs

#
# TODO: Remove early returns in check functions
# https://github.com/apache/beam/issues/30778
skip_due_to_30778 = True


class UnorderedList(object):
def __init__(self, contents):
@@ -73,7 +78,10 @@ def normalize_kv(k, v):
# For documentation.
NamedTuple = beam.Row


def check_groupby_expr_result(grouped):
if skip_due_to_30778:
return
assert_that(
grouped | beam.MapTuple(normalize_kv),
equal_to([
@@ -86,6 +94,8 @@ def check_groupby_expr_result(grouped):


def check_groupby_two_exprs_result(grouped):
if skip_due_to_30778:
return
assert_that(
grouped | beam.MapTuple(normalize_kv),
equal_to([
@@ -99,6 +109,8 @@ def check_groupby_two_exprs_result(grouped):


def check_groupby_attr_result(grouped):
if skip_due_to_30778:
return
assert_that(
grouped | beam.MapTuple(normalize_kv),
equal_to([
@@ -146,57 +158,61 @@ def check_groupby_attr_result(grouped):


def check_groupby_attr_expr_result(grouped):
if skip_due_to_30778:
return
assert_that(
grouped | beam.MapTuple(normalize_kv),
equal_to([
#[START groupby_attr_expr_result]
(
NamedTuple(recipe='pie', is_berry=True),
[
beam.Row(
recipe='pie',
fruit='strawberry',
quantity=3,
unit_price=1.50),
beam.Row(
recipe='pie',
fruit='raspberry',
quantity=1,
unit_price=3.50),
beam.Row(
recipe='pie',
fruit='blackberry',
quantity=1,
unit_price=4.00),
beam.Row(
recipe='pie',
fruit='blueberry',
quantity=1,
unit_price=2.00),
]),
(
NamedTuple(recipe='muffin', is_berry=True),
[
beam.Row(
recipe='muffin',
fruit='blueberry',
quantity=2,
unit_price=2.00),
]),
(
NamedTuple(recipe='muffin', is_berry=False),
[
beam.Row(
recipe='muffin',
fruit='banana',
quantity=3,
unit_price=1.00),
]),
(
NamedTuple(recipe='pie', is_berry=True),
[
beam.Row(
recipe='pie',
fruit='strawberry',
quantity=3,
unit_price=1.50),
beam.Row(
recipe='pie',
fruit='raspberry',
quantity=1,
unit_price=3.50),
beam.Row(
recipe='pie',
fruit='blackberry',
quantity=1,
unit_price=4.00),
beam.Row(
recipe='pie',
fruit='blueberry',
quantity=1,
unit_price=2.00),
]),
(
NamedTuple(recipe='muffin', is_berry=True),
[
beam.Row(
recipe='muffin',
fruit='blueberry',
quantity=2,
unit_price=2.00),
]),
(
NamedTuple(recipe='muffin', is_berry=False),
[
beam.Row(
recipe='muffin',
fruit='banana',
quantity=3,
unit_price=1.00),
]),
#[END groupby_attr_expr_result]
]))


def check_simple_aggregate_result(grouped):
if skip_due_to_30778:
return
assert_that(
grouped | beam.MapTuple(normalize_kv),
equal_to([
@@ -211,6 +227,8 @@ def check_simple_aggregate_result(grouped):


def check_expr_aggregate_result(grouped):
if skip_due_to_30778:
return
assert_that(
grouped | beam.Map(normalize),
equal_to([
@@ -222,6 +240,8 @@ def check_expr_aggregate_result(grouped):


def check_global_aggregate_result(grouped):
if skip_due_to_30778:
return
assert_that(
grouped | beam.Map(normalize),
equal_to([
@@ -232,19 +252,26 @@ def check_global_aggregate_result(grouped):


@mock.patch(
-    'apache_beam.examples.snippets.transforms.aggregation.groupby_expr.print', str)
+    'apache_beam.examples.snippets.transforms.aggregation.groupby_expr.print',
+    str)
@mock.patch(
-    'apache_beam.examples.snippets.transforms.aggregation.groupby_two_exprs.print', str)
+    'apache_beam.examples.snippets.transforms.aggregation.groupby_two_exprs.print',
+    str)
@mock.patch(
-    'apache_beam.examples.snippets.transforms.aggregation.groupby_attr.print', str)
+    'apache_beam.examples.snippets.transforms.aggregation.groupby_attr.print',
+    str)
@mock.patch(
-    'apache_beam.examples.snippets.transforms.aggregation.groupby_attr_expr.print', str)
+    'apache_beam.examples.snippets.transforms.aggregation.groupby_attr_expr.print',
+    str)
@mock.patch(
-    'apache_beam.examples.snippets.transforms.aggregation.groupby_simple_aggregate.print', str)
+    'apache_beam.examples.snippets.transforms.aggregation.groupby_simple_aggregate.print',
+    str)
@mock.patch(
-    'apache_beam.examples.snippets.transforms.aggregation.groupby_expr_aggregate.print', str)
+    'apache_beam.examples.snippets.transforms.aggregation.groupby_expr_aggregate.print',
+    str)
@mock.patch(
-    'apache_beam.examples.snippets.transforms.aggregation.groupby_global_aggregate.print', str)
+    'apache_beam.examples.snippets.transforms.aggregation.groupby_global_aggregate.print',
+    str)
class GroupByTest(unittest.TestCase):
def test_groupby_expr(self):
groupby_expr(check_groupby_expr_result)
17 changes: 17 additions & 0 deletions sdks/python/apache_beam/testing/util.py
@@ -261,6 +261,23 @@ def assert_that(
"""
assert isinstance(actual, pvalue.PCollection), (
'%s is not a supported type for Beam assert' % type(actual))
pipeline = actual.pipeline
if getattr(actual.pipeline, 'result', None):
# The pipeline was already run. The user most likely called assert_that
# after the pipeline context.
raise RuntimeError(
'assert_that must be used within a beam.Pipeline context')

# Usually, the uniqueness of the label is left to the pipeline
# writer to guarantee. Since we're in a testing context, we'll
# just automatically append a number to the label if it's
# already in use, as tests don't typically have to worry about
# long-term update compatibility stability of stage names.
if label in pipeline.applied_labels:
label_idx = 2
while f"{label}_{label_idx}" in pipeline.applied_labels:
label_idx += 1
label = f"{label}_{label_idx}"

if isinstance(matcher, _EqualToPerWindowMatcher):
reify_windows = True
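For illustration only — this sketch is not part of the commit, and the pipeline and expected values are invented — here is how the two new behaviors look from a test author's point of view:

import apache_beam as beam
from apache_beam.testing.util import assert_that, equal_to

with beam.Pipeline() as p:
  doubled = p | beam.Create([1, 2, 3]) | beam.Map(lambda x: x * 2)
  # Repeated calls with the default label no longer collide; later calls
  # get an automatically de-duplicated label (e.g. 'assert_that_2'), so no
  # explicit label= argument is needed.
  assert_that(doubled, equal_to([2, 4, 6]))
  assert_that(doubled, equal_to([2, 4, 6]))

# Calling assert_that down here, after the `with` block has already run the
# pipeline, now raises RuntimeError instead of silently never checking anything.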
13 changes: 13 additions & 0 deletions sdks/python/apache_beam/testing/util_test.py
@@ -183,6 +183,19 @@ def test_equal_to_per_window_fail_unmatched_window(self):
equal_to_per_window(expected),
reify_windows=True)

def test_runtimeerror_outside_of_context(self):
with beam.Pipeline() as p:
outputs = (p | beam.Create([1, 2, 3]) | beam.Map(lambda x: x + 1))
with self.assertRaises(RuntimeError):
assert_that(outputs, equal_to([2, 3, 4]))

def test_multiple_assert_that_labels(self):
with beam.Pipeline() as p:
outputs = (p | beam.Create([1, 2, 3]) | beam.Map(lambda x: x + 1))
assert_that(outputs, equal_to([2, 3, 4]))
assert_that(outputs, equal_to([2, 3, 4]))
assert_that(outputs, equal_to([2, 3, 4]))

def test_equal_to_per_window_fail_unmatched_element(self):
with self.assertRaises(BeamAssertException):
start = int(MIN_TIMESTAMP.micros // 1e6) - 5
11 changes: 5 additions & 6 deletions sdks/python/apache_beam/transforms/trigger_test.py
@@ -583,7 +583,6 @@ def test_after_processing_time(self):
accumulation_mode=AccumulationMode.DISCARDING)
| beam.GroupByKey()
| beam.Map(lambda x: x[1]))

assert_that(results, equal_to([list(range(total_elements_in_trigger))]))

def test_repeatedly_after_processing_time(self):
@@ -772,11 +771,11 @@ def test_multiple_accumulating_firings(self):
| beam.GroupByKey()
| beam.FlatMap(lambda x: x[1]))

-    # The trigger should fire twice. Once after 5 seconds, and once after 10.
-    # The firings should accumulate the output.
-    first_firing = [str(i) for i in elements if i <= 5]
-    second_firing = [str(i) for i in elements]
-    assert_that(records, equal_to(first_firing + second_firing))
+      # The trigger should fire twice. Once after 5 seconds, and once after 10.
+      # The firings should accumulate the output.
+      first_firing = [str(i) for i in elements if i <= 5]
+      second_firing = [str(i) for i in elements]
+      assert_that(records, equal_to(first_firing + second_firing))

def test_on_pane_watermark_hold_no_pipeline_stall(self):
"""A regression test added for
8 changes: 4 additions & 4 deletions sdks/python/apache_beam/transforms/util_test.py
@@ -1016,13 +1016,13 @@ def test_constant_k(self):
with TestPipeline() as p:
pc = p | beam.Create(self.l)
with_keys = pc | util.WithKeys('k')
-    assert_that(with_keys, equal_to([('k', 1), ('k', 2), ('k', 3)], ))
+      assert_that(with_keys, equal_to([('k', 1), ('k', 2), ('k', 3)], ))

def test_callable_k(self):
with TestPipeline() as p:
pc = p | beam.Create(self.l)
with_keys = pc | util.WithKeys(lambda x: x * x)
-    assert_that(with_keys, equal_to([(1, 1), (4, 2), (9, 3)]))
+      assert_that(with_keys, equal_to([(1, 1), (4, 2), (9, 3)]))

@staticmethod
def _test_args_kwargs_fn(x, multiply, subtract):
@@ -1033,7 +1033,7 @@ def test_args_kwargs_k(self):
pc = p | beam.Create(self.l)
with_keys = pc | util.WithKeys(
WithKeysTest._test_args_kwargs_fn, 2, subtract=1)
-    assert_that(with_keys, equal_to([(1, 1), (3, 2), (5, 3)]))
+      assert_that(with_keys, equal_to([(1, 1), (3, 2), (5, 3)]))

def test_sideinputs(self):
with TestPipeline() as p:
Expand All @@ -1046,7 +1046,7 @@ def test_sideinputs(self):
the_singleton: x + sum(the_list) + the_singleton,
si1,
the_singleton=si2)
-    assert_that(with_keys, equal_to([(17, 1), (18, 2), (19, 3)]))
+      assert_that(with_keys, equal_to([(17, 1), (18, 2), (19, 3)]))


class GroupIntoBatchesTest(unittest.TestCase):
