From 1b4afa601f78800945dca3a078248f75067aad19 Mon Sep 17 00:00:00 2001 From: Stefan Cardnell Date: Fri, 6 Sep 2024 09:53:40 +0200 Subject: [PATCH 1/4] feat: add retry and dead letter topic behaviour, refs #8 --- CHANGELOG.md | 4 + README.md | 27 +- django_kafka/__init__.py | 11 +- django_kafka/conf.py | 5 + django_kafka/consumer.py | 159 +++++++--- django_kafka/dead_letter/__init__.py | 0 django_kafka/dead_letter/headers.py | 6 + django_kafka/dead_letter/topic.py | 44 +++ .../management/commands/kafka_consume.py | 2 +- django_kafka/registry.py | 12 +- django_kafka/retry/__init__.py | 55 ++++ django_kafka/retry/consumer.py | 104 +++++++ django_kafka/retry/headers.py | 28 ++ django_kafka/retry/topic.py | 72 +++++ django_kafka/serialization.py | 8 + django_kafka/tests/dead_letter/__init__.py | 0 django_kafka/tests/dead_letter/test_topic.py | 66 ++++ django_kafka/tests/retry/__init__.py | 0 django_kafka/tests/retry/test_consumer.py | 283 ++++++++++++++++++ django_kafka/tests/retry/test_headers.py | 27 ++ django_kafka/tests/retry/test_retry.py | 70 +++++ django_kafka/tests/retry/test_topic.py | 153 ++++++++++ django_kafka/tests/test_consumer.py | 171 +++++++++-- .../tests/test_django_kafka_interface.py | 9 +- django_kafka/tests/test_registry.py | 89 ++++-- django_kafka/tests/test_settings.py | 4 + django_kafka/tests/test_topic.py | 129 ++++++-- django_kafka/topic.py | 74 ++++- 28 files changed, 1454 insertions(+), 158 deletions(-) create mode 100644 django_kafka/dead_letter/__init__.py create mode 100644 django_kafka/dead_letter/headers.py create mode 100644 django_kafka/dead_letter/topic.py create mode 100644 django_kafka/retry/__init__.py create mode 100644 django_kafka/retry/consumer.py create mode 100644 django_kafka/retry/headers.py create mode 100644 django_kafka/retry/topic.py create mode 100644 django_kafka/serialization.py create mode 100644 django_kafka/tests/dead_letter/__init__.py create mode 100644 django_kafka/tests/dead_letter/test_topic.py create mode 100644 django_kafka/tests/retry/__init__.py create mode 100644 django_kafka/tests/retry/test_consumer.py create mode 100644 django_kafka/tests/retry/test_headers.py create mode 100644 django_kafka/tests/retry/test_retry.py create mode 100644 django_kafka/tests/retry/test_topic.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e69de29..2bb2b7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -0,0 +1,4 @@ +# Changelog + +## 0.2.0 (2024-09-05) +* Add decorator for topic retry and dead letter topic, see README.md \ No newline at end of file diff --git a/README.md b/README.md index efc6f65..21f6020 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ Or you can use `DjangoKafka` class API. ```python from django_kafka import kafka -kafka.start_consumers() +kafka.run_consumers() ``` Check [Confluent Python Consumer](https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#consumer) for API documentation. @@ -115,6 +115,31 @@ DJANGO_KAFKA = { **Note:** take [django_kafka.topic.AvroTopic](./django_kafka/topic.py) as an example if you want to implement a custom Topic with your schema. +## Non-Blocking Retries: + +Add non-blocking retry behaviour to a topic by using the `retry` decorator: + +```python +from django_kafka import kafka +from django_kafka.topic import Topic + + +@kafka.retry(max_retries=3, delay=120, include=[ValueError]) +class RetryableTopic(Topic): + name = "topic" + ... 
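+ + # Parameter notes (mirroring the RetrySettings docstring): + # - max_retries: attempts before the message is dead-lettered + # - delay: wait in seconds between attempts (backoff=True makes it exponential) + # - include / exclude: mutually exclusive lists of exception types that do / do not trigger a retry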
+``` + +When the consumption of a message in a retryable topic fails, the message is re-sent to a topic whose name is composed of the consumer group id, the original topic name, a `.retry` suffix, and the retry number. Subsequent failures are then sent to retry topics with incrementing retry numbers until the maximum number of attempts is reached, after which the message is sent to a dead letter topic suffixed with `.dlt`. So for a failed message in topic `topic` received by consumer group `group`, the expected topic sequence is (sketched in code below): + +1. `topic` +2. `group.topic.retry.1` +3. `group.topic.retry.2` +4. `group.topic.retry.3` +5. `group.topic.dlt` + +When consumers are started using [start commands](#start-the-consumers), an additional retry consumer is started in parallel for any consumer containing a retryable topic. This retry consumer is assigned to a consumer group whose id is a combination of the original group id and a `.retry` suffix. It subscribes to the retry topics and manages the retry and delay behaviour. Please note that messages are retried directly by the retry consumer and are not sent back to the original topic. + ## Settings: **Defaults:** diff --git a/django_kafka/__init__.py b/django_kafka/__init__.py index 9f30a8c..c2ba260 100644 --- a/django_kafka/__init__.py +++ b/django_kafka/__init__.py @@ -10,6 +10,7 @@ from django_kafka.exceptions import DjangoKafkaError from django_kafka.producer import Producer from django_kafka.registry import ConsumersRegistry +from django_kafka.retry import RetrySettings logger = logging.getLogger(__name__) @@ -28,6 +29,7 @@ def autodiscover(): class DjangoKafka: consumers = ConsumersRegistry() + retry = RetrySettings @cached_property def producer(self) -> Producer: @@ -45,14 +47,15 @@ def schema_client(self) -> SchemaRegistryClient: return SchemaRegistryClient(settings.SCHEMA_REGISTRY) - def start_consumer(self, consumer: str): - self.consumers[consumer]().start() + def run_consumer(self, consumer_key: str): + consumer = self.consumers[consumer_key]() + consumer.run() - def start_consumers(self, consumers: Optional[list[str]] = None): + def run_consumers(self, consumers: Optional[list[str]] = None): consumers = consumers or list(self.consumers) with Pool(processes=len(consumers)) as pool: try: - pool.map(self.start_consumer, consumers) + pool.map(self.run_consumer, consumers) except KeyboardInterrupt: # Stops the worker processes immediately without completing # outstanding work.
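For illustration only (not part of the patch): a minimal sketch of the naming scheme the README section above describes. `expected_topic_sequence` is a hypothetical helper named here for the example, not an API this patch adds.

```python
def expected_topic_sequence(group_id: str, topic: str, max_retries: int) -> list[str]:
    """Topic chain a failed message travels through, per the README."""
    retries = [f"{group_id}.{topic}.retry.{n}" for n in range(1, max_retries + 1)]
    return [topic, *retries, f"{group_id}.{topic}.dlt"]


# For a consumer in group "group" reading "topic" with max_retries=3:
assert expected_topic_sequence("group", "topic", 3) == [
    "topic",
    "group.topic.retry.1",
    "group.topic.retry.2",
    "group.topic.retry.3",
    "group.topic.dlt",
]
```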
diff --git a/django_kafka/conf.py b/django_kafka/conf.py index ab76af2..1eb9eca 100644 --- a/django_kafka/conf.py +++ b/django_kafka/conf.py @@ -9,6 +9,11 @@ "GLOBAL_CONFIG": {}, "PRODUCER_CONFIG": {}, "CONSUMER_CONFIG": {}, + "RETRY_CONSUMER_CONFIG": { + "auto.offset.reset": "earliest", + "enable.auto.offset.store": False, + "topic.metadata.refresh.interval.ms": 10000, + }, "POLLING_FREQUENCY": 1, # seconds "SCHEMA_REGISTRY": {}, } diff --git a/django_kafka/consumer.py b/django_kafka/consumer.py index e91b049..47756c0 100644 --- a/django_kafka/consumer.py +++ b/django_kafka/consumer.py @@ -1,20 +1,43 @@ import logging +import traceback from pydoc import locate -from typing import Optional +from typing import TYPE_CHECKING, Optional from confluent_kafka import Consumer as ConfluentConsumer from confluent_kafka import cimpl from django_kafka.conf import settings -from django_kafka.topic import Topic +from django_kafka.exceptions import DjangoKafkaError + +if TYPE_CHECKING: + from django_kafka.topic import Topic logger = logging.getLogger(__name__) -class Topics(dict): - def __init__(self, *topics: Topic): - for topic in topics: - self[topic.name] = topic +class Topics: + _topics: tuple["Topic", ...] + _match: dict[str, "Topic"] + + def __init__(self, *topics: "Topic"): + self._topics = topics + self._match: dict[str, "Topic"] = {} + + def get_topic(self, name: str) -> "Topic": + if name not in self._match: + topic = next((t for t in self if t.matches(name)), None) + if not topic: + raise DjangoKafkaError(f"No topic registered for `{name}`") + self._match[name] = topic + + return self._match[name] + + @property + def names(self) -> list[str]: + return [topic.name for topic in self] + + def __iter__(self): + yield from self._topics class Consumer: @@ -29,67 +52,109 @@ class Consumer: https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#pythonclient-consumer """ - topics: Topics[str, Topic] + topics: Topics config: dict polling_freq = settings.POLLING_FREQUENCY default_logger = logger default_error_handler = settings.ERROR_HANDLER - def __init__(self, config: Optional[dict] = None, **kwargs): - kwargs.setdefault("logger", self.default_logger) - kwargs.setdefault("error_cb", locate(self.default_error_handler)()) + def __init__(self): + self.config = self.build_config() + self._consumer = ConfluentConsumer(self.config) - self.config = { + def __getattr__(self, name): + """proxy consumer methods.""" + return getattr(self._consumer, name) + + @classmethod + def build_config(cls): + return { "client.id": settings.CLIENT_ID, **settings.GLOBAL_CONFIG, **settings.CONSUMER_CONFIG, - **getattr(self, "config", {}), - **(config or {}), + "logger": cls.default_logger, + "error_cb": locate(cls.default_error_handler)(), + **getattr(cls, "config", {}), } - self._consumer = ConfluentConsumer(self.config, **kwargs) + @property + def group_id(self) -> str: + return self.config["group.id"] - def __getattr__(self, name): - """proxy consumer methods.""" - if name not in {"config"}: - # For cases when `Consumer.config` is not set and - # `getattr(self, "config", {})` is called on `__init__`, - # the initialization will fail because `_consumer` is not yet set. - return getattr(self._consumer, name) - raise AttributeError(f"'{self.__class__.__name__}' has no attribute 'name'") + def commit_offset(self, msg: cimpl.Message): + if not self.config.get("enable.auto.offset.store"): + # Store the offset associated with msg to a local cache. 
+ # Stored offsets are committed to Kafka by a background + # thread every 'auto.commit.interval.ms'. + # Explicitly storing offsets after processing gives at-least-once semantics. + self.store_offsets(msg) - def start(self): - # define topics - self.subscribe(topics=list(self.topics)) - while True: - # poll every self.polling_freq seconds - if msg := self.poll(timeout=self.polling_freq): - self.process_message(msg) + def retry_msg(self, msg: cimpl.Message, exc: Exception) -> bool: + from django_kafka.retry.topic import RetryTopic - def stop(self): - self.close() + topic = self.get_topic(msg) + if not topic.retry_settings: + return False + + return RetryTopic(group_id=self.group_id, main_topic=topic).retry_for( + msg=msg, + exc=exc, + ) + + def dead_letter_msg(self, msg: cimpl.Message, exc: Exception): + from django_kafka.dead_letter.topic import DeadLetterTopic + + topic = self.get_topic(msg) + DeadLetterTopic(group_id=self.group_id, main_topic=topic).produce_for( + msg=msg, + header_message=str(exc), + header_detail=traceback.format_exc(), + ) + + def handle_exception(self, msg: cimpl.Message, exc: Exception): + retried = self.retry_msg(msg, exc) + if not retried: + self.dead_letter_msg(msg, exc) + self.log_error(exc) + + def get_topic(self, msg: cimpl.Message) -> "Topic": + return self.topics.get_topic(name=msg.topic()) + + def log_error(self, error): + logger.error(error, exc_info=True) def process_message(self, msg: cimpl.Message): if msg_error := msg.error(): - self.handle_error(msg_error) + self.log_error(msg_error) return try: - self.topics[msg.topic()].consume(msg) - # ruff: noqa: BLE001 (we do not want consumer to stop if message processing is failing in any circumstances) - except Exception as error: - self.handle_error(error) - else: - self.commit_offset(msg) + self.get_topic(msg).consume(msg) + # ruff: noqa: BLE001 (we do not want consumer to stop if message consumption fails under any circumstances) + except Exception as exc: + self.handle_exception(msg, exc) - def commit_offset(self, msg: cimpl.Message): - if not self.config.get("enable.auto.offset.store"): - # Store the offset associated with msg to a local cache. - # Stored offsets are committed to Kafka by a background - # thread every 'auto.commit.interval.ms'. - # Explicitly storing offsets after processing gives at-least once semantics.
- self.store_offsets(msg) + self.commit_offset(msg) - def handle_error(self, error): - logger.error(error, exc_info=True) + def poll(self) -> Optional[cimpl.Message]: + # poll for self.polling_freq seconds + return self._consumer.poll(timeout=self.polling_freq) + + def start(self): + self.subscribe(topics=self.topics.names) + + def run(self): + try: + self.start() + while True: + if msg := self.poll(): + self.process_message(msg) + except Exception as exc: + self.log_error(exc) + raise + finally: + self.stop() + + def stop(self): + self.close() diff --git a/django_kafka/dead_letter/__init__.py b/django_kafka/dead_letter/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/django_kafka/dead_letter/headers.py b/django_kafka/dead_letter/headers.py new file mode 100644 index 0000000..1698625 --- /dev/null +++ b/django_kafka/dead_letter/headers.py @@ -0,0 +1,6 @@ +from enum import StrEnum + + +class DeadLetterHeader(StrEnum): + MESSAGE = "DEAD_LETTER_MESSAGE" + DETAIL = "DEAD_LETTER_DETAIL" diff --git a/django_kafka/dead_letter/topic.py b/django_kafka/dead_letter/topic.py new file mode 100644 index 0000000..8d7748e --- /dev/null +++ b/django_kafka/dead_letter/topic.py @@ -0,0 +1,44 @@ +import re + +from django_kafka.dead_letter.headers import DeadLetterHeader +from django_kafka.serialization import NoOpSerializer +from django_kafka.topic import Topic + + +class DeadLetterTopic(Topic): + key_serializer = NoOpSerializer + value_serializer = NoOpSerializer + + key_deserializer = NoOpSerializer + value_deserializer = NoOpSerializer + + def __init__(self, group_id: str, main_topic: Topic): + self.group_id = group_id + self.main_topic = main_topic + + @property + def name(self) -> str: + group_id = re.escape(self.group_id) + if self.main_topic.is_regex(): + topic_name = f"({self.main_topic.name.lstrip('^').rstrip('$')})" + else: + topic_name = re.escape(self.main_topic.name) + return rf"^{group_id}\.{topic_name}\.dlt$" + + def get_produce_name(self, topic_name: str) -> str: + retry_pattern = r"\.retry\.[0-9]+$" + if re.search(retry_pattern, topic_name): + return re.sub(retry_pattern, ".dlt", topic_name) + return f"{self.group_id}.{topic_name}.dlt" + + def produce_for(self, msg, header_message, header_detail): + headers = [ + (DeadLetterHeader.MESSAGE, header_message), + (DeadLetterHeader.DETAIL, header_detail), + ] + self.produce( + name=self.get_produce_name(msg.topic()), + key=msg.key(), + value=msg.value(), + headers=headers, + ) diff --git a/django_kafka/management/commands/kafka_consume.py b/django_kafka/management/commands/kafka_consume.py index 36a1da0..54e6dd7 100644 --- a/django_kafka/management/commands/kafka_consume.py +++ b/django_kafka/management/commands/kafka_consume.py @@ -21,4 +21,4 @@ def add_arguments(self, parser): ) def handle(self, consumers: Optional[list[str]] = None, *args, **options): - kafka.start_consumers(consumers) + kafka.run_consumers(consumers) diff --git a/django_kafka/registry.py b/django_kafka/registry.py index 06bac07..23e2e91 100644 --- a/django_kafka/registry.py +++ b/django_kafka/registry.py @@ -1,4 +1,3 @@ -from functools import wraps from typing import TYPE_CHECKING, Type from django_kafka.exceptions import DjangoKafkaError @@ -12,9 +11,8 @@ def __init__(self): self.__consumers: dict[str, Type[Consumer]] = {} def __call__(self): - @wraps(self) def add_to_registry(consumer_cls: Type["Consumer"]) -> Type["Consumer"]: - self.__consumers[self.get_key(consumer_cls)] = consumer_cls + self.register(consumer_cls) return consumer_cls return 
add_to_registry @@ -30,3 +28,11 @@ def __iter__(self): def get_key(self, consumer_cls: Type["Consumer"]) -> str: return f"{consumer_cls.__module__}.{consumer_cls.__name__}" + + def register(self, consumer_cls: Type["Consumer"]): + from django_kafka.retry.consumer import RetryConsumer + + key = self.get_key(consumer_cls) + self.__consumers[key] = consumer_cls + if retry_consumer_cls := RetryConsumer.build(consumer_cls): + self.__consumers[f"{key}.retry"] = retry_consumer_cls diff --git a/django_kafka/retry/__init__.py b/django_kafka/retry/__init__.py new file mode 100644 index 0000000..f1e44e4 --- /dev/null +++ b/django_kafka/retry/__init__.py @@ -0,0 +1,55 @@ +from typing import TYPE_CHECKING, Optional, Type + +from django.utils import timezone + +if TYPE_CHECKING: + from django_kafka.topic import Topic + + +class RetrySettings: + def __init__( + self, + max_retries: int, + delay: int, + backoff: bool = False, + include: Optional[list[Type[Exception]]] = None, + exclude: Optional[list[Type[Exception]]] = None, + ): + """ + :param max_retries: maximum number of retry attempts + :param delay: delay between attempts, in seconds + :param backoff: apply exponential backoff to the delay + :param include: exception types to retry for + :param exclude: exception types to exclude from retry + """ + if max_retries <= 0: + raise ValueError("max_retries must be greater than zero") + if delay <= 0: + raise ValueError("delay must be greater than zero") + if include is not None and exclude is not None: + raise ValueError("cannot specify both include and exclude") + + self.max_retries = max_retries + self.delay = delay + self.backoff = backoff + self.include = include + self.exclude = exclude + + def __call__(self, topic_cls: Type["Topic"]): + topic_cls.retry_settings = self + return topic_cls + + def attempts_exceeded(self, attempt): + return attempt > self.max_retries + + def should_retry(self, exc: Exception) -> bool: + if self.include is not None: + return any(isinstance(exc, e) for e in self.include) + if self.exclude is not None: + return not any(isinstance(exc, e) for e in self.exclude) + + return True + + def get_retry_timestamp(self, attempt: int) -> str: + delay = self.delay * 2 ** (attempt - 1) if self.backoff else self.delay + return str(timezone.now().timestamp() + delay) diff --git a/django_kafka/retry/consumer.py b/django_kafka/retry/consumer.py new file mode 100644 index 0000000..c67daac --- /dev/null +++ b/django_kafka/retry/consumer.py @@ -0,0 +1,104 @@ +import traceback +from datetime import datetime +from typing import TYPE_CHECKING, Optional, Type, cast + +from confluent_kafka import TopicPartition, cimpl +from django.utils import timezone + +from django_kafka.conf import settings +from django_kafka.consumer import Consumer, Topics +from django_kafka.dead_letter.topic import DeadLetterTopic +from django_kafka.retry.headers import RetryHeader +from django_kafka.retry.topic import RetryTopic + +if TYPE_CHECKING: + from django_kafka.topic import Topic + + +class RetryTopics(Topics): + def __init__(self, group_id: str, *topics: "Topic"): + super().__init__(*(RetryTopic(group_id=group_id, main_topic=t) for t in topics)) + + +class RetryConsumer(Consumer): + topics: RetryTopics + resume_times: dict[TopicPartition, datetime] + + def __init__(self): + super().__init__() + self.resume_times = {} + + @classmethod + def build(cls, consumer_cls: Type["Consumer"]) -> Optional[Type["RetryConsumer"]]: + """Generates a RetryConsumer subclass bound to the retryable topics of the given consumer class""" + retryable_topics = [t for t in consumer_cls.topics
if t.retry_settings] + if not retryable_topics: + return None + + group_id = consumer_cls.build_config()["group.id"] + + return type( + f"{consumer_cls.__name__}Retry", + (cls,), + { + "config": { + **getattr(cls, "config", {}), + "group.id": f"{group_id}.retry", + }, + "topics": RetryTopics(group_id, *retryable_topics), + }, + ) + + @classmethod + def build_config(cls): + return { + **super().build_config(), + **settings.RETRY_CONSUMER_CONFIG, + **getattr(cls, "config", {}), + } + + def retry_msg(self, msg: cimpl.Message, exc: Exception) -> bool: + retry_topic = cast(RetryTopic, self.get_topic(msg)) + return retry_topic.retry_for(msg=msg, exc=exc) + + def dead_letter_msg(self, msg: cimpl.Message, exc: Exception): + retry_topic = cast(RetryTopic, self.get_topic(msg)) + DeadLetterTopic( + group_id=retry_topic.group_id, + main_topic=retry_topic.main_topic, + ).produce_for( + msg=msg, + header_message=str(exc), + header_detail=traceback.format_exc(), + ) + + def pause_partition(self, msg, until: datetime): + """pauses the partition and stores the resumption time""" + tp = TopicPartition(msg.topic(), msg.partition(), msg.offset()) + self.seek(tp) # seek back to message offset, so it is re-polled on unpause + self.pause([tp]) + self.resume_times[tp] = until + + def resume_ready_partitions(self): + """resumes paused partitions whose retry time has passed""" + now = timezone.now() + for tp, until in list(self.resume_times.items()): + if now < until: + continue + self.resume([tp]) + del self.resume_times[tp] + + def poll(self): + self.resume_ready_partitions() + return super().poll() + + def process_message(self, msg: cimpl.Message): + retry_time = RetryHeader.get_retry_time(msg.headers()) + if retry_time and retry_time > timezone.now(): + self.pause_partition(msg, retry_time) + return + super().process_message(msg) + + def stop(self): + self.resume_times = {} + super().stop() diff --git a/django_kafka/retry/headers.py b/django_kafka/retry/headers.py new file mode 100644 index 0000000..0271a11 --- /dev/null +++ b/django_kafka/retry/headers.py @@ -0,0 +1,28 @@ +from datetime import datetime +from enum import StrEnum +from typing import Optional + +from django.utils import timezone + + +class RetryHeader(StrEnum): + MESSAGE = "RETRY_MESSAGE" + TIMESTAMP = "RETRY_TIMESTAMP" + + @staticmethod + def get_header(headers: list[tuple], header: "RetryHeader") -> Optional[str]: + return next((v for k, v in headers if k == header), None) + + @staticmethod + def get_retry_time(headers: Optional[list[tuple]]) -> Optional["datetime"]: + """returns the retry time from the message headers""" + if not headers: + return None + + header = RetryHeader.get_header(headers, RetryHeader.TIMESTAMP) + try: + epoch = float(header) + except (TypeError, ValueError): + return None + else: + return datetime.fromtimestamp(epoch, tz=timezone.get_current_timezone()) diff --git a/django_kafka/retry/topic.py b/django_kafka/retry/topic.py new file mode 100644 index 0000000..81db58b --- /dev/null +++ b/django_kafka/retry/topic.py @@ -0,0 +1,72 @@ +import re + +from confluent_kafka import cimpl + +from django_kafka.exceptions import DjangoKafkaError +from django_kafka.retry.headers import RetryHeader +from django_kafka.serialization import NoOpSerializer +from django_kafka.topic import Topic + + +class RetryTopic(Topic): + key_serializer = NoOpSerializer + value_serializer = NoOpSerializer + + key_deserializer = NoOpSerializer + value_deserializer = NoOpSerializer + + def __init__(self, group_id: str, main_topic: Topic): + if not
main_topic.retry_settings: + raise DjangoKafkaError(f"Topic {main_topic} is not marked for retry") + self.main_topic = main_topic + self.group_id = group_id + + @property + def name(self) -> str: + group_id = re.escape(self.group_id) + if self.main_topic.is_regex(): + topic_name = f"({self.main_topic.name.lstrip('^').rstrip('$')})" + else: + topic_name = re.escape(self.main_topic.name) + + return rf"^{group_id}\.{topic_name}\.retry\.[0-9]+$" + + @classmethod + def get_attempt(cls, topic_name: str) -> int: + match = re.search(r"\.retry\.([0-9]+)$", topic_name) + return int(match.group(1)) if match else 0 + + def get_produce_name(self, topic_name: str, attempt: int) -> str: + if attempt == 1: + return f"{self.group_id}.{topic_name}.retry.1" + return re.sub(r"\.[0-9]+$", f".{attempt}", topic_name) + + def consume(self, msg: cimpl.Message): + self.main_topic.consume(msg) + + def retry_for(self, msg: cimpl.Message, exc: Exception) -> bool: + """ + msg: the message that failed, this may come from the main topic or retry topic + exc: Exception that caused the failure + """ + settings = self.main_topic.retry_settings + + if not settings.should_retry(exc=exc): + return False + + topic_name = msg.topic() + attempt = self.get_attempt(topic_name) + 1 + + if settings.attempts_exceeded(attempt): + return False + + self.produce( + name=self.get_produce_name(topic_name, attempt), + key=msg.key(), + value=msg.value(), + headers=[ + (RetryHeader.MESSAGE, str(exc)), + (RetryHeader.TIMESTAMP, settings.get_retry_timestamp(attempt)), + ], + ) + return True diff --git a/django_kafka/serialization.py b/django_kafka/serialization.py new file mode 100644 index 0000000..942cebb --- /dev/null +++ b/django_kafka/serialization.py @@ -0,0 +1,8 @@ +from confluent_kafka.serialization import Serializer + + +class NoOpSerializer(Serializer): + """performs no message serialization, returning the message without modification""" + + def __call__(self, obj, ctx=None): + return obj diff --git a/django_kafka/tests/dead_letter/__init__.py b/django_kafka/tests/dead_letter/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/django_kafka/tests/dead_letter/test_topic.py b/django_kafka/tests/dead_letter/test_topic.py new file mode 100644 index 0000000..3f7535c --- /dev/null +++ b/django_kafka/tests/dead_letter/test_topic.py @@ -0,0 +1,66 @@ +from unittest import mock + +from django.test import TestCase + +from django_kafka.dead_letter.headers import DeadLetterHeader +from django_kafka.dead_letter.topic import DeadLetterTopic +from django_kafka.topic import Topic + + +class DeadLetterTopicTestCase(TestCase): + def _get_main_topic(self, topic_name: str = "topic_name"): + class TestTopic(Topic): + name = topic_name + + return TestTopic() + + def test_name(self): + main_topic = self._get_main_topic(topic_name="topic.name") + dl_topic = DeadLetterTopic(group_id="group.id", main_topic=main_topic) + + self.assertEqual(dl_topic.name, r"^group\.id\.topic\.name\.dlt$") + + def test_name__regex(self): + """tests regex topic names are correctly inserted into the dlt regex""" + main_topic = self._get_main_topic(topic_name="^topic_name|other_name$") + dl_topic = DeadLetterTopic(group_id="group.id", main_topic=main_topic) + + self.assertEqual( + dl_topic.name, + r"^group\.id\.(topic_name|other_name)\.dlt$", + ) + + def test_get_produce_name(self): + """tests normal and retry topics are correctly handled by get_produce_name""" + main_topic = self._get_main_topic() + dl_topic = DeadLetterTopic(group_id="group.id", main_topic=main_topic)
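+ # get_produce_name collapses an existing ".retry.N" suffix to ".dlt"; + # plain topic names become "<group_id>.<topic_name>.dlt".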
+ + dlt_name_1 = dl_topic.get_produce_name("fake") + dlt_name_2 = dl_topic.get_produce_name("group.id.fake.retry.1") + + self.assertEqual(dlt_name_1, "group.id.fake.dlt") + self.assertEqual(dlt_name_2, "group.id.fake.dlt") + + @mock.patch("django_kafka.dead_letter.topic.DeadLetterTopic.produce") + def test_produce_for(self, mock_produce): + main_topic = self._get_main_topic() + group_id = "group.id" + header_message = "header_message" + header_detail = "header_detail" + msg_mock = mock.Mock(**{"topic.return_value": "msg_topic"}) + + DeadLetterTopic(group_id=group_id, main_topic=main_topic).produce_for( + msg=msg_mock, + header_message=header_message, + header_detail=header_detail, + ) + + mock_produce.assert_called_once_with( + name="group.id.msg_topic.dlt", + key=msg_mock.key(), + value=msg_mock.value(), + headers=[ + (DeadLetterHeader.MESSAGE, header_message), + (DeadLetterHeader.DETAIL, header_detail), + ], + ) diff --git a/django_kafka/tests/retry/__init__.py b/django_kafka/tests/retry/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/django_kafka/tests/retry/test_consumer.py b/django_kafka/tests/retry/test_consumer.py new file mode 100644 index 0000000..460ac73 --- /dev/null +++ b/django_kafka/tests/retry/test_consumer.py @@ -0,0 +1,283 @@ +import datetime +import traceback +from unittest import mock + +from confluent_kafka import TopicPartition +from django.test import TestCase, override_settings +from django.utils import timezone + +from django_kafka.conf import SETTINGS_KEY +from django_kafka.consumer import Consumer, Topics +from django_kafka.retry import RetrySettings +from django_kafka.retry.consumer import RetryConsumer, RetryTopics +from django_kafka.retry.headers import RetryHeader +from django_kafka.topic import Topic + + +class RetryConsumerTestCase(TestCase): + def _get_topic(self): + class NormalTopic(Topic): + name = "normal_topic" + + return NormalTopic() + + def _get_retryable_topic(self): + class RetryableTopic(Topic): + name = "retry_topic" + + retry = RetrySettings(max_retries=5, delay=60) + retry(RetryableTopic) + + return RetryableTopic() + + def _get_retryable_consumer_cls(self, group_id="group_id"): + class TestConsumer(Consumer): + topics = Topics(self._get_topic(), self._get_retryable_topic()) + config = {"group.id": group_id} + + return TestConsumer + + def _get_retry_consumer(self, consumer_group_id="group_id"): + return RetryConsumer.build( + self._get_retryable_consumer_cls(group_id=consumer_group_id), + )() + + @override_settings( + **{ + SETTINGS_KEY: { + "RETRY_CONSUMER_CONFIG": { + "bootstrap.servers": "bootstrap.defined-by-retry-consumer-config", + "group.id": "group.id-defined-by-retry-consumer-config", + "topic.metadata.refresh.interval.ms": 10000, + }, + }, + }, + ) + @mock.patch("django_kafka.consumer.ConfluentConsumer") + @mock.patch("django_kafka.retry.consumer.Consumer.build_config") + def test_config_merge_override( + self, + mock_consumer_build_config, + mock_consumer_client, + ): + """ + 1. Consumer.build_config() supplies the base consumer config + 2. RETRY_CONSUMER_CONFIG is merged next and overrides keys if any + 3.
RetryConsumer.config is merged next and overrides keys if any + """ + + mock_consumer_build_config.return_value = { + "bootstrap.servers": "bootstrap.defined-by-consumer-cls", + "group.id": "group.id.set-by-consumer-cls", + "enable.auto.offset.store": False, + } + + class SomeRetryConsumer(RetryConsumer): + config = { + "group.id": "group.id.overridden-by-retry-consumer-class", + } + + retry_consumer = SomeRetryConsumer() + + self.assertDictEqual( + SomeRetryConsumer.build_config(), + { + "bootstrap.servers": "bootstrap.defined-by-retry-consumer-config", + "group.id": "group.id.overridden-by-retry-consumer-class", + "topic.metadata.refresh.interval.ms": 10000, + "enable.auto.offset.store": False, + }, + ) + self.assertDictEqual( + SomeRetryConsumer.build_config(), + retry_consumer.config, + ) + + def test_build(self): + consumer_cls = self._get_retryable_consumer_cls() + retry_consumer_cls = RetryConsumer.build(consumer_cls) + + self.assertTrue(issubclass(retry_consumer_cls, RetryConsumer)) + + self.assertEqual( + retry_consumer_cls.config["group.id"], + f"{consumer_cls.build_config()['group.id']}.retry", + ) + self.assertIsInstance(retry_consumer_cls.topics, RetryTopics) + self.assertCountEqual( + [t for t in consumer_cls.topics if t.retry_settings], + [t.main_topic for t in retry_consumer_cls.topics], + ) + + def test_build__no_retry_topics(self): + class TestConsumer(Consumer): + topics = Topics() + config = {} + + self.assertIsNone(RetryConsumer.build(TestConsumer)) + + def test_retry_msg(self): + retry_consumer = self._get_retry_consumer() + retry_topic = next(iter(retry_consumer.topics)) + retry_topic.retry_for = mock.Mock() + exc = ValueError() + msg_mock = mock.Mock( + **{ + "topic.return_value": retry_topic.get_produce_name( + retry_topic.main_topic.name, + attempt=1, + ), + }, + ) + + retried = retry_consumer.retry_msg(msg_mock, exc) + + retry_topic.retry_for.assert_called_once_with(msg=msg_mock, exc=exc) + self.assertEqual(retried, retry_topic.retry_for.return_value) + + @mock.patch("django_kafka.retry.consumer.DeadLetterTopic") + def test_dead_letter_msg(self, mock_dead_letter_topic_cls): + retry_consumer = self._get_retry_consumer() + retry_topic = next(iter(retry_consumer.topics)) + exc = ValueError() + msg_mock = mock.Mock( + **{ + "topic.return_value": retry_topic.get_produce_name( + retry_topic.main_topic.name, + attempt=1, + ), + }, + ) + + retry_consumer.dead_letter_msg(msg_mock, exc) + + mock_dead_letter_topic_cls.assert_called_once_with( + group_id=retry_topic.group_id, + main_topic=retry_topic.main_topic, + ) + mock_dead_letter_topic_cls.return_value.produce_for.assert_called_once_with( + msg=msg_mock, + header_message=str(exc), + header_detail=traceback.format_exc(), + ) + + @mock.patch("django_kafka.consumer.ConfluentConsumer") + def test_pause_partition(self, mock_confluent_consumer): + retry_consumer = self._get_retry_consumer() + mock_msg = mock.Mock( + **{ + "topic.return_value": "msg_topic", + "partition.return_value": 0, + "offset.return_value": 0, + }, + ) + partition = TopicPartition( + mock_msg.topic(), + mock_msg.partition(), + mock_msg.offset(), + ) + retry_time = timezone.now() + + retry_consumer.pause_partition(mock_msg, retry_time) + + mock_confluent_consumer.return_value.seek.assert_called_once_with(partition) + mock_confluent_consumer.return_value.pause.assert_called_once_with([partition]) + + @mock.patch("django_kafka.consumer.ConfluentConsumer") + def test_resume_partition__before_retry_time(self, mock_confluent_consumer): + retry_consumer = 
self._get_retry_consumer() + mock_msg = mock.Mock( + **{ + "topic.return_value": "msg_topic", + "partition.return_value": 0, + "offset.return_value": 0, + }, + ) + retry_time = timezone.now() + datetime.timedelta(minutes=1) + + retry_consumer.pause_partition(mock_msg, retry_time) + retry_consumer.resume_ready_partitions() + + mock_confluent_consumer.return_value.resume.assert_not_called() + + @mock.patch("django_kafka.consumer.ConfluentConsumer") + def test_resume_ready_partitions__after_retry_time(self, mock_confluent_consumer): + retry_consumer = self._get_retry_consumer() + mock_msg = mock.Mock( + **{ + "topic.return_value": "msg_topic", + "partition.return_value": 0, + "offset.return_value": 0, + }, + ) + partition = TopicPartition( + mock_msg.topic(), + mock_msg.partition(), + mock_msg.offset(), + ) + retry_time = timezone.now() - datetime.timedelta(minutes=1) + + retry_consumer.pause_partition(mock_msg, retry_time) + retry_consumer.resume_ready_partitions() + + mock_confluent_consumer.return_value.resume.assert_called_once_with([partition]) + + @mock.patch("django_kafka.consumer.ConfluentConsumer") + def test_poll(self, mock_confluent_consumer): + """tests poll resumes partitions""" + retry_consumer = self._get_retry_consumer() + retry_consumer.resume_ready_partitions = mock.Mock() + mock_msg = mock.Mock() + mock_confluent_consumer.return_value.poll.return_value = mock_msg + + msg = retry_consumer.poll() + + self.assertEqual(msg, mock_msg) + retry_consumer.resume_ready_partitions.assert_called_once() # always called + + @mock.patch("django_kafka.consumer.Consumer.process_message") + @mock.patch("django_kafka.consumer.ConfluentConsumer") + def test_process_message__before_retry_time( + self, + mock_confluent_consumer, + mock_consumer_process_message, + ): + retry_consumer = self._get_retry_consumer() + retry_consumer.pause_partition = mock.Mock() + retry_time = timezone.now() + datetime.timedelta(minutes=1) + mock_msg = mock.Mock( + **{ + "error.return_value": None, + "headers.return_value": [ + (RetryHeader.TIMESTAMP, str(retry_time.timestamp())), + ], + }, + ) + + retry_consumer.process_message(mock_msg) + retry_consumer.pause_partition.assert_called_once_with(mock_msg, retry_time) + mock_consumer_process_message.assert_not_called() + + @mock.patch("django_kafka.consumer.Consumer.process_message") + @mock.patch("django_kafka.consumer.ConfluentConsumer") + def test_process_message__after_retry_time( + self, + mock_confluent_consumer, + mock_consumer_process_message, + ): + retry_consumer = self._get_retry_consumer() + retry_consumer.pause_partition = mock.Mock() + retry_time = timezone.now() - datetime.timedelta(minutes=1) + mock_msg = mock.Mock( + **{ + "error.return_value": None, + "headers.return_value": [ + (RetryHeader.TIMESTAMP, str(retry_time.timestamp())), + ], + }, + ) + + retry_consumer.process_message(mock_msg) + + retry_consumer.pause_partition.assert_not_called() + mock_consumer_process_message.assert_called_once_with(mock_msg) diff --git a/django_kafka/tests/retry/test_headers.py b/django_kafka/tests/retry/test_headers.py new file mode 100644 index 0000000..f9b66b5 --- /dev/null +++ b/django_kafka/tests/retry/test_headers.py @@ -0,0 +1,27 @@ +from django.test import TestCase +from django.utils import timezone + +from django_kafka.retry.headers import RetryHeader + + +class RetryHeaderTestCase(TestCase): + def test_get_header(self): + headers = [(RetryHeader.TIMESTAMP, "abc")] + + self.assertEqual(RetryHeader.get_header(headers, RetryHeader.TIMESTAMP),
"abc") + self.assertEqual(RetryHeader.get_header([], RetryHeader.TIMESTAMP), None) + + def test_get_retry_time(self): + now = timezone.now() + headers = [(RetryHeader.TIMESTAMP, str(now.timestamp()))] + + self.assertEqual(RetryHeader.get_retry_time(headers), now) + + def test_get_retry_time__none_when_unavailable(self): + headers1 = [(RetryHeader.TIMESTAMP, "abc")] + headers2 = [] + headers3 = None + + self.assertEqual(RetryHeader.get_retry_time(headers1), None) + self.assertEqual(RetryHeader.get_retry_time(headers2), None) + self.assertEqual(RetryHeader.get_retry_time(headers3), None) diff --git a/django_kafka/tests/retry/test_retry.py b/django_kafka/tests/retry/test_retry.py new file mode 100644 index 0000000..bc27edb --- /dev/null +++ b/django_kafka/tests/retry/test_retry.py @@ -0,0 +1,70 @@ +from unittest import mock + +from django.test import TestCase + +from django_kafka.retry import RetrySettings +from django_kafka.topic import Topic + + +class RetrySettingTestCase(TestCase): + def test_should_retry__include(self): + settings = RetrySettings(max_retries=5, delay=60, include=[ValueError]) + + self.assertEqual(settings.should_retry(ValueError()), True) + self.assertEqual(settings.should_retry(IndexError()), False) + + def test_should_retry__exclude(self): + settings = RetrySettings(max_retries=5, delay=60, exclude=[ValueError]) + + self.assertEqual(settings.should_retry(ValueError()), False) + self.assertEqual(settings.should_retry(IndexError()), True) + + def test_should_retry__no_include_exclude(self): + settings = RetrySettings(max_retries=5, delay=60) + + self.assertEqual(settings.should_retry(ValueError()), True) + + def test_attempts_exceeded(self): + settings = RetrySettings(max_retries=5, delay=60) + + self.assertEqual(settings.attempts_exceeded(6), True) + self.assertEqual(settings.attempts_exceeded(5), False) + self.assertEqual(settings.attempts_exceeded(4), False) + + @mock.patch("django.utils.timezone.now") + def test_get_retry_time__fixed_delay(self, mock_now): + settings = RetrySettings(max_retries=5, delay=60, backoff=False) + + timestamp = 1000 + mock_now.return_value.timestamp.return_value = timestamp + self.assertEqual(settings.get_retry_timestamp(1), str(timestamp + 60)) + self.assertEqual(settings.get_retry_timestamp(2), str(timestamp + 60)) + self.assertEqual(settings.get_retry_timestamp(3), str(timestamp + 60)) + self.assertEqual(settings.get_retry_timestamp(4), str(timestamp + 60)) + + @mock.patch("django.utils.timezone.now") + def test_get_retry_time__backoff_delay(self, mock_now): + settings = RetrySettings(max_retries=5, delay=60, backoff=True) + + timestamp = 1000 + mock_now.return_value.timestamp.return_value = timestamp + self.assertEqual(settings.get_retry_timestamp(1), str(timestamp + 60)) + self.assertEqual(settings.get_retry_timestamp(2), str(timestamp + 120)) + self.assertEqual(settings.get_retry_timestamp(3), str(timestamp + 240)) + self.assertEqual(settings.get_retry_timestamp(4), str(timestamp + 480)) + + +class RetryDecoratorTestCase(TestCase): + def test_retry_decorator(self): + class TopicA(Topic): + pass + + retry = RetrySettings(max_retries=5, delay=60) + result = retry(TopicA) + + self.assertIs(result, TopicA) + self.assertIsNotNone(TopicA.retry_settings) + self.assertEqual( + [TopicA.retry_settings.max_retries, TopicA.retry_settings.delay], + [5, 60], + ) diff --git a/django_kafka/tests/retry/test_topic.py b/django_kafka/tests/retry/test_topic.py new file mode 100644 index 0000000..44fcaa4 --- /dev/null +++ 
b/django_kafka/tests/retry/test_topic.py @@ -0,0 +1,153 @@ +from unittest import mock + +from django.test import TestCase + +from django_kafka.exceptions import DjangoKafkaError +from django_kafka.retry import RetrySettings +from django_kafka.retry.headers import RetryHeader +from django_kafka.retry.topic import RetryTopic +from django_kafka.topic import Topic + + +class RetryTopicTestCase(TestCase): + def _get_retryable_topic(self, topic_name: str = "topic_name", **retry_kwargs): + class TestTopic(Topic): + name = topic_name + + retry = RetrySettings(**{"max_retries": 5, "delay": 60, **retry_kwargs}) + topic_cls = retry(TestTopic) + return topic_cls() + + def test_init(self): + main_topic = self._get_retryable_topic() + + RetryTopic(group_id="group.id", main_topic=main_topic) + + def test_init__raises_without_retry(self): + class TestTopic(Topic): + name = "topic_name" + + with self.assertRaises(DjangoKafkaError): + RetryTopic(group_id="group.id", main_topic=TestTopic()) + + def test__get_attempt(self): + self.assertEqual(RetryTopic.get_attempt("topic"), 0) + self.assertEqual(RetryTopic.get_attempt("group.id.topic.retry"), 0) + self.assertEqual(RetryTopic.get_attempt("group.id.topic.retry.0"), 0) + self.assertEqual(RetryTopic.get_attempt("group.id.topic.retry.2"), 2) + self.assertEqual(RetryTopic.get_attempt("group.id.topic.retry.10"), 10) + + def test_name(self): + """assert name format and correct escaping of regex special characters""" + main_topic = self._get_retryable_topic(topic_name="topic.name") + retry_topic = RetryTopic(group_id="group.id", main_topic=main_topic) + + self.assertEqual(retry_topic.name, r"^group\.id\.topic\.name\.retry\.[0-9]+$") + + def test_name__regex(self): + """tests regex topic names are correctly inserted into the retry topic regex""" + main_topic = self._get_retryable_topic(topic_name="^topic_name|other_name$") + retry_topic = RetryTopic(group_id="group.id", main_topic=main_topic) + + self.assertEqual( + retry_topic.name, + r"^group\.id\.(topic_name|other_name)\.retry\.[0-9]+$", + ) + + def test_get_produce_name(self): + main_topic = self._get_retryable_topic() + retry_topic = RetryTopic(group_id="group.id", main_topic=main_topic) + + retry_topic_1 = retry_topic.get_produce_name("fake", 1) + retry_topic_2 = retry_topic.get_produce_name("group.id.fake.retry.1", 2) + retry_topic_3 = retry_topic.get_produce_name("group.id.fake.retry.2", 3) + + self.assertEqual(retry_topic_1, "group.id.fake.retry.1") + self.assertEqual(retry_topic_2, "group.id.fake.retry.2") + self.assertEqual(retry_topic_3, "group.id.fake.retry.3") + + def test_consume(self): + """tests RetryTopic uses the main topic consume method""" + main_topic = self._get_retryable_topic() + main_topic.consume = mock.Mock() + mock_msg = mock.Mock() + + RetryTopic(group_id="group.id", main_topic=main_topic).consume(mock_msg) + + main_topic.consume.assert_called_once_with(mock_msg) + + @mock.patch("django_kafka.retry.RetrySettings.get_retry_timestamp") + def test_retry_for__first_retry(self, mock_get_retry_timestamp: mock.Mock): + main_topic = self._get_retryable_topic(max_retries=5) + retry_topic = RetryTopic(group_id="group.id", main_topic=main_topic) + retry_topic.produce = mock.Mock() + mock_msg = mock.Mock(**{"topic.return_value": "msg_topic"}) + + retried = retry_topic.retry_for( + msg=mock_msg, + exc=ValueError("error message"), + ) + + self.assertTrue(retried) + retry_topic.produce.assert_called_with( + name="group.id.msg_topic.retry.1", + key=mock_msg.key(), + value=mock_msg.value(), + headers=[ +
(RetryHeader.MESSAGE, "error message"), + (RetryHeader.TIMESTAMP, mock_get_retry_timestamp.return_value), + ], + ) + mock_get_retry_timestamp.assert_called_once_with(1) + + @mock.patch("django_kafka.retry.RetrySettings.get_retry_timestamp") + def test_retry_for__last_retry(self, mock_get_retry_timestamp): + main_topic = self._get_retryable_topic(max_retries=5) + retry_topic = RetryTopic(group_id="group.id", main_topic=main_topic) + retry_topic.produce = mock.Mock() + mock_msg = mock.Mock(**{"topic.return_value": "group.id.msg_topic.retry.4"}) + + retried = retry_topic.retry_for( + msg=mock_msg, + exc=ValueError("error message"), + ) + + self.assertTrue(retried) + retry_topic.produce.assert_called_with( + name="group.id.msg_topic.retry.5", + key=mock_msg.key(), + value=mock_msg.value(), + headers=[ + (RetryHeader.MESSAGE, "error message"), + (RetryHeader.TIMESTAMP, mock_get_retry_timestamp.return_value), + ], + ) + mock_get_retry_timestamp.assert_called_once_with(5) + + def test_retry_for__no_more_retries(self): + main_topic = self._get_retryable_topic(max_retries=5) + retry_topic = RetryTopic(group_id="group.id", main_topic=main_topic) + retry_topic.produce = mock.Mock() + mock_msg = mock.Mock(**{"topic.return_value": "group.id.msg_topic.retry.5"}) + + retried = retry_topic.retry_for( + msg=mock_msg, + exc=ValueError(), + ) + + self.assertFalse(retried) + retry_topic.produce.assert_not_called() + + def test_retry_for__no_retry_for_excluded_error(self): + topic = self._get_retryable_topic(exclude=[ValueError]) + mock_msg = mock.Mock(**{"topic.return_value": "msg_topic"}) + retry_topic = RetryTopic(group_id="group.id", main_topic=topic) + retry_topic.produce = mock.Mock() + + retried = retry_topic.retry_for( + msg=mock_msg, + exc=ValueError(), + ) + + self.assertFalse(retried) + retry_topic.produce.assert_not_called() diff --git a/django_kafka/tests/test_consumer.py b/django_kafka/tests/test_consumer.py index dd9e865..22f68e4 100644 --- a/django_kafka/tests/test_consumer.py +++ b/django_kafka/tests/test_consumer.py @@ -1,3 +1,4 @@ +import traceback from contextlib import suppress from unittest.mock import MagicMock, Mock, call, patch @@ -12,6 +13,14 @@ class StopWhileTrueError(Exception): class ConsumerTestCase(TestCase): + def test_group_id(self): + class SomeConsumer(Consumer): + config = {"group.id": "group_id"} + + consumer = SomeConsumer() + + self.assertEqual(consumer.group_id, "group_id") + @patch( "django_kafka.consumer.Consumer.process_message", side_effect=StopWhileTrueError(), ) @patch( "django_kafka.consumer.ConfluentConsumer", **{ "return_value.poll.side_effect": [None, True], }, ) - def test_start(self, mock_consumer_client, mock_process_message): + def test_run(self, mock_consumer_client, mock_process_message): class SomeConsumer(Consumer): topics = MagicMock() config = {} + log_error = Mock() consumer = SomeConsumer() @@ -35,11 +45,11 @@ class SomeConsumer(Consumer): # `consumer.run` uses a `while True:` loop which never ends # in order to test it we need to break it which is achievable with side_effect with suppress(StopWhileTrueError): - consumer.start() + consumer.run() # subscribed to the topics defined by consumer class mock_consumer_client.return_value.subscribe.assert_called_once_with( - topics=list(consumer.topics), + topics=consumer.topics.names, ) # poll for message with the frequency defined by consumer class self.assertEqual( @@ -66,18 +76,18 @@ class SomeConsumer(Consumer): # check if msg has error before processing msg.error.assert_called_once_with() #
Topic.consume called - consumer.topics[msg.topic()].consume.assert_called_once_with(msg) + consumer.get_topic(msg).consume.assert_called_once_with(msg) # commit_offset triggered mock_commit_offset.assert_called_once_with(msg) - @patch("django_kafka.consumer.Consumer.handle_error") + @patch("django_kafka.consumer.Consumer.log_error") @patch("django_kafka.consumer.Consumer.commit_offset") @patch("django_kafka.consumer.ConfluentConsumer") def test_process_message_msg_error_logged( self, mock_consumer_client, mock_commit_offset, - handle_error, + log_error, ): class SomeConsumer(Consumer): topics = MagicMock() @@ -92,20 +102,20 @@ class SomeConsumer(Consumer): # check if msg has error before processing msg.error.assert_called_once_with() # error handler was triggered - handle_error.assert_called_once_with(msg.error.return_value) + log_error.assert_called_once_with(msg.error.return_value) # Topic.consume is not called consumer.topics[msg.topic()].consume.assert_not_called() # Consumer.commit_offset is not called mock_commit_offset.assert_not_called() - @patch("django_kafka.consumer.Consumer.handle_error") + @patch("django_kafka.consumer.Consumer.handle_exception") @patch("django_kafka.consumer.Consumer.commit_offset") @patch("django_kafka.consumer.ConfluentConsumer") def test_process_message_exception( self, mock_consumer_client, mock_commit_offset, - handle_error, + handle_exception, ): topic_consume_side_effect = TypeError("test") topic = Mock( **{ "name": "topic", "consume.side_effect": topic_consume_side_effect, }, ) msg = Mock(**{"topic.return_value": topic.name, "error.return_value": False}) class SomeConsumer(Consumer): - topics = Topics( - topic, - ) + topics = Topics(topic) config = {} consumer = SomeConsumer() @@ -129,20 +137,123 @@ class SomeConsumer(Consumer): # check if msg has error before processing msg.error.assert_called_once_with() # Topic.consume was triggered - consumer.topics[msg.topic()].consume.assert_called_once_with(msg) + topic.consume.assert_called_once_with(msg) # error handler was triggered on exception - handle_error.assert_called_once_with(topic_consume_side_effect) + handle_exception.assert_called_once_with(msg, topic_consume_side_effect) # Consumer.commit_offset is still called - mock_commit_offset.assert_not_called() + mock_commit_offset.assert_called_once() + + def test_handle_exception(self): + msg = Mock() + + class SomeConsumer(Consumer): + topics = Topics() + config = {"group.id": "group_id"} + retry_msg = Mock(return_value=True) # successful retry + dead_letter_msg = Mock() + log_error = Mock() + + consumer = SomeConsumer() + exc = ValueError() + + consumer.handle_exception(msg, exc) + + consumer.retry_msg.assert_called_once_with(msg, exc) + consumer.dead_letter_msg.assert_not_called() + consumer.log_error.assert_not_called() + + def test_handle_exception__failed_retry(self): + msg = Mock() + + class SomeConsumer(Consumer): + topics = Topics() + config = {"group.id": "group_id"} + retry_msg = Mock(return_value=False) # failed retry + dead_letter_msg = Mock() + log_error = Mock() + + consumer = SomeConsumer() + exc = ValueError() + + consumer.handle_exception(msg, exc) + + consumer.retry_msg.assert_called_once_with(msg, exc) + consumer.dead_letter_msg.assert_called_once_with(msg, exc) + consumer.log_error.assert_called_once_with(exc) + + @patch("django_kafka.retry.topic.RetryTopic") + def test_retry_msg(self, mock_retry_topic_cls): + topic = Mock(name="topic", retry_settings=Mock()) + msg = Mock(**{"topic.return_value": topic.name}) + retry_for =
mock_retry_topic_cls.return_value.retry_for + retry_for.return_value = True + + class SomeConsumer(Consumer): + topics = Topics(topic) + config = {"group.id": "group_id"} + + consumer = SomeConsumer() + exc = ValueError() + + retried = consumer.retry_msg(msg, exc) + + mock_retry_topic_cls.assert_called_once_with( + group_id=consumer.group_id, + main_topic=topic, + ) + retry_for.assert_called_once_with(msg=msg, exc=exc) + self.assertEqual(retried, True) + + @patch("django_kafka.retry.topic.RetryTopic") + def test_retry_msg__no_retry(self, mock_retry_topic_cls): + topic = Mock(name="topic", retry_settings=None) + msg = Mock(**{"topic.return_value": topic.name}) + + class SomeConsumer(Consumer): + topics = Topics(topic) + config = {"group.id": "group_id"} + + consumer = SomeConsumer() + exc = ValueError() + + retried = consumer.retry_msg(msg, exc) + + mock_retry_topic_cls.assert_not_called() + self.assertEqual(retried, False) + + @patch("django_kafka.dead_letter.topic.DeadLetterTopic") + def test_dead_letter_msg(self, mock_dead_letter_topic_cls): + topic = Mock(name="topic") + msg = Mock(**{"topic.return_value": topic.name}) + produce_for = mock_dead_letter_topic_cls.return_value.produce_for + + class SomeConsumer(Consumer): + topics = Topics(topic) + config = {"group.id": "group_id"} + + consumer = SomeConsumer() + exc = ValueError() + + consumer.dead_letter_msg(msg, exc) + + mock_dead_letter_topic_cls.assert_called_once_with( + group_id=consumer.group_id, + main_topic=topic, + ) + produce_for.assert_called_once_with( + msg=msg, + header_message=str(exc), + header_detail=traceback.format_exc(), + ) @patch("django_kafka.consumer.ConfluentConsumer") def test_auto_offset_false(self, mock_consumer_client): class SomeConsumer(Consumer): - config = {} - - consumer = SomeConsumer({"enable.auto.offset.store": False}) + config = {"enable.auto.offset.store": False} + consumer = SomeConsumer() msg = Mock() + consumer.commit_offset(msg) mock_consumer_client.return_value.store_offsets.assert_called_once_with(msg) @@ -150,9 +261,9 @@ class SomeConsumer(Consumer): @patch("django_kafka.consumer.ConfluentConsumer") def test_auto_offset_true(self, mock_consumer_client): class SomeConsumer(Consumer): - config = {} + config = {"enable.auto.offset.store": True} - consumer = SomeConsumer({"enable.auto.offset.store": True}) + consumer = SomeConsumer() consumer.commit_offset(Mock()) @@ -174,17 +285,18 @@ def test_settings_are_correctly_assigned(self): "bootstrap.servers": "bootstrap.servers-overridden-by-consumer", "group.id": "group.id-defined-by-consumer-config", }, + "ERROR_HANDLER": "django_kafka.error_handlers.ClientErrorHandler", }, }, ) @patch("django_kafka.consumer.ConfluentConsumer") - def test_init_config_merge_override(self, mock_consumer_client): + @patch("django_kafka.error_handlers.ClientErrorHandler") + def test_config_merge_override(self, mock_error_handler, mock_consumer_client): """ 1. CLIENT_ID is added to the consumers config 2. GLOBAL_CONFIG overrides CLIENT_ID (client.id) if it contains one 3. CONSUMER_CONFIG merges with GLOBAL_CONFIG and overrides keys if any 4. Consumer.config is merged with CONSUMER_CONFIG and overrides keys if any - 4. 
Lastly `config` provided to Consumer.__init__ merges and overrides everything """ class SomeConsumer(Consumer): @@ -197,24 +309,17 @@ class SomeConsumer(Consumer): consumer = SomeConsumer() self.assertDictEqual( - consumer.config, + SomeConsumer.build_config(), { "client.id": "client-id-overridden-by-global-config", "bootstrap.servers": "bootstrap.servers-overridden-by-consumer", "group.id": "group.id.overridden-by-consumer-class", "enable.auto.offset.store": True, + "logger": SomeConsumer.default_logger, + "error_cb": mock_error_handler(), }, ) - - # config provided to the Consumer.__init__ is properly merged - consumer = SomeConsumer({"enable.auto.offset.store": False}) - self.assertDictEqual( + SomeConsumer.build_config(), consumer.config, - { - "client.id": "client-id-overridden-by-global-config", - "bootstrap.servers": "bootstrap.servers-overridden-by-consumer", - "group.id": "group.id.overridden-by-consumer-class", - "enable.auto.offset.store": False, - }, ) diff --git a/django_kafka/tests/test_django_kafka_interface.py b/django_kafka/tests/test_django_kafka_interface.py index 7d1b9a7..fb88f1f 100644 --- a/django_kafka/tests/test_django_kafka_interface.py +++ b/django_kafka/tests/test_django_kafka_interface.py @@ -50,11 +50,10 @@ def test_schema_client(self, mock_schema_registry_client): mock_schema_registry_client.assert_called_once_with(schema_registry_config) @patch("django_kafka.DjangoKafka.consumers") - def test_start_consumer(self, mock_django_kafka_consumers): + def test_run_consumer(self, mock_django_kafka_consumers): kafka = DjangoKafka() + mock_consumer = mock_django_kafka_consumers["path.to.Consumer"]() - kafka.start_consumer("path.to.Consumer") + kafka.run_consumer("path.to.Consumer") - mock_django_kafka_consumers[ - "path.to.Consumer" - ].return_value.start.assert_called_once_with() + mock_consumer.run.assert_called_once() diff --git a/django_kafka/tests/test_registry.py b/django_kafka/tests/test_registry.py index bd1b9d0..cd1d831 100644 --- a/django_kafka/tests/test_registry.py +++ b/django_kafka/tests/test_registry.py @@ -1,56 +1,79 @@ +from typing import Type +from unittest import mock + from django.test import TestCase -from django_kafka.consumer import Consumer +from django_kafka.consumer import Consumer, Topics from django_kafka.registry import ConsumersRegistry class ConsumersRegistryTestCase(TestCase): - def test_decorator_adds_to_registry(self): - class ConsumerA(Consumer): - pass + def _get_consumer_cls(self, name, group_id) -> Type[Consumer]: + return type( + name, + (Consumer,), + {"config": {"group.id": group_id}, "topics": Topics()}, + ) - class ConsumerB(Consumer): - pass + def test_decorator_adds_to_registry(self): + consumer_cls_a = self._get_consumer_cls("ConsumerA", "group_a") + consumer_cls_b = self._get_consumer_cls("ConsumerB", "group_b") registry = ConsumersRegistry() - self.assertIs(registry()(ConsumerA), ConsumerA) - self.assertIs(registry()(ConsumerB), ConsumerB) + self.assertIs(registry()(consumer_cls_a), consumer_cls_a) + self.assertIs(registry()(consumer_cls_b), consumer_cls_b) - key_a = registry.get_key(ConsumerA) - self.assertIs(registry[key_a], ConsumerA) + key_a = registry.get_key(consumer_cls_a) + self.assertIs(registry[key_a], consumer_cls_a) - key_b = registry.get_key(ConsumerB) - self.assertIs(registry[key_b], ConsumerB) + key_b = registry.get_key(consumer_cls_b) + self.assertIs(registry[key_b], consumer_cls_b) - def test_iter_returns_keys(self): - class ConsumerA(Consumer): - pass + def
test_iter_returns_expected_keys(self): + consumer_cls_a = self._get_consumer_cls("ConsumerA", "group_a") + consumer_cls_b = self._get_consumer_cls("ConsumerB", "group_b") + registry = ConsumersRegistry() - class ConsumerB(Consumer): - pass + registry()(consumer_cls_a) + registry()(consumer_cls_b) - registry = ConsumersRegistry() + key_a = registry.get_key(consumer_cls_a) + key_b = registry.get_key(consumer_cls_b) - registry()(ConsumerA) - registry()(ConsumerB) + self.assertCountEqual(list(registry), [key_a, key_b]) - self.assertListEqual( - list(registry), - [registry.get_key(ConsumerA), registry.get_key(ConsumerB)], - ) + @mock.patch("django_kafka.retry.consumer.RetryConsumer.build") + def test_retry_consumer_registered(self, mock_retry_consumer_build): + consumer_cls_a = self._get_consumer_cls("ConsumerA", "group_a") + consumer_cls_b = self._get_consumer_cls("ConsumerB", "group_b") + registry = ConsumersRegistry() + retry_consumer_mock = mock.Mock() + mock_retry_consumer_build.side_effect = [retry_consumer_mock, None] - def test_get_key(self): - class ConsumerA(Consumer): - pass + registry()(consumer_cls_a) + registry()(consumer_cls_b) + + key_a = registry.get_key(consumer_cls_a) + key_b = registry.get_key(consumer_cls_b) + retry_key_a = f"{key_a}.retry" - class ConsumerB(Consumer): - pass + self.assertCountEqual(list(registry), [key_a, retry_key_a, key_b]) + self.assertIs(registry[retry_key_a], retry_consumer_mock) + def test_get_key(self): + consumer_cls_a = self._get_consumer_cls("ConsumerA", "group_a") + consumer_cls_b = self._get_consumer_cls("ConsumerB", "group_b") registry = ConsumersRegistry() - key_a = registry.get_key(ConsumerA) - self.assertEqual(key_a, f"{ConsumerA.__module__}.{ConsumerA.__name__}") + key_a = registry.get_key(consumer_cls_a) + self.assertEqual( + key_a, + f"{consumer_cls_a.__module__}.{consumer_cls_a.__name__}", + ) - key_b = registry.get_key(ConsumerB) - self.assertEqual(key_b, f"{ConsumerB.__module__}.{ConsumerB.__name__}") + key_b = registry.get_key(consumer_cls_b) + self.assertEqual( + key_b, + f"{consumer_cls_b.__module__}.{consumer_cls_b.__name__}", + ) diff --git a/django_kafka/tests/test_settings.py b/django_kafka/tests/test_settings.py index c2129d0..b80efd0 100644 --- a/django_kafka/tests/test_settings.py +++ b/django_kafka/tests/test_settings.py @@ -12,6 +12,7 @@ class SettingsTestCase(TestCase): "GLOBAL_CONFIG", "PRODUCER_CONFIG", "CONSUMER_CONFIG", + "RETRY_CONSUMER_CONFIG", "POLLING_FREQUENCY", "SCHEMA_REGISTRY", ) @@ -35,6 +36,9 @@ def test_user_settings(self, mock_consumer_client): "CONSUMER_CONFIG": { "group.id": "group-1", }, + "RETRY_CONSUMER_CONFIG": { + "topic.metadata.refresh.interval.ms": 5000, + }, "POLLING_FREQUENCY": 0.5, "SCHEMA_REGISTRY": { "url": "https://schema-registry", diff --git a/django_kafka/tests/test_topic.py b/django_kafka/tests/test_topic.py index 3b00393..f5bb33d 100644 --- a/django_kafka/tests/test_topic.py +++ b/django_kafka/tests/test_topic.py @@ -19,9 +19,60 @@ class TopicTestCase(TestCase): def setUp(self): self.topic = SomeTopic() + def test_is_regex(self): + self.topic.name = "^some-topic" + + self.assertTrue(self.topic.is_regex()) + + def test_matches(self): + self.topic.name = "some-topic" + + self.assertTrue(self.topic.matches("some-topic")) + self.assertFalse(self.topic.matches("some-topic.extra")) + + def test_matches__regex(self): + self.topic.name = "^some-topic" + + self.assertTrue(self.topic.matches("some-topic")) + self.assertTrue(self.topic.matches("some-topic.extra")) + 
self.assertFalse(self.topic.matches("other-topic")) + + def test_validate_produce_name(self): + """validates and returns expected producing name for non-regex based topics""" + self.assertEqual(self.topic.name, self.topic.validate_produce_name(None)) + self.assertEqual( + self.topic.name, self.topic.validate_produce_name(self.topic.name) + ) + with self.assertRaisesMessage( + DjangoKafkaError, + "topic producing name `invalid-topic` is not valid for this topic", + ): + self.topic.validate_produce_name("invalid-topic") + + def test_validate_produce_name__regex(self): + """validates supplied producing name when topic is regex based""" + self.topic.name = "^some-topic" + + self.assertEqual( + "some-topic.extra", + self.topic.validate_produce_name("some-topic.extra"), + ) + with self.assertRaisesMessage( + DjangoKafkaError, + "topic producing name `invalid-topic` is not valid for this topic", + ): + self.topic.validate_produce_name("invalid-topic") + with self.assertRaisesMessage( + DjangoKafkaError, + "topic producing name must be supplied for regex-based topics", + ): + self.topic.validate_produce_name(None) + @patch("django_kafka.topic.Topic.serialize") - @patch("django_kafka.DjangoKafka.producer") + @patch("django_kafka.kafka.producer") def test_produce_serializer_kwargs(self, mock_kafka_producer, mock_topic_serialize): + self.topic.name = "^some-topic" + name = "some-topic.extra" key = "key" value = "message value" headers = None # default is None when not provided @@ -30,6 +81,7 @@ def test_produce_serializer_kwargs(self, mock_kafka_producer, mock_topic_seriali self.topic.produce( value, + name=name, key=key, key_serializer_kwargs=key_serializer_kwargs, value_serializer_kwargs=value_serializer_kwargs, @@ -38,33 +90,66 @@ def test_produce_serializer_kwargs(self, mock_kafka_producer, mock_topic_seriali self.assertEqual( mock_topic_serialize.call_args_list, [ - call(key, MessageField.KEY, headers, **key_serializer_kwargs), - call(value, MessageField.VALUE, headers, **value_serializer_kwargs), + call( + name, + key, + MessageField.KEY, + headers, + **key_serializer_kwargs, + ), + call( + name, + value, + MessageField.VALUE, + headers, + **value_serializer_kwargs, + ), ], ) mock_kafka_producer.produce.assert_called_once_with( - self.topic.name, + name, mock_topic_serialize.return_value, key=mock_topic_serialize.return_value, ) @patch("django_kafka.topic.Topic.serialize") - @patch("django_kafka.DjangoKafka.producer") + @patch("django_kafka.kafka.producer") + def test_produce_requires_kwargs_supplied_name_for_regex_names( + self, + mock_kafka_producer, + mock_topic_serialize, + ): + self.topic.name = "^some-topic" + value = "message value" + + with self.assertRaisesMessage( + DjangoKafkaError, + "topic producing name must be supplied for regex-based topics", + ): + self.topic.produce(value) + + @patch("django_kafka.topic.Topic.serialize") + @patch("django_kafka.kafka.producer") def test_produce_only_value(self, mock_kafka_producer, mock_topic_serialize): value = "message value" headers = None # default is None when not provided self.topic.produce(value) - mock_topic_serialize.assert_called_once_with(value, MessageField.VALUE, headers) + mock_topic_serialize.assert_called_once_with( + self.topic.name, + value, + MessageField.VALUE, + headers, + ) mock_kafka_producer.produce.assert_called_once_with( self.topic.name, mock_topic_serialize.return_value, ) @patch("django_kafka.topic.Topic.serialize") - @patch("django_kafka.DjangoKafka.producer") + @patch("django_kafka.kafka.producer") def 
test_produce_key_is_serialized(self, mock_kafka_producer, mock_topic_serialize): value = "message value" key = "some key" @@ -75,8 +160,8 @@ def test_produce_key_is_serialized(self, mock_kafka_producer, mock_topic_seriali self.assertEqual( mock_topic_serialize.call_args_list, [ - call(key, MessageField.KEY, headers), - call(value, MessageField.VALUE, headers), + call(self.topic.name, key, MessageField.KEY, headers), + call(self.topic.name, value, MessageField.VALUE, headers), ], ) @@ -90,13 +175,14 @@ def test_produce_key_is_serialized(self, mock_kafka_producer, mock_topic_seriali @patch("django_kafka.topic.Topic.key_deserializer") @patch("django_kafka.topic.Topic.context") def test_deserialize_key(self, mock_topic_context, mock_key_deserializer): + name = "name" value = b"some key" field = MessageField.KEY kwargs = {"key": "value"} - self.topic.deserialize(value, field, **kwargs) + self.topic.deserialize(name, value, field, **kwargs) - mock_topic_context.assert_called_once_with(field, None) + mock_topic_context.assert_called_once_with(name, field, None) mock_key_deserializer.assert_called_once_with(**kwargs) mock_key_deserializer.return_value.assert_called_once_with( value, @@ -106,13 +192,14 @@ def test_deserialize_key(self, mock_topic_context, mock_key_deserializer): @patch("django_kafka.topic.Topic.value_deserializer") @patch("django_kafka.topic.Topic.context") def test_deserialize_value(self, mock_topic_context, mock_value_deserializer): + name = "name" value = b"some value" field = MessageField.VALUE kwargs = {"key": "value"} - self.topic.deserialize(value, field, **kwargs) + self.topic.deserialize(name, value, field, **kwargs) - mock_topic_context.assert_called_once_with(field, None) + mock_topic_context.assert_called_once_with(name, field, None) mock_value_deserializer.assert_called_once_with(**kwargs) mock_value_deserializer.return_value.assert_called_once_with( value, @@ -134,7 +221,7 @@ def test_deserialize_unknown_field( DjangoKafkaError, f"Unsupported deserialization field {field}.", ): - self.topic.deserialize("some value", field) + self.topic.deserialize("name", "some value", field) mock_topic_context.assert_not_called() mock_value_deserializer.assert_not_called() @@ -143,13 +230,14 @@ def test_deserialize_unknown_field( @patch("django_kafka.topic.Topic.key_serializer") @patch("django_kafka.topic.Topic.context") def test_serialize_key(self, mock_topic_context, mock_key_serializer): + name = "name" value = "some key" field = MessageField.KEY kwargs = {"key": "value"} - self.topic.serialize(value, field, **kwargs) + self.topic.serialize(name, value, field, **kwargs) - mock_topic_context.assert_called_once_with(field, None) + mock_topic_context.assert_called_once_with(name, field, None) mock_key_serializer.assert_called_once_with(**kwargs) mock_key_serializer.return_value.assert_called_once_with( value, @@ -159,13 +247,14 @@ def test_serialize_key(self, mock_topic_context, mock_key_serializer): @patch("django_kafka.topic.Topic.value_serializer") @patch("django_kafka.topic.Topic.context") def test_serialize_value(self, mock_topic_context, mock_value_serializer): + name = "name" value = "some value" field = MessageField.VALUE kwargs = {"key": "value"} - self.topic.serialize(value, field, **kwargs) + self.topic.serialize(name, value, field, **kwargs) - mock_topic_context.assert_called_once_with(field, None) + mock_topic_context.assert_called_once_with(name, field, None) mock_value_serializer.assert_called_once_with(**kwargs) 
mock_value_serializer.return_value.assert_called_once_with( value, @@ -187,7 +276,7 @@ def test_serialize_unknown_field( DjangoKafkaError, f"Unsupported serialization field {field}.", ): - self.topic.serialize("some value", field) + self.topic.serialize("name", "some value", field) mock_topic_context.assert_not_called() mock_value_serializer.assert_not_called() @@ -201,7 +290,7 @@ def test_context(self): with patch( "django_kafka.topic.SerializationContext", ) as mock_serialization_context: - self.topic.context(field, headers) + self.topic.context(self.topic.name, field, headers) mock_serialization_context.assert_called_once_with( self.topic.name, field, diff --git a/django_kafka/topic.py b/django_kafka/topic.py index 71d8dd5..a2f5224 100644 --- a/django_kafka/topic.py +++ b/django_kafka/topic.py @@ -1,6 +1,7 @@ import logging +import re from abc import ABC, abstractmethod -from typing import Optional, Type +from typing import TYPE_CHECKING, Optional, Type from confluent_kafka import cimpl from confluent_kafka.schema_registry.avro import AvroDeserializer, AvroSerializer @@ -16,6 +17,9 @@ from django_kafka import kafka from django_kafka.exceptions import DjangoKafkaError +if TYPE_CHECKING: + from django_kafka.retry import RetrySettings + logger = logging.getLogger(__name__) @@ -26,22 +30,53 @@ class Topic(ABC): key_deserializer: Type[Deserializer] = StringDeserializer value_deserializer: Type[Deserializer] = StringDeserializer + retry_settings: Optional["RetrySettings"] = None + @property @abstractmethod def name(self) -> str: - """Define Kafka topic name""" + """Define Kafka topic name + + Regexp pattern subscriptions are supported by prefixing the name with "^". If + used, then a name must always be supplied to .produce() method. + """ + + def is_regex(self): + """returns if the topic subscription is regex based""" + return self.name.startswith("^") + + def validate_produce_name(self, name: Optional[str]) -> str: + """validates and returns the topic producing name""" + if name: + if self.matches(name): + return name + raise DjangoKafkaError( + f"topic producing name `{name}` is not valid for this topic", + ) + if self.is_regex(): + raise DjangoKafkaError( + "topic producing name must be supplied for regex-based topics", + ) + return self.name + + def matches(self, topic_name: str): + if self.is_regex(): + return bool(re.search(self.name, topic_name)) + return self.name == topic_name def consume(self, msg: cimpl.Message): """Implement message processing""" raise NotImplementedError def produce(self, value: any, **kwargs): + name = self.validate_produce_name(kwargs.pop("name", None)) key_serializer_kwargs = kwargs.pop("key_serializer_kwargs", {}) or {} value_serializer_kwargs = kwargs.pop("value_serializer_kwargs", {}) or {} headers = kwargs.get("headers") if "key" in kwargs: kwargs["key"] = self.serialize( + name, kwargs["key"], MessageField.KEY, headers, @@ -49,8 +84,9 @@ def produce(self, value: any, **kwargs): ) kafka.producer.produce( - self.name, + name, self.serialize( + name, value, MessageField.VALUE, headers, @@ -58,38 +94,53 @@ def produce(self, value: any, **kwargs): ), **kwargs, ) + kafka.producer.poll(0) # stops producer on_delivery callbacks buffer overflow def serialize( self, + name, value, field: MessageField, - headers: Optional[dict | list] = None, + headers: Optional[list] = None, **kwargs, ): if field == MessageField.VALUE: serializer = self.get_value_serializer(**kwargs) - return serializer(value, self.context(MessageField.VALUE, headers)) + return serializer( + 
value,
+                self.context(name, MessageField.VALUE, headers),
+            )
         if field == MessageField.KEY:
             serializer = self.get_key_serializer(**kwargs)
-            return serializer(value, self.context(MessageField.KEY, headers))
+            return serializer(
+                value,
+                self.context(name, MessageField.KEY, headers),
+            )

         raise DjangoKafkaError(f"Unsupported serialization field {field}.")

     def deserialize(
         self,
+        name,
         value,
         field: MessageField,
-        headers: Optional[dict | list] = None,
+        headers: Optional[list] = None,
         **kwargs,
     ):
         if field == MessageField.VALUE:
             deserializer = self.get_value_deserializer(**kwargs)
-            return deserializer(value, self.context(MessageField.VALUE, headers))
+            return deserializer(
+                value,
+                self.context(name, MessageField.VALUE, headers),
+            )
         if field == MessageField.KEY:
             deserializer = self.get_key_deserializer(**kwargs)
-            return deserializer(value, self.context(MessageField.KEY, headers))
+            return deserializer(
+                value,
+                self.context(name, MessageField.KEY, headers),
+            )

         raise DjangoKafkaError(f"Unsupported deserialization field {field}.")

@@ -107,10 +158,11 @@ def get_value_deserializer(self, **kwargs):

     def context(
         self,
+        name: str,
         field: MessageField,
-        headers: Optional[dict | list] = None,
+        headers: Optional[list] = None,
     ) -> SerializationContext:
-        return SerializationContext(self.name, field, headers=headers)
+        return SerializationContext(name, field, headers=headers)


 class AvroTopic(Topic, ABC):

From 6ac25fbf84a671ed64ddef94ce4ebeb6265d2134 Mon Sep 17 00:00:00 2001
From: Stefan Cardnell
Date: Thu, 12 Sep 2024 09:43:59 +0200
Subject: [PATCH 2/4] feat: separate Topic into TopicConsumer and TopicProducer classes, refs #8

---
 CHANGELOG.md                                 |   3 +-
 README.md                                    |   2 +
 django_kafka/consumer.py                     |  53 ++-
 django_kafka/dead_letter/topic.py            |  45 ++-
 django_kafka/retry/__init__.py               |   4 +-
 django_kafka/retry/consumer.py               |  33 +-
 django_kafka/retry/topic.py                  | 117 ++++---
 django_kafka/tests/dead_letter/test_topic.py |  70 ++--
 django_kafka/tests/retry/test_consumer.py    | 120 ++++---
 django_kafka/tests/retry/test_topic.py       | 279 ++++++++++------
 django_kafka/tests/test_consumer.py          |  68 ++--
 django_kafka/tests/test_topic.py             | 334 +++++++++----------
 django_kafka/topic.py                        | 181 +++++-----
 13 files changed, 701 insertions(+), 608 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2bb2b7e..7b3d1b1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,5 @@
 # Changelog

 ## 0.2.0 (2024-09-05)
-* Add decorator for topic retry and dead letter topic, see README.md
\ No newline at end of file
+* Add decorator for topic retry and dead letter topic, see `README.md`
+* Separate `Topic` class into `TopicProducer` and `TopicConsumer` classes.
\ No newline at end of file
diff --git a/README.md b/README.md
index 21f6020..9d7876d 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,8 @@ class Topic1(Topic):
         # ... process values
 ```

+`Topic` inherits from the `TopicProducer` and `TopicConsumer` classes. If you only need to consume or produce messages, inherit from one of these classes instead to avoid defining unnecessary abstract methods.
+
 ### Define a Consumer:

 Consumers define which topics they take care of. Usually you want one consumer per project. If 2 consumers are defined, then they will be started in parallel.
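As a minimal sketch of the split this patch introduces — a produce-only and a consume-only topic, each inheriting from just one base class. The class names and the `orders` topic below are illustrative only, and the sketch assumes the default string serializers:

```python
from django_kafka.topic import TopicConsumer, TopicProducer


class OrdersProducer(TopicProducer):
    """Produce-only topic: no consume() has to be defined."""

    name = "orders"  # hypothetical topic name


class OrdersConsumer(TopicConsumer):
    """Consume-only topic: no serializers or produce() involved."""

    name = "orders"

    def consume(self, msg):
        ...  # process msg.key() / msg.value() here


# Producing goes through the producer class only:
OrdersProducer().produce("message value", key="some-key")
```
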
diff --git a/django_kafka/consumer.py b/django_kafka/consumer.py index 47756c0..3bea260 100644 --- a/django_kafka/consumer.py +++ b/django_kafka/consumer.py @@ -10,34 +10,34 @@ from django_kafka.exceptions import DjangoKafkaError if TYPE_CHECKING: - from django_kafka.topic import Topic + from django_kafka.topic import TopicConsumer logger = logging.getLogger(__name__) class Topics: - _topics: tuple["Topic", ...] - _match: dict[str, "Topic"] + _topic_consumers: tuple["TopicConsumer", ...] + _match: dict[str, "TopicConsumer"] - def __init__(self, *topics: "Topic"): - self._topics = topics - self._match: dict[str, "Topic"] = {} + def __init__(self, *topic_consumers: "TopicConsumer"): + self._topic_consumers = topic_consumers + self._match: dict[str, "TopicConsumer"] = {} - def get_topic(self, name: str) -> "Topic": - if name not in self._match: - topic = next((t for t in self if t.matches(name)), None) - if not topic: - raise DjangoKafkaError(f"No topic registered for `{name}`") - self._match[name] = topic + def get(self, topic_name: str) -> "TopicConsumer": + if topic_name not in self._match: + topic_consumer = next((t for t in self if t.matches(topic_name)), None) + if not topic_consumer: + raise DjangoKafkaError(f"No topic registered for `{topic_name}`") + self._match[topic_name] = topic_consumer - return self._match[name] + return self._match[topic_name] @property def names(self) -> list[str]: return [topic.name for topic in self] def __iter__(self): - yield from self._topics + yield from self._topic_consumers class Consumer: @@ -91,23 +91,22 @@ def commit_offset(self, msg: cimpl.Message): self.store_offsets(msg) def retry_msg(self, msg: cimpl.Message, exc: Exception) -> bool: - from django_kafka.retry.topic import RetryTopic + from django_kafka.retry.topic import RetryTopicProducer - topic = self.get_topic(msg) - if not topic.retry_settings: + topic_consumer = self.get_topic_consumer(msg) + if not topic_consumer.retry_settings: return False - return RetryTopic(group_id=self.group_id, main_topic=topic).retry_for( + return RetryTopicProducer( + group_id=self.group_id, + settings=topic_consumer.retry_settings, msg=msg, - exc=exc, - ) + ).retry_for(exc=exc) def dead_letter_msg(self, msg: cimpl.Message, exc: Exception): - from django_kafka.dead_letter.topic import DeadLetterTopic + from django_kafka.dead_letter.topic import DeadLetterTopicProducer - topic = self.get_topic(msg) - DeadLetterTopic(group_id=self.group_id, main_topic=topic).produce_for( - msg=msg, + DeadLetterTopicProducer(group_id=self.group_id, msg=msg).produce_for( header_message=str(exc), header_detail=traceback.format_exc(), ) @@ -118,8 +117,8 @@ def handle_exception(self, msg: cimpl.Message, exc: Exception): self.dead_letter_msg(msg, exc) self.log_error(exc) - def get_topic(self, msg: cimpl.Message) -> "Topic": - return self.topics.get_topic(name=msg.topic()) + def get_topic_consumer(self, msg: cimpl.Message) -> "TopicConsumer": + return self.topics.get(topic_name=msg.topic()) def log_error(self, error): logger.error(error, exc_info=True) @@ -130,7 +129,7 @@ def process_message(self, msg: cimpl.Message): return try: - self.get_topic(msg).consume(msg) + self.get_topic_consumer(msg).consume(msg) # ruff: noqa: BLE001 (we do not want consumer to stop if message consumption fails in any circumstances) except Exception as exc: self.handle_exception(msg, exc) diff --git a/django_kafka/dead_letter/topic.py b/django_kafka/dead_letter/topic.py index 8d7748e..da49c54 100644 --- a/django_kafka/dead_letter/topic.py +++ 
b/django_kafka/dead_letter/topic.py @@ -1,44 +1,41 @@ import re +from typing import TYPE_CHECKING from django_kafka.dead_letter.headers import DeadLetterHeader +from django_kafka.retry.topic import RETRY_TOPIC_PATTERN from django_kafka.serialization import NoOpSerializer -from django_kafka.topic import Topic +from django_kafka.topic import TopicProducer +if TYPE_CHECKING: + from confluent_kafka import cimpl -class DeadLetterTopic(Topic): +DEAD_LETTER_TOPIC_SUFFIX = "dlt" + + +class DeadLetterTopicProducer(TopicProducer): key_serializer = NoOpSerializer value_serializer = NoOpSerializer - key_deserializer = NoOpSerializer - value_deserializer = NoOpSerializer - - def __init__(self, group_id: str, main_topic: Topic): + def __init__(self, group_id: str, msg: "cimpl.Message"): self.group_id = group_id - self.main_topic = main_topic + self.msg = msg @property def name(self) -> str: - group_id = re.escape(self.group_id) - if self.main_topic.is_regex(): - topic_name = f"({self.main_topic.name.lstrip('^').rstrip('$')})" - else: - topic_name = re.escape(self.main_topic.name) - return rf"^{group_id}\.{topic_name}\.dlt$" - - def get_produce_name(self, topic_name: str) -> str: - retry_pattern = r"\.retry\.[0-9]+$" - if re.search(retry_pattern, topic_name): - return re.sub(retry_pattern, ".dlt", topic_name) - return f"{self.group_id}.{topic_name}.dlt" - - def produce_for(self, msg, header_message, header_detail): + topic = self.msg.topic() + suffix = DEAD_LETTER_TOPIC_SUFFIX + + if re.search(RETRY_TOPIC_PATTERN, topic): + return re.sub(RETRY_TOPIC_PATTERN, suffix, topic) + return f"{self.group_id}.{topic}.{suffix}" + + def produce_for(self, header_message, header_detail): headers = [ (DeadLetterHeader.MESSAGE, header_message), (DeadLetterHeader.DETAIL, header_detail), ] self.produce( - name=self.get_produce_name(msg.topic()), - key=msg.key(), - value=msg.value(), + key=self.msg.key(), + value=self.msg.value(), headers=headers, ) diff --git a/django_kafka/retry/__init__.py b/django_kafka/retry/__init__.py index f1e44e4..a6d0cac 100644 --- a/django_kafka/retry/__init__.py +++ b/django_kafka/retry/__init__.py @@ -3,7 +3,7 @@ from django.utils import timezone if TYPE_CHECKING: - from django_kafka.topic import Topic + from django_kafka.topic import TopicConsumer class RetrySettings: @@ -35,7 +35,7 @@ def __init__( self.include = include self.exclude = exclude - def __call__(self, topic_cls: Type["Topic"]): + def __call__(self, topic_cls: Type["TopicConsumer"]): topic_cls.retry_settings = self return topic_cls diff --git a/django_kafka/retry/consumer.py b/django_kafka/retry/consumer.py index c67daac..f77cad3 100644 --- a/django_kafka/retry/consumer.py +++ b/django_kafka/retry/consumer.py @@ -7,17 +7,22 @@ from django_kafka.conf import settings from django_kafka.consumer import Consumer, Topics -from django_kafka.dead_letter.topic import DeadLetterTopic +from django_kafka.dead_letter.topic import DeadLetterTopicProducer from django_kafka.retry.headers import RetryHeader -from django_kafka.retry.topic import RetryTopic +from django_kafka.retry.topic import RetryTopicConsumer if TYPE_CHECKING: - from django_kafka.topic import Topic + from django_kafka.topic import TopicConsumer class RetryTopics(Topics): - def __init__(self, group_id: str, *topics: "Topic"): - super().__init__(*(RetryTopic(group_id=group_id, main_topic=t) for t in topics)) + def __init__(self, group_id: str, *topic_consumers: "TopicConsumer"): + super().__init__( + *( + RetryTopicConsumer(group_id=group_id, topic_consumer=t) + for t in 
topic_consumers + ), + ) class RetryConsumer(Consumer): @@ -31,8 +36,8 @@ def __init__(self): @classmethod def build(cls, consumer_cls: Type["Consumer"]) -> Optional[Type["RetryConsumer"]]: """Generates RetryConsumer subclass linked to consumer class retryable topics""" - retryable_topics = [t for t in consumer_cls.topics if t.retry_settings] - if not retryable_topics: + retryable_tcs = [t for t in consumer_cls.topics if t.retry_settings] + if not retryable_tcs: return None group_id = consumer_cls.build_config()["group.id"] @@ -45,7 +50,7 @@ def build(cls, consumer_cls: Type["Consumer"]) -> Optional[Type["RetryConsumer"] **getattr(cls, "config", {}), "group.id": f"{group_id}.retry", }, - "topics": RetryTopics(group_id, *retryable_topics), + "topics": RetryTopics(group_id, *retryable_tcs), }, ) @@ -58,16 +63,12 @@ def build_config(cls): } def retry_msg(self, msg: cimpl.Message, exc: Exception) -> bool: - retry_topic = cast(RetryTopic, self.get_topic(msg)) - return retry_topic.retry_for(msg=msg, exc=exc) + rt_consumer = cast(RetryTopicConsumer, self.get_topic_consumer(msg)) + return rt_consumer.producer_for(msg).retry_for(exc) def dead_letter_msg(self, msg: cimpl.Message, exc: Exception): - retry_topic = cast(RetryTopic, self.get_topic(msg)) - DeadLetterTopic( - group_id=retry_topic.group_id, - main_topic=retry_topic.main_topic, - ).produce_for( - msg=msg, + rt_consumer = cast(RetryTopicConsumer, self.get_topic_consumer(msg)) + DeadLetterTopicProducer(group_id=rt_consumer.group_id, msg=msg).produce_for( header_message=str(exc), header_detail=traceback.format_exc(), ) diff --git a/django_kafka/retry/topic.py b/django_kafka/retry/topic.py index 81db58b..0afbf60 100644 --- a/django_kafka/retry/topic.py +++ b/django_kafka/retry/topic.py @@ -1,72 +1,97 @@ import re - -from confluent_kafka import cimpl +from typing import TYPE_CHECKING from django_kafka.exceptions import DjangoKafkaError from django_kafka.retry.headers import RetryHeader from django_kafka.serialization import NoOpSerializer -from django_kafka.topic import Topic +from django_kafka.topic import TopicConsumer, TopicProducer + +if TYPE_CHECKING: + from confluent_kafka import cimpl + + from django_kafka.retry import RetrySettings + +RETRY_TOPIC_SUFFIX = "retry" +RETRY_TOPIC_PATTERN = rf"{re.escape(RETRY_TOPIC_SUFFIX)}\.([0-9]+)$" -class RetryTopic(Topic): +class RetryTopicProducer(TopicProducer): key_serializer = NoOpSerializer value_serializer = NoOpSerializer - key_deserializer = NoOpSerializer - value_deserializer = NoOpSerializer - - def __init__(self, group_id: str, main_topic: Topic): - if not main_topic.retry_settings: - raise DjangoKafkaError(f"Topic {main_topic} is not marked for retry") - self.main_topic = main_topic + def __init__(self, group_id: str, settings: "RetrySettings", msg: "cimpl.Message"): + self.settings = settings self.group_id = group_id + self.msg = msg + self.attempt = self.get_next_attempt(msg.topic()) + super().__init__() + + @classmethod + def get_next_attempt(cls, topic_name: str) -> int: + match = re.search(RETRY_TOPIC_PATTERN, topic_name) + attempt = int(match.group(1)) if match else 0 + return attempt + 1 @property def name(self) -> str: - group_id = re.escape(self.group_id) - if self.main_topic.is_regex(): - topic_name = f"({self.main_topic.name.lstrip('^').rstrip('$')})" - else: - topic_name = re.escape(self.main_topic.name) + topic = self.msg.topic() + suffix = f"{RETRY_TOPIC_SUFFIX}.{self.attempt}" - return rf"^{group_id}\.{topic_name}\.retry\.[0-9]+$" + if re.search(RETRY_TOPIC_PATTERN, topic): + 
return re.sub(RETRY_TOPIC_PATTERN, suffix, topic) + return f"{self.group_id}.{self.msg.topic()}.{suffix}" - @classmethod - def get_attempt(cls, topic_name: str) -> int: - match = re.search(r"\.retry\.([0-9]+)$", topic_name) - return int(match.group(1)) if match else 0 - - def get_produce_name(self, topic_name: str, attempt: int) -> str: - if attempt == 1: - return f"{self.group_id}.{topic_name}.retry.1" - return re.sub(r"\.[0-9]+$", f".{attempt}", topic_name) - - def consume(self, msg: cimpl.Message): - self.main_topic.consume(msg) - - def retry_for(self, msg: cimpl.Message, exc: Exception) -> bool: - """ - msg: the message that failed, this may come from the main topic or retry topic - exc: Exception that caused the failure - """ - settings = self.main_topic.retry_settings - - if not settings.should_retry(exc=exc): + def retry_for(self, exc: Exception) -> bool: + if not self.settings.should_retry(exc=exc): return False - topic_name = msg.topic() - attempt = self.get_attempt(topic_name) + 1 - - if settings.attempts_exceeded(attempt): + if self.settings.attempts_exceeded(attempt=self.attempt): return False self.produce( - name=self.get_produce_name(topic_name, attempt), - key=msg.key(), - value=msg.value(), + key=self.msg.key(), + value=self.msg.value(), headers=[ (RetryHeader.MESSAGE, str(exc)), - (RetryHeader.TIMESTAMP, settings.get_retry_timestamp(attempt)), + ( + RetryHeader.TIMESTAMP, + self.settings.get_retry_timestamp(self.attempt), + ), ], ) return True + + +class RetryTopicConsumer(TopicConsumer): + key_deserializer = NoOpSerializer + value_deserializer = NoOpSerializer + + def __init__(self, group_id: str, topic_consumer: TopicConsumer): + if not topic_consumer.retry_settings: + raise DjangoKafkaError( + f"TopicConsumer {topic_consumer} is not marked for retry", + ) + self.topic_consumer = topic_consumer + self.group_id = group_id + super().__init__() + + @property + def name(self) -> str: + """returns name as regex pattern matching all attempts on the group's topic""" + group_id = re.escape(self.group_id) + if self.topic_consumer.is_regex(): + topic_name = f"({self.topic_consumer.name.lstrip('^').rstrip('$')})" + else: + topic_name = re.escape(self.topic_consumer.name) + + return rf"^{group_id}\.{topic_name}\.{RETRY_TOPIC_PATTERN}" + + def consume(self, msg: "cimpl.Message"): + self.topic_consumer.consume(msg) + + def producer_for(self, msg: "cimpl.Message") -> RetryTopicProducer: + return RetryTopicProducer( + group_id=self.group_id, + settings=self.topic_consumer.retry_settings, + msg=msg, + ) diff --git a/django_kafka/tests/dead_letter/test_topic.py b/django_kafka/tests/dead_letter/test_topic.py index 3f7535c..7e935f8 100644 --- a/django_kafka/tests/dead_letter/test_topic.py +++ b/django_kafka/tests/dead_letter/test_topic.py @@ -3,60 +3,52 @@ from django.test import TestCase from django_kafka.dead_letter.headers import DeadLetterHeader -from django_kafka.dead_letter.topic import DeadLetterTopic -from django_kafka.topic import Topic +from django_kafka.dead_letter.topic import ( + DEAD_LETTER_TOPIC_SUFFIX, + DeadLetterTopicProducer, +) +from django_kafka.retry.topic import RETRY_TOPIC_SUFFIX -class DeadLetterTopicTestCase(TestCase): - def _get_main_topic(self, topic_name: str = "topic_name"): - class TestTopic(Topic): - name = topic_name - - return TestTopic() - +class DeadLetterTopicProducerTestCase(TestCase): def test_name(self): - main_topic = self._get_main_topic(topic_name="topic.name") - dl_topic = DeadLetterTopic(group_id="group.id", main_topic=main_topic) + 
mock_msg_topic_consumer = mock.Mock(**{"topic.return_value": "topic.name"}) + mock_msg_retry_topic = mock.Mock( + **{"topic.return_value": f"group.id.topic.name.{RETRY_TOPIC_SUFFIX}.10"}, + ) - self.assertEqual(dl_topic.name, r"^group\.id\.topic\.name\.dlt$") + dlt_producer_1 = DeadLetterTopicProducer( + group_id="group.id", + msg=mock_msg_topic_consumer, + ) - def test_name__regex(self): - """tests regex topic names are correctly inserted in to dlt regex""" - main_topic = self._get_main_topic(topic_name="^topic_name|other_name$") - dl_topic = DeadLetterTopic(group_id="group.id", main_topic=main_topic) + dlt_producer_2 = DeadLetterTopicProducer( + group_id="group.id", + msg=mock_msg_retry_topic, + ) self.assertEqual( - dl_topic.name, - r"^group\.id\.(topic_name|other_name)\.dlt$", + dlt_producer_1.name, + f"group.id.topic.name.{DEAD_LETTER_TOPIC_SUFFIX}", + ) + self.assertEqual( + dlt_producer_2.name, + f"group.id.topic.name.{DEAD_LETTER_TOPIC_SUFFIX}", ) - def test_get_produce_name(self): - """tests normal and retry topics are correctly handled by get_produce_name""" - main_topic = self._get_main_topic() - dl_topic = DeadLetterTopic(group_id="group.id", main_topic=main_topic) - - retry_topic_1 = dl_topic.get_produce_name("fake") - retry_topic_2 = dl_topic.get_produce_name("group.id.fake.retry.1") - - self.assertEqual(retry_topic_1, "group.id.fake.dlt") - self.assertEqual(retry_topic_2, "group.id.fake.dlt") - - @mock.patch("django_kafka.dead_letter.topic.DeadLetterTopic.produce") - def test_produce_for(self, mock_produce): - main_topic = self._get_main_topic() - group_id = "group.id" - header_message = "header_message" - header_detail = "header_detail" + def test_produce_for(self): msg_mock = mock.Mock(**{"topic.return_value": "msg_topic"}) + dlt_producer = DeadLetterTopicProducer(group_id="group.id", msg=msg_mock) + dlt_producer.produce = mock.Mock() + header_message = "header message" + header_detail = "header detail" - DeadLetterTopic(group_id=group_id, main_topic=main_topic).produce_for( - msg=msg_mock, + dlt_producer.produce_for( header_message=header_message, header_detail=header_detail, ) - mock_produce.assert_called_once_with( - name="group.id.msg_topic.dlt", + dlt_producer.produce.assert_called_once_with( key=msg_mock.key(), value=msg_mock.value(), headers=[ diff --git a/django_kafka/tests/retry/test_consumer.py b/django_kafka/tests/retry/test_consumer.py index 460ac73..6312207 100644 --- a/django_kafka/tests/retry/test_consumer.py +++ b/django_kafka/tests/retry/test_consumer.py @@ -1,6 +1,7 @@ import datetime import traceback -from unittest import mock +from typing import Type +from unittest.mock import Mock, patch from confluent_kafka import TopicPartition from django.test import TestCase, override_settings @@ -11,33 +12,36 @@ from django_kafka.retry import RetrySettings from django_kafka.retry.consumer import RetryConsumer, RetryTopics from django_kafka.retry.headers import RetryHeader -from django_kafka.topic import Topic +from django_kafka.topic import TopicConsumer class RetryConsumerTestCase(TestCase): - def _get_topic(self): - class NormalTopic(Topic): + def _get_topic_consumer(self): + class SomeTopicConsumer(TopicConsumer): name = "normal_topic" - return NormalTopic() + return SomeTopicConsumer() - def _get_retryable_topic(self): - class RetryableTopic(Topic): + def _get_retryable_topic_consumer(self): + class RetryableTopicConsumer(TopicConsumer): name = "retry_topic" retry = RetrySettings(max_retries=5, delay=60) - retry(RetryableTopic) + retry(RetryableTopicConsumer) 
- return RetryableTopic() + return RetryableTopicConsumer() - def _get_retryable_consumer_cls(self, group_id="group_id"): - class TestConsumer(Consumer): - topics = Topics(self._get_topic(), self._get_retryable_topic()) + def _get_retryable_consumer_cls(self, group_id="group_id") -> Type[Consumer]: + class SomeConsumer(Consumer): + topics = Topics( + self._get_topic_consumer(), + self._get_retryable_topic_consumer(), + ) config = {"group.id": group_id} - return TestConsumer + return SomeConsumer - def _get_retry_consumer(self, consumer_group_id="group_id"): + def _get_retry_consumer(self, consumer_group_id="group_id") -> RetryConsumer: return RetryConsumer.build( self._get_retryable_consumer_cls(group_id=consumer_group_id), )() @@ -53,8 +57,8 @@ def _get_retry_consumer(self, consumer_group_id="group_id"): }, }, ) - @mock.patch("django_kafka.consumer.ConfluentConsumer") - @mock.patch("django_kafka.retry.consumer.Consumer.build_config") + @patch("django_kafka.consumer.ConfluentConsumer") + @patch("django_kafka.retry.consumer.Consumer.build_config") def test_config_merge_override( self, mock_consumer_build_config, @@ -106,7 +110,7 @@ def test_build(self): self.assertIsInstance(retry_consumer_cls.topics, RetryTopics) self.assertCountEqual( [t for t in consumer_cls.topics if t.retry_settings], - [t.main_topic for t in retry_consumer_cls.topics], + [t.topic_consumer for t in retry_consumer_cls.topics], ) def test_build__no_retry_topics(self): @@ -117,54 +121,46 @@ class TestConsumer(Consumer): self.assertIsNone(RetryConsumer.build(TestConsumer)) def test_retry_msg(self): + mock_retry_topic_consumer = Mock() + mock_producer_for = mock_retry_topic_consumer.producer_for + mock_retry_for = mock_producer_for.return_value.retry_for + msg_mock = Mock() + retry_consumer = self._get_retry_consumer() - retry_topic = next(iter(retry_consumer.topics)) - retry_topic.retry_for = mock.Mock() + retry_consumer.get_topic_consumer = Mock(return_value=mock_retry_topic_consumer) exc = ValueError() - msg_mock = mock.Mock( - **{ - "topic.return_value": retry_topic.get_produce_name( - retry_topic.main_topic.name, - attempt=1, - ), - }, - ) retried = retry_consumer.retry_msg(msg_mock, exc) - retry_topic.retry_for.assert_called_once_with(msg=msg_mock, exc=exc) - self.assertEqual(retried, retry_topic.retry_for.return_value) + mock_producer_for.assert_called_once_with(msg_mock) + mock_retry_for.assert_called_once_with(exc) + self.assertEqual(retried, mock_retry_for.return_value) + + @patch("django_kafka.retry.consumer.DeadLetterTopicProducer") + def test_dead_letter_msg(self, mock_dlt_topic_producer_cls): + mock_retry_topic_consumer = Mock() + mock_produce_for = mock_dlt_topic_producer_cls.return_value.produce_for + msg_mock = Mock() - @mock.patch("django_kafka.retry.consumer.DeadLetterTopic") - def test_dead_letter_msg(self, mock_dead_letter_topic_cls): retry_consumer = self._get_retry_consumer() - retry_topic = next(iter(retry_consumer.topics)) + retry_consumer.get_topic_consumer = Mock(return_value=mock_retry_topic_consumer) exc = ValueError() - msg_mock = mock.Mock( - **{ - "topic.return_value": retry_topic.get_produce_name( - retry_topic.main_topic.name, - attempt=1, - ), - }, - ) retry_consumer.dead_letter_msg(msg_mock, exc) - mock_dead_letter_topic_cls.assert_called_once_with( - group_id=retry_topic.group_id, - main_topic=retry_topic.main_topic, - ) - mock_dead_letter_topic_cls.return_value.produce_for.assert_called_once_with( + mock_dlt_topic_producer_cls.assert_called_once_with( + 
group_id=mock_retry_topic_consumer.group_id, msg=msg_mock, + ) + mock_produce_for.assert_called_once_with( header_message=str(exc), header_detail=traceback.format_exc(), ) - @mock.patch("django_kafka.consumer.ConfluentConsumer") + @patch("django_kafka.consumer.ConfluentConsumer") def test_pause_partition(self, mock_confluent_consumer): retry_consumer = self._get_retry_consumer() - mock_msg = mock.Mock( + mock_msg = Mock( **{ "topic.return_value": "msg_topic", "partition.return_value": 0, @@ -183,10 +179,10 @@ def test_pause_partition(self, mock_confluent_consumer): mock_confluent_consumer.return_value.seek.assert_called_once_with(partition) mock_confluent_consumer.return_value.pause.assert_called_once_with([partition]) - @mock.patch("django_kafka.consumer.ConfluentConsumer") + @patch("django_kafka.consumer.ConfluentConsumer") def test_resume_partition__before_retry_time(self, mock_confluent_consumer): retry_consumer = self._get_retry_consumer() - mock_msg = mock.Mock( + mock_msg = Mock( **{ "topic.return_value": "msg_topic", "partition.return_value": 0, @@ -200,10 +196,10 @@ def test_resume_partition__before_retry_time(self, mock_confluent_consumer): mock_confluent_consumer.return_value.resume.assert_not_called() - @mock.patch("django_kafka.consumer.ConfluentConsumer") + @patch("django_kafka.consumer.ConfluentConsumer") def test_resume_ready_partitions__after_retry_time(self, mock_confluent_consumer): retry_consumer = self._get_retry_consumer() - mock_msg = mock.Mock( + mock_msg = Mock( **{ "topic.return_value": "msg_topic", "partition.return_value": 0, @@ -222,12 +218,12 @@ def test_resume_ready_partitions__after_retry_time(self, mock_confluent_consumer mock_confluent_consumer.return_value.resume.assert_called_once_with([partition]) - @mock.patch("django_kafka.consumer.ConfluentConsumer") + @patch("django_kafka.consumer.ConfluentConsumer") def test_poll(self, mock_confluent_consumer): """tests poll resumes partitions""" retry_consumer = self._get_retry_consumer() - retry_consumer.resume_ready_partitions = mock.Mock() - mock_msg = mock.Mock() + retry_consumer.resume_ready_partitions = Mock() + mock_msg = Mock() mock_confluent_consumer.return_value.poll.return_value = mock_msg msg = retry_consumer.poll() @@ -235,17 +231,17 @@ def test_poll(self, mock_confluent_consumer): self.assertEqual(msg, mock_msg) retry_consumer.resume_ready_partitions.assert_called_once() # always called - @mock.patch("django_kafka.consumer.Consumer.process_message") - @mock.patch("django_kafka.consumer.ConfluentConsumer") + @patch("django_kafka.consumer.Consumer.process_message") + @patch("django_kafka.consumer.ConfluentConsumer") def test_process_message__before_retry_time( self, mock_confluent_consumer, mock_consumer_process_message, ): retry_consumer = self._get_retry_consumer() - retry_consumer.pause_partition = mock.Mock() + retry_consumer.pause_partition = Mock() retry_time = timezone.now() + datetime.timedelta(minutes=1) - mock_msg = mock.Mock( + mock_msg = Mock( **{ "error.return_value": None, "headers.return_value": [ @@ -258,17 +254,17 @@ def test_process_message__before_retry_time( retry_consumer.pause_partition.assert_called_once_with(mock_msg, retry_time) mock_consumer_process_message.process_message.assert_not_called() - @mock.patch("django_kafka.consumer.Consumer.process_message") - @mock.patch("django_kafka.consumer.ConfluentConsumer") + @patch("django_kafka.consumer.Consumer.process_message") + @patch("django_kafka.consumer.ConfluentConsumer") def test_process_message__after_retry_time( self, 
mock_confluent_consumer, mock_consumer_process_message, ): retry_consumer = self._get_retry_consumer() - retry_consumer.pause_partition = mock.Mock() + retry_consumer.pause_partition = Mock() retry_time = timezone.now() - datetime.timedelta(minutes=1) - mock_msg = mock.Mock( + mock_msg = Mock( **{ "error.return_value": None, "headers.return_value": [ diff --git a/django_kafka/tests/retry/test_topic.py b/django_kafka/tests/retry/test_topic.py index 44fcaa4..38d9b49 100644 --- a/django_kafka/tests/retry/test_topic.py +++ b/django_kafka/tests/retry/test_topic.py @@ -5,92 +5,103 @@ from django_kafka.exceptions import DjangoKafkaError from django_kafka.retry import RetrySettings from django_kafka.retry.headers import RetryHeader -from django_kafka.retry.topic import RetryTopic -from django_kafka.topic import Topic - - -class RetryTopicTestCase(TestCase): - def _get_retryable_topic(self, topic_name: str = "topic_name", **retry_kwargs): - class TestTopic(Topic): - name = topic_name - - retry = RetrySettings(**{"max_retries": 5, "delay": 60, **retry_kwargs}) - topic_cls = retry(TestTopic) - return topic_cls() +from django_kafka.retry.topic import ( + RETRY_TOPIC_PATTERN, + RETRY_TOPIC_SUFFIX, + RetryTopicConsumer, + RetryTopicProducer, +) +from django_kafka.topic import TopicConsumer + + +class RetryTopicProducerTestCase(TestCase): + def test__get_next_attempt(self): + self.assertEqual(RetryTopicProducer.get_next_attempt("topic"), 1) + self.assertEqual( + RetryTopicProducer.get_next_attempt("group.id.topic.fake.5"), + 1, + ) + self.assertEqual( + RetryTopicProducer.get_next_attempt(f"group.id.topic.{RETRY_TOPIC_SUFFIX}"), + 1, + ) + self.assertEqual( + RetryTopicProducer.get_next_attempt( + f"group.id.topic.{RETRY_TOPIC_SUFFIX}.0", + ), + 1, + ) + self.assertEqual( + RetryTopicProducer.get_next_attempt( + f"group.id.topic.{RETRY_TOPIC_SUFFIX}.2", + ), + 3, + ) + self.assertEqual( + RetryTopicProducer.get_next_attempt( + f"group.id.topic.{RETRY_TOPIC_SUFFIX}.10", + ), + 11, + ) def test_init(self): - main_topic = self._get_retryable_topic() + retry_settings = RetrySettings(max_retries=5, delay=60) + mock_msg_topic_consumer = mock.Mock(**{"topic.return_value": "topic.name"}) - RetryTopic(group_id="group.id", main_topic=main_topic) - - def test_init__raises_without_retry(self): - class TestTopic(Topic): - name = "topic_name" - - with self.assertRaises(DjangoKafkaError): - RetryTopic(group_id="group.id", main_topic=TestTopic()) + rt_producer = RetryTopicProducer( + group_id="group.id", + settings=retry_settings, + msg=mock_msg_topic_consumer, + ) - def test__get_attempt(self): - self.assertEqual(RetryTopic.get_attempt("topic"), 0) - self.assertEqual(RetryTopic.get_attempt("group.id.topic.retry"), 0) - self.assertEqual(RetryTopic.get_attempt("group.id.topic.retry.0"), 0) - self.assertEqual(RetryTopic.get_attempt("group.id.topic.retry.2"), 2) - self.assertEqual(RetryTopic.get_attempt("group.id.topic.retry.10"), 10) + self.assertEqual(rt_producer.group_id, "group.id") + self.assertEqual(rt_producer.settings, retry_settings) + self.assertEqual(rt_producer.msg, mock_msg_topic_consumer) + self.assertEqual(rt_producer.attempt, 1) def test_name(self): - """assert name format and correct escaping of regex special characters""" - main_topic = self._get_retryable_topic(topic_name="topic.name") - retry_topic = RetryTopic(group_id="group.id", main_topic=main_topic) + retry_settings = RetrySettings(max_retries=5, delay=60) + mock_msg_topic_consumer = mock.Mock(**{"topic.return_value": "topic.name"}) + 
mock_msg_rt_producer = mock.Mock( + **{"topic.return_value": f"group.id.topic.name.{RETRY_TOPIC_SUFFIX}.1"}, + ) - self.assertEqual(retry_topic.name, r"^group\.id\.topic\.name\.retry\.[0-9]+$") + rt_producer_1 = RetryTopicProducer( + group_id="group.id", + settings=retry_settings, + msg=mock_msg_topic_consumer, + ) - def test_name__regex(self): - """tests regex topic names are correctly inserted in to retry topic regex""" - main_topic = self._get_retryable_topic(topic_name="^topic_name|other_name$") - retry_topic = RetryTopic(group_id="group.id", main_topic=main_topic) + rt_producer_2 = RetryTopicProducer( + group_id="group.id", + settings=retry_settings, + msg=mock_msg_rt_producer, + ) self.assertEqual( - retry_topic.name, - r"^group\.id\.(topic_name|other_name)\.retry\.[0-9]+$", + rt_producer_1.name, + f"group.id.topic.name.{RETRY_TOPIC_SUFFIX}.1", + ) + self.assertEqual( + rt_producer_2.name, + f"group.id.topic.name.{RETRY_TOPIC_SUFFIX}.2", ) - - def test_get_produce_name(self): - main_topic = self._get_retryable_topic() - retry_topic = RetryTopic(group_id="group.id", main_topic=main_topic) - - retry_topic_1 = retry_topic.get_produce_name("fake", 1) - retry_topic_2 = retry_topic.get_produce_name("group.id.fake.retry.1", 2) - retry_topic_3 = retry_topic.get_produce_name("group.id.fake.retry.2", 3) - - self.assertEqual(retry_topic_1, "group.id.fake.retry.1") - self.assertEqual(retry_topic_2, "group.id.fake.retry.2") - self.assertEqual(retry_topic_3, "group.id.fake.retry.3") - - def test_consume(self): - """tests RetryTopic uses the main topic consume method""" - main_topic = self._get_retryable_topic() - main_topic.consume = mock.Mock() - mock_msg = mock.Mock() - - RetryTopic(group_id="group.id", main_topic=main_topic).consume(mock_msg) - - main_topic.consume.assert_called_once_with(mock_msg) @mock.patch("django_kafka.retry.RetrySettings.get_retry_timestamp") def test_retry_for__first_retry(self, mock_get_retry_timestamp: mock.Mock): - main_topic = self._get_retryable_topic(max_retries=5) - retry_topic = RetryTopic(group_id="group.id", main_topic=main_topic) - retry_topic.produce = mock.Mock() mock_msg = mock.Mock(**{"topic.return_value": "msg_topic"}) - - retried = retry_topic.retry_for( + retry_settings = RetrySettings(max_retries=5, delay=60) + rt_producer = RetryTopicProducer( + group_id="group.id", + settings=retry_settings, msg=mock_msg, - exc=ValueError("error message"), ) + rt_producer.produce = mock.Mock() + + retried = rt_producer.retry_for(exc=ValueError("error message")) self.assertTrue(retried) - retry_topic.produce.assert_called_with( - name="group.id.msg_topic.retry.1", + rt_producer.produce.assert_called_with( key=mock_msg.key(), value=mock_msg.value(), headers=[ @@ -102,19 +113,20 @@ def test_retry_for__first_retry(self, mock_get_retry_timestamp: mock.Mock): @mock.patch("django_kafka.retry.RetrySettings.get_retry_timestamp") def test_retry_for__last_retry(self, mock_get_retry_timestamp): - main_topic = self._get_retryable_topic(max_retries=5) - retry_topic = RetryTopic(group_id="group.id", main_topic=main_topic) - retry_topic.produce = mock.Mock() - mock_msg = mock.Mock(**{"topic.return_value": "group.id.msg_topic.retry.4"}) - - retried = retry_topic.retry_for( + mock_msg = mock.Mock( + **{"topic.return_value": f"group.id.msg_topic.{RETRY_TOPIC_SUFFIX}.4"}, + ) + rt_producer = RetryTopicProducer( + group_id="group.id", + settings=RetrySettings(max_retries=5, delay=60), msg=mock_msg, - exc=ValueError("error message"), ) + rt_producer.produce = mock.Mock() + + retried = 
rt_producer.retry_for(exc=ValueError("error message")) self.assertTrue(retried) - retry_topic.produce.assert_called_with( - name="group.id.msg_topic.retry.5", + rt_producer.produce.assert_called_with( key=mock_msg.key(), value=mock_msg.value(), headers=[ @@ -125,29 +137,108 @@ def test_retry_for__last_retry(self, mock_get_retry_timestamp): mock_get_retry_timestamp.assert_called_once_with(5) def test_retry_for__no_more_retries(self): - main_topic = self._get_retryable_topic(max_retries=5) - retry_topic = RetryTopic(group_id="group.id", main_topic=main_topic) - retry_topic.produce = mock.Mock() - mock_msg = mock.Mock(**{"topic.return_value": "group.id.msg_topic.retry.5"}) - - retried = retry_topic.retry_for( - msg=mock_msg, - exc=ValueError(), + rt_producer = RetryTopicProducer( + group_id="group.id", + settings=RetrySettings(max_retries=5, delay=60), + msg=mock.Mock( + **{"topic.return_value": f"group.id.msg_topic.{RETRY_TOPIC_SUFFIX}.5"}, + ), ) + rt_producer.produce = mock.Mock() + + retried = rt_producer.retry_for(exc=ValueError()) self.assertFalse(retried) - retry_topic.produce.assert_not_called() + rt_producer.produce.assert_not_called() def test_retry_for__no_retry_for_excluded_error(self): - topic = self._get_retryable_topic(exclude=[ValueError]) - mock_msg = mock.Mock(**{"topic.return_value": "msg_topic"}) - retry_topic = RetryTopic(group_id="group.id", main_topic=topic) - retry_topic.produce = mock.Mock() - - retried = retry_topic.retry_for( - msg=mock_msg, - exc=ValueError(), + rt_producer = RetryTopicProducer( + group_id="group.id", + settings=RetrySettings(max_retries=5, delay=60, exclude=[ValueError]), + msg=mock.Mock(**{"topic.return_value": "msg_topic"}), ) + rt_producer.produce = mock.Mock() + + retried = rt_producer.retry_for(exc=ValueError()) self.assertFalse(retried) - retry_topic.produce.assert_not_called() + rt_producer.produce.assert_not_called() + + +class RetryTopicConsumerTestCase(TestCase): + def _get_retryable_topic_consumer( + self, + topic_name: str = "topic_name", + **retry_kwargs, + ): + class SomeTopicConsumer(TopicConsumer): + name = topic_name + + retry = RetrySettings(**{"max_retries": 5, "delay": 60, **retry_kwargs}) + topic_consumer_cls = retry(SomeTopicConsumer) + return topic_consumer_cls() + + def test_init__raises_without_retry(self): + class SomeTopicConsumer(TopicConsumer): + name = "topic_name" + + with self.assertRaises(DjangoKafkaError): + RetryTopicConsumer(group_id="group.id", topic_consumer=SomeTopicConsumer()) + + def test_name(self): + """assert name format and correct escaping of regex special characters""" + topic_consumer = self._get_retryable_topic_consumer(topic_name="topic.name") + rt_consumer = RetryTopicConsumer( + group_id="group.id", + topic_consumer=topic_consumer, + ) + + self.assertEqual( + rt_consumer.name, + rf"^group\.id\.topic\.name\.{RETRY_TOPIC_PATTERN}", + ) + + def test_name__regex(self): + """tests regex topic names are correctly inserted in to retry topic regex""" + topic_consumer = self._get_retryable_topic_consumer( + topic_name="^topic_name|other_name$", + ) + rt_consumer = RetryTopicConsumer( + group_id="group.id", + topic_consumer=topic_consumer, + ) + + self.assertEqual( + rt_consumer.name, + rf"^group\.id\.(topic_name|other_name)\.{RETRY_TOPIC_PATTERN}", + ) + + def test_consume(self): + """tests RetryTopic uses the main topic consume method""" + topic_consumer = self._get_retryable_topic_consumer() + topic_consumer.consume = mock.Mock() + mock_msg = mock.Mock() + + RetryTopicConsumer(group_id="group.id", 
topic_consumer=topic_consumer).consume( + mock_msg, + ) + + topic_consumer.consume.assert_called_once_with(mock_msg) + + @mock.patch("django_kafka.retry.topic.RetryTopicProducer") + def test_producer_for(self, mock_rt_producer): + topic_consumer = self._get_retryable_topic_consumer() + mock_msg = mock.Mock() + + rt_consumer = RetryTopicConsumer( + group_id="group.id", + topic_consumer=topic_consumer, + ) + rt_producer = rt_consumer.producer_for(mock_msg) + + self.assertEqual(rt_producer, mock_rt_producer.return_value) + mock_rt_producer.assert_called_once_with( + group_id=rt_consumer.group_id, + settings=topic_consumer.retry_settings, + msg=mock_msg, + ) diff --git a/django_kafka/tests/test_consumer.py b/django_kafka/tests/test_consumer.py index 22f68e4..4abe6cd 100644 --- a/django_kafka/tests/test_consumer.py +++ b/django_kafka/tests/test_consumer.py @@ -76,7 +76,7 @@ class SomeConsumer(Consumer): # check if msg has error before processing msg.error.assert_called_once_with() # Topic.consume called - consumer.get_topic(msg.topic()).consume.assert_called_once_with(msg) + consumer.get_topic_consumer(msg.topic()).consume.assert_called_once_with(msg) # commit_offset triggered mock_commit_offset.assert_called_once_with(msg) @@ -118,16 +118,18 @@ def test_process_message_exception( handle_exception, ): topic_consume_side_effect = TypeError("test") - topic = Mock( + topic_consumer = Mock( **{ "name": "topic", "consume.side_effect": topic_consume_side_effect, }, ) - msg = Mock(**{"topic.return_value": topic.name, "error.return_value": False}) + msg = Mock( + **{"topic.return_value": topic_consumer.name, "error.return_value": False}, + ) class SomeConsumer(Consumer): - topics = Topics(topic) + topics = Topics(topic_consumer) config = {} consumer = SomeConsumer() @@ -137,7 +139,7 @@ class SomeConsumer(Consumer): # check if msg has error before processing msg.error.assert_called_once_with() # Topic.consume was triggered - topic.consume.assert_called_once_with(msg) + topic_consumer.consume.assert_called_once_with(msg) # error handler was triggered on exception handle_exception.assert_called_once_with(msg, topic_consume_side_effect) # Consumer.commit_offset is not called @@ -181,67 +183,67 @@ class SomeConsumer(Consumer): consumer.dead_letter_msg.assert_called_once_with(msg, exc) consumer.log_error.assert_called_once_with(exc) - @patch("django_kafka.retry.topic.RetryTopic") - def test_retry_msg(self, mock_retry_topic_cls): - topic = Mock(name="topic", retry_settings=Mock()) - msg = Mock(**{"topic.return_value": topic.name}) - retry_for = mock_retry_topic_cls.return_value.retry_for - retry_for.return_value = True + @patch("django_kafka.retry.topic.RetryTopicProducer") + def test_retry_msg(self, mock_rt_producer_cls): + mock_topic_consumer = Mock(retry_settings=Mock()) + mock_retry_for = mock_rt_producer_cls.return_value.retry_for + msg_mock = Mock() class SomeConsumer(Consumer): - topics = Topics(topic) + topics = Topics() config = {"group.id": "group_id"} consumer = SomeConsumer() + consumer.get_topic_consumer = Mock(return_value=mock_topic_consumer) exc = ValueError() - retried = consumer.retry_msg(msg, exc) + retried = consumer.retry_msg(msg_mock, exc) - mock_retry_topic_cls.assert_called_once_with( + mock_rt_producer_cls.assert_called_once_with( group_id=consumer.group_id, - main_topic=topic, + settings=mock_topic_consumer.retry_settings, + msg=msg_mock, ) - retry_for.assert_called_once_with(msg=msg, exc=exc) - self.assertEqual(retried, True) + mock_retry_for.assert_called_once_with(exc=exc) + 
self.assertEqual(retried, mock_retry_for.return_value) - @patch("django_kafka.retry.topic.RetryTopic") - def test_retry_msg__no_retry(self, mock_retry_topic_cls): - topic = Mock(name="topic", retry_settings=None) - msg = Mock(**{"topic.return_value": topic.name}) + @patch("django_kafka.retry.topic.RetryTopicProducer") + def test_retry_msg__no_retry(self, mock_rt_producer_cls): + mock_topic_consumer = Mock(retry_settings=None) + msg_mock = Mock() class SomeConsumer(Consumer): - topics = Topics(topic) + topics = Topics() config = {"group.id": "group_id"} consumer = SomeConsumer() + consumer.get_topic_consumer = Mock(return_value=mock_topic_consumer) exc = ValueError() - retried = consumer.retry_msg(msg, exc) + retried = consumer.retry_msg(msg_mock, exc) - mock_retry_topic_cls.assert_not_called() + mock_rt_producer_cls.assert_not_called() self.assertEqual(retried, False) - @patch("django_kafka.dead_letter.topic.DeadLetterTopic") + @patch("django_kafka.dead_letter.topic.DeadLetterTopicProducer") def test_dead_letter_msg(self, mock_dead_letter_topic_cls): - topic = Mock(name="topic") - msg = Mock(**{"topic.return_value": topic.name}) - produce_for = mock_dead_letter_topic_cls.return_value.produce_for + mock_produce_for = mock_dead_letter_topic_cls.return_value.produce_for + msg_mock = Mock() class SomeConsumer(Consumer): - topics = Topics(topic) + topics = Topics() config = {"group.id": "group_id"} consumer = SomeConsumer() exc = ValueError() - consumer.dead_letter_msg(msg, exc) + consumer.dead_letter_msg(msg_mock, exc) mock_dead_letter_topic_cls.assert_called_once_with( group_id=consumer.group_id, - main_topic=topic, + msg=msg_mock, ) - produce_for.assert_called_once_with( - msg=msg, + mock_produce_for.assert_called_once_with( header_message=str(exc), header_detail=traceback.format_exc(), ) diff --git a/django_kafka/tests/test_topic.py b/django_kafka/tests/test_topic.py index f5bb33d..ab43430 100644 --- a/django_kafka/tests/test_topic.py +++ b/django_kafka/tests/test_topic.py @@ -5,83 +5,40 @@ from django_kafka.conf import SETTINGS_KEY from django_kafka.exceptions import DjangoKafkaError -from django_kafka.topic import AvroTopic, Topic - +from django_kafka.topic import ( + AvroTopicConsumer, + AvroTopicProducer, + TopicConsumer, + TopicProducer, +) -class SomeTopic(Topic): - name = "some-topic" - def consume(self, msg): - pass +class SomeTopicProducer(TopicProducer): + name = "some-topic-producer" -class TopicTestCase(TestCase): +class TopicProducerTestCase(TestCase): def setUp(self): - self.topic = SomeTopic() - - def test_is_regex(self): - self.topic.name = "^some-topic" - - self.assertTrue(self.topic.is_regex()) - - def test_matches(self): - self.topic.name = "some-topic" - - self.assertTrue(self.topic.matches("some-topic")) - self.assertFalse(self.topic.matches("some-topic.extra")) - - def test_matches__regex(self): - self.topic.name = "^some-topic" + self.topic_producer = SomeTopicProducer() - self.assertTrue(self.topic.matches("some-topic")) - self.assertTrue(self.topic.matches("some-topic.extra")) - self.assertFalse(self.topic.matches("other-topic")) + def test_init__disallows_regex(self): + class RegexTopicProducer(TopicProducer): + name = "^topic" - def test_validate_produce_name(self): - """validates and returns expected producing name for non-regex based topics""" - self.assertEqual(self.topic.name, self.topic.validate_produce_name(None)) - self.assertEqual( - self.topic.name, self.topic.validate_produce_name(self.topic.name) - ) - with self.assertRaisesMessage( - DjangoKafkaError, 
- "topic producing name `invalid-topic` is not valid for this topic", - ): - self.topic.validate_produce_name("invalid-topic") + with self.assertRaises(DjangoKafkaError): + RegexTopicProducer() - def test_validate_produce_name__regex(self): - """validates supplied producing name when topic is regex based""" - self.topic.name = "^some-topic" - - self.assertEqual( - "some-topic.extra", - self.topic.validate_produce_name("some-topic.extra"), - ) - with self.assertRaisesMessage( - DjangoKafkaError, - "topic producing name `invalid-topic` is not valid for this topic", - ): - self.topic.validate_produce_name("invalid-topic") - with self.assertRaisesMessage( - DjangoKafkaError, - "topic producing name must be supplied for regex-based topics", - ): - self.topic.validate_produce_name(None) - - @patch("django_kafka.topic.Topic.serialize") + @patch("django_kafka.topic.TopicProducer.serialize") @patch("django_kafka.kafka.producer") def test_produce_serializer_kwargs(self, mock_kafka_producer, mock_topic_serialize): - self.topic.name = "^some-topic" - name = "some-topic.extra" key = "key" value = "message value" headers = None # default is None when not provided key_serializer_kwargs = {"a": "b"} value_serializer_kwargs = {"c": "d"} - self.topic.produce( + self.topic_producer.produce( value, - name=name, key=key, key_serializer_kwargs=key_serializer_kwargs, value_serializer_kwargs=value_serializer_kwargs, @@ -91,14 +48,12 @@ def test_produce_serializer_kwargs(self, mock_kafka_producer, mock_topic_seriali mock_topic_serialize.call_args_list, [ call( - name, key, MessageField.KEY, headers, **key_serializer_kwargs, ), call( - name, value, MessageField.VALUE, headers, @@ -108,179 +63,203 @@ def test_produce_serializer_kwargs(self, mock_kafka_producer, mock_topic_seriali ) mock_kafka_producer.produce.assert_called_once_with( - name, + self.topic_producer.name, mock_topic_serialize.return_value, key=mock_topic_serialize.return_value, ) - @patch("django_kafka.topic.Topic.serialize") - @patch("django_kafka.kafka.producer") - def test_produce_requires_kwargs_supplied_name_for_regex_names( - self, - mock_kafka_producer, - mock_topic_serialize, - ): - self.topic.name = "^some-topic" - value = "message value" - - with self.assertRaisesMessage( - DjangoKafkaError, - "topic producing name must be supplied for regex-based topics", - ): - self.topic.produce(value) - - @patch("django_kafka.topic.Topic.serialize") + @patch("django_kafka.topic.TopicProducer.serialize") @patch("django_kafka.kafka.producer") def test_produce_only_value(self, mock_kafka_producer, mock_topic_serialize): value = "message value" headers = None # default is None when not provided - self.topic.produce(value) + self.topic_producer.produce(value) mock_topic_serialize.assert_called_once_with( - self.topic.name, value, MessageField.VALUE, headers, ) mock_kafka_producer.produce.assert_called_once_with( - self.topic.name, + self.topic_producer.name, mock_topic_serialize.return_value, ) - @patch("django_kafka.topic.Topic.serialize") + @patch("django_kafka.topic.TopicProducer.serialize") @patch("django_kafka.kafka.producer") def test_produce_key_is_serialized(self, mock_kafka_producer, mock_topic_serialize): value = "message value" key = "some key" headers = {"header-1": "header-1-value"} - self.topic.produce(value, key=key, headers=headers) + self.topic_producer.produce(value, key=key, headers=headers) self.assertEqual( mock_topic_serialize.call_args_list, [ - call(self.topic.name, key, MessageField.KEY, headers), - call(self.topic.name, value, 
MessageField.VALUE, headers), + call(key, MessageField.KEY, headers), + call(value, MessageField.VALUE, headers), ], ) mock_kafka_producer.produce.assert_called_once_with( - self.topic.name, + self.topic_producer.name, mock_topic_serialize.return_value, key=mock_topic_serialize.return_value, headers=headers, ) - @patch("django_kafka.topic.Topic.key_deserializer") - @patch("django_kafka.topic.Topic.context") - def test_deserialize_key(self, mock_topic_context, mock_key_deserializer): - name = "name" - value = b"some key" + @patch("django_kafka.topic.TopicProducer.key_serializer") + @patch("django_kafka.topic.TopicProducer.context") + def test_serialize_key(self, mock_topic_context, mock_key_serializer): + value = "some key" field = MessageField.KEY kwargs = {"key": "value"} - self.topic.deserialize(name, value, field, **kwargs) + self.topic_producer.serialize(value, field, **kwargs) - mock_topic_context.assert_called_once_with(name, field, None) - mock_key_deserializer.assert_called_once_with(**kwargs) - mock_key_deserializer.return_value.assert_called_once_with( + mock_topic_context.assert_called_once_with(field, None) + mock_key_serializer.assert_called_once_with(**kwargs) + mock_key_serializer.return_value.assert_called_once_with( value, mock_topic_context.return_value, ) - @patch("django_kafka.topic.Topic.value_deserializer") - @patch("django_kafka.topic.Topic.context") - def test_deserialize_value(self, mock_topic_context, mock_value_deserializer): - name = "name" - value = b"some value" + @patch("django_kafka.topic.TopicProducer.value_serializer") + @patch("django_kafka.topic.TopicProducer.context") + def test_serialize_value(self, mock_topic_context, mock_value_serializer): + value = "some value" field = MessageField.VALUE kwargs = {"key": "value"} - self.topic.deserialize(name, value, field, **kwargs) + self.topic_producer.serialize(value, field, **kwargs) - mock_topic_context.assert_called_once_with(name, field, None) - mock_value_deserializer.assert_called_once_with(**kwargs) - mock_value_deserializer.return_value.assert_called_once_with( + mock_topic_context.assert_called_once_with(field, None) + mock_value_serializer.assert_called_once_with(**kwargs) + mock_value_serializer.return_value.assert_called_once_with( value, mock_topic_context.return_value, ) - @patch("django_kafka.topic.Topic.key_deserializer") - @patch("django_kafka.topic.Topic.value_deserializer") - @patch("django_kafka.topic.Topic.context") - def test_deserialize_unknown_field( + @patch("django_kafka.topic.TopicProducer.key_serializer") + @patch("django_kafka.topic.TopicProducer.value_serializer") + @patch("django_kafka.topic.TopicProducer.context") + def test_serialize_unknown_field( self, mock_topic_context, - mock_value_deserializer, - mock_key_deserializer, + mock_value_serializer, + mock_key_serializer, ): field = "something_unknown" with self.assertRaisesMessage( DjangoKafkaError, - f"Unsupported deserialization field {field}.", + f"Unsupported serialization field {field}.", ): - self.topic.deserialize("name", "some value", field) + self.topic_producer.serialize("some value", field) mock_topic_context.assert_not_called() - mock_value_deserializer.assert_not_called() - mock_key_deserializer.assert_not_called() + mock_value_serializer.assert_not_called() + mock_key_serializer.assert_not_called() - @patch("django_kafka.topic.Topic.key_serializer") - @patch("django_kafka.topic.Topic.context") - def test_serialize_key(self, mock_topic_context, mock_key_serializer): - name = "name" - value = "some key" + def 
test_context(self): + fields = [MessageField.VALUE, MessageField.KEY] + headers = {"header-1": "header-1-value"} + + for field in fields: + with patch( + "django_kafka.topic.SerializationContext", + ) as mock_serialization_context: + self.topic_producer.context(field, headers) + mock_serialization_context.assert_called_once_with( + self.topic_producer.name, + field, + headers=headers, + ) + + +class SomeTopicConsumer(TopicConsumer): + name = "some-topic-consumer" + + def consume(self, msg): + pass + + +class TopicConsumerTestCase(TestCase): + def setUp(self): + self.topic_consumer = SomeTopicConsumer() + + def test_is_regex(self): + self.topic_consumer.name = "^some-topic" + + self.assertTrue(self.topic_consumer.is_regex()) + + def test_matches(self): + self.topic_consumer.name = "some-topic" + + self.assertTrue(self.topic_consumer.matches("some-topic")) + self.assertFalse(self.topic_consumer.matches("some-topic.extra")) + + def test_matches__regex(self): + self.topic_consumer.name = "^some-topic" + + self.assertTrue(self.topic_consumer.matches("some-topic")) + self.assertTrue(self.topic_consumer.matches("some-topic.extra")) + self.assertFalse(self.topic_consumer.matches("other-topic")) + + @patch("django_kafka.topic.TopicConsumer.key_deserializer") + @patch("django_kafka.topic.TopicConsumer.context") + def test_deserialize_key(self, mock_topic_context, mock_key_deserializer): + value = b"some key" field = MessageField.KEY kwargs = {"key": "value"} - self.topic.serialize(name, value, field, **kwargs) + self.topic_consumer.deserialize(value, field, **kwargs) - mock_topic_context.assert_called_once_with(name, field, None) - mock_key_serializer.assert_called_once_with(**kwargs) - mock_key_serializer.return_value.assert_called_once_with( + mock_topic_context.assert_called_once_with(field, None) + mock_key_deserializer.assert_called_once_with(**kwargs) + mock_key_deserializer.return_value.assert_called_once_with( value, mock_topic_context.return_value, ) - @patch("django_kafka.topic.Topic.value_serializer") - @patch("django_kafka.topic.Topic.context") - def test_serialize_value(self, mock_topic_context, mock_value_serializer): - name = "name" - value = "some value" + @patch("django_kafka.topic.TopicConsumer.value_deserializer") + @patch("django_kafka.topic.TopicConsumer.context") + def test_deserialize_value(self, mock_topic_context, mock_value_deserializer): + value = b"some value" field = MessageField.VALUE kwargs = {"key": "value"} - self.topic.serialize(name, value, field, **kwargs) + self.topic_consumer.deserialize(value, field, **kwargs) - mock_topic_context.assert_called_once_with(name, field, None) - mock_value_serializer.assert_called_once_with(**kwargs) - mock_value_serializer.return_value.assert_called_once_with( + mock_topic_context.assert_called_once_with(field, None) + mock_value_deserializer.assert_called_once_with(**kwargs) + mock_value_deserializer.return_value.assert_called_once_with( value, mock_topic_context.return_value, ) - @patch("django_kafka.topic.Topic.key_serializer") - @patch("django_kafka.topic.Topic.value_serializer") - @patch("django_kafka.topic.Topic.context") - def test_serialize_unknown_field( + @patch("django_kafka.topic.TopicConsumer.key_deserializer") + @patch("django_kafka.topic.TopicConsumer.value_deserializer") + @patch("django_kafka.topic.TopicConsumer.context") + def test_deserialize_unknown_field( self, mock_topic_context, - mock_value_serializer, - mock_key_serializer, + mock_value_deserializer, + mock_key_deserializer, ): field = "something_unknown" 
with self.assertRaisesMessage( DjangoKafkaError, - f"Unsupported serialization field {field}.", + f"Unsupported deserialization field {field}.", ): - self.topic.serialize("name", "some value", field) + self.topic_consumer.deserialize("some value", field) mock_topic_context.assert_not_called() - mock_value_serializer.assert_not_called() - mock_key_serializer.assert_not_called() + mock_value_deserializer.assert_not_called() + mock_key_deserializer.assert_not_called() def test_context(self): fields = [MessageField.VALUE, MessageField.KEY] @@ -290,20 +269,17 @@ def test_context(self): with patch( "django_kafka.topic.SerializationContext", ) as mock_serialization_context: - self.topic.context(self.topic.name, field, headers) + self.topic_consumer.context(field, headers) mock_serialization_context.assert_called_once_with( - self.topic.name, + self.topic_consumer.name, field, headers=headers, ) -class ATopic(AvroTopic): +class ATopicProducer(AvroTopicProducer): name = "some_topic_with_avro_serialization" - def consume(self, msg): - pass - @override_settings( **{SETTINGS_KEY: {"SCHEMA_REGISTRY": {"url": "http://schema-registy"}}}, @@ -311,7 +287,7 @@ def consume(self, msg): @patch("django_kafka.topic.kafka.schema_client") class AvroTopicTestCase(TestCase): def setUp(self): - self.topic = ATopic() + self.topic_producer = ATopicProducer() @patch("django_kafka.topic.AvroSerializer") def test_get_key_serializer( @@ -323,7 +299,7 @@ def test_get_key_serializer( "schema_str": "", "conf": {}, } - key_serializer = self.topic.get_key_serializer(**kwargs) + key_serializer = self.topic_producer.get_key_serializer(**kwargs) # returns AvroSerializer instance self.assertEqual(key_serializer, mock_avro_serializer.return_value) @@ -333,23 +309,6 @@ def test_get_key_serializer( **kwargs, ) - @patch("django_kafka.topic.AvroDeserializer") - def test_get_key_deserializer( - self, - mock_avro_deserializer, - mock_kafka_schema_client, - ): - kwargs = {} - key_deserializer = self.topic.get_key_deserializer(**kwargs) - - # returns mock_AvroDeserializer instance - self.assertEqual(key_deserializer, mock_avro_deserializer.return_value) - # instance was initialized with right arguments - mock_avro_deserializer.assert_called_once_with( - mock_kafka_schema_client, - **kwargs, - ) - @patch("django_kafka.topic.AvroSerializer") def test_get_value_serializer( self, @@ -360,7 +319,7 @@ def test_get_value_serializer( "schema_str": "", "conf": {}, } - value_serializer = self.topic.get_value_serializer(**kwargs) + value_serializer = self.topic_producer.get_value_serializer(**kwargs) # returns AvroSerializer instance self.assertEqual(value_serializer, mock_avro_serializer.return_value) @@ -370,6 +329,39 @@ def test_get_value_serializer( **kwargs, ) + +class ATopicConsumer(AvroTopicConsumer): + name = "some_topic_with_avro_deserialization" + + def consume(self, msg): + pass + + +@override_settings( + **{SETTINGS_KEY: {"SCHEMA_REGISTRY": {"url": "http://schema-registy"}}}, +) +@patch("django_kafka.topic.kafka.schema_client") +class AvroTopicConsumerTestCase(TestCase): + def setUp(self): + self.topic_consumer = ATopicConsumer() + + @patch("django_kafka.topic.AvroDeserializer") + def test_get_key_deserializer( + self, + mock_avro_deserializer, + mock_kafka_schema_client, + ): + kwargs = {} + key_deserializer = self.topic_consumer.get_key_deserializer(**kwargs) + + # returns mock_AvroDeserializer instance + self.assertEqual(key_deserializer, mock_avro_deserializer.return_value) + # instance was initialized with right arguments + 
mock_avro_deserializer.assert_called_once_with(
+            mock_kafka_schema_client,
+            **kwargs,
+        )
+
     @patch("django_kafka.topic.AvroDeserializer")
     def test_get_value_deserializer(
         self,
@@ -377,7 +369,7 @@ def test_get_value_deserializer(
         mock_avro_deserializer,
         mock_kafka_schema_client,
     ):
         kwargs = {}
-        value_deserializer = self.topic.get_value_deserializer(**kwargs)
+        value_deserializer = self.topic_consumer.get_value_deserializer(**kwargs)
 
         # returns mock_AvroDeserializer instance
         self.assertEqual(value_deserializer, mock_avro_deserializer.return_value)
diff --git a/django_kafka/topic.py b/django_kafka/topic.py
index a2f5224..7b7f007 100644
--- a/django_kafka/topic.py
+++ b/django_kafka/topic.py
@@ -23,60 +21,57 @@
 logger = logging.getLogger(__name__)
 
 
-class Topic(ABC):
+class TopicProducer(ABC):
     key_serializer: Type[Serializer] = StringSerializer
     value_serializer: Type[Serializer] = StringSerializer
 
-    key_deserializer: Type[Deserializer] = StringDeserializer
-    value_deserializer: Type[Deserializer] = StringDeserializer
-
-    retry_settings: Optional["RetrySettings"] = None
-
     @property
     @abstractmethod
     def name(self) -> str:
-        """Define Kafka topic name
+        """Defines the Kafka topic name to produce to."""
 
-        Regexp pattern subscriptions are supported by prefixing the name with "^". If
-        used, then a name must always be supplied to .produce() method.
-        """
+    def __init__(self):
+        super().__init__()
+        if self.name.startswith("^"):
+            raise DjangoKafkaError(f"TopicProducer name cannot be regex: {self.name}")
 
-    def is_regex(self):
-        """returns if the topic subscription is regex based"""
-        return self.name.startswith("^")
+    def get_key_serializer(self, **kwargs):
+        return self.key_serializer(**kwargs)
 
-    def validate_produce_name(self, name: Optional[str]) -> str:
-        """validates and returns the topic producing name"""
-        if name:
-            if self.matches(name):
-                return name
-            raise DjangoKafkaError(
-                f"topic producing name `{name}` is not valid for this topic",
-            )
-        if self.is_regex():
-            raise DjangoKafkaError(
-                "topic producing name must be supplied for regex-based topics",
-            )
-        return self.name
+    def get_value_serializer(self, **kwargs):
+        return self.value_serializer(**kwargs)
 
-    def matches(self, topic_name: str):
-        if self.is_regex():
-            return bool(re.search(self.name, topic_name))
-        return self.name == topic_name
+    def context(
+        self,
+        field: MessageField,
+        headers: Optional[list] = None,
+    ) -> SerializationContext:
+        return SerializationContext(self.name, field, headers=headers)
 
-    def consume(self, msg: cimpl.Message):
-        """Implement message processing"""
-        raise NotImplementedError
+    def serialize(
+        self,
+        value,
+        field: MessageField,
+        headers: Optional[list] = None,
+        **kwargs,
+    ):
+        if field == MessageField.VALUE:
+            serializer = self.get_value_serializer(**kwargs)
+            return serializer(value, self.context(MessageField.VALUE, headers))
+
+        if field == MessageField.KEY:
+            serializer = self.get_key_serializer(**kwargs)
+            return serializer(value, self.context(MessageField.KEY, headers))
+
+        raise DjangoKafkaError(f"Unsupported serialization field {field}.")
 
     def produce(self, value: any, **kwargs):
-        name = self.validate_produce_name(kwargs.pop("name", None))
         key_serializer_kwargs = kwargs.pop("key_serializer_kwargs", {}) or {}
         value_serializer_kwargs = kwargs.pop("value_serializer_kwargs", {}) or {}
         headers = kwargs.get("headers")
 
         if "key" in kwargs:
             kwargs["key"] = self.serialize(
-                name,
                 kwargs["key"],
                 MessageField.KEY,
                 headers,
@@ -84,9 +79,8 @@ def produce(self, value: any, **kwargs):
             )
 
         kafka.producer.produce(
-            name,
+            self.name,
             self.serialize(
-                name,
                 value,
                 MessageField.VALUE,
                 headers,
@@ -96,33 +90,46 @@ def produce(self, value: any, **kwargs):
         )
         kafka.producer.poll(0)  # stops producer on_delivery callbacks buffer overflow
 
-    def serialize(
+
+class TopicConsumer(ABC):
+    key_deserializer: Type[Deserializer] = StringDeserializer
+    value_deserializer: Type[Deserializer] = StringDeserializer
+
+    retry_settings: Optional["RetrySettings"] = None
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """
+        Defines the Kafka topic consumer subscription name.
+
+        Regexp pattern subscriptions are supported by prefixing the name with "^".
+        """
+
+    def is_regex(self) -> bool:
+        """Returns whether the subscription name is regex-based"""
+        return self.name.startswith("^")
+
+    def matches(self, topic_name: str) -> bool:
+        if self.is_regex():
+            return bool(re.search(self.name, topic_name))
+        return self.name == topic_name
+
+    def get_key_deserializer(self, **kwargs):
+        return self.key_deserializer(**kwargs)
+
+    def get_value_deserializer(self, **kwargs):
+        return self.value_deserializer(**kwargs)
+
+    def context(
         self,
-        name,
-        value,
         field: MessageField,
         headers: Optional[list] = None,
-        **kwargs,
-    ):
-        if field == MessageField.VALUE:
-            serializer = self.get_value_serializer(**kwargs)
-            return serializer(
-                value,
-                self.context(name, MessageField.VALUE, headers),
-            )
-
-        if field == MessageField.KEY:
-            serializer = self.get_key_serializer(**kwargs)
-            return serializer(
-                value,
-                self.context(name, MessageField.KEY, headers),
-            )
-
-        raise DjangoKafkaError(f"Unsupported serialization field {field}.")
+    ) -> SerializationContext:
+        return SerializationContext(self.name, field, headers=headers)
 
     def deserialize(
         self,
-        name,
         value,
         field: MessageField,
         headers: Optional[list] = None,
@@ -130,52 +137,30 @@ def deserialize(
     ):
         if field == MessageField.VALUE:
             deserializer = self.get_value_deserializer(**kwargs)
-            return deserializer(
-                value,
-                self.context(name, MessageField.VALUE, headers),
-            )
+            return deserializer(value, self.context(MessageField.VALUE, headers))
 
         if field == MessageField.KEY:
             deserializer = self.get_key_deserializer(**kwargs)
-            return deserializer(
-                value,
-                self.context(name, MessageField.KEY, headers),
-            )
+            return deserializer(value, self.context(MessageField.KEY, headers))
 
         raise DjangoKafkaError(f"Unsupported deserialization field {field}.")
 
-    def get_key_serializer(self, **kwargs):
-        return self.key_serializer(**kwargs)
-
-    def get_value_serializer(self, **kwargs):
-        return self.value_serializer(**kwargs)
-
-    def get_key_deserializer(self, **kwargs):
-        return self.key_deserializer(**kwargs)
+    def consume(self, msg: cimpl.Message):
+        """Implement message processing"""
+        raise NotImplementedError
 
-    def get_value_deserializer(self, **kwargs):
-        return self.value_deserializer(**kwargs)
 
-    def context(
-        self,
-        name: str,
-        field: MessageField,
-        headers: Optional[list] = None,
-    ) -> SerializationContext:
-        return SerializationContext(name, field, headers=headers)
+class Topic(TopicConsumer, TopicProducer, ABC):
+    """Combined Topic class for when topic consume and produce names are the same"""
 
 
-class AvroTopic(Topic, ABC):
+class AvroTopicProducer(TopicProducer, ABC):
     """
-    Consume.
-        Defining schemas is not necessary as it gets retrieved automatically from the Schema Registry.
-
-    Produce.
-        Defining `value_schema` is required (`key_schema` is required when using keys).
-        It gets submitted to the Schema Registry
+    Defining `value_schema` is required (`key_schema` is required when using keys).
+    It gets submitted to the Schema Registry.
 
     Multiple schemas and one Topic:
-        `AvroTopic.produce` takes `serializer_kwargs` kw argument.
+        `.produce(...)` takes `{key|value}_serializer_kwargs` kw argument.
         `AvroSerializer` then gets initialized with the provided kwargs.
         When producing you can tell which schema to use for your message:
         ```python
@@ -209,8 +194,18 @@ def get_value_serializer(self, **kwargs):
         return AvroSerializer(kafka.schema_client, **kwargs)
 
+
+class AvroTopicConsumer(TopicConsumer, ABC):
+    """
+    Defining schemas is not necessary as they are retrieved automatically from the Schema Registry.
+    """  # noqa: E501
+
     def get_key_deserializer(self, **kwargs):
         return AvroDeserializer(kafka.schema_client, **kwargs)
 
     def get_value_deserializer(self, **kwargs):
         return AvroDeserializer(kafka.schema_client, **kwargs)
+
+
+class AvroTopic(AvroTopicConsumer, AvroTopicProducer, Topic, ABC):
+    """Combined AvroTopic class for when topic consume and produce names are the same"""

From 618b1ff0e4baff91676846f92559906b60a357a3 Mon Sep 17 00:00:00 2001
From: Stefan Cardnell
Date: Thu, 12 Sep 2024 16:48:56 +0200
Subject: [PATCH 3/4] feat: add settings for retry and dead letter topic
 suffixes, refs #8

---
 README.md                                    |  2 +-
 django_kafka/conf.py                         |  2 +
 django_kafka/consumer.py                     |  4 +-
 django_kafka/dead_letter/topic.py            | 17 ++--
 django_kafka/retry/consumer.py               |  2 +-
 django_kafka/retry/topic.py                  | 36 +++++---
 django_kafka/tests/dead_letter/test_topic.py | 29 ++++---
 django_kafka/tests/retry/test_consumer.py    |  6 +-
 django_kafka/tests/retry/test_topic.py       | 88 ++++++++++----------
 django_kafka/tests/test_consumer.py          |  8 +-
 django_kafka/tests/test_settings.py          |  4 +
 11 files changed, 110 insertions(+), 88 deletions(-)

diff --git a/README.md b/README.md
index 9d7876d..b6d83f5 100644
--- a/README.md
+++ b/README.md
@@ -257,7 +257,7 @@ docker compose run --rm app bump-my-version bump
 ```
 This will update version major/minor/patch version respectively and add a tag for release.
 
-- Push including new tag to publish the release to pypi.
+- Once the changes are approved and merged, push the tag to publish the release to pypi.
```bash git push origin tag ``` diff --git a/django_kafka/conf.py b/django_kafka/conf.py index 1eb9eca..2aaa3de 100644 --- a/django_kafka/conf.py +++ b/django_kafka/conf.py @@ -14,6 +14,8 @@ "enable.auto.offset.store": False, "topic.metadata.refresh.interval.ms": 10000, }, + "RETRY_TOPIC_SUFFIX": "retry", + "DEAD_LETTER_TOPIC_SUFFIX": "dlt", "POLLING_FREQUENCY": 1, # seconds "SCHEMA_REGISTRY": {}, } diff --git a/django_kafka/consumer.py b/django_kafka/consumer.py index 3bea260..70604a5 100644 --- a/django_kafka/consumer.py +++ b/django_kafka/consumer.py @@ -99,9 +99,9 @@ def retry_msg(self, msg: cimpl.Message, exc: Exception) -> bool: return RetryTopicProducer( group_id=self.group_id, - settings=topic_consumer.retry_settings, + retry_settings=topic_consumer.retry_settings, msg=msg, - ).retry_for(exc=exc) + ).retry(exc=exc) def dead_letter_msg(self, msg: cimpl.Message, exc: Exception): from django_kafka.dead_letter.topic import DeadLetterTopicProducer diff --git a/django_kafka/dead_letter/topic.py b/django_kafka/dead_letter/topic.py index da49c54..50c1031 100644 --- a/django_kafka/dead_letter/topic.py +++ b/django_kafka/dead_letter/topic.py @@ -1,16 +1,15 @@ import re from typing import TYPE_CHECKING +from django_kafka import settings from django_kafka.dead_letter.headers import DeadLetterHeader -from django_kafka.retry.topic import RETRY_TOPIC_PATTERN +from django_kafka.retry.topic import RetryTopicProducer from django_kafka.serialization import NoOpSerializer from django_kafka.topic import TopicProducer if TYPE_CHECKING: from confluent_kafka import cimpl -DEAD_LETTER_TOPIC_SUFFIX = "dlt" - class DeadLetterTopicProducer(TopicProducer): key_serializer = NoOpSerializer @@ -19,15 +18,19 @@ class DeadLetterTopicProducer(TopicProducer): def __init__(self, group_id: str, msg: "cimpl.Message"): self.group_id = group_id self.msg = msg + super().__init__() + + @classmethod + def suffix(cls): + return settings.DEAD_LETTER_TOPIC_SUFFIX @property def name(self) -> str: topic = self.msg.topic() - suffix = DEAD_LETTER_TOPIC_SUFFIX - if re.search(RETRY_TOPIC_PATTERN, topic): - return re.sub(RETRY_TOPIC_PATTERN, suffix, topic) - return f"{self.group_id}.{topic}.{suffix}" + if re.search(RetryTopicProducer.pattern(), topic): + return re.sub(RetryTopicProducer.pattern(), self.suffix(), topic) + return f"{self.group_id}.{topic}.{self.suffix()}" def produce_for(self, header_message, header_detail): headers = [ diff --git a/django_kafka/retry/consumer.py b/django_kafka/retry/consumer.py index f77cad3..d67d23b 100644 --- a/django_kafka/retry/consumer.py +++ b/django_kafka/retry/consumer.py @@ -64,7 +64,7 @@ def build_config(cls): def retry_msg(self, msg: cimpl.Message, exc: Exception) -> bool: rt_consumer = cast(RetryTopicConsumer, self.get_topic_consumer(msg)) - return rt_consumer.producer_for(msg).retry_for(exc) + return rt_consumer.producer_for(msg).retry(exc) def dead_letter_msg(self, msg: cimpl.Message, exc: Exception): rt_consumer = cast(RetryTopicConsumer, self.get_topic_consumer(msg)) diff --git a/django_kafka/retry/topic.py b/django_kafka/retry/topic.py index 0afbf60..ababc98 100644 --- a/django_kafka/retry/topic.py +++ b/django_kafka/retry/topic.py @@ -1,6 +1,7 @@ import re from typing import TYPE_CHECKING +from django_kafka.conf import settings from django_kafka.exceptions import DjangoKafkaError from django_kafka.retry.headers import RetryHeader from django_kafka.serialization import NoOpSerializer @@ -11,37 +12,48 @@ from django_kafka.retry import RetrySettings -RETRY_TOPIC_SUFFIX = "retry" 
-RETRY_TOPIC_PATTERN = rf"{re.escape(RETRY_TOPIC_SUFFIX)}\.([0-9]+)$"
-
 
 class RetryTopicProducer(TopicProducer):
     key_serializer = NoOpSerializer
     value_serializer = NoOpSerializer
 
-    def __init__(self, group_id: str, settings: "RetrySettings", msg: "cimpl.Message"):
-        self.settings = settings
+    def __init__(
+        self,
+        group_id: str,
+        retry_settings: "RetrySettings",
+        msg: "cimpl.Message",
+    ):
+        self.settings = retry_settings
         self.group_id = group_id
         self.msg = msg
         self.attempt = self.get_next_attempt(msg.topic())
         super().__init__()
 
+    @classmethod
+    def suffix(cls):
+        return settings.RETRY_TOPIC_SUFFIX
+
+    @classmethod
+    def pattern(cls):
+        """Returns the regex pattern matching topic names produced by this class"""
+        return rf"{re.escape(cls.suffix())}\.([0-9]+)$"
+
     @classmethod
     def get_next_attempt(cls, topic_name: str) -> int:
-        match = re.search(RETRY_TOPIC_PATTERN, topic_name)
+        match = re.search(cls.pattern(), topic_name)
         attempt = int(match.group(1)) if match else 0
         return attempt + 1
 
     @property
     def name(self) -> str:
         topic = self.msg.topic()
-        suffix = f"{RETRY_TOPIC_SUFFIX}.{self.attempt}"
+        suffix = f"{self.suffix()}.{self.attempt}"
 
-        if re.search(RETRY_TOPIC_PATTERN, topic):
-            return re.sub(RETRY_TOPIC_PATTERN, suffix, topic)
+        if re.search(self.pattern(), topic):
+            return re.sub(self.pattern(), suffix, topic)
 
         return f"{self.group_id}.{self.msg.topic()}.{suffix}"
 
-    def retry_for(self, exc: Exception) -> bool:
+    def retry(self, exc: Exception) -> bool:
         if not self.settings.should_retry(exc=exc):
             return False
 
@@ -84,7 +96,7 @@ def name(self) -> str:
         else:
             topic_name = re.escape(self.topic_consumer.name)
 
-        return rf"^{group_id}\.{topic_name}\.{RETRY_TOPIC_PATTERN}"
+        return rf"^{group_id}\.{topic_name}\.{RetryTopicProducer.pattern()}"
 
     def consume(self, msg: "cimpl.Message"):
         self.topic_consumer.consume(msg)
 
     def producer_for(self, msg: "cimpl.Message") -> RetryTopicProducer:
         return RetryTopicProducer(
             group_id=self.group_id,
-            settings=self.topic_consumer.retry_settings,
+            retry_settings=self.topic_consumer.retry_settings,
             msg=msg,
         )
diff --git a/django_kafka/tests/dead_letter/test_topic.py b/django_kafka/tests/dead_letter/test_topic.py
index 7e935f8..3d82ff8 100644
--- a/django_kafka/tests/dead_letter/test_topic.py
+++ b/django_kafka/tests/dead_letter/test_topic.py
@@ -1,20 +1,25 @@
 from unittest import mock
 
-from django.test import TestCase
+from django.test import TestCase, override_settings
 
+from django_kafka.conf import SETTINGS_KEY
 from django_kafka.dead_letter.headers import DeadLetterHeader
-from django_kafka.dead_letter.topic import (
-    DEAD_LETTER_TOPIC_SUFFIX,
-    DeadLetterTopicProducer,
-)
-from django_kafka.retry.topic import RETRY_TOPIC_SUFFIX
+from django_kafka.dead_letter.topic import DeadLetterTopicProducer
 
 
+@override_settings(
+    **{
+        SETTINGS_KEY: {
+            "RETRY_TOPIC_SUFFIX": "test-retry",
+            "DEAD_LETTER_TOPIC_SUFFIX": "test-dlt",
+        },
+    },
+)
 class DeadLetterTopicProducerTestCase(TestCase):
     def test_name(self):
         mock_msg_topic_consumer = mock.Mock(**{"topic.return_value": "topic.name"})
         mock_msg_retry_topic = mock.Mock(
-            **{"topic.return_value": f"group.id.topic.name.{RETRY_TOPIC_SUFFIX}.10"},
+            **{"topic.return_value": "group.id.topic.name.test-retry.10"},
         )
 
         dlt_producer_1 = DeadLetterTopicProducer(
@@ -27,14 +32,8 @@ def test_name(self):
             msg=mock_msg_retry_topic,
         )
 
-        self.assertEqual(
-            dlt_producer_1.name,
-            f"group.id.topic.name.{DEAD_LETTER_TOPIC_SUFFIX}",
-        )
-        self.assertEqual(
-            dlt_producer_2.name,
f"group.id.topic.name.{DEAD_LETTER_TOPIC_SUFFIX}", - ) + self.assertEqual(dlt_producer_1.name, "group.id.topic.name.test-dlt") + self.assertEqual(dlt_producer_2.name, "group.id.topic.name.test-dlt") def test_produce_for(self): msg_mock = mock.Mock(**{"topic.return_value": "msg_topic"}) diff --git a/django_kafka/tests/retry/test_consumer.py b/django_kafka/tests/retry/test_consumer.py index 6312207..f0645b0 100644 --- a/django_kafka/tests/retry/test_consumer.py +++ b/django_kafka/tests/retry/test_consumer.py @@ -123,7 +123,7 @@ class TestConsumer(Consumer): def test_retry_msg(self): mock_retry_topic_consumer = Mock() mock_producer_for = mock_retry_topic_consumer.producer_for - mock_retry_for = mock_producer_for.return_value.retry_for + mock_retry = mock_producer_for.return_value.retry msg_mock = Mock() retry_consumer = self._get_retry_consumer() @@ -133,8 +133,8 @@ def test_retry_msg(self): retried = retry_consumer.retry_msg(msg_mock, exc) mock_producer_for.assert_called_once_with(msg_mock) - mock_retry_for.assert_called_once_with(exc) - self.assertEqual(retried, mock_retry_for.return_value) + mock_retry.assert_called_once_with(exc) + self.assertEqual(retried, mock_retry.return_value) @patch("django_kafka.retry.consumer.DeadLetterTopicProducer") def test_dead_letter_msg(self, mock_dlt_topic_producer_cls): diff --git a/django_kafka/tests/retry/test_topic.py b/django_kafka/tests/retry/test_topic.py index 38d9b49..496adf4 100644 --- a/django_kafka/tests/retry/test_topic.py +++ b/django_kafka/tests/retry/test_topic.py @@ -1,19 +1,19 @@ from unittest import mock -from django.test import TestCase +from django.test import TestCase, override_settings +from django_kafka.conf import SETTINGS_KEY from django_kafka.exceptions import DjangoKafkaError from django_kafka.retry import RetrySettings from django_kafka.retry.headers import RetryHeader from django_kafka.retry.topic import ( - RETRY_TOPIC_PATTERN, - RETRY_TOPIC_SUFFIX, RetryTopicConsumer, RetryTopicProducer, ) from django_kafka.topic import TopicConsumer +@override_settings(**{SETTINGS_KEY: {"RETRY_TOPIC_SUFFIX": "test-retry"}}) class RetryTopicProducerTestCase(TestCase): def test__get_next_attempt(self): self.assertEqual(RetryTopicProducer.get_next_attempt("topic"), 1) @@ -22,25 +22,19 @@ def test__get_next_attempt(self): 1, ) self.assertEqual( - RetryTopicProducer.get_next_attempt(f"group.id.topic.{RETRY_TOPIC_SUFFIX}"), + RetryTopicProducer.get_next_attempt("group.id.topic.test-retry"), 1, ) self.assertEqual( - RetryTopicProducer.get_next_attempt( - f"group.id.topic.{RETRY_TOPIC_SUFFIX}.0", - ), + RetryTopicProducer.get_next_attempt("group.id.topic.test-retry.0"), 1, ) self.assertEqual( - RetryTopicProducer.get_next_attempt( - f"group.id.topic.{RETRY_TOPIC_SUFFIX}.2", - ), + RetryTopicProducer.get_next_attempt("group.id.topic.test-retry.2"), 3, ) self.assertEqual( - RetryTopicProducer.get_next_attempt( - f"group.id.topic.{RETRY_TOPIC_SUFFIX}.10", - ), + RetryTopicProducer.get_next_attempt("group.id.topic.test-retry.10"), 11, ) @@ -50,7 +44,7 @@ def test_init(self): rt_producer = RetryTopicProducer( group_id="group.id", - settings=retry_settings, + retry_settings=retry_settings, msg=mock_msg_topic_consumer, ) @@ -63,42 +57,51 @@ def test_name(self): retry_settings = RetrySettings(max_retries=5, delay=60) mock_msg_topic_consumer = mock.Mock(**{"topic.return_value": "topic.name"}) mock_msg_rt_producer = mock.Mock( - **{"topic.return_value": f"group.id.topic.name.{RETRY_TOPIC_SUFFIX}.1"}, + **{ + "topic.return_value": 
"group.id.topic.name.test-retry.1", + }, ) rt_producer_1 = RetryTopicProducer( group_id="group.id", - settings=retry_settings, + retry_settings=retry_settings, msg=mock_msg_topic_consumer, ) rt_producer_2 = RetryTopicProducer( group_id="group.id", - settings=retry_settings, + retry_settings=retry_settings, msg=mock_msg_rt_producer, ) - self.assertEqual( - rt_producer_1.name, - f"group.id.topic.name.{RETRY_TOPIC_SUFFIX}.1", - ) - self.assertEqual( - rt_producer_2.name, - f"group.id.topic.name.{RETRY_TOPIC_SUFFIX}.2", + self.assertEqual(rt_producer_1.name, "group.id.topic.name.test-retry.1") + self.assertEqual(rt_producer_2.name, "group.id.topic.name.test-retry.2") + + @override_settings(**{SETTINGS_KEY: {"RETRY_TOPIC_SUFFIX": "test-retry"}}) + def test_name__uses_settings(self): + retry_settings = RetrySettings(max_retries=5, delay=60) + mock_msg_topic_consumer = mock.Mock(**{"topic.return_value": "topic.name"}) + + rt_producer = RetryTopicProducer( + group_id="group.id", + retry_settings=retry_settings, + msg=mock_msg_topic_consumer, ) + self.assertEqual(rt_producer.name, "group.id.topic.name.test-retry.1") + @mock.patch("django_kafka.retry.RetrySettings.get_retry_timestamp") - def test_retry_for__first_retry(self, mock_get_retry_timestamp: mock.Mock): + def test_retry__first_retry(self, mock_get_retry_timestamp: mock.Mock): mock_msg = mock.Mock(**{"topic.return_value": "msg_topic"}) retry_settings = RetrySettings(max_retries=5, delay=60) rt_producer = RetryTopicProducer( group_id="group.id", - settings=retry_settings, + retry_settings=retry_settings, msg=mock_msg, ) rt_producer.produce = mock.Mock() - retried = rt_producer.retry_for(exc=ValueError("error message")) + retried = rt_producer.retry(exc=ValueError("error message")) self.assertTrue(retried) rt_producer.produce.assert_called_with( @@ -112,18 +115,18 @@ def test_retry_for__first_retry(self, mock_get_retry_timestamp: mock.Mock): mock_get_retry_timestamp.assert_called_once_with(1) @mock.patch("django_kafka.retry.RetrySettings.get_retry_timestamp") - def test_retry_for__last_retry(self, mock_get_retry_timestamp): + def test_retry__last_retry(self, mock_get_retry_timestamp): mock_msg = mock.Mock( - **{"topic.return_value": f"group.id.msg_topic.{RETRY_TOPIC_SUFFIX}.4"}, + **{"topic.return_value": "group.id.msg_topic.test-retry.4"}, ) rt_producer = RetryTopicProducer( group_id="group.id", - settings=RetrySettings(max_retries=5, delay=60), + retry_settings=RetrySettings(max_retries=5, delay=60), msg=mock_msg, ) rt_producer.produce = mock.Mock() - retried = rt_producer.retry_for(exc=ValueError("error message")) + retried = rt_producer.retry(exc=ValueError("error message")) self.assertTrue(retried) rt_producer.produce.assert_called_with( @@ -136,35 +139,34 @@ def test_retry_for__last_retry(self, mock_get_retry_timestamp): ) mock_get_retry_timestamp.assert_called_once_with(5) - def test_retry_for__no_more_retries(self): + def test_retry__no_more_retries(self): rt_producer = RetryTopicProducer( group_id="group.id", - settings=RetrySettings(max_retries=5, delay=60), - msg=mock.Mock( - **{"topic.return_value": f"group.id.msg_topic.{RETRY_TOPIC_SUFFIX}.5"}, - ), + retry_settings=RetrySettings(max_retries=5, delay=60), + msg=mock.Mock(**{"topic.return_value": "group.id.msg_topic.test-retry.5"}), ) rt_producer.produce = mock.Mock() - retried = rt_producer.retry_for(exc=ValueError()) + retried = rt_producer.retry(exc=ValueError()) self.assertFalse(retried) rt_producer.produce.assert_not_called() - def 
test_retry_for__no_retry_for_excluded_error(self): + def test_retry__no_retry_excluded_error(self): rt_producer = RetryTopicProducer( group_id="group.id", - settings=RetrySettings(max_retries=5, delay=60, exclude=[ValueError]), + retry_settings=RetrySettings(max_retries=5, delay=60, exclude=[ValueError]), msg=mock.Mock(**{"topic.return_value": "msg_topic"}), ) rt_producer.produce = mock.Mock() - retried = rt_producer.retry_for(exc=ValueError()) + retried = rt_producer.retry(exc=ValueError()) self.assertFalse(retried) rt_producer.produce.assert_not_called() +@override_settings(**{SETTINGS_KEY: {"RETRY_TOPIC_SUFFIX": "test-retry"}}) class RetryTopicConsumerTestCase(TestCase): def _get_retryable_topic_consumer( self, @@ -195,7 +197,7 @@ def test_name(self): self.assertEqual( rt_consumer.name, - rf"^group\.id\.topic\.name\.{RETRY_TOPIC_PATTERN}", + r"^group\.id\.topic\.name\.test\-retry\.([0-9]+)$", ) def test_name__regex(self): @@ -210,7 +212,7 @@ def test_name__regex(self): self.assertEqual( rt_consumer.name, - rf"^group\.id\.(topic_name|other_name)\.{RETRY_TOPIC_PATTERN}", + r"^group\.id\.(topic_name|other_name)\.test\-retry\.([0-9]+)$", ) def test_consume(self): @@ -239,6 +241,6 @@ def test_producer_for(self, mock_rt_producer): self.assertEqual(rt_producer, mock_rt_producer.return_value) mock_rt_producer.assert_called_once_with( group_id=rt_consumer.group_id, - settings=topic_consumer.retry_settings, + retry_settings=topic_consumer.retry_settings, msg=mock_msg, ) diff --git a/django_kafka/tests/test_consumer.py b/django_kafka/tests/test_consumer.py index 4abe6cd..2d2bd05 100644 --- a/django_kafka/tests/test_consumer.py +++ b/django_kafka/tests/test_consumer.py @@ -186,7 +186,7 @@ class SomeConsumer(Consumer): @patch("django_kafka.retry.topic.RetryTopicProducer") def test_retry_msg(self, mock_rt_producer_cls): mock_topic_consumer = Mock(retry_settings=Mock()) - mock_retry_for = mock_rt_producer_cls.return_value.retry_for + mock_retry = mock_rt_producer_cls.return_value.retry msg_mock = Mock() class SomeConsumer(Consumer): @@ -201,11 +201,11 @@ class SomeConsumer(Consumer): mock_rt_producer_cls.assert_called_once_with( group_id=consumer.group_id, - settings=mock_topic_consumer.retry_settings, + retry_settings=mock_topic_consumer.retry_settings, msg=msg_mock, ) - mock_retry_for.assert_called_once_with(exc=exc) - self.assertEqual(retried, mock_retry_for.return_value) + mock_retry.assert_called_once_with(exc=exc) + self.assertEqual(retried, mock_retry.return_value) @patch("django_kafka.retry.topic.RetryTopicProducer") def test_retry_msg__no_retry(self, mock_rt_producer_cls): diff --git a/django_kafka/tests/test_settings.py b/django_kafka/tests/test_settings.py index b80efd0..e2d00d9 100644 --- a/django_kafka/tests/test_settings.py +++ b/django_kafka/tests/test_settings.py @@ -13,6 +13,8 @@ class SettingsTestCase(TestCase): "PRODUCER_CONFIG", "CONSUMER_CONFIG", "RETRY_CONSUMER_CONFIG", + "RETRY_TOPIC_SUFFIX", + "DEAD_LETTER_TOPIC_SUFFIX", "POLLING_FREQUENCY", "SCHEMA_REGISTRY", ) @@ -39,6 +41,8 @@ def test_user_settings(self, mock_consumer_client): "RETRY_CONSUMER_CONFIG": { "topic.metadata.refresh.interval.ms": 5000, }, + "RETRY_TOPIC_SUFFIX": "retry-extra", + "DEAD_LETTER_TOPIC_SUFFIX": "dlt-extra", "POLLING_FREQUENCY": 0.5, "SCHEMA_REGISTRY": { "url": "https://schema-registry", From 61c9b35ade7629fa21ce828bfd7b9ad8c9407b39 Mon Sep 17 00:00:00 2001 From: Stefan Cardnell Date: Thu, 12 Sep 2024 17:21:57 +0200 Subject: [PATCH 4/4] =?UTF-8?q?Bump=20version:=200.1.0=20=E2=86=92=200.2.0?= 
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.toml        | 2 +-
 django_kafka/__init__.py | 2 +-
 pyproject.toml           | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.bumpversion.toml b/.bumpversion.toml
index ab9b67a..3d8cf22 100644
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.1.0"
+current_version = "0.2.0"
 parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
 serialize = ["{major}.{minor}.{patch}"]
 search = "{current_version}"
diff --git a/django_kafka/__init__.py b/django_kafka/__init__.py
index c2ba260..cfc5a40 100644
--- a/django_kafka/__init__.py
+++ b/django_kafka/__init__.py
@@ -14,7 +14,7 @@
 logger = logging.getLogger(__name__)
 
-__version__ = "0.1.0"
+__version__ = "0.2.0"
 
 __all__ = [
     "autodiscover",
diff --git a/pyproject.toml b/pyproject.toml
index b0a977d..025c155 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "django-kafka"
-version = "0.1.0"
+version = "0.2.0"
 dependencies = [
     "django>=4.0,<6.0",
     "confluent-kafka[avro, schema-registry]==2.4.0"
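
Aside (illustrative, not part of the patch itself): PATCH 1/4 splits `Topic` into `TopicProducer` and `TopicConsumer`, drops the per-call topic `name` argument from `serialize`/`deserialize`/`produce`, and rejects regex names on the producer side at instantiation. A minimal sketch of what declarations look like after this change; the class names and topic names below are hypothetical:

```python
from confluent_kafka.serialization import MessageField

from django_kafka.topic import TopicConsumer, TopicProducer


class OrderProducer(TopicProducer):
    # Producer names must be literal; a "^"-prefixed (regex) name now raises
    # DjangoKafkaError when the producer is instantiated.
    name = "orders"


class OrderConsumer(TopicConsumer):
    # Consumers may still subscribe by regex: matches() falls back to
    # re.search() when the name starts with "^".
    name = r"^orders(\..+)?"

    def consume(self, msg):
        # deserialize() no longer takes a topic name; the SerializationContext
        # is built from self.name.
        value = self.deserialize(msg.value(), MessageField.VALUE, msg.headers())
        ...


# Producing takes the topic name from the class, so the old
# produce(value, name=...) kwarg is gone:
OrderProducer().produce("payload", key="42")
```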
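
Aside (illustrative, not part of the patch itself): PATCH 3/4 makes the `retry`/`dlt` suffixes configurable via the new `RETRY_TOPIC_SUFFIX` and `DEAD_LETTER_TOPIC_SUFFIX` settings. The naming rules from `RetryTopicProducer.name` and `DeadLetterTopicProducer.name` boil down to the following, re-stated here as standalone functions (the function names are illustrative, not library API) so the scheme can be checked without Django:

```python
import re


def next_retry_topic(group_id: str, topic: str, retry_suffix: str = "retry") -> str:
    """Mirrors RetryTopicProducer.name: append `<suffix>.1` on the first
    failure, or bump the trailing attempt counter on later failures."""
    pattern = rf"{re.escape(retry_suffix)}\.([0-9]+)$"
    match = re.search(pattern, topic)
    attempt = (int(match.group(1)) if match else 0) + 1
    if match:
        return re.sub(pattern, f"{retry_suffix}.{attempt}", topic)
    return f"{group_id}.{topic}.{retry_suffix}.{attempt}"


def dead_letter_topic(group_id: str, topic: str, retry_suffix: str = "retry",
                      dlt_suffix: str = "dlt") -> str:
    """Mirrors DeadLetterTopicProducer.name: replace a retry suffix with the
    dead letter suffix, or append it to a plain topic name."""
    pattern = rf"{re.escape(retry_suffix)}\.([0-9]+)$"
    if re.search(pattern, topic):
        return re.sub(pattern, dlt_suffix, topic)
    return f"{group_id}.{topic}.{dlt_suffix}"


assert next_retry_topic("group", "topic") == "group.topic.retry.1"
assert next_retry_topic("group", "group.topic.retry.1") == "group.topic.retry.2"
assert dead_letter_topic("group", "group.topic.retry.3") == "group.topic.dlt"
```

Overriding `RETRY_TOPIC_SUFFIX`/`DEAD_LETTER_TOPIC_SUFFIX` in the `DJANGO_KAFKA` setting (as the updated tests do with `test-retry`/`test-dlt`) changes every generated name accordingly.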