From 0c5580e77c21cd09bb58d3e4940bb37d0cdfe227 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 23 Sep 2024 15:58:35 -0400 Subject: [PATCH 1/2] fix(kafka): cast topic to str since it can return None [backport 2.12] (#10757) Backport 33daba967458439008e597d37802c7a00c1fed6f from #10691 to 2.12. Since topic can be None for a message object we should always cast it to a str or else it'll throw an error if we try to set it as a tag with set_tag_str https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#confluent_kafka.Message.topic ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: Zachary Groves <32471391+ZStriker19@users.noreply.github.com> --- ddtrace/contrib/internal/kafka/patch.py | 4 ++-- .../notes/kafka_message_topic_none_fix-2bc3fd6388075b94.yaml | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/kafka_message_topic_none_fix-2bc3fd6388075b94.yaml diff --git a/ddtrace/contrib/internal/kafka/patch.py b/ddtrace/contrib/internal/kafka/patch.py index 60e5ed23379..20b3e3bd9a4 100644 --- a/ddtrace/contrib/internal/kafka/patch.py +++ b/ddtrace/contrib/internal/kafka/patch.py @@ -249,7 +249,7 @@ def _instrument_message(messages, pin, start_ns, instance, err): for message in messages: if message is not None and first_message is not None: - core.set_item("kafka_topic", first_message.topic()) + core.set_item("kafka_topic", str(first_message.topic())) core.dispatch("kafka.consume.start", (instance, first_message, span)) span.set_tag_str(MESSAGING_SYSTEM, kafkax.SERVICE) @@ -260,7 +260,7 @@ def _instrument_message(messages, pin, start_ns, instance, err): if first_message is not None: message_key = first_message.key() or "" message_offset = first_message.offset() or -1 - span.set_tag_str(kafkax.TOPIC, first_message.topic()) + span.set_tag_str(kafkax.TOPIC, str(first_message.topic())) # If this is a deserializing consumer, do not set the key as a tag since we # do not have the serialization function diff --git a/releasenotes/notes/kafka_message_topic_none_fix-2bc3fd6388075b94.yaml b/releasenotes/notes/kafka_message_topic_none_fix-2bc3fd6388075b94.yaml new file mode 100644 index 00000000000..05f9a9c6e50 --- /dev/null +++ b/releasenotes/notes/kafka_message_topic_none_fix-2bc3fd6388075b94.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + kafka: Fixed an issue where a ``TypeError`` exception would be raised if the first message's ``topic()`` returned ``None`` during consumption. From 6eb517d6f7db82be8eacd583702029e3845d88e5 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 23 Sep 2024 21:19:41 +0000 Subject: [PATCH 2/2] fix(botocore): account for None data_obj case in Kinesis [backport 2.12] (#10761) Backport 5546c2391097103c74688c66b0dd57f985194d8f from #10628 to 2.12. This accounts for the possibility of None being returned for the data_obj value from the method `get_kinesis_data_object` in `_patched_kinesis_api_call`. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: Zachary Groves <32471391+ZStriker19@users.noreply.github.com> --- .../contrib/internal/botocore/services/kinesis.py | 2 +- ddtrace/contrib/internal/botocore/utils.py | 5 ++--- .../kinesis_none_type_fix-4b39f2059184359e.yaml | 4 ++++ tests/contrib/botocore/test.py | 13 +++++++++++-- 4 files changed, 18 insertions(+), 6 deletions(-) create mode 100644 releasenotes/notes/kinesis_none_type_fix-4b39f2059184359e.yaml diff --git a/ddtrace/contrib/internal/botocore/services/kinesis.py b/ddtrace/contrib/internal/botocore/services/kinesis.py index 1e8d4972bc4..2d60252bdc2 100644 --- a/ddtrace/contrib/internal/botocore/services/kinesis.py +++ b/ddtrace/contrib/internal/botocore/services/kinesis.py @@ -102,7 +102,7 @@ def _patched_kinesis_api_call(parent_ctx, original_func, instance, args, kwargs, parent_ctx, params, time_estimate, - data_obj.get("_datadog"), + data_obj.get("_datadog") if data_obj else None, record, result, config.botocore.propagation_enabled, diff --git a/ddtrace/contrib/internal/botocore/utils.py b/ddtrace/contrib/internal/botocore/utils.py index dba7f5f703f..664bc5d7741 100644 --- a/ddtrace/contrib/internal/botocore/utils.py +++ b/ddtrace/contrib/internal/botocore/utils.py @@ -29,7 +29,7 @@ def get_json_from_str(data_str: str) -> Tuple[str, Optional[Dict[str, Any]]]: return None, data_obj -def get_kinesis_data_object(data: str) -> Tuple[str, Optional[Dict[str, Any]]]: +def get_kinesis_data_object(data: str) -> Tuple[Optional[str], Optional[Dict[str, Any]]]: """ :data: the data from a kinesis stream The data from a kinesis stream comes as a string (could be json, base64 encoded, etc.) @@ -37,9 +37,8 @@ def get_kinesis_data_object(data: str) -> Tuple[str, Optional[Dict[str, Any]]]: - json string - byte encoded json string - base64 encoded json string - If it's none of these, then we leave the message as it is. + If it's none of these, then we return None """ - # check if data is a json string try: return get_json_from_str(data) diff --git a/releasenotes/notes/kinesis_none_type_fix-4b39f2059184359e.yaml b/releasenotes/notes/kinesis_none_type_fix-4b39f2059184359e.yaml new file mode 100644 index 00000000000..f12ab1a0d64 --- /dev/null +++ b/releasenotes/notes/kinesis_none_type_fix-4b39f2059184359e.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + kinesis: This fix resolves an issue where unparsable data in a Kinesis record would cause a NoneType error. diff --git a/tests/contrib/botocore/test.py b/tests/contrib/botocore/test.py index bf75d745386..1fdb4574f22 100644 --- a/tests/contrib/botocore/test.py +++ b/tests/contrib/botocore/test.py @@ -2858,7 +2858,9 @@ def _test_kinesis_put_record_trace_injection(self, test_name, data, client=None, return decoded_record_data - def _test_kinesis_put_records_trace_injection(self, test_name, data, client=None, enable_stream_arn=False): + def _test_kinesis_put_records_trace_injection( + self, test_name, data, client=None, enable_stream_arn=False, verify=True + ): if not client: client = self.session.create_client("kinesis", region_name="us-east-1") @@ -2870,7 +2872,8 @@ def _test_kinesis_put_records_trace_injection(self, test_name, data, client=None client.put_records(StreamName=stream_name, Records=data, StreamARN=stream_arn) else: client.put_records(StreamName=stream_name, Records=data) - + if not verify: + return None # assert commons for span span = self._kinesis_assert_spans() @@ -3261,6 +3264,12 @@ def test_kinesis_put_records_newline_json_trace_injection(self): assert decoded_record_data.endswith("\n") + @mock_kinesis + def test_kinesis_put_records_unparsable_data_object_avoid_nonetype_error(self): + # If the data is unparsable we should not error in tracer code + records = [{"Data": b"", "PartitionKey": "1234"}] + self._test_kinesis_put_records_trace_injection("unparsable_data_obj", records, verify=False) + @mock_kinesis def test_kinesis_put_records_newline_bytes_trace_injection(self): # (dict -> json string -> bytes + new line)[]