Merge pull request #2383 from DataDog/cbeauchesne/better_schema_checks

Better schema test logic API
DataDog · Apr 24, 2024 · 3adaa06 · 3adaa06
2 parents 73cc8c8 + 1555060
commit 3adaa06
Show file tree

Hide file tree

Showing 12 changed files with 156 additions and 170 deletions.
diff --git a/.github/workflows/run-end-to-end.yml b/.github/workflows/run-end-to-end.yml
@@ -240,7 +240,7 @@ jobs:
       env:
         DD_API_KEY: ${{ secrets.DD_API_KEY }}
     - name: Run REMOTE_CONFIG_MOCKED_BACKEND_ASM_FEATURES scenario
-      if: always() && steps.build.outcome == 'success' && inputs.run_all
+      if: always() && steps.build.outcome == 'success' && (inputs.run_appsec || inputs.run_all)
       run: ./run.sh REMOTE_CONFIG_MOCKED_BACKEND_ASM_FEATURES
       env:
         DD_API_KEY: ${{ secrets.DD_API_KEY }}
@@ -250,12 +250,12 @@ jobs:
       env:
         DD_API_KEY: ${{ secrets.DD_API_KEY }}
     - name: Run REMOTE_CONFIG_MOCKED_BACKEND_ASM_DD scenario
-      if: always() && steps.build.outcome == 'success' && inputs.run_all
+      if: always() && steps.build.outcome == 'success' && (inputs.run_appsec || inputs.run_all)
       run: ./run.sh REMOTE_CONFIG_MOCKED_BACKEND_ASM_DD
       env:
         DD_API_KEY: ${{ secrets.DD_API_KEY }}
     - name: Run REMOTE_CONFIG_MOCKED_BACKEND_ASM_FEATURES_NOCACHE scenario
-      if: always() && steps.build.outcome == 'success' && inputs.run_all
+      if: always() && steps.build.outcome == 'success' && (inputs.run_appsec || inputs.run_all)
       run: ./run.sh REMOTE_CONFIG_MOCKED_BACKEND_ASM_FEATURES_NOCACHE
       env:
         DD_API_KEY: ${{ secrets.DD_API_KEY }}
@@ -265,7 +265,7 @@ jobs:
       env:
         DD_API_KEY: ${{ secrets.DD_API_KEY }}
     - name: Run REMOTE_CONFIG_MOCKED_BACKEND_ASM_DD_NOCACHE scenario
-      if: always() && steps.build.outcome == 'success' && inputs.run_all
+      if: always() && steps.build.outcome == 'success' && (inputs.run_appsec || inputs.run_all)
       run: ./run.sh REMOTE_CONFIG_MOCKED_BACKEND_ASM_DD_NOCACHE
       env:
         DD_API_KEY: ${{ secrets.DD_API_KEY }}

diff --git a/conftest.py b/conftest.py
@@ -205,7 +205,7 @@ def pytest_collection_modifyitems(session, config, items):
             deselected.append(item)
             continue
 
-        if declared_scenario == context.scenario.name:
+        if context.scenario.is_part_of(declared_scenario):
             logger.info(f"{item.nodeid} is included in {context.scenario}")
             selected.append(item)
 

diff --git a/tests/remote_config/test_remote_configuration.py b/tests/remote_config/test_remote_configuration.py
@@ -52,45 +52,6 @@ def test_agent_provide_config_endpoint(self):
 class RemoteConfigurationFieldsBasicTests:
     """Misc tests on fields and values on remote configuration requests"""
 
-    @bug(context.library < "[email protected]")
-    @bug(context.library < "[email protected]")
-    @bug(context.library >= "[email protected]")
-    def test_schemas(self):
-        """Test all library schemas"""
-        interfaces.library.assert_schemas()
-
-    def test_non_regression(self):
-        """Non-regression test on shemas"""
-
-        # Never skip this test. As a full respect of shemas may be hard, this test ensure that
-        # at least the part that was ok stays ok.
-
-        allowed_errors = None
-        if context.library == "golang":
-            allowed_errors = (
-                r"'actor' is a required property on instance \['events'\]\[\d+\]\['context'\]",
-                r"'protocol_version' is a required property on instance ",
-            )
-        elif context.library == "java":
-            # pylint: disable=line-too-long
-            allowed_errors = (
-                r"'appsec' was expected on instance \['events'\]\[\d+\]\['event_type'\]",
-                r"'headers' is a required property on instance \['events'\]\[\d+\]\['context'\]\['http'\]\['response'\]",
-                r"'idempotency_key' is a required property on instance ",
-            )
-        elif context.library == "dotnet":
-            allowed_errors = (
-                # value is missing in configuration object in telemetry payloads
-                r"'value' is a required property on instance \['payload'\]\['configuration'\]\[\d+\]",
-            )
-        elif context.library == "nodejs":
-            allowed_errors = (
-                # value is missing in configuration object in telemetry payloads
-                r"'value' is a required property on instance \['payload'\]\['configuration'\]\[\d+\]",
-            )
-
-        interfaces.library.assert_schemas(allowed_errors=allowed_errors)
-
     def test_client_state_errors(self):
         """Ensure that the Client State error is consistent"""
 

diff --git a/tests/test_schemas.py b/tests/test_schemas.py
@@ -4,106 +4,56 @@
 
 """Test format specifications"""
 
-from utils import weblog, interfaces, bug, context
+from utils import weblog, interfaces, bug, irrelevant, context, scenarios
 
 
-class Test_Library:
+@scenarios.all_endtoend_scenarios
+class Test_library:
     """Libraries's payload are valid regarding schemas"""
 
-    def setup_full(self):
+    def setup_library_schema_full(self):
         # send some requests to be sure to trigger events
-        weblog.get("/waf", params={"key": "\n :"})  # rules.http_protocol_violation.crs_921_160
+        weblog.get("/waf", params={"key": "\n :"})
 
-    @bug(context.library < "[email protected]")
-    @bug(context.library < "[email protected]")
-    @bug(context.library >= "[email protected]")
-    @bug(context.library >= "[email protected]")
-    @bug(
-        context.library >= "[email protected]"
-        and context.agent_version >= "7.47.0rc2"
-        and context.appsec_rules_file is not None,
-        reason="on /v0.7/config, client.products is an empty array",
-    )
-    def test_full(self):
-        interfaces.library.assert_schemas()
+    def test_library_schema_full(self):
+        """Payloads seen on the library interface are schema-valid, except for the excluded telemetry points"""
+        # Both excluded (endpoint, data path) points are asserted individually by the
+        # test_library_schema_telemetry_* methods of this class.
+        interfaces.library.assert_schema_points(
+            excluded_points=[
+                ("/telemetry/proxy/api/v2/apmtelemetry", "$.payload.configuration[]"),
+                ("/telemetry/proxy/api/v2/apmtelemetry", "$.payload"),  # APPSEC-52845
+            ]
+        )
 
-    def test_non_regression(self):
-        """ Non-regression test on shemas """
+    @bug(context.library >= "[email protected]", reason="APPSEC-52805")
+    def test_library_schema_telemetry_conf_value(self):
+        """No schema error on `$.payload.configuration[]` of the telemetry endpoint (library interface)"""
+        interfaces.library.assert_schema_point("/telemetry/proxy/api/v2/apmtelemetry", "$.payload.configuration[]")
 
-        # Never skip this test. As a full respect of shemas may be hard, this test ensure that
-        # at least the part that was ok stays ok.
-
-        allowed_errors = None
-        if context.library == "golang":
-            allowed_errors = (
-                r"'actor' is a required property on instance \['events'\]\[\d+\]\['context'\]",
-                r"'protocol_version' is a required property on instance ",
-            )
-        elif context.library == "java":
-            # pylint: disable=line-too-long
-            allowed_errors = (
-                r"'appsec' was expected on instance \['events'\]\[\d+\]\['event_type'\]",
-                r"'headers' is a required property on instance \['events'\]\[\d+\]\['context'\]\['http'\]\['response'\]",
-                r"'idempotency_key' is a required property on instance ",
-            )
-        elif context.library == "dotnet":
-            allowed_errors = (
-                # value is missing in configuration object in telemetry payloads
-                r"'value' is a required property on instance \['payload'\]\['configuration'\]\[\d+\]",
-            )
-        elif context.library == "nodejs":
-            allowed_errors = (
-                # value is missing in configuration object in telemetry payloads
-                r"'value' is a required property on instance \['payload'\]\['configuration'\]\[\d+\]",
-            )
-        elif context.library == "python":
-            allowed_errors = (r"\[\] is too short on instance \['client'\]\['products'\]",)
-
-        interfaces.library.assert_schemas(allowed_errors=allowed_errors)
+    @bug(library="python", reason="APPSEC-52845")
+    def test_library_schema_telemetry_job_object(self):
+        """No schema error on `$.payload` of the telemetry endpoint (library interface)"""
+        interfaces.library.assert_schema_point("/telemetry/proxy/api/v2/apmtelemetry", "$.payload")
 
 
+@scenarios.all_endtoend_scenarios
 class Test_Agent:
     """Agents's payload are valid regarding schemas"""
 
-    def setup_full(self):
+    def setup_agent_schema_full(self):
         # send some requests to be sure to trigger events
-        weblog.get("/waf", params={"key": "\n :"})  # rules.http_protocol_violation.crs_921_160
-
-    @bug(context.library < "[email protected]")
-    @bug(context.library < "[email protected]")
-    @bug(context.library >= "[email protected]")
-    @bug(context.library >= "[email protected]")
-    def test_full(self):
-        interfaces.agent.assert_schemas()
-
-    def test_non_regression(self):
-        """ Non-regression test on shemas """
-
-        # Never skip this test. As a full respect of shemas may be hard, this test ensure that
-        # at least the part that was ok stays ok.
-
-        allowed_errors = None
-        if context.library == "golang":
-            allowed_errors = (
-                r"'actor' is a required property on instance \['events'\]\[\d+\]\['context'\]",
-                r"'protocol_version' is a required property on instance ",
-            )
-        elif context.library == "java":
-            # pylint: disable=line-too-long
-            allowed_errors = (
-                r"'appsec' was expected on instance \['events'\]\[\d+\]\['event_type'\]",
-                r"'headers' is a required property on instance \['events'\]\[\d+\]\['context'\]\['http'\]\['response'\]",
-                r"'idempotency_key' is a required property on instance ",
-            )
-        elif context.library == "dotnet":
-            allowed_errors = (
-                # value is missing in configuration object in telemetry payloads
-                r"'value' is a required property on instance \['payload'\]\['configuration'\]\[\d+\]",
-            )
-        elif context.library == "nodejs":
-            allowed_errors = (
-                # value is missing in configuration object in telemetry payloads
-                r"'value' is a required property on instance \['payload'\]\['configuration'\]\[\d+\]",
-            )
-
-        interfaces.agent.assert_schemas(allowed_errors=allowed_errors)
+        weblog.get("/waf", params={"key": "\n :"})
+
+    def test_agent_schema_full(self):
+        """Payloads seen on the agent interface are schema-valid, except for the excluded telemetry points"""
+        # Both excluded (endpoint, data path) points are asserted individually by the
+        # other telemetry test methods of this class.
+        interfaces.agent.assert_schema_points(
+            excluded_points=[
+                ("/api/v2/apmtelemetry", "$.payload.configuration[]"),
+                ("/api/v2/apmtelemetry", "$.payload"),  # APPSEC-52845
+            ]
+        )
+
+    @bug(context.library >= "[email protected]", reason="APPSEC-52805")
+    @irrelevant(context.scenario is scenarios.crossed_tracing_libraries, reason="APPSEC-52805")
+    @irrelevant(context.scenario is scenarios.graphql_appsec, reason="APPSEC-52805")
+    def test_agent_schema_telemetry_conf_value(self):
+        """No schema error on `$.payload.configuration[]` of the telemetry endpoint (agent interface)"""
+        interfaces.agent.assert_schema_point("/api/v2/apmtelemetry", "$.payload.configuration[]")
+
+    @bug(library="python", reason="APPSEC-52845")
+    def test_library_schema_telemetry_job_object(self):
+        """No schema error on `$.payload` of the telemetry endpoint (agent interface)"""
+        # NOTE(review): this method belongs to Test_Agent and checks interfaces.agent, yet it keeps the
+        # "test_library_" prefix used by its Test_library counterpart -- looks like a copy/paste
+        # leftover. Confirm before renaming: the test id may be referenced elsewhere.
+        interfaces.agent.assert_schema_point("/api/v2/apmtelemetry", "$.payload")
diff --git a/utils/_context/_scenarios.py b/utils/_context/_scenarios.py
@@ -208,6 +208,9 @@ def customize_feature_parity_dashboard(self, result):
     def __str__(self) -> str:
         return f"Scenario '{self.name}'"
 
+    def is_part_of(self, declared_scenario):
+        """Tell whether this scenario satisfies the scenario name declared on a test.
+
+        This implementation accepts only an exact match with ``self.name``.
+        """
+        return declared_scenario == self.name
+
 
 class TestTheTestScenario(_Scenario):
     @property
@@ -420,6 +423,9 @@ def __init__(
         self.backend_interface_timeout = backend_interface_timeout
         self.library_interface_timeout = library_interface_timeout
 
+    def is_part_of(self, declared_scenario):
+        """Tell whether this scenario satisfies the scenario name declared on a test.
+
+        Accepts this scenario's own name, or the generic "EndToEndScenario" value.
+        """
+        if declared_scenario == self.name:
+            return True
+        return declared_scenario == "EndToEndScenario"
+
     def configure(self, config):
         from utils import interfaces
 
@@ -452,6 +458,10 @@ def configure(self, config):
 
     def session_start(self):
         super().session_start()
+
+        if self.replay:
+            return
+
         try:
             code, (stdout, stderr) = self.weblog_container._container.exec_run("uname -a", demux=True)
             if code:
@@ -1219,6 +1229,19 @@ def weblog_variant(self):
 
 
 class scenarios:
+    @staticmethod
+    def all_endtoend_scenarios(test_object):
+        """Decorator for the particular case where a test object applies to all end-to-end scenarios.
+
+        Applies the ``scenario`` pytest marker with the value "EndToEndScenario" to ``test_object``
+        and returns ``test_object``.
+
+        Raises:
+            ValueError: if ``test_object`` already carries a ``scenario`` marker in its ``pytestmark``.
+        """
+        # Only one scenario declaration is allowed per test object
+        already_declared = getattr(test_object, "pytestmark", [])
+        if any(marker.name == "scenario" for marker in already_declared):
+            raise ValueError(f"Error on {test_object}: You can declare only one scenario")
+
+        pytest.mark.scenario("EndToEndScenario")(test_object)
+        return test_object
+
     todo = _Scenario("TODO", doc="scenario that skips tests not yet executed")
     test_the_test = TestTheTestScenario("TEST_THE_TEST", doc="Small scenario that check system-tests internals")
     mock_the_test = TestTheTestScenario("MOCK_THE_TEST", doc="Mock scenario that check system-tests internals")

diff --git a/utils/interfaces/_agent.py b/utils/interfaces/_agent.py
@@ -11,7 +11,6 @@
 
 from utils.tools import logger, get_rid_from_span, get_rid_from_request
 from utils.interfaces._core import ProxyBasedInterfaceValidator
-from utils.interfaces._schemas_validators import SchemaValidator
 from utils.interfaces._misc_validators import HeadersPresenceValidator, HeadersMatchValidator
 
 
@@ -60,10 +59,6 @@ def assert_use_domain(self, expected_domain):
             if domain != expected_domain:
                 raise ValueError(f"Message #{data['log_filename']} uses host {domain} instead of {expected_domain}")
 
-    def assert_schemas(self, allowed_errors=None):
-        validator = SchemaValidator("agent", allowed_errors)
-        self.validate(validator, success_by_default=True)
-
     def get_profiling_data(self):
         yield from self.get_data(path_filters="/api/v2/profile")
 

diff --git a/utils/interfaces/_core.py b/utils/interfaces/_core.py
@@ -4,17 +4,18 @@
 
 """ This file contains base class used to validate interfaces """
 
-import threading
 import json
 from os import listdir
 from os.path import isfile, join
 import re
+import threading
 import time
 
 import pytest
 
 from utils._context.core import context
 from utils.tools import logger
+from utils.interfaces._schemas_validators import SchemaValidator, SchemaError
 
 
 class InterfaceValidator:
@@ -50,6 +51,7 @@ def __init__(self, name):
         self._lock = threading.RLock()
         self._data_list = []
         self._ingested_files = set()
+        self._schema_errors = None
 
     @property
     def _log_folder(self):
@@ -166,6 +168,39 @@ def wait_for(self, wait_for_function, timeout):
 
         self._wait_for_function = None
 
+    def get_schemas_errors(self) -> list[SchemaError]:
+        """Return the schema errors found in the data of this interface.
+
+        Each item yielded by ``self.get_data()`` is checked with a ``SchemaValidator`` built
+        from ``self.name``. The result is computed on the first call and cached in
+        ``self._schema_errors``; later calls return that same list without validating again.
+        """
+        if self._schema_errors is None:
+            validator = SchemaValidator(self.name)
+
+            # Collect into a local list and publish it only once complete: if validation raises
+            # midway, the cache stays unset instead of keeping a partial list that every later
+            # call would silently return as the final result.
+            errors = []
+            for data in self.get_data():
+                errors.extend(validator.get_errors(data))
+
+            self._schema_errors = errors
+
+        return self._schema_errors
+
+    def assert_schema_point(self, endpoint, data_path):
+        """Assert that no schema error exists for one (endpoint, data path) point.
+
+        Every error from ``self.get_schemas_errors()`` whose ``endpoint`` and ``data_path`` both
+        equal the given values is logged, then the assertion fails if at least one was found.
+        """
+        matching_errors = [
+            error
+            for error in self.get_schemas_errors()
+            if error.endpoint == endpoint and error.data_path == data_path
+        ]
+
+        for error in matching_errors:
+            logger.error(f"* {error.message}")
+
+        assert not matching_errors, f"Schema is invalid for endpoint {endpoint} on data path {data_path}"
+
+    def assert_schema_points(self, excluded_points=None):
+        """Assert that no schema error exists outside of the excluded points.
+
+        Args:
+            excluded_points: optional iterable of (endpoint, data_path) pairs to ignore.
+                Each pair may be a tuple or any other two-item sequence such as a list.
+
+        Every error from ``self.get_schemas_errors()`` that is not excluded is logged, then the
+        assertion fails if at least one was found.
+        """
+        # Normalize to a set of tuples: constant-time lookups, and pairs written as lists
+        # now match too (a tuple never compares equal to a list).
+        excluded = {tuple(point) for point in excluded_points or ()}
+
+        has_error = False
+        for error in self.get_schemas_errors():
+            if (error.endpoint, error.data_path) in excluded:
+                continue
+
+            has_error = True
+            logger.error(f"* {error.message}")
+
+        assert not has_error, f"Schema validation failed for {self.name}"
+
 
 class ValidationError(Exception):
     def __init__(self, *args: object, extra_info=None) -> None:

diff --git a/utils/interfaces/_library/core.py b/utils/interfaces/_library/core.py
@@ -17,7 +17,6 @@
 )
 
 from utils.interfaces._misc_validators import HeadersPresenceValidator
-from utils.interfaces._schemas_validators import SchemaValidator
 
 
 class LibraryInterfaceValidator(ProxyBasedInterfaceValidator):
@@ -233,10 +232,6 @@ def assert_receive_request_root_trace(self):  # TODO : move this in test class
 
         raise ValueError("Nothing has been reported. No request root span with has been found")
 
-    def assert_schemas(self, allowed_errors=None):
-        validator = SchemaValidator("library", allowed_errors)
-        self.validate(validator, success_by_default=True)
-
     def assert_all_traces_requests_forwarded(self, paths):
         # TODO : move this in test class
         paths = set(paths)