diff --git a/.github/workflows/run-end-to-end.yml b/.github/workflows/run-end-to-end.yml index a9a3149989..6fc633372f 100644 --- a/.github/workflows/run-end-to-end.yml +++ b/.github/workflows/run-end-to-end.yml @@ -240,7 +240,7 @@ jobs: env: DD_API_KEY: ${{ secrets.DD_API_KEY }} - name: Run REMOTE_CONFIG_MOCKED_BACKEND_ASM_FEATURES scenario - if: always() && steps.build.outcome == 'success' && inputs.run_all + if: always() && steps.build.outcome == 'success' && (inputs.run_appsec || inputs.run_all) run: ./run.sh REMOTE_CONFIG_MOCKED_BACKEND_ASM_FEATURES env: DD_API_KEY: ${{ secrets.DD_API_KEY }} @@ -250,12 +250,12 @@ jobs: env: DD_API_KEY: ${{ secrets.DD_API_KEY }} - name: Run REMOTE_CONFIG_MOCKED_BACKEND_ASM_DD scenario - if: always() && steps.build.outcome == 'success' && inputs.run_all + if: always() && steps.build.outcome == 'success' && (inputs.run_appsec || inputs.run_all) run: ./run.sh REMOTE_CONFIG_MOCKED_BACKEND_ASM_DD env: DD_API_KEY: ${{ secrets.DD_API_KEY }} - name: Run REMOTE_CONFIG_MOCKED_BACKEND_ASM_FEATURES_NOCACHE scenario - if: always() && steps.build.outcome == 'success' && inputs.run_all + if: always() && steps.build.outcome == 'success' && (inputs.run_appsec || inputs.run_all) run: ./run.sh REMOTE_CONFIG_MOCKED_BACKEND_ASM_FEATURES_NOCACHE env: DD_API_KEY: ${{ secrets.DD_API_KEY }} @@ -265,7 +265,7 @@ jobs: env: DD_API_KEY: ${{ secrets.DD_API_KEY }} - name: Run REMOTE_CONFIG_MOCKED_BACKEND_ASM_DD_NOCACHE scenario - if: always() && steps.build.outcome == 'success' && inputs.run_all + if: always() && steps.build.outcome == 'success' && (inputs.run_appsec || inputs.run_all) run: ./run.sh REMOTE_CONFIG_MOCKED_BACKEND_ASM_DD_NOCACHE env: DD_API_KEY: ${{ secrets.DD_API_KEY }} diff --git a/conftest.py b/conftest.py index 5a02cee393..d4db48984f 100644 --- a/conftest.py +++ b/conftest.py @@ -205,7 +205,7 @@ def pytest_collection_modifyitems(session, config, items): deselected.append(item) continue - if declared_scenario == context.scenario.name: + if context.scenario.is_part_of(declared_scenario): logger.info(f"{item.nodeid} is included in {context.scenario}") selected.append(item) diff --git a/tests/remote_config/test_remote_configuration.py b/tests/remote_config/test_remote_configuration.py index 82f41eab94..015010e54b 100644 --- a/tests/remote_config/test_remote_configuration.py +++ b/tests/remote_config/test_remote_configuration.py @@ -52,45 +52,6 @@ def test_agent_provide_config_endpoint(self): class RemoteConfigurationFieldsBasicTests: """Misc tests on fields and values on remote configuration requests""" - @bug(context.library < "golang@1.36.0") - @bug(context.library < "java@0.93.0") - @bug(context.library >= "nodejs@3.14.1") - def test_schemas(self): - """Test all library schemas""" - interfaces.library.assert_schemas() - - def test_non_regression(self): - """Non-regression test on shemas""" - - # Never skip this test. As a full respect of shemas may be hard, this test ensure that - # at least the part that was ok stays ok. - - allowed_errors = None - if context.library == "golang": - allowed_errors = ( - r"'actor' is a required property on instance \['events'\]\[\d+\]\['context'\]", - r"'protocol_version' is a required property on instance ", - ) - elif context.library == "java": - # pylint: disable=line-too-long - allowed_errors = ( - r"'appsec' was expected on instance \['events'\]\[\d+\]\['event_type'\]", - r"'headers' is a required property on instance \['events'\]\[\d+\]\['context'\]\['http'\]\['response'\]", - r"'idempotency_key' is a required property on instance ", - ) - elif context.library == "dotnet": - allowed_errors = ( - # value is missing in configuration object in telemetry payloads - r"'value' is a required property on instance \['payload'\]\['configuration'\]\[\d+\]", - ) - elif context.library == "nodejs": - allowed_errors = ( - # value is missing in configuration object in telemetry payloads - r"'value' is a required property on instance \['payload'\]\['configuration'\]\[\d+\]", - ) - - interfaces.library.assert_schemas(allowed_errors=allowed_errors) - def test_client_state_errors(self): """Ensure that the Client State error is consistent""" diff --git a/tests/test_schemas.py b/tests/test_schemas.py index ae7b50ff95..3f4d088af9 100644 --- a/tests/test_schemas.py +++ b/tests/test_schemas.py @@ -4,106 +4,56 @@ """Test format specifications""" -from utils import weblog, interfaces, bug, context +from utils import weblog, interfaces, bug, irrelevant, context, scenarios -class Test_Library: +@scenarios.all_endtoend_scenarios +class Test_library: """Libraries's payload are valid regarding schemas""" - def setup_full(self): + def setup_library_schema_full(self): # send some requests to be sure to trigger events - weblog.get("/waf", params={"key": "\n :"}) # rules.http_protocol_violation.crs_921_160 + weblog.get("/waf", params={"key": "\n :"}) - @bug(context.library < "golang@1.36.0") - @bug(context.library < "java@0.93.0") - @bug(context.library >= "dotnet@2.24.0") - @bug(context.library >= "nodejs@2.27.1") - @bug( - context.library >= "python@1.16.0rc2.dev37" - and context.agent_version >= "7.47.0rc2" - and context.appsec_rules_file is not None, - reason="on /v0.7/config, client.products is an empty array", - ) - def test_full(self): - interfaces.library.assert_schemas() + def test_library_schema_full(self): + interfaces.library.assert_schema_points( + excluded_points=[ + ("/telemetry/proxy/api/v2/apmtelemetry", "$.payload.configuration[]"), + ("/telemetry/proxy/api/v2/apmtelemetry", "$.payload"), # APPSEC-52845 + ] + ) - def test_non_regression(self): - """ Non-regression test on shemas """ + @bug(context.library >= "nodejs@2.27.1", reason="APPSEC-52805") + def test_library_schema_telemetry_conf_value(self): + interfaces.library.assert_schema_point("/telemetry/proxy/api/v2/apmtelemetry", "$.payload.configuration[]") - # Never skip this test. As a full respect of shemas may be hard, this test ensure that - # at least the part that was ok stays ok. - - allowed_errors = None - if context.library == "golang": - allowed_errors = ( - r"'actor' is a required property on instance \['events'\]\[\d+\]\['context'\]", - r"'protocol_version' is a required property on instance ", - ) - elif context.library == "java": - # pylint: disable=line-too-long - allowed_errors = ( - r"'appsec' was expected on instance \['events'\]\[\d+\]\['event_type'\]", - r"'headers' is a required property on instance \['events'\]\[\d+\]\['context'\]\['http'\]\['response'\]", - r"'idempotency_key' is a required property on instance ", - ) - elif context.library == "dotnet": - allowed_errors = ( - # value is missing in configuration object in telemetry payloads - r"'value' is a required property on instance \['payload'\]\['configuration'\]\[\d+\]", - ) - elif context.library == "nodejs": - allowed_errors = ( - # value is missing in configuration object in telemetry payloads - r"'value' is a required property on instance \['payload'\]\['configuration'\]\[\d+\]", - ) - elif context.library == "python": - allowed_errors = (r"\[\] is too short on instance \['client'\]\['products'\]",) - - interfaces.library.assert_schemas(allowed_errors=allowed_errors) + @bug(library="python", reason="APPSEC-52845") + def test_library_schema_telemetry_job_object(self): + interfaces.library.assert_schema_point("/telemetry/proxy/api/v2/apmtelemetry", "$.payload") +@scenarios.all_endtoend_scenarios class Test_Agent: """Agents's payload are valid regarding schemas""" - def setup_full(self): + def setup_agent_schema_full(self): # send some requests to be sure to trigger events - weblog.get("/waf", params={"key": "\n :"}) # rules.http_protocol_violation.crs_921_160 - - @bug(context.library < "golang@1.36.0") - @bug(context.library < "java@0.93.0") - @bug(context.library >= "dotnet@2.24.0") - @bug(context.library >= "nodejs@2.27.1") - def test_full(self): - interfaces.agent.assert_schemas() - - def test_non_regression(self): - """ Non-regression test on shemas """ - - # Never skip this test. As a full respect of shemas may be hard, this test ensure that - # at least the part that was ok stays ok. - - allowed_errors = None - if context.library == "golang": - allowed_errors = ( - r"'actor' is a required property on instance \['events'\]\[\d+\]\['context'\]", - r"'protocol_version' is a required property on instance ", - ) - elif context.library == "java": - # pylint: disable=line-too-long - allowed_errors = ( - r"'appsec' was expected on instance \['events'\]\[\d+\]\['event_type'\]", - r"'headers' is a required property on instance \['events'\]\[\d+\]\['context'\]\['http'\]\['response'\]", - r"'idempotency_key' is a required property on instance ", - ) - elif context.library == "dotnet": - allowed_errors = ( - # value is missing in configuration object in telemetry payloads - r"'value' is a required property on instance \['payload'\]\['configuration'\]\[\d+\]", - ) - elif context.library == "nodejs": - allowed_errors = ( - # value is missing in configuration object in telemetry payloads - r"'value' is a required property on instance \['payload'\]\['configuration'\]\[\d+\]", - ) - - interfaces.agent.assert_schemas(allowed_errors=allowed_errors) + weblog.get("/waf", params={"key": "\n :"}) + + def test_agent_schema_full(self): + interfaces.agent.assert_schema_points( + excluded_points=[ + ("/api/v2/apmtelemetry", "$.payload.configuration[]"), + ("/api/v2/apmtelemetry", "$.payload"), # APPSEC-52845 + ] + ) + + @bug(context.library >= "nodejs@2.27.1", reason="APPSEC-52805") + @irrelevant(context.scenario is scenarios.crossed_tracing_libraries, reason="APPSEC-52805") + @irrelevant(context.scenario is scenarios.graphql_appsec, reason="APPSEC-52805") + def test_agent_schema_telemetry_conf_value(self): + interfaces.agent.assert_schema_point("/api/v2/apmtelemetry", "$.payload.configuration[]") + + @bug(library="python", reason="APPSEC-52845") + def test_library_schema_telemetry_job_object(self): + interfaces.agent.assert_schema_point("/api/v2/apmtelemetry", "$.payload") diff --git a/utils/_context/_scenarios.py b/utils/_context/_scenarios.py index 895cdeb43a..78b60dc9f0 100644 --- a/utils/_context/_scenarios.py +++ b/utils/_context/_scenarios.py @@ -208,6 +208,9 @@ def customize_feature_parity_dashboard(self, result): def __str__(self) -> str: return f"Scenario '{self.name}'" + def is_part_of(self, declared_scenario): + return self.name == declared_scenario + class TestTheTestScenario(_Scenario): @property @@ -420,6 +423,9 @@ def __init__( self.backend_interface_timeout = backend_interface_timeout self.library_interface_timeout = library_interface_timeout + def is_part_of(self, declared_scenario): + return declared_scenario in (self.name, "EndToEndScenario") + def configure(self, config): from utils import interfaces @@ -452,6 +458,10 @@ def configure(self, config): def session_start(self): super().session_start() + + if self.replay: + return + try: code, (stdout, stderr) = self.weblog_container._container.exec_run("uname -a", demux=True) if code: @@ -1219,6 +1229,19 @@ def weblog_variant(self): class scenarios: + @staticmethod + def all_endtoend_scenarios(test_object): + """particular use case where a klass applies on all scenarios""" + + # Check that no scenario has been already declared + for marker in getattr(test_object, "pytestmark", []): + if marker.name == "scenario": + raise ValueError(f"Error on {test_object}: You can declare only one scenario") + + pytest.mark.scenario("EndToEndScenario")(test_object) + + return test_object + todo = _Scenario("TODO", doc="scenario that skips tests not yet executed") test_the_test = TestTheTestScenario("TEST_THE_TEST", doc="Small scenario that check system-tests internals") mock_the_test = TestTheTestScenario("MOCK_THE_TEST", doc="Mock scenario that check system-tests internals") diff --git a/utils/interfaces/_agent.py b/utils/interfaces/_agent.py index 33e6a2bd2d..14182eff4f 100644 --- a/utils/interfaces/_agent.py +++ b/utils/interfaces/_agent.py @@ -11,7 +11,6 @@ from utils.tools import logger, get_rid_from_span, get_rid_from_request from utils.interfaces._core import ProxyBasedInterfaceValidator -from utils.interfaces._schemas_validators import SchemaValidator from utils.interfaces._misc_validators import HeadersPresenceValidator, HeadersMatchValidator @@ -60,10 +59,6 @@ def assert_use_domain(self, expected_domain): if domain != expected_domain: raise ValueError(f"Message #{data['log_filename']} uses host {domain} instead of {expected_domain}") - def assert_schemas(self, allowed_errors=None): - validator = SchemaValidator("agent", allowed_errors) - self.validate(validator, success_by_default=True) - def get_profiling_data(self): yield from self.get_data(path_filters="/api/v2/profile") diff --git a/utils/interfaces/_core.py b/utils/interfaces/_core.py index c5b47d4238..973544837c 100644 --- a/utils/interfaces/_core.py +++ b/utils/interfaces/_core.py @@ -4,17 +4,18 @@ """ This file contains base class used to validate interfaces """ -import threading import json from os import listdir from os.path import isfile, join import re +import threading import time import pytest from utils._context.core import context from utils.tools import logger +from utils.interfaces._schemas_validators import SchemaValidator, SchemaError class InterfaceValidator: @@ -50,6 +51,7 @@ def __init__(self, name): self._lock = threading.RLock() self._data_list = [] self._ingested_files = set() + self._schema_errors = None @property def _log_folder(self): @@ -166,6 +168,39 @@ def wait_for(self, wait_for_function, timeout): self._wait_for_function = None + def get_schemas_errors(self) -> list[SchemaError]: + if self._schema_errors is None: + self._schema_errors = [] + validator = SchemaValidator(self.name) + + for data in self.get_data(): + self._schema_errors.extend(validator.get_errors(data)) + + return self._schema_errors + + def assert_schema_point(self, endpoint, data_path): + has_error = False + + for error in self.get_schemas_errors(): + if error.endpoint == endpoint and error.data_path == data_path: + has_error = True + logger.error(f"* {error.message}") + + assert not has_error, f"Schema is invalid for endpoint {endpoint} on data path {data_path}" + + def assert_schema_points(self, excluded_points=None): + has_error = False + excluded_points = excluded_points or [] + + for error in self.get_schemas_errors(): + if (error.endpoint, error.data_path) in excluded_points: + continue + + has_error = True + logger.error(f"* {error.message}") + + assert not has_error, f"Schema validation failed for {self.name}" + class ValidationError(Exception): def __init__(self, *args: object, extra_info=None) -> None: diff --git a/utils/interfaces/_library/core.py b/utils/interfaces/_library/core.py index 31c4d118f7..f1656f868e 100644 --- a/utils/interfaces/_library/core.py +++ b/utils/interfaces/_library/core.py @@ -17,7 +17,6 @@ ) from utils.interfaces._misc_validators import HeadersPresenceValidator -from utils.interfaces._schemas_validators import SchemaValidator class LibraryInterfaceValidator(ProxyBasedInterfaceValidator): @@ -233,10 +232,6 @@ def assert_receive_request_root_trace(self): # TODO : move this in test class raise ValueError("Nothing has been reported. No request root span with has been found") - def assert_schemas(self, allowed_errors=None): - validator = SchemaValidator("library", allowed_errors) - self.validate(validator, success_by_default=True) - def assert_all_traces_requests_forwarded(self, paths): # TODO : move this in test class paths = set(paths) diff --git a/utils/interfaces/_schemas_validators.py b/utils/interfaces/_schemas_validators.py index b7b1a4d5c7..3ce9b651c5 100644 --- a/utils/interfaces/_schemas_validators.py +++ b/utils/interfaces/_schemas_validators.py @@ -7,16 +7,15 @@ PYTHONPATH=. python utils/interfaces/_schemas_validators.py """ +from dataclasses import dataclass import os import json import re import functools -from jsonschema import Draft7Validator, RefResolver +from jsonschema import Draft7Validator, RefResolver, ValidationError from jsonschema.validators import extend -from utils.tools import logger - def _is_bytes_or_string(_checker, instance): return Draft7Validator.TYPE_CHECKER.is_type(instance, "string") or isinstance(instance, bytes) @@ -70,6 +69,25 @@ def _get_schema_validator(schema_id): return _ApiObjectValidator(schema, resolver=resolver, format_checker=Draft7Validator.FORMAT_CHECKER) +@dataclass +class SchemaError: + interface_name: str + endpoint: str + error: ValidationError + data: dict + + @property + def message(self): + return ( + f"{self.error.message} on instance {self.error.json_path} in {self.endpoint}. Please check " + + self.data["log_filename"] + ) + + @property + def data_path(self): + return re.sub(r"\[\d+\]", "[]", self.error.json_path) + + class SchemaValidator: def __init__(self, interface, allowed_errors=None): self.interface = interface @@ -78,39 +96,37 @@ def __init__(self, interface, allowed_errors=None): for pattern in allowed_errors or []: self.allowed_errors.append(re.compile(pattern)) - def __call__(self, data): + def get_errors(self, data) -> list[SchemaError]: path = "/" if data["path"] == "" else data["path"] schema_id = f"/{self.interface}{path}-request.json" validator = _get_schema_validator(schema_id) - if not validator.is_valid(data["request"]["content"]): - messages = [] - - for error in validator.iter_errors(data["request"]["content"]): - message = f"{error.message} on instance " + "".join([f"[{repr(i)}]" for i in error.path]) - if not any(pattern.fullmatch(message) for pattern in self.allowed_errors): - messages.append(message) - - if len(messages) != 0: - for message in messages: - logger.error(f"* {message}") - - raise ValueError(f"Schema is invalid in {data['log_filename']}") + if validator.is_valid(data["request"]["content"]): + return [] - logger.debug(f"{data['log_filename']} schema validation ok") + return [ + SchemaError(interface_name=self.interface, endpoint=path, error=error, data=data,) + for error in validator.iter_errors(data["request"]["content"]) + ] def _main(): for interface in ("agent", "library"): validator = SchemaValidator(interface) - path = f"logs/interfaces/{interface}" - files = [file for file in os.listdir(path) if os.path.isfile(os.path.join(path, file))] - for file in files: - with open(os.path.join(path, file), encoding="utf-8") as f: - data = json.load(f) - - if "request" in data and data["request"]["length"] != 0: - validator(data) + folders = [folder for folder in os.listdir(".") if os.path.isdir(folder) and folder.startswith("logs")] + for folder in folders: + path = f"{folder}/interfaces/{interface}" + + if not os.path.exists(path): + continue + files = [file for file in os.listdir(path) if os.path.isfile(os.path.join(path, file))] + for file in files: + with open(os.path.join(path, file), encoding="utf-8") as f: + data = json.load(f) + + if "request" in data and data["request"]["length"] != 0: + for error in validator.get_errors(data): + print(error.message) if __name__ == "__main__": diff --git a/utils/interfaces/schemas/agent/api/v2/debugger-request.json b/utils/interfaces/schemas/agent/api/v2/debugger-request.json new file mode 100644 index 0000000000..5cd298acbf --- /dev/null +++ b/utils/interfaces/schemas/agent/api/v2/debugger-request.json @@ -0,0 +1,3 @@ +{ + "$id": "/agent/api/v2/debugger-request.json" +} diff --git a/utils/interfaces/schemas/library/debugger/v1/diagnostics-request.json b/utils/interfaces/schemas/library/debugger/v1/diagnostics-request.json new file mode 100644 index 0000000000..8fd63df9a3 --- /dev/null +++ b/utils/interfaces/schemas/library/debugger/v1/diagnostics-request.json @@ -0,0 +1,4 @@ +{ + "$id": "/library/debugger/v1/diagnostics-request.json" + } + \ No newline at end of file diff --git a/utils/interfaces/schemas/library/debugger/v1/input-request.json b/utils/interfaces/schemas/library/debugger/v1/input-request.json new file mode 100644 index 0000000000..e6bd069d7f --- /dev/null +++ b/utils/interfaces/schemas/library/debugger/v1/input-request.json @@ -0,0 +1,4 @@ +{ + "$id": "/library/debugger/v1/input-request.json" + } + \ No newline at end of file