Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Standardize on JSON Schema Draft 2020-12 to validate stream schemas #2566

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 52 additions & 1 deletion singer_sdk/_singerlib/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
if t.TYPE_CHECKING:
from referencing._core import Resolver

META_KEYS = [
"id",
"schema",
]

# These are keys defined in the JSON Schema spec that do not themselves contain
# schemas (or lists of schemas)
STANDARD_KEYS = [
Expand Down Expand Up @@ -52,6 +57,9 @@ class Schema:
This is because we wanted to expand it with extra STANDARD_KEYS.
"""

id: str | None = None
schema: str | None = None

type: str | list[str] | None = None
default: t.Any | None = None
properties: dict | None = None
Expand Down Expand Up @@ -94,6 +102,10 @@ def to_dict(self) -> dict[str, t.Any]:
if self.__dict__.get(key) is not None:
result[key] = self.__dict__[key]

for key in META_KEYS:
if self.__dict__.get(key) is not None:
result[f"${key}"] = self.__dict__[key]

return result

@classmethod
Expand All @@ -110,7 +122,40 @@ def from_dict(

Returns:
The initialized Schema object.
"""

Example:
>>> data = {
... "$id": "https://example.com/person.schema.json",
... "$schema": "http://json-schema.org/draft/2020-12/schema",
... "title": "Person",
... "type": "object",
... "properties": {
... "firstName": {
... "type": "string",
... "description": "The person's first name.",
... },
... "lastName": {
... "type": "string",
... "description": "The person's last name.",
... },
... "age": {
... "description": "Age in years which must be equal to or greater than zero.",
... "type": "integer",
... "minimum": 0,
... },
... },
... "required": ["firstName", "lastName"],
... }
>>> schema = Schema.from_dict(data)
>>> schema.title
'Person'
>>> schema.properties["firstName"].description
"The person's first name."
>>> schema.properties["age"].minimum
0
>>> schema.schema
'http://json-schema.org/draft/2020-12/schema'
""" # noqa: E501
kwargs = schema_defaults.copy()
properties = data.get("properties")
items = data.get("items")
Expand All @@ -121,9 +166,15 @@ def from_dict(
}
if items is not None:
kwargs["items"] = cls.from_dict(items, **schema_defaults)

for key in STANDARD_KEYS:
if key in data:
kwargs[key] = data[key]

for key in META_KEYS:
if f"${key}" in data:
kwargs[key] = data[f"${key}"]

return cls(**kwargs)


Expand Down
13 changes: 12 additions & 1 deletion singer_sdk/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@
None,
]

DEFAULT_JSONSCHEMA_VALIDATOR: type[Validator] = validators.Draft7Validator # type: ignore[assignment]
DEFAULT_JSONSCHEMA_VALIDATOR: type[Validator] = validators.Draft202012Validator # type: ignore[assignment]

T = t.TypeVar("T", bound=_JsonValue)
P = t.TypeVar("P")
Expand Down Expand Up @@ -1077,6 +1077,17 @@ def append(self, property: Property) -> None: # noqa: A002
"""
self.wrapped[property.name] = property

@property
def type_dict(self) -> dict: # type: ignore[override]
"""Get type dictionary.

Returns:
A dictionary describing the type.
"""
d = super().type_dict
d["$schema"] = "https://json-schema.org/draft/2020-12/schema"
return d

def __iter__(self) -> t.Iterator[Property]:
"""Iterate all properties of the property list.

Expand Down
4 changes: 3 additions & 1 deletion tests/core/test_jsonschema_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,8 @@ def test_to_json():
"required": [
"test_property"
],
"additionalProperties": false
"additionalProperties": false,
"$schema": "https://json-schema.org/draft/2020-12/schema"
}""",
)

Expand All @@ -172,6 +173,7 @@ def test_any_type(caplog: pytest.LogCaptureFixture):
)
with caplog.at_level(WARNING):
assert schema.to_dict() == {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"type": "object",
"properties": {
"any_type": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
{"type":"SCHEMA","stream":"continents","schema":{"properties":{"code":{"type":["string","null"]},"name":{"type":["string","null"]}},"type":"object"},"key_properties":["code"]}
{"type":"SCHEMA","stream":"countries","schema":{"properties":{"code":{"type":["string","null"]},"name":{"type":["string","null"]},"native":{"type":["string","null"]},"phone":{"type":["string","null"]},"capital":{"type":["string","null"]},"currency":{"type":["string","null"]},"emoji":{"type":["string","null"]},"continent":{"properties":{"code":{"type":["string","null"]},"name":{"type":["string","null"]}},"type":["object","null"]},"languages":{"items":{"properties":{"code":{"type":["string","null"]},"name":{"type":["string","null"]}},"type":"object"},"type":["array","null"]}},"type":"object"},"key_properties":["code"]}
{"type":"SCHEMA","stream":"countries","schema":{"properties":{"code":{"type":["string","null"]},"name":{"type":["string","null"]},"native":{"type":["string","null"]},"phone":{"type":["string","null"]},"capital":{"type":["string","null"]},"currency":{"type":["string","null"]},"emoji":{"type":["string","null"]},"continent":{"properties":{"code":{"type":["string","null"]},"name":{"type":["string","null"]}},"type":["object","null"]},"languages":{"items":{"properties":{"code":{"type":["string","null"]},"name":{"type":["string","null"]}},"type":"object"},"type":["array","null"]}},"type":"object","$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":["code"]}
2 changes: 1 addition & 1 deletion tests/snapshots/mapped_stream/aliased_stream.jsonl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"aliased_stream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user":{"properties":{"id":{"type":["integer","null"]},"sub":{"properties":{"num":{"type":["integer","null"]},"custom_obj":{"type":["string","null"]}},"type":["object","null"]},"some_numbers":{"items":{"type":["number"]},"type":["array","null"]}},"type":["object","null"]}},"type":"object"},"key_properties":[]}
{"type":"SCHEMA","stream":"aliased_stream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user":{"properties":{"id":{"type":["integer","null"]},"sub":{"properties":{"num":{"type":["integer","null"]},"custom_obj":{"type":["string","null"]}},"type":["object","null"]},"some_numbers":{"items":{"type":["number"]},"type":["array","null"]}},"type":["object","null"]}},"type":"object","$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":[]}
{"type":"RECORD","stream":"aliased_stream","record":{"email":"[email protected]","count":21,"user":{"id":1,"sub":{"num":1,"custom_obj":"obj-hello"},"some_numbers":[3.14,2.718]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"aliased_stream","record":{"email":"[email protected]","count":13,"user":{"id":2,"sub":{"num":2,"custom_obj":"obj-world"},"some_numbers":[10.32,1.618]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"aliased_stream","record":{"email":"[email protected]","count":19,"user":{"id":3,"sub":{"num":3,"custom_obj":"obj-hello"},"some_numbers":[1.414,1.732]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"mystream","schema":{"type":"object","properties":{"email_hash":{"type":["string","null"]}}},"key_properties":["email_hash"]}
{"type":"SCHEMA","stream":"mystream","schema":{"type":"object","properties":{"email_hash":{"type":["string","null"]}},"$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":["email_hash"]}
{"type":"RECORD","stream":"mystream","record":{"email_hash":"c160f8cc69a4f0bf2b0362752353d060"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email_hash":"4b9bb80620f03eb3719e0a061c14283d"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email_hash":"426b189df1e2f359efe6ee90f2d2030f"},"time_extracted":"2022-01-01T00:00:00+00:00"}
Expand Down
2 changes: 1 addition & 1 deletion tests/snapshots/mapped_stream/drop_property.jsonl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"count":{"type":["integer","null"]},"user":{"properties":{"id":{"type":["integer","null"]},"sub":{"properties":{"num":{"type":["integer","null"]},"custom_obj":{"type":["string","null"]}},"type":["object","null"]},"some_numbers":{"items":{"type":["number"]},"type":["array","null"]}},"type":["object","null"]}},"type":"object"},"key_properties":[]}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"count":{"type":["integer","null"]},"user":{"properties":{"id":{"type":["integer","null"]},"sub":{"properties":{"num":{"type":["integer","null"]},"custom_obj":{"type":["string","null"]}},"type":["object","null"]},"some_numbers":{"items":{"type":["number"]},"type":["array","null"]}},"type":["object","null"]}},"type":"object","$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":[]}
{"type":"RECORD","stream":"mystream","record":{"count":21,"user":{"id":1,"sub":{"num":1,"custom_obj":"obj-hello"},"some_numbers":[3.14,2.718]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"count":13,"user":{"id":2,"sub":{"num":2,"custom_obj":"obj-world"},"some_numbers":[10.32,1.618]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"count":19,"user":{"id":3,"sub":{"num":3,"custom_obj":"obj-hello"},"some_numbers":[1.414,1.732]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"count":{"type":["integer","null"]},"user":{"properties":{"id":{"type":["integer","null"]},"sub":{"properties":{"num":{"type":["integer","null"]},"custom_obj":{"type":["string","null"]}},"type":["object","null"]},"some_numbers":{"items":{"type":["number"]},"type":["array","null"]}},"type":["object","null"]}},"type":"object"},"key_properties":[]}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"count":{"type":["integer","null"]},"user":{"properties":{"id":{"type":["integer","null"]},"sub":{"properties":{"num":{"type":["integer","null"]},"custom_obj":{"type":["string","null"]}},"type":["object","null"]},"some_numbers":{"items":{"type":["number"]},"type":["array","null"]}},"type":["object","null"]}},"type":"object","$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":[]}
{"type":"RECORD","stream":"mystream","record":{"count":21,"user":{"id":1,"sub":{"num":1,"custom_obj":"obj-hello"},"some_numbers":[3.14,2.718]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"count":13,"user":{"id":2,"sub":{"num":2,"custom_obj":"obj-world"},"some_numbers":[10.32,1.618]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"count":19,"user":{"id":3,"sub":{"num":3,"custom_obj":"obj-hello"},"some_numbers":[1.414,1.732]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"mystream","schema":{"type":"object","properties":{"cc":{"type":["string","null"]}}},"key_properties":[]}
{"type":"SCHEMA","stream":"mystream","schema":{"type":"object","properties":{"cc":{"type":["string","null"]}},"$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":[]}
{"type":"RECORD","stream":"mystream","record":{"cc":"4201040137208265027"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"cc":"675987782884"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"cc":"502011811259"},"time_extracted":"2022-01-01T00:00:00+00:00"}
Expand Down
2 changes: 1 addition & 1 deletion tests/snapshots/mapped_stream/flatten_all.jsonl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user__id":{"type":["integer","null"]},"user__sub__num":{"type":["integer","null"]},"user__sub__custom_obj":{"type":["string","null"]},"user__some_numbers":{"type":["string","null"]}},"type":"object"},"key_properties":[]}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user__id":{"type":["integer","null"]},"user__sub__num":{"type":["integer","null"]},"user__sub__custom_obj":{"type":["string","null"]},"user__some_numbers":{"type":["string","null"]}},"type":"object","$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":[]}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":21,"user__id":1,"user__sub__num":1,"user__sub__custom_obj":"obj-hello","user__some_numbers":"[3.14,2.718]"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":13,"user__id":2,"user__sub__num":2,"user__sub__custom_obj":"obj-world","user__some_numbers":"[10.32,1.618]"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":19,"user__id":3,"user__sub__num":3,"user__sub__custom_obj":"obj-hello","user__some_numbers":"[1.414,1.732]"},"time_extracted":"2022-01-01T00:00:00+00:00"}
Expand Down
2 changes: 1 addition & 1 deletion tests/snapshots/mapped_stream/flatten_depth_0.jsonl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user":{"properties":{"id":{"type":["integer","null"]},"sub":{"properties":{"num":{"type":["integer","null"]},"custom_obj":{"type":["string","null"]}},"type":["object","null"]},"some_numbers":{"items":{"type":["number"]},"type":["array","null"]}},"type":["object","null"]}},"type":"object"},"key_properties":[]}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user":{"properties":{"id":{"type":["integer","null"]},"sub":{"properties":{"num":{"type":["integer","null"]},"custom_obj":{"type":["string","null"]}},"type":["object","null"]},"some_numbers":{"items":{"type":["number"]},"type":["array","null"]}},"type":["object","null"]}},"type":"object","$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":[]}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":21,"user":{"id":1,"sub":{"num":1,"custom_obj":"obj-hello"},"some_numbers":[3.14,2.718]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":13,"user":{"id":2,"sub":{"num":2,"custom_obj":"obj-world"},"some_numbers":[10.32,1.618]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":19,"user":{"id":3,"sub":{"num":3,"custom_obj":"obj-hello"},"some_numbers":[1.414,1.732]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
Expand Down
2 changes: 1 addition & 1 deletion tests/snapshots/mapped_stream/flatten_depth_1.jsonl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user__id":{"type":["integer","null"]},"user__sub":{"type":["string","null"]},"user__some_numbers":{"type":["string","null"]}},"type":"object"},"key_properties":[]}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user__id":{"type":["integer","null"]},"user__sub":{"type":["string","null"]},"user__some_numbers":{"type":["string","null"]}},"type":"object","$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":[]}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":21,"user__id":1,"user__sub":"{\"num\":1,\"custom_obj\":\"obj-hello\"}","user__some_numbers":"[3.14,2.718]"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":13,"user__id":2,"user__sub":"{\"num\":2,\"custom_obj\":\"obj-world\"}","user__some_numbers":"[10.32,1.618]"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":19,"user__id":3,"user__sub":"{\"num\":3,\"custom_obj\":\"obj-hello\"}","user__some_numbers":"[1.414,1.732]"},"time_extracted":"2022-01-01T00:00:00+00:00"}
Expand Down
2 changes: 1 addition & 1 deletion tests/snapshots/mapped_stream/keep_all_fields.jsonl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user":{"properties":{"id":{"type":["integer","null"]},"sub":{"properties":{"num":{"type":["integer","null"]},"custom_obj":{"type":["string","null"]}},"type":["object","null"]},"some_numbers":{"items":{"type":["number"]},"type":["array","null"]}},"type":["object","null"]},"email_hash":{"type":["string","null"]}},"type":"object"},"key_properties":[]}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user":{"properties":{"id":{"type":["integer","null"]},"sub":{"properties":{"num":{"type":["integer","null"]},"custom_obj":{"type":["string","null"]}},"type":["object","null"]},"some_numbers":{"items":{"type":["number"]},"type":["array","null"]}},"type":["object","null"]},"email_hash":{"type":["string","null"]}},"type":"object","$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":[]}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":21,"user":{"id":1,"sub":{"num":1,"custom_obj":"obj-hello"},"some_numbers":[3.14,2.718]},"email_hash":"c160f8cc69a4f0bf2b0362752353d060"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":13,"user":{"id":2,"sub":{"num":2,"custom_obj":"obj-world"},"some_numbers":[10.32,1.618]},"email_hash":"4b9bb80620f03eb3719e0a061c14283d"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":19,"user":{"id":3,"sub":{"num":3,"custom_obj":"obj-hello"},"some_numbers":[1.414,1.732]},"email_hash":"426b189df1e2f359efe6ee90f2d2030f"},"time_extracted":"2022-01-01T00:00:00+00:00"}
Expand Down
2 changes: 1 addition & 1 deletion tests/snapshots/mapped_stream/map_and_flatten.jsonl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user__id":{"type":["integer","null"]},"user__sub__num":{"type":["integer","null"]},"user__sub__custom_obj":{"type":["string","null"]},"user__some_numbers":{"type":["string","null"]},"email_hash":{"type":["string","null"]}},"type":"object"},"key_properties":["email_hash"]}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user__id":{"type":["integer","null"]},"user__sub__num":{"type":["integer","null"]},"user__sub__custom_obj":{"type":["string","null"]},"user__some_numbers":{"type":["string","null"]},"email_hash":{"type":["string","null"]}},"type":"object","$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":["email_hash"]}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":21,"user__id":1,"user__sub__num":1,"user__sub__custom_obj":"obj-hello","user__some_numbers":"[3.14,2.718]","email_hash":"c160f8cc69a4f0bf2b0362752353d060"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":13,"user__id":2,"user__sub__num":2,"user__sub__custom_obj":"obj-world","user__some_numbers":"[10.32,1.618]","email_hash":"4b9bb80620f03eb3719e0a061c14283d"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":19,"user__id":3,"user__sub__num":3,"user__sub__custom_obj":"obj-hello","user__some_numbers":"[1.414,1.732]","email_hash":"426b189df1e2f359efe6ee90f2d2030f"},"time_extracted":"2022-01-01T00:00:00+00:00"}
Expand Down
Loading
Loading