Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validation Extension Support #200

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
f0a470a
feat(dev): adds validation extension support;
JVickery-TBS Nov 24, 2023
a97f4ad
feat(dev): logging;
JVickery-TBS Nov 24, 2023
028e42b
fix(dev): 2.9 support;
JVickery-TBS Nov 24, 2023
77d762e
fix(dev): better conditioning;
JVickery-TBS Nov 27, 2023
aec22b9
feat(comments): added comments;
JVickery-TBS Nov 27, 2023
cd3c7cc
Merge branch 'master' into feature/validation-support
JVickery-TBS Jan 29, 2024
7e99aa7
fix(dev): misc feedback;
JVickery-TBS Jan 29, 2024
25ea76e
fix(dev): misc fixes;
JVickery-TBS Jan 29, 2024
d2720fc
fix(syntax): flake8;
JVickery-TBS Jan 31, 2024
e888153
feat(dev): logic and schema config option;
JVickery-TBS Feb 2, 2024
5c07ba4
Merge branch 'master' into feature/validation-support
JVickery-TBS Feb 2, 2024
4612484
feat(dev): better logic and tests;
JVickery-TBS Feb 2, 2024
8ac8db5
fix(logic): fixed some logic;
JVickery-TBS Feb 2, 2024
d9bb56c
Merge branch 'master' into feature/validation-support
JVickery-TBS Feb 5, 2024
1ac8090
fix(syntax): made better;
JVickery-TBS Feb 5, 2024
1761ed5
fix(comments): fixed inline comments;
JVickery-TBS Feb 5, 2024
e182eb7
feat(dev): started doing sync mode;
JVickery-TBS Feb 6, 2024
b386e0e
feat(dev): sync mode cont.;
JVickery-TBS Feb 7, 2024
3200483
feat(dev): sync mode cont.;
JVickery-TBS Feb 7, 2024
d225801
Merge branch 'master' into feature/validation-support
JVickery-TBS May 16, 2024
4fbdb0d
feat(dev): IPipeValidation implementation;
JVickery-TBS May 16, 2024
27d98cf
fix(tests): validation req tests;
JVickery-TBS May 16, 2024
378f69f
fix(misc): comments and messages;
JVickery-TBS Jul 12, 2024
6070740
fix(logic): ignore not sysadmin;
JVickery-TBS Aug 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions ckanext/xloader/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ def xloader_submit(context, data_dict):
:param ignore_hash: If set to True, the xloader will reload the file
even if it haven't changed. (optional, default: False)
:type ignore_hash: bool
:param sync: If set to True, the xloader callback will be executed right
JVickery-TBS marked this conversation as resolved.
Show resolved Hide resolved
away, instead of a job being enqueued. It will also delete any existing jobs
for the given resource. (optional, default: False)
:type sync: bool

Returns ``True`` if the job has been submitted and ``False`` if the job
has not been submitted, i.e. when ckanext-xloader is not configured.
Expand All @@ -53,6 +57,9 @@ def xloader_submit(context, data_dict):
if errors:
raise p.toolkit.ValidationError(errors)

p.toolkit.check_access('xloader_submit', context, data_dict)

sync = data_dict.pop('sync', False)
res_id = data_dict['resource_id']
try:
resource_dict = p.toolkit.get_action('resource_show')(context, {
Expand Down Expand Up @@ -166,15 +173,20 @@ def xloader_submit(context, data_dict):
job = enqueue_job(
jobs.xloader_data_into_datastore, [data], queue=custom_queue,
title="xloader_submit: package: {} resource: {}".format(resource_dict.get('package_id'), res_id),
rq_kwargs=dict(timeout=timeout)
rq_kwargs=dict(timeout=timeout, at_front=sync)
duttonw marked this conversation as resolved.
Show resolved Hide resolved
)
except Exception:
log.exception('Unable to enqueued xloader res_id=%s', res_id)
if sync:
log.exception('Unable to xloader res_id=%s', res_id)
else:
log.exception('Unable to enqueued xloader res_id=%s', res_id)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Unable to enqueued" -> "Unable to enqueue"

return False
log.debug('Enqueued xloader job=%s res_id=%s', job.id, res_id)

value = json.dumps({'job_id': job.id})

if sync:
log.debug('Pushed xloader sync mode job=%s res_id=%s to front of queue', job.id, res_id)

task['value'] = value
task['state'] = 'pending'
task['last_updated'] = str(datetime.datetime.utcnow())
Expand Down
19 changes: 19 additions & 0 deletions ckanext/xloader/config_declaration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,25 @@ groups:
to True.
type: bool
required: false
- key: ckanext.xloader.validation.requires_successful_report
default: False
example: True
description: |
Resources are required to pass Validation from the ckanext-validation
plugin to be able to get XLoadered.
type: bool
required: false
- key: ckanext.xloader.validation.enforce_schema
default: True
example: False
description: |
Resources are expected to have a Validation Schema, or use the default ones if not.

If this option is set to `False`, Resources that do not have
a Validation Schema will be treated like they do not require Validation.

See https://github.com/frictionlessdata/ckanext-validation?tab=readme-ov-file#data-schema
for more details.
- key: ckanext.xloader.clean_datastore_tables
default: False
example: True
Expand Down
49 changes: 44 additions & 5 deletions ckanext/xloader/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
from . import action, auth, helpers as xloader_helpers, utils
from ckanext.xloader.utils import XLoaderFormats

try:
from ckanext.validation.interfaces import IPipeValidation
HAS_IPIPE_VALIDATION = True
except ImportError:
HAS_IPIPE_VALIDATION = False

try:
config_declarations = toolkit.blanket.config_declarations
except AttributeError:
Expand All @@ -34,6 +40,8 @@ class xloaderPlugin(plugins.SingletonPlugin):
plugins.implements(plugins.IResourceController, inherit=True)
plugins.implements(plugins.IClick)
plugins.implements(plugins.IBlueprint)
if HAS_IPIPE_VALIDATION:
plugins.implements(IPipeValidation)

# IClick
def get_commands(self):
Expand Down Expand Up @@ -68,6 +76,22 @@ def configure(self, config_):
)
)

# IPipeValidation

def receive_validation_report(self, validation_report):
if utils.requires_successful_validation_report():
res_dict = toolkit.get_action('resource_show')({'ignore_auth': True},
{'id': validation_report.get('resource_id')})
if (toolkit.asbool(toolkit.config.get('ckanext.xloader.validation.enforce_schema', True))
or res_dict.get('schema', None)) and validation_report.get('status') != 'success':
# A schema is present, or required to be present
return
# if validation is running in async mode, it is running from the redis workers.
# thus we need to do sync=True to have Xloader put the job at the front of the queue.
sync = toolkit.asbool(toolkit.config.get(u'ckanext.validation.run_on_update_async', True))
self._submit_to_xloader(res_dict, sync=sync)


# IDomainObjectModification

def notify(self, entity, operation):
Expand Down Expand Up @@ -95,7 +119,11 @@ def notify(self, entity, operation):
if _should_remove_unsupported_resource_from_datastore(resource_dict):
toolkit.enqueue_job(fn=_remove_unsupported_resource_from_datastore, args=[entity.id])

if not getattr(entity, 'url_changed', False):
if utils.requires_successful_validation_report():
log.debug("Deferring xloading resource %s because the "
"resource did not pass validation yet.", entity.id)
return
elif not getattr(entity, 'url_changed', False):
# do not submit to xloader if the url has not changed.
return

Expand All @@ -104,6 +132,11 @@ def notify(self, entity, operation):
# IResourceController

def after_resource_create(self, context, resource_dict):
if utils.requires_successful_validation_report():
log.debug("Deferring xloading resource %s because the "
"resource did not pass validation yet.", resource_dict.get('id'))
return

self._submit_to_xloader(resource_dict)

def before_resource_show(self, resource_dict):
Expand Down Expand Up @@ -146,7 +179,7 @@ def before_show(self, resource_dict):
def after_update(self, context, resource_dict):
self.after_resource_update(context, resource_dict)

def _submit_to_xloader(self, resource_dict):
def _submit_to_xloader(self, resource_dict, sync=False):
context = {"ignore_auth": True, "defer_commit": True}
if not XLoaderFormats.is_it_an_xloader_format(resource_dict["format"]):
log.debug(
Expand All @@ -165,14 +198,20 @@ def _submit_to_xloader(self, resource_dict):
return

try:
log.debug(
"Submitting resource %s to be xloadered", resource_dict["id"]
)
if sync:
log.debug(
"xloadering resource %s in sync mode", resource_dict["id"]
)
else:
log.debug(
"Submitting resource %s to be xloadered", resource_dict["id"]
)
toolkit.get_action("xloader_submit")(
context,
{
"resource_id": resource_dict["id"],
"ignore_hash": self.ignore_hash,
"sync": sync,
},
)
except toolkit.ValidationError as e:
Expand Down
1 change: 1 addition & 0 deletions ckanext/xloader/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def xloader_submit_schema():
'id': [ignore_missing],
'set_url_type': [ignore_missing, boolean_validator],
'ignore_hash': [ignore_missing, boolean_validator],
'sync': [ignore_missing, boolean_validator],
'__junk': [empty],
'__before': [dsschema.rename('id', 'resource_id')]
}
Expand Down
87 changes: 87 additions & 0 deletions ckanext/xloader/tests/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,93 @@ def test_submit_when_url_changes(self, monkeypatch):

assert func.called

@pytest.mark.ckan_config("ckanext.xloader.validation.requires_successful_report", True)
def test_require_validation(self, monkeypatch):
func = mock.Mock()
monkeypatch.setitem(_actions, "xloader_submit", func)

mock_resource_validation_show = mock.Mock()
monkeypatch.setitem(_actions, "resource_validation_show", mock_resource_validation_show)

dataset = factories.Dataset()

resource = helpers.call_action(
"resource_create",
{},
package_id=dataset["id"],
url="http://example.com/file.csv",
format="CSV",
validation_status='failure',
)

# TODO: test IPipeValidation
assert not func.called # because of the validation_status not being `success`
func.called = None # reset

helpers.call_action(
"resource_update",
{},
id=resource["id"],
package_id=dataset["id"],
url="http://example.com/file2.csv",
format="CSV",
validation_status='success',
)

# TODO: test IPipeValidation
assert not func.called # because of the validation_status is `success`

@pytest.mark.ckan_config("ckanext.xloader.validation.requires_successful_report", True)
@pytest.mark.ckan_config("ckanext.xloader.validation.enforce_schema", False)
def test_enforce_validation_schema(self, monkeypatch):
func = mock.Mock()
monkeypatch.setitem(_actions, "xloader_submit", func)

mock_resource_validation_show = mock.Mock()
monkeypatch.setitem(_actions, "resource_validation_show", mock_resource_validation_show)

dataset = factories.Dataset()

resource = helpers.call_action(
"resource_create",
{},
package_id=dataset["id"],
url="http://example.com/file.csv",
schema='',
validation_status='',
)

# TODO: test IPipeValidation
assert not func.called # because of the schema being empty
func.called = None # reset

helpers.call_action(
"resource_update",
{},
id=resource["id"],
package_id=dataset["id"],
url="http://example.com/file2.csv",
schema='https://example.com/schema.json',
validation_status='failure',
)

# TODO: test IPipeValidation
assert not func.called # because of the validation_status not being `success` and there is a schema
func.called = None # reset

helpers.call_action(
"resource_update",
{},
package_id=dataset["id"],
id=resource["id"],
url="http://example.com/file3.csv",
schema='https://example.com/schema.json',
validation_status='success',
)

# TODO: test IPipeValidation
assert not func.called # because of the validation_status is `success` and there is a schema

@pytest.mark.parametrize("toolkit_config_value, mock_xloader_formats, url_type, datastore_active, expected_result", [
# Test1: Should pass as it is an upload with an active datastore entry but an unsupported format
(True, False, 'upload', True, True),
Expand Down
76 changes: 73 additions & 3 deletions ckanext/xloader/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,20 @@

import json
import datetime
from rq import get_current_job

from ckan import model
from ckan.lib import search
from collections import defaultdict
from decimal import Decimal

import ckan.plugins as p
from ckan.plugins.toolkit import config
from ckan.plugins.toolkit import config, h, _

from logging import getLogger


log = getLogger(__name__)

# resource.formats accepted by ckanext-xloader. Must be lowercase here.
DEFAULT_FORMATS = [
Expand Down Expand Up @@ -42,9 +48,73 @@ def is_it_an_xloader_format(cls, format_):
return format_.lower() in cls._formats


def requires_successful_validation_report():
return p.toolkit.asbool(config.get('ckanext.xloader.validation.requires_successful_report', False))


def awaiting_validation(res_dict):
# type: (dict) -> bool
"""
Checks the existence of a logic action from the ckanext-validation
plugin, thus supporting any extending of the Validation Plugin class.

Checks ckanext.xloader.validation.requires_successful_report config
option value.

Checks ckanext.xloader.validation.enforce_schema config
option value. Then checks the Resource's validation_status.
"""
if not requires_successful_validation_report():
# validation.requires_successful_report is turned off, return right away
return False

try:
# check for one of the main actions from ckanext-validation
# in the case that users extend the Validation plugin class
# and rename the plugin entry-point.
p.toolkit.get_action('resource_validation_show')
is_validation_plugin_loaded = True
except KeyError:
is_validation_plugin_loaded = False

if not is_validation_plugin_loaded:
# the validation plugin is not loaded but required, log a warning
log.warning('ckanext.xloader.validation.requires_successful_report requires the ckanext-validation plugin to be activated.')
return False

if (p.toolkit.asbool(config.get('ckanext.xloader.validation.enforce_schema', True))
or res_dict.get('schema', None)) and res_dict.get('validation_status', None) != 'success':

# either validation.enforce_schema is turned on or it is off and there is a schema,
# we then explicitly check for the `validation_status` report to be `success``
return True

# at this point, we can assume that the Resource is not waiting for Validation.
# or that the Resource does not have a Validation Schema and we are not enforcing schemas.
return False


def resource_data(id, resource_id, rows=None):

if p.toolkit.request.method == "POST":

context = {
"ignore_auth": True,
}
resource_dict = p.toolkit.get_action("resource_show")(
context,
{
"id": resource_id,
},
)

if awaiting_validation(resource_dict):
h.flash_error(_("Cannot upload resource %s to the DataStore "
"because the resource did not pass validation yet.") % resource_id)
return p.toolkit.redirect_to(
"xloader.resource_data", id=id, resource_id=resource_id
)

try:
p.toolkit.get_action("xloader_submit")(
None,
Expand Down Expand Up @@ -201,7 +271,7 @@ def type_guess(rows, types=TYPES, strict=False):
at_least_one_value = []
for ri, row in enumerate(rows):
diff = len(row) - len(guesses)
for _ in range(diff):
for _i in range(diff):
typesdict = {}
for type in types:
typesdict[type] = 0
Expand All @@ -227,7 +297,7 @@ def type_guess(rows, types=TYPES, strict=False):
else:
for i, row in enumerate(rows):
diff = len(row) - len(guesses)
for _ in range(diff):
for _i in range(diff):
guesses.append(defaultdict(int))
for i, cell in enumerate(row):
# add string guess so that we have at least one guess
Expand Down
Loading