diff --git a/spidermon/contrib/scrapy/monitors/monitors.py b/spidermon/contrib/scrapy/monitors/monitors.py
index 9097e9a3..6221a19b 100644
--- a/spidermon/contrib/scrapy/monitors/monitors.py
+++ b/spidermon/contrib/scrapy/monitors/monitors.py
@@ -569,7 +569,7 @@ def _get_jobs(self, states, number_of_jobs):
                 start=start,
                 state=states,
                 count=count,
-                filters=dict(has_tag=tags) if tags else None,
+                has_tag=tags or None,
             )
             total_jobs.extend(current_jobs)
 
@@ -584,19 +584,19 @@ def _get_jobs(self, states, number_of_jobs):
 
     def _get_tags_to_filter(self):
         """
-        Return the intersect of the desired tags to filter and
+        Return a list of tags with the intersection of the desired tags to filter and
         the ones from the current job.
         """
         desired_tags = self.crawler.settings.getlist(SPIDERMON_JOBS_COMPARISON_TAGS)
         if not desired_tags:
-            return {}
+            return []
 
         current_tags = json.loads(os.environ.get("SHUB_JOB_DATA", "{}")).get("tags")
         if not current_tags:
-            return {}
+            return []
 
         tags_to_filter = set(desired_tags) & set(current_tags)
-        return sorted(tags_to_filter)
+        return list(sorted(tags_to_filter))
 
     def get_threshold(self):
         number_of_jobs = self.crawler.settings.getint(SPIDERMON_JOBS_COMPARISON)
diff --git a/tests/contrib/scrapy/monitors/test_jobs_comparison_monitor.py b/tests/contrib/scrapy/monitors/test_jobs_comparison_monitor.py
index 86b1caa1..f01125a5 100644
--- a/tests/contrib/scrapy/monitors/test_jobs_comparison_monitor.py
+++ b/tests/contrib/scrapy/monitors/test_jobs_comparison_monitor.py
@@ -90,6 +90,26 @@ def runTest():
         pass
 
 
+def test_jobs_comparison_monitor_get_tags_to_filter(monkeypatch):
+    mock_data = Mock()
+
+    monitor = TestZyteJobsComparisonMonitor()
+    monitor.data = mock_data
+
+    # Empty SPIDERMON_JOBS_COMPARISON_TAGS
+    mock_data.crawler.settings.getlist.return_value = None
+    assert monitor._get_tags_to_filter() == []
+
+    # Empty SHUB_JOB_DATA.tags
+    mock_data.crawler.settings.getlist.return_value = ["tag1", "tag2"]
+    assert monitor._get_tags_to_filter() == []
+
+    # Sorted intersection
+    mock_data.crawler.settings.getlist.return_value = ["tag2", "tag1", "tag3"]
+    monkeypatch.setenv("SHUB_JOB_DATA", '{"tags": ["tag1", "tag2"]}')
+    assert monitor._get_tags_to_filter() == ["tag1", "tag2"]
+
+
 def test_jobs_comparison_monitor_get_jobs():
     mock_client = Mock()
     with patch(
@@ -197,7 +217,7 @@ def test_arguments_passed_to_zyte_client(
                 state=list(states),
                 # Count goes from pending number of jobs up to 1000
                 count=min(number_of_jobs - n * 1000, 1000),
-                filters={"has_tag": list(tags)},
+                has_tag=list(tags),
            )
             # One call to api every 1000 expected jobs
             for n in range(0, math.ceil(number_of_jobs / 1000))
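
# A minimal, runnable sketch of what the call-site change above amounts to,
# mirroring the tests in this diff: the client is a unittest.mock.Mock
# standing in for the client object reached via `client.spider.jobs` in the
# monitor code; the tag, state, start, and count values are hypothetical.
from unittest.mock import Mock

client = Mock()
tags = ["tag1", "tag2"]

# Before this patch: tags were wrapped in a `filters` dict, or None when
# the tag list was empty.
client.spider.jobs.list(
    start=0,
    state=["finished"],
    count=1000,
    filters=dict(has_tag=tags) if tags else None,
)

# After this patch: `has_tag` is passed directly as a keyword argument, and
# an empty tag list falls back to None (i.e. no tag filtering is applied).
client.spider.jobs.list(
    start=0,
    state=["finished"],
    count=1000,
    has_tag=tags or None,
)

# The return contract of _get_tags_to_filter() also changed: it now always
# returns a list ([] instead of {} in the empty cases), and the non-empty
# case is the sorted intersection of the desired and current tags, e.g.
assert list(sorted({"tag2", "tag1", "tag3"} & {"tag1", "tag2"})) == ["tag1", "tag2"]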