From 56d1191e2e26469bc8750070cc1d5032c4b7dab4 Mon Sep 17 00:00:00 2001 From: Dan Nicholson Date: Tue, 14 Nov 2023 10:42:11 -0700 Subject: [PATCH] Download: Skip unneeded content import tasks Unfortunately, even if Kolibri already has all the content nodes, it will still probe the remote server for channel metadata. Since that fails when the device is offline, skip creating the tasks if it appears all the content nodes are available. This uses the same `get_import_export_data` helper that Kolibri uses when determining nodes to download. That's an expensive query, but it appears that's the only way to reliably determine if a download is needed or not. Fixes: #890 --- kolibri_explore_plugin/collectionviews.py | 45 ++++++++++++++++--- .../test/test_collectionviews.py | 13 ++++++ 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/kolibri_explore_plugin/collectionviews.py b/kolibri_explore_plugin/collectionviews.py index dc936924..c8ac6e4a 100644 --- a/kolibri_explore_plugin/collectionviews.py +++ b/kolibri_explore_plugin/collectionviews.py @@ -12,6 +12,9 @@ from kolibri.core.content.utils.content_manifest import ( ContentManifestParseError, ) +from kolibri.core.content.utils.import_export_content import ( + get_import_export_data, +) from kolibri.core.tasks.job import State as JobState from kolibri.core.tasks.main import job_storage from kolibri.utils import conf @@ -189,6 +192,20 @@ def get_contentimport_tasks(self): node_ids = list( self._get_node_ids_for_channel(channel_metadata, channel_id) ) + + # Check if the desired nodes are already available. + num_resources, _, _ = get_import_export_data( + channel_id, + node_ids=node_ids, + available=False, + ) + if num_resources == 0: + logger.debug( + f"Skipping content import task for {channel_id} " + "since all resources already present" + ) + continue + tasks.append( get_remotecontentimport_task( channel_id, channel_metadata.name, node_ids @@ -219,12 +236,30 @@ def get_contentthumbnail_tasks(self): For all the channels in this content manifest. """ - return [ - get_remotecontentimport_task( - channel_id, node_ids=[], all_thumbnails=True + tasks = [] + + for channel_id in self.get_channel_ids(): + # Check if the desired thumbnail nodes are already available. + num_resources, _, _ = get_import_export_data( + channel_id, + node_ids=[], + available=False, + all_thumbnails=True, ) - for channel_id in self.get_channel_ids() - ] + if num_resources == 0: + logger.debug( + f"Skipping content thumbnail task for {channel_id} " + "since all resources already present" + ) + continue + + tasks.append( + get_remotecontentimport_task( + channel_id, node_ids=[], all_thumbnails=True + ) + ) + + return tasks def _get_node_ids_for_channel(self, channel_metadata, channel_id): """Get node IDs regardless of the version diff --git a/kolibri_explore_plugin/test/test_collectionviews.py b/kolibri_explore_plugin/test/test_collectionviews.py index 5931c658..cac2ebb7 100644 --- a/kolibri_explore_plugin/test/test_collectionviews.py +++ b/kolibri_explore_plugin/test/test_collectionviews.py @@ -9,12 +9,14 @@ from kolibri.core.content.models import ChannelMetadata from kolibri.core.content.models import ContentNode from kolibri.core.content.models import LocalFile +from kolibri.core.tasks import main as tasks_main from rest_framework.test import APIClient from .utils import COLLECTIONSDIR from .utils import ExploreTestTimeoutError from .utils import wait_for_background_tasks from kolibri_explore_plugin import collectionviews +from kolibri_explore_plugin.jobs import TaskType @pytest.mark.django_db @@ -220,6 +222,11 @@ def test_download_manager_preload(facility_user, grade, name): assert num_initial_channels == len(all_channels) assert LocalFile.objects.filter(available=False).count() == 0 + # Clear all the jobs to check if any downloading jobs were created + # later. + job_storage = tasks_main.job_storage + job_storage.clear(force=True) + # Run the downloader with requests blocked. Since no URLs are mocked, all # requests will fail. Since the download manager retries tasks forever, it # will eventually time out on any request. @@ -233,3 +240,9 @@ def test_download_manager_preload(facility_user, grade, name): assert ( LocalFile.objects.filter(available=True).count() == num_initial_files ) + + # Check that no channel or content import jobs were created. + channel_jobs = job_storage.filter_jobs(func=TaskType.REMOTECHANNELIMPORT) + assert channel_jobs == [] + content_jobs = job_storage.filter_jobs(func=TaskType.REMOTECONTENTIMPORT) + assert content_jobs == []