Skip to content

Commit

Permalink
Skip unneeded content import tasks
Browse files Browse the repository at this point in the history
Unfortunately, even if Kolibri already has all the content nodes, it
will still probe the remote server for channel metadata. Since that
fails when the device is offline, skip creating the tasks if it appears
all the content nodes are available. This uses the same
`get_import_export_data` helper that Kolibri uses when determining nodes
to download. That's an expensive query, but it appears that's the only
way to reliably determine if a download is needed or not.

Fixes: #890
  • Loading branch information
dbnicholson committed Nov 14, 2023
1 parent 0fa2acf commit 56d1191
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 5 deletions.
45 changes: 40 additions & 5 deletions kolibri_explore_plugin/collectionviews.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
from kolibri.core.content.utils.content_manifest import (
ContentManifestParseError,
)
from kolibri.core.content.utils.import_export_content import (
get_import_export_data,
)
from kolibri.core.tasks.job import State as JobState
from kolibri.core.tasks.main import job_storage
from kolibri.utils import conf
Expand Down Expand Up @@ -189,6 +192,20 @@ def get_contentimport_tasks(self):
node_ids = list(
self._get_node_ids_for_channel(channel_metadata, channel_id)
)

# Check if the desired nodes are already available.
num_resources, _, _ = get_import_export_data(
channel_id,
node_ids=node_ids,
available=False,
)
if num_resources == 0:
logger.debug(
f"Skipping content import task for {channel_id} "
"since all resources already present"
)
continue

tasks.append(
get_remotecontentimport_task(
channel_id, channel_metadata.name, node_ids
Expand Down Expand Up @@ -219,12 +236,30 @@ def get_contentthumbnail_tasks(self):
For all the channels in this content manifest.
"""
return [
get_remotecontentimport_task(
channel_id, node_ids=[], all_thumbnails=True
tasks = []

for channel_id in self.get_channel_ids():
# Check if the desired thumbnail nodes are already available.
num_resources, _, _ = get_import_export_data(
channel_id,
node_ids=[],
available=False,
all_thumbnails=True,
)
for channel_id in self.get_channel_ids()
]
if num_resources == 0:
logger.debug(
f"Skipping content thumbnail task for {channel_id} "
"since all resources already present"
)
continue

tasks.append(
get_remotecontentimport_task(
channel_id, node_ids=[], all_thumbnails=True
)
)

return tasks

def _get_node_ids_for_channel(self, channel_metadata, channel_id):
"""Get node IDs regardless of the version
Expand Down
13 changes: 13 additions & 0 deletions kolibri_explore_plugin/test/test_collectionviews.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@
from kolibri.core.content.models import ChannelMetadata
from kolibri.core.content.models import ContentNode
from kolibri.core.content.models import LocalFile
from kolibri.core.tasks import main as tasks_main
from rest_framework.test import APIClient

from .utils import COLLECTIONSDIR
from .utils import ExploreTestTimeoutError
from .utils import wait_for_background_tasks
from kolibri_explore_plugin import collectionviews
from kolibri_explore_plugin.jobs import TaskType


@pytest.mark.django_db
Expand Down Expand Up @@ -220,6 +222,11 @@ def test_download_manager_preload(facility_user, grade, name):
assert num_initial_channels == len(all_channels)
assert LocalFile.objects.filter(available=False).count() == 0

# Clear all the jobs to check if any downloading jobs were created
# later.
job_storage = tasks_main.job_storage
job_storage.clear(force=True)

# Run the downloader with requests blocked. Since no URLs are mocked, all
# requests will fail. Since the download manager retries tasks forever, it
# will eventually time out on any request.
Expand All @@ -233,3 +240,9 @@ def test_download_manager_preload(facility_user, grade, name):
assert (
LocalFile.objects.filter(available=True).count() == num_initial_files
)

# Check that no channel or content import jobs were created.
channel_jobs = job_storage.filter_jobs(func=TaskType.REMOTECHANNELIMPORT)
assert channel_jobs == []
content_jobs = job_storage.filter_jobs(func=TaskType.REMOTECONTENTIMPORT)
assert content_jobs == []

0 comments on commit 56d1191

Please sign in to comment.