From 56d1191e2e26469bc8750070cc1d5032c4b7dab4 Mon Sep 17 00:00:00 2001
From: Dan Nicholson <dbn@endlessos.org>
Date: Tue, 14 Nov 2023 10:42:11 -0700
Subject: [PATCH] Download: Skip unneeded content import tasks

Unfortunately, even if Kolibri already has all the content nodes, it
will still probe the remote server for channel metadata. Since that
fails when the device is offline, skip creating the tasks if it appears
all the content nodes are available. This uses the same
`get_import_export_data` helper that Kolibri uses when determining nodes
to download. That's an expensive query, but it appears that's the only
way to reliably determine if a download is needed or not.

Fixes: #890
---
 kolibri_explore_plugin/collectionviews.py     | 45 ++++++++++++++++---
 .../test/test_collectionviews.py              | 13 ++++++
 2 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/kolibri_explore_plugin/collectionviews.py b/kolibri_explore_plugin/collectionviews.py
index dc936924..c8ac6e4a 100644
--- a/kolibri_explore_plugin/collectionviews.py
+++ b/kolibri_explore_plugin/collectionviews.py
@@ -12,6 +12,9 @@
 from kolibri.core.content.utils.content_manifest import (
     ContentManifestParseError,
 )
+from kolibri.core.content.utils.import_export_content import (
+    get_import_export_data,
+)
 from kolibri.core.tasks.job import State as JobState
 from kolibri.core.tasks.main import job_storage
 from kolibri.utils import conf
@@ -189,6 +192,20 @@ def get_contentimport_tasks(self):
             node_ids = list(
                 self._get_node_ids_for_channel(channel_metadata, channel_id)
             )
+
+            # Check if the desired nodes are already available.
+            num_resources, _, _ = get_import_export_data(
+                channel_id,
+                node_ids=node_ids,
+                available=False,
+            )
+            if num_resources == 0:
+                logger.debug(
+                    f"Skipping content import task for {channel_id} "
+                    "since all resources already present"
+                )
+                continue
+
             tasks.append(
                 get_remotecontentimport_task(
                     channel_id, channel_metadata.name, node_ids
@@ -219,12 +236,30 @@ def get_contentthumbnail_tasks(self):
 
         For all the channels in this content manifest.
         """
-        return [
-            get_remotecontentimport_task(
-                channel_id, node_ids=[], all_thumbnails=True
+        tasks = []
+
+        for channel_id in self.get_channel_ids():
+            # Check if the desired thumbnail nodes are already available.
+            num_resources, _, _ = get_import_export_data(
+                channel_id,
+                node_ids=[],
+                available=False,
+                all_thumbnails=True,
             )
-            for channel_id in self.get_channel_ids()
-        ]
+            if num_resources == 0:
+                logger.debug(
+                    f"Skipping content thumbnail task for {channel_id} "
+                    "since all resources already present"
+                )
+                continue
+
+            tasks.append(
+                get_remotecontentimport_task(
+                    channel_id, node_ids=[], all_thumbnails=True
+                )
+            )
+
+        return tasks
 
     def _get_node_ids_for_channel(self, channel_metadata, channel_id):
         """Get node IDs regardless of the version
diff --git a/kolibri_explore_plugin/test/test_collectionviews.py b/kolibri_explore_plugin/test/test_collectionviews.py
index 5931c658..cac2ebb7 100644
--- a/kolibri_explore_plugin/test/test_collectionviews.py
+++ b/kolibri_explore_plugin/test/test_collectionviews.py
@@ -9,12 +9,14 @@
 from kolibri.core.content.models import ChannelMetadata
 from kolibri.core.content.models import ContentNode
 from kolibri.core.content.models import LocalFile
+from kolibri.core.tasks import main as tasks_main
 from rest_framework.test import APIClient
 
 from .utils import COLLECTIONSDIR
 from .utils import ExploreTestTimeoutError
 from .utils import wait_for_background_tasks
 from kolibri_explore_plugin import collectionviews
+from kolibri_explore_plugin.jobs import TaskType
 
 
 @pytest.mark.django_db
@@ -220,6 +222,11 @@ def test_download_manager_preload(facility_user, grade, name):
     assert num_initial_channels == len(all_channels)
     assert LocalFile.objects.filter(available=False).count() == 0
 
+    # Clear all the jobs to check if any downloading jobs were created
+    # later.
+    job_storage = tasks_main.job_storage
+    job_storage.clear(force=True)
+
     # Run the downloader with requests blocked. Since no URLs are mocked, all
     # requests will fail. Since the download manager retries tasks forever, it
     # will eventually time out on any request.
@@ -233,3 +240,9 @@ def test_download_manager_preload(facility_user, grade, name):
     assert (
         LocalFile.objects.filter(available=True).count() == num_initial_files
     )
+
+    # Check that no channel or content import jobs were created.
+    channel_jobs = job_storage.filter_jobs(func=TaskType.REMOTECHANNELIMPORT)
+    assert channel_jobs == []
+    content_jobs = job_storage.filter_jobs(func=TaskType.REMOTECONTENTIMPORT)
+    assert content_jobs == []