Skip to content

Commit

Permalink
Skip unneeded content import tasks
Browse files Browse the repository at this point in the history
Unfortunately, even if Kolibri already has all the content nodes, it
will still probe the remote server for channel metadata. Since that
fails when the device is offline, skip creating the tasks if it appears
all the content nodes are available. This uses the same
`get_import_export_data` helper that Kolibri uses when determining nodes
to download. That's an expensive query, but it appears that's the only
way to reliably determine if a download is needed or not.

Fixes: #890
  • Loading branch information
dbnicholson committed Nov 14, 2023
1 parent 0fa2acf commit 56d1191
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 5 deletions.
45 changes: 40 additions & 5 deletions kolibri_explore_plugin/collectionviews.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
from kolibri.core.content.utils.content_manifest import (
ContentManifestParseError,
)
from kolibri.core.content.utils.import_export_content import (
get_import_export_data,
)
from kolibri.core.tasks.job import State as JobState
from kolibri.core.tasks.main import job_storage
from kolibri.utils import conf
Expand Down Expand Up @@ -189,6 +192,20 @@ def get_contentimport_tasks(self):
node_ids = list(
self._get_node_ids_for_channel(channel_metadata, channel_id)
)

# Check if the desired nodes are already available.
num_resources, _, _ = get_import_export_data(
channel_id,
node_ids=node_ids,
available=False,
)
if num_resources == 0:
logger.debug(
f"Skipping content import task for {channel_id} "
"since all resources already present"
)
continue

tasks.append(
get_remotecontentimport_task(
channel_id, channel_metadata.name, node_ids
Expand Down Expand Up @@ -219,12 +236,30 @@ def get_contentthumbnail_tasks(self):
For all the channels in this content manifest.
"""
return [
get_remotecontentimport_task(
channel_id, node_ids=[], all_thumbnails=True
tasks = []

for channel_id in self.get_channel_ids():
# Check if the desired thumbnail nodes are already available.
num_resources, _, _ = get_import_export_data(
channel_id,
node_ids=[],
available=False,
all_thumbnails=True,
)
for channel_id in self.get_channel_ids()
]
if num_resources == 0:
logger.debug(
f"Skipping content thumbnail task for {channel_id} "
"since all resources already present"
)
continue

tasks.append(
get_remotecontentimport_task(
channel_id, node_ids=[], all_thumbnails=True
)
)

return tasks

def _get_node_ids_for_channel(self, channel_metadata, channel_id):
"""Get node IDs regardless of the version
Expand Down
13 changes: 13 additions & 0 deletions kolibri_explore_plugin/test/test_collectionviews.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@
from kolibri.core.content.models import ChannelMetadata
from kolibri.core.content.models import ContentNode
from kolibri.core.content.models import LocalFile
from kolibri.core.tasks import main as tasks_main
from rest_framework.test import APIClient

from .utils import COLLECTIONSDIR
from .utils import ExploreTestTimeoutError
from .utils import wait_for_background_tasks
from kolibri_explore_plugin import collectionviews
from kolibri_explore_plugin.jobs import TaskType


@pytest.mark.django_db
Expand Down Expand Up @@ -220,6 +222,11 @@ def test_download_manager_preload(facility_user, grade, name):
assert num_initial_channels == len(all_channels)
assert LocalFile.objects.filter(available=False).count() == 0

# Clear all the jobs to check if any downloading jobs were created
# later.
job_storage = tasks_main.job_storage
job_storage.clear(force=True)

# Run the downloader with requests blocked. Since no URLs are mocked, all
# requests will fail. Since the download manager retries tasks forever, it
# will eventually time out on any request.
Expand All @@ -233,3 +240,9 @@ def test_download_manager_preload(facility_user, grade, name):
assert (
LocalFile.objects.filter(available=True).count() == num_initial_files
)

# Check that no channel or content import jobs were created.
channel_jobs = job_storage.filter_jobs(func=TaskType.REMOTECHANNELIMPORT)
assert channel_jobs == []
content_jobs = job_storage.filter_jobs(func=TaskType.REMOTECONTENTIMPORT)
assert content_jobs == []

0 comments on commit 56d1191

Please sign in to comment.