diff --git a/contentcuration/contentcuration/tests/test_user.py b/contentcuration/contentcuration/tests/test_user.py index d772007d57..585932aa9c 100644 --- a/contentcuration/contentcuration/tests/test_user.py +++ b/contentcuration/contentcuration/tests/test_user.py @@ -16,7 +16,9 @@ from .base import StudioTestCase from .testdata import fileobj_video from contentcuration.models import DEFAULT_CONTENT_DEFAULTS +from contentcuration.models import File from contentcuration.models import Invitation +from contentcuration.models import Language from contentcuration.models import User from contentcuration.models import UserSubscription from contentcuration.tests import testdata @@ -163,6 +165,80 @@ def test_user_csv_export(self): self.assertIn(_format_size(videos[index - 1].file_size), row) self.assertEqual(index, len(videos)) + def test_user_csv_export_reports_channel_and_content_metadata(self): + language = Language.objects.create(lang_code="fr", readable_name="French") + file_record = File.objects.filter( + contentnode__tree_id=self.channel.main_tree.tree_id + ).first() + file_record.uploaded_by = self.user + file_record.original_filename = "sample-video.mp4" + file_record.language = None + file_record.save() + + contentnode = file_record.contentnode + contentnode.title = "CSV Content Title" + contentnode.description = "CSV Description" + contentnode.author = "CSV Author" + contentnode.language = language + contentnode.license_description = "CSV License Description" + contentnode.copyright_holder = "CSV Copyright Holder" + contentnode.save() + + with tempfile.NamedTemporaryFile(suffix=".csv") as tempf: + write_user_csv(self.user, path=tempf.name) + + with io.open(tempf.name, "r", encoding="utf-8") as csv_file: + rows = list(csv.DictReader(csv_file, delimiter=",")) + + self.assertTrue(rows) + row = rows[0] + self.assertEqual(row["Channel"], self.channel.name) + self.assertEqual(row["Title"], "CSV Content Title") + self.assertEqual(row["Filename"], "sample-video.mp4") + self.assertEqual(row["Description"], "CSV Description") + self.assertEqual(row["Author"], "CSV Author") + self.assertEqual(row["Language"], "French") + self.assertEqual(row["License Description"], "CSV License Description") + self.assertEqual(row["Copyright Holder"], "CSV Copyright Holder") + + def test_user_csv_export_reports_staged_files(self): + self.user.staged_files.create(checksum="stagedchecksum", file_size=2048) + + with tempfile.NamedTemporaryFile(suffix=".csv") as tempf: + write_user_csv(self.user, path=tempf.name) + + with io.open(tempf.name, "r", encoding="utf-8") as csv_file: + rows = list(csv.DictReader(csv_file, delimiter=",")) + + staged_rows = [row for row in rows if row["Filename"] == "Staged File"] + self.assertEqual(len(staged_rows), 1) + staged_row = staged_rows[0] + self.assertEqual(staged_row["Channel"], "No Channel") + self.assertEqual(staged_row["Title"], "No Resource") + self.assertEqual(staged_row["File Size"], _format_size(2048)) + self.assertEqual(staged_row["URL"], "") + + def test_user_csv_export_includes_files_without_contentnode(self): + file_without_contentnode = fileobj_video() + self.assertIsNone(file_without_contentnode.contentnode_id) + file_without_contentnode.uploaded_by = self.user + file_without_contentnode.original_filename = "no-contentnode.mp4" + file_without_contentnode.save() + + with tempfile.NamedTemporaryFile(suffix=".csv") as tempf: + write_user_csv(self.user, path=tempf.name) + + with io.open(tempf.name, "r", encoding="utf-8") as csv_file: + rows = list(csv.DictReader(csv_file, delimiter=",")) + + row = next( + row + for row in rows + if row["Filename"] == file_without_contentnode.original_filename + ) + self.assertEqual(row["Title"], "No resource") + self.assertEqual(row["Channel"], "No Channel") + class UserEffectiveDiskSpaceTest(StudioTestCase): def setUp(self): diff --git a/contentcuration/contentcuration/utils/csv_writer.py b/contentcuration/contentcuration/utils/csv_writer.py index 0ceaefcd7c..babb5b6850 100644 --- a/contentcuration/contentcuration/utils/csv_writer.py +++ b/contentcuration/contentcuration/utils/csv_writer.py @@ -6,13 +6,17 @@ from django.conf import settings from django.contrib.sites.models import Site +from django.db.models import Exists +from django.db.models import F from django.db.models import OuterRef -from django.db.models import Q from django.db.models import Subquery +from django.db.models.sql.constants import LOUTER from django.utils.translation import gettext as _ from le_utils.constants import content_kinds +from contentcuration.db.models.query import With from contentcuration.models import Channel +from contentcuration.models import ContentNode from contentcuration.models import generate_storage_url if not os.path.exists(settings.CSV_ROOT): @@ -43,29 +47,24 @@ def generate_user_csv_filename(user): def _write_user_row(file, writer, domain): - filename = "{}.{}".format(file["checksum"], file["file_format__extension"]) + filename = "{}.{}".format(file["checksum"], file["file_extension"]) writer.writerow( [ file["channel_name"] or _("No Channel"), - file["contentnode__title"] or _("No resource"), + file["node_title"] or _("No resource"), next( - ( - k[1] - for k in content_kinds.choices - if k[0] == file["contentnode__kind_id"] - ), + (k[1] for k in content_kinds.choices if k[0] == file["node_kind_id"]), "", ), file["original_filename"], _format_size(file["file_size"] or 0), generate_storage_url(filename), - file["contentnode__description"], - file["contentnode__author"], - file["language__readable_name"] - or file["contentnode__language__readable_name"], - file["contentnode__license__license_name"], - file["contentnode__license_description"], - file["contentnode__copyright_holder"], + file["node_description"], + file["node_author"], + file["file_language"] or file["node_language"], + file["node_license_name"], + file["node_license_description"], + file["node_copyright_holder"], ] ) @@ -100,34 +99,105 @@ def write_user_csv(user, path=None): domain = Site.objects.get(pk=1).domain - # Get all user files - channel_query = Channel.objects.filter( - Q(main_tree__tree_id=OuterRef("contentnode__tree_id")) - | Q(trash_tree__tree_id=OuterRef("contentnode__tree_id")) + # Build CTEs so we first reduce to this user's files, then resolve only + # needed content node and channel fields. + user_files_cte = With( + user.files.values( + "id", + "contentnode_id", + "original_filename", + "file_size", + "checksum", + file_extension=F("file_format__extension"), + file_language=F("language__readable_name"), + ), + name="user_files", + ) + + content_nodes_cte = With( + user_files_cte.join( + ContentNode.objects.all(), + id=user_files_cte.col.contentnode_id, + ) + .values( + "id", + "tree_id", + node_title=F("title"), + node_kind_id=F("kind_id"), + node_description=F("description"), + node_author=F("author"), + node_language=F("language__readable_name"), + node_license_name=F("license__license_name"), + node_license_description=F("license_description"), + node_copyright_holder=F("copyright_holder"), + ) + .distinct(), + name="content_nodes", + ) + + main_channel_names = Channel.objects.filter( + Exists( + content_nodes_cte.queryset().filter( + tree_id=OuterRef("main_tree__tree_id") + ) + ) + ).values( + tree_id=F("main_tree__tree_id"), + channel_name=F("name"), + ) + trash_channel_names = Channel.objects.filter( + Exists( + content_nodes_cte.queryset().filter( + tree_id=OuterRef("trash_tree__tree_id") + ) + ) + ).values( + tree_id=F("trash_tree__tree_id"), + channel_name=F("name"), + ) + channel_names_cte = With( + main_channel_names.union(trash_channel_names), name="channel_names" ) user_files = ( - user.files.select_related("language", "contentnode", "file_format") + content_nodes_cte.join( + user_files_cte.queryset(), + contentnode_id=content_nodes_cte.col.id, + _join_type=LOUTER, + ) + .with_cte(user_files_cte) + .with_cte(content_nodes_cte) + .with_cte(channel_names_cte) .annotate( - channel_name=Subquery(channel_query.values_list("name", flat=True)[:1]) + channel_name=Subquery( + channel_names_cte.queryset() + .filter(tree_id=content_nodes_cte.col.tree_id) + .values("channel_name")[:1] + ), + node_title=content_nodes_cte.col.node_title, + node_kind_id=content_nodes_cte.col.node_kind_id, + node_description=content_nodes_cte.col.node_description, + node_author=content_nodes_cte.col.node_author, + node_language=content_nodes_cte.col.node_language, + node_license_name=content_nodes_cte.col.node_license_name, + node_license_description=content_nodes_cte.col.node_license_description, + node_copyright_holder=content_nodes_cte.col.node_copyright_holder, ) .values( "channel_name", "original_filename", "file_size", "checksum", - "file_format__extension", - "language__readable_name", - "contentnode__title", - "contentnode__language__readable_name", - "contentnode__license__license_name", - "contentnode__kind_id", - "contentnode__description", - "contentnode__author", - "contentnode__provider", - "contentnode__aggregator", - "contentnode__license_description", - "contentnode__copyright_holder", + "file_extension", + "file_language", + "node_title", + "node_kind_id", + "node_description", + "node_author", + "node_language", + "node_license_name", + "node_license_description", + "node_copyright_holder", ) ) for file in user_files: