learningequality · bjester · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026 · rtibblesbot
diff --git a/contentcuration/contentcuration/tests/test_user.py b/contentcuration/contentcuration/tests/test_user.py
@@ -16,7 +16,9 @@
 from .base import StudioTestCase
 from .testdata import fileobj_video
 from contentcuration.models import DEFAULT_CONTENT_DEFAULTS
+from contentcuration.models import File
 from contentcuration.models import Invitation
+from contentcuration.models import Language
 from contentcuration.models import User
 from contentcuration.models import UserSubscription
 from contentcuration.tests import testdata
@@ -163,6 +165,80 @@ def test_user_csv_export(self):
                         self.assertIn(_format_size(videos[index - 1].file_size), row)
             self.assertEqual(index, len(videos))
 
+    def test_user_csv_export_reports_channel_and_content_metadata(self):
+        """The exported row carries the channel name and the node's metadata."""
+        language = Language.objects.create(lang_code="fr", readable_name="French")
+        file_record = File.objects.filter(
+            contentnode__tree_id=self.channel.main_tree.tree_id
+        ).first()
+        file_record.uploaded_by = self.user
+        file_record.original_filename = "sample-video.mp4"
+        file_record.language = None  # so the node's language is the one reported
+        file_record.save()
+
+        contentnode = file_record.contentnode
+        contentnode.title = "CSV Content Title"
+        contentnode.description = "CSV Description"
+        contentnode.author = "CSV Author"
+        contentnode.language = language
+        contentnode.license_description = "CSV License Description"
+        contentnode.copyright_holder = "CSV Copyright Holder"
+        contentnode.save()
+
+        with tempfile.NamedTemporaryFile(suffix=".csv") as tempf:
+            write_user_csv(self.user, path=tempf.name)
+            with io.open(tempf.name, "r", encoding="utf-8") as csv_file:
+                rows = list(csv.DictReader(csv_file, delimiter=","))
+
+        row = next((r for r in rows if r["Filename"] == "sample-video.mp4"), None)
+        self.assertIsNotNone(row)  # found by name: the export sets no row ordering
+        self.assertEqual(row["Channel"], self.channel.name)
+        self.assertEqual(row["Title"], "CSV Content Title")
+        self.assertEqual(row["Filename"], "sample-video.mp4")
+        self.assertEqual(row["Description"], "CSV Description")
+        self.assertEqual(row["Author"], "CSV Author")
+        self.assertEqual(row["Language"], "French")
+        self.assertEqual(row["License Description"], "CSV License Description")
+        self.assertEqual(row["Copyright Holder"], "CSV Copyright Holder")
+
+    def test_user_csv_export_reports_staged_files(self):
+        """A staged file is exported as a single placeholder row."""
+        staged_size = 2048
+        self.user.staged_files.create(checksum="stagedchecksum", file_size=staged_size)
+        with tempfile.NamedTemporaryFile(suffix=".csv") as export_file:
+            write_user_csv(self.user, path=export_file.name)
+            with io.open(export_file.name, "r", encoding="utf-8") as handle:
+                records = list(csv.DictReader(handle, delimiter=","))
+
+        staged = [rec for rec in records if rec["Filename"] == "Staged File"]
+        self.assertEqual(len(staged), 1)
+        placeholder = staged[0]
+        self.assertEqual(placeholder["Channel"], "No Channel")
+        self.assertEqual(placeholder["Title"], "No Resource")
+        self.assertEqual(placeholder["File Size"], _format_size(staged_size))
+        self.assertEqual(placeholder["URL"], "")
+
+    def test_user_csv_export_includes_files_without_contentnode(self):
+        """A file with no content node is still exported, with fallback labels."""
+        # Precondition: the fixture file is not attached to any content node.
+        orphan = fileobj_video()
+        self.assertIsNone(orphan.contentnode_id)
+        orphan.uploaded_by = self.user
+        orphan.original_filename = "no-contentnode.mp4"
+        orphan.save()
+
+        with tempfile.NamedTemporaryFile(suffix=".csv") as export_file:
+            write_user_csv(self.user, path=export_file.name)
+
+            with io.open(export_file.name, "r", encoding="utf-8") as handle:
+                records = list(csv.DictReader(handle, delimiter=","))
+
+        wanted = orphan.original_filename
+        exported = next(rec for rec in records if rec["Filename"] == wanted)
+        # Both labels are the placeholders used when no node/channel is resolved.
+        self.assertEqual(exported["Title"], "No resource")
+        self.assertEqual(exported["Channel"], "No Channel")
+
 
 class UserEffectiveDiskSpaceTest(StudioTestCase):
     def setUp(self):

diff --git a/contentcuration/contentcuration/utils/csv_writer.py b/contentcuration/contentcuration/utils/csv_writer.py
@@ -6,13 +6,17 @@
 
 from django.conf import settings
 from django.contrib.sites.models import Site
+from django.db.models import Exists
+from django.db.models import F
 from django.db.models import OuterRef
-from django.db.models import Q
 from django.db.models import Subquery
+from django.db.models.sql.constants import LOUTER
 from django.utils.translation import gettext as _
 from le_utils.constants import content_kinds
 
+from contentcuration.db.models.query import With
 from contentcuration.models import Channel
+from contentcuration.models import ContentNode
 from contentcuration.models import generate_storage_url
 
 if not os.path.exists(settings.CSV_ROOT):
@@ -43,29 +47,24 @@ def generate_user_csv_filename(user):
 
 
 def _write_user_row(file, writer, domain):
-    filename = "{}.{}".format(file["checksum"], file["file_format__extension"])
+    filename = "{}.{}".format(file["checksum"], file["file_extension"])  # checksum.ext
     writer.writerow(
         [
             file["channel_name"] or _("No Channel"),
-            file["contentnode__title"] or _("No resource"),
+            file["node_title"] or _("No resource"),  # falsy title -> placeholder
             next(
-                (
-                    k[1]
-                    for k in content_kinds.choices
-                    if k[0] == file["contentnode__kind_id"]
-                ),
+                (k[1] for k in content_kinds.choices if k[0] == file["node_kind_id"]),
                 "",
             ),
             file["original_filename"],
             _format_size(file["file_size"] or 0),
             generate_storage_url(filename),
-            file["contentnode__description"],
-            file["contentnode__author"],
-            file["language__readable_name"]
-            or file["contentnode__language__readable_name"],
-            file["contentnode__license__license_name"],
-            file["contentnode__license_description"],
-            file["contentnode__copyright_holder"],
+            file["node_description"],
+            file["node_author"],
+            file["file_language"] or file["node_language"],  # file's language first
+            file["node_license_name"],
+            file["node_license_description"],
+            file["node_copyright_holder"],
         ]
     )
 
@@ -100,34 +99,105 @@ def write_user_csv(user, path=None):
 
         domain = Site.objects.get(pk=1).domain
 
-        # Get all user files
-        channel_query = Channel.objects.filter(
-            Q(main_tree__tree_id=OuterRef("contentnode__tree_id"))
-            | Q(trash_tree__tree_id=OuterRef("contentnode__tree_id"))
+        # Build CTEs so we first reduce to this user's files, then resolve only
+        # needed content node and channel fields.
+        user_files_cte = With(
+            user.files.values(
+                "id",
+                "contentnode_id",
+                "original_filename",
+                "file_size",
+                "checksum",
+                file_extension=F("file_format__extension"),
+                file_language=F("language__readable_name"),
+            ),
+            name="user_files",
+        )
+
+        content_nodes_cte = With(
+            user_files_cte.join(
+                ContentNode.objects.all(),
+                id=user_files_cte.col.contentnode_id,
+            )
+            .values(
+                "id",
+                "tree_id",
+                node_title=F("title"),
+                node_kind_id=F("kind_id"),
+                node_description=F("description"),
+                node_author=F("author"),
+                node_language=F("language__readable_name"),
+                node_license_name=F("license__license_name"),
+                node_license_description=F("license_description"),
+                node_copyright_holder=F("copyright_holder"),
+            )
+            .distinct(),
+            name="content_nodes",
+        )
+
+        main_channel_names = Channel.objects.filter(
+            Exists(
+                content_nodes_cte.queryset().filter(
+                    tree_id=OuterRef("main_tree__tree_id")
+                )
+            )
+        ).values(
+            tree_id=F("main_tree__tree_id"),
+            channel_name=F("name"),
+        )
+        trash_channel_names = Channel.objects.filter(
+            Exists(
+                content_nodes_cte.queryset().filter(
+                    tree_id=OuterRef("trash_tree__tree_id")
+                )
+            )
+        ).values(
+            tree_id=F("trash_tree__tree_id"),
+            channel_name=F("name"),
+        )
+        channel_names_cte = With(
+            main_channel_names.union(trash_channel_names), name="channel_names"
         )
 
         user_files = (
-            user.files.select_related("language", "contentnode", "file_format")
+            content_nodes_cte.join(
+                user_files_cte.queryset(),
+                contentnode_id=content_nodes_cte.col.id,
+                _join_type=LOUTER,
+            )
+            .with_cte(user_files_cte)
+            .with_cte(content_nodes_cte)
+            .with_cte(channel_names_cte)
             .annotate(
-                channel_name=Subquery(channel_query.values_list("name", flat=True)[:1])
+                channel_name=Subquery(
+                    channel_names_cte.queryset()
+                    .filter(tree_id=content_nodes_cte.col.tree_id)
+                    .values("channel_name")[:1]
+                ),
+                node_title=content_nodes_cte.col.node_title,
+                node_kind_id=content_nodes_cte.col.node_kind_id,
+                node_description=content_nodes_cte.col.node_description,
+                node_author=content_nodes_cte.col.node_author,
+                node_language=content_nodes_cte.col.node_language,
+                node_license_name=content_nodes_cte.col.node_license_name,
+                node_license_description=content_nodes_cte.col.node_license_description,
+                node_copyright_holder=content_nodes_cte.col.node_copyright_holder,
             )
             .values(
                 "channel_name",
                 "original_filename",
                 "file_size",
                 "checksum",
-                "file_format__extension",
-                "language__readable_name",
-                "contentnode__title",
-                "contentnode__language__readable_name",
-                "contentnode__license__license_name",
-                "contentnode__kind_id",
-                "contentnode__description",
-                "contentnode__author",
-                "contentnode__provider",
-                "contentnode__aggregator",
-                "contentnode__license_description",
-                "contentnode__copyright_holder",
+                "file_extension",
+                "file_language",
+                "node_title",
+                "node_kind_id",
+                "node_description",
+                "node_author",
+                "node_language",
+                "node_license_name",
+                "node_license_description",
+                "node_copyright_holder",
             )
         )
         for file in user_files: