Zipstack · harini-venkataraman · Jun 30, 2026 · Jun 30, 2026 · Jun 30, 2026 · Jun 30, 2026
diff --git a/...mpt_studio/prompt_profile_manager_v2/migrations/0006_make_extraction_adapters_nullable.py b/...mpt_studio/prompt_profile_manager_v2/migrations/0006_make_extraction_adapters_nullable.py
@@ -0,0 +1,47 @@
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Re-declare three ProfileManager adapter FKs with blank=True, null=True.
+
+    Covers vector_store, embedding_model and x2text; each remains a PROTECT
+    foreign key to adapter_processor_v2.adapterinstance.
+    """
+    dependencies = [
+        ("adapter_processor_v2", "0001_initial"),
+        ("prompt_profile_manager_v2", "0005_profilemanager_shared_to_org_and_more"),
+    ]
+
+    operations = [
+        # vector_store: optional FK (blank/null allowed), deletion still PROTECTed.
+        migrations.AlterField(
+            model_name="profilemanager",
+            name="vector_store",
+            field=models.ForeignKey(
+                blank=True,
+                db_comment="Field to store the chosen vector store.",
+                null=True,
+                on_delete=django.db.models.deletion.PROTECT,
+                related_name="profiles_vector_store",
+                to="adapter_processor_v2.adapterinstance",
+            ),
+        ),
+        # embedding_model: optional FK; this field carries no db_comment.
+        migrations.AlterField(
+            model_name="profilemanager",
+            name="embedding_model",
+            field=models.ForeignKey(
+                blank=True,
+                null=True,
+                on_delete=django.db.models.deletion.PROTECT,
+                related_name="profiles_embedding_model",
+                to="adapter_processor_v2.adapterinstance",
+            ),
+        ),
+        # x2text: optional FK (blank/null allowed), deletion still PROTECTed.
+        migrations.AlterField(
+            model_name="profilemanager",
+            name="x2text",
+            field=models.ForeignKey(
+                blank=True,
+                db_comment="Field to store the X2Text Adapter chosen by the user",
+                null=True,
+                on_delete=django.db.models.deletion.PROTECT,
+                related_name="profiles_x2text",
+                to="adapter_processor_v2.adapterinstance",
+            ),
+        ),
+    ]
diff --git a/backend/prompt_studio/prompt_profile_manager_v2/models.py b/backend/prompt_studio/prompt_profile_manager_v2/models.py
@@ -61,15 +61,15 @@ class RetrievalStrategy(models.TextChoices):
     vector_store = models.ForeignKey(
         AdapterInstance,
         db_comment="Field to store the chosen vector store.",
-        blank=False,
-        null=False,
+        blank=True,
+        null=True,
         on_delete=models.PROTECT,
         related_name="profiles_vector_store",
     )
     embedding_model = models.ForeignKey(
         AdapterInstance,
-        blank=False,
-        null=False,
+        blank=True,
+        null=True,
         on_delete=models.PROTECT,
         related_name="profiles_embedding_model",
     )
@@ -84,8 +84,8 @@ class RetrievalStrategy(models.TextChoices):
     x2text = models.ForeignKey(
         AdapterInstance,
         db_comment="Field to store the X2Text Adapter chosen by the user",
-        blank=False,
-        null=False,
+        blank=True,
+        null=True,
         on_delete=models.PROTECT,
         related_name="profiles_x2text",
     )

diff --git a/backend/prompt_studio/prompt_profile_manager_v2/serializers.py b/backend/prompt_studio/prompt_profile_manager_v2/serializers.py
@@ -1,6 +1,7 @@
 import logging
 
 from adapter_processor_v2.adapter_processor import AdapterProcessor
+from rest_framework import serializers
 
 from backend.serializers import AuditSerializer
 from prompt_studio.prompt_profile_manager_v2.constants import ProfileManagerKeys
@@ -9,6 +10,14 @@
 
 logger = logging.getLogger(__name__)
 
+# Extraction adapter fields required on create, and on update when any prompt
+# using the profile needs text extraction (extraction_inputs != "image").
+_TEXT_EXTRACTION_FIELDS = (
+    ProfileManagerKeys.VECTOR_STORE,
+    ProfileManagerKeys.EMBEDDING_MODEL,
+    ProfileManagerKeys.X2TEXT,
+)
+
 
 class ProfileManagerSerializer(AuditSerializer):
     class Meta:
@@ -18,12 +27,49 @@
         # the DRF auto-validator that 400s on re-save / PUT before the view runs.
         validators = []
 
+    def validate(self, attrs):
+        """Enforce x2text/embedding/vector_store when text extraction needed.
+
+        The columns are nullable to support image-only profiles, but all
+        three must end up set on create and whenever a linked prompt has
+        extraction_inputs != "image". Raises serializers.ValidationError
+        keyed by each missing field.
+        """
+        attrs = super().validate(attrs)
+
+        instance = self.instance
+        # Create (no instance): no prompts are linked yet, so require the
+        # adapters by default. Update: ask the prompts linked to the profile.
+        needs_text = (
+            instance is None
+            or instance.tool_studio_prompts.exclude(extraction_inputs="image").exists()
+        )
+
+        if needs_text:
+            # A key present in attrs wins, so an explicit null on update is
+            # reported as missing; the stored FK only covers omitted keys.
+            missing = [
+                field
+                for field in _TEXT_EXTRACTION_FIELDS
+                if not attrs.get(field)
+                and (field in attrs or not getattr(instance, f"{field}_id", None))
+            ]
+            if missing:
+                raise serializers.ValidationError(
+                    {
+                        field: "This field is required when any linked prompt "
+                        "uses text extraction."
+                        for field in missing
+                    }
+                )
+        return attrs
+
     def to_representation(self, instance):  # type: ignore
         rep: dict[str, str] = super().to_representation(instance)
         llm = rep[ProfileManagerKeys.LLM]
-        embedding = rep[ProfileManagerKeys.EMBEDDING_MODEL]
-        vector_db = rep[ProfileManagerKeys.VECTOR_STORE]
-        x2text = rep[ProfileManagerKeys.X2TEXT]
+        embedding = rep.get(ProfileManagerKeys.EMBEDDING_MODEL)
+        vector_db = rep.get(ProfileManagerKeys.VECTOR_STORE)
+        x2text = rep.get(ProfileManagerKeys.X2TEXT)
         if llm:
             rep[ProfileManagerKeys.LLM] = AdapterProcessor.get_adapter_instance_by_id(llm)
         if embedding:

diff --git a/backend/prompt_studio/prompt_studio_core_v2/constants.py b/backend/prompt_studio/prompt_studio_core_v2/constants.py
@@ -108,6 +108,10 @@ class ToolStudioPromptKeys:
     # Webhook postprocessing settings
     ENABLE_POSTPROCESSING_WEBHOOK = "enable_postprocessing_webhook"
     POSTPROCESSING_WEBHOOK_URL = "postprocessing_webhook_url"
+    # Vision mode fields
+    EXTRACTION_INPUTS = "extraction_inputs"
+    SOURCE_OF_TRUTH = "source_of_truth"
+    SOURCE_FILE_PATH = "source_file_path"
 
 
 class FileViewTypes:

diff --git a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py
@@ -411,6 +411,10 @@ def _build_prompt_output(
         if lookup_config := get_lookup_config(prompt):
             output["lookup_config"] = lookup_config
 
+        # Vision mode fields
+        output[TSPKeys.EXTRACTION_INPUTS] = prompt.extraction_inputs
+        output[TSPKeys.SOURCE_OF_TRUTH] = prompt.source_of_truth
+
         output[TSPKeys.EVAL_SETTINGS] = {}
         output[TSPKeys.EVAL_SETTINGS][TSPKeys.EVAL_SETTINGS_EVALUATE] = prompt.evaluate
         output[TSPKeys.EVAL_SETTINGS][TSPKeys.EVAL_SETTINGS_MONITOR_LLM] = [monitor_llm]
@@ -825,6 +829,10 @@ def build_fetch_response_payload(
         if lookup_config := get_lookup_config(prompt):
             output["lookup_config"] = lookup_config
 
+        # Vision mode fields
+        output[TSPKeys.EXTRACTION_INPUTS] = prompt.extraction_inputs
+        output[TSPKeys.SOURCE_OF_TRUTH] = prompt.source_of_truth
+
         output[TSPKeys.EVAL_SETTINGS] = {}
         output[TSPKeys.EVAL_SETTINGS][TSPKeys.EVAL_SETTINGS_EVALUATE] = prompt.evaluate
         output[TSPKeys.EVAL_SETTINGS][TSPKeys.EVAL_SETTINGS_MONITOR_LLM] = [monitor_llm]
@@ -874,6 +882,7 @@ def build_fetch_response_payload(
             TSPKeys.FILE_NAME: doc_name,
             TSPKeys.FILE_HASH: file_hash,
             TSPKeys.FILE_PATH: extract_path,
+            TSPKeys.SOURCE_FILE_PATH: file_path,
             Common.LOG_EVENTS_ID: StateStore.get(Common.LOG_EVENTS_ID),
             TSPKeys.EXECUTION_SOURCE: ExecutionSource.IDE.value,
             TSPKeys.CUSTOM_DATA: tool.custom_data,
@@ -1064,6 +1073,7 @@ def build_bulk_fetch_response_payload(
             TSPKeys.FILE_NAME: doc_name,
             TSPKeys.FILE_HASH: file_hash,
             TSPKeys.FILE_PATH: extract_path,
+            TSPKeys.SOURCE_FILE_PATH: file_path,
             Common.LOG_EVENTS_ID: StateStore.get(Common.LOG_EVENTS_ID),
             TSPKeys.EXECUTION_SOURCE: ExecutionSource.IDE.value,
             TSPKeys.CUSTOM_DATA: tool.custom_data,
@@ -1225,6 +1235,7 @@ def build_single_pass_payload(
             TSPKeys.FILE_HASH: file_hash,
             TSPKeys.FILE_NAME: doc_name,
             TSPKeys.FILE_PATH: file_path,
+            TSPKeys.SOURCE_FILE_PATH: doc_path,
             Common.LOG_EVENTS_ID: StateStore.get(Common.LOG_EVENTS_ID),
             TSPKeys.EXECUTION_SOURCE: ExecutionSource.IDE.value,
             TSPKeys.CUSTOM_DATA: tool.custom_data,
@@ -1950,6 +1961,9 @@ def _fetch_response(
             output[TSPKeys.POSTPROCESSING_WEBHOOK_URL] = webhook_url
         if lookup_config := get_lookup_config(prompt):
             output["lookup_config"] = lookup_config
+        # Vision mode fields
+        output[TSPKeys.EXTRACTION_INPUTS] = prompt.extraction_inputs
+        output[TSPKeys.SOURCE_OF_TRUTH] = prompt.source_of_truth
         # Eval settings for the prompt
         output[TSPKeys.EVAL_SETTINGS] = {}
         output[TSPKeys.EVAL_SETTINGS][TSPKeys.EVAL_SETTINGS_EVALUATE] = prompt.evaluate
@@ -2000,6 +2014,8 @@ def _fetch_response(
             TSPKeys.FILE_NAME: doc_name,
             TSPKeys.FILE_HASH: file_hash,
             TSPKeys.FILE_PATH: doc_path,
+            # NOTE(review): assumes file_path is the un-extracted source document here.
+            TSPKeys.SOURCE_FILE_PATH: file_path,
             Common.LOG_EVENTS_ID: StateStore.get(Common.LOG_EVENTS_ID),
             TSPKeys.EXECUTION_SOURCE: ExecutionSource.IDE.value,
             TSPKeys.CUSTOM_DATA: tool.custom_data,

diff --git a/backend/prompt_studio/prompt_studio_registry_v2/constants.py b/backend/prompt_studio/prompt_studio_registry_v2/constants.py
@@ -106,6 +106,9 @@ class JsonSchemaKey:
     ENABLE_POSTPROCESSING_WEBHOOK = "enable_postprocessing_webhook"
     POSTPROCESSING_WEBHOOK_URL = "postprocessing_webhook_url"
     WORD_CONFIDENCE_POSTAMBLE = "word_confidence_postamble"
+    # Vision mode fields
+    EXTRACTION_INPUTS = "extraction_inputs"
+    SOURCE_OF_TRUTH = "source_of_truth"
 
 
 class SpecKey:

diff --git a/backend/prompt_studio/prompt_studio_registry_v2/prompt_studio_registry_helper.py b/backend/prompt_studio/prompt_studio_registry_v2/prompt_studio_registry_helper.py
@@ -266,10 +266,19 @@ def frame_export_json(
 
         embedding_suffix = ""
         adapter_id = ""
-        vector_db = str(default_llm_profile.vector_store.id)
-        embedding_model = str(default_llm_profile.embedding_model.id)
+        # Extraction adapters may be null for image-only profiles
+        vector_db = (
+            str(default_llm_profile.vector_store.id)
+            if default_llm_profile.vector_store
+            else ""
+        )
+        embedding_model = (
+            str(default_llm_profile.embedding_model.id)
+            if default_llm_profile.embedding_model
+            else ""
+        )
         llm = str(default_llm_profile.llm.id)
-        x2text = str(default_llm_profile.x2text.id)
+        x2text = str(default_llm_profile.x2text.id) if default_llm_profile.x2text else ""
 
         # Tool settings
         tool_settings = {}
@@ -328,36 +337,51 @@ def frame_export_json(
                     invalidated_outputs.append(prompt.prompt_key)
                     continue
 
-            vector_db = str(prompt.profile_manager.vector_store.id)
-            embedding_model = str(prompt.profile_manager.embedding_model.id)
-            llm = str(prompt.profile_manager.llm.id)
-            x2text = str(prompt.profile_manager.x2text.id)
-            adapter_id = str(prompt.profile_manager.embedding_model.adapter_id)
-            embedding_suffix = adapter_id.split("|")[0]
+            # Extraction adapters may be null for image-only prompts
+            pm = prompt.profile_manager
+            vector_db = str(pm.vector_store.id) if pm.vector_store else ""
+            embedding_model = str(pm.embedding_model.id) if pm.embedding_model else ""
+            llm = str(pm.llm.id)
+            x2text = str(pm.x2text.id) if pm.x2text else ""
+            if pm.embedding_model:
+                adapter_id = str(pm.embedding_model.adapter_id)
+                embedding_suffix = adapter_id.split("|")[0]
+            else:
+                adapter_id = ""
+                embedding_suffix = ""
 
             output[JsonSchemaKey.PROMPT] = prompt.prompt
             output[JsonSchemaKey.ACTIVE] = prompt.active
             output[JsonSchemaKey.REQUIRED] = prompt.required
-            output[JsonSchemaKey.CHUNK_SIZE] = prompt.profile_manager.chunk_size
+            output[JsonSchemaKey.CHUNK_SIZE] = pm.chunk_size
             output[JsonSchemaKey.VECTOR_DB] = vector_db
             output[JsonSchemaKey.EMBEDDING] = embedding_model
             output[JsonSchemaKey.X2TEXT_ADAPTER] = x2text
-            output[JsonSchemaKey.CHUNK_OVERLAP] = prompt.profile_manager.chunk_overlap
+            output[JsonSchemaKey.CHUNK_OVERLAP] = pm.chunk_overlap
             output[JsonSchemaKey.LLM] = llm
             output[JsonSchemaKey.PREAMBLE] = tool.preamble
             output[JsonSchemaKey.POSTAMBLE] = tool.postamble
             output[JsonSchemaKey.GRAMMAR] = grammar_list
             output[JsonSchemaKey.TYPE] = prompt.enforce_type
             output[JsonSchemaKey.NAME] = prompt.prompt_key
-            output[JsonSchemaKey.RETRIEVAL_STRATEGY] = (
-                prompt.profile_manager.retrieval_strategy
-            )
-            output[JsonSchemaKey.SIMILARITY_TOP_K] = (
-                prompt.profile_manager.similarity_top_k
-            )
-            output[JsonSchemaKey.SECTION] = prompt.profile_manager.section
-            output[JsonSchemaKey.REINDEX] = prompt.profile_manager.reindex
+            output[JsonSchemaKey.RETRIEVAL_STRATEGY] = pm.retrieval_strategy
+            output[JsonSchemaKey.SIMILARITY_TOP_K] = pm.similarity_top_k
+            output[JsonSchemaKey.SECTION] = pm.section
+            output[JsonSchemaKey.REINDEX] = pm.reindex
             output[JsonSchemaKey.EMBEDDING_SUFFIX] = embedding_suffix
+            # Vision mode fields — both forced to "text" when single-pass is enabled
+            if tool.single_pass_extraction_mode and prompt.extraction_inputs != "text":
+                logger.warning(
+                    "Single-pass extraction enabled: forcing prompt '%s' "
+                    "from extraction_inputs='%s' to 'text' in export",
+                    prompt.prompt_key,
+                    prompt.extraction_inputs,
+                )
+                output[JsonSchemaKey.EXTRACTION_INPUTS] = "text"
+                output[JsonSchemaKey.SOURCE_OF_TRUTH] = "text"
+            else:
+                output[JsonSchemaKey.EXTRACTION_INPUTS] = prompt.extraction_inputs
+                output[JsonSchemaKey.SOURCE_OF_TRUTH] = prompt.source_of_truth
             # Webhook postprocessing settings
             output[JsonSchemaKey.ENABLE_POSTPROCESSING_WEBHOOK] = (
                 prompt.enable_postprocessing_webhook

diff --git a/...pt_studio/prompt_studio_v2/migrations/0015_toolstudioprompt_extraction_inputs_and_more.py b/...pt_studio/prompt_studio_v2/migrations/0015_toolstudioprompt_extraction_inputs_and_more.py
@@ -0,0 +1,36 @@
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Add the extraction_inputs and source_of_truth columns to ToolStudioPrompt.
+
+    Both are TextFields restricted by ``choices`` and both default to "text".
+    """
+    dependencies = [
+        ("prompt_studio_v2", "0014_alter_toolstudioprompt_enforce_type"),
+    ]
+
+    operations = [
+        # Which inputs are sent to the LLM: "text", "image" or "both".
+        migrations.AddField(
+            model_name="toolstudioprompt",
+            name="extraction_inputs",
+            field=models.TextField(
+                choices=[
+                    ("text", "Text only (default)"),
+                    ("image", "Page image only"),
+                    ("both", "Text and page image"),
+                ],
+                db_comment="What inputs to send to the LLM: text, image, or both",
+                default="text",
+            ),
+        ),
+        # Which input is the source of truth: "text" or "image" (per the
+        # db_comment, only meaningful when extraction_inputs is "both").
+        migrations.AddField(
+            model_name="toolstudioprompt",
+            name="source_of_truth",
+            field=models.TextField(
+                choices=[
+                    ("text", "Text is source of truth"),
+                    ("image", "Image is source of truth"),
+                ],
+                db_comment="Which input is source of truth "
+                "(only meaningful when extraction_inputs=both)",
+                default="text",
+            ),
+        ),
+    ]