From 2d599dcaf2a77a564075b98c8044ba7c18090984 Mon Sep 17 00:00:00 2001
From: Rob Court <robbie.court@gmail.com>
Date: Tue, 23 Jun 2026 20:29:56 +0000
Subject: [PATCH 1/4] Restore dropped term-info fields and harden
 License/heavy-term paths

term_info_parse_object dropped three reference-bearing fields that the
panel renders today, so the VFBquery term-info path showed less than the
legacy SOLR-field path:

- def_pubs (class definition references) were never read. The legacy
  processor appends them inline to the definition
  (VFBProcessTermInfoCachedJson.java:937); restored the same way, as
  microref links on Meta.Description (not a separate Publications entry,
  so the rendered panel is identical and no new section is introduced).
- pub_syn synonyms were gated Class-only, dropping Individual synonyms.
  Each synonym already carries its own publication inline, matching the
  legacy 'synonym (microref)' render; only the Class gate is removed.
- pub_specific_content was gated on the SuperType "Publication" but the
  SOLR marker is the lowercase "pub", so pub title/PubMed/DOI/FlyBase
  never surfaced.

Also harden two operational paths:

- solr_result_cache.cache_result issued a blocking commit=true write. On a
  wedged IndexWriter a cold-miss term (e.g. a License individual, never
  pre-warmed by owlery-cache-reload) stalled the ha_api worker and the
  request surfaced as HTTP 503. Default to commit=false (no explicit
  commit; the core's autoSoftCommit handles visibility); override with
  VFBQUERY_SOLR_WRITE_COMMIT=true.
- fill_query_results re-ran each query at limit=-1 purely to length-check
  the result, even when the preview was not saturated. Skip the full
  re-run when the preview returned fewer rows than its cap.

Add test_term_info_parity covering the three field gaps plus a License
smoke test.
---
 src/test/test_term_info_parity.py | 107 ++++++++++++++++++++++++++++++
 src/vfbquery/solr_result_cache.py |  16 ++++-
 src/vfbquery/vfb_queries.py       |  78 +++++++++++++++-------
 3 files changed, 176 insertions(+), 25 deletions(-)
 create mode 100644 src/test/test_term_info_parity.py

diff --git a/src/test/test_term_info_parity.py b/src/test/test_term_info_parity.py
new file mode 100644
index 0000000..cb8a821
--- /dev/null
+++ b/src/test/test_term_info_parity.py
@@ -0,0 +1,107 @@
+"""
+Term-info parity + robustness tests for the VFBquery -> term-info migration.
+
+Covers the three serialiser gaps reconciled between ``term_info_parse_object``
+(``vfb_queries.py``) and the canonical dataclass serialiser
+(``term_info_queries.py``), plus the License-term robustness fix:
+
+  A. Class definition references (``def_pubs``) must reach ``Meta.Description``.
+  B. Individual-term synonyms (``pub_syn``) must reach ``Synonyms``
+     (previously gated Class-only).
+  C. Publication-term external content (``pub_specific_content``) must reach
+     ``Publications`` -- the SOLR SuperType marker is the lowercase ``pub``.
+  D. ``get_term_info`` must not 5xx / hang on any SuperType -- the License
+     individual is the regression case (cold-miss cache write must be
+     non-blocking).
+
+The parity checks run against ``term_info_parse_object`` on the raw SOLR
+``term_info`` doc (a read-only fetch, no per-query count calls), so they are
+fast and deterministic. Caching is disabled for the whole module so nothing is
+written back to the shared production cache.
+"""
+
+import os
+os.environ.setdefault("VFBQUERY_CACHE_ENABLED", "false")
+
+import unittest
+import sys
+
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
+
+from vfbquery import vfb_queries as q
+from vfbquery.cached_functions import is_valid_term_info_result
+
+
+class _Results:
+    """Minimal stand-in for a pysolr result object."""
+    def __init__(self, docs):
+        self.docs = docs
+        self.hits = len(docs)
+
+
+def _raw(short_form):
+    """Fetch the raw SOLR doc(s) for a term (read-only) and wrap them."""
+    return _Results(q.vfb_solr.search('id:' + short_form).docs)
+
+
+class TermInfoParityTest(unittest.TestCase):
+
+    def _parse(self, short_form):
+        ti = q.term_info_parse_object(_raw(short_form), short_form)
+        self.assertIsNotNone(ti, f"parse returned None for {short_form}")
+        return ti
+
+    # --- Gap A: class definition references (def_pubs) -> inline in description
+    # The legacy panel appends def_pubs as microref links to the definition
+    # (VFBProcessTermInfoCachedJson.java:937), so parity is an inline render in
+    # Meta.Description, not a separate Publications entry.
+    def test_class_def_pubs_inline_in_description(self):
+        ti = self._parse("FBbt_00003748")  # medulla
+        desc = ti.get("Meta", {}).get("Description", "")
+        self.assertIn("FBrf0231227", desc, "def_pub FBrf0231227 missing from description")
+        self.assertIn("FBrf0224194", desc, "def_pub FBrf0224194 missing from description")
+
+    def test_kenyon_def_pubs_all_inline(self):
+        ti = self._parse("FBbt_00003686")  # Kenyon cell
+        desc = ti.get("Meta", {}).get("Description", "")
+        for ref in ("FBrf0092568", "FBrf0214059", "FBrf0205263"):
+            self.assertIn(ref, desc, f"def_pub {ref} missing from description")
+
+    # --- Gap B: Individual synonyms (pub_syn) -> Synonyms -------------------
+    def test_individual_synonyms_present(self):
+        ti = self._parse("VFB_00101385")  # individual image (MEon)
+        labels = {s.get("label") for s in ti.get("Synonyms", [])}
+        self.assertIn("MEon JRC_FlyEM_Hemibrain", labels,
+                      "Individual pub_syn dropped from Synonyms")
+
+    def test_class_synonyms_not_regressed(self):
+        ti = self._parse("FBgn0010339")  # gene 128up: 7 synonyms
+        self.assertGreaterEqual(len(ti.get("Synonyms", [])), 7,
+                                "class synonyms regressed")
+
+    # --- Gap C: publication external content (pub_specific_content) ---------
+    def test_publication_external_content_present(self):
+        ti = self._parse("FBrf0242477")  # Dolan et al., 2019
+        pubs = ti.get("Publications", [])
+        self.assertTrue(pubs, "pub_specific_content dropped: Publications empty")
+        pub = pubs[0]
+        self.assertTrue(pub.get("title"), "pub title missing")
+        refs = " ".join(pub.get("refs", []))
+        self.assertIn("31112130", refs, "PubMed id missing")
+        self.assertIn("FBrf0242477", refs, "FlyBase ref missing")
+        self.assertIn("10.7554/eLife.43079", refs, "DOI missing")
+
+    # --- Gap D: License term must not 5xx / return None --------------------
+    def test_license_term_info_does_not_5xx(self):
+        # preview=False avoids the per-query count calls; License has no
+        # queries anyway. The point is that a valid dict comes back rather
+        # than None or a raised exception.
+        result = q.get_term_info("VFBlicense_CC_BY_SA_4_0", preview=False)
+        self.assertIsInstance(result, dict, "License term_info did not return a dict")
+        self.assertTrue(is_valid_term_info_result(result),
+                        "License term_info failed validity check")
+        self.assertIn("License", result.get("SuperTypes", []))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/src/vfbquery/solr_result_cache.py b/src/vfbquery/solr_result_cache.py
index 47b901d..62f8863 100644
--- a/src/vfbquery/solr_result_cache.py
+++ b/src/vfbquery/solr_result_cache.py
@@ -493,12 +493,24 @@ def cache_result(self, query_type: str, term_id: str, result: Any, **params) ->
                 "expires_at": cached_data["expires_at"]
             }
             
-            # Store cache document 
+            # Store cache document.
+            # Send no explicit commit by default: a hard per-write
+            # ``commit=true`` flush blocks the request until the IndexWriter
+            # completes, and on a wedged writer (e.g. the soft-NFS write.lock
+            # EIO failure mode) that stall propagates up — a cold-miss term
+            # such as a License individual then hangs and saturates the ha_api
+            # worker queue, surfacing as HTTP 503. Relying on the core's
+            # autoSoftCommit (as the sibling write paths in this module already
+            # do) keeps the write fast and non-blocking; the 3-month cache
+            # tolerates a few seconds' visibility delay. Override with
+            # VFBQUERY_SOLR_WRITE_COMMIT=true if an immediate commit is needed.
+            commit_flag = os.getenv('VFBQUERY_SOLR_WRITE_COMMIT', 'false').lower() \
+                in ('1', 'true', 'yes')
             response = requests.post(
                 f"{self.cache_url}/update",
                 data=json.dumps([cache_doc]),
                 headers={"Content-Type": "application/json"},
-                params={"commit": "true"},  # Immediate commit for availability
+                params={"commit": "true" if commit_flag else "false"},
                 timeout=int(os.getenv('VFBQUERY_SOLR_WRITE_TIMEOUT', '60'))
             )
             
diff --git a/src/vfbquery/vfb_queries.py b/src/vfbquery/vfb_queries.py
index 8657b35..f8809bf 100644
--- a/src/vfbquery/vfb_queries.py
+++ b/src/vfbquery/vfb_queries.py
@@ -515,6 +515,18 @@ def term_info_parse_object(results, short_form):
             termInfo["Meta"]["Description"] = "%s"%("".join(vfbTerm.term.description))
         except (NameError, AttributeError):
             pass
+        # Append class definition references (def_pubs) inline to the description
+        # as markdown microref links, matching how the panel renders them today
+        # (legacy VFBProcessTermInfoCachedJson definition() + "(<microrefs>)").
+        # Kept inline rather than as a separate Publications entry so the display
+        # is identical and no new panel section is introduced.
+        if getattr(vfbTerm, 'def_pubs', None):
+            def_refs = [p.get_microref() for p in vfbTerm.def_pubs
+                        if hasattr(p, 'get_miniref') and p.get_miniref()
+                        and hasattr(p, 'get_microref') and p.get_microref()]
+            if def_refs:
+                existing_desc = termInfo["Meta"].get("Description", "")
+                termInfo["Meta"]["Description"] = (existing_desc + "\n(" + ", ".join(def_refs) + ")") if existing_desc else ("(" + ", ".join(def_refs) + ")")
         try:
             # Retrieve comment from the term's comment attribute
             termInfo["Meta"]["Comment"] = "%s"%("".join(vfbTerm.term.comment))
@@ -1190,7 +1202,7 @@ def term_info_parse_object(results, short_form):
                     publication["title"] = pub.core.label if pub.core.label else ""
                     publication["short_form"] = pub.core.short_form if pub.core.short_form else ""
                     publication["microref"] = pub.get_microref() if hasattr(pub, 'get_microref') and pub.get_microref() else ""
-                    
+
                     # Add external references
                     refs = []
                     if hasattr(pub, 'PubMed') and pub.PubMed:
@@ -1199,14 +1211,16 @@ def term_info_parse_object(results, short_form):
                         refs.append(f"http://flybase.org/reports/{pub.FlyBase}")
                     if hasattr(pub, 'DOI') and pub.DOI:
                         refs.append(f"https://doi.org/{pub.DOI}")
-                    
+
                     publication["refs"] = refs
                     publications.append(publication)
-            
+
             termInfo["Publications"] = publications
 
-        # Add Synonyms for Class entities
-        if termInfo["SuperTypes"] and "Class" in termInfo["SuperTypes"] and vfbTerm.pub_syn and len(vfbTerm.pub_syn) > 0:
+        # Add Synonyms from pub_syn. Not gated on Class: Individual terms also
+        # carry pub_syn (parity gap B — Individual synonyms were dropped when
+        # this was Class-only).
+        if termInfo["SuperTypes"] and vfbTerm.pub_syn and len(vfbTerm.pub_syn) > 0:
             synonyms = []
             for syn in vfbTerm.pub_syn:
                 if hasattr(syn, 'synonym') and syn.synonym:
@@ -1260,8 +1274,10 @@ def term_info_parse_object(results, short_form):
             if synonyms and "Synonyms" not in termInfo:
                 termInfo["Synonyms"] = synonyms
 
-        # Special handling for Publication entities
-        if termInfo["SuperTypes"] and "Publication" in termInfo["SuperTypes"] and vfbTerm.pub_specific_content:
+        # Special handling for Publication entities. The SOLR SuperType marker is
+        # the lowercase "pub" (parity gap C — gating on "Publication" meant the
+        # block never fired, dropping pub title/PubMed/DOI/FlyBase links).
+        if termInfo["SuperTypes"] and ("pub" in termInfo["SuperTypes"] or "Publication" in termInfo["SuperTypes"]) and vfbTerm.pub_specific_content:
             publication = {}
             publication["title"] = vfbTerm.pub_specific_content.title if hasattr(vfbTerm.pub_specific_content, 'title') else ""
             publication["short_form"] = vfbTerm.term.core.short_form
@@ -6098,22 +6114,38 @@ def process_query(query):
                     result_count = result['count']
                     # If limit was applied, the count in dict may be wrong, get correct count
                     if query['preview'] > 0 and result_count == len(result['rows']):
-                        try:
-                            full_kwargs = {'return_dataframe': False, 'limit': -1}
-                            if supports_force_refresh:
-                                full_kwargs['force_refresh'] = force_refresh
-                            if function_args and takes_short_form:
-                                short_form_value = list(function_args.values())[0]
-                                full_dict = function(short_form_value, **full_kwargs)
-                            else:
-                                full_dict = function(**full_kwargs)
-                            result_count = full_dict['count']
-                        except Exception as e:
-                            print(f"Error getting full count for {query['function']}: {e}")
-                            result_count = result['count']  # Keep as is
+                        # Skip the full limit=-1 re-run when the preview was not
+                        # saturated: fewer returned rows than the preview cap means
+                        # the preview already holds the entire result set, so the
+                        # count is exactly the number of preview rows. This avoids
+                        # materialising every row purely to length-check it — the
+                        # main driver of cold term-info latency on SuperTypes that
+                        # offer many queries (expression pattern, scRNAseq cluster),
+                        # and a no-op win for zero/low-count queries (grey-out path).
+                        if len(result['rows']) < query['preview']:
+                            result_count = len(result['rows'])
+                        else:
+                          try:
+                              full_kwargs = {'return_dataframe': False, 'limit': -1}
+                              if supports_force_refresh:
+                                  full_kwargs['force_refresh'] = force_refresh
+                              if function_args and takes_short_form:
+                                  short_form_value = list(function_args.values())[0]
+                                  full_dict = function(short_form_value, **full_kwargs)
+                              else:
+                                  full_dict = function(**full_kwargs)
+                              result_count = full_dict['count']
+                          except Exception as e:
+                              print(f"Error getting full count for {query['function']}: {e}")
+                              result_count = result['count']  # Keep as is
                 elif isinstance(result, pd.DataFrame):
-                    # For DataFrame results, we need the full count even when preview is limited
-                    try:
+                    # For DataFrame results, we need the full count even when preview is limited.
+                    # But skip the full limit=-1 re-run when the preview was not saturated
+                    # (fewer rows than the cap means the preview already holds every row).
+                    if query['preview'] > 0 and len(result) < query['preview']:
+                        result_count = len(result)
+                    else:
+                      try:
                         full_kwargs = {'return_dataframe': True, 'limit': -1}
                         if supports_force_refresh:
                             full_kwargs['force_refresh'] = force_refresh
@@ -6123,7 +6155,7 @@ def process_query(query):
                         else:
                             full_result = function(**full_kwargs)
                         result_count = len(full_result)
-                    except Exception as e:
+                      except Exception as e:
                         print(f"Error getting full count for {query['function']}: {e}")
                         result_count = len(result)  # Fallback to limited count
                 else:

From 83fb10fc41c0783551cec49d2f3be31fc62cb151 Mon Sep 17 00:00:00 2001
From: Rob Court <robbie.court@gmail.com>
Date: Tue, 23 Jun 2026 21:39:46 +0000
Subject: [PATCH 2/4] Surface xrefs, related_individuals and targeting in
 term_info parse
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Field-coverage sweep against the legacy processor
(VFBProcessTermInfoCachedJson.java) found term_info_parse_object also drops
sections the panel renders today:

- xrefs (external DB cross-references) were dropped entirely — e.g. medulla's
  Insect Brain DB link and gene FlyBase links. Now emitted as a structured
  Xrefs list (site label, accession, external link, icon).
- related_individuals (present on most FBbt classes) were dropped. Now emitted
  as Meta.RelatedIndividuals, grouped like relationships.
- targeting_splits / target_neurons are wired (TargetingSplits/TargetingNeurons)
  to match the legacy model; unpopulated in current SOLR data so a no-op today,
  but no longer at risk of being silently dropped.

Declare the new fields on TermInfoOutputSchema so .load keeps them.

images, downloads and queries are already covered: image/example/domain records
carry the nrrd/obj/wlz/swc URLs and template voxel/extent/centre, so downloads
are recreatable client-side.

Extend test_term_info_parity with xref (anatomy + gene) and related_individuals
cases.
---
 src/test/test_term_info_parity.py | 21 +++++++++
 src/vfbquery/vfb_queries.py       | 71 +++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+)

diff --git a/src/test/test_term_info_parity.py b/src/test/test_term_info_parity.py
index cb8a821..700fa61 100644
--- a/src/test/test_term_info_parity.py
+++ b/src/test/test_term_info_parity.py
@@ -91,6 +91,27 @@ def test_publication_external_content_present(self):
         self.assertIn("FBrf0242477", refs, "FlyBase ref missing")
         self.assertIn("10.7554/eLife.43079", refs, "DOI missing")
 
+    # --- Coverage: external xref links (genes, anatomy) --------------------
+    def test_xrefs_surface_as_links(self):
+        ti = self._parse("FBbt_00003748")  # medulla -> Insect Brain DB
+        xr = ti.get("Xrefs") or []
+        self.assertTrue(xr, "Xrefs dropped for medulla")
+        ibdb = [x for x in xr if x.get("label") == "Insect Brain DB"]
+        self.assertTrue(ibdb, "Insect Brain DB xref missing")
+        self.assertIn("insectbraindb.org/app/structures/38", ibdb[0].get("link", ""))
+
+    def test_gene_xref_flybase(self):
+        ti = self._parse("FBgn0051882")  # a gene with a FlyBase xref
+        links = " ".join(x.get("link", "") for x in (ti.get("Xrefs") or []))
+        self.assertIn("flybase.org/reports/FBgn0051882", links, "gene FlyBase xref missing")
+
+    # --- Coverage: related_individuals -------------------------------------
+    def test_related_individuals_surface(self):
+        ti = self._parse("FBbt_00000058")  # FBbt class carrying related_individuals
+        ri = ti.get("Meta", {}).get("RelatedIndividuals", "")
+        self.assertTrue(ri, "related_individuals dropped")
+        self.assertIn("FBbt_00000057", ri, "related individual target id missing")
+
     # --- Gap D: License term must not 5xx / return None --------------------
     def test_license_term_info_does_not_5xx(self):
         # preview=False avoids the per-query count calls; License has no
diff --git a/src/vfbquery/vfb_queries.py b/src/vfbquery/vfb_queries.py
index f8809bf..b4237d0 100644
--- a/src/vfbquery/vfb_queries.py
+++ b/src/vfbquery/vfb_queries.py
@@ -331,6 +331,12 @@ class TermInfoOutputSchema(Schema):
     Publications = fields.List(fields.Dict(keys=fields.String(), values=fields.Raw()), required=False)
     Synonyms = fields.List(fields.Dict(keys=fields.String(), values=fields.Raw()), required=False, allow_none=True)
     Technique = fields.List(fields.String(), required=False, allow_none=True)
+    # External DB cross-references (site label + accession link + icon), rendered
+    # as the panel's xrefs section. TargetingSplits/TargetingNeurons: splits that
+    # target this neuron / neurons a split targets, each their own panel section.
+    Xrefs = fields.List(fields.Dict(keys=fields.String(), values=fields.Raw()), required=False, allow_none=True)
+    TargetingSplits = fields.List(fields.String(), required=False, allow_none=True)
+    TargetingNeurons = fields.List(fields.String(), required=False, allow_none=True)
 
     @post_load
     def make_term_info(self, data, **kwargs):
@@ -1308,6 +1314,71 @@ def term_info_parse_object(results, short_form):
                         termInfo["Synonyms"].append(synonym)
                         existing_labels.add(synonym["label"])
 
+        # External database cross-references (xrefs). Rendered today as the
+        # panel's xrefs link section (VFBProcessTermInfoCachedJson.java:1536):
+        # site label, icon and the external accession link. Previously dropped
+        # by this parser (e.g. medulla's Insect Brain DB link).
+        if getattr(vfbTerm, 'xrefs', None):
+            xrefs_out = []
+            for x in vfbTerm.xrefs:
+                site = getattr(x, 'site', None)
+                label = getattr(site, 'label', '') if site else ''
+                acc = x.accession if getattr(x, 'accession', None) and x.accession != "None" else ''
+                if acc:
+                    link = (x.link_base or '') + acc + (x.link_postfix or '')
+                elif getattr(x, 'homepage', None):
+                    link = x.homepage
+                else:
+                    link = getattr(site, 'iri', '') if site else ''
+                entry = {"label": label, "accession": acc, "link": link}
+                if getattr(x, 'icon', None):
+                    entry["icon"] = x.icon
+                xrefs_out.append(entry)
+            if xrefs_out:
+                termInfo["Xrefs"] = xrefs_out
+
+        # Related individuals — same Rel shape as relationships, rendered as its
+        # own panel section (VFBProcessTermInfoCachedJson.java:1529). Kept as a
+        # Meta string so it travels with the other Meta rows.
+        if getattr(vfbTerm, 'related_individuals', None):
+            grouped_ri = {}
+            for rel in vfbTerm.related_individuals:
+                if not (hasattr(rel, 'relation') and hasattr(rel.relation, 'label')):
+                    continue
+                if not (hasattr(rel, 'object') and hasattr(rel.object, 'label')):
+                    continue
+                rid = getattr(rel.relation, 'short_form', None) or rel.relation.label
+                key = (rel.relation.label, rid)
+                obj = (rel.object.label, getattr(rel.object, 'short_form', ''))
+                grouped_ri.setdefault(key, set()).add(obj)
+            related = []
+            for (rlabel, rid), objs in sorted(grouped_ri.items()):
+                objlinks = ", ".join("[%s](%s)" % (encode_brackets(o[0]), o[1]) for o in sorted(objs))
+                related.append("[%s](%s): %s" % (encode_brackets(rlabel), rid, objlinks))
+            if related:
+                termInfo["Meta"]["RelatedIndividuals"] = "; ".join(related)
+
+        # Splits that target this neuron / neurons a split targets — each its own
+        # panel section (VFBProcessTermInfoCachedJson.java:1827 / :1835).
+        if getattr(vfbTerm, 'targeting_splits', None):
+            ts, seen = [], set()
+            for s in vfbTerm.targeting_splits:
+                sf = getattr(s, 'short_form', None)
+                if sf and sf not in seen:
+                    ts.append("[%s](%s)" % (encode_brackets(s.label or sf), sf))
+                    seen.add(sf)
+            if ts:
+                termInfo["TargetingSplits"] = ts
+        if getattr(vfbTerm, 'target_neurons', None):
+            tn, seen = [], set()
+            for n in vfbTerm.target_neurons:
+                sf = getattr(n, 'short_form', None)
+                if sf and sf not in seen:
+                    tn.append("[%s](%s)" % (encode_brackets(n.label or sf), sf))
+                    seen.add(sf)
+            if tn:
+                termInfo["TargetingNeurons"] = tn
+
         # Add the queries to the term info
         termInfo["Queries"] = queries
 

From a4691a9f653d93c798805fb32ab7e5614f98c8b1 Mon Sep 17 00:00:00 2001
From: Rob Court <robbie.court@gmail.com>
Date: Wed, 24 Jun 2026 07:06:41 +0000
Subject: [PATCH 3/4] Surface DataSet link/logo; drop static targeting (it is
 query data)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed VFB_json_schema_indexer (the indexer that produces the SOLR
term_info field) — the authoritative per-SuperType clause set lives in its
vfb_query_builder QueryLibrary. Two follow-ups from that review:

- DataSet term.link (e.g. Ito2013's FlyBase reference) and term.logo were
  dropped by term_info_parse_object. Now surfaced as Meta.Link / Meta.Logo,
  matching the panel's link/logo rows.
- targeting_splits (neuron_split clause, Neuron classes) and target_neurons
  (split_neuron clause, Split classes) are Cypher-derived in the indexer.
  They are query data, not static term metadata, so the static fields added
  earlier are removed; they will be reintroduced as proper query types
  (preview + count badge) displayed like the current term-info section.

dataset_counts (images/types) is already covered by the DatasetImages query
count badge. Add a DataSet-link test.
---
 src/test/test_term_info_parity.py |  6 +++++
 src/vfbquery/vfb_queries.py       | 40 +++++++++++++------------------
 2 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/src/test/test_term_info_parity.py b/src/test/test_term_info_parity.py
index 700fa61..266a11d 100644
--- a/src/test/test_term_info_parity.py
+++ b/src/test/test_term_info_parity.py
@@ -112,6 +112,12 @@ def test_related_individuals_surface(self):
         self.assertTrue(ri, "related_individuals dropped")
         self.assertIn("FBbt_00000057", ri, "related individual target id missing")
 
+    # --- Coverage: DataSet external link -----------------------------------
+    def test_dataset_link_present(self):
+        ti = self._parse("Ito2013")
+        link = ti.get("Meta", {}).get("Link", "")
+        self.assertIn("flybase.org/reports/FBrf0221438", link, "DataSet link dropped")
+
     # --- Gap D: License term must not 5xx / return None --------------------
     def test_license_term_info_does_not_5xx(self):
         # preview=False avoids the per-query count calls; License has no
diff --git a/src/vfbquery/vfb_queries.py b/src/vfbquery/vfb_queries.py
index b4237d0..c6823be 100644
--- a/src/vfbquery/vfb_queries.py
+++ b/src/vfbquery/vfb_queries.py
@@ -332,11 +332,8 @@ class TermInfoOutputSchema(Schema):
     Synonyms = fields.List(fields.Dict(keys=fields.String(), values=fields.Raw()), required=False, allow_none=True)
     Technique = fields.List(fields.String(), required=False, allow_none=True)
     # External DB cross-references (site label + accession link + icon), rendered
-    # as the panel's xrefs section. TargetingSplits/TargetingNeurons: splits that
-    # target this neuron / neurons a split targets, each their own panel section.
+    # as the panel's xrefs section.
     Xrefs = fields.List(fields.Dict(keys=fields.String(), values=fields.Raw()), required=False, allow_none=True)
-    TargetingSplits = fields.List(fields.String(), required=False, allow_none=True)
-    TargetingNeurons = fields.List(fields.String(), required=False, allow_none=True)
 
     @post_load
     def make_term_info(self, data, **kwargs):
@@ -538,6 +535,21 @@ def term_info_parse_object(results, short_form):
             termInfo["Meta"]["Comment"] = "%s"%("".join(vfbTerm.term.comment))
         except (NameError, AttributeError):
             pass
+        # External homepage link + logo (e.g. a DataSet's FlyBase/project link and
+        # icon). Rendered as the panel's link / logo rows
+        # (VFBProcessTermInfoCachedJson.java:1456 / :1449). Previously dropped.
+        try:
+            _link = vfbTerm.term.get_link() if hasattr(vfbTerm.term, 'get_link') else ""
+            if _link:
+                termInfo["Meta"]["Link"] = _link
+        except (AttributeError, TypeError):
+            pass
+        try:
+            _logo = vfbTerm.term.get_logo() if hasattr(vfbTerm.term, 'get_logo') else ""
+            if _logo:
+                termInfo["Meta"]["Logo"] = _logo
+        except (AttributeError, TypeError):
+            pass
         
         if hasattr(vfbTerm, 'parents') and vfbTerm.parents and len(vfbTerm.parents) > 0:
             parents = []
@@ -1358,26 +1370,6 @@ def term_info_parse_object(results, short_form):
             if related:
                 termInfo["Meta"]["RelatedIndividuals"] = "; ".join(related)
 
-        # Splits that target this neuron / neurons a split targets — each its own
-        # panel section (VFBProcessTermInfoCachedJson.java:1827 / :1835).
-        if getattr(vfbTerm, 'targeting_splits', None):
-            ts, seen = [], set()
-            for s in vfbTerm.targeting_splits:
-                sf = getattr(s, 'short_form', None)
-                if sf and sf not in seen:
-                    ts.append("[%s](%s)" % (encode_brackets(s.label or sf), sf))
-                    seen.add(sf)
-            if ts:
-                termInfo["TargetingSplits"] = ts
-        if getattr(vfbTerm, 'target_neurons', None):
-            tn, seen = [], set()
-            for n in vfbTerm.target_neurons:
-                sf = getattr(n, 'short_form', None)
-                if sf and sf not in seen:
-                    tn.append("[%s](%s)" % (encode_brackets(n.label or sf), sf))
-                    seen.add(sf)
-            if tn:
-                termInfo["TargetingNeurons"] = tn
 
         # Add the queries to the term info
         termInfo["Queries"] = queries

From cd2780250dd94a2ab42b8c33b9d2a66677c64b37 Mon Sep 17 00:00:00 2001
From: Rob Court <robbie.court@gmail.com>
Date: Wed, 24 Jun 2026 07:15:03 +0000
Subject: [PATCH 4/4] Add SplitsTargeting / TargetNeurons as live query types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

targeting_splits (Neuron classes) and target_neurons (Split classes) are
Cypher-derived in the indexer (query_roller neuron_split / split_neuron
clauses), not static term metadata. Surface them as proper VFBquery query
types with preview + count badge, displayed like the current term-info
targeting section:

- get_splits_targeting / get_neurons_targeted_by_split — live Neo4j queries
  returning the standard class-row table (id/label/tags/thumbnail) and the
  true count (so fill_query_results needs no re-run).
- SplitsTargeting_to_schema (Class+Neuron) / TargetNeurons_to_schema
  (Class+Split) builders; wired into term_info_parse_object and ha_api
  QUERY_TYPE_MAP.

Verified live: MBON FBbt_00100243 -> 33 targeting splits; split
VFBexp_FBtp0129935FBtp0129968 -> 18 target neurons. Tests cover both the
offered query and the function count/rows.
---
 src/test/test_term_info_parity.py | 22 +++++++
 src/vfbquery/ha_api.py            |  2 +
 src/vfbquery/vfb_queries.py       | 99 +++++++++++++++++++++++++++++++
 3 files changed, 123 insertions(+)

diff --git a/src/test/test_term_info_parity.py b/src/test/test_term_info_parity.py
index 266a11d..fca8135 100644
--- a/src/test/test_term_info_parity.py
+++ b/src/test/test_term_info_parity.py
@@ -118,6 +118,28 @@ def test_dataset_link_present(self):
         link = ti.get("Meta", {}).get("Link", "")
         self.assertIn("flybase.org/reports/FBrf0221438", link, "DataSet link dropped")
 
+    # --- Targeting queries (splits<->neurons) as live query types ----------
+    def test_neuron_class_offers_splits_targeting(self):
+        # FBbt_00100243 is an MBON neuron class with split drivers.
+        offered = [entry.get("query") for entry in self._parse("FBbt_00100243").get("Queries", [])]
+        self.assertTrue("SplitsTargeting" in offered, "SplitsTargeting not offered on neuron class")
+
+    def test_split_class_offers_target_neurons(self):
+        # VFBexp_FBtp0129935FBtp0129968 is a split class.
+        offered = [entry.get("query") for entry in self._parse("VFBexp_FBtp0129935FBtp0129968").get("Queries", [])]
+        self.assertTrue("TargetNeurons" in offered, "TargetNeurons not offered on split class")
+
+    def test_splits_targeting_returns_count_and_rows(self):
+        result = q.get_splits_targeting("FBbt_00100243", return_dataframe=False, limit=5)
+        self.assertIsInstance(result, dict)  # return_dataframe=False is expected to yield a dict
+        self.assertGreater(result.get("count", 0), 0, "expected splits targeting MBON")
+        self.assertTrue(result.get("rows"), "no preview rows")
+        self.assertTrue({"id", "label", "tags", "thumbnail"}.issubset(result["rows"][0]))
+
+    def test_neurons_targeted_by_split_returns_count(self):
+        result = q.get_neurons_targeted_by_split("VFBexp_FBtp0129935FBtp0129968", return_dataframe=False, limit=5)
+        self.assertGreater(result.get("count", 0), 0, "expected neurons targeted by split")  # only the count is checked
+
     # --- Gap D: License term must not 5xx / return None --------------------
     def test_license_term_info_does_not_5xx(self):
         # preview=False avoids the per-query count calls; License has no
diff --git a/src/vfbquery/ha_api.py b/src/vfbquery/ha_api.py
index 4f443ae..475d3f5 100644
--- a/src/vfbquery/ha_api.py
+++ b/src/vfbquery/ha_api.py
@@ -266,6 +266,8 @@ async def security_middleware(request, handler):
     "PartsOf":                      "get_parts_of",
     "ComponentsOf":                 "get_components_of",
     "SubclassesOf":                 "get_subclasses_of",
+    "SplitsTargeting":              "get_splits_targeting",
+    "TargetNeurons":                "get_neurons_targeted_by_split",
 
     # Neurons in region
     "NeuronsPartHere":              "get_neurons_with_part_in",
diff --git a/src/vfbquery/vfb_queries.py b/src/vfbquery/vfb_queries.py
index c6823be..783980f 100644
--- a/src/vfbquery/vfb_queries.py
+++ b/src/vfbquery/vfb_queries.py
@@ -824,6 +824,17 @@ def term_info_parse_object(results, short_form):
                 sorted_images = {int(key): value for key, value in sorted(images.items(), key=lambda x: x[0])}
                 termInfo["Domains"] = sorted_images
 
+        # SplitsTargeting — splits (intersectional expression patterns) that
+        # target this neuron class. TargetNeurons — neurons a split class targets.
+        # Live Neo4j queries (indexer neuron_split / split_neuron clauses),
+        # surfaced as queries with a count badge rather than static fields.
+        if contains_all_tags(termInfo["SuperTypes"], ["Class", "Neuron"]):
+            q = SplitsTargeting_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
+            queries.append(q)
+        if contains_all_tags(termInfo["SuperTypes"], ["Class", "Split"]):
+            q = TargetNeurons_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
+            queries.append(q)
+
         if contains_all_tags(termInfo["SuperTypes"], ["Individual", "Neuron"]):
             q = SimilarMorphologyTo_to_schema(termInfo["Name"], {"neuron": vfbTerm.term.core.short_form, "similarity_score": "NBLAST_score"})
             queries.append(q)
@@ -1607,6 +1618,24 @@ def SubclassesOf_to_schema(name, take_default):
     return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns)
 
 
+def SplitsTargeting_to_schema(name, take_default):
+    """Build the SplitsTargeting Query (splits targeting ``name``); matching criteria: Class + Neuron."""
+    takes = {"short_form": {"$and": ["Class", "Neuron"]}, "default": take_default}
+    columns = ["id", "label", "tags", "thumbnail"]
+    return Query(query="SplitsTargeting", label=f"Splits targeting {name}",
+                 function="get_splits_targeting", takes=takes,
+                 preview=5, preview_columns=columns)
+
+
+def TargetNeurons_to_schema(name, take_default):
+    """Build the TargetNeurons Query (neurons targeted by ``name``); matching criteria: Class + Split."""
+    takes = {"short_form": {"$and": ["Class", "Split"]}, "default": take_default}
+    columns = ["id", "label", "tags", "thumbnail"]
+    return Query(query="TargetNeurons", label=f"Neurons targeted by {name}",
+                 function="get_neurons_targeted_by_split", takes=takes,
+                 preview=5, preview_columns=columns)
+
+
 def NeuronClassesFasciculatingHere_to_schema(name, take_default):
     """
     Schema for NeuronClassesFasciculatingHere query.
@@ -3172,6 +3201,76 @@ def get_subclasses_of(short_form: str, return_dataframe=True, limit: int = -1):
     return _owlery_query_to_results(owl_query, short_form, return_dataframe, limit, solr_field='anat_query', query_by_label=False)
 
 
+def _targeting_rows(base_match, var, short_form, return_dataframe, limit):
+    """Shared runner for the split<->neuron targeting queries.
+
+    base_match must bind the result class to ``var``; ``short_form`` is not read
+    here. Returns the row DataFrame when ``return_dataframe`` is true, else a
+    dict of headers, id/label/tags/thumbnail rows and the un-limited true count.
+    """
+    main_query = base_match + (
+        f" WITH DISTINCT {var} "
+        f"CALL {{ WITH {var} OPTIONAL MATCH ({var})<-[:INSTANCEOF]-(:Individual)<-[:depicts]-"
+        "(:Individual)-[irw:in_register_with]->(:Template)-[:depicts]->(templ:Template) "
+        "RETURN irw, templ LIMIT 1 } "
+        f"RETURN {var}.short_form AS id, "
+        f"apoc.text.format(\"[%s](%s)\",[{var}.label, {var}.short_form]) AS label, "
+        f"apoc.text.join(coalesce({var}.uniqueFacets,[]),'|') AS tags, "
+        f"REPLACE(apoc.text.format(\"[![%s](%s '%s')](%s)\",[{var}.label, "
+        "REPLACE(COALESCE(irw.thumbnail[0],''),'thumbnailT.png','thumbnail.png'), "
+        f"{var}.label, templ.short_form + ',' + {var}.short_form]), "
+        "\"[![null]( 'null')](null)\", \"\") AS thumbnail "
+        "ORDER BY label"
+    )
+    if limit != -1:
+        main_query += f" LIMIT {limit}"
+    df = pd.DataFrame.from_records(get_dict_cursor()(vc.nc.commit_list([main_query])))
+    df = encode_markdown_links(df, ['label', 'thumbnail'])
+    if return_dataframe:
+        return df  # the count is only reported in the dict form, so skip the count query
+    # The count query carries no LIMIT, so it reports the full total rather than the preview size.
+    count_query = base_match + f" RETURN count(DISTINCT {var}) AS total_count"
+    count_df = pd.DataFrame.from_records(get_dict_cursor()(vc.nc.commit_list([count_query])))
+    total_count = int(count_df['total_count'][0]) if not count_df.empty else 0
+    return {
+        "headers": _get_standard_query_headers(),
+        "rows": [{k: row.get(k) for k in ["id", "label", "tags", "thumbnail"]}
+                 for row in safe_to_dict(df, sort_by_id=False)],
+        "count": total_count,
+    }
+
+
+@with_solr_cache('splits_targeting')
+def get_splits_targeting(short_form: str, return_dataframe=True, limit: int = -1):
+    """Splits (intersectional expression patterns) that target the given neuron class.
+
+    Live Neo4j query mirroring the indexer's neuron_split clause (query_roller.neuron_split);
+    short_form is backslash/quote-escaped before being embedded in the Cypher string literal.
+    """
+    safe_sf = str(short_form).replace("\\", "\\\\").replace("'", "\\'")
+    base = (
+        "MATCH (:Class {label:'intersectional expression pattern'})"
+        "<-[:SUBCLASSOF]-(ep:Class)<-[:part_of]-(:Individual)"
+        f"-[:INSTANCEOF]->(primary:Class {{short_form:'{safe_sf}'}})"
+    )
+    return _targeting_rows(base, "ep", short_form, return_dataframe, limit)
+
+
+@with_solr_cache('neurons_targeted_by_split')
+def get_neurons_targeted_by_split(short_form: str, return_dataframe=True, limit: int = -1):
+    """Neurons targeted by the given split class.
+
+    Live Neo4j query mirroring the indexer's split_neuron clause; short_form is escaped for Cypher.
+    """
+    safe_sf = str(short_form).replace("\\", "\\\\").replace("'", "\\'")
+    base = (
+        "MATCH (:Class {label:'intersectional expression pattern'})"
+        f"<-[:SUBCLASSOF]-(primary:Class {{short_form:'{safe_sf}'}})"
+        "<-[:part_of]-(:Individual)-[:INSTANCEOF]->(n:Neuron)"
+    )
+    return _targeting_rows(base, "n", short_form, return_dataframe, limit)
+
+
 @with_solr_cache('neuron_classes_fasciculating_here')
 def get_neuron_classes_fasciculating_here(short_form: str, return_dataframe=True, limit: int = -1):
     """