From 2d599dcaf2a77a564075b98c8044ba7c18090984 Mon Sep 17 00:00:00 2001 From: Rob Court Date: Tue, 23 Jun 2026 20:29:56 +0000 Subject: [PATCH 1/4] Restore dropped term-info fields and harden License/heavy-term paths term_info_parse_object dropped three reference-bearing fields that the panel renders today, so the VFBquery term-info path showed less than the legacy SOLR-field path: - def_pubs (class definition references) were never read. The legacy processor appends them inline to the definition (VFBProcessTermInfoCachedJson.java:937); restored the same way, as microref links on Meta.Description (not a separate Publications entry, so the rendered panel is identical and no new section is introduced). - pub_syn synonyms were gated Class-only, dropping Individual synonyms. Each synonym already carries its own publication inline, matching the legacy 'synonym (microref)' render; only the Class gate is removed. - pub_specific_content was gated on the SuperType "Publication" but the SOLR marker is the lowercase "pub", so pub title/PubMed/DOI/FlyBase never surfaced. Also harden two operational paths: - solr_result_cache.cache_result issued a blocking commit=true write. On a wedged IndexWriter a cold-miss term (e.g. a License individual, never pre-warmed by owlery-cache-reload) stalled the ha_api worker and the request surfaced as HTTP 503. Default to a soft commit (autoSoftCommit handles visibility); override with VFBQUERY_SOLR_WRITE_COMMIT=true. - fill_query_results re-ran each query at limit=-1 purely to length-check the result, even when the preview was not saturated. Skip the full re-run when the preview returned fewer rows than its cap. Add test_term_info_parity covering the three field gaps plus a License smoke test. 
--- src/test/test_term_info_parity.py | 107 ++++++++++++++++++++++++++++++ src/vfbquery/solr_result_cache.py | 16 ++++- src/vfbquery/vfb_queries.py | 78 +++++++++++++++------- 3 files changed, 176 insertions(+), 25 deletions(-) create mode 100644 src/test/test_term_info_parity.py diff --git a/src/test/test_term_info_parity.py b/src/test/test_term_info_parity.py new file mode 100644 index 0000000..cb8a821 --- /dev/null +++ b/src/test/test_term_info_parity.py @@ -0,0 +1,107 @@ +""" +Term-info parity + robustness tests for the VFBquery -> term-info migration. + +Covers the three serialiser gaps reconciled between ``term_info_parse_object`` +(``vfb_queries.py``) and the canonical dataclass serialiser +(``term_info_queries.py``), plus the License-term robustness fix: + + A. Class definition references (``def_pubs``) must reach ``Meta.Description``. + B. Individual-term synonyms (``pub_syn``) must reach ``Synonyms`` + (previously gated Class-only). + C. Publication-term external content (``pub_specific_content``) must reach + ``Publications`` -- the SOLR SuperType marker is the lowercase ``pub``. + D. ``get_term_info`` must not 5xx / hang on any SuperType -- the License + individual is the regression case (cold-miss cache write must be + non-blocking). + +The parity checks run against ``term_info_parse_object`` on the raw SOLR +``term_info`` doc (a read-only fetch, no per-query count calls), so they are +fast and deterministic. Caching is disabled for the whole module so nothing is +written back to the shared production cache. 
+""" + +import os +os.environ.setdefault("VFBQUERY_CACHE_ENABLED", "false") + +import unittest +import sys + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))) + +from vfbquery import vfb_queries as q +from vfbquery.cached_functions import is_valid_term_info_result + + +class _Results: + """Minimal stand-in for a pysolr result object.""" + def __init__(self, docs): + self.docs = docs + self.hits = len(docs) + + +def _raw(short_form): + """Fetch the raw SOLR doc(s) for a term (read-only) and wrap them.""" + return _Results(q.vfb_solr.search('id:' + short_form).docs) + + +class TermInfoParityTest(unittest.TestCase): + + def _parse(self, short_form): + ti = q.term_info_parse_object(_raw(short_form), short_form) + self.assertIsNotNone(ti, f"parse returned None for {short_form}") + return ti + + # --- Gap A: class definition references (def_pubs) -> inline in description + # The legacy panel appends def_pubs as microref links to the definition + # (VFBProcessTermInfoCachedJson.java:937), so parity is an inline render in + # Meta.Description, not a separate Publications entry. 
+ def test_class_def_pubs_inline_in_description(self): + ti = self._parse("FBbt_00003748") # medulla + desc = ti.get("Meta", {}).get("Description", "") + self.assertIn("FBrf0231227", desc, "def_pub FBrf0231227 missing from description") + self.assertIn("FBrf0224194", desc, "def_pub FBrf0224194 missing from description") + + def test_kenyon_def_pubs_all_inline(self): + ti = self._parse("FBbt_00003686") # Kenyon cell + desc = ti.get("Meta", {}).get("Description", "") + for ref in ("FBrf0092568", "FBrf0214059", "FBrf0205263"): + self.assertIn(ref, desc, f"def_pub {ref} missing from description") + + # --- Gap B: Individual synonyms (pub_syn) -> Synonyms ------------------- + def test_individual_synonyms_present(self): + ti = self._parse("VFB_00101385") # individual image (MEon) + labels = {s.get("label") for s in ti.get("Synonyms", [])} + self.assertIn("MEon JRC_FlyEM_Hemibrain", labels, + "Individual pub_syn dropped from Synonyms") + + def test_class_synonyms_not_regressed(self): + ti = self._parse("FBgn0010339") # gene 128up: 7 synonyms + self.assertGreaterEqual(len(ti.get("Synonyms", [])), 7, + "class synonyms regressed") + + # --- Gap C: publication external content (pub_specific_content) --------- + def test_publication_external_content_present(self): + ti = self._parse("FBrf0242477") # Dolan et al., 2019 + pubs = ti.get("Publications", []) + self.assertTrue(pubs, "pub_specific_content dropped: Publications empty") + pub = pubs[0] + self.assertTrue(pub.get("title"), "pub title missing") + refs = " ".join(pub.get("refs", [])) + self.assertIn("31112130", refs, "PubMed id missing") + self.assertIn("FBrf0242477", refs, "FlyBase ref missing") + self.assertIn("10.7554/eLife.43079", refs, "DOI missing") + + # --- Gap D: License term must not 5xx / return None -------------------- + def test_license_term_info_does_not_5xx(self): + # preview=False avoids the per-query count calls; License has no + # queries anyway. 
The point is that a valid dict comes back rather + # than None or a raised exception. + result = q.get_term_info("VFBlicense_CC_BY_SA_4_0", preview=False) + self.assertIsInstance(result, dict, "License term_info did not return a dict") + self.assertTrue(is_valid_term_info_result(result), + "License term_info failed validity check") + self.assertIn("License", result.get("SuperTypes", [])) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/vfbquery/solr_result_cache.py b/src/vfbquery/solr_result_cache.py index 47b901d..62f8863 100644 --- a/src/vfbquery/solr_result_cache.py +++ b/src/vfbquery/solr_result_cache.py @@ -493,12 +493,24 @@ def cache_result(self, query_type: str, term_id: str, result: Any, **params) -> "expires_at": cached_data["expires_at"] } - # Store cache document + # Store cache document. + # Skip the explicit commit by default: a hard per-write + # ``commit=true`` flush blocks the request until the IndexWriter + # completes, and on a wedged writer (e.g. the soft-NFS write.lock + # EIO failure mode) that stall propagates up — a cold-miss term + # such as a License individual then hangs and saturates the ha_api + # worker queue, surfacing as HTTP 503. Relying on the core's + # autoSoftCommit (as the sibling write paths in this module already + # do) keeps the write fast and non-blocking; the 3-month cache + # tolerates a few seconds' visibility delay. Override with + # VFBQUERY_SOLR_WRITE_COMMIT=true if an immediate commit is needed. 
+ commit_flag = os.getenv('VFBQUERY_SOLR_WRITE_COMMIT', 'false').lower() \ + in ('1', 'true', 'yes') response = requests.post( f"{self.cache_url}/update", data=json.dumps([cache_doc]), headers={"Content-Type": "application/json"}, - params={"commit": "true"}, # Immediate commit for availability + params={"commit": "true" if commit_flag else "false"}, timeout=int(os.getenv('VFBQUERY_SOLR_WRITE_TIMEOUT', '60')) ) diff --git a/src/vfbquery/vfb_queries.py b/src/vfbquery/vfb_queries.py index 8657b35..f8809bf 100644 --- a/src/vfbquery/vfb_queries.py +++ b/src/vfbquery/vfb_queries.py @@ -515,6 +515,18 @@ def term_info_parse_object(results, short_form): termInfo["Meta"]["Description"] = "%s"%("".join(vfbTerm.term.description)) except (NameError, AttributeError): pass + # Append class definition references (def_pubs) inline to the description + # as markdown microref links, matching how the panel renders them today + # (legacy VFBProcessTermInfoCachedJson definition() + "()"). + # Kept inline rather than as a separate Publications entry so the display + # is identical and no new panel section is introduced. 
+ if getattr(vfbTerm, 'def_pubs', None): + def_refs = [p.get_microref() for p in vfbTerm.def_pubs + if hasattr(p, 'get_miniref') and p.get_miniref() + and hasattr(p, 'get_microref') and p.get_microref()] + if def_refs: + existing_desc = termInfo["Meta"].get("Description", "") + termInfo["Meta"]["Description"] = (existing_desc + "\n(" + ", ".join(def_refs) + ")") if existing_desc else ("(" + ", ".join(def_refs) + ")") try: # Retrieve comment from the term's comment attribute termInfo["Meta"]["Comment"] = "%s"%("".join(vfbTerm.term.comment)) @@ -1190,7 +1202,7 @@ def term_info_parse_object(results, short_form): publication["title"] = pub.core.label if pub.core.label else "" publication["short_form"] = pub.core.short_form if pub.core.short_form else "" publication["microref"] = pub.get_microref() if hasattr(pub, 'get_microref') and pub.get_microref() else "" - + # Add external references refs = [] if hasattr(pub, 'PubMed') and pub.PubMed: @@ -1199,14 +1211,16 @@ def term_info_parse_object(results, short_form): refs.append(f"http://flybase.org/reports/{pub.FlyBase}") if hasattr(pub, 'DOI') and pub.DOI: refs.append(f"https://doi.org/{pub.DOI}") - + publication["refs"] = refs publications.append(publication) - + termInfo["Publications"] = publications - # Add Synonyms for Class entities - if termInfo["SuperTypes"] and "Class" in termInfo["SuperTypes"] and vfbTerm.pub_syn and len(vfbTerm.pub_syn) > 0: + # Add Synonyms from pub_syn. Not gated on Class: Individual terms also + # carry pub_syn (parity gap B — Individual synonyms were dropped when + # this was Class-only). 
+ if termInfo["SuperTypes"] and vfbTerm.pub_syn and len(vfbTerm.pub_syn) > 0: synonyms = [] for syn in vfbTerm.pub_syn: if hasattr(syn, 'synonym') and syn.synonym: @@ -1260,8 +1274,10 @@ def term_info_parse_object(results, short_form): if synonyms and "Synonyms" not in termInfo: termInfo["Synonyms"] = synonyms - # Special handling for Publication entities - if termInfo["SuperTypes"] and "Publication" in termInfo["SuperTypes"] and vfbTerm.pub_specific_content: + # Special handling for Publication entities. The SOLR SuperType marker is + # the lowercase "pub" (parity gap C — gating on "Publication" meant the + # block never fired, dropping pub title/PubMed/DOI/FlyBase links). + if termInfo["SuperTypes"] and ("pub" in termInfo["SuperTypes"] or "Publication" in termInfo["SuperTypes"]) and vfbTerm.pub_specific_content: publication = {} publication["title"] = vfbTerm.pub_specific_content.title if hasattr(vfbTerm.pub_specific_content, 'title') else "" publication["short_form"] = vfbTerm.term.core.short_form @@ -6098,22 +6114,38 @@ def process_query(query): result_count = result['count'] # If limit was applied, the count in dict may be wrong, get correct count if query['preview'] > 0 and result_count == len(result['rows']): - try: - full_kwargs = {'return_dataframe': False, 'limit': -1} - if supports_force_refresh: - full_kwargs['force_refresh'] = force_refresh - if function_args and takes_short_form: - short_form_value = list(function_args.values())[0] - full_dict = function(short_form_value, **full_kwargs) - else: - full_dict = function(**full_kwargs) - result_count = full_dict['count'] - except Exception as e: - print(f"Error getting full count for {query['function']}: {e}") - result_count = result['count'] # Keep as is + # Skip the full limit=-1 re-run when the preview was not + # saturated: fewer returned rows than the preview cap means + # the preview already holds the entire result set, so the + # count is exactly the number of preview rows. 
This avoids + # materialising every row purely to length-check it — the + # main driver of cold term-info latency on SuperTypes that + # offer many queries (expression pattern, scRNAseq cluster), + # and a no-op win for zero/low-count queries (grey-out path). + if len(result['rows']) < query['preview']: + result_count = len(result['rows']) + else: + try: + full_kwargs = {'return_dataframe': False, 'limit': -1} + if supports_force_refresh: + full_kwargs['force_refresh'] = force_refresh + if function_args and takes_short_form: + short_form_value = list(function_args.values())[0] + full_dict = function(short_form_value, **full_kwargs) + else: + full_dict = function(**full_kwargs) + result_count = full_dict['count'] + except Exception as e: + print(f"Error getting full count for {query['function']}: {e}") + result_count = result['count'] # Keep as is elif isinstance(result, pd.DataFrame): - # For DataFrame results, we need the full count even when preview is limited - try: + # For DataFrame results, we need the full count even when preview is limited. + # But skip the full limit=-1 re-run when the preview was not saturated + # (fewer rows than the cap means the preview already holds every row). 
+ if query['preview'] > 0 and len(result) < query['preview']: + result_count = len(result) + else: + try: full_kwargs = {'return_dataframe': True, 'limit': -1} if supports_force_refresh: full_kwargs['force_refresh'] = force_refresh @@ -6123,7 +6155,7 @@ def process_query(query): else: full_result = function(**full_kwargs) result_count = len(full_result) - except Exception as e: + except Exception as e: print(f"Error getting full count for {query['function']}: {e}") result_count = len(result) # Fallback to limited count else: From 83fb10fc41c0783551cec49d2f3be31fc62cb151 Mon Sep 17 00:00:00 2001 From: Rob Court Date: Tue, 23 Jun 2026 21:39:46 +0000 Subject: [PATCH 2/4] Surface xrefs, related_individuals and targeting in term_info parse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Field-coverage sweep against the legacy processor (VFBProcessTermInfoCachedJson.java) found term_info_parse_object also drops sections the panel renders today: - xrefs (external DB cross-references) were dropped entirely — e.g. medulla's Insect Brain DB link and gene FlyBase links. Now emitted as a structured Xrefs list (site label, accession, external link, icon). - related_individuals (present on most FBbt classes) were dropped. Now emitted as Meta.RelatedIndividuals, grouped like relationships. - targeting_splits / target_neurons are wired (TargetingSplits/TargetingNeurons) to match the legacy model; unpopulated in current SOLR data so a no-op today, but no longer at risk of being silently dropped. Declare the new fields on TermInfoOutputSchema so .load keeps them. images, downloads and queries are already covered: image/example/domain records carry the nrrd/obj/wlz/swc URLs and template voxel/extent/centre, so downloads are recreatable client-side. Extend test_term_info_parity with xref (anatomy + gene) and related_individuals cases. 
--- src/test/test_term_info_parity.py | 21 +++++++++ src/vfbquery/vfb_queries.py | 71 +++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/src/test/test_term_info_parity.py b/src/test/test_term_info_parity.py index cb8a821..700fa61 100644 --- a/src/test/test_term_info_parity.py +++ b/src/test/test_term_info_parity.py @@ -91,6 +91,27 @@ def test_publication_external_content_present(self): self.assertIn("FBrf0242477", refs, "FlyBase ref missing") self.assertIn("10.7554/eLife.43079", refs, "DOI missing") + # --- Coverage: external xref links (genes, anatomy) -------------------- + def test_xrefs_surface_as_links(self): + ti = self._parse("FBbt_00003748") # medulla -> Insect Brain DB + xr = ti.get("Xrefs") or [] + self.assertTrue(xr, "Xrefs dropped for medulla") + ibdb = [x for x in xr if x.get("label") == "Insect Brain DB"] + self.assertTrue(ibdb, "Insect Brain DB xref missing") + self.assertIn("insectbraindb.org/app/structures/38", ibdb[0].get("link", "")) + + def test_gene_xref_flybase(self): + ti = self._parse("FBgn0051882") # a gene with a FlyBase xref + links = " ".join(x.get("link", "") for x in (ti.get("Xrefs") or [])) + self.assertIn("flybase.org/reports/FBgn0051882", links, "gene FlyBase xref missing") + + # --- Coverage: related_individuals ------------------------------------- + def test_related_individuals_surface(self): + ti = self._parse("FBbt_00000058") # FBbt class carrying related_individuals + ri = ti.get("Meta", {}).get("RelatedIndividuals", "") + self.assertTrue(ri, "related_individuals dropped") + self.assertIn("FBbt_00000057", ri, "related individual target id missing") + # --- Gap D: License term must not 5xx / return None -------------------- def test_license_term_info_does_not_5xx(self): # preview=False avoids the per-query count calls; License has no diff --git a/src/vfbquery/vfb_queries.py b/src/vfbquery/vfb_queries.py index f8809bf..b4237d0 100644 --- a/src/vfbquery/vfb_queries.py +++ b/src/vfbquery/vfb_queries.py 
@@ -331,6 +331,12 @@ class TermInfoOutputSchema(Schema): Publications = fields.List(fields.Dict(keys=fields.String(), values=fields.Raw()), required=False) Synonyms = fields.List(fields.Dict(keys=fields.String(), values=fields.Raw()), required=False, allow_none=True) Technique = fields.List(fields.String(), required=False, allow_none=True) + # External DB cross-references (site label + accession link + icon), rendered + # as the panel's xrefs section. TargetingSplits/TargetingNeurons: splits that + # target this neuron / neurons a split targets, each their own panel section. + Xrefs = fields.List(fields.Dict(keys=fields.String(), values=fields.Raw()), required=False, allow_none=True) + TargetingSplits = fields.List(fields.String(), required=False, allow_none=True) + TargetingNeurons = fields.List(fields.String(), required=False, allow_none=True) @post_load def make_term_info(self, data, **kwargs): @@ -1308,6 +1314,71 @@ def term_info_parse_object(results, short_form): termInfo["Synonyms"].append(synonym) existing_labels.add(synonym["label"]) + # External database cross-references (xrefs). Rendered today as the + # panel's xrefs link section (VFBProcessTermInfoCachedJson.java:1536): + # site label, icon and the external accession link. Previously dropped + # by this parser (e.g. medulla's Insect Brain DB link). 
+ if getattr(vfbTerm, 'xrefs', None): + xrefs_out = [] + for x in vfbTerm.xrefs: + site = getattr(x, 'site', None) + label = getattr(site, 'label', '') if site else '' + acc = x.accession if getattr(x, 'accession', None) and x.accession != "None" else '' + if acc: + link = (x.link_base or '') + acc + (x.link_postfix or '') + elif getattr(x, 'homepage', None): + link = x.homepage + else: + link = getattr(site, 'iri', '') if site else '' + entry = {"label": label, "accession": acc, "link": link} + if getattr(x, 'icon', None): + entry["icon"] = x.icon + xrefs_out.append(entry) + if xrefs_out: + termInfo["Xrefs"] = xrefs_out + + # Related individuals — same Rel shape as relationships, rendered as its + # own panel section (VFBProcessTermInfoCachedJson.java:1529). Kept as a + # Meta string so it travels with the other Meta rows. + if getattr(vfbTerm, 'related_individuals', None): + grouped_ri = {} + for rel in vfbTerm.related_individuals: + if not (hasattr(rel, 'relation') and hasattr(rel.relation, 'label')): + continue + if not (hasattr(rel, 'object') and hasattr(rel.object, 'label')): + continue + rid = getattr(rel.relation, 'short_form', None) or rel.relation.label + key = (rel.relation.label, rid) + obj = (rel.object.label, getattr(rel.object, 'short_form', '')) + grouped_ri.setdefault(key, set()).add(obj) + related = [] + for (rlabel, rid), objs in sorted(grouped_ri.items()): + objlinks = ", ".join("[%s](%s)" % (encode_brackets(o[0]), o[1]) for o in sorted(objs)) + related.append("[%s](%s): %s" % (encode_brackets(rlabel), rid, objlinks)) + if related: + termInfo["Meta"]["RelatedIndividuals"] = "; ".join(related) + + # Splits that target this neuron / neurons a split targets — each its own + # panel section (VFBProcessTermInfoCachedJson.java:1827 / :1835). 
+ if getattr(vfbTerm, 'targeting_splits', None): + ts, seen = [], set() + for s in vfbTerm.targeting_splits: + sf = getattr(s, 'short_form', None) + if sf and sf not in seen: + ts.append("[%s](%s)" % (encode_brackets(s.label or sf), sf)) + seen.add(sf) + if ts: + termInfo["TargetingSplits"] = ts + if getattr(vfbTerm, 'target_neurons', None): + tn, seen = [], set() + for n in vfbTerm.target_neurons: + sf = getattr(n, 'short_form', None) + if sf and sf not in seen: + tn.append("[%s](%s)" % (encode_brackets(n.label or sf), sf)) + seen.add(sf) + if tn: + termInfo["TargetingNeurons"] = tn + # Add the queries to the term info termInfo["Queries"] = queries From a4691a9f653d93c798805fb32ab7e5614f98c8b1 Mon Sep 17 00:00:00 2001 From: Rob Court Date: Wed, 24 Jun 2026 07:06:41 +0000 Subject: [PATCH 3/4] Surface DataSet link/logo; drop static targeting (it is query data) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed VFB_json_schema_indexer (the indexer that produces the SOLR term_info field) — the authoritative per-SuperType clause set lives in its vfb_query_builder QueryLibrary. Two follow-ups from that review: - DataSet term.link (e.g. Ito2013's FlyBase reference) and term.logo were dropped by term_info_parse_object. Now surfaced as Meta.Link / Meta.Logo, matching the panel's link/logo rows. - targeting_splits (neuron_split clause, Neuron classes) and target_neurons (split_neuron clause, Split classes) are Cypher-derived in the indexer. They are query data, not static term metadata, so the static fields added earlier are removed; they will be reintroduced as proper query types (preview + count badge) displayed like the current term-info section. dataset_counts (images/types) is already covered by the DatasetImages query count badge. Add a DataSet-link test. 
--- src/test/test_term_info_parity.py | 6 +++++ src/vfbquery/vfb_queries.py | 40 +++++++++++++------------------ 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/src/test/test_term_info_parity.py b/src/test/test_term_info_parity.py index 700fa61..266a11d 100644 --- a/src/test/test_term_info_parity.py +++ b/src/test/test_term_info_parity.py @@ -112,6 +112,12 @@ def test_related_individuals_surface(self): self.assertTrue(ri, "related_individuals dropped") self.assertIn("FBbt_00000057", ri, "related individual target id missing") + # --- Coverage: DataSet external link ----------------------------------- + def test_dataset_link_present(self): + ti = self._parse("Ito2013") + link = ti.get("Meta", {}).get("Link", "") + self.assertIn("flybase.org/reports/FBrf0221438", link, "DataSet link dropped") + # --- Gap D: License term must not 5xx / return None -------------------- def test_license_term_info_does_not_5xx(self): # preview=False avoids the per-query count calls; License has no diff --git a/src/vfbquery/vfb_queries.py b/src/vfbquery/vfb_queries.py index b4237d0..c6823be 100644 --- a/src/vfbquery/vfb_queries.py +++ b/src/vfbquery/vfb_queries.py @@ -332,11 +332,8 @@ class TermInfoOutputSchema(Schema): Synonyms = fields.List(fields.Dict(keys=fields.String(), values=fields.Raw()), required=False, allow_none=True) Technique = fields.List(fields.String(), required=False, allow_none=True) # External DB cross-references (site label + accession link + icon), rendered - # as the panel's xrefs section. TargetingSplits/TargetingNeurons: splits that - # target this neuron / neurons a split targets, each their own panel section. + # as the panel's xrefs section. 
Xrefs = fields.List(fields.Dict(keys=fields.String(), values=fields.Raw()), required=False, allow_none=True) - TargetingSplits = fields.List(fields.String(), required=False, allow_none=True) - TargetingNeurons = fields.List(fields.String(), required=False, allow_none=True) @post_load def make_term_info(self, data, **kwargs): @@ -538,6 +535,21 @@ def term_info_parse_object(results, short_form): termInfo["Meta"]["Comment"] = "%s"%("".join(vfbTerm.term.comment)) except (NameError, AttributeError): pass + # External homepage link + logo (e.g. a DataSet's FlyBase/project link and + # icon). Rendered as the panel's link / logo rows + # (VFBProcessTermInfoCachedJson.java:1456 / :1449). Previously dropped. + try: + _link = vfbTerm.term.get_link() if hasattr(vfbTerm.term, 'get_link') else "" + if _link: + termInfo["Meta"]["Link"] = _link + except (AttributeError, TypeError): + pass + try: + _logo = vfbTerm.term.get_logo() if hasattr(vfbTerm.term, 'get_logo') else "" + if _logo: + termInfo["Meta"]["Logo"] = _logo + except (AttributeError, TypeError): + pass if hasattr(vfbTerm, 'parents') and vfbTerm.parents and len(vfbTerm.parents) > 0: parents = [] @@ -1358,26 +1370,6 @@ def term_info_parse_object(results, short_form): if related: termInfo["Meta"]["RelatedIndividuals"] = "; ".join(related) - # Splits that target this neuron / neurons a split targets — each its own - # panel section (VFBProcessTermInfoCachedJson.java:1827 / :1835). 
- if getattr(vfbTerm, 'targeting_splits', None): - ts, seen = [], set() - for s in vfbTerm.targeting_splits: - sf = getattr(s, 'short_form', None) - if sf and sf not in seen: - ts.append("[%s](%s)" % (encode_brackets(s.label or sf), sf)) - seen.add(sf) - if ts: - termInfo["TargetingSplits"] = ts - if getattr(vfbTerm, 'target_neurons', None): - tn, seen = [], set() - for n in vfbTerm.target_neurons: - sf = getattr(n, 'short_form', None) - if sf and sf not in seen: - tn.append("[%s](%s)" % (encode_brackets(n.label or sf), sf)) - seen.add(sf) - if tn: - termInfo["TargetingNeurons"] = tn # Add the queries to the term info termInfo["Queries"] = queries From cd2780250dd94a2ab42b8c33b9d2a66677c64b37 Mon Sep 17 00:00:00 2001 From: Rob Court Date: Wed, 24 Jun 2026 07:15:03 +0000 Subject: [PATCH 4/4] Add SplitsTargeting / TargetNeurons as live query types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit targeting_splits (Neuron classes) and target_neurons (Split classes) are Cypher-derived in the indexer (query_roller neuron_split / split_neuron clauses), not static term metadata. Surface them as proper VFBquery query types with preview + count badge, displayed like the current term-info targeting section: - get_splits_targeting / get_neurons_targeted_by_split — live Neo4j queries returning the standard class-row table (id/label/tags/thumbnail) and the true count (so fill_query_results needs no re-run). - SplitsTargeting_to_schema (Class+Neuron) / TargetNeurons_to_schema (Class+Split) builders; wired into term_info_parse_object and ha_api QUERY_TYPE_MAP. Verified live: MBON FBbt_00100243 -> 33 targeting splits; split VFBexp_FBtp0129935FBtp0129968 -> 18 target neurons. Tests cover both the offered query and the function count/rows. 
--- src/test/test_term_info_parity.py | 22 +++++++ src/vfbquery/ha_api.py | 2 + src/vfbquery/vfb_queries.py | 99 +++++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+) diff --git a/src/test/test_term_info_parity.py b/src/test/test_term_info_parity.py index 266a11d..fca8135 100644 --- a/src/test/test_term_info_parity.py +++ b/src/test/test_term_info_parity.py @@ -118,6 +118,28 @@ def test_dataset_link_present(self): link = ti.get("Meta", {}).get("Link", "") self.assertIn("flybase.org/reports/FBrf0221438", link, "DataSet link dropped") + # --- Targeting queries (splits<->neurons) as live query types ---------- + def test_neuron_class_offers_splits_targeting(self): + ti = self._parse("FBbt_00100243") # MBON neuron class with split drivers + self.assertTrue(any(x.get("query") == "SplitsTargeting" for x in ti.get("Queries", [])), + "SplitsTargeting not offered on neuron class") + + def test_split_class_offers_target_neurons(self): + ti = self._parse("VFBexp_FBtp0129935FBtp0129968") # a split class + self.assertTrue(any(x.get("query") == "TargetNeurons" for x in ti.get("Queries", [])), + "TargetNeurons not offered on split class") + + def test_splits_targeting_returns_count_and_rows(self): + r = q.get_splits_targeting("FBbt_00100243", return_dataframe=False, limit=5) + self.assertIsInstance(r, dict) + self.assertGreater(r.get("count", 0), 0, "expected splits targeting MBON") + self.assertTrue(r.get("rows"), "no preview rows") + self.assertTrue(all(k in r["rows"][0] for k in ("id", "label", "tags", "thumbnail"))) + + def test_neurons_targeted_by_split_returns_count(self): + r = q.get_neurons_targeted_by_split("VFBexp_FBtp0129935FBtp0129968", return_dataframe=False, limit=5) + self.assertGreater(r.get("count", 0), 0, "expected neurons targeted by split") + # --- Gap D: License term must not 5xx / return None -------------------- def test_license_term_info_does_not_5xx(self): # preview=False avoids the per-query count calls; License has no diff --git 
a/src/vfbquery/ha_api.py b/src/vfbquery/ha_api.py index 4f443ae..475d3f5 100644 --- a/src/vfbquery/ha_api.py +++ b/src/vfbquery/ha_api.py @@ -266,6 +266,8 @@ async def security_middleware(request, handler): "PartsOf": "get_parts_of", "ComponentsOf": "get_components_of", "SubclassesOf": "get_subclasses_of", + "SplitsTargeting": "get_splits_targeting", + "TargetNeurons": "get_neurons_targeted_by_split", # Neurons in region "NeuronsPartHere": "get_neurons_with_part_in", diff --git a/src/vfbquery/vfb_queries.py b/src/vfbquery/vfb_queries.py index c6823be..783980f 100644 --- a/src/vfbquery/vfb_queries.py +++ b/src/vfbquery/vfb_queries.py @@ -824,6 +824,17 @@ def term_info_parse_object(results, short_form): sorted_images = {int(key): value for key, value in sorted(images.items(), key=lambda x: x[0])} termInfo["Domains"] = sorted_images + # SplitsTargeting — splits (intersectional expression patterns) that + # target this neuron class. TargetNeurons — neurons a split class targets. + # Live Neo4j queries (indexer neuron_split / split_neuron clauses), + # surfaced as queries with a count badge rather than static fields. 
+ if contains_all_tags(termInfo["SuperTypes"], ["Class", "Neuron"]): + q = SplitsTargeting_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form}) + queries.append(q) + if contains_all_tags(termInfo["SuperTypes"], ["Class", "Split"]): + q = TargetNeurons_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form}) + queries.append(q) + if contains_all_tags(termInfo["SuperTypes"], ["Individual", "Neuron"]): q = SimilarMorphologyTo_to_schema(termInfo["Name"], {"neuron": vfbTerm.term.core.short_form, "similarity_score": "NBLAST_score"}) queries.append(q) @@ -1607,6 +1618,24 @@ def SubclassesOf_to_schema(name, take_default): return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns) +def SplitsTargeting_to_schema(name, take_default): + """Schema for SplitsTargeting query: splits that target a neuron class. + Matching criteria: Class + Neuron (mirrors the indexer neuron_split clause).""" + return Query(query="SplitsTargeting", label=f"Splits targeting {name}", + function="get_splits_targeting", + takes={"short_form": {"$and": ["Class", "Neuron"]}, "default": take_default}, + preview=5, preview_columns=["id", "label", "tags", "thumbnail"]) + + +def TargetNeurons_to_schema(name, take_default): + """Schema for TargetNeurons query: neurons targeted by a split class. + Matching criteria: Class + Split (mirrors the indexer split_neuron clause).""" + return Query(query="TargetNeurons", label=f"Neurons targeted by {name}", + function="get_neurons_targeted_by_split", + takes={"short_form": {"$and": ["Class", "Split"]}, "default": take_default}, + preview=5, preview_columns=["id", "label", "tags", "thumbnail"]) + + def NeuronClassesFasciculatingHere_to_schema(name, take_default): """ Schema for NeuronClassesFasciculatingHere query. 
@@ -3172,6 +3201,76 @@ def get_subclasses_of(short_form: str, return_dataframe=True, limit: int = -1): return _owlery_query_to_results(owl_query, short_form, return_dataframe, limit, solr_field='anat_query', query_by_label=False) +def _targeting_rows(base_match, var, short_form, return_dataframe, limit): + """Shared runner for the split<->neuron targeting queries. + + base_match must bind the result class to ``var`` for the given ``short_form``. + Returns the standard class-row table (id/label/tags/thumbnail) + true count, + so fill_query_results gets the real count without a re-run. + """ + count_query = base_match + f" RETURN count(DISTINCT {var}) AS total_count" + count_df = pd.DataFrame.from_records(get_dict_cursor()(vc.nc.commit_list([count_query]))) + total_count = int(count_df['total_count'][0]) if not count_df.empty else 0 + + main_query = base_match + ( + f" WITH DISTINCT {var} " + f"CALL {{ WITH {var} OPTIONAL MATCH ({var})<-[:INSTANCEOF]-(:Individual)<-[:depicts]-" + "(:Individual)-[irw:in_register_with]->(:Template)-[:depicts]->(templ:Template) " + "RETURN irw, templ LIMIT 1 } " + f"RETURN {var}.short_form AS id, " + f"apoc.text.format(\"[%s](%s)\",[{var}.label, {var}.short_form]) AS label, " + f"apoc.text.join(coalesce({var}.uniqueFacets,[]),'|') AS tags, " + f"REPLACE(apoc.text.format(\"[![%s](%s '%s')](%s)\",[{var}.label, " + "REPLACE(COALESCE(irw.thumbnail[0],''),'thumbnailT.png','thumbnail.png'), " + f"{var}.label, templ.short_form + ',' + {var}.short_form]), " + "\"[![null]( 'null')](null)\", \"\") AS thumbnail " + "ORDER BY label" + ) + if limit != -1: + main_query += f" LIMIT {limit}" + df = pd.DataFrame.from_records(get_dict_cursor()(vc.nc.commit_list([main_query]))) + df = encode_markdown_links(df, ['label', 'thumbnail']) + if return_dataframe: + return df + return { + "headers": _get_standard_query_headers(), + "rows": [{k: row.get(k) for k in ["id", "label", "tags", "thumbnail"]} + for row in safe_to_dict(df, sort_by_id=False)], + "count": 
total_count, + } + + +@with_solr_cache('splits_targeting') +def get_splits_targeting(short_form: str, return_dataframe=True, limit: int = -1): + """Splits (intersectional expression patterns) that target the given neuron class. + + Live Neo4j query mirroring the indexer's neuron_split clause + (VFB_json_schema_indexer query_roller.neuron_split) — surfaced as a query + with a preview + count badge rather than a static term-info field. + """ + base = ( + "MATCH (:Class {label:'intersectional expression pattern'})" + "<-[:SUBCLASSOF]-(ep:Class)<-[:part_of]-(:Individual)" + f"-[:INSTANCEOF]->(primary:Class {{short_form:'{short_form}'}})" + ) + return _targeting_rows(base, "ep", short_form, return_dataframe, limit) + + +@with_solr_cache('neurons_targeted_by_split') +def get_neurons_targeted_by_split(short_form: str, return_dataframe=True, limit: int = -1): + """Neurons targeted by the given split class. + + Live Neo4j query mirroring the indexer's split_neuron clause + (VFB_json_schema_indexer query_roller.split_neuron). + """ + base = ( + "MATCH (:Class {label:'intersectional expression pattern'})" + f"<-[:SUBCLASSOF]-(primary:Class {{short_form:'{short_form}'}})" + "<-[:part_of]-(:Individual)-[:INSTANCEOF]->(n:Neuron)" + ) + return _targeting_rows(base, "n", short_form, return_dataframe, limit) + + @with_solr_cache('neuron_classes_fasciculating_here') def get_neuron_classes_fasciculating_here(short_form: str, return_dataframe=True, limit: int = -1): """