diff --git a/src/test/test_term_info_parity.py b/src/test/test_term_info_parity.py new file mode 100644 index 0000000..fca8135 --- /dev/null +++ b/src/test/test_term_info_parity.py @@ -0,0 +1,156 @@ +""" +Term-info parity + robustness tests for the VFBquery -> term-info migration. + +Covers the three serialiser gaps reconciled between ``term_info_parse_object`` +(``vfb_queries.py``) and the canonical dataclass serialiser +(``term_info_queries.py``), plus the License-term robustness fix: + + A. Class definition references (``def_pubs``) must render inline in ``Meta.Description`` (not as a separate ``Publications`` entry). + B. Individual-term synonyms (``pub_syn``) must reach ``Synonyms`` + (previously gated Class-only). + C. Publication-term external content (``pub_specific_content``) must reach + ``Publications`` -- the SOLR SuperType marker is the lowercase ``pub``. + D. ``get_term_info`` must not 5xx / hang on any SuperType -- the License + individual is the regression case (cold-miss cache write must be + non-blocking). + +The parity checks run against ``term_info_parse_object`` on the raw SOLR +``term_info`` doc (a read-only fetch, no per-query count calls), so they are +fast and deterministic. Caching is disabled for the whole module unless +``VFBQUERY_CACHE_ENABLED`` is already set, so by default nothing is written back to the shared production cache. 
+""" + +import os +os.environ.setdefault("VFBQUERY_CACHE_ENABLED", "false") + +import unittest +import sys + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))) + +from vfbquery import vfb_queries as q +from vfbquery.cached_functions import is_valid_term_info_result + + +class _Results: + """Minimal stand-in for a pysolr result object.""" + def __init__(self, docs): + self.docs = docs + self.hits = len(docs) + + +def _raw(short_form): + """Fetch the raw SOLR doc(s) for a term (read-only) and wrap them.""" + return _Results(q.vfb_solr.search('id:' + short_form).docs) + + +class TermInfoParityTest(unittest.TestCase): + + def _parse(self, short_form): + ti = q.term_info_parse_object(_raw(short_form), short_form) + self.assertIsNotNone(ti, f"parse returned None for {short_form}") + return ti + + # --- Gap A: class definition references (def_pubs) -> inline in description + # The legacy panel appends def_pubs as microref links to the definition + # (VFBProcessTermInfoCachedJson.java:937), so parity is an inline render in + # Meta.Description, not a separate Publications entry. 
+ def test_class_def_pubs_inline_in_description(self): + ti = self._parse("FBbt_00003748") # medulla + desc = ti.get("Meta", {}).get("Description", "") + self.assertIn("FBrf0231227", desc, "def_pub FBrf0231227 missing from description") + self.assertIn("FBrf0224194", desc, "def_pub FBrf0224194 missing from description") + + def test_kenyon_def_pubs_all_inline(self): + ti = self._parse("FBbt_00003686") # Kenyon cell + desc = ti.get("Meta", {}).get("Description", "") + for ref in ("FBrf0092568", "FBrf0214059", "FBrf0205263"): + self.assertIn(ref, desc, f"def_pub {ref} missing from description") + + # --- Gap B: Individual synonyms (pub_syn) -> Synonyms ------------------- + def test_individual_synonyms_present(self): + ti = self._parse("VFB_00101385") # individual image (MEon) + labels = {s.get("label") for s in ti.get("Synonyms", [])} + self.assertIn("MEon JRC_FlyEM_Hemibrain", labels, + "Individual pub_syn dropped from Synonyms") + + def test_class_synonyms_not_regressed(self): + ti = self._parse("FBgn0010339") # gene 128up: 7 synonyms + self.assertGreaterEqual(len(ti.get("Synonyms", [])), 7, + "class synonyms regressed") + + # --- Gap C: publication external content (pub_specific_content) --------- + def test_publication_external_content_present(self): + ti = self._parse("FBrf0242477") # Dolan et al., 2019 + pubs = ti.get("Publications", []) + self.assertTrue(pubs, "pub_specific_content dropped: Publications empty") + pub = pubs[0] + self.assertTrue(pub.get("title"), "pub title missing") + refs = " ".join(pub.get("refs", [])) + self.assertIn("31112130", refs, "PubMed id missing") + self.assertIn("FBrf0242477", refs, "FlyBase ref missing") + self.assertIn("10.7554/eLife.43079", refs, "DOI missing") + + # --- Coverage: external xref links (genes, anatomy) -------------------- + def test_xrefs_surface_as_links(self): + ti = self._parse("FBbt_00003748") # medulla -> Insect Brain DB + xr = ti.get("Xrefs") or [] + self.assertTrue(xr, "Xrefs dropped for medulla") + ibdb 
= [x for x in xr if x.get("label") == "Insect Brain DB"] + self.assertTrue(ibdb, "Insect Brain DB xref missing") + self.assertIn("insectbraindb.org/app/structures/38", ibdb[0].get("link", "")) + + def test_gene_xref_flybase(self): + ti = self._parse("FBgn0051882") # a gene with a FlyBase xref + links = " ".join(x.get("link", "") for x in (ti.get("Xrefs") or [])) + self.assertIn("flybase.org/reports/FBgn0051882", links, "gene FlyBase xref missing") + + # --- Coverage: related_individuals ------------------------------------- + def test_related_individuals_surface(self): + ti = self._parse("FBbt_00000058") # FBbt class carrying related_individuals + ri = ti.get("Meta", {}).get("RelatedIndividuals", "") + self.assertTrue(ri, "related_individuals dropped") + self.assertIn("FBbt_00000057", ri, "related individual target id missing") + + # --- Coverage: DataSet external link ----------------------------------- + def test_dataset_link_present(self): + ti = self._parse("Ito2013") + link = ti.get("Meta", {}).get("Link", "") + self.assertIn("flybase.org/reports/FBrf0221438", link, "DataSet link dropped") + + # --- Targeting queries (splits<->neurons) as live query types ---------- + def test_neuron_class_offers_splits_targeting(self): + ti = self._parse("FBbt_00100243") # MBON neuron class with split drivers + self.assertTrue(any(x.get("query") == "SplitsTargeting" for x in ti.get("Queries", [])), + "SplitsTargeting not offered on neuron class") + + def test_split_class_offers_target_neurons(self): + ti = self._parse("VFBexp_FBtp0129935FBtp0129968") # a split class + self.assertTrue(any(x.get("query") == "TargetNeurons" for x in ti.get("Queries", [])), + "TargetNeurons not offered on split class") + + def test_splits_targeting_returns_count_and_rows(self): + r = q.get_splits_targeting("FBbt_00100243", return_dataframe=False, limit=5) + self.assertIsInstance(r, dict) + self.assertGreater(r.get("count", 0), 0, "expected splits targeting MBON") + self.assertTrue(r.get("rows"), 
"no preview rows") + self.assertTrue(all(k in r["rows"][0] for k in ("id", "label", "tags", "thumbnail"))) + + def test_neurons_targeted_by_split_returns_count(self): + r = q.get_neurons_targeted_by_split("VFBexp_FBtp0129935FBtp0129968", return_dataframe=False, limit=5) + self.assertGreater(r.get("count", 0), 0, "expected neurons targeted by split") + + # --- Gap D: License term must not 5xx / return None -------------------- + def test_license_term_info_does_not_5xx(self): + # preview=False avoids the per-query count calls; License has no + # queries anyway. The point is that a valid dict comes back rather + # than None or a raised exception. + result = q.get_term_info("VFBlicense_CC_BY_SA_4_0", preview=False) + self.assertIsInstance(result, dict, "License term_info did not return a dict") + self.assertTrue(is_valid_term_info_result(result), + "License term_info failed validity check") + self.assertIn("License", result.get("SuperTypes", [])) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/vfbquery/ha_api.py b/src/vfbquery/ha_api.py index 4f443ae..475d3f5 100644 --- a/src/vfbquery/ha_api.py +++ b/src/vfbquery/ha_api.py @@ -266,6 +266,8 @@ async def security_middleware(request, handler): "PartsOf": "get_parts_of", "ComponentsOf": "get_components_of", "SubclassesOf": "get_subclasses_of", + "SplitsTargeting": "get_splits_targeting", + "TargetNeurons": "get_neurons_targeted_by_split", # Neurons in region "NeuronsPartHere": "get_neurons_with_part_in", diff --git a/src/vfbquery/solr_result_cache.py b/src/vfbquery/solr_result_cache.py index 47b901d..62f8863 100644 --- a/src/vfbquery/solr_result_cache.py +++ b/src/vfbquery/solr_result_cache.py @@ -493,12 +493,24 @@ def cache_result(self, query_type: str, term_id: str, result: Any, **params) -> "expires_at": cached_data["expires_at"] } - # Store cache document + # Store cache document. 
+ # Use a soft (deferred) commit by default: a hard per-write + # ``commit=true`` flush blocks the request until the IndexWriter + # completes, and on a wedged writer (e.g. the soft-NFS write.lock + # EIO failure mode) that stall propagates up — a cold-miss term + # such as a License individual then hangs and saturates the ha_api + # worker queue, surfacing as HTTP 503. Relying on the core's + # autoSoftCommit (as the sibling write paths in this module already + # do) keeps the write fast and non-blocking; the 3-month cache + # tolerates a few seconds' visibility delay. Override with + # VFBQUERY_SOLR_WRITE_COMMIT=true if an immediate commit is needed. + commit_flag = os.getenv('VFBQUERY_SOLR_WRITE_COMMIT', 'false').lower() \ + in ('1', 'true', 'yes') response = requests.post( f"{self.cache_url}/update", data=json.dumps([cache_doc]), headers={"Content-Type": "application/json"}, - params={"commit": "true"}, # Immediate commit for availability + params={"commit": "true" if commit_flag else "false"}, timeout=int(os.getenv('VFBQUERY_SOLR_WRITE_TIMEOUT', '60')) ) diff --git a/src/vfbquery/vfb_queries.py b/src/vfbquery/vfb_queries.py index 8657b35..783980f 100644 --- a/src/vfbquery/vfb_queries.py +++ b/src/vfbquery/vfb_queries.py @@ -331,6 +331,9 @@ class TermInfoOutputSchema(Schema): Publications = fields.List(fields.Dict(keys=fields.String(), values=fields.Raw()), required=False) Synonyms = fields.List(fields.Dict(keys=fields.String(), values=fields.Raw()), required=False, allow_none=True) Technique = fields.List(fields.String(), required=False, allow_none=True) + # External DB cross-references (site label + accession link + icon), rendered + # as the panel's xrefs section. 
+ Xrefs = fields.List(fields.Dict(keys=fields.String(), values=fields.Raw()), required=False, allow_none=True) @post_load def make_term_info(self, data, **kwargs): @@ -515,11 +518,38 @@ def term_info_parse_object(results, short_form): termInfo["Meta"]["Description"] = "%s"%("".join(vfbTerm.term.description)) except (NameError, AttributeError): pass + # Append class definition references (def_pubs) inline to the description + # as markdown microref links, matching how the panel renders them today + # (legacy VFBProcessTermInfoCachedJson definition() + "()"). + # Kept inline rather than as a separate Publications entry so the display + # is identical and no new panel section is introduced. + if getattr(vfbTerm, 'def_pubs', None): + def_refs = [p.get_microref() for p in vfbTerm.def_pubs + if hasattr(p, 'get_miniref') and p.get_miniref() + and hasattr(p, 'get_microref') and p.get_microref()] + if def_refs: + existing_desc = termInfo["Meta"].get("Description", "") + termInfo["Meta"]["Description"] = (existing_desc + "\n(" + ", ".join(def_refs) + ")") if existing_desc else ("(" + ", ".join(def_refs) + ")") try: # Retrieve comment from the term's comment attribute termInfo["Meta"]["Comment"] = "%s"%("".join(vfbTerm.term.comment)) except (NameError, AttributeError): pass + # External homepage link + logo (e.g. a DataSet's FlyBase/project link and + # icon). Rendered as the panel's link / logo rows + # (VFBProcessTermInfoCachedJson.java:1456 / :1449). Previously dropped. 
+ try: + _link = vfbTerm.term.get_link() if hasattr(vfbTerm.term, 'get_link') else "" + if _link: + termInfo["Meta"]["Link"] = _link + except (AttributeError, TypeError): + pass + try: + _logo = vfbTerm.term.get_logo() if hasattr(vfbTerm.term, 'get_logo') else "" + if _logo: + termInfo["Meta"]["Logo"] = _logo + except (AttributeError, TypeError): + pass if hasattr(vfbTerm, 'parents') and vfbTerm.parents and len(vfbTerm.parents) > 0: parents = [] @@ -794,6 +824,17 @@ def term_info_parse_object(results, short_form): sorted_images = {int(key): value for key, value in sorted(images.items(), key=lambda x: x[0])} termInfo["Domains"] = sorted_images + # SplitsTargeting — splits (intersectional expression patterns) that + # target this neuron class. TargetNeurons — neurons a split class targets. + # Live Neo4j queries (indexer neuron_split / split_neuron clauses), + # surfaced as queries with a count badge rather than static fields. + if contains_all_tags(termInfo["SuperTypes"], ["Class", "Neuron"]): + q = SplitsTargeting_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form}) + queries.append(q) + if contains_all_tags(termInfo["SuperTypes"], ["Class", "Split"]): + q = TargetNeurons_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form}) + queries.append(q) + if contains_all_tags(termInfo["SuperTypes"], ["Individual", "Neuron"]): q = SimilarMorphologyTo_to_schema(termInfo["Name"], {"neuron": vfbTerm.term.core.short_form, "similarity_score": "NBLAST_score"}) queries.append(q) @@ -1190,7 +1231,7 @@ def term_info_parse_object(results, short_form): publication["title"] = pub.core.label if pub.core.label else "" publication["short_form"] = pub.core.short_form if pub.core.short_form else "" publication["microref"] = pub.get_microref() if hasattr(pub, 'get_microref') and pub.get_microref() else "" - + # Add external references refs = [] if hasattr(pub, 'PubMed') and pub.PubMed: @@ -1199,14 +1240,16 @@ def term_info_parse_object(results, 
short_form): refs.append(f"http://flybase.org/reports/{pub.FlyBase}") if hasattr(pub, 'DOI') and pub.DOI: refs.append(f"https://doi.org/{pub.DOI}") - + publication["refs"] = refs publications.append(publication) - + termInfo["Publications"] = publications - # Add Synonyms for Class entities - if termInfo["SuperTypes"] and "Class" in termInfo["SuperTypes"] and vfbTerm.pub_syn and len(vfbTerm.pub_syn) > 0: + # Add Synonyms from pub_syn. Not gated on Class: Individual terms also + # carry pub_syn (parity gap B — Individual synonyms were dropped when + # this was Class-only). + if termInfo["SuperTypes"] and vfbTerm.pub_syn and len(vfbTerm.pub_syn) > 0: synonyms = [] for syn in vfbTerm.pub_syn: if hasattr(syn, 'synonym') and syn.synonym: @@ -1260,8 +1303,10 @@ def term_info_parse_object(results, short_form): if synonyms and "Synonyms" not in termInfo: termInfo["Synonyms"] = synonyms - # Special handling for Publication entities - if termInfo["SuperTypes"] and "Publication" in termInfo["SuperTypes"] and vfbTerm.pub_specific_content: + # Special handling for Publication entities. The SOLR SuperType marker is + # the lowercase "pub" (parity gap C — gating on "Publication" meant the + # block never fired, dropping pub title/PubMed/DOI/FlyBase links). + if termInfo["SuperTypes"] and ("pub" in termInfo["SuperTypes"] or "Publication" in termInfo["SuperTypes"]) and vfbTerm.pub_specific_content: publication = {} publication["title"] = vfbTerm.pub_specific_content.title if hasattr(vfbTerm.pub_specific_content, 'title') else "" publication["short_form"] = vfbTerm.term.core.short_form @@ -1292,6 +1337,51 @@ def term_info_parse_object(results, short_form): termInfo["Synonyms"].append(synonym) existing_labels.add(synonym["label"]) + # External database cross-references (xrefs). Rendered today as the + # panel's xrefs link section (VFBProcessTermInfoCachedJson.java:1536): + # site label, icon and the external accession link. Previously dropped + # by this parser (e.g. 
medulla's Insect Brain DB link). + if getattr(vfbTerm, 'xrefs', None): + xrefs_out = [] + for x in vfbTerm.xrefs: + site = getattr(x, 'site', None) + label = getattr(site, 'label', '') if site else '' + acc = x.accession if getattr(x, 'accession', None) and x.accession != "None" else '' + if acc: + link = (x.link_base or '') + acc + (x.link_postfix or '') + elif getattr(x, 'homepage', None): + link = x.homepage + else: + link = getattr(site, 'iri', '') if site else '' + entry = {"label": label, "accession": acc, "link": link} + if getattr(x, 'icon', None): + entry["icon"] = x.icon + xrefs_out.append(entry) + if xrefs_out: + termInfo["Xrefs"] = xrefs_out + + # Related individuals — same Rel shape as relationships, rendered as its + # own panel section (VFBProcessTermInfoCachedJson.java:1529). Kept as a + # Meta string so it travels with the other Meta rows. + if getattr(vfbTerm, 'related_individuals', None): + grouped_ri = {} + for rel in vfbTerm.related_individuals: + if not (hasattr(rel, 'relation') and hasattr(rel.relation, 'label')): + continue + if not (hasattr(rel, 'object') and hasattr(rel.object, 'label')): + continue + rid = getattr(rel.relation, 'short_form', None) or rel.relation.label + key = (rel.relation.label, rid) + obj = (rel.object.label, getattr(rel.object, 'short_form', '')) + grouped_ri.setdefault(key, set()).add(obj) + related = [] + for (rlabel, rid), objs in sorted(grouped_ri.items()): + objlinks = ", ".join("[%s](%s)" % (encode_brackets(o[0]), o[1]) for o in sorted(objs)) + related.append("[%s](%s): %s" % (encode_brackets(rlabel), rid, objlinks)) + if related: + termInfo["Meta"]["RelatedIndividuals"] = "; ".join(related) + + # Add the queries to the term info termInfo["Queries"] = queries @@ -1528,6 +1618,24 @@ def SubclassesOf_to_schema(name, take_default): return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns) +def SplitsTargeting_to_schema(name, take_default): + """Schema 
for SplitsTargeting query: splits that target a neuron class. + Matching criteria: Class + Neuron (mirrors the indexer neuron_split clause).""" + return Query(query="SplitsTargeting", label=f"Splits targeting {name}", + function="get_splits_targeting", + takes={"short_form": {"$and": ["Class", "Neuron"]}, "default": take_default}, + preview=5, preview_columns=["id", "label", "tags", "thumbnail"]) + + +def TargetNeurons_to_schema(name, take_default): + """Schema for TargetNeurons query: neurons targeted by a split class. + Matching criteria: Class + Split (mirrors the indexer split_neuron clause).""" + return Query(query="TargetNeurons", label=f"Neurons targeted by {name}", + function="get_neurons_targeted_by_split", + takes={"short_form": {"$and": ["Class", "Split"]}, "default": take_default}, + preview=5, preview_columns=["id", "label", "tags", "thumbnail"]) + + def NeuronClassesFasciculatingHere_to_schema(name, take_default): """ Schema for NeuronClassesFasciculatingHere query. @@ -3093,6 +3201,76 @@ def get_subclasses_of(short_form: str, return_dataframe=True, limit: int = -1): return _owlery_query_to_results(owl_query, short_form, return_dataframe, limit, solr_field='anat_query', query_by_label=False) +def _targeting_rows(base_match, var, short_form, return_dataframe, limit): + """Shared runner for the split<->neuron targeting queries. + + base_match must bind the result class to ``var`` for the given ``short_form``. + Returns the standard class-row table (id/label/tags/thumbnail) + true count, + so fill_query_results gets the real count without a re-run. 
+ """ + count_query = base_match + f" RETURN count(DISTINCT {var}) AS total_count" + count_df = pd.DataFrame.from_records(get_dict_cursor()(vc.nc.commit_list([count_query]))) + total_count = int(count_df['total_count'][0]) if not count_df.empty else 0 + + main_query = base_match + ( + f" WITH DISTINCT {var} " + f"CALL {{ WITH {var} OPTIONAL MATCH ({var})<-[:INSTANCEOF]-(:Individual)<-[:depicts]-" + "(:Individual)-[irw:in_register_with]->(:Template)-[:depicts]->(templ:Template) " + "RETURN irw, templ LIMIT 1 } " + f"RETURN {var}.short_form AS id, " + f"apoc.text.format(\"[%s](%s)\",[{var}.label, {var}.short_form]) AS label, " + f"apoc.text.join(coalesce({var}.uniqueFacets,[]),'|') AS tags, " + f"REPLACE(apoc.text.format(\"[![%s](%s '%s')](%s)\",[{var}.label, " + "REPLACE(COALESCE(irw.thumbnail[0],''),'thumbnailT.png','thumbnail.png'), " + f"{var}.label, templ.short_form + ',' + {var}.short_form]), " + "\"[![null]( 'null')](null)\", \"\") AS thumbnail " + "ORDER BY label" + ) + if limit != -1: + main_query += f" LIMIT {limit}" + df = pd.DataFrame.from_records(get_dict_cursor()(vc.nc.commit_list([main_query]))) + df = encode_markdown_links(df, ['label', 'thumbnail']) + if return_dataframe: + return df + return { + "headers": _get_standard_query_headers(), + "rows": [{k: row.get(k) for k in ["id", "label", "tags", "thumbnail"]} + for row in safe_to_dict(df, sort_by_id=False)], + "count": total_count, + } + + +@with_solr_cache('splits_targeting') +def get_splits_targeting(short_form: str, return_dataframe=True, limit: int = -1): + """Splits (intersectional expression patterns) that target the given neuron class. + + Live Neo4j query mirroring the indexer's neuron_split clause + (VFB_json_schema_indexer query_roller.neuron_split) — surfaced as a query + with a preview + count badge rather than a static term-info field. 
+ """ + base = ( + "MATCH (:Class {label:'intersectional expression pattern'})" + "<-[:SUBCLASSOF]-(ep:Class)<-[:part_of]-(:Individual)" + f"-[:INSTANCEOF]->(primary:Class {{short_form:'{short_form}'}})" + ) + return _targeting_rows(base, "ep", short_form, return_dataframe, limit) + + +@with_solr_cache('neurons_targeted_by_split') +def get_neurons_targeted_by_split(short_form: str, return_dataframe=True, limit: int = -1): + """Neurons targeted by the given split class. + + Live Neo4j query mirroring the indexer's split_neuron clause + (VFB_json_schema_indexer query_roller.split_neuron). + """ + base = ( + "MATCH (:Class {label:'intersectional expression pattern'})" + f"<-[:SUBCLASSOF]-(primary:Class {{short_form:'{short_form}'}})" + "<-[:part_of]-(:Individual)-[:INSTANCEOF]->(n:Neuron)" + ) + return _targeting_rows(base, "n", short_form, return_dataframe, limit) + + @with_solr_cache('neuron_classes_fasciculating_here') def get_neuron_classes_fasciculating_here(short_form: str, return_dataframe=True, limit: int = -1): """ @@ -6098,22 +6276,38 @@ def process_query(query): result_count = result['count'] # If limit was applied, the count in dict may be wrong, get correct count if query['preview'] > 0 and result_count == len(result['rows']): - try: - full_kwargs = {'return_dataframe': False, 'limit': -1} - if supports_force_refresh: - full_kwargs['force_refresh'] = force_refresh - if function_args and takes_short_form: - short_form_value = list(function_args.values())[0] - full_dict = function(short_form_value, **full_kwargs) - else: - full_dict = function(**full_kwargs) - result_count = full_dict['count'] - except Exception as e: - print(f"Error getting full count for {query['function']}: {e}") - result_count = result['count'] # Keep as is + # Skip the full limit=-1 re-run when the preview was not + # saturated: fewer returned rows than the preview cap means + # the preview already holds the entire result set, so the + # count is exactly the number of preview rows. 
This avoids + # materialising every row purely to length-check it — the + # main driver of cold term-info latency on SuperTypes that + # offer many queries (expression pattern, scRNAseq cluster), + # and a no-op win for zero/low-count queries (grey-out path). + if len(result['rows']) < query['preview']: + result_count = len(result['rows']) + else: + try: + full_kwargs = {'return_dataframe': False, 'limit': -1} + if supports_force_refresh: + full_kwargs['force_refresh'] = force_refresh + if function_args and takes_short_form: + short_form_value = list(function_args.values())[0] + full_dict = function(short_form_value, **full_kwargs) + else: + full_dict = function(**full_kwargs) + result_count = full_dict['count'] + except Exception as e: + print(f"Error getting full count for {query['function']}: {e}") + result_count = result['count'] # Keep as is elif isinstance(result, pd.DataFrame): - # For DataFrame results, we need the full count even when preview is limited - try: + # For DataFrame results, we need the full count even when preview is limited. + # But skip the full limit=-1 re-run when the preview was not saturated + # (fewer rows than the cap means the preview already holds every row). + if query['preview'] > 0 and len(result) < query['preview']: + result_count = len(result) + else: + try: full_kwargs = {'return_dataframe': True, 'limit': -1} if supports_force_refresh: full_kwargs['force_refresh'] = force_refresh @@ -6123,7 +6317,7 @@ def process_query(query): else: full_result = function(**full_kwargs) result_count = len(full_result) - except Exception as e: + except Exception as e: print(f"Error getting full count for {query['function']}: {e}") result_count = len(result) # Fallback to limited count else: