diff --git a/src/google/adk/cli/cli_eval.py b/src/google/adk/cli/cli_eval.py
index 33c1693208..5aa0ecb848 100644
--- a/src/google/adk/cli/cli_eval.py
+++ b/src/google/adk/cli/cli_eval.py
@@ -24,7 +24,9 @@
 import click
 from google.genai import types as genai_types
 
+from ..agents.base_agent import BaseAgent
 from ..agents.llm_agent import Agent
+from ..apps.app import App
 from ..evaluation.base_eval_service import BaseEvalService
 from ..evaluation.base_eval_service import EvaluateConfig
 from ..evaluation.base_eval_service import EvaluateRequest
@@ -86,11 +88,33 @@ def get_default_metric_info(
   )
 
 
-def get_root_agent(agent_module_file_path: str) -> Agent:
-  """Returns root agent given the agent module."""
+def get_app_or_root_agent(
+    agent_module_file_path: str,
+) -> tuple[Optional[App], BaseAgent]:
+  """Resolves the `(app, root_agent)` pair exposed by an agent module.
+
+  Like `AgentLoader._load_from_module_or_package`, an `App` instance found
+  at `agent.app` wins: it is returned together with its own `root_agent`,
+  so eval flows can honor the App's plugin / cache / resumability setup.
+  Any other module yields `None` for the app plus the bare
+  `agent.root_agent`, which keeps projects without an App working.
+  """
   agent_module = _get_agent_module(agent_module_file_path)
-  root_agent = agent_module.agent.root_agent
-  return root_agent
+  agent_namespace = agent_module.agent
+  candidate = getattr(agent_namespace, "app", None)
+  if not isinstance(candidate, App):
+    return None, agent_namespace.root_agent
+  return candidate, candidate.root_agent
+
+
+def get_root_agent(agent_module_file_path: str) -> BaseAgent:
+  """Returns the root agent of the given agent module.
+
+  Kept for backward compatibility; it returns the agent half of
+  `get_app_or_root_agent`, which new callers should prefer because it also
+  surfaces the wrapping `App` (if any) with its plugins and configs.
+  """
+  return get_app_or_root_agent(agent_module_file_path)[1]
 
 
 def try_get_reset_func(agent_module_file_path: str) -> Any:
diff --git a/src/google/adk/cli/cli_tools_click.py b/src/google/adk/cli/cli_tools_click.py
index c6a71175a1..76764cfbb4 100644
--- a/src/google/adk/cli/cli_tools_click.py
+++ b/src/google/adk/cli/cli_tools_click.py
@@ -979,8 +979,8 @@ def cli_eval(
     from ..evaluation.simulation.user_simulator_provider import UserSimulatorProvider
     from .cli_eval import _collect_eval_results
     from .cli_eval import _collect_inferences
+    from .cli_eval import get_app_or_root_agent
     from .cli_eval import get_default_metric_info
-    from .cli_eval import get_root_agent
     from .cli_eval import parse_and_get_evals_to_run
     from .cli_eval import pretty_print_eval_result
   except ModuleNotFoundError as mnf:
@@ -990,7 +990,7 @@ def cli_eval(
   print(f"Using evaluation criteria: {eval_config}")
   eval_metrics = get_eval_metrics_from_config(eval_config)
 
-  root_agent = get_root_agent(agent_module_file_path)
+  app, root_agent = get_app_or_root_agent(agent_module_file_path)
   app_name = os.path.basename(agent_module_file_path)
   agents_dir = os.path.dirname(agent_module_file_path)
   eval_sets_manager = None
@@ -1098,6 +1098,7 @@ def cli_eval(
         eval_set_results_manager=eval_set_results_manager,
         user_simulator_provider=user_simulator_provider,
         metric_evaluator_registry=metric_evaluator_registry,
+        app=app,
     )
 
     inference_results = asyncio.run(
@@ -1121,8 +1122,6 @@ def cli_eval(
   eval_run_summary = {}
 
   for eval_result in eval_results:
-    eval_result: EvalCaseResult
-
     if eval_result.eval_set_id not in eval_run_summary:
       eval_run_summary[eval_result.eval_set_id] = [0, 0]
 
@@ -1139,7 +1138,6 @@ def cli_eval(
 
   if print_detailed_results:
     for eval_result in eval_results:
-      eval_result: EvalCaseResult
       click.echo(
           "********************************************************************"
       )
diff --git a/src/google/adk/evaluation/evaluation_generator.py b/src/google/adk/evaluation/evaluation_generator.py
index 5b0100818c..f6e88bdb2f 100644
--- a/src/google/adk/evaluation/evaluation_generator.py
+++ b/src/google/adk/evaluation/evaluation_generator.py
@@ -36,6 +36,7 @@
 from ..agents.llm_agent import Agent
 from ..agents.run_config import RunConfig
 from ..agents.run_config import StreamingMode
+from ..apps.app import App
 from ..artifacts.base_artifact_service import BaseArtifactService
 from ..artifacts.in_memory_artifact_service import InMemoryArtifactService
 from ..events.event import Event
@@ -331,20 +332,30 @@ async def _process_query(
     """Process a query using the agent and evaluation dataset."""
     module_path = f"{module_name}"
     agent_module = importlib.import_module(module_path)
-    root_agent = agent_module.agent.root_agent
+    # Prefer the wrapping `App` when the module exposes one, so that
+    # `app.plugins`, context-cache, and resumability configs participate
+    # in eval runs the same way they do for `adk web` / `adk run`.
+    app_obj = getattr(agent_module.agent, "app", None)
+    if isinstance(app_obj, App):
+      root_agent = app_obj.root_agent
+    else:
+      app_obj = None
+      root_agent = agent_module.agent.root_agent
 
     reset_func = getattr(agent_module.agent, "reset_data", None)
 
     agent_to_evaluate = root_agent
     if agent_name:
-      agent_to_evaluate = root_agent.find_agent(agent_name)
-      assert agent_to_evaluate, f"Sub-Agent `{agent_name}` not found."
+      found_agent = root_agent.find_agent(agent_name)
+      assert found_agent, f"Sub-Agent `{agent_name}` not found."
+      agent_to_evaluate = found_agent
 
     return await EvaluationGenerator._generate_inferences_from_root_agent(
         agent_to_evaluate,
         user_simulator=user_simulator,
         reset_func=reset_func,
         initial_session=initial_session,
+        app=app_obj,
     )
 
   @staticmethod
@@ -543,8 +554,17 @@ async def _generate_inferences_from_root_agent(
       session_service: Optional[BaseSessionService] = None,
       artifact_service: Optional[BaseArtifactService] = None,
       memory_service: Optional[BaseMemoryService] = None,
+      app: Optional[App] = None,
   ) -> list[Invocation]:
-    """Scrapes the root agent in coordination with the user simulator."""
+    """Scrapes the root agent in coordination with the user simulator.
+
+    If `app` is provided, the eval Runner is built from a copy of the App
+    with internal eval plugins merged into `app.plugins`, preserving the
+    App's `context_cache_config`, `resumability_config`, and any other
+    application-wide configuration. Otherwise the Runner is built from
+    the bare `root_agent` with only the internal eval plugins, matching
+    the legacy behavior.
+    """
 
     if not session_service:
       session_service = InMemorySessionService()
@@ -581,13 +601,39 @@ async def _generate_inferences_from_root_agent(
     ensure_retry_options_plugin = EnsureRetryOptionsPlugin(
         name="ensure_retry_options"
     )
+    internal_eval_plugins = [
+        request_intercepter_plugin,
+        ensure_retry_options_plugin,
+    ]
+
+    if app is not None:
+      # Copy the App so we don't mutate the user's instance, and merge our
+      # internal eval plugins with the user's. Override `root_agent` so the
+      # Runner targets the agent the caller actually asked us to evaluate
+      # (e.g., a sub-agent), while still carrying the App's plugins,
+      # context_cache_config, and resumability_config.
+      runner_app = app.model_copy(
+          update={
+              "plugins": list(app.plugins) + internal_eval_plugins,
+              "root_agent": root_agent,
+          }
+      )
+      runner_kwargs: dict[str, Any] = {
+          "app": runner_app,
+          "app_name": app_name,
+      }
+    else:
+      runner_kwargs = {
+          "app_name": app_name,
+          "agent": root_agent,
+          "plugins": internal_eval_plugins,
+      }
+
     async with Runner(
-        app_name=app_name,
-        agent=root_agent,
+        **runner_kwargs,
         artifact_service=artifact_service,
         session_service=session_service,
         memory_service=memory_service,
-        plugins=[request_intercepter_plugin, ensure_retry_options_plugin],
     ) as runner:
       events = []
       while True:
diff --git a/src/google/adk/evaluation/local_eval_service.py b/src/google/adk/evaluation/local_eval_service.py
index 1a032bad64..bb2cc0d38c 100644
--- a/src/google/adk/evaluation/local_eval_service.py
+++ b/src/google/adk/evaluation/local_eval_service.py
@@ -25,6 +25,7 @@
 from typing_extensions import override
 
 from ..agents.base_agent import BaseAgent
+from ..apps.app import App
 from ..artifacts.base_artifact_service import BaseArtifactService
 from ..artifacts.in_memory_artifact_service import InMemoryArtifactService
 from ..errors.not_found_error import NotFoundError
@@ -123,8 +124,20 @@ def __init__(
       session_id_supplier: Callable[[], str] = _get_session_id,
       user_simulator_provider: UserSimulatorProvider = UserSimulatorProvider(),
       memory_service: Optional[BaseMemoryService] = None,
+      *,
+      app: Optional[App] = None,
   ):
+    """Initializes a LocalEvalService.
+
+    Args:
+      app: Optional `App` that wraps `root_agent`. When provided, eval runs
+        are executed through a Runner built from the App, so `app.plugins`,
+        `app.context_cache_config`, and `app.resumability_config` are
+        honored during inference. When None, the legacy bare-agent path is
+        used.
+    """
     self._root_agent = root_agent
+    self._app = app
     self._eval_sets_manager = eval_sets_manager
     metric_evaluator_registry = (
         metric_evaluator_registry or DEFAULT_METRIC_EVALUATOR_REGISTRY
@@ -516,6 +529,7 @@ async def _perform_inference_single_eval_item(
                   session_service=self._session_service,
                   artifact_service=self._artifact_service,
                   memory_service=self._memory_service,
+                  app=self._app,
               )
           )
 
diff --git a/tests/unittests/cli/utils/test_cli_eval.py b/tests/unittests/cli/utils/test_cli_eval.py
index c6d21fa707..368a943dc6 100644
--- a/tests/unittests/cli/utils/test_cli_eval.py
+++ b/tests/unittests/cli/utils/test_cli_eval.py
@@ -19,6 +19,9 @@
 from types import SimpleNamespace
 from unittest import mock
 
+from google.adk.agents.base_agent import BaseAgent
+from google.adk.apps.app import App
+
 
 def test_get_eval_sets_manager_local(monkeypatch):
   mock_local_manager = mock.MagicMock()
@@ -49,3 +52,66 @@ def test_get_eval_sets_manager_gcs(monkeypatch):
   )
   assert manager == mock_gcs_manager
   mock_create_gcs.assert_called_once_with("gs://bucket")
+
+
+def _patch_agent_module(monkeypatch, agent_namespace):
+  """Makes `_get_agent_module` return a stub module exposing `.agent`."""
+  stub_module = SimpleNamespace(agent=agent_namespace)
+  monkeypatch.setattr(
+      "google.adk.cli.cli_eval._get_agent_module", lambda _path: stub_module
+  )
+
+
+def test_get_app_or_root_agent_with_app(monkeypatch):
+  """An `App` exposed by the module is returned with the agent it wraps."""
+  wrapped_agent = BaseAgent(name="root_agent")
+  exposed_app = App(name="my_app", root_agent=wrapped_agent)
+  agent_namespace = SimpleNamespace(root_agent=wrapped_agent, app=exposed_app)
+  _patch_agent_module(monkeypatch, agent_namespace)
+
+  from google.adk.cli.cli_eval import get_app_or_root_agent
+
+  found_app, found_agent = get_app_or_root_agent("some/path")
+
+  assert found_app is exposed_app
+  assert found_agent is wrapped_agent
+
+
+def test_get_app_or_root_agent_without_app(monkeypatch):
+  """A module exposing only `root_agent` resolves to `(None, root_agent)`."""
+  bare_agent = BaseAgent(name="root_agent")
+  _patch_agent_module(monkeypatch, SimpleNamespace(root_agent=bare_agent))
+
+  from google.adk.cli.cli_eval import get_app_or_root_agent
+
+  found_app, found_agent = get_app_or_root_agent("some/path")
+  assert found_agent is bare_agent
+  assert found_app is None
+
+
+def test_get_app_or_root_agent_app_attribute_not_an_app_instance(monkeypatch):
+  """A non-`App` value under `app` is ignored in favor of `root_agent`."""
+  fallback_agent = BaseAgent(name="root_agent")
+  # `app` is present but is a plain string, so it must not be returned.
+  agent_namespace = SimpleNamespace(root_agent=fallback_agent, app="not-an-app")
+  _patch_agent_module(monkeypatch, agent_namespace)
+
+  from google.adk.cli.cli_eval import get_app_or_root_agent
+
+  found_app, found_agent = get_app_or_root_agent("some/path")
+
+  assert found_agent is fallback_agent
+  assert found_app is None
+
+
+def test_get_root_agent_back_compat(monkeypatch):
+  """`get_root_agent` still returns just the agent, even with an `App`."""
+  wrapped_agent = BaseAgent(name="root_agent")
+  exposed_app = App(name="my_app", root_agent=wrapped_agent)
+  agent_namespace = SimpleNamespace(root_agent=wrapped_agent, app=exposed_app)
+  _patch_agent_module(monkeypatch, agent_namespace)
+
+  from google.adk.cli.cli_eval import get_root_agent
+
+  resolved = get_root_agent("some/path")
+  assert resolved is wrapped_agent
diff --git a/tests/unittests/cli/utils/test_cli_tools_click.py b/tests/unittests/cli/utils/test_cli_tools_click.py
index 4f77a71f16..459b7ab673 100644
--- a/tests/unittests/cli/utils/test_cli_tools_click.py
+++ b/tests/unittests/cli/utils/test_cli_tools_click.py
@@ -60,8 +60,14 @@ def mock_load_eval_set_from_file():
 
 @pytest.fixture
 def mock_get_root_agent():
-  with mock.patch("google.adk.cli.cli_eval.get_root_agent") as mock_func:
-    mock_func.return_value = root_agent
+  """Patches the agent resolver that the eval CLI imports from `cli_eval`.
+
+  The patch targets `get_app_or_root_agent`, whose result is an
+  `(app, root_agent)` pair. These eval-set tests do not exercise the App
+  path, so the mock returns `(None, root_agent)`.
+  """
+  with mock.patch("google.adk.cli.cli_eval.get_app_or_root_agent") as mock_func:
+    mock_func.return_value = (None, root_agent)
     yield mock_func
 
 
diff --git a/tests/unittests/evaluation/test_evaluation_generator.py b/tests/unittests/evaluation/test_evaluation_generator.py
index 05ab25cc72..9cefb387dc 100644
--- a/tests/unittests/evaluation/test_evaluation_generator.py
+++ b/tests/unittests/evaluation/test_evaluation_generator.py
@@ -16,6 +16,8 @@
 
 import asyncio
 
+from google.adk.agents.base_agent import BaseAgent
+from google.adk.apps.app import App
 from google.adk.evaluation.app_details import AgentDetails
 from google.adk.evaluation.app_details import AppDetails
 from google.adk.evaluation.evaluation_generator import _LiveSession
@@ -26,6 +28,7 @@
 from google.adk.evaluation.simulation.user_simulator import UserSimulator
 from google.adk.events.event import Event
 from google.adk.models.llm_request import LlmRequest
+from google.adk.plugins.base_plugin import BasePlugin
 from google.genai import types
 import pytest
 
@@ -860,3 +863,133 @@ async def mock_run_live(*args, **kwargs):
     )
     assert isinstance(called_after_args.kwargs["llm_response"], Event)
     assert called_after_args.kwargs["llm_response"] == mock_event
+
+
+class _SpyPlugin(BasePlugin):
+  """Stand-in for a user-supplied plugin in the merge assertions."""
+
+  # Intentionally empty: `BasePlugin` already provides everything needed.
+
+
+class TestGenerateInferencesFromRootAgentWithApp:
+  """Checks how the eval Runner is constructed with and without an `App`."""
+
+  @pytest.fixture
+  def runner_cls(self, mocker):
+    """Swaps `Runner` for a mock class so tests can inspect its call kwargs."""
+    entered_runner = mocker.AsyncMock()
+    # `async with Runner(...)` must hand back this same mock instance.
+    entered_runner.__aenter__.return_value = entered_runner
+    return mocker.patch(
+        "google.adk.evaluation.evaluation_generator.Runner",
+        return_value=entered_runner,
+    )
+
+  @pytest.fixture
+  def stop_immediately_simulator(self, mocker):
+    """Builds a UserSimulator whose first reply is already the stop signal."""
+    stop_reply = NextUserMessage(
+        status=UserSimulatorStatus.STOP_SIGNAL_DETECTED
+    )
+    user_sim = mocker.MagicMock(spec=UserSimulator)
+    # Stopping right away means the run needs no real inference work.
+    user_sim.get_next_user_message = mocker.AsyncMock(return_value=stop_reply)
+    return user_sim
+
+  @pytest.mark.asyncio
+  async def test_runner_built_from_app_when_provided(
+      self, runner_cls, mock_session_service, stop_immediately_simulator
+  ):
+    """With `app` given, Runner gets a merged `app=` and no `agent=`."""
+    agent = BaseAgent(name="root_agent")
+    source_app = App(
+        name="my_app",
+        root_agent=agent,
+        plugins=[_SpyPlugin(name="user_plugin")],
+    )
+
+    await EvaluationGenerator._generate_inferences_from_root_agent(
+        root_agent=agent,
+        user_simulator=stop_immediately_simulator,
+        app=source_app,
+    )
+
+    runner_cls.assert_called_once()
+    runner_kwargs = runner_cls.call_args.kwargs
+    # Neither legacy kwarg may accompany `app=`; see the messages below.
+    for forbidden in ("agent", "plugins"):
+      assert forbidden not in runner_kwargs, (
+          f"Runner must not receive `{forbidden}=` when `app=` is provided "
+          "(would raise ValueError)."
+      )
+    merged_app = runner_kwargs["app"]
+    assert isinstance(merged_app, App)
+    merged_names = {plugin.name for plugin in merged_app.plugins}
+    assert (
+        "user_plugin" in merged_names
+    ), "User plugin must be preserved in the merged App passed to Runner."
+    assert "request_intercepter_plugin" in merged_names
+    assert "ensure_retry_options" in merged_names
+
+  @pytest.mark.asyncio
+  async def test_user_app_is_not_mutated(
+      self, runner_cls, mock_session_service, stop_immediately_simulator
+  ):
+    """Repeated eval runs leave the caller's App and its plugins untouched."""
+    agent = BaseAgent(name="root_agent")
+    spy = _SpyPlugin(name="user_plugin")
+    shared_app = App(name="my_app", root_agent=agent, plugins=[spy])
+    plugins_list_id = id(shared_app.plugins)
+
+    for _run in range(3):
+      await EvaluationGenerator._generate_inferences_from_root_agent(
+          root_agent=agent,
+          user_simulator=stop_immediately_simulator,
+          app=shared_app,
+      )
+
+    # Same list object with the same single entry, however often it was reused.
+    assert shared_app.plugins == [spy]
+    assert id(shared_app.plugins) == plugins_list_id
+
+  @pytest.mark.asyncio
+  async def test_runner_falls_back_to_bare_agent_when_no_app(
+      self, runner_cls, mock_session_service, stop_immediately_simulator
+  ):
+    """Without `app`, Runner gets the legacy `agent=` / `plugins=` kwargs."""
+    bare_agent = BaseAgent(name="root_agent")
+    expected_names = [
+        "request_intercepter_plugin",
+        "ensure_retry_options",
+    ]
+
+    await EvaluationGenerator._generate_inferences_from_root_agent(
+        root_agent=bare_agent,
+        user_simulator=stop_immediately_simulator,
+    )
+
+    runner_cls.assert_called_once()
+    runner_kwargs = runner_cls.call_args.kwargs
+    assert "app" not in runner_kwargs
+    assert runner_kwargs["agent"] is bare_agent
+    assert [p.name for p in runner_kwargs["plugins"]] == expected_names
+
+  @pytest.mark.asyncio
+  async def test_root_agent_override_propagates_to_merged_app(
+      self, runner_cls, mock_session_service, stop_immediately_simulator
+  ):
+    """The App handed to Runner targets the agent passed as `root_agent`."""
+    app_root = BaseAgent(name="full_root")
+    evaluated_agent = BaseAgent(name="sub_agent")
+    source_app = App(name="my_app", root_agent=app_root)
+
+    await EvaluationGenerator._generate_inferences_from_root_agent(
+        root_agent=evaluated_agent,
+        user_simulator=stop_immediately_simulator,
+        app=source_app,
+    )
+
+    # The caller's App keeps its root; only the Runner's copy is retargeted.
+    assert source_app.root_agent is app_root
+    merged_app = runner_cls.call_args.kwargs["app"]
+    assert merged_app.root_agent is evaluated_agent
diff --git a/tests/unittests/evaluation/test_local_eval_service.py b/tests/unittests/evaluation/test_local_eval_service.py
index 3bbfafc5be..7c5755ae84 100644
--- a/tests/unittests/evaluation/test_local_eval_service.py
+++ b/tests/unittests/evaluation/test_local_eval_service.py
@@ -19,6 +19,7 @@
 from typing import Optional
 
 from google.adk.agents.llm_agent import LlmAgent
+from google.adk.apps.app import App
 from google.adk.errors.not_found_error import NotFoundError
 from google.adk.evaluation.base_eval_service import EvaluateConfig
 from google.adk.evaluation.base_eval_service import EvaluateRequest
@@ -906,6 +907,10 @@ async def test_perform_inference_single_eval_item_non_live(
       live_timeout_seconds=300,
   )
 
+  # The non-live branch forwards `app=self._app` to the underlying
+  # `_generate_inferences_from_root_agent` (see fix in
+  # `local_eval_service.py`). The `eval_service` fixture builds the service
+  # without an `app`, so we expect `app=None`.
   mock_generate.assert_called_once_with(
       root_agent=dummy_agent,
       user_simulator=mock_user_sim,
@@ -914,4 +919,76 @@ async def test_perform_inference_single_eval_item_non_live(
       session_service=eval_service._session_service,
       artifact_service=eval_service._artifact_service,
       memory_service=eval_service._memory_service,
+      app=None,
   )
+
+
+@pytest.mark.asyncio
+async def test_perform_inference_forwards_app_to_evaluation_generator(
+    dummy_agent, mock_eval_sets_manager, mocker
+):
+  """The `app` given to LocalEvalService reaches the inference generator."""
+  wrapping_app = App(name="test_app", root_agent=dummy_agent)
+  generate_spy = mocker.patch(
+      "google.adk.evaluation.local_eval_service.EvaluationGenerator._generate_inferences_from_root_agent",
+      new=mocker.AsyncMock(return_value=[]),
+  )
+  single_case_set = EvalSet(
+      eval_set_id="set-1",
+      eval_cases=[EvalCase(eval_id="case-1", conversation=[])],
+  )
+  mock_eval_sets_manager.get_eval_set.return_value = single_case_set
+
+  service = LocalEvalService(
+      root_agent=dummy_agent,
+      eval_sets_manager=mock_eval_sets_manager,
+      app=wrapping_app,
+  )
+  infer_request = InferenceRequest(
+      app_name="test_app",
+      eval_set_id="set-1",
+      eval_case_ids=["case-1"],
+      inference_config=InferenceConfig(),
+  )
+
+  # Drain the async generator; only the forwarded kwargs matter here.
+  results = service.perform_inference(inference_request=infer_request)
+  async for _result in results:
+    pass
+
+  generate_spy.assert_awaited_once()
+  assert generate_spy.await_args.kwargs["app"] is wrapping_app
+
+
+@pytest.mark.asyncio
+async def test_perform_inference_passes_none_when_no_app(
+    dummy_agent, mock_eval_sets_manager, mocker
+):
+  """A LocalEvalService built without `app` forwards `app=None`."""
+  generate_spy = mocker.patch(
+      "google.adk.evaluation.local_eval_service.EvaluationGenerator._generate_inferences_from_root_agent",
+      new=mocker.AsyncMock(return_value=[]),
+  )
+  single_case_set = EvalSet(
+      eval_set_id="set-1",
+      eval_cases=[EvalCase(eval_id="case-1", conversation=[])],
+  )
+  mock_eval_sets_manager.get_eval_set.return_value = single_case_set
+
+  service = LocalEvalService(
+      root_agent=dummy_agent,
+      eval_sets_manager=mock_eval_sets_manager,
+  )
+  infer_request = InferenceRequest(
+      app_name="test_app",
+      eval_set_id="set-1",
+      eval_case_ids=["case-1"],
+      inference_config=InferenceConfig(),
+  )
+
+  results = service.perform_inference(inference_request=infer_request)
+  async for _result in results:
+    pass
+
+  generate_spy.assert_awaited_once()
+  assert generate_spy.await_args.kwargs["app"] is None