diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7fa8d3c..cca3ef0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -26,6 +26,12 @@ on: - 'CMakeLists.txt' - 'library.json' - '.github/workflows/release.yml' + # The web installer + landing page are served from Pages by the deploy-pages job + # below; a change to them must trigger a deploy or it never reaches the live site + # (the eth-only-provisioning fix shipped a commit that didn't auto-deploy because + # docs/install was missing here). src/ui/install-picker*.js is already covered by src/**. + - 'docs/install/**' + - 'docs/landing/**' workflow_dispatch: inputs: tag: @@ -102,6 +108,10 @@ jobs: - uses: actions/checkout@v4 with: persist-credentials: false + # Full history: compute_version.py counts commits since the last v* tag + # for the `latest` build's `-dev.<N>` suffix. A shallow clone (the default) + # has no tags / partial history and would yield a wrong count. + fetch-depth: 0 - name: Cache ESP-IDF tooling uses: actions/cache@v4 @@ -129,6 +139,27 @@ jobs: elif [ "$IS_MAIN" = "true" ]; then echo "tag=latest" >> "$GITHUB_OUTPUT" else echo "tag=$REF_NAME" >> "$GITHUB_OUTPUT"; fi + # The semver burned into the binary + stamped on the assets/manifest. A + # `latest` build gets `-dev.<N>` (N = commits since the last v* tag) + # so successive latest builds are orderable; a stable tag gets the core. + # Computed once here and reused by build + staging so all three agree. + - name: Compute version + id: ver + # The channel (latest vs stable) and the -rc handling both live in + # compute_version.py — pass only the tag, the helper derives the rest, so + # this step and the release job's identical step can't disagree. Raw + # `python` (not `uv run`): this job has no setup-uv (the ESP-IDF docker + # action provides Python) and the script is stdlib-only. 
+ # Tag passed via env (not inline ${{ }}) so it reaches the script as a + # plain shell variable, never spliced into the command text — no shell + # injection from a crafted tag/ref. + env: + TAG: ${{ steps.tag.outputs.tag }} + run: | + set -euo pipefail + V=$(python scripts/build/compute_version.py --tag "$TAG") + echo "version=$V" >> "$GITHUB_OUTPUT" + - name: Build firmware uses: espressif/esp-idf-ci-action@v1 with: @@ -150,13 +181,13 @@ jobs: # We run our own builder (not the action's default `idf.py build`) # so the sdkconfig fragments and EXCLUDE_COMPONENTS go through the # same code path as local builds. --release burns the channel tag in. - command: python ../scripts/build/build_esp32.py --firmware ${{ matrix.firmware }} --release "${{ steps.tag.outputs.tag }}" + command: python ../scripts/build/build_esp32.py --firmware ${{ matrix.firmware }} --release "${{ steps.tag.outputs.tag }}" --version "${{ steps.ver.outputs.version }}" - name: Stage release artifacts run: | set -euo pipefail mkdir -p dist - V=$(jq -r .version library.json) + V="${{ steps.ver.outputs.version }}" # computed once above; matches the binary's MM_VERSION # Per-firmware build dir under build/esp32-<firmware>/ (plan-19.1). # build_esp32.py points idf.py at this dir via -B, so the build # tree lives outside esp32/ and multiple firmwares can coexist — @@ -242,6 +273,10 @@ jobs: # the "Re-create latest" step below force-pushes the `latest` tag with git, # which needs the token in .git/config. - uses: actions/checkout@v4 + with: + # Full history: compute_version.py counts commits since the last v* tag + # for the manifest's `-dev.<N>` version (must match the binary's). + fetch-depth: 0 - uses: astral-sh/setup-uv@v3 @@ -272,13 +307,28 @@ jobs: echo "tag=$REF_NAME" >> "$GITHUB_OUTPUT" fi + # Same computation as the build job's "Compute version" — the manifest's + # version must match the binary's MM_VERSION + the asset names. 
Channel + + # -rc handling live in compute_version.py; pass only the tag (this job has + # setup-uv, so `uv run`). + - name: Compute version + id: ver + # Tag via env (not inline ${{ }}) to keep it out of the command text — + # no shell injection from a crafted tag/ref. + env: + TAG: ${{ steps.tag.outputs.tag }} + run: | + set -euo pipefail + V=$(uv run python scripts/build/compute_version.py --tag "$TAG") + echo "version=$V" >> "$GITHUB_OUTPUT" + - name: Generate ESP Web Tools manifests (release-asset URLs) env: TAG: ${{ steps.tag.outputs.tag }} REPO: ${{ github.repository }} run: | set -euo pipefail - V=$(jq -r .version library.json) + V="${{ steps.ver.outputs.version }}" # computed once above; matches binary + asset names # Absolute GitHub release-asset URLs. Uploaded as release assets; # read by the on-device OTA picker (device fetches the .bin directly # — no CORS). The Pages-relative manifests are generated in the @@ -342,6 +392,14 @@ jobs: uses: softprops/action-gh-release@v2 with: tag_name: ${{ steps.tag.outputs.tag }} + # Release `name` is the computed semver (e.g. "2.1.0-dev.7"). The device- + # hosted UI's dev-channel update check reads it from the CORS-readable + # GitHub API (releases/tags/latest) — the manifest-*.json asset that also + # carries the version is fetched via a release-asset URL that redirects to + # release-assets.githubusercontent.com, which sends no CORS header, so the + # browser blocks that read from the device origin. The API exposes `name` + # cross-origin, so surfacing the version here is what makes the badge work. + name: ${{ steps.ver.outputs.version }} # latest and vX.Y.Z-rcN tags are prerelease — they sort below stable # on the Releases page and aren't picked up by tooling that asks for # "latest release". Stable vX.Y.Z tags publish normally. diff --git a/CLAUDE.md b/CLAUDE.md index b5d50c5..d8328bf 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -16,6 +16,7 @@ See `docs/architecture.md` for system design. 
This file contains only rules and - **Default to subtraction.** The reflex on most changes (a bug fix, a review finding, a refactor) should be *can this remove or replace code, or land net-neutral?*, not *what do I add?* If a change only ever grows the line count and the doc count, that's the smell this rule exists to catch. Prefer removing code over adding it; a deletion that preserves behaviour is the best kind of change. - **Continuous refactor, no hacks.** Improvement is not a scheduled phase; it happens *the moment* a hack, a divergence, or a duplicated pattern is spotted, in whatever change is already open. The bar is absolute: **never** leave a hack, a workaround, or a bespoke one-off in place because "it works for now" — the fix is the *recognisable, standard* one. So when you reach for a clever shortcut, an environment sniff, a duplicated block, a stub that papers over a broken dependency, stop and ask *what's the textbook construct here?* and do that instead. This is the union of three principles applied as a working reflex rather than a checklist: *[Common patterns first](#principles)* (use the construct a new contributor recognises in 30s), *[Industry standards, our own code](#principles)* (the textbook algorithm AND the textbook name, written fresh against our architecture), and *[Minimalism means elegance](#principles)* (consistency, reuse, no duplication, the fast hot path). What this bullet adds over those: the **timing** (on sight, continuously, not deferred to a "cleanup later" that never comes) and the **no-hacks floor** (a workaround is never the destination; if the standard fix is genuinely out of scope right now, the hack doesn't ship — it's backlogged with the standard fix named, per *[Mandatory subtraction](#process-rules)*). The product-owner-initiated counterpart, for larger restructures, is the *[Refactor for simplicity](#process-rules)* process rule; this principle is the small-scale, agent-initiated version of the same instinct. 
- **No duplication, in code or docs.** Same logic in two places belongs in one shared function; same fact in two docs belongs in one place the other links to. A comment or doc paragraph that restates what the code already says is duplication too; delete it. (Reuse a recognisable shape rather than inventing one; see *Common patterns first* above.) +- **Document a thing once, reference it generically.** A module lives in one home (its `.h` + one `docs/moonmodules/*.md`), its registration, and its tests. Don't name it elsewhere: in other prose say "a modifier"/"a driver", not `FooModifier`, and don't re-explain what it does — the reader studies its spec. Naming a thing across unrelated files multiplies rename cost and teaches nothing a link wouldn't. *No duplication* applied to names. - **Data over objects in the hot path.** This is minimalism's hot-path corollary — the same "minimal memory, fastest hot path" test (see *Minimalism means elegance*), applied where speed and memory matter most and resolved to one answer: design around plain contiguous data, not an object graph. A flat buffer of elements that one stage writes and the next stage reads, following the producer/consumer data flow in [docs/architecture.md](docs/architecture.md). A contiguous buffer is cache-friendly and lets a stage do integer math straight on the array, whereas per-element objects with virtual accessors are cache-hostile and allocation-heavy, exactly what the hot-path rules forbid. So in the render loop: no object graph, no inheritance, don't wrap buffer data in objects. The **one deliberate class hierarchy** is the module tree (one `MoonModule` base, shallow subclasses, a single virtual-dispatch boundary), because uniform polymorphism is what lets the UI render any module generically with zero per-module UI code. **Outside the hot path**, a small *recognizable* adapter interface with a couple of virtuals is allowed when it passes the *Common patterns first* test — e.g. 
`ListSource` is the textbook data-source/adapter shape (UITableView's data source, Qt's `QAbstractItemModel`): the view is generic, the rows stay with their owner. That is not "adding inheritance" in the sense this rule forbids; a *bespoke* hierarchy outside the module tree still is. The line: hot-path data is flat and object-free, period; off the hot path, prefer flat data but a proven adapter interface beats a hand-rolled callback table when it's more consistent and reusable. - **Concrete first, abstract later.** Build one working feature end-to-end before extracting patterns into shared abstractions. Don't build the framework before the domain logic works. - **Robust to any input.** A running device tolerates any sequence of UI actions or API calls: add, delete, replace, or reconfigure any module in any order, at any grid size, and it keeps running. Degraded or idle is acceptable; crashed is not. This robustness is a defining strongpoint of projectMM, and it's guarded by the test framework, not by hope: a discovered crash drives a new test that pins the fix (see the Hard Rule). Out of scope: power loss, malformed OTA, brown-out, and other physical/electrical faults the firmware can't intercept; this principle is about what the software accepts as input. @@ -178,7 +179,7 @@ The "end users will use this" moment. Per-release criteria are defined by the pr 5. **Changelog / release notes**: drafted in the GitHub release body. Skip only for unreleased pre-1.0 tags. 6. **Cross-platform smoke**: run scenarios on every supported platform (today: PC + ESP32; later: + Teensy, RPi), if the release claims new platform support or the version bumps a major or minor. -7. **Principles audit**: sweep `docs/` (except `docs/backlog/` and `docs/history/`) and `src/` for forward-looking language ("roadmap", "will be", "planned", "in the future", "currently lacks", `TODO`, `FIXME`) and other violations of § Principles. 
Acceptable hits carry a one-line justification; the rest get rewritten present-tense or moved to `docs/backlog/backlog.md` / `docs/history/`. The reviewer agent can run this end-to-end. Skip only for releases where the diff against the previous tag is doc-empty. +7. **Principles audit**: sweep `docs/` (except `docs/backlog/` and `docs/history/`) and `src/` for forward-looking language ("roadmap", "will be", "planned", "in the future", "currently lacks", `TODO`, `FIXME`) and other violations of § Principles. Acceptable hits carry a one-line justification; the rest get rewritten present-tense or moved to `docs/backlog/` / `docs/history/`. The reviewer agent can run this end-to-end. Skip only for releases where the diff against the previous tag is doc-empty. What the agent reads: - Always: `CLAUDE.md`, `architecture.md` @@ -196,8 +197,10 @@ docs/ testing.md ← test inventory and strategy performance.md ← per-module timing, memory, sizeof for each platform backlog/ ← forward-looking: what to build next (not present-tense) - README.md ← index: what's here (to-build list + design studies + in-flight draft specs) - backlog.md ← the prioritised to-build list + README.md ← landing page: overview of every item + index (the rest of the system links here, not into items) + backlog-core.md ← to-build list, core / infrastructure domain (+ UI) + backlog-light.md ← to-build list, light domain (drivers, effects, preview, sensors) + backlog-mixed.md ← to-build list, items spanning both domains history/ ← backward-looking: accumulated wisdom README.md ← index: what's here + cross-repo trends + digest prompt decisions.md ← actions, lessons, proven patterns @@ -220,7 +223,7 @@ Do **not** repeat facts the `.h` already states: the controls list (the .h has ` The `history/` folder is the distilled experience of years of building LED/light systems, from WLED, WLED-MM, StarLight, MoonLight, through projectMM. 
It contains proven patterns, memory tricks, control mechanisms, and hard-won lessons, studied under the [*Industry standards, our own code*](#principles) principle. Per-project credits live in the `history/` digests and the per-module "Prior art" sections. -The `backlog/` folder is its forward-looking counterpart: `backlog.md` is the prioritised to-build list, design studies sit alongside it, and a spec for a not-yet-built module can live here as a plain draft `.md` until it ships (its final spec then goes to `moonmodules/` and the draft is deleted). Both `history/` and `backlog/` are exempt from the present-tense rule and agents don't read them automatically; only when planning new work. Neither folder only accumulates: per [*Mandatory subtraction*](#process-rules), both shrink as well — shipped backlog items and absorbed history entries are deleted, since the git commits are the permanent record and these folders are just the working narrative above it. +The `backlog/` folder is its forward-looking counterpart: the to-build list is split by domain (`backlog-core.md` / `backlog-light.md` / `backlog-mixed.md`) with `README.md` as the landing page the rest of the docs link to, design studies sit alongside it, and a spec for a not-yet-built module can live here as a plain draft `.md` until it ships (its final spec then goes to `moonmodules/` and the draft is deleted). Both `history/` and `backlog/` are exempt from the present-tense rule and agents don't read them automatically; only when planning new work. Neither folder only accumulates: per [*Mandatory subtraction*](#process-rules), both shrink as well — shipped backlog items and absorbed history entries are deleted, since the git commits are the permanent record and these folders are just the working narrative above it. 
## Code Style diff --git a/docs/architecture.md b/docs/architecture.md index 1fa647d..87d63a0 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -342,7 +342,7 @@ Multiple layouts can live in one Layouts container. Each layout describes one li **Layers** (a MoonModule) is the top-level container for one or more layers. Each layer renders independently into its own buffer; the Drivers container composes those buffers downstream. -**🚧 Multi-layer composition.** The container exists to compose more than one Layer's buffer into the shared output: alpha-blend and additive, in layer order. With a single layer wired (today's boot pipeline) Layers is a thin pass-through, but the design is the multi-layer case: each Layer renders into its own buffer, and the Drivers container's blend+map step composites them in order into the physical buffer (which is why that buffer is described as a *blend* buffer in [§ Memory strategy](#memory-strategy)). The single-layer path is the degenerate case of this, not a separate design. +**Multi-layer composition.** The container composes more than one Layer's buffer into the shared output: each enabled Layer renders into its own buffer, and the Drivers container's blend+map step composites them in container order (bottom→top) into the physical buffer (which is why that buffer is a *blend* buffer in [§ Memory strategy](#memory-strategy)). Each Layer carries a `blendMode` (alpha-over or additive) and an `opacity` — inert parameters the Layer never acts on; Drivers reads them and the container child order, and blends bottom→top. The bottom layer clears + overwrites the output; each layer above blends onto the accumulated frame per its mode and opacity. With a single enabled Layer this is the degenerate case: a thin pass-through that hands the driver the Layer's buffer directly (no composite), byte-for-byte the single-layer pipeline. 
The blend math is integer-only per the hot-path rule (8-bit alpha-over `(src·α + dst·(255−α))/255`, additive sum-with-clamp); cost scales with the enabled-layer count. A **Layer** (a MoonModule, child of Layers) owns: @@ -351,15 +351,15 @@ A **Layer** (a MoonModule, child of Layers) owns: - **Effects** (ordered list): write light values into the buffer. - **Modifiers** (ordered list): transform the LUT or light values. -A layer can have **multiple effects**. Effects are not blended; they write to the buffer sequentially in their listed order, each overwriting or adding to the previous. That allows stacked patterns (a base-colour effect followed by a sparkle effect). +A layer can have **multiple effects**. Each effect writes to the buffer sequentially in its listed order, overwriting or adding to the previous — so the effects stack (a base-colour effect followed by a sparkle effect). -A layer applies its **first enabled modifier** during LUT build (`Layer::rebuildLUT`). Modifier *chaining* (applying several in sequence) is not implemented: only the first enabled modifier takes effect. Order matters for a chain (a multiply-then-checkerboard mask differs from checkerboard-then-multiply, just as mirror-then-rotate differs from rotate-then-mirror), which is why modifiers are reorderable in the UI even though only the first is applied today. Chaining is on the [backlog](backlog/backlog.md): static modifiers chain during LUT build, dynamic modifiers during rendering. +A layer applies its **first enabled modifier** during LUT build (`Layer::rebuildLUT`). Modifiers are **reorderable** in the UI, and order is meaningful (a multiply-then-checkerboard mask differs from checkerboard-then-multiply, just as mirror-then-rotate differs from rotate-then-mirror). Applying several modifiers in sequence (chaining) is on the [backlog](backlog/README.md). Each layer references the shared Layouts. 
The layer builds its own LUT by iterating the Layouts container's coordinates and applying its static modifiers in order. Different layers in Layers can have different modifiers, producing different LUTs from the same Layouts. ## Effects -Effects produce light colours. They write into the Layer's buffer, which represents a logical grid. The Layer determines the buffer's dimensions (width, height, depth) from the Layouts, its own start/end percentages within the physical layout, and its modifiers. Effects receive these logical dimensions and elapsed time (millis) as their rendering context. They compute light positions from the buffer index (e.g. `x = i % width`, `y = i / width`). +Effects produce light colours. They write into the Layer's buffer, which represents a logical grid. The Layer determines the buffer's dimensions (width, height, depth) from the Layouts and its modifiers. Effects receive these logical dimensions and elapsed time (millis) as their rendering context. They compute light positions from the buffer index (e.g. `x = i % width`, `y = i / width`). Effects use elapsed time for animation, not frame count. Animation speed becomes frame-rate independent: an effect looks the same at 30 fps and 60 fps. This is also what makes the 🚧 cross-device clock sync work: a shared elapsed-time base means synced visuals across controllers (see [§ Multi-device sync](#multi-device-sync)). @@ -462,7 +462,7 @@ Network input (ArtNet receive, WebSocket) is processed synchronously at a define The system checks available heap before each allocation and degrades gracefully when memory is insufficient. A minimum reserve (`HEAP_RESERVE = 32 KB`) is kept for stack, HTTP, WiFi, and overhead. - **Mapping LUT** is created only if all of: modifiers exist on the layer; layout is not a simple non-serpentine grid (where physical == logical); enough heap available after the reserve. 
-- **Driver output buffer** (see [§ Drivers](#drivers) for what it's for) is created only when at least one layer has a mapping LUT actually allocated and enough heap is available. +- **Driver output buffer** (see [§ Drivers](#drivers) for what it's for) is created only when the pipeline must write into physical space rather than hand a driver a layer's logical buffer directly — that is, when **two or more layers are enabled** (they must be composited into one buffer) **or** a layer has a **mapping LUT** actually allocated (logical≠physical) — and enough heap is available. A single enabled layer with no LUT needs no output buffer: drivers read its buffer directly (the zero-copy fast path). ### Degradation cascade diff --git a/docs/backlog/README.md b/docs/backlog/README.md index 52adcc8..825f90d 100644 --- a/docs/backlog/README.md +++ b/docs/backlog/README.md @@ -2,23 +2,53 @@ The forward-looking half of the docs (the backward-looking half is [`../history/`](../history/)). This folder is **not** present-tense and agents don't read it automatically — only when planning new work. See [CLAUDE.md § Documentation](../../CLAUDE.md) for how `backlog/` and `history/` relate. -## What's here +This README is the **landing page**: the rest of the system links here, not into individual items, so the present-tense docs stay present-tense. -### The prioritised to-build list +## The to-build list -- [backlog.md](backlog.md) — what to build next, grouped by theme (distribution, effects, drivers, modifiers, …). Completed items are removed; the file is deleted when empty. -- [ui-deferred.md](ui-deferred.md) — UI items not yet in the live [ui.md](../moonmodules/core/ui.md): deferred-to-1.x features, open design questions, and the gap analysis against v1. The backward-looking v1 UI reverse-engineering lives in [history/v1-inventory.md](../history/v1-inventory.md). 
-- [leddriver-deferred.md](leddriver-deferred.md) — the LED-driver increments (RMT single-strand, multi-pin RMT, LCD_CAM on S3) all shipped; this is what's left and tracked nowhere else: the sigrok flicker test, the core-1 driver task, fuller show error handling, the per-driver buffer window, 16-bit/dither, and moving-head preview. +Split along the codebase's own boundary (`src/core/` vs `src/light/`), with a third file for items that genuinely span both: -### In-flight draft specs +- **[backlog-core.md](backlog-core.md)** — core / infrastructure: distribution + platforms, ESP32 performance & memory, network & persistence, HTTP/OTA, architecture, testing, housekeeping, and UI. +- **[backlog-light.md](backlog-light.md)** — the light domain: LED drivers (architecture + deferred increments), LCD/DMA driver work, effects & preview, and sensors / audio-reactive input. +- **[backlog-mixed.md](backlog-mixed.md)** — cross-domain items where a core mechanism interacts with a light driver/effect/modifier. -A spec for a not-yet-built module can live here as a plain draft `.md` (alongside the design studies below) until the module ships — at which point its final spec is written in [`../moonmodules/`](../moonmodules/) and the draft is deleted. There's no dedicated subfolder or promote step: a draft is just a forward-looking markdown file like the rest of `backlog/`. None are in flight right now (every drafted module has shipped; the former UI draft moved to [ui-deferred.md](ui-deferred.md) and [history/v1-inventory.md](../history/v1-inventory.md)). +Completed items are removed; a file is deleted when empty (per [*Mandatory subtraction*](../../CLAUDE.md#process-rules)). Tags in item titles: *(investigation)* = needs measurement before a fix · *(backlog)* = scoped but not started · *(deferred)* = waiting on a prerequisite · *(future / long term)* = directional. -### Design studies +## At a glance + +A map of everything in the three files, by theme. 
+ +### Core ([backlog-core.md](backlog-core.md)) + +- **Distribution** — remaining platforms (Linux, Teensy, RPi), code-signing (macOS/Windows), live RMII Ethernet reconfigure, installer UX polish, P4 DHCP-hostname recheck; DevicesModule discovery growth (HTTP probe off the render task, more mDNS types + UDP, deterministic scan scenario). +- **ESP32 performance & memory** — E1.31 multicast (IGMP), WiFi ArtNet perf matrix, async ArtNet send (PSRAM-only), network round-trip drop/reorder test, slow eth bring-up, non-PSRAM memory ceiling + boot-time buffer degradation, task core-pinning; ops: static IP on STA, mDNS toggle, MoonDeck doc-asset hardening, CI SHA-pinning. +- **Architecture** — disable-releases-resources, cross-module pin-uniqueness check, Improv-child-of-NetworkModule, `std::span` platform API, Improv-as-REST follow-ups, **live scripting** (on-device authored effects/layouts/modifiers/drivers/sensor logic — design phase, see the bottom-up survey); composition/config: runtime board presets, per-layout coordinate offset. +- **HTTP & OTA** — direct binary-upload OTA, HTTP file serving off the render tick. +- **Testing** — additional coverage (UI load time, teardown memory, JS harness), live full-suite state leak. +- **Housekeeping** — WS-send socket-pair fixture, ESP-IDF version pinning, three-level device model, persistence-overlay audit, **ESP32-P4 rounds 3-4 (in progress)**, WiFi runtime disable. +- **UI** — deferred-to-1.x items, open design questions (multi-layer UI, modifier-chain viz, presets, node-graph), and the v1 gap analysis. + +### Light ([backlog-light.md](backlog-light.md)) + +- **Drivers** — extract shared lane-driver scaffolding (on the 3rd backend), 1..8-pin LCD output, classic ESP32 I2S 16-lane driver. +- **LED drivers — deferred** — sigrok flicker cross-check, core-1 driver task, fuller RMT error handling, per-driver buffer window, 16-bit/dither, moving-head preview interpreter. 
+- **LCD / DMA driver work** — drop the i80 WR/DC sacrificial pins, LCD/Parlio DMA buffer → PSRAM. +- **Effects & preview** — real z-axis in 2D effects, full-density interpolated preview, self-describing frame header, RGBW preview, fixture model (moving heads/beams), extract the resumable transport. +- **Sensors & audio-reactive input** — audio follow-ups (per-band noise floor, adaptive gate), GyroDriver → core Peripheral move, Raspberry Pi 5 sensor input (mic/IMU/line-in). + +### Mixed ([backlog-mixed.md](backlog-mixed.md)) + +- MultiplyModifier mapping-LUT memory at large grids; composed modifiers (chain the whole stack, not just the first); intermittent ~0.5 s RMT LED pauses; NoiseEffect simplex cost on ESP32. + +## In-flight draft specs + +A spec for a not-yet-built module can live here as a plain draft `.md` (alongside the design studies below) until the module ships — at which point its final spec is written in [`../moonmodules/`](../moonmodules/) and the draft is deleted. None are in flight right now. + +## Design studies One-off research documents that informed a future direction, kept for the reasoning rather than as living specs. - [leddriver-analysis-top-down.md](leddriver-analysis-top-down.md) — reasons from the end goal (driving WS2812-class LEDs from a GPIO pin) toward a generic driver architecture, per-platform implementation, and a testing strategy. - [leddriver-analysis-bottom-up.md](leddriver-analysis-bottom-up.md) — the companion landscape survey: catalogues the existing LED-driver libraries across ESP32, Teensy, Raspberry Pi, and PC, and recommends a path. - -(The 3-layer installer plan these analyses' sibling produced shipped fully and its deferred items already had homes in [backlog.md](backlog.md), so its file was deleted per [*Mandatory subtraction*](../../CLAUDE.md#process-rules). The installer lives in `docs/install/` + `scripts/build/`; the durable reasoning is in `architecture.md` / `history/decisions.md`.) 
+- [livescripts-analysis-bottom-up.md](livescripts-analysis-bottom-up.md) — live scripting (run user-authored effects/layouts/modifiers/drivers/sensor logic on-device without a reflash), Stage-1 survey. Deep-reads the ESPLiveScript fork (hpwit's native-Xtensa JIT), surveys the field (ARTI-FX interpreter by ewowi, embedded VMs, WASM/WAMR), and records the product-owner direction. +- [livescripts-analysis-top-down.md](livescripts-analysis-top-down.md) — the Stage-2 redesign: a native-codegen engine, Xtensa-first behind an IR seam (WASM/WAMR the per-target fallback), a C-subset language that ports an effect near-verbatim, the MoonModule binding, and a staged spike plan along the MoonLight effects-tutorial ladder. diff --git a/docs/backlog/backlog.md b/docs/backlog/backlog-core.md similarity index 66% rename from docs/backlog/backlog.md rename to docs/backlog/backlog-core.md index fe0e60a..4473c32 100644 --- a/docs/backlog/backlog.md +++ b/docs/backlog/backlog-core.md @@ -1,8 +1,6 @@ -# What to build next +# Backlog — core -Completed items are removed. This file is deleted when empty. - ---- +Forward-looking to-build items for the **core / infrastructure** domain (`src/core/`, `src/platform/`, build, CI, network, persistence, UI). The light-domain counterpart is [backlog-light.md](backlog-light.md); items that genuinely span both are in [backlog-mixed.md](backlog-mixed.md). Index + overview: [README.md](README.md). Completed items are removed. ## Distribution @@ -28,26 +26,8 @@ DevicesModule discovers via two strategies that merge into one list: an **mDNS b - **More mDNS service types + UDP** — the mDNS browse cycle (`kMdnsServices`) extends one entry at a time as classification lands for each (Home Assistant `_home-assistant._tcp`, ESPHome `_esphome._tcp`, RTP-MIDI `_apple-midi._udp`). 
Separately, the **four-mechanism split** (decided): discovery and messaging are separate axes, none replaces another — **mDNS** = discovery (standard, whole ecosystem), **HTTP sweep** = discovery fallback (what mDNS misses, e.g. a PC instance on :8080), **REST `/api/control`** = *reliable* messaging (config push, fleet OTA — TCP guarantees delivery, already built), **UDP** = *lossy real-time streaming* only (SuperSync clock / live timing, where drop-and-continue is fine and low latency matters). The MoonLight "messages sometimes didn't arrive" pain came from using UDP for must-arrive messages — route must-arrive over REST, reserve UDP for streams. A UDP *presence beacon* could also seed projectMM↔projectMM discovery, but mDNS is preferred there as the recognizable standard. UDP receive is a cheap non-blocking poll; UDP *send* of large frames is throughput-bound (see Async ArtNet) and belongs off the render task. - **Deterministic full-pipeline scan scenario (canned `httpGet`)** — `scenario_DevicesModule_scan.json` is live-only (needs a real LAN, runs on hardware). A desktop-runnable parallel that exercises scan → classify → upsert → age-out → list-serialization with *canned* `httpGet` responses would pin the whole discovery pipeline without flakiness. Needs a new platform seam: a settable response table the desktop `httpGet` consults (mirroring the existing `setTestNowMs` clock override). Today the age-out + restore + serialize paths are covered by `unit_DevicesModule_ageout.cpp` and classify by `unit_DeviceIdentify.cpp`, so this is breadth, not a gap — deferred so the httpGet-mock seam gets its own focused change rather than riding in on a review batch. ---- - ## ESP32 performance and memory -### MultiplyModifier mapping-LUT memory at large grids (investigation, re-verify on classic) - -`scenario_perf_full` on the S3 (2026-06-17) measured the MultiplyModifier's cost across grid sizes. 
The finding, stated correctly: the modifier **reduces compute** (with the default 2×2 kaleidoscope the effect renders only the ¼-size logical quadrant — Noise+Multiply at 16K is 29,647µs vs 50,555µs for Noise alone), and its real cost is **memory** — the 1:N fan-out mapping LUT. Measured modifier heap cost on the S3: 16²→1.7KB, 32²→10.8KB, 64²→23.5KB, **128²(16K)→93KB** (the LUT destinations array; `nrOfLightsType` is `uint32_t` on a PSRAM board). On the S3's 8MB PSRAM this is trivial. - -**This is NOT a no-PSRAM blocker** — 16K Noise + Multiply has run on a classic ESP32 (no PSRAM, 320KB internal) before at **10–20 FPS** (WiFi vs Ethernet), sending frames out over **ArtNet to a display, not physical LED drivers**. It works there because classic's `nrOfLightsType` is `uint16_t` (half the LUT size) and the modifier shrinks the logical render grid. So the action is **re-verify the working classic setup when a classic board is connected** (find the config — grid, mirror, ArtNet target — that reproduces the historical 10–20 FPS), not "fix an impossibility." Worth investigating only if that re-verification shows the LUT memory has regressed since: the destinations array is the obvious lever (it stores a `nrOfLightsType` per physical destination; a 2× kaleidoscope is 1:1 in *count* so the LUT need not store fan-out > the physical count — confirm it isn't over-allocating to `maxMultiplier()` when the effective fan-out is 1). Capture the classic numbers into performance.md's multi-board table first. - -### Intermittent ~0.5 s LED pauses with the RMT driver (pending investigation) - -Observed on the bench (2026-06): LED output running on the RMT driver occasionally freezes for about half a second. Postponed by the product owner until more observations exist. Ranked suspects from the initial analysis, each with a cheap experiment: - -1. 
**WiFi modem power-save never disabled** — nothing in `src/` calls `esp_wifi_set_ps(WIFI_PS_NONE)`, so the IDF default `WIFI_PS_MIN_MODEM` is active; the radio's DTIM sleep causes exactly this class of intermittent multi-hundred-ms stall. WLED and the v1/v2 lineage disable sleep. Experiment: one line in the ESP32 platform code after association. -2. **NetworkSendDriver sending synchronously every tick to an absent destination** (default `192.168.1.70`) — lwIP keeps re-ARPing a dead address while the send sits in the render tick. Data point (2026-06-10): the bench esp32-16mb had NetworkSend *disabled* in its persisted config, consistent with the pauses being annoying enough to switch the sender off. Experiment: point the ArtNet IP at a live host (or disable the driver) and see if the pauses stop. -3. **`rmt_tx_wait_all_done` 1 s timeout** — a wedged transmission blocks the tick up to a full second (multi-pin: up to N×1 s). Least likely (~1 s, not ~0.5 s) but it's the only hard block in the driver itself. - -If pauses correlate with UI control changes, also consider the 2 s-debounced SPIFFS save stalling flash-resident code. The per-tick KPI log around a pause discriminates between these immediately. - ### E1.31 multicast receive (IGMP join) NetworkReceiveEffect accepts E1.31 via unicast only — the same scope MoonLight ships. Multicast senders address the per-universe group `239.255.{universe_hi}.{universe_lo}`, which a receiver must join via IGMP; the platform `UdpSocket` has no `IP_ADD_MEMBERSHIP` support yet (lwIP `setsockopt` on ESP32, plain `setsockopt` on desktop, plus a join-per-accepted-universe bookkeeping question). Add when a multicast-only sender actually shows up on a bench; until then the spec documents "point sACN senders at the device's IP". @@ -108,21 +88,6 @@ What we still don't know (all **physical** tests — no code change is warranted Bottom line: intermittent, build-independent, reset-correlated → a hardware/PHY issue, not a firmware bug. 
The earlier "slow DHCP at boot" is likely the same root cause (the PHY cycling many times before one window holds long enough to complete DHCP). Pick this up with the physical tests above before touching any code. -### NoiseEffect simplex cost on ESP32 (investigation) - -With mirror XY at 128×128, NoiseEffect renders the 64×64 logical quadrant in **~11 ms/tick** on the Olimex (measured) — the simplex math dominates, since the Xtensa LX6 has no FPU and float math is software-emulated. (RainbowEffect on the same pipeline is much cheaper.) This is correct, non-degraded behaviour; it's only worth revisiting if a deployment needs Noise faster than ~11 ms at this grid. - -Worth investigating if so: - -- **Q16 fixed-point simplex** instead of float (kills the software-float emulation cost). -- **Lower-precision hash** — current simplex uses a 256-entry permutation lookup; a smaller / SIMD-friendly hash may be faster on Xtensa. -- **Strided sampling + interpolation** — render at 32×32, bilinear up to 64×64. Visual quality cost; needs A/B comparison. -- **Inline / unroll the inner per-pixel loop** to keep the simplex state in registers. - -None of these are obviously free, and a fixed-point port may shift the visual signature. Defer until there's a real use case — on the no-PSRAM Olimex at large grids the tick is dominated by the synchronous ArtNet send (~35 ms), not Noise, so the effect is rarely the bottleneck there. - -**S3 render-only data point (2026-06-17, `scenario_perf_full`):** on the PSRAM S3 with **no output driver**, Noise is the dominant cost at every grid and there's no ArtNet floor to hide it: 16²→738µs, 32²→2,831µs, 64²→11,235µs, **128²(16K)→50,555µs (~20 FPS)** — clean ~linear-in-pixels (67×), so no fragmentation/realloc pathology, just raw simplex compute. The light effect (Checkerboard) on the same sweep is 6–11× faster (16K→7,949µs, ~128 FPS). So on a PSRAM board the heavy effect IS the 16K bottleneck (where on the Olimex the network send was). 
This is the strongest case for the fixed-point/strided-sampling ideas above, since a PSRAM board can run 16K grids that the network-bound Olimex never reaches. The S3 has a real FPU (LX7), so the win is less about software-float emulation and more about per-pixel simplex work; profile before committing. - ### MoonDeck doc-asset endpoint hardening (backlog) `scripts/moondeck.py::_serve_doc_asset` accepts any ROOT-relative path and serves the file. Path traversal *is* blocked (`asset_path.relative_to(ROOT.resolve())`), but inside the repo any file is served — including local-only artefacts like `scripts/build/wifi_credentials.json` if present. MoonDeck binds to all interfaces by design (the existing comment in `main()` explicitly enables LAN reach), so anyone on the LAN can hit the endpoint. @@ -181,8 +146,6 @@ Related: this is the render/output-buffer face of the same non-PSRAM fragmentati No FreeRTOS tasks are pinned today. At 16K LEDs the render task takes ~52 ms/tick; if OTA download or Improv scan causes tick-variance spikes, pin render → core 1, OTA/Improv → core 0 (where WiFi already lives via `CONFIG_ESP_WIFI_TASK_PINNED_TO_CORE_0=y`). Defer until contention is observed — neither OTA nor Improv runs during normal operation. ---- - ## Architecture ### Disabling a module should release its resources, not just stop its loop (backlog) @@ -207,18 +170,6 @@ Today `setEnabled(false)` only makes the Scheduler skip the module's `loop`/`loo **Related:** [§ Disabling a module should release its resources](#disabling-a-module-should-release-its-resources-not-just-stop-its-loop-backlog) — a disabled module freeing its pins is what lets the same GPIO be reassigned live without a conflict-reject. 
-### Extract shared lane-driver scaffolding when the 3rd parallel backend lands (deferred) - -The LcdLedDriver (S3 LCD_CAM i80) and ParlioLedDriver (P4 Parlio) share ~245 of 362 lines, and their platform-side loopback capture+verify is ~100 lines byte-for-byte identical (`platform_esp32_parlio.cpp` even notes "The RX capture half is byte-for-byte identical" to the LCD one). The status-string lifecycle (`failBuf_` / `configErr_` / `clearFailBuf` / `clearConfigErr`) is triplicated across all three LED drivers (RMT/LCD/Parlio), ~60 lines. The branch deliberately extracted the *encoders* (`LcdSlots.h` shared by i80+Parlio, `RmtSymbol.h`, `PinList.h`) on the "extract when the second user lands" rule, but stopped at the lifecycle/loopback scaffolding. **Accepted for this merge** (the reviewer agreed driver-level extraction can wait): the duplication is in mechanical lifecycle/test scaffolding, not domain logic, and a DriverBase-level refactor touching three drivers is riskier than the duplication it removes. **Do it when the third parallel backend arrives** (16-lane widening, or Teensy FlexIO), at which point the pattern is proven three ways: (a) a `detail::` platform helper for capture+verify (the only per-peripheral difference is the transmit call, pass a callback, beside the already-shared `loopbackJumperOk`), and (b) a small owned-status helper or DriverBase members for the fail/config strings. Until then the cost is line count, not correctness. - -### 1..8-pin LCD output (future) — would let S3 default to LCD - -`LcdLedDriver` requires **all 8** i80 data lanes (`kExactLaneCount = true`, `LcdLedDriver.h`): the ESP-IDF `esp_lcd` i80 bus configures every data line of the bus width and rejects a partial set, so even a few WS2812 strands claim 8 GPIOs. 
That's why **S3 boards default to `RmtLedDriver`** in `deviceModels.json` (RMT runs one channel per pin, 1..N) rather than LCD — a board with fewer than 8 strips can't sensibly use the LCD driver, and the 8-lane LCD bench wiring (`1,2,4,5,6,7,8,9`) collides with common peripheral pins (e.g. the mic on 4/5/6). A **1..8-pin LCD mode** (drive only the lanes named in `pins`, leave the rest unclaimed — matching Parlio's flexibility) would let the parallel S3 path run any lane count, at which point an S3 board entry could choose LCD vs RMT by intent. Parlio already does this (`kExactLaneCount = false`, 1..8 lanes), so the P4 default *is* the parallel driver. Until LCD gains the same flexibility, S3 stays on RMT by default. Low priority — RMT covers the few-strip S3 case today. - -### Classic ESP32 I2S 16-lane parallel LED driver (future) — beyond RMT's 8 channels - -The **classic ESP32 has 8 RMT TX channels** (`platform_config.h`: "8 on classic ESP32, 4 on the S3 and P4"), so RMT covers up to 8 parallel outputs on classic ESP32 — e.g. the 8-output QuinLED Dig-Octa runs fine on `RmtLedDriver`. For **more than 8 lanes on classic ESP32**, the established trick drives the **I2S peripheral in LCD/parallel mode** (the hpwit [I2SClocklessLedDriver](https://github.com/hpwit/I2SClocklessLedDriver) / FastLED I2S lineage), clocking out up to **16 lanes** from one autonomous DMA transfer. This is the classic ESP32's high-lane-count path, distinct from the S3 (LCD_CAM → `LcdLedDriver`, plus the [1..8-pin LCD item](#18-pin-lcd-output-future--would-let-s3-default-to-lcd) above) and the P4 (Parlio). No catalog board needs it today (none exceeds 8 outputs), so no board's `planned` list points at it yet; it's the marker for a future ≥9-output classic-ESP32 board. Studied under *Industry standards, our own code* — carry the idea, write our own against the project architecture (host-testable encoder in `src/light/`, peripheral seam in `src/platform/esp32/`). 
**When it lands**, follow the per-chip driver-gating pattern now in `main.cpp` (each LED driver's `#include` + `registerType` is wrapped in `#if defined(CONFIG_SOC__SUPPORTED)`, keyed off the SOC capability macro that backs its `platform_config.h` lane-count flag): the I2S driver gates on the relevant I2S/LCD SOC macro so it compiles + registers on classic ESP32 only, and adds an `i2sLanes` capability flag beside `rmtTxChannels`/`lcdLanes`/`parlioLanes`. Prior art: hpwit's I2SClockless lineage and FastLED's I2S driver; the same parallel-DMA lineage is already credited in [LcdLedDriver.md § Prior art](../moonmodules/light/drivers/LcdLedDriver.md#prior-art). - ### Runtime board presets (multi-commit, partially landed) The firmware-vs-board separation is now in place across the codebase (see [architecture.md § Firmware vs deviceModel vs board](../architecture.md#firmware-vs-devicemodel-vs-board)). `build_esp32.py --firmware ` picks the compiled binary; MoonDeck deduces the physical board where the firmware uniquely identifies hardware (`esp32-eth*` ⇒ `olimex-esp32-gateway-rev-g`) and lets the user pick from a short hardcoded list otherwise. Firmware variants stay separate — `esp32-eth` saves ~670 KB flash + ~30 KB DRAM vs the default `esp32` (WiFi+Ethernet, measured); merging would erase that win. @@ -242,7 +193,7 @@ Board preset catalog + upload (later, when the runtime config has real consumers - **LED output pins** — per-strip data GPIOs (1–16 outputs/board); the first real consumer (a Driver pin control) unblocks multi-output boards (QuinLED Dig-Quad/Octa, SE16, LightCrafter). - **Ethernet PHY config** — LAN8720/RMII (MDC/MDIO/CLK/power-pin/PHY-addr/clock-mode) vs W5500/SPI (MISO/MOSI/SCK/CS/IRQ); the consumer is the runtime `Network.eth_*` controls listed above, replacing the hardcoded Olimex pins. - **Power budget** — `maxPower` (Watts) per board, for a future current-limit / brightness-cap control. 
-- **Audio / I2S** — SD/WS/SCK/MCLK pins, the input side of audio-reactive effects ([Pi-5 sensor note](#sensor-input-on-raspberry-pi-5--microphone-imu-line-in-post-10-multi-commit) is the desktop counterpart). +- **Audio / I2S** — SD/WS/SCK/MCLK pins, the input side of audio-reactive effects ([Pi-5 sensor note](backlog-light.md#sensor-input-on-raspberry-pi-5--microphone-imu-line-in-post-10-multi-commit) is the desktop counterpart). - **Buttons & inputs** — push/toggle/lights-on, PIR, digital-input; needs an input-event concept the firmware doesn't have yet. - **Relays & power control** — relay / lights-on / high-low pins. - **Infrared** — IR receive pin (remote control). @@ -254,16 +205,6 @@ Sequencing rule (unchanged): each functionality lands a device-side control firs **Module variant + PSRAM within the classic-ESP32 family.** `getChipDescription()` and MoonLight's `ModuleIO.h` both report only the *core* family ("ESP32"), not the *module* (WROOM / WROVER / PICO) — so neither distinguishes whether a classic-ESP32 board has PSRAM. This matters for projectMM (whose large-LED story leans on PSRAM) in a way it doesn't for MoonLight: e.g. the **QuinLED Dig-Next-2 is built on an ESP32-PICO with 2 MB PSRAM**, but projectMM's `esp32` build has no `CONFIG_SPIRAM` (see the `#ifdef CONFIG_SPIRAM` gate in `platform_esp32.cpp::psramAlloc`), so it flashes and runs as a no-PSRAM device and hits the non-PSRAM fragmentation ceiling at large grids that the 2 MB would otherwise relieve. A PSRAM-enabled classic-ESP32 firmware variant (e.g. `esp32-psram`) would unlock it; `deviceModels.json` could then carry a `psram` hint per board to steer the picker — but only once that variant exists (no consumer today). `deviceModels.json` currently maps every classic board to the WiFi-only `esp32` variant, which is correct-but-unoptimised for PSRAM-bearing PICO boards. -### Multi-layer composition (backlog) - -`Layers` holds N layers; `Drivers` reads from a single active layer today. 
Composition is the missing piece — additional layers render their buffers but only the first enabled layer reaches output. - -When picked up: -- `Drivers::loop()` blends each enabled Layer's buffer into the shared output using per-Layer blend mode + opacity (controls to add on Layer). -- `Layer::startX/Y/Z` / `endX/Y/Z` (already persisted, currently no-op) become active in `rebuildLUT` — each Layer carves a percentage region of the physical extent. -- Memory-aware allocator at `onBuildState` time decides how many Layers fit and degrades gracefully. -- Persistence already encodes Layers children positionally — adding siblings just works on the file-format side. - ### Per-layout coordinate offset for independent placement (backlog) `Layouts` stitches multiple child layouts into one physical light space, but only their *indices* are stitched (offset sequentially in `forEachCoord`) — their *coordinates* are not translated. Two layouts therefore overlap in the same coordinate box: two 64×64 grids both occupy x,y ∈ 0..63, so the Layer's dense bounding-box buffer is 64×64 (4096 voxels) even though the container reports 8192 lights, and the second layout's lights land on the first's positions. `scenario_Layouts_mutation` documents this (its steps assert pipeline liveness, not buffer-size arithmetic). @@ -291,67 +232,11 @@ Device-model injection over Improv shipped as **"Improv = REST over serial"** (t **Open follow-up: shared JS helpers across device-UI and web-installer.** `safeLocalGet` / `safeLocalSet` (3-line hostile-storage guards) are duplicated in `src/ui/install-picker.js` (device firmware, embedded as a C string via `embed_ui.cmake`) and `docs/install/devices.js` (web installer page, served from Pages). 
The two live in different build contexts so the shared extract isn't trivial — it'd need a new `src/ui/safe-storage.js` plus updates to: `embed_ui.cmake` (embed the new file), `ui_embedded.h` generator (new C array), HTTP server file routing (new path served), `release.yml` workflow staging, `preview_installer.py` staging. Five files for one 3-line helper is too much pre-merge. Worth doing when the next shared helper arrives — `relativeTime` and `formatBytes` are candidates. Two helpers earn the build-glue cost; one doesn't. ---- - -## Sensors and audio-reactive input - -### Audio-reactive follow-ups - -The manual level + 16-band FFT spectrum has shipped ([AudioModule](../moonmodules/core/AudioModule.md); what landed and why is in [decisions.md](../history/decisions.md)). These are the deferred follow-ups, each its own increment: - -- **Per-band noise-floor (kill a steady single-frequency hum)** — the bench mic picks up a constant ~258 Hz tone (a mains harmonic via the mic/supply) that lights one band even in silence. A high-pass can't remove it (it's well above the ~40 Hz DC-blocker cutoff) without also killing real bass; the clean fix is a per-band adaptive floor that learns each band's idle baseline and subtracts it, so a constant tone in one band gates to dark while the others stay sensitive. Minimal version ≈ 16 floats of state + ~16 ops/frame. This is the next concrete audio step. -- **Adaptive conditioning** — auto noise-floor / auto-gain / smoothing so the display self-calibrates to a room ("sound off → dark, sound on → vivid") instead of being tuned by hand. A self-calibrating version was prototyped and removed; the manual `floor`/`gain` is the shipped baseline. Reinvent from scratch when wanted, and **tune it in a quiet room** — a noisy environment (a strong, varying low-frequency ambient) is the adversarial case that made the prototype hard to settle. (The per-band floor above is the first piece of this.) 
-- **Adaptive noise gate** — replace the borrowed `squelch`/`floor`-as-gate with a real noise gate: asymmetric bang-bang timing (open fast, close slow), a relative "detect silence" test (thresholds as factors of a learned floor, not absolute sample counts), keying off the RMS envelope we already compute, GEQ/FFT bands left untouched. A softhack007 concept; analysed and judged in full (good idea, industry-standard, but tight on the <30ms budget; decompose into steps rather than overhaul) in [AudioModule.md § Adaptive noise gate](../moonmodules/core/AudioModule.md#adaptive-noise-gate-forward-looking). The recommended sequencing: the per-band floor above is step 1 (its complementary frequency-domain half), the relative-threshold-over-RMS is the cheap high-value cherry-pick as step 2, hysteresis/timing step 3, log-domain + soft-gate optional. Eventually retires the manual squelch. -- **Pin auto-scan** — detect the mic's `sdPin` with `wsPin`/`sckPin` fixed (a noise-prompt + confirm convenience); ships today with explicit pin controls. -- **Beat / onset detection** beyond the raw peak; more audio effects (2D / palette-driven frequency-reactive). - -### GyroDriver → core Peripheral move + AudioModule-consistency pass (branched, not merged) - -A working **GyroDriver** (MPU6050 IMU over I²C) exists on an unmerged branch (commit `11f8eb7`, "Add GyroDriver (MPU6050) + generic platform I2C layer"); it is not in this branch's tree. This entry reverse-engineers that commit so the move is tracked now. **Verify against the real implementation when the branch merges, then delete this entry.** - -What the commit contains (reverse-engineered): - -- `src/light/drivers/GyroDriver.h` — reads an MPU6050 over I²C and surfaces five read-only telemetry controls (`gyroX`/`gyroY`/`gyroZ` rates in °/s, `pitch`/`roll` tilt angles). Polls the sensor in `loop20ms()` (50 Hz), formats the display strings in `loop1s()`. 
WHO_AM_I probe + wake on `setup()`, big-endian 14-byte burst parse, `atan2`-based tilt (no fusion filter). -- A **generic, domain-neutral platform I²C master** (`platform::i2cInit`/`i2cWriteReg`/`i2cReadRegs`, 7-bit addressing) so future sensors reuse it; ESP32 impl on the IDF v6 `i2c_master` driver in a new `platform_esp32_i2c.cpp`, plus an MPU6050-shaped desktop simulation so the UI and host tests see live values without hardware. -- `unit_GyroDriver.cpp` — WHO_AM_I probe, simulated burst parse, control formatting, time-ramp tracking. - -The move: it currently masquerades as an input-only **driver** under the Drivers container (a no-op `setSourceBuffer(Buffer*) override {}` is the tell). It belongs as a **SystemModule Peripheral** child, exactly like [AudioModule](../moonmodules/core/AudioModule.md) — both are sensor peripherals that poll hardware and publish read-only telemetry. On the move, make it consistent with AudioModule (the established sibling pattern): - -- **Relocate** `src/light/drivers/GyroDriver.h` → `src/core/` and its spec `docs/moonmodules/light/drivers/GyroDriver.md` → `docs/moonmodules/core/`; change `role()` to `Peripheral`; delete the `setSourceBuffer` no-op; rewrite the doc's "input-only driver under the Drivers container" framing. -- **Pin controls + rebuild path.** GyroDriver hardcodes SDA/SCL (`static constexpr` 21/22, with its own "Hardcoded until BoardModule exposes I2C pin mapping" comment). AudioModule already shows the pattern: editable `uint16` pin controls + `controlChangeTriggersBuildState` + a `reinit()` on `onBuildState`. Adopting it retires the hardcoded-pins TODO and satisfies the robustness rule (reconfigure in any order). -- **Lifecycle.** GyroDriver has `setup()` only — no `teardown()`. Add teardown for symmetry with AudioModule's setup/teardown/reinit (the shared I²C bus has little per-instance state to free, so this is consistency, not a leak fix). 
-- **Document the cadence difference.** GyroDriver polls in `loop20ms()` (50 Hz is plenty for tilt); AudioModule reads in `loop()` every tick because I²S DMA must be drained promptly or it overflows. Both are correct; add a one-line "why this cadence" comment at each so the two siblings aren't "harmonised" into a bug. -- **Wire it** in `main.cpp` as a Peripheral child of System under `markWiredByCode`, the same shape as AudioModule. - -Already done on this branch (the reverse direction): AudioModule's two live read-outs were switched from `addText`+`setReadOnly` to `addReadOnly` (the display-only type, matching SystemModule and the way GyroDriver already does it correctly) — so the telemetry idiom is consistent before the gyro branch even lands. - -### Sensor input on Raspberry Pi 5 — microphone, IMU, line-in (post-1.0, multi-commit) - -Audio-reactive lighting (and motion-reactive) is core to what WLED-MM / MoonLight are known for. The Pi 5 is the right host for it: it has the CPU and RAM for real FFT-based audio analysis that the Xtensa ESP32 struggles with, and a full Linux audio + I²C stack. None of this exists today — the codebase has no sensor, audio, or IMU concept, and the Pi currently runs the **desktop** platform backend (there is no `src/platform/rpi/`), which has no hardware access. So this is a domain expansion built on a real platform-backend prerequisite, not a small add. - -**Target sensors and their Pi 5 interfaces:** +### Live scripting — author effects/layouts/modifiers/drivers/sensor logic on-device (multi-commit, design phase) -- **Microphone** — I²S MEMS mic, or a USB audio device read via ALSA. The high-value one: FFT → frequency bands + beat detection drive audio-reactive effects. -- **Line-in** — the Pi 5 has no native analog input, so this is a USB audio interface / DAC HAT feeding the same audio pipeline as the mic; only the source differs. 
-- **IMU / gyro** — an I²C device (MPU-6050 / 9250-class) on the Pi's I²C bus; tilt / motion → effect parameters. +Run user-authored scripts on a running device — a scripted effect, layout, modifier, driver, or core sensor rule, pushed as text and live on the next tick with no reflash/reboot — the leap WLED took with ARTI-FX and the heart of the PixelBlaze product. A scripted module **is** a MoonModule (controls, `loop()`, role, generic UI). The engine lives in core (domain-neutral: also "transform sensor data") and serves the light domain specifically. Targets in order: ESP32 classic + S3 first, then P4/other ESP32, then Teensy, then desktop. Must be blazingly fast (runs in the render hot path at 16K+ lights × 50 FPS), memory-smart (IRAM/PSRAM via `platform::alloc`, compile-once), and synced (Scheduler tick, tick-atomic hot-swap, live reconfig). -**How it fits the architecture (the load-bearing part):** - -1. **The module category exists — `ModuleRole::Peripheral`.** Peripherals are user-add/deletable children of SystemModule (a gyro `Peripheral` already lands there via the GyroDriver→core move). What's missing for audio-reactive is the *consumption* side: a sensor reads hardware and *produces* values (audio bands, IMU axes) that effects consume — the producer side of the [producer/consumer data-exchange model](../architecture.md#data-exchange-between-modules) (a sensor produces an `AudioFrame` / `ImuState` the way effects produce a buffer that drivers consume). Define the producer struct domain-neutrally so it isn't audio-specific. Today's peripherals are display-only; wiring them into effects is the new work. -2. **All hardware access stays behind the platform boundary.** New `platform::` APIs (e.g. `readAudio()` returning PCM/FFT, `readImu()` returning axes) with the ALSA / I²S / I²C implementation in a real `src/platform/rpi/` backend — which is itself the prerequisite that doesn't exist yet (the Pi uses the desktop backend today). 
No ALSA/I²C include or call outside `src/platform/`. -3. **Effects consume sensor data the same way they read the layer.** An audio-reactive effect reads the current `AudioFrame` (bands/level/beat) the way `PreviewDriver` reads what `Layer` produces — through a plain data structure wired in `main.cpp`, not a direct hardware call. - -**Increments (each a normal domain addition, picked up one at a time):** - -1. A real `src/platform/rpi/` hardware backend (GPIO/I²C/I²S/ALSA) — the prerequisite; until it lands, the Pi runs the desktop backend with no sensors. -2. The producer struct(s) (`AudioFrame` / `ImuState`) + the `platform::read*` APIs. (The `Peripheral` role + SystemModule add/delete already exist.) -3. The first audio peripheral — **MicrophoneModule** (canonical, highest value: FFT bands + beat). -4. The first audio-reactive effect(s) consuming it. -5. IMU and line-in slot into the same source-module + platform-API shape afterwards. - -Study the proven audio pipeline in MoonLight / WLED-MM (FFT band layout, AGC, beat detection) to inform our own — reference the approach, don't port their code, per [history](../history/README.md) practice. Specs before code: a `MicrophoneModule.md` (and the source-category contract) get written and reviewed before implementation. - ---- +The **bottom-up landscape survey** is done — [livescripts-analysis-bottom-up.md](livescripts-analysis-bottom-up.md): deep-reads the [ESPLiveScript fork](https://github.com/ewowi/ESPLiveScript/tree/fix-warnings) (a from-scratch C-like JIT that emits **native Xtensa** machine code — blazingly fast but **Xtensa-only**, so it covers classic+S3 and *not* P4/Teensy/desktop), surveys the field (PixelBlaze bytecode VM + web editor, WLED ARTI-FX AST-walking interpreter, embedded VMs / WASM / lightweight multi-ISA JITs), and extracts the load-bearing decisions (execution strategy, the IR seam ESPLiveScript lacks, the MoonModule binding, the per-pixel contract, memory placement, sync, sandboxing). 
Its thesis to validate: a **portable bytecode-VM baseline that runs on every target on day one + an optional native back-end for the hot ISAs behind a shared IR**. **Next: the top-down redesign** — the prompt that generates `livescripts-analysis-top-down.md` is at the bottom of the bottom-up doc; it produces the reference architecture + staged spike plan. Implementation is multi-commit, spike-ordered, after the top-down lands. Credits: [history/hpwit-ESPLiveScript.md](../history/hpwit-ESPLiveScript.md). ## HTTP and OTA @@ -370,46 +255,11 @@ What to build (~4 h): `HttpServerModule::handleConnection()` serves large embedded files (`app.js`, `style.css`) with the blocking `TcpConnection::write` — a page load can briefly stall `loop20ms`. One-shot per load (lower priority than the per-tick preview issue, which is fixed). Fix: serve large HTTP responses with `writeChunks` (the same non-blocking path used for preview frames). ---- - -## Effects and preview - -### Add real z-axis variation to 2D effects (pending) - -Only **NoiseEffect**, **PlasmaEffect** and **RipplesEffect** have z-aware math. The other honest-D2 effects use `Layer::extrude` to duplicate the z=0 plane, so every z-slice is identical on 3D layers. Candidates for genuine D3 promotion: Metaballs/GlowParticles (add z to blob coordinates), Plasma palette/Spiral (add z-driven phase term), Fire (z-drift heat grid), Rings/LavaLamp/Checkerboard/Particles (add z to each element). Prioritise after seeing real 3D installations; each promoted effect also needs its `dynamicBytes` budget for the full 3D buffer. - -### Full-density interpolated preview for large layouts (backlog) - -The preview index-downsamples a large layout to fit the WS send budget (e.g. 128×128 = 16384 lights → ~1639 sent at stride 10), so the UI shows a sparse sample, not every light. 
To show **all** lights at their real positions with **interpolated** colours for the unsent ones: - -- Decouple the `0x03` coordinate-table density from the per-frame `0x02` stride. Positions are static and sent once, so the table can carry **all** light coordinates (16384 × 3 = ~48 KB one-time — acceptable off the per-frame path, possibly chunked) while the per-frame RGB stays strided to protect ArtNet/the link. -- The browser holds the full position set and, per frame, interpolates each unsent light's colour from its nearest sent neighbours (the sent indices are known from the stride). True positions, guessed colours — better than the removed dense-box block-replicate because positions are exact. -- Open questions: 48 KB one-time table vs `MAX_WRITE_CHUNKS` / send-buffer (needs chunked send or a raised cap, with the same partial-write care as `writeChunks`' drain); interpolation cost on a 16384-point cloud each frame in JS; whether nearest-neighbour or weighted is worth it. - -Not simple — own planning pass. Until then the preview is a faithful strided *sample* (correct shape/colour/motion, not per-pixel). A cheap interim (point-size scaled by stride to fatten samples into their cells) was tried and reverted as not what's wanted — it filled the volume but didn't add real points. - -### Self-describing preview frame header (mid term) - -The preview wire format is a private opcode protocol: `0x02` per-frame channels, `0x03` coordinate table, each a hand-rolled byte layout, and the colour payload is **always RGB** regardless of the buffer's `channelsPerLight`. Every new data kind (RGBW display, beam direction, …) means inventing another opcode and another fixed layout by hand. The minimal fix that stops that sprawl: a small **typed header** — `[type][format][count][stride]` where `format` enumerates `{RGB, RGBW, …}` — so one message kind carries any per-light channel layout and the browser shader reads `format` to interpret the payload. 
Do it concrete-first, when RGBW *display* (below) is actually wanted, not speculatively. Prereq for both items below. - -### RGBW preview end-to-end (mid term) - -The light `Buffer` already holds `channelsPerLight = 4` (RGBW), and the device output drivers handle it, but the **preview only ever sends/draws RGB** — the W channel is invisible in the UI. (The full-res fast path no longer penalises a cpl≥3 buffer — see the short-term fix — but it still drops W on the wire.) Once the self-describing header lands, carry the W channel on the wire and render it in the shader (W as a warm-white tint / brightness lift on the disc). Small, but gated on the header so it isn't another bespoke opcode. - -### Fixture model — moving heads, beams (long term) - -Today a "light" is a point at a static coordinate with a colour. A **moving head** is a fixture that emits a *beam* in a direction it controls live (pan + tilt), plus colour, beam-width, etc. — per-light **vector** state, not just colour, and a different draw (a cone/ray, not a disc). The static-positions-`0x03` + colour-`0x02` split can't express "this fixture's beam now points here." The industry-standard model is **DMX/GDTF fixtures**: a fixture has a position *and* a set of typed attributes (color, pan, tilt, beam). The preview becomes a fixture renderer (disc for a pixel, cone for a beam); this is also the "make Preview a general-purpose module, not light-specific" goal. A domain-model change (the fixture/attribute model), not just transport. Plan when moving heads are actually on the bench. 
- -### Extract the resumable backpressure transport as a domain-neutral channel (long term) - -The preview's transport — resumable cross-tick send from a stable buffer + newest-wins backpressure drop + adaptive graceful degradation (see [architecture.md § graceful degradation under transport backpressure](../architecture.md)) — is **payload-agnostic**: any bulky throttled stream (a future MJPEG/video preview, fixture-state streams, fleet telemetry) could ride it. The *payload* model (count/stride/RGB) is light-specific; the *byte-pump* is not. When a second consumer for this transport appears, promote the pump into a domain-neutral core primitive (a `ThrottledChannel`-style sink) that PreviewDriver becomes *a* producer on, rather than owning the protocol. Concrete-first: extract on the second use, not before — until then the seam stays inside HttpServerModule/PreviewDriver. - ---- - ## Testing ### Additional test coverage (pending) +- **Memory degradation cascade** — the output-buffer *allocation* decision (no buffer for a lone identity layer; a buffer for ≥2 layers or any LUT layer) is unit-pinned (`unit_Layers_container` "Drivers allocates the output buffer only when…"), and LUT-vs-identity is pinned by `unit_Layer_sparse_mapping`. What's **not** pinned is the *low-heap* half of [architecture.md § Degradation cascade](../architecture.md#degradation-cascade): under heap pressure the LUT + driver buffer are skipped *together* (`lutSkipped()` true, forced 1:1), and below that the layer buffer *reduces dimensions* (halving to an 8×8 floor) rather than failing. The hook exists — `unit_BlendMap` already uses `platform::setTestMaxAllocBlock` to force allocation failure for the paging test — so a test could cap the block size and assert: (1) LUT+output buffer both skip and `lutSkipped()` flips, (2) the layer buffer shrinks to fit and never goes null. 
Pre-existing gap (predates multi-layer); the *happy-path* allocation contract is covered, only the OOM-degrade branch isn't. - **UI page load time** — scenario step measuring HTTP response time for `/`, `/api/state`, `/api/system` via the live runner. Verifies acceptable load time on ESP32. - **Module teardown memory** — scenario that tears down all modules and verifies heap returns to pre-setup baseline. Confirms no lifecycle leaks. - **JavaScript test harness** — `vitest` + `jsdom` for the browser UI: pure helpers in `install-picker.js` (`isCompatible`, `parseFirmwaresFromAssets`, `relativeTime`) **and `app.js`'s conditional-control DOM logic** (`syncVisibleControls` — reconciles which control rows are rendered when a `hidden` flag flips). The C++/backend half of conditional controls IS unit-tested (`conditional_controls.h` + per-module tests pin the binding + `hidden` flag), but the **UI re-render half is not** — `syncVisibleControls` was the source of a real re-render-loop freeze (Network static-IP toggle) caught only on hardware. A `jsdom` test that builds a card, flips a control's `hidden`, runs the reconcile, and asserts the right rows appear/disappear (and that it converges — the unchanged→no-op fast path) would have caught it. **Attempted and reverted (2026-06-17):** stood up vitest + 13 passing tests for the install-picker pure helpers, but the high-value half (`syncVisibleControls`) needs either an `app.js` module seam or extracting its reconcile logic into a separate served `.js` (6 embed/route wiring edits for a firmware-served file). Judged not worth adding a whole Node/npm toolchain to a C++/Python repo to test ~3 small pure functions; the toolchain earns its place only once the `syncVisibleControls` DOM test (and a real body of JS logic) lands with it. 
**Do it as its own focused branch**, deciding the app.js seam first (it's already `type="module"`, so extracting `reconcileControlRows` into a served file — wired through `embed_ui.cmake` + the two HttpServerModule routes like the other UI .js — is the clean shape). Pure-helper `_test` exports + the reconcile extraction are the two pieces; both were prototyped in that reverted attempt. @@ -425,8 +275,6 @@ Run a bare-leaving scenario before a tree-assuming one and the latter fails pre- Fix options: (a) make every live mutate scenario clear+rebuild its own canvas (consistent with the newer ones) so order never matters; or (b) have the live runner reboot / restore the canonical tree between scenarios. (a) is the cleaner long-term shape. Until then, the in-process suite is the gate; live full-suite runs need a clean boot per scenario, or run scenarios individually. ---- - ## Housekeeping ### Socket-pair fixture for HttpServerModule WS-send tests (test infra) @@ -466,18 +314,62 @@ Rounds 1 (board + Ethernet-only) and 2 (Parlio LED driver) have landed. Remainin **Dev-loop note — reading the P4's runtime log over USB.** The P4-NANO's primary console is **UART on GPIO 37/38** (`CONFIG_ESP_CONSOLE_UART_DEFAULT`), not the USB port, so `ESP_LOGI` / `mm_net` lines are *not* visible over `/dev/cu.usbmodem*` by default — only the ROM boot banner and `std::printf`-to-stdout (which routes to the **secondary** USB-Serial-JTAG console) come through. Two workarounds when you need the runtime log over USB: (a) temporarily set `CONFIG_ESP_CONSOLE_USB_SERIAL_JTAG=y` (note the JTAG endpoint re-enumerates when the app starts, so a reader must reconnect across the drop — `idf.py monitor` handles it; a plain fixed `pyserial` handle dies); or (b) hang a USB-UART adapter on GPIO 37/38. 
This cost real time during the P4 no-DHCP hunt; the fastest signal there turned out to be a `printf` of the runtime struct (stdout → secondary JTAG console) plus a `git worktree` bisect (build an old commit, flash, check LAN reachability) to prove code-vs-hardware without needing the log at all. -### Drop the i80 WR/DC sacrificial pins (S3 LcdLedDriver) via direct LCD_CAM +### WiFi runtime disable (backlog) + +Compile-time answer already ships: `--firmware esp32-eth` excludes the WiFi stack. The default `esp32` already *cascades* — `ethInit()` runs first, WiFi only comes up if no PHY responds — so a wired board never associates over WiFi. What's still missing is reclaiming WiFi's **heap**: even when Ethernet wins the cascade, `esp_wifi_init`'s RX buffers stay allocated. This item skips that init entirely once Ethernet is up, freeing ~16 KB. Defer until the heap saving is worth the teardown-ordering risk. -The S3 i80 LED path costs **two GPIOs the LEDs never use**: the IDF `esp_lcd` i80 bus hard-requires a WR (pixel clock) and a DC pin on real GPIOs (`esp_lcd_panel_io_i80.c`: `wr_gpio_num >= 0 && dc_gpio_num >= 0`), even though WS2812 strands ignore both. Today `LcdLedDriver` keeps overridable defaults (clockPin=10, dcPin=11) — peripheral-required, not user-strand wiring, so a default cannot do harm. **Two ways to reclaim the pins, neither trivial:** -- **Cannot reuse a data pin for WR/DC.** A GPIO carries exactly one peripheral signal (`esp_rom_gpio_connect_out_signal` binds data_sig[i] / wr_sig / dc_sig each to its own pin); routing WR onto a data lane would clock the *clock* waveform onto that strand instead of its colour bytes. WR/DC must be distinct *physical* pins from the 8 data pins. (You CAN already point them at any otherwise-free or unstrapped GPIO via the controls — that's the "reuse a pin you're not using" answer; it's the *spare* pin you avoid, not a data pin.) 
-- **Zero WR/DC pins needs bypassing esp_lcd** and driving the LCD_CAM peripheral's registers directly (hpwit's I2SClockless approach — legacy parallel mode has no DC concept and emits WR without a dedicated config pin). That's the only path to 8-pins-total on the S3. Cost: leaving the recognisable IDF `esp_lcd` API for register-banging (a *Common patterns first* hit), re-proving the driver bit-perfect on hardware (the loopback self-test is the proof). Benefit: 2 GPIOs back on a tight S3 board. Its own increment, not a pin-default tweak. Parlio (P4) already needs no extra pins (`clk_out_gpio_num = GPIO_NUM_NC`), so this is S3-i80-only. -### LCD/Parlio DMA frame buffer → PSRAM (free internal SRAM for big frames) +## UI -For driving **lots of LEDs**, internal SRAM is the scarce resource and the parallel-driver DMA frame buffer is the biggest consumer (8 lanes × lights × outCh × 24 slot-bytes + latch pad). Today both parallel drivers allocate it as `MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL` (`platform_esp32_lcd.cpp`, `platform_esp32_parlio.cpp`) — **internal SRAM only**, so a large frame can exhaust DRAM while PSRAM sits unused. The IDF confirms both peripherals' GDMA **can burst straight from PSRAM** on the S3/P4: `esp_lcd_panel_io_i80.c` sets `access_ext_mem = true` and itself allocates the buffer with `MALLOC_CAP_SPIRAM | MALLOC_CAP_DMA` when asked; `esp_driver_parlio/src/parlio_tx.c:158` sets `access_ext_mem = true // support transmit PSRAM buffer`. (RMT already does the right thing — its symbol buffer goes through `platform::alloc`, which is PSRAM-first with an internal fallback.) +Forward-looking companion to the shipped UI spec, [moonmodules/core/ui.md](../moonmodules/core/ui.md). The live spec describes the UI as shipped; this file holds what is **not** in it yet: deferred items, open design questions for 1.0, and the gap analysis against projectMM v1. 
The backward-looking half (how v1/v2 actually worked, patterns consciously rejected, recorded quirks) lives in [history/v1-inventory.md](../history/v1-inventory.md). -**The change:** allocate the LCD/Parlio buffer `MALLOC_CAP_DMA | MALLOC_CAP_SPIRAM` first, falling back to internal when PSRAM is absent/full, using the **external-memory alignment** the IDF requires (`gdma_get_alignment_constraints` → `ext_mem_align`, typically the cache line — larger than the current 64-byte internal alignment) and keeping the buffer cache-aligned + its size a multiple of that alignment. **Why it's its own increment, not this commit:** it changes the proven hot DMA path, PSRAM DMA has real caveats (cache-line alignment, write-back/coherence on the encode→DMA handoff, and lower PSRAM bandwidth that the IDF guards with a CPU-MAX DFS lock during transmit), and it **must be re-proven on S3 + P4 hardware** (the loopback self-test bit-verifies it, then a real strip). Measure the bandwidth headroom too: a very wide, long frame at speed may want internal SRAM regardless. Scope: the two `heap_caps_aligned_alloc` sites + their `bufferBytes` alignment rounding + the capacity check; no domain-code change (the encode loop already writes through `dmaBuf_`). +### Deferred to 1.x -### WiFi runtime disable (backlog) +- Side nav with drag-reorder of root modules (root order is fixed in `main.cpp` today; not painful — and arguably correct, see the gap-analysis note below) +- Health panel (`
` + `GET /api/test`) +- Log panel (`
` + WS `{t:"log",m:"…"}`) +- Core affinity badge (C0/C1) — only meaningful when core pinning lands +- Module `category()` field — taxonomy beyond `role()` for the picker (decision: derive from `role()` for now) -Compile-time answer already ships: `--firmware esp32-eth` excludes the WiFi stack. The default `esp32` already *cascades* — `ethInit()` runs first, WiFi only comes up if no PHY responds — so a wired board never associates over WiFi. What's still missing is reclaiming WiFi's **heap**: even when Ethernet wins the cascade, `esp_wifi_init`'s RX buffers stay allocated. This item skips that init entirely once Ethernet is up, freeing ~16 KB. Defer until the heap saving is worth the teardown-ordering risk. +### Open design questions + +These don't block the shipped baseline but should be answered before 1.0: + +- **Multi-layer UI** — [architecture.md](../architecture.md) plans for N layers blended into one Drivers. The current card layout shows one Layer. Likely needs a tab/accordion to switch layers, or a per-layer column. +- **Modifier chain visualization** — show the modifier order visually. Today they're a flat list, and only the **first enabled** modifier actually applies (the `children[]` order is *not* yet an apply order — see [Composed modifiers](backlog-mixed.md#composed-modifiers--chain-the-whole-modifier-stack-not-just-the-first-planned-multi-commit)). This viz item only becomes meaningful *after* composed modifiers land; until then a chain UI would imply a stacking the engine doesn't do. +- **Presets** — save/load named bundles of control values. Persistence already stores them; needs a UI surface. +- **Canvas/node-graph view** — v2 attempted this. Powerful for complex setups but doubles the UI surface. A reasonable v3 follow-up gated on user demand. + +### Gap analysis — v1 features not yet in v3 + +Inventory of v1 frontend behaviours v3 lacks, with a recommendation each. 
Items already shipped (control types, dragTs, two-timescale inputs, type picker, theme, scroll-shrink preview, status bar, reset-to-default, fps/ms toggle, drag reorder, side nav + drawer + footer) are not repeated. + +Legend: **Adopt-1.0** (small, high value) · **Defer-1.x** (needs engine work or a feature we lack) · **Drop** (not needed). + +### Per-card features + +| v1 feature | v3 today | Recommendation | +|---|---|---| +| Header: setup-dot before name | name only | **Defer-1.x** — needs `setupOk()` + `health()` on MoonModule with a real failure mode. Today both would always be `true` / `""`. | +| Module ID shown separately from name | name only | **Defer-1.x** — add when instances need disambiguating (e.g. two effects of the same type under one Layer). | +| Category emoji badge on the card header | role emoji in the picker, not on the card | **Defer-1.x** — `ROLE_EMOJI` already exists in `app.js`; showing it per-card is a small step if card scannability needs it. | +| Core affinity badge (C0/C1) | core pinning not implemented | **Drop** until core pinning is a real engine feature. | +| Memory split heap vs PSRAM | `static+dynamic` shown on the card | **Defer-1.x** — splitting `dynamicBytes` further needs `platform::isPsramPointer(p)` or per-alloc tracking, neither exists yet. | + +### WebSocket / panels + +| v1 feature | v3 today | Recommendation | +|---|---|---| +| Drag-to-reorder *root* modules (`POST /api/modules/reorder`) | not supported | **Drop** — root order is fixed in `main.cpp` and that's correct: Layouts/Layers/Drivers + system modules are mandatory and ordered. Children reorder via drag already. | +| Log channel `{t:"log",m:"…"}` pushed by server | no server log push | **Defer-1.x** — needs an engine-side log producer. Gate: when boot/network/persistence logs become interesting to non-developers. 
| +| Schema channel `{t:"schema",modules:[…]}` for tree-shape changes | full `/api/state` push every update | **Drop** — keep the full-tree push; re-evaluate only if WS bandwidth becomes a problem with large trees. | +| System health panel (polls `GET /api/test`, pass/fail table) | none | **Defer-1.x** — needs a runtime `/api/test` that runs the doctest suite; `ctest` covers this for now. | +| Log panel (ring buffer, severity colouring, stick-to-bottom, `GET /api/log` backfill) | none | **Defer-1.x** — pairs with the log WS channel; both arrive together. | + +### Cost / decision table + +| Cost class | Items | +|---|---| +| Tiny (< 30 lines, no backend) | category emoji badge on the card header | +| Medium (minor backend change) | help-link mapping (needs docs site); richer `category()` than role()-derived | +| Large (separate plan) | health panel + `/api/test`; log panel + WS log channel; OTA + GitHub-update badge; full multi-layer UI; presets UI | diff --git a/docs/backlog/backlog-light.md b/docs/backlog/backlog-light.md new file mode 100644 index 0000000..fc3d128 --- /dev/null +++ b/docs/backlog/backlog-light.md @@ -0,0 +1,137 @@ +# Backlog — light domain + +Forward-looking to-build items for the **light domain** (`src/light/`: drivers, effects, layouts, modifiers, preview) and its sensors. The core/infrastructure counterpart is [backlog-core.md](backlog-core.md); cross-domain items are in [backlog-mixed.md](backlog-mixed.md). Index + overview: [README.md](README.md). Completed items are removed. + +## Drivers + +### Extract shared lane-driver scaffolding when the 3rd parallel backend lands (deferred) + +The LcdLedDriver (S3 LCD_CAM i80) and ParlioLedDriver (P4 Parlio) share ~245 of 362 lines, and their platform-side loopback capture+verify is ~100 lines byte-for-byte identical (`platform_esp32_parlio.cpp` even notes "The RX capture half is byte-for-byte identical" to the LCD one). 
The status-string lifecycle (`failBuf_` / `configErr_` / `clearFailBuf` / `clearConfigErr`) is triplicated across all three LED drivers (RMT/LCD/Parlio), ~60 lines. The branch deliberately extracted the *encoders* (`LcdSlots.h` shared by i80+Parlio, `RmtSymbol.h`, `PinList.h`) on the "extract when the second user lands" rule, but stopped at the lifecycle/loopback scaffolding. **Accepted for this merge** (the reviewer agreed driver-level extraction can wait): the duplication is in mechanical lifecycle/test scaffolding, not domain logic, and a DriverBase-level refactor touching three drivers is riskier than the duplication it removes. **Do it when the third parallel backend arrives** (16-lane widening, or Teensy FlexIO), at which point the pattern is proven three ways: (a) a `detail::` platform helper for capture+verify (the only per-peripheral difference is the transmit call, so pass it in as a callback; the helper sits beside the already-shared `loopbackJumperOk`), and (b) a small owned-status helper or DriverBase members for the fail/config strings. Until then the cost is line count, not correctness. + +### 1..8-pin LCD output (future) — would let S3 default to LCD + +`LcdLedDriver` requires **all 8** i80 data lanes (`kExactLaneCount = true`, `LcdLedDriver.h`): the ESP-IDF `esp_lcd` i80 bus configures every data line of the bus width and rejects a partial set, so even a few WS2812 strands claim 8 GPIOs. That's why **S3 boards default to `RmtLedDriver`** in `deviceModels.json` (RMT runs one channel per pin, 1..N) rather than LCD — a board with fewer than 8 strips can't sensibly use the LCD driver, and the 8-lane LCD bench wiring (`1,2,4,5,6,7,8,9`) collides with common peripheral pins (e.g. the mic on 4/5/6). A **1..8-pin LCD mode** (drive only the lanes named in `pins`, leave the rest unclaimed — matching Parlio's flexibility) would let the parallel S3 path run any lane count, at which point an S3 board entry could choose LCD vs RMT by intent. 
Parlio already does this (`kExactLaneCount = false`, 1..8 lanes), so the P4 default *is* the parallel driver. Until LCD gains the same flexibility, S3 stays on RMT by default. Low priority — RMT covers the few-strip S3 case today. + +### Classic ESP32 I2S 16-lane parallel LED driver (future) — beyond RMT's 8 channels + +The **classic ESP32 has 8 RMT TX channels** (`platform_config.h`: "8 on classic ESP32, 4 on the S3 and P4"), so RMT covers up to 8 parallel outputs on classic ESP32 — e.g. the 8-output QuinLED Dig-Octa runs fine on `RmtLedDriver`. For **more than 8 lanes on classic ESP32**, the established trick drives the **I2S peripheral in LCD/parallel mode** (the hpwit [I2SClocklessLedDriver](https://github.com/hpwit/I2SClocklessLedDriver) / FastLED I2S lineage), clocking out up to **16 lanes** from one autonomous DMA transfer. This is the classic ESP32's high-lane-count path, distinct from the S3 (LCD_CAM → `LcdLedDriver`, plus the [1..8-pin LCD item](#18-pin-lcd-output-future--would-let-s3-default-to-lcd) above) and the P4 (Parlio). No catalog board needs it today (none exceeds 8 outputs), so no board's `planned` list points at it yet; it's the marker for a future ≥9-output classic-ESP32 board. Studied under *Industry standards, our own code* — carry the idea, write our own against the project architecture (host-testable encoder in `src/light/`, peripheral seam in `src/platform/esp32/`). **When it lands**, follow the per-chip driver-gating pattern now in `main.cpp` (each LED driver's `#include` + `registerType` is wrapped in `#if defined(CONFIG_SOC_<X>_SUPPORTED)`, keyed off the SOC capability macro that backs its `platform_config.h` lane-count flag): the I2S driver gates on the relevant I2S/LCD SOC macro so it compiles + registers on classic ESP32 only, and adds an `i2sLanes` capability flag beside `rmtTxChannels`/`lcdLanes`/`parlioLanes`. 
Prior art: hpwit's I2SClockless lineage and FastLED's I2S driver; the same parallel-DMA lineage is already credited in [LcdLedDriver.md § Prior art](../moonmodules/light/drivers/LcdLedDriver.md#prior-art). + +## Sensors and audio-reactive input + +### Audio-reactive follow-ups + +The manual level + 16-band FFT spectrum has shipped ([AudioModule](../moonmodules/core/AudioModule.md); what landed and why is in [decisions.md](../history/decisions.md)). These are the deferred follow-ups, each its own increment: + +- **Per-band noise-floor (kill a steady single-frequency hum)** — the bench mic picks up a constant ~258 Hz tone (a mains harmonic via the mic/supply) that lights one band even in silence. A high-pass can't remove it (it's well above the ~40 Hz DC-blocker cutoff) without also killing real bass; the clean fix is a per-band adaptive floor that learns each band's idle baseline and subtracts it, so a constant tone in one band gates to dark while the others stay sensitive. Minimal version ≈ 16 floats of state + ~16 ops/frame. This is the next concrete audio step. +- **Adaptive conditioning** — auto noise-floor / auto-gain / smoothing so the display self-calibrates to a room ("sound off → dark, sound on → vivid") instead of being tuned by hand. A self-calibrating version was prototyped and removed; the manual `floor`/`gain` is the shipped baseline. Reinvent from scratch when wanted, and **tune it in a quiet room** — a noisy environment (a strong, varying low-frequency ambient) is the adversarial case that made the prototype hard to settle. (The per-band floor above is the first piece of this.) +- **Adaptive noise gate** — replace the borrowed `squelch`/`floor`-as-gate with a real noise gate: asymmetric bang-bang timing (open fast, close slow), a relative "detect silence" test (thresholds as factors of a learned floor, not absolute sample counts), keying off the RMS envelope we already compute, GEQ/FFT bands left untouched. 
A softhack007 concept; analysed and judged in full (good idea, industry-standard, but tight on the <30ms budget; decompose into steps rather than overhaul) in [AudioModule.md § Adaptive noise gate](../moonmodules/core/AudioModule.md#adaptive-noise-gate-forward-looking). The recommended sequencing: the per-band floor above is step 1 (its complementary frequency-domain half), the relative-threshold-over-RMS is the cheap high-value cherry-pick as step 2, hysteresis/timing step 3, log-domain + soft-gate optional. Eventually retires the manual squelch. +- **Pin auto-scan** — detect the mic's `sdPin` with `wsPin`/`sckPin` fixed (a noise-prompt + confirm convenience); ships today with explicit pin controls. +- **Beat / onset detection** beyond the raw peak; more audio effects (2D / palette-driven frequency-reactive). + +### GyroDriver → core Peripheral move + AudioModule-consistency pass (branched, not merged) + +A working **GyroDriver** (MPU6050 IMU over I²C) exists on an unmerged branch (commit `11f8eb7`, "Add GyroDriver (MPU6050) + generic platform I2C layer"); it is not in this branch's tree. This entry reverse-engineers that commit so the move is tracked now. **Verify against the real implementation when the branch merges, then delete this entry.** + +What the commit contains (reverse-engineered): + +- `src/light/drivers/GyroDriver.h` — reads an MPU6050 over I²C and surfaces five read-only telemetry controls (`gyroX`/`gyroY`/`gyroZ` rates in °/s, `pitch`/`roll` tilt angles). Polls the sensor in `loop20ms()` (50 Hz), formats the display strings in `loop1s()`. WHO_AM_I probe + wake on `setup()`, big-endian 14-byte burst parse, `atan2`-based tilt (no fusion filter). 
+- A **generic, domain-neutral platform I²C master** (`platform::i2cInit`/`i2cWriteReg`/`i2cReadRegs`, 7-bit addressing) so future sensors reuse it; ESP32 impl on the IDF v6 `i2c_master` driver in a new `platform_esp32_i2c.cpp`, plus an MPU6050-shaped desktop simulation so the UI and host tests see live values without hardware. +- `unit_GyroDriver.cpp` — WHO_AM_I probe, simulated burst parse, control formatting, time-ramp tracking. + +The move: it currently masquerades as an input-only **driver** under the Drivers container (a no-op `setSourceBuffer(Buffer*) override {}` is the tell). It belongs as a **SystemModule Peripheral** child, exactly like [AudioModule](../moonmodules/core/AudioModule.md) — both are sensor peripherals that poll hardware and publish read-only telemetry. On the move, make it consistent with AudioModule (the established sibling pattern): + +- **Relocate** `src/light/drivers/GyroDriver.h` → `src/core/` and its spec `docs/moonmodules/light/drivers/GyroDriver.md` → `docs/moonmodules/core/`; change `role()` to `Peripheral`; delete the `setSourceBuffer` no-op; rewrite the doc's "input-only driver under the Drivers container" framing. +- **Pin controls + rebuild path.** GyroDriver hardcodes SDA/SCL (`static constexpr` 21/22, with its own "Hardcoded until BoardModule exposes I2C pin mapping" comment). AudioModule already shows the pattern: editable `uint16` pin controls + `controlChangeTriggersBuildState` + a `reinit()` on `onBuildState`. Adopting it retires the hardcoded-pins TODO and satisfies the robustness rule (reconfigure in any order). +- **Lifecycle.** GyroDriver has `setup()` only — no `teardown()`. Add teardown for symmetry with AudioModule's setup/teardown/reinit (the shared I²C bus has little per-instance state to free, so this is consistency, not a leak fix). 
+- **Document the cadence difference.** GyroDriver polls in `loop20ms()` (50 Hz is plenty for tilt); AudioModule reads in `loop()` every tick because I²S DMA must be drained promptly or it overflows. Both are correct; add a one-line "why this cadence" comment at each so the two siblings aren't "harmonised" into a bug. +- **Wire it** in `main.cpp` as a Peripheral child of System under `markWiredByCode`, the same shape as AudioModule. + +Already done on this branch (the reverse direction): AudioModule's two live read-outs were switched from `addText`+`setReadOnly` to `addReadOnly` (the display-only type, matching SystemModule and the way GyroDriver already does it correctly) — so the telemetry idiom is consistent before the gyro branch even lands. + +### Sensor input on Raspberry Pi 5 — microphone, IMU, line-in (post-1.0, multi-commit) + +Audio-reactive lighting (and motion-reactive) is core to what WLED-MM / MoonLight are known for. The Pi 5 is the right host for it: it has the CPU and RAM for real FFT-based audio analysis that the Xtensa ESP32 struggles with, and a full Linux audio + I²C stack. None of this exists today — the codebase has no sensor, audio, or IMU concept, and the Pi currently runs the **desktop** platform backend (there is no `src/platform/rpi/`), which has no hardware access. So this is a domain expansion built on a real platform-backend prerequisite, not a small add. + +**Target sensors and their Pi 5 interfaces:** + +- **Microphone** — I²S MEMS mic, or a USB audio device read via ALSA. The high-value one: FFT → frequency bands + beat detection drive audio-reactive effects. +- **Line-in** — the Pi 5 has no native analog input, so this is a USB audio interface / DAC HAT feeding the same audio pipeline as the mic; only the source differs. +- **IMU / gyro** — an I²C device (MPU-6050 / 9250-class) on the Pi's I²C bus; tilt / motion → effect parameters. + +**How it fits the architecture (the load-bearing part):** + +1. 
**The module category exists — `ModuleRole::Peripheral`.** Peripherals are user-add/deletable children of SystemModule (a gyro `Peripheral` already lands there via the GyroDriver→core move). What's missing for audio-reactive is the *consumption* side: a sensor reads hardware and *produces* values (audio bands, IMU axes) that effects consume — the producer side of the [producer/consumer data-exchange model](../architecture.md#data-exchange-between-modules) (a sensor produces an `AudioFrame` / `ImuState` the way effects produce a buffer that drivers consume). Define the producer struct domain-neutrally so it isn't audio-specific. Today's peripherals are display-only; wiring them into effects is the new work. +2. **All hardware access stays behind the platform boundary.** New `platform::` APIs (e.g. `readAudio()` returning PCM/FFT, `readImu()` returning axes) with the ALSA / I²S / I²C implementation in a real `src/platform/rpi/` backend — which is itself the prerequisite that doesn't exist yet (the Pi uses the desktop backend today). No ALSA/I²C include or call outside `src/platform/`. +3. **Effects consume sensor data the same way they read the layer.** An audio-reactive effect reads the current `AudioFrame` (bands/level/beat) the way `PreviewDriver` reads what `Layer` produces — through a plain data structure wired in `main.cpp`, not a direct hardware call. + +**Increments (each a normal domain addition, picked up one at a time):** + +1. A real `src/platform/rpi/` hardware backend (GPIO/I²C/I²S/ALSA) — the prerequisite; until it lands, the Pi runs the desktop backend with no sensors. +2. The producer struct(s) (`AudioFrame` / `ImuState`) + the `platform::read*` APIs. (The `Peripheral` role + SystemModule add/delete already exist.) +3. The first audio peripheral — **MicrophoneModule** (canonical, highest value: FFT bands + beat). +4. The first audio-reactive effect(s) consuming it. +5. IMU and line-in slot into the same source-module + platform-API shape afterwards. 
+ +Study the proven audio pipeline in MoonLight / WLED-MM (FFT band layout, AGC, beat detection) to inform our own — reference the approach, don't port their code, per [history](../history/README.md) practice. Specs before code: a `MicrophoneModule.md` (and the source-category contract) get written and reviewed before implementation. + +## Effects and preview + +### Add real z-axis variation to 2D effects (pending) + +Only **NoiseEffect**, **PlasmaEffect** and **RipplesEffect** have z-aware math. The other honest-D2 effects use `Layer::extrude` to duplicate the z=0 plane, so every z-slice is identical on 3D layers. Candidates for genuine D3 promotion: Metaballs/GlowParticles (add z to blob coordinates), Plasma palette/Spiral (add z-driven phase term), Fire (z-drift heat grid), Rings/LavaLamp/Checkerboard/Particles (add z to each element). Prioritise after seeing real 3D installations; each promoted effect also needs its `dynamicBytes` budget for the full 3D buffer. + +### Full-density interpolated preview for large layouts (backlog) + +The preview index-downsamples a large layout to fit the WS send budget (e.g. 128×128 = 16384 lights → ~1639 sent at stride 10), so the UI shows a sparse sample, not every light. To show **all** lights at their real positions with **interpolated** colours for the unsent ones: + +- Decouple the `0x03` coordinate-table density from the per-frame `0x02` stride. Positions are static and sent once, so the table can carry **all** light coordinates (16384 × 3 = ~48 KB one-time — acceptable off the per-frame path, possibly chunked) while the per-frame RGB stays strided to protect ArtNet/the link. +- The browser holds the full position set and, per frame, interpolates each unsent light's colour from its nearest sent neighbours (the sent indices are known from the stride). True positions, guessed colours — better than the removed dense-box block-replicate because positions are exact. 
+- Open questions: 48 KB one-time table vs `MAX_WRITE_CHUNKS` / send-buffer (needs chunked send or a raised cap, with the same partial-write care as `writeChunks`' drain); interpolation cost on a 16384-point cloud each frame in JS; whether nearest-neighbour or weighted is worth it. + +Not simple — own planning pass. Until then the preview is a faithful strided *sample* (correct shape/colour/motion, not per-pixel). A cheap interim (point-size scaled by stride to fatten samples into their cells) was tried and reverted as not what's wanted — it filled the volume but didn't add real points. + +### Self-describing preview frame header (mid term) + +The preview wire format is a private opcode protocol: `0x02` per-frame channels, `0x03` coordinate table, each a hand-rolled byte layout, and the colour payload is **always RGB** regardless of the buffer's `channelsPerLight`. Every new data kind (RGBW display, beam direction, …) means inventing another opcode and another fixed layout by hand. The minimal fix that stops that sprawl: a small **typed header** — `[type][format][count][stride]` where `format` enumerates `{RGB, RGBW, …}` — so one message kind carries any per-light channel layout and the browser shader reads `format` to interpret the payload. Do it concrete-first, when RGBW *display* (below) is actually wanted, not speculatively. Prereq for both items below. + +### RGBW preview end-to-end (mid term) + +The light `Buffer` already holds `channelsPerLight = 4` (RGBW), and the device output drivers handle it, but the **preview only ever sends/draws RGB** — the W channel is invisible in the UI. (The full-res fast path no longer penalises a cpl≥3 buffer — see the short-term fix — but it still drops W on the wire.) Once the self-describing header lands, carry the W channel on the wire and render it in the shader (W as a warm-white tint / brightness lift on the disc). Small, but gated on the header so it isn't another bespoke opcode. 
+ +### Fixture model — moving heads, beams (long term) + +Today a "light" is a point at a static coordinate with a colour. A **moving head** is a fixture that emits a *beam* in a direction it controls live (pan + tilt), plus colour, beam-width, etc. — per-light **vector** state, not just colour, and a different draw (a cone/ray, not a disc). The static-positions-`0x03` + colour-`0x02` split can't express "this fixture's beam now points here." The industry-standard model is **DMX/GDTF fixtures**: a fixture has a position *and* a set of typed attributes (color, pan, tilt, beam). The preview becomes a fixture renderer (disc for a pixel, cone for a beam); this is also the "make Preview a general-purpose module, not light-specific" goal. A domain-model change (the fixture/attribute model), not just transport. Plan when moving heads are actually on the bench. + +### Extract the resumable backpressure transport as a domain-neutral channel (long term) + +The preview's transport — resumable cross-tick send from a stable buffer + newest-wins backpressure drop + adaptive graceful degradation (see [architecture.md § graceful degradation under transport backpressure](../architecture.md)) — is **payload-agnostic**: any bulky throttled stream (a future MJPEG/video preview, fixture-state streams, fleet telemetry) could ride it. The *payload* model (count/stride/RGB) is light-specific; the *byte-pump* is not. When a second consumer for this transport appears, promote the pump into a domain-neutral core primitive (a `ThrottledChannel`-style sink) that PreviewDriver becomes *a* producer on, rather than owning the protocol. Concrete-first: extract on the second use, not before — until then the seam stays inside HttpServerModule/PreviewDriver. 
+ +## LCD / DMA driver work + +### Drop the i80 WR/DC sacrificial pins (S3 LcdLedDriver) via direct LCD_CAM + +The S3 i80 LED path costs **two GPIOs the LEDs never use**: the IDF `esp_lcd` i80 bus hard-requires a WR (pixel clock) and a DC pin on real GPIOs (`esp_lcd_panel_io_i80.c`: `wr_gpio_num >= 0 && dc_gpio_num >= 0`), even though WS2812 strands ignore both. Today `LcdLedDriver` keeps overridable defaults (clockPin=10, dcPin=11) — peripheral-required, not user-strand wiring, so a default cannot do harm. **Two candidate routes to reclaim the pins — the first is a dead end, the second is the only real path and is not trivial:** +- **Cannot reuse a data pin for WR/DC.** A GPIO carries exactly one peripheral signal (`esp_rom_gpio_connect_out_signal` binds data_sig[i] / wr_sig / dc_sig each to its own pin); routing WR onto a data lane would clock the *clock* waveform onto that strand instead of its colour bytes. WR/DC must be distinct *physical* pins from the 8 data pins. (You CAN already point them at any otherwise-free or unstrapped GPIO via the controls — that's the "reuse a pin you're not using" answer; it's the *spare* pin you avoid, not a data pin.) +- **Zero WR/DC pins needs bypassing esp_lcd** and driving the LCD_CAM peripheral's registers directly (hpwit's I2SClockless approach — legacy parallel mode has no DC concept and emits WR without a dedicated config pin). That's the only path to 8-pins-total on the S3. Cost: leaving the recognisable IDF `esp_lcd` API for register-banging (a *Common patterns first* hit), re-proving the driver bit-perfect on hardware (the loopback self-test is the proof). Benefit: 2 GPIOs back on a tight S3 board. Its own increment, not a pin-default tweak. Parlio (P4) already needs no extra pins (`clk_out_gpio_num = GPIO_NUM_NC`), so this is S3-i80-only.
+ +### LCD/Parlio DMA frame buffer → PSRAM (free internal SRAM for big frames) + +For driving **lots of LEDs**, internal SRAM is the scarce resource and the parallel-driver DMA frame buffer is the biggest consumer (8 lanes × lights × outCh × 24 slot-bytes + latch pad). Today both parallel drivers allocate it as `MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL` (`platform_esp32_lcd.cpp`, `platform_esp32_parlio.cpp`) — **internal SRAM only**, so a large frame can exhaust DRAM while PSRAM sits unused. The IDF confirms both peripherals' GDMA **can burst straight from PSRAM** on the S3/P4: `esp_lcd_panel_io_i80.c` sets `access_ext_mem = true` and itself allocates the buffer with `MALLOC_CAP_SPIRAM | MALLOC_CAP_DMA` when asked; `esp_driver_parlio/src/parlio_tx.c:158` sets `access_ext_mem = true // support transmit PSRAM buffer`. (RMT already does the right thing — its symbol buffer goes through `platform::alloc`, which is PSRAM-first with an internal fallback.) + +**The change:** allocate the LCD/Parlio buffer `MALLOC_CAP_DMA | MALLOC_CAP_SPIRAM` first, falling back to internal when PSRAM is absent/full, using the **external-memory alignment** the IDF requires (`gdma_get_alignment_constraints` → `ext_mem_align`, typically the cache line — larger than the current 64-byte internal alignment) and keeping the buffer cache-aligned + its size a multiple of that alignment. **Why it's its own increment, not this commit:** it changes the proven hot DMA path, PSRAM DMA has real caveats (cache-line alignment, write-back/coherence on the encode→DMA handoff, and lower PSRAM bandwidth that the IDF guards with a CPU-MAX DFS lock during transmit), and it **must be re-proven on S3 + P4 hardware** (the loopback self-test bit-verifies it, then a real strip). Measure the bandwidth headroom too: a very wide, long frame at speed may want internal SRAM regardless. 
Scope: the two `heap_caps_aligned_alloc` sites + their `bufferBytes` alignment rounding + the capacity check; no domain-code change (the encode loop already writes through `dmaBuf_`). + + +## LED drivers — deferred + +The LED-driver increments **shipped**: increment 1 (RMT/WS2812B single-strand on classic ESP32 — [`RmtLedDriver.h`](../../src/light/drivers/RmtLedDriver.h), `RmtSymbol.h`, `platform_esp32_rmt.cpp`) and increment 2 (2a multi-pin RMT, 2b parallel LCD_CAM on the S3 — [`LcdLedDriver.h`](../../src/light/drivers/LcdLedDriver.h) via [`ParallelLedDriver.h`](../../src/light/drivers/ParallelLedDriver.h), `platform_esp32_lcd.cpp`), all with host + on-board-loopback tests, hardware-proven. The locked decisions, file-by-file phases, the WiFi-flicker test-rig analysis, and the bench deviations (8-GPIO i80 bus, 2.67 MHz slot clock, SOC-macro gate, real-frame loopback) are in [decisions.md](../history/decisions.md), the [driver docs](../moonmodules/light/drivers/RmtLedDriver.md), and the [analysis docs](leddriver-analysis-top-down.md). What remains here is only the work that has **not** shipped and is tracked nowhere else. + +- **sigrok/fx2lafw cross-check + MoonDeck "LED driver test" Python script** — the independent-clock proof and the run-from-MoonDeck flow ([analysis §5.3](leddriver-analysis-top-down.md)). The on-board RMT-RX loopback (shipped) is the cheap CI correctness gate but a *compromised witness* for WiFi-induced flicker — the RX capture runs on the same ESP32 whose WiFi causes the glitch. The real flicker test is a **sustained capture (seconds) with WiFi associated + a packet flood**, decoding every frame for a byte-slip or reset-gap deviation; it belongs with the core-1 driver-task work below, since that task pinning is the *fix* it validates. A DSLogic Plus (100 MS/s) upgrade is reactive — only if a flicker reproduces that 24 MS/s can't resolve. 
+- **Dedicated core-1 driver task + per-module core-affinity control** ([analysis §7.2](leddriver-analysis-top-down.md)) — the WiFi-glitch mitigation, shared across all the LED drivers. (See also [backlog-core § Task core-pinning](backlog-core.md#task-core-pinning-backlog) for the general task-pinning question.) +- **`rmtWs2812Show` fuller error handling** (deferred from PR #17 / 🐇 CodeRabbit). The shipped path has a finite `rmt_tx_wait_all_done` timeout (1 s) so a wedged DMA can't hang the render tick forever, and a dropped frame self-heals (the driver re-encodes the whole frame next tick). The fuller version — `rmt_transmit` return check, `rmt_tx_stop` to cancel an in-flight transfer on timeout, `show()` returning failure so `loop()` won't reuse `symbols_` mid-transmit — belongs with the **core-1 driver-task** work, since that task owns the buffer lifetime and in-flight state the cancel logic needs. +- **Per-driver buffer window** — `start`/`count` controls on each physical driver, so different slices of the light buffer can go to different outputs (e.g. "some lights to ArtNet, others to LED pins"). Additive on `DriverBase` consumers when it lands — no change to the Drivers container or the buffer-passing contract; the multi-pin RMT slicing would then subdivide the driver's window instead of the whole buffer. + + **This is the model for light distribution — distribution is *explicit*, not derived from driver order.** Worth stating because it's a common expectation otherwise: every driver reads the **same shared source buffer** ([`Drivers::passBufferToDrivers`](../../src/light/drivers/Drivers.h) hands the same `Buffer*` to every child) and selects *its* lights from *its own* controls — `NetworkSendDriver` via `universe_start` + `light_count` ("0 = whole buffer, >0 = the first N"), the LED drivers via their `pins` / `ledsPerPin`. 
There is **no running offset across driver siblings**, so **reordering drivers via drag-and-drop does not change which lights each driver outputs** — it only changes tick order and the persisted file order. A "split the buffer across drivers by sibling order" model (some controllers do this) is explicitly *not* what projectMM does; this `start`/`count` window is the deliberate alternative — the user says which slice goes where, order-independently. **Estimate: small — 1–2 commits.** Add `start` (alongside the existing `light_count` as `count`) to `DriverBase`'s windowing, clamp to the source buffer, apply in each driver's read loop, plus a unit test (two drivers, non-overlapping windows, assert each emits its slice) and a doc line. The wire/output loops already read a sub-range, so this is mostly lifting `light_count`'s "first N" into a "[start, start+count)" window on the shared base. +- **Auto-derived DMA buffer count** (7 / 30 / 75 per [analysis §7.4](leddriver-analysis-top-down.md)), **16-bit pipeline + dither** ([§7.3](leddriver-analysis-top-down.md)), **shift-register expander stubs** ([§7.5](leddriver-analysis-top-down.md)). +- **Moving-head preview = peer interpreter.** When moving heads land, the previewer must interpret channel semantics (pan/tilt/RGBW-at-arbitrary-indices) to render a moving fixture — the same light-preset model physical drivers use, interpreted to screen. This is *why* the increments named the abstraction "interpret the preset" rather than "apply correction / opt out": so Preview becomes a full peer here without a rename. Its own design plan when moving-head support starts. + +(The shared lane-driver scaffolding extraction — when a 3rd parallel backend lands — is tracked separately under [§ Extract shared lane-driver scaffolding](#extract-shared-lane-driver-scaffolding-when-the-3rd-parallel-backend-lands-deferred) above.) 
diff --git a/docs/backlog/backlog-mixed.md b/docs/backlog/backlog-mixed.md new file mode 100644 index 0000000..a9e5b4d --- /dev/null +++ b/docs/backlog/backlog-mixed.md @@ -0,0 +1,53 @@ +# Backlog — mixed (core + light) + +Forward-looking items whose work genuinely spans **both** the core and light domains — a core mechanism interacting with a light driver/effect/modifier, where assigning it to one side would misrepresent it. Core-only items are in [backlog-core.md](backlog-core.md), light-only in [backlog-light.md](backlog-light.md). Index + overview: [README.md](README.md). + +## Cross-domain + +### MultiplyModifier mapping-LUT memory at large grids (investigation, re-verify on classic) + +`scenario_perf_full` on the S3 (2026-06-17) measured the MultiplyModifier's cost across grid sizes. The finding, stated correctly: the modifier **reduces compute** (with the default 2×2 kaleidoscope the effect renders only the ¼-size logical quadrant — Noise+Multiply at 16K is 29,647µs vs 50,555µs for Noise alone), and its real cost is **memory** — the 1:N fan-out mapping LUT. Measured modifier heap cost on the S3: 16²→1.7KB, 32²→10.8KB, 64²→23.5KB, **128²(16K)→93KB** (the LUT destinations array; `nrOfLightsType` is `uint32_t` on a PSRAM board). On the S3's 8MB PSRAM this is trivial. [Composed modifiers](#composed-modifiers--chain-the-whole-modifier-stack-not-just-the-first-planned-multi-commit) would multiply this memory cost by the chain depth — size it there. + +**This is NOT a no-PSRAM blocker** — 16K Noise + Multiply has run on a classic ESP32 (no PSRAM, 320KB internal) before at **10–20 FPS** (WiFi vs Ethernet), sending frames out over **ArtNet to a display, not physical LED drivers**. It works there because classic's `nrOfLightsType` is `uint16_t` (half the LUT size) and the modifier shrinks the logical render grid. 
So the action is **re-verify the working classic setup when a classic board is connected** (find the config — grid, mirror, ArtNet target — that reproduces the historical 10–20 FPS), not "fix an impossibility." Worth investigating only if that re-verification shows the LUT memory has regressed since: the destinations array is the obvious lever (it stores a `nrOfLightsType` per physical destination; a 2× kaleidoscope is 1:1 in *count* so the LUT need not store fan-out > the physical count — confirm it isn't over-allocating to `maxMultiplier()` when the effective fan-out is 1). Capture the classic numbers into performance.md's multi-board table first. + +### Composed modifiers — chain the whole modifier stack, not just the first (planned, multi-commit) + +**Confirmed scope, not an open question:** multiple modifiers per Layer applied as a stack was always the plan, and it ships in **MoonLight** (Mirror, Rotate, Transpose, Kaleidoscope, … all composable on one layer — see [moonlight-inventory.md](../history/moonlight-inventory.md)). projectMM's single-modifier behaviour is the not-yet-finished state, not a design choice. + +Today a Layer applies **only the first enabled modifier**. `Layer::rebuildLUT()` finds the first enabled `Modifier` child and `break`s ([Layer.h](../../src/light/layers/Layer.h) `rebuildLUT`), and `Layer::loop()` ticks only that one (with an explicit comment that ticking a later one would desync the LUT, since a dynamic modifier's `loop()` can drive a rebuild the LUT must reflect). So with two modifiers on a Layer the second is dead weight — dragging it above the first is the only way to make it the active one. The intended behaviour is **modifier order = apply order**: a stack where each modifier reshapes the result of the one below ("modifiers on modifiers"), e.g. Multiply (kaleidoscope) *then* Rotate the kaleidoscoped result. 
The [modifier-chain-viz UI item](backlog-core.md#open-design-questions) is the surface for it and only becomes meaningful once this lands. + +**Mechanism — follow MoonLight's proven model, our own code** ([*Industry standards, our own code*](../../CLAUDE.md#principles)). MoonLight composes by streaming the layout's coordinates through each modifier's `modifyLayout`/`modifyLight` in order while the mapping table is built, so the *final* table already encodes the whole chain — the per-frame hot path stays a single lookup. We do the same with our pieces: `rebuildLUT()` walks the layout's coordinate stream (`Layouts::forEachCoord`) and passes each coordinate through modifier 1, then 2, …, then *n* before recording the destination, so the built `MappingLUT` is the composition `Mₙ ∘ … ∘ M₂ ∘ M₁` (M₁ applied first, Mₙ last) collapsed to one `logical→driver` table. Composition is a **cold-path, build-time** concern; modifiers stay simple (each still answers `logicalDimensions()` + its own per-coordinate transform), so the complexity lives in the core per *[Complexity lives in core](../../CLAUDE.md#principles)*. Worth studying MoonLight's `PhysMap` 1:0/1:1/1:N packing (inventory §1) when sizing the table — a deep chain with fan-out is exactly where the per-entry byte cost matters. + +Why it's not a one-liner: + +- **Build path** — `rebuildLUT()` must iterate *all* enabled modifiers bottom-up, threading each stage's logical dimensions into the next, and fold the per-stage transforms into one final LUT. The single-modifier `maxDest` / fan-out ceiling math (the `maxMultiplier()` clamp that fixed the multiplyZ overflow) has to generalise to a **product** of multipliers across the chain — the dominant new correctness risk (and the memory blow-up noted in the MultiplyModifier-LUT item above: a 2-deep 2× chain is up to 4× the destinations). +- **Tick path** — a dynamic modifier (RandomMapModifier, RotateModifier) calls back into `Layer::onBuildState()` on its timer to rebuild the LUT.
With a chain, *any* dynamic stage rebuilding must recompose the *whole* chain, and `loop()` must tick every enabled modifier (not `break` after the first) in the right order, after the effect pass. +- **Degrade path** — the per-stage OOM degrade (`degradeIdentity`) must decide what "degrade" means mid-chain (drop the offending stage? collapse to identity?) without leaving a stale partial LUT. +- **Tests** — `unit_Layers_container` / the modifier unit tests pin single-modifier behaviour; composed-order needs new cases (A∘B ≠ B∘A, a disabled middle stage is skipped not collapsed, the fan-out product ceiling holds at no-PSRAM `uint16_t`), plus a scenario that reorders a 2-modifier stack and asserts the composite changes. + +**Estimate: medium — roughly 4–6 commits.** (1) design note pinning the coordinate-stream composition model + the fan-out-product ceiling rule (reference the MoonLight inventory); (2) `MappingLUT` compose/fold primitive + unit tests in isolation; (3) `rebuildLUT()` chain iteration + `loop()` tick-all-in-order, behind the existing single-modifier tests staying green; (4) degrade-path decision + tests; (5) reorder scenario + `performance.md` memory capture at depth 2–3; (6) UI follow-up (the modifier-chain-viz item — see the correction noted there). Gate the depth: most setups are 1 modifier, so the chain path must cost nothing when `n == 1` (the current fast path stays the `n == 1` branch). + +### Intermittent ~0.5 s LED pauses with the RMT driver (pending investigation) + +Observed on the bench (2026-06): LED output running on the RMT driver occasionally freezes for about half a second. Postponed by the product owner until more observations exist. Ranked suspects from the initial analysis, each with a cheap experiment: + +1. 
**WiFi modem power-save never disabled** — nothing in `src/` calls `esp_wifi_set_ps(WIFI_PS_NONE)`, so the IDF default `WIFI_PS_MIN_MODEM` is active; the radio's DTIM sleep causes exactly this class of intermittent multi-hundred-ms stall. WLED and the v1/v2 lineage disable sleep. Experiment: one line in the ESP32 platform code after association. +2. **NetworkSendDriver sending synchronously every tick to an absent destination** (default `192.168.1.70`) — lwIP keeps re-ARPing a dead address while the send sits in the render tick. Data point (2026-06-10): the bench esp32-16mb had NetworkSend *disabled* in its persisted config, consistent with the pauses being annoying enough to switch the sender off. Experiment: point the ArtNet IP at a live host (or disable the driver) and see if the pauses stop. +3. **`rmt_tx_wait_all_done` 1 s timeout** — a wedged transmission blocks the tick up to a full second (multi-pin: up to N×1 s). Least likely (~1 s, not ~0.5 s) but it's the only hard block in the driver itself. + +If pauses correlate with UI control changes, also consider the 2 s-debounced SPIFFS save stalling flash-resident code. The per-tick KPI log around a pause discriminates between these immediately. + +### NoiseEffect simplex cost on ESP32 (investigation) + +With mirror XY at 128×128, NoiseEffect renders the 64×64 logical quadrant in **~11 ms/tick** on the Olimex (measured) — the simplex math dominates, since the Xtensa LX6 has no FPU and float math is software-emulated. (RainbowEffect on the same pipeline is much cheaper.) This is correct, non-degraded behaviour; it's only worth revisiting if a deployment needs Noise faster than ~11 ms at this grid. + +Worth investigating if so: + +- **Q16 fixed-point simplex** instead of float (kills the software-float emulation cost). +- **Lower-precision hash** — current simplex uses a 256-entry permutation lookup; a smaller / SIMD-friendly hash may be faster on Xtensa. 
+- **Strided sampling + interpolation** — render at 32×32, bilinear up to 64×64. Visual quality cost; needs A/B comparison. +- **Inline / unroll the inner per-pixel loop** to keep the simplex state in registers. + +None of these are obviously free, and a fixed-point port may shift the visual signature. Defer until there's a real use case — on the no-PSRAM Olimex at large grids the tick is dominated by the synchronous ArtNet send (~35 ms), not Noise, so the effect is rarely the bottleneck there. + +**S3 render-only data point (2026-06-17, `scenario_perf_full`):** on the PSRAM S3 with **no output driver**, Noise is the dominant cost at every grid and there's no ArtNet floor to hide it: 16²→738µs, 32²→2,831µs, 64²→11,235µs, **128²(16K)→50,555µs (~20 FPS)** — clean ~linear-in-pixels (67×), so no fragmentation/realloc pathology, just raw simplex compute. The light effect (Checkerboard) on the same sweep is 6–11× faster (16K→7,949µs, ~128 FPS). So on a PSRAM board the heavy effect IS the 16K bottleneck (where on the Olimex the network send was). This is the strongest case for the fixed-point/strided-sampling ideas above, since a PSRAM board can run 16K grids that the network-bound Olimex never reaches. The S3 has a real FPU (LX7), so the win is less about software-float emulation and more about per-pixel simplex work; profile before committing. 
diff --git a/docs/backlog/leddriver-deferred.md b/docs/backlog/leddriver-deferred.md deleted file mode 100644 index 6b78ece..0000000 --- a/docs/backlog/leddriver-deferred.md +++ /dev/null @@ -1,14 +0,0 @@ -# LED driver — remaining deferred items - -The LED-driver increments **shipped**: increment 1 (RMT/WS2812B single-strand on classic ESP32 — [`RmtLedDriver.h`](../../src/light/drivers/RmtLedDriver.h), `RmtSymbol.h`, `platform_esp32_rmt.cpp`) and increment 2 (2a multi-pin RMT, 2b parallel LCD_CAM on the S3 — [`LcdLedDriver.h`](../../src/light/drivers/LcdLedDriver.h) via [`ParallelLedDriver.h`](../../src/light/drivers/ParallelLedDriver.h), `platform_esp32_lcd.cpp`), all with host + on-board-loopback tests, hardware-proven. The locked decisions, file-by-file phases, the WiFi-flicker test-rig analysis, and the bench deviations (8-GPIO i80 bus, 2.67 MHz slot clock, SOC-macro gate, real-frame loopback) are in [decisions.md](../history/decisions.md), the [driver docs](../moonmodules/light/drivers/RmtLedDriver.md), and the [analysis docs](leddriver-analysis-top-down.md). What remains here is only the work that has **not** shipped and is tracked nowhere else. - -## Deferred - -- **sigrok/fx2lafw cross-check + MoonDeck "LED driver test" Python script** — the independent-clock proof and the run-from-MoonDeck flow ([analysis §5.3](leddriver-analysis-top-down.md)). The on-board RMT-RX loopback (shipped) is the cheap CI correctness gate but a *compromised witness* for WiFi-induced flicker — the RX capture runs on the same ESP32 whose WiFi causes the glitch. The real flicker test is a **sustained capture (seconds) with WiFi associated + a packet flood**, decoding every frame for a byte-slip or reset-gap deviation; it belongs with the core-1 driver-task work below, since that task pinning is the *fix* it validates. A DSLogic Plus (100 MS/s) upgrade is reactive — only if a flicker reproduces that 24 MS/s can't resolve. 
-- **Dedicated core-1 driver task + per-module core-affinity control** ([analysis §7.2](leddriver-analysis-top-down.md)) — the WiFi-glitch mitigation, shared across all the LED drivers. (See also backlog [§ Task core-pinning](backlog.md#task-core-pinning-backlog) for the general task-pinning question.) -- **`rmtWs2812Show` fuller error handling** (deferred from PR #17 / 🐇 CodeRabbit). The shipped path has a finite `rmt_tx_wait_all_done` timeout (1 s) so a wedged DMA can't hang the render tick forever, and a dropped frame self-heals (the driver re-encodes the whole frame next tick). The fuller version — `rmt_transmit` return check, `rmt_tx_stop` to cancel an in-flight transfer on timeout, `show()` returning failure so `loop()` won't reuse `symbols_` mid-transmit — belongs with the **core-1 driver-task** work, since that task owns the buffer lifetime and in-flight state the cancel logic needs. -- **Per-driver buffer window** — `start`/`count` controls on each physical driver, so different slices of the light buffer can go to different outputs (e.g. "some lights to ArtNet, others to LED pins"). Additive on `DriverBase` consumers when it lands — no change to the Drivers container or the buffer-passing contract; the multi-pin RMT slicing would then subdivide the driver's window instead of the whole buffer. -- **Auto-derived DMA buffer count** (7 / 30 / 75 per [analysis §7.4](leddriver-analysis-top-down.md)), **16-bit pipeline + dither** ([§7.3](leddriver-analysis-top-down.md)), **shift-register expander stubs** ([§7.5](leddriver-analysis-top-down.md)). -- **Moving-head preview = peer interpreter.** When moving heads land, the previewer must interpret channel semantics (pan/tilt/RGBW-at-arbitrary-indices) to render a moving fixture — the same light-preset model physical drivers use, interpreted to screen. 
This is *why* the increments named the abstraction "interpret the preset" rather than "apply correction / opt out": so Preview becomes a full peer here without a rename. Its own design plan when moving-head support starts. - -(The shared lane-driver scaffolding extraction — when a 3rd parallel backend lands — is tracked separately in [backlog § Extract shared lane-driver scaffolding](backlog.md#extract-shared-lane-driver-scaffolding-when-the-3rd-parallel-backend-lands-deferred).) diff --git a/docs/backlog/livescripts-analysis-bottom-up.md b/docs/backlog/livescripts-analysis-bottom-up.md new file mode 100644 index 0000000..0add6d8 --- /dev/null +++ b/docs/backlog/livescripts-analysis-bottom-up.md @@ -0,0 +1,236 @@ +# MoonLive — live-script engine landscape analysis + +> **Forward-looking research document — exception to CLAUDE.md present-tense rule.** This is a Stage-1 bottom-up survey of *live scripting* for projectMM: running user-authored scripts (LED effects, layouts, modifiers, drivers, sensor logic) on a running device without a recompile-and-flash cycle. It deep-reads one reference implementation — the [ewowi/ESPLiveScript `fix-warnings` fork](https://github.com/ewowi/ESPLiveScript/tree/fix-warnings) of [hpwit/ESPLiveScript](https://github.com/hpwit/ESPLiveScript) — at HEAD on **2026-06-25**, surveys the comparable field (WLED ARTI-FX, embedded VMs, WASM), and extracts the architectural primitives a clean projectMM redesign must decide. Companion to the monthly digest [history/hpwit-ESPLiveScript.md](../history/hpwit-ESPLiveScript.md) (credits + activity log). The **top-down** redesign document ([livescripts-analysis-top-down.md](livescripts-analysis-top-down.md)) expands the decisions recorded here into the build spec. Source citations use `file:line` against the cloned fork; inferred claims are marked *(inferred)*. Modelled on [leddriver-analysis-bottom-up.md](leddriver-analysis-bottom-up.md). 
+ +## TL;DR + +- **What live scripting is, and why projectMM wants it.** The same itch that produced WLED ARTI-FX and ESPLiveScript: author an effect (or layout, modifier, driver, sensor rule) *as text*, push it to a running device, see it run **on the next tick** — no toolchain, no flash, no reboot. It turns projectMM from "the effects we compiled in" into "any effect a user can write," and it's the natural home for a web-based pattern editor. +- **The design space has three corners, set by *how* a script becomes execution.** (1) **AST-walking interpreter** — parse to a tree, walk it every frame (WLED **ARTI-FX**: everything stored as `double`, flexible, slow). (2) **Bytecode VM** — compile to a compact opcode stream, run a dispatch loop (**PixelBlaze**, most embedded Lua/JS). (3) **Native JIT** — emit real machine code, call it as a function (**ESPLiveScript**). Speed climbs (1)→(3); portability and simplicity fall (1)→(3). projectMM's "blazingly fast like ESPLiveScript" requirement points at (3), but (3) is exactly where portability dies — see below. +- **ESPLiveScript is a from-scratch C-like compiler that JIT-emits native Xtensa machine code** (confirmed, not bytecode): `tokenizer.h` → `asm_parser.h` (AST of `NodeToken`) → visitor methods emit Xtensa assembly *strings* → `asm_parser_LMbin.h` encodes them to 32-bit opcodes → the binary is copied to executable RAM (`heap_caps_malloc(..., MALLOC_CAP_EXEC)`) and **called as a function pointer** via inline `callx8` (`execute_asm.h:386-399`). That direct-call, no-dispatch-loop design is the entire "85 fps C++ vs 10 fps Gravity vs 3 fps Lua" speed story from its README. +- **The portability finding that reshapes everything: ESPLiveScript is Xtensa-only.** The agent deep-read confirmed **no RISC-V backend** — all codegen is Tensilica Xtensa LX (`asm_parser_LMbin.h`, the inline-asm executor). 
This matters enormously for projectMM's target order: classic ESP32 + S3 are Xtensa (✅ ESPLiveScript runs), but **ESP32-P4 is RISC-V** (❌), as are Teensy 4.x (ARM Cortex-M7) and desktop (x86-64 / ARM64). So ESPLiveScript as-is covers exactly the *first two* targets on projectMM's list and **none** of the rest. A native-codegen engine needs **one backend per ISA** (Xtensa, RISC-V, ARM Thumb-2, x86-64, ARM64) — that's the real cost of "blazingly fast everywhere." +- **The front-end is portable; the back-end is not.** Tokenizer + parser + AST (`NodeToken`) are CPU-agnostic; only the *visitor → opcode* tier and the *load-and-execute* tier are ISA-bound. But today they're **deeply interleaved** — visitor methods emit Xtensa strings inline, there is **no intermediate representation (IR)** between AST and machine code. A clean redesign's load-bearing decision is whether to introduce that IR seam so one front-end feeds many back-ends (the LLVM shape, scaled down). +- **The "compatible with MoonModule" requirement is the projectMM-specific value-add.** ESPLiveScript binds to the host via `addExternalFunction(name, ret, sig, fnptr)` / `addExternalVariable(name, type, _, ptr)` (`asm_external.h`) — a flat C-pointer registry. projectMM needs scripts to read/write **controls**, consume the **producer/consumer data structures** (a `Buffer`, an `AudioFrame`), and slot into the **module tree** as a scripted effect/layout/modifier/driver/peripheral. That binding layer — script ⇄ MoonModule — is ours to design; no surveyed engine has it. +- **Memory + sync are already partly modelled in ESPLiveScript** and align with projectMM's constraints: compiled code lands in IRAM/PSRAM by target (`execute.h:10-15` gates PSRAM stack on S3/P4), a **save/load compiled-binary path** exists (`savebinary`/`executebinary` examples → compile once, ship the binary, skip re-compile on boot), and a `sync()` primitive coordinates concurrent script tasks. 
These are the right *ideas*; the redesign carries them forward against our `platform::` seam and `Scheduler`. +- **Code-quality reality (for the redesign).** Header-only, ~18K lines across 11 headers, **pervasive global state** (`string signature; Token __t;` and dozens of file-scope compiler counters), no IR, no unit tests, a 4,100-line `Parser` and a 5,824-line `NodeToken`. It works and it's fast, but it is **not** a base to extend in place — it's the reference to learn from and rewrite against our architecture (exactly the *Industry standards, our own code* method we used for LED drivers). +- **Recommendation: build our own native engine, Xtensa-first, behind an IR seam — start small, start beautiful, no dead-ends.** Take the ESPLiveScript *approach* (native machine-code execution, near-100% speed — the standout, never-done-before-in-this-space when bound to a module system) and add the one thing our multi-target goal needs that a single-ISA engine doesn't: put an **IR seam** between a platform-independent front-end (tokenizer→parser→AST) and the code generator. **Ship one backend first — Xtensa (classic ESP32 + S3)** — exactly where ESPLiveScript already proves native speed; that's the small, beautiful, blazingly-fast first deliverable. The IR seam is the **no-dead-end guarantee**: adding RISC-V (P4), ARM (Teensy), or x86/ARM64 (desktop) later is "write another backend behind the same IR," never "go back to the drawing board." ESPLiveScript's real dead-end isn't *Xtensa-first* — it's *Xtensa-welded-in, no IR*; we start at the same fast place but with the seam it lacks. **WASM/WAMR is the named fallback, per target**: a target without a native backend yet can run the portable path through the same IR, so we're never blocked — but the *flagship* experience is native. (Detail + why-this-over-WASM-wholesale in [§ Recommendation](#recommendation--native-engine-xtensa-first-behind-an-ir-seam).) 
+- **Safety the same way — climb the tiers, don't pay upfront.** A user-facing script editor means a bad script must degrade, not brick. Start with the **cheap** safety (array **bounds-checking** = a compare-branch per indexed access, low single-digit %, and removable in a trusted/fast mode; **watchdog / instruction budget** to kill a runaway loop = near-free, the task WDT already does most of it) — these catch the common bad-script cases at low cost (the kind the `fix-warnings` null-deref was). The **expensive** tier — a true memory sandbox where a script *cannot* touch memory outside its arena — is exactly what WASM gives for free and native can't cheaply; leave it as a tier we *can* climb via the IR→WASM fallback if field experience demands it, not a wall we hit. So safety is staged, not a foregone full-sandbox cost. +- **Ruled out (with reasons, so the top-down doesn't relitigate).** **FastLED's WASM** is browser *preview* (Emscripten-compiled FastLED in a Web Worker), not on-device scripting — adjacent, not it. **MicroPython/Python** is the right *edit* ergonomics but far too slow for the hot path (the Lua/Gravity wall that birthed ESPLiveScript). **Rust/TinyGo** are near-native but **AOT-compiled-and-flashed** — they remove no edit loop. **Adopting WASM/WAMR *wholesale* as the only engine** is the considered alternative, not a dead end — it wins portability + free sandbox but tops out at ~50% native (WAMR-AOT) and carries a 200KB+ runtime; we keep it as the per-target fallback rather than the flagship, because *native speed is the differentiator we're chasing*. +- **Out of scope for Stage 1.** Final VM-vs-JIT decision; IR design; the web editor; benchmarking on real hardware; the language grammar spec. All Stage 2 (top-down). + +## Why this document exists + +projectMM compiles its effects, layouts, modifiers, and drivers into the firmware. Adding one means writing C++, building, and flashing. 
**Live scripting removes that loop**: a user writes an effect as text in a browser, the device compiles/loads it, and it runs as a first-class module on the next tick — the same leap WLED took with ARTI-FX. + +The product owner's requirements (verbatim intent): + +- **General in core, specific in light.** Scripts must work for domain-neutral core jobs (e.g. read/transform sensor data) *and* the light domain: a scripted **layout** (coordinate iterator), **effect** (writes the buffer), **modifier** (remaps positions), **driver** (consumes the buffer). One engine, many module roles. +- **Target order.** ESP32 classic + S3 **first**; then P4 and other ESP32 flavours; then Teensy; then macOS / Linux / Windows. +- **Blazingly fast, like ESPLiveScript** — near-native per-pixel throughput, because a script runs in the render hot path at up to 16K+ lights × 50 FPS. +- **Smart memory management** — IRAM/PSRAM-aware, no hot-path allocation, compile-once/run-many. +- **Infinitely scalable** — run *as many* live scripts concurrently as memory allows, exploiting PSRAM (each script is an independent compiled unit; the only ceiling is free heap, not a fixed slot count). Many small scripted modules — several effects across layers, a scripted modifier, a couple of core sensor rules — coexist; the device hosts what fits and degrades gracefully when it doesn't, the same way the light pipeline already scales to available memory. +- **Sync with the rest of the system** — live reconfig (no reboot), `Scheduler`-driven, robust to add/delete/replace in any order, controls and producer/consumer data wired in. +- **Compatible with the MoonModule class** — a scripted module is a MoonModule: it has controls, a `loop()`, a role, and renders generically in the UI with zero per-script UI code. + +This document characterises the one reference that already hits "blazingly fast" (ESPLiveScript), maps it against the field, and surfaces every decision the redesign must make. 
It does **not** pick the design — that's the top-down doc. + +## ESP32 — primary depth: ESPLiveScript (the reference) + +Read at `ewowi/ESPLiveScript@fix-warnings`, cloned 2026-06-25. ~18,358 lines across 11 header-only files in `src/`. + +### What it is, in one sentence + +A hand-written **C-like language with a from-scratch tokenizer → parser → AST → native-Xtensa code generator**, where the compiled script is loaded into executable RAM and **called directly as a function** — a JIT compiler, not an interpreter. Yves Bazin (hpwit) built it because Lua (3 fps) and Gravity (10 fps) couldn't drive his 12,288-LED panel where hand-written C++ hit 85 fps (README intro). + +### The pipeline (the load-bearing structure) + +Source text flows through five stages; the data structure between them is `Token` → `NodeToken` (the AST node) → assembly text → binary: + +1. **Tokenize** — `src/tokenizer.h` (2,394 lines). Lexes source into `Token`s with a `tokenType` enum. Also owns the user-defined-type registry (`_userDefinedTypes`, global). +2. **Parse → AST** — `src/asm_parser.h` (1,929 lines) builds a tree of `NodeToken` (`src/NodeToken.h`, 5,824 lines). `NodeToken`'s `nodeType` enum has ~47 kinds (`binOpNode`, `defFunctionNode`, `forNode`, `ifNode`, `callFunctionNode`, `returnNode`, `defAsmFunctionNode`, …). A for-loop becomes a `forNode` with init/cond/incr/body children. +3. **Generate code (visitor)** — `NodeToken::visitNode()` (`NodeToken.h:818-1001`) dispatches to `_visitbinOpNode()`, `_visitcallFunctionNode()`, etc., each of which **emits Xtensa assembly strings** into output buffers (e.g. `NodeToken.h:1858` emits `movi a%d,%d`). There is **no IR** — visitors know Xtensa directly. +4. **Encode to binary** — `src/asm_parser_LMbin.h` (592 lines) turns assembly text into 32-bit Xtensa opcodes (`bin_add`, `bin_l32i`, `bin_movi`; e.g. `bin_add` at `:97-100` emits `0x800000 | …`). +5. **Load + execute** — `src/execute_asm.h` (876 lines). 
`_createExcutablefromBinary()` (`:224-275`) copies the binary into executable RAM with `heap_caps_malloc(size, MALLOC_CAP_EXEC)` (`:232`), patches external references (relocation, `:44-223`), and `executeBinaryAsm()` (`:386-399`) runs it via inline asm: `l32i a15,%0,0 ; callx8 a15` — **a direct indirect call to the generated code.** No dispatch loop. That is the speed. + +### Why it's blazingly fast (confirmed) + +It is **native machine code called as a function** — the CPU fetches the script's own instructions from IRAM, exactly like a compiled C function. There is no per-opcode interpreter overhead (the cost a bytecode VM or AST-walker pays every operation). The README's benchmark (85 fps C++ ≈ ESPLiveScript ≫ 10 fps Gravity ≫ 3 fps Lua) is the direct consequence. Scripts can even drop to **inline Xtensa** for the hottest paths (`__ASM__ uint32_t millis() { "entry a1,32" … "retw.n" }`, `sc_examples/animwle.sc`). + +### What ties it to ESP32 — the portability barrier + +The agent's deep-read is unambiguous: **Xtensa-only, ESP-IDF-coupled.** + +- **Codegen is 100% Xtensa LX** (`asm_parser_LMbin.h`, all visitor emission). **No RISC-V, no ARM, no x86 backend exists.** Only ~1 arch `#ifdef` in the codegen — and it's a *memory-caps* choice (S3/P4 PSRAM stack vs classic internal, `execute.h:10-15`), not a second ISA. (Note the irony: that one `#ifdef` *mentions* ESP32-P4, but only for stack allocation — P4 is RISC-V, so the **generated code wouldn't run on it.** *(inferred from "Xtensa-only codegen" + P4 being RISC-V)*.) +- **Execution is ESP-IDF-specific**: `MALLOC_CAP_EXEC` IRAM allocation, inline `callx8`, `rsr a14,234` cycle-counter reads, `xTaskCreatePinnedToCoreWithCaps` (`execute.h:590`). +- **Front-end is portable, back-end is not**: tokenizer + parser + AST (pipeline stages 1-2) are CPU-agnostic; stages 3-5 (visit→opcode, encode, load-execute) are ISA/platform-bound and **interleaved** with no seam between them.
+ +So on projectMM's target list, ESPLiveScript as-is runs on **classic ESP32 + S3** and stops there. P4 (RISC-V), Teensy (ARM), and desktop (x86-64/ARM64) each need a *new code generator* — 2-3K lines per ISA *(inferred, agent estimate)* — or a different execution strategy entirely. + +### Host integration — the binding model + +A host C program drives it through `class Parser` (`ESPLiveScript.h:79`): + +- **Compile + run**: `parseScript(&str)` → `Executable`; `Executable::execute("fn", args)` runs a function; `executeAsTask("fn", core, args)` runs it pinned to a FreeRTOS core; `suspend()/restart()/kill()/free()` manage its lifetime (`execute.h:352+`). +- **Expose a C function to scripts**: `addExternalFunction("calc","float","int",(void*)calcul)` → script calls `float h = calc(52);` (`asm_external.h`, README example). +- **Expose a C variable**: `addExternalVariable("value","int","",(void*)&v)` and `("array","int *","",(void*)arr)` → script does `value = value + 2; array[i] = 10;`. +- **Arguments**: `int` and `float` only (`ESPLivescriptRuntime.h:150-176`). +- **JSON path** (`__JSON__OPTION__`): scripts exchange JSON with the host (`execute_asm.h:400-471`) — the `enjoy json` feature from the digest; the bridge a web editor would lean on. +- **Precompiled binaries**: `parseScriptBinary()` → `saveBinary()/loadBinary()` → `createExecutableFromBinary()` (`execute_asm.h:276-384`). Compile once (on a desktop or web service), persist the `ESPLiveScript1.0.1`-format binary, load it on the device — but **external pointers are not serialized**, they re-bind at load (README). This is the seed of a smart compile-once memory strategy. + +This binding is a **flat C-pointer registry** — exactly what projectMM must *replace* with a MoonModule-aware layer (controls, producer/consumer structs, the module tree). + +### Memory model + +- Generated code → executable RAM via `MALLOC_CAP_EXEC` (IRAM on classic; PSRAM-capable on S3/P4 via `__LS_STACK_CAPS`, `execute.h:10-15`). 
+- Script globals → a malloc'd `data` buffer; locals → the Xtensa ABI stack frame (`entry a1, size`); params → registers a2-a7. +- Precompiled-binary persistence (above) = compile-once. +- `Executable::free()` releases both code and data. +- No PSRAM is *forced* for the data section; that's a knob the redesign would make policy. + +### The language (what a user writes) + +C-like, LED-oriented. From `sc_examples/*.sc` + README: + +- **Types**: `int`/`s_int` (32/16-bit), `uint8_t..uint32_t`, `float`, `char`, `bool`, plus **`CRGB`/`CRGBW`** (LED colour) as first-class; user `struct`s with fields, methods, constructors; multi-dimensional arrays (`int g[z][y][x]`). +- **Control flow**: `if/else`, ternary, `while`, C-style `for`, `break`/`continue`, `return`; recursion (`sc_examples/fibonacci.sc`). +- **Built-ins**: `printf`/`printfln` (int only), `millis()`, `rand`/`copy`/`memset`/`fill` (inline-asm in `functionlib.h`), `hsv()` and FastLED math when `USE_FASTLED` is set. +- **Escape hatch**: `__ASM__ … @` blocks for hand-written Xtensa. +- **Preprocessor**: `#define TOKEN value` (substitution only). +- **Flavour example** — `sc_examples/animwle.sc` is a **Mandelbrot effect ported from an existing pattern** (`#define width 128`, float `cR/cI`, nested grid loops, inline-asm `millis()`), which tells you the target audience: people porting effects from other LED-scripting environments. + +### What the `fix-warnings` fork changed + +Despite the branch name, the fork's substantive change is **one commit (`4871509`, 2026-04-02): a null-pointer crash fix**, not a `-Wall` cleanup. `findMaxArgumentSize()` in `NodeToken.h` dereferenced `getChildAtPos(1)`/`(2)` unconditionally; a **nested external-function argument** (`setRGB(random16(NUM_LEDS), CRGB(0,0,255))`) produces a node without those children → `LoadProhibited` crash on device. The fix adds null guards (return 0 — a scalar needs no pre-call stack spill). 
*Relevance to projectMM*: this is precisely the class of bug a from-scratch hand-written parser breeds (no test harness caught it), and a data point for "rewrite with tests" over "extend in place." + +### Structural observations (what a multi-target rewrite must account for — not a verdict on the original) + +These are the differences between ESPLiveScript's design (one author, one ISA, maximum speed) and what projectMM's *different* goals (multi-target, module-bound, tested) need. They are reasons to write our own against our architecture, not faults — ESPLiveScript meets its own goals well. + +- **Header-only**, ~18K lines, 11 files; the two biggest (`NodeToken.h` 5,824, `ESPLiveScript.h` 4,100) carry several jobs each (tree + metadata + 47 visitors + asm emission). Fine for a single-include library; we'd split for testability. +- **File-scope state**: `string signature; Token __t;` (`ESPLiveScript.h:29-30`), plus global register-allocation stacks, output buffers, compiler counters. A consequence: one compilation at a time. Acceptable on a device that compiles one script; we'd encapsulate it. +- **No IR** between AST and Xtensa — the one structural thing that makes multi-target hard, and the single highest-leverage change our redesign makes. (ESPLiveScript didn't need it — it targets one ISA.) +- **`.ino` integration examples, no unit suite** — natural for an Arduino library; we'd add unit + scenario tests because robustness is pinned by tests here. + +The lesson is the LED-driver lesson: **study it hard, credit it (see § Prior art & credits), write our own against our architecture and goals.** + +## The comparable field (what else to learn from) + +projectMM's "industry standards" rule says: name the prior art, take the textbook approach. The live-scripting field has three established design points; ESPLiveScript is the extreme of one. 
+ +### WLED ARTI-FX — the AST-walking interpreter (our sibling project) + +[ARTI-FX](https://mm.kno.wled.ge/moonmodules/arti-fx/) (MoonModules, by ewowi — the author of this analysis — built on the **ARTI** runtime, a PEG-grammar-driven interpreter) parses a script and **walks the AST every frame**. Every value is stored as a **`double`**, converted to int when needed; scripts define `renderFrame` (per-frame) + `renderLed` (per-LED) callbacks and call `setPixelColor`/`setRange`/`fill`. Host binding is `arti_external_function` / `arti_set/get_external_variable` (the same flat-registry shape as ESPLiveScript). **What it contributes:** it proved live scripting works end-to-end in this ecosystem, and its design is maximally flexible and portable — pure C++ tree-walking, runs anywhere unchanged. The `double`-everything per-LED walk trades per-frame speed for that flexibility, which is the trade projectMM's 16K hot path can't take (and exactly the gap ESPLiveScript's native path closes). So the two are complementary baselines: ARTI-FX is the **reach** baseline (runs everywhere, the product shape proven), ESPLiveScript is the **speed** baseline (native, Xtensa) — and projectMM wants both, which is why neither alone is the answer. + +### WASM on ESP32 — the strongest off-the-shelf portable-runtime candidate (answering "is WASM what we want?") + +WebAssembly is a portable **bytecode standard** with mature small runtimes that **already run on the ESP32**, which makes it the most serious "don't build the engine, adopt one" option for the multi-target problem ESPLiveScript can't solve. Two runtimes matter: + +- **wasm3** — an ultra-light **interpreter** in C (~64 KB code, ~10 KB RAM), runs Arduino-class MCUs upward. Pure interpretation, so *slower* (the per-opcode dispatch cost, same class as a bytecode VM) — fine for control logic, questionable for a 16K-pixel inner loop. 
+- **WAMR (WebAssembly Micro Runtime)** — supports interpreter **and AOT/JIT**; in **AOT mode WAMR reaches ~50% of native speed**, "quite acceptable" for embedded use, at a larger footprint. Rule of thumb from the field: **RAM < 256 KB → wasm3, > 256 KB → WAMR**; the classic ESP32's 320 KB+ puts it in WAMR's range. ([arXiv survey](https://arxiv.org/html/2512.00035v1), [WAMR-ESP32](https://registry.platformio.org/libraries/mlaass/WAMR-ESP32)) + +**Why this is genuinely relevant to projectMM**, and arguably *the* answer to "runs everywhere": + +- **One artifact, every target.** A script compiled to `.wasm` runs on classic/S3/P4/Teensy/desktop through the same runtime — no per-ISA backend, which is exactly ESPLiveScript's missing piece. WASM **is** the portable IR + VM, off the shelf. +- **WAMR-AOT is the "blazingly fast" bridge.** AOT-compiling the `.wasm` to native on the device (or on a host) gets ~50% of native — between a naive bytecode VM and ESPLiveScript's near-100%. That's the same "portable baseline + native acceleration" shape the bottom-up proposes, but **already built and multi-ISA** (WAMR's AOT backends cover Xtensa, RISC-V, ARM, x86). +- **Sandboxed by design.** WASM is memory-safe and bounds-checked — a runaway/bad script traps instead of bricking the device (the safety story ESPLiveScript's native code lacks; recall the `fix-warnings` null-deref). +- **Mature toolchain + editor path.** Any language that targets WASM (C/C++, Rust, AssemblyScript ≈ TypeScript) becomes a script source; the browser already runs WASM natively, so a web editor could compile *and preview* the exact artifact the device runs. 
+ +**The costs / open questions** (the top-down must weigh): the runtime is **heavier** than a hand-rolled VM (WAMR is 200 KB+; wasm3 is light but interpreter-only); the **host-binding** for WASM (imports/exports, linear-memory marshalling of a `Buffer`/`AudioFrame`) is more ceremony than ESPLiveScript's flat pointer registry and must be designed against the MoonModule data model; the **toolchain** (a WASM compiler in the editor path) is a real dependency; and **whether WAMR-AOT actually holds 16K×50FPS** on an S3 is the load-bearing benchmark. But as a way to get *every target on day one with sandboxing for free*, WASM is the candidate to beat — the top-down should evaluate "WAMR as the engine" head-to-head against "our own VM+IR." + +### FastLED's WASM — adjacent but **not** what we want + +FastLED's `master` WASM support (`src/platforms/wasm/`) compiles **FastLED itself to WebAssembly via Emscripten to run in a *browser*** — a **simulation/preview** of effects, not a runtime that runs user scripts *on the ESP32*. It runs FastLED in a Web Worker (`PROXY_TO_PTHREAD`), bridges C++↔JS via `EMSCRIPTEN_KEEPALIVE` exports (`js_bindings.cpp`), and exports frame/strip/UI data as JSON for the page to draw. FastLED's `FxEngine` is likewise a *compiled* effect manager (switch/transition between C++ effects), not an on-device scripting language. So FastLED gives projectMM **two adjacent ideas, neither the live-script engine**: (1) "compile your effect library to WASM to **preview it in the browser**" — a preview technique that sits next to projectMM's own 3D WebGL preview, not the scripting engine; (2) precedent that the WASM toolchain is production-ready. It does **not** answer the on-device live-scripting need. 
([FastLED wasm platform](https://github.com/FastLED/FastLED/tree/master/src/platforms/wasm)) + +### Compiled languages (Rust / TinyGo) and interpreted Python — why neither is the answer (answering the Rust/Python question) + +- **Python / MicroPython is too slow for the hot path** — the benchmark literature is consistent: MicroPython is *"many times slower"* than C/Rust/TinyGo on ESP32 ([MDPI study](https://www.mdpi.com/2079-9292/12/1/143)). It's the same wall hpwit hit (Lua 3 fps, Gravity 10 fps) that *caused* ESPLiveScript. So "Python is an interpreter, so live-editing is easy" is true for the *edit loop* but fails the *speed* requirement at 16K×50FPS. Its real value is the **REPL/editor UX** reference, not the engine. (For non-hot-path *core* scripts — a slow sensor rule at 1 Hz — an interpreter's speed is irrelevant and Python-class ergonomics would be fine; this argues again for a **tiered** answer: cheap interpreter acceptable off the hot path, fast path needs VM/native.) +- **Rust / TinyGo are fast but *compiled*, not interpreted** — Rust-on-ESP32 (`esp-hal`, `no_std`) and TinyGo land near C speed, but they are **AOT-compiled and flashed** — they need a toolchain and a reflash, which is *exactly the loop live scripting exists to remove*. They give no live-edit story on their own. (Rust *does* become relevant via WASM: Rust → `.wasm` → WAMR is a legitimate script-authoring path, but then the engine is WASM, not Rust-on-device.) +- **Net:** interpreted-Python solves edit-speed but not run-speed; compiled-Rust solves run-speed but not edit-speed. The only options that give **both** live edit *and* hot-path speed are (a) a custom VM/native engine (ESPLiveScript's path, our redesign) or (b) **WASM+WAMR-AOT** (portable, sandboxed, ~50% native). Those two are the real finalists. 
+ +### Other embedded VMs / JIT libraries (textbook back-ends) + +- **Lua / eLua / Luau** — the canonical embeddable scripting VM (register-based bytecode); the reference for a clean host C API and a GC'd value model. Too slow per-pixel raw (hpwit measured 3 fps), but the *architecture* (compile → bytecode → register VM) is the textbook. +- **Espruino** — JS on MCUs; full-language, GC-paused; an editor/REPL UX reference, the "too much" end for a per-pixel hot path. +- **LLVM / MIR / GNU lightning** — real JIT libraries with multi-ISA back-ends. LLVM is far too big for an MCU; **MIR** and **GNU lightning** are lightweight JITs that *do* target multiple ISAs and are the closest prior art to "one front-end, many native back-ends" if we go the custom-native-multi-target route (the alternative to adopting WASM). + +### The design-space map + +| Approach | Example | Speed (per-pixel) | Portability | Runtime size | Sandbox | Editor-friendliness | +|---|---|---|---|---|---|---| +| AST-walk interpreter | WLED **ARTI-FX** | Low (double-everything) | **Highest** (pure C++) | Small | Easy | High | +| Bytecode VM | Lua, PixelBlaze | Medium-High | **High** (one VM, any CPU) | Small-Medium | Easy | High | +| **WASM interpreter** | **wasm3** | Medium | **Highest** (standard) | ~64 KB | **Built-in** | High (any→wasm) | +| **WASM AOT/JIT** | **WAMR** | **~50% native** | **Highest** (standard, multi-ISA) | 200 KB+ | **Built-in** | High (any→wasm) | +| Native JIT (custom) | **ESPLiveScript** | **Highest** (native) | **Lowest** (one backend/ISA) | Medium | None (can crash) | Medium | + +projectMM wants ESPLiveScript's **speed** *and* ARTI-FX's **reach** — no single *custom* corner gives both, which is why one redesign path is a **layered custom** engine (portable VM baseline + optional native back-end behind a shared IR). But **WASM+WAMR-AOT collapses that table into one row**: portable to every target *and* ~50% native *and* sandboxed, off the shelf.
The two real finalists for the top-down are therefore **(A) build our own VM+IR (+ optional native backend)** vs **(B) adopt WASM/WAMR as the engine** — weighed on hot-path speed (does WAMR-AOT hold 16K×50FPS?), runtime footprint, and how cleanly each binds to the MoonModule data model. + +## Architectural primitives observed (the decisions the redesign must make) + +Distilled across all four references, these are the load-bearing choices a clean engine faces — the *questions* the survey raises. Each is **decided and designed in the top-down**; listed here so the survey names what's at stake. + +1. **Execution strategy** — AST-walk vs bytecode-VM vs native-JIT (or a tier ladder). *The* decision; everything follows. ([top-down §2](livescripts-analysis-top-down.md#2-why-native-and-why-our-own-expanding-decision-1)) +2. **The IR seam** — ESPLiveScript emits Xtensa directly (right for one ISA); a multi-target redesign adds a representation between AST and execution so one front-end feeds many backends. The highest-leverage structural change. ([§3.2](livescripts-analysis-top-down.md#32-the-three-tiers-where-the-ir-seam-lives)) +3. **Host-binding model** — the two on-device scripting engines surveyed (ESPLiveScript, ARTI-FX) use a flat name→pointer registry (`addExternalFunction`/`arti_*`), while WASM binds through imports/exports over linear memory; projectMM's value-add is a MoonModule binding (controls, producer/consumer structs, module role) with no prior art to copy. ([§3.4](livescripts-analysis-top-down.md#34-the-host-binding--script--moonmodule-decision-7-the-value-add)) +4. **Per-frame contract** — script writes its own `loop()` vs an engine-called `renderLed()`; determines the hot-loop shape and where per-pixel dispatch lands. ([§3.4](livescripts-analysis-top-down.md#34-the-host-binding--script--moonmodule-decision-7-the-value-add)) +5. **Compile-once / persist** — a saved artifact skips device-side recompile; portable (one artifact) with a VM/WASM, per-ISA with native.
([§3.7](livescripts-analysis-top-down.md#37-memory-placement--infinite-scalability-routed-through-platform-decisions-smart-memory-infinitely-scalable)) +6. **Memory placement** — code IRAM/PSRAM, data internal/PSRAM, per-target; routed through `platform::` as one policy. ([§3.7](livescripts-analysis-top-down.md#37-memory-placement--infinite-scalability-routed-through-platform-decisions-smart-memory-infinitely-scalable)) +7. **Concurrency + sync** — in-tick vs a pinned task; the threading contract against the scheduler + no-blocking-hot-path rule. ([§3.8](livescripts-analysis-top-down.md#38-execution-model--inline-by-default-task-as-the-exception-decision-sync)) +8. **Live reconfig + robustness** — a re-pushed script swaps in tick-atomically, old freed, no reboot, no mid-render crash. ([§3.6](livescripts-analysis-top-down.md#36-live-reconfig--tick-atomic-hot-swap-decision-sync)) +9. **Safety / sandboxing** — native can crash, a VM can bound; a user-facing editor raises the stakes. Coupled to the execution-strategy choice. 
([§4](livescripts-analysis-top-down.md#4-safety--staged-decision-6)) + +## Mapping to projectMM's requirements + +| Requirement | ESPLiveScript today | What the redesign must add | +|---|---|---| +| Blazingly fast | ✅ native Xtensa | Keep native speed *where the ISA has a backend*; VM elsewhere | +| Core (sensor data) + light (layout/effect/modifier/driver) | ⚠️ generic funcs only; no module roles | The MoonModule binding + per-role entry-point contracts | +| ESP32 classic + S3 first | ✅ (both Xtensa) | Carry forward (native backend) | +| P4 + other ESP32 | ❌ P4 is RISC-V | RISC-V backend *or* VM fallback | +| Teensy | ❌ ARM | ARM backend *or* VM | +| macOS/Linux/Windows | ❌ x86/ARM + no IRAM | Desktop backend *or* VM (VM is the obvious win here) | +| Smart memory | ⚠️ one `#ifdef`, IRAM/PSRAM | Route through `platform::alloc`; compile-once artifact | +| Infinitely scalable (N scripts) | ❌ examples run one script | Independent `MoonLive` per module; code+data arenas PSRAM-first; ceiling = free heap, not a fixed count | +| Sync with system | ⚠️ FreeRTOS tasks + `sync()` | `Scheduler` tick contract; tick-atomic hot-swap; live reconfig | +| MoonModule-compatible | ❌ flat C registry | Scripted module = MoonModule (controls, loop, role, generic UI) | + +The pattern is clear: **ESPLiveScript nails speed on two chips and nothing else on this list.** Every other requirement is new work, and the multi-target + MoonModule-binding pieces are the bulk of it. 
+ +## Recommendation — native engine, Xtensa-first, behind an IR seam + +The survey lands on a clear direction (mirroring the LED-driver bottom-up's "walk Scenario B" call, not an open fork): **build our own native-codegen engine, ship the Xtensa backend first, put an IR seam between the front-end and the code generator from day one — start small, start beautiful, extend with no dead-ends.** Native speed (near-100%) is the differentiator; the IR seam is the no-dead-end guarantee that makes "Xtensa-first" safe (RISC-V/ARM/desktop are each a new backend behind the unchanged IR); WASM/WAMR is the per-target fallback, never the rival; safety is staged (cheap bounds+watchdog first, true sandbox deferred to the WASM fallback). + +The full reasoning — why native over WASM-wholesale, why Xtensa-first isn't a corner, the cost accepted, the load-bearing spike — **is expanded in the top-down** ([§2 Why native](livescripts-analysis-top-down.md#2-why-native-and-why-our-own-expanding-decision-1), [§9 Staged plan](livescripts-analysis-top-down.md#9-staged-plan--the-moonlight-tutorial-ladder-as-the-spine)), and the decisions it rests on are recorded verbatim in [§ Answers — product-owner direction](#answers--product-owner-direction-2026-06-25) below. This section is the survey's conclusion; the build spec is the top-down. + +## Prior art & credits + +Per *Industry standards, our own code*: study the prior art, credit it by name, write our own. This redesign rests on work others did first. + +- **ESPLiveScript — Yves Bazin (hpwit).** The native-codegen approach the recommendation builds on is his. A from-scratch C-like compiler — tokenizer, parser, register allocator, Xtensa code generator, save/load compiled-binary path, header-only — that runs a script at near hand-written-C++ speed on an ESP32 (85 fps on a 12,288-LED panel where Lua managed 3 and Gravity 10). That is the result that makes "go native, not interpreted" the right call, and the reason this document exists. 
The structural notes above (§ Structural observations: global state, no IR, large files) are what a *rewrite toward a different goal* — multi-target, module-bound — has to account for; they are not a verdict on the original, which does its job and does it fast. We carry the idea forward and add the IR seam + the MoonModule binding. +- **ARTI-FX / ARTI — ewowi.** The prior projectMM-family answer to the same problem, written by this analysis's author: the interpreted-effects runtime in WLED MoonModules, on the PEG-grammar ARTI interpreter. It proved the live-scripting idea works end-to-end in this ecosystem (load a script, run it live), and it is the source of lessons carried straight into this design — the `renderFrame`/`renderLed` split, the host-binding shape, and, by being the AST-walking design, the clearest demonstration of *why* a 16K hot path wants native or VM execution over tree-walking. The redesign trades its interpreter for native speed; it keeps its product shape and its lessons. +- **MoonLight — MoonModules** (the [effects tutorial](https://moonmodules.org/MoonLight/moonlight/effects-tutorial/), the staging spine and the host-binding surface model). See the per-engine sections above for what each contributes. + +Activity + credits also in the digest [history/hpwit-ESPLiveScript.md](../history/hpwit-ESPLiveScript.md).
+ +## Risks and unknowns + +The open questions the survey surfaced are **resolved in the top-down**, each in its own section: the load-bearing speed unknown — can native-with-safety hold 16K×50FPS, else fall back to WASM — is the first spike ([top-down §9.2](livescripts-analysis-top-down.md#92-the-ladder)); IR design ([§3.2](livescripts-analysis-top-down.md#32-the-three-tiers-where-the-ir-seam-lives)); the per-pixel-vs-per-frame contract ([§3.4](livescripts-analysis-top-down.md#34-the-host-binding--script--moonmodule-decision-7-the-value-add)); the MoonModule-binding mechanics ([§3.3–3.4](livescripts-analysis-top-down.md#33-a-scripted-module-is-a-moonmodule-decision-7)); safety depth ([§4](livescripts-analysis-top-down.md#4-safety--staged-decision-6)); editor + persistence ([§9.2](livescripts-analysis-top-down.md#92-the-ladder), stage 10). The single load-bearing one to flag here: **whether native codegen holds the frame budget on a real S3** — if it doesn't, the IR seam makes the WASM fallback a backend swap, not a restart. + +## Answers — product-owner direction (2026-06-25) + +Decisions from the design discussion that produced this survey. These are *direction*, terse on purpose; the top-down expands each into full reasoning, an API/architecture, and a staged plan. (Mirrors the LED-driver bottom-up's product-owner-direction section.) + +1. **Execution = native, the standout.** Build our own **native-codegen** engine (ESPLiveScript-class speed, near-100%) — the differentiator; projectMM should stand out with something not done before (a native live-compiler bound to a real module system). *Not* a slow interpreter, *not* WASM-as-flagship. +2. **No dead-ends, start small + beautiful (the LED-driver method).** Ship **one ISA backend first — Xtensa (classic ESP32 + S3)** — as a complete, blazing first increment, then grow. 
The **IR seam** (front-end → typed IR → per-ISA backend) is the no-dead-end guarantee: RISC-V (P4), ARM (Teensy), x86/ARM64 (desktop) each become a *new backend behind the unchanged IR*, never a rewrite. WASM/WAMR is the **per-target fallback** so no target is ever blocked. +3. **The IR must NOT cost speed (hard constraint).** It is a *compile-time* representation that lowers to the *same* native instructions ESPLiveScript hand-emits — **zero per-pixel runtime overhead**, no interpreted layer. Equally fast as hpwit on Xtensa is non-negotiable; prove it by diffing generated instructions for a hot loop against hand-written Xtensa, and keep an `__asm__` escape hatch for the very hottest paths (as ESPLiveScript has). +4. **Source language = a C-subset, "as close as possible" to the precompiled effect, with pragmatic simplifications.** The effect *body* (types like `uint8_t`/`uint32_t`/`lengthType`, nested `for`, integer + 64-bit math, `static_cast`, `RGB`, `hsvToRgb`, buffer writes) ports **near-verbatim** from a file like `RipplesEffect.h` (our reference effect — it exercises the hard cases: `float` trig `std::sqrt`/`std::sin`/`std::floor`, `std::memset`, 3D with `depth()`, two controls). The C++ *file/class ceremony* that buys nothing in a script (`#pragma`/`#include`/`namespace`, and — accepted as a pragmatic simplification — `class : public EffectBase`/`override`/the `controls_.addUint8` host-object dance) is **dropped or lightened**: the engine supplies the module scaffolding around the script. Target: porting an existing effect is the loop body verbatim + a handful of lines changed, *not* a rewrite, and *not* implementing a full C++ object model (inheritance/vtables/host-method binding) in the engine. **Not** a JS-subset (the ARTI-FX surface): JS's double-everything number model is the slow path *and* further from our C++ codebase, so it's worse on both speed and portability of existing effects. +5. 
**Controls = minimal ceremony.** A scripted control is a near-plain top-level variable (e.g. `uint8_t speed = 60;` with a range annotation); the engine derives the MoonModule control + UI + persistence. Lighter than today's explicit `controls_.addUint8(...)`, copy-paste-friendly. (Exact annotation syntax is the top-down's call.) +6. **Safety = staged, climb the tiers, don't pay upfront.** Ship the **cheap** tier first — array **bounds-checking** (a compare-branch per indexed access, low single-digit %, removable in a trusted/fast mode) + **watchdog / instruction budget** (kill a runaway loop, near-free). The **expensive** true-memory-sandbox tier (a script physically can't touch memory outside its arena — what WASM gives free, native can't cheaply) is **deferred**, reachable via the IR→WASM fallback only if a public script editor in the field shows the cheap tier isn't enough. Decided this way because the price of full sandboxing upfront isn't worth paying before evidence demands it. +7. **MoonModule-first.** A scripted module **is** a MoonModule (role, controls, `loop()`, generic UI, lifecycle, robustness, live-reconfig). The script ⇄ MoonModule binding (reach the `Buffer`/`AudioFrame`/LUT via the producer/consumer pull pattern, no copy) is the projectMM value-add to design — no prior art copies cleanly. +8. **General in core + specific in light.** One engine serves a domain-neutral core script (e.g. transform sensor data) *and* a scripted layout / effect / modifier / driver. **Effect is the first role.** `RipplesEffect.h` is the *reference* effect for the language design (it stresses float trig + 3D + memset), but it is **too complex for the hello-world spike** — the first running script must be trivial (e.g. fill the buffer one colour, or a single moving dot), proving the engine end-to-end before any real effect. Ripples is the *graduation* target, not the spike. 
For how an effect is structured for a newcomer, the [MoonLight effects tutorial](https://moonmodules.org/MoonLight/moonlight/effects-tutorial/) is a good read (a sibling project's step-by-step). The simple→Ripples progression is itself the start-small-grow staging applied to the demo. +9. **Infinitely scalable.** Run *as many* live scripts concurrently as memory allows, exploiting PSRAM — each script is an independent compiled unit, the ceiling is free heap, not a fixed slot count. Many small scripted modules coexist; the device hosts what fits and degrades gracefully when it doesn't (the same scaling-to-available-memory contract the light pipeline already honours). +10. **Inline execution by default; task is the exception.** A scripted effect/layout/modifier/driver runs *inline in the `Scheduler` tick*, called exactly like a compiled effect's `loop()` — one mental model, no cross-thread sync to reach the buffer/`AudioFrame`, and it runs on the render task's *internal-RAM* stack (fast). Task-per-script isn't blocked on memory (a task stack can live in PSRAM), but it pays two costs inline doesn't: per-task **scheduling overhead** (a context switch per task per frame — hundreds of tasks thrash the scheduler, a ceiling independent of memory), and a **PSRAM-backed stack is hot-path-slow** (PSRAM latency on every per-pixel local access, ~12 vs ~80 MB/s). So inline keeps the per-script stack fast and free, and PSRAM is spent on script *code + data* (decision 9) rather than per-script stacks. A pinned task is the narrow, documented opt-in *only* for a long/blocking *core* script (e.g. slow sensor I/O) that must not share the render tick — never the default, never for a pipeline script. +11. 
**Sequencing: hybrid (depth-first to hello-world, then prove the seam on a 2nd ISA early).** Build the full vertical slice on Xtensa just far enough to run hello-world native (classic/S3), then *immediately* prove a minimal second-ISA backend (P4/RISC-V, or desktop x86-64) on that same slice — before deepening to controls/math/2D/3D. This retires the project's biggest risk (does the IR seam genuinely decouple front-end from backend?) at hello-world cost, when fixing it is cheap, rather than discovering a leak after six stages. Then deepen, primarily on Xtensa; the full second backend follows later. +12. **Domain-neutral engine core, thin binding.** The engine (front-end + IR + backends) is domain-neutral core — it never depends on projectMM; the binding (`MoonLiveHost`/`MoonLiveEffect`) depends on the engine, one-directionally, through the engine's public API + a tiny injectable platform seam (`allocExec`/`alloc`). This clean layering is adopted *because it is what makes projectMM-as-a-library optimal* (the [*Domain-neutral core*](../../CLAUDE.md) principle applied), and is **never compromised** for any separability — if a separation would cost projectMM's speed/simplicity/hot-path/principles, it isn't done. +## Out of scope for Stage 1 + +Final VM-vs-JIT decision; the IR design; the language grammar spec; the web editor; per-engine benchmarking on real hardware; the MoonModule-binding mechanics; the sandboxing depth. All belong to the top-down document the prompt above generates. 
diff --git a/docs/backlog/livescripts-analysis-top-down.md b/docs/backlog/livescripts-analysis-top-down.md new file mode 100644 index 0000000..e729070 --- /dev/null +++ b/docs/backlog/livescripts-analysis-top-down.md @@ -0,0 +1,432 @@ +# MoonLive — live-script engine, top-down redesign + +> **Forward-looking research document — exception to CLAUDE.md present-tense rule.** **MoonLive** is projectMM's live-script engine (the Moon family: MoonLight, MoonDeck, MoonLive — author an effect as text, see it live). Stage-2 companion to [livescripts-analysis-bottom-up.md](livescripts-analysis-bottom-up.md) (read first: it deep-reads the ESPLiveScript fork, surveys WLED ARTI-FX, the embedded-VM field, and a portable WASM fallback, and ends with the product-owner-direction decisions this document expands). It reasons from projectMM's end goal — *author a script as text, run it on a running device on the next tick* — down to a reference architecture, a concrete API, a performance budget, and a staged spike plan. Modelled on [leddriver-analysis-top-down.md](leddriver-analysis-top-down.md). This expands the twelve decisions already made; it does not re-open them. All design is written fresh against projectMM's architecture — prior art (ESPLiveScript, ARTI-FX, MoonLight) is credited, not traced. + +## TL;DR + +- **MoonLive is our native-codegen engine** — a real compiler (lex → parse → AST → **IR** → native machine code), executed by direct function-pointer call, so a scripted effect runs at **near-100% native speed** in the render hot path. That speed, bound to a real module system, is projectMM's standout. +- **One narrow boundary, three tiers.** The seam is `MoonLive::run()` (the analog of `LedDriver::push()`). Above it: a **platform-independent front-end** (tokenizer → parser → typed AST). Below it: a **typed IR** (the seam that lets one front-end feed many backends) → a **per-ISA backend** (Xtensa first). 
The IR is compile-time only — **zero per-pixel runtime cost**; it lowers to the same native instructions a hand-written backend would emit. +- **Xtensa first, no dead-ends.** Ship the Xtensa backend (classic ESP32 + S3 — projectMM's first targets, the bench hardware) as a complete, blazing first increment. RISC-V (P4), ARM (Teensy), x86-64/ARM64 (desktop) are each *a new backend behind the unchanged IR* later — additive, never a rewrite. **WASM/WAMR is the per-target fallback** (IR→WASM is one more backend), so no target is ever blocked and a true sandbox stays reachable. +- **Source language: a C-subset, "as close as possible" to a precompiled effect, with pragmatic simplifications.** The effect *body* ports near-verbatim from a `.h` (types, `for`, integer + float math, `static_cast`, `RGB`, `hsvToRgb`, buffer writes). The C++ *file/class ceremony* that buys nothing in a script (`#pragma`/`#include`/`namespace`; lightened: `class : public EffectBase`/`override`/the `controls_`-dance) is supplied by the engine. **Not** JS (doubles = slow + further from our code); **not** full C++ (object model = build + hot-path cost for zero-value boilerplate). +- **Minimal-ceremony controls.** A control is a near-plain top-level variable with a range annotation; the engine derives the MoonModule control + UI + persistence. +- **MoonModule-first.** A scripted module **is** a `MoonModule` (role, controls, `loop()`, generic UI, lifecycle, robustness, live-reconfig). The script ⇄ host binding reaches the `Buffer` / `AudioFrame` / LUT through the producer/consumer pull pattern — no copy. This is the projectMM value-add with no prior art to trace. +- **Safety staged.** Ship cheap first — array **bounds-checking** (a compare-branch per indexed access, low single-digit %, switchable off) + a **watchdog / instruction budget** (kill a runaway loop, near-free). 
The expensive **true memory sandbox** (WASM gives it free; native can't cheaply) is deferred, reachable via the IR→WASM fallback only if a public editor in the field demands it. +- **Staging spine = the [MoonLight effects tutorial](https://moonmodules.org/MoonLight/moonlight/effects-tutorial/) ladder.** Each tutorial rung (random pixel → control → trails → oscillators → 2D → 3D → audio) is one engine-capability spike with a concrete acceptance bar. **RipplesEffect.h is the language-fidelity *graduation test*** (does the C-subset handle float trig + 3D + `memset` near-verbatim?) — the hard case, not the hello-world. +- **Load-bearing spike:** a minimal native Xtensa engine running the tutorial's hello-world (`setRGB(random16(N), blue)`) live on an ESP32-S3, hitting the frame budget *and* surviving a deliberately-bad script via cheap safety. If native-with-cheap-safety can't hold 16K×50FPS, the fallback is demote-to-WASM/WAMR — a backend swap behind the IR, not a restart. +- **Cost, eyes open.** A real compiler is more work than adopting an off-the-shelf VM — weeks to the first beautiful Xtensa increment, each ISA backend its own increment later. The deliberate trade for native speed + a differentiator, mitigated exactly as the LED drivers were: spike-ordered, one complete increment at a time, the hard multi-target part deferred behind a seam that keeps it reachable. + +## 1. The goal, in detail + +A user writes a script — an effect, layout, modifier, driver, or a domain-neutral core rule (transform sensor data) — in a text box in the browser. The device compiles it and runs it as a first-class `MoonModule` **on the next tick**: no toolchain, no flash, no reboot. The same leap WLED took with ARTI-FX. The hard requirements (from the bottom-up): + +- **Blazingly fast** — the script runs in the render hot path at up to 16K+ lights × 50 FPS, so a slow per-pixel path is fatal. This is *the* constraint that picks native over interpreted. 
+- **General in core + specific in light** — one engine, many `MoonModule` roles. +- **Target order** — ESP32 classic + S3, then P4 + other ESP32, then Teensy, then desktop. +- **Smart memory** — IRAM/PSRAM-aware via `platform::alloc`, no hot-path allocation, compile-once. +- **Infinitely scalable** — run *as many* live scripts concurrently as memory allows, exploiting PSRAM. No fixed slot count; each script is an independent compiled unit and the only ceiling is free heap. A device hosts as many scripted modules (effects across layers, modifiers, core sensor rules) as fit, and degrades gracefully when it doesn't — the same scaling-to-available-memory contract the light pipeline already honours. +- **Synced** — `Scheduler`-tick, live reconfig (no reboot), tick-atomic hot-swap, robust to add/delete/replace in any order. +- **MoonModule-compatible** — controls, `loop()`, role, generic UI with zero per-script UI code. + +## 2. Why native, and why our own (expanding decision 1) + +The design space runs from interpreted to native, and the choice is driven by the hot-path requirement: + +- **AST-walk** (ARTI-FX): stores values as `double` and walks the tree per frame — which buys maximum flexibility and runs on any platform unchanged, at the cost of per-frame speed. That speed cost is what rules it out *for the 16K×50FPS hot path specifically* — not a flaw, a different trade than projectMM needs here. (For a slow core script off the hot path, that trade would be fine.) +- **A bytecode VM is the middle ground** — a compact opcode stream run by a dispatch loop, far faster than tree-walking but paying a per-opcode dispatch tax every operation; at 16K×50FPS = 800K px/s that tax is the open question, not a given. +- **Native JIT is the only thing that reaches ~100%** (ESPLiveScript's 85 fps ≈ hand-written C++ — hpwit's result). The differentiator we're after is the *combination*: native speed **and** multi-target **and** bound to a real module system. 
Each prior engine has part of it — ESPLiveScript has the native speed (Xtensa); ARTI-FX has the live-scripting product shape and runs anywhere (interpreted). Neither combines all three; that combination is the open space. +- **WASM/WAMR** gets portability + free sandbox but tops out at ~50% native (WAMR-AOT) with a 200KB+ runtime — kept as the fallback, not the flagship, because native speed is what we're chasing. + +**Why our own, not "adopt WAMR":** *Industry standards, our own code.* We take the textbook compiler *shape* (lex → parse → AST → IR → native — the LLVM structure scaled to an MCU) and textbook *names*, written fresh against our architecture. An off-the-shelf runtime would make the engine someone else's and cap us at half speed; building means the language, the MoonModule binding, and the hot path are ours to make beautiful. The cost (a real compiler) is accepted and staged. + +**What would flip it to WASM-wholesale:** if the load-bearing spike shows native-with-cheap-safety *cannot* hold 16K×50FPS on an S3 (native machine code makes this unlikely), or if a public script editor proves to demand a true sandbox we can't afford natively. The IR seam makes either flip a backend swap, not a restart. + +## 3. Reference architecture + +### 3.1 The one narrow boundary + +Everything hangs off a single tiny seam — the analog of the LED doc's `LedDriver::push(std::span<...>)`: + +```cpp +// A compiled script, ready to run. The host calls run() once per tick. +class MoonLive { +public: + bool ok() const; // compiled cleanly + const char* error() const; // human-readable compile/runtime error, "" if none + void run(); // execute the script's loop() — the hot path + void bind(MoonLiveHost& host); // wire controls + host data (§3.4) + // lifecycle: free() releases code + data; recompile swaps tick-atomically (§3.6) +}; +``` + +Above the line is the portable front-end; below it the IR and the per-ISA backend. 
The host (a `MoonLiveModule`, §3.3) owns a `MoonLive` and calls `run()` from its `loop()`. + +### 3.2 The three tiers (where the IR seam lives) + +``` + source text + │ ┌─────────────── platform-independent (one implementation, all targets) + tokenizer (lexer) + │ + parser → typed AST + │ + ┌────┴──────────── IR SEAM ──────────────┐ ← one front-end, many backends + │ + typed IR (SSA-ish, register/temp model, types resolved, bounds-check nodes inserted) + │ ┌─────────────── platform-bound (one backend per ISA) + backend: lower IR → native instructions + │ Xtensa first; RISC-V / ARM / x86-64 / ARM64 later; WASM as the fallback backend + encode → executable memory (platform::allocExec) → call as function pointer +``` + +- **Front-end (portable):** tokenizer + parser + AST live in `src/core/moonlive/` (domain-neutral). They know the *language*, never a CPU. One implementation serves every target. +- **IR (the seam):** a small typed intermediate representation — the AST lowered to a flat list of typed operations with explicit temporaries, types resolved, and **bounds-check / safety nodes inserted here** (so every backend inherits safety for free). This is a *compile-time data structure*; it does not exist at run time. +- **Backend (per-ISA):** lowers the IR to native instructions for one ISA. `src/platform/<isa>/moonlive_backend_*.{h,cpp}` — the only place CPU-specific codegen lives, behind the platform boundary. A WASM backend (lower IR → `.wasm`, run by WAMR) is one such backend, the portable fallback. + +**Critical: the IR costs nothing at run time (decision 3).** It is consumed during compilation and discarded. The CPU executes only the final native instructions — identical in kind to hand-written assembly. 
*Matching native speed on Xtensa is non-negotiable.* The Xtensa backend must lower a hot loop to instructions that match hand-written Xtensa; the spike's acceptance bar **diffs generated instructions for the Ripples inner loop against a hand-emitted reference**, and an `__asm__` escape hatch covers the very hottest paths. If the IR ever costs hot-path speed, that's a backend bug to fix, not a tax to accept. + +### 3.3 A scripted module IS a MoonModule (decision 7) + +The one deliberate class hierarchy in the codebase is the module tree; a scripted module joins it like any other. `MoonLiveEffect` is a normal `EffectBase` whose `loop()` delegates to the compiled `MoonLive`: + +```cpp +// src/light/moonlive/MoonLiveEffect.h — a scripted effect is a first-class EffectBase +class MoonLiveEffect : public EffectBase { +public: + ModuleRole role() const override { return ModuleRole::Effect; } + const char* tags() const override { return "📝"; } // scripted + Dim dimensions() const override { return engine_.declaredDim(); } // from the script + + void setup() override { // acquire the engine + engine_.bind(host_); // wire the host data (§3.4) + } + + void onBuildControls() override { // DYNAMIC controls — re-runs when the script changes + controls_.addText("source", source_, kMaxSource); // the script text (persisted, editable) + // The engine declares its controls as NEUTRAL data; the binding translates to controls_. + for (auto& c : engine_.declaredControls()) // {name,type,min,max,default} — no projectMM type + controls_.add(c.name, c.type, c.min, c.max); // binding maps neutral → projectMM control (§3.5) + } + + // onBuildState is the rebuild sweep: it fires on a source edit (recompile) AND on a + // grid/size change (the engine re-sizes its script buffers for the new dimensions). + // projectMM has no separate onSizeChanged — resize routes through onBuildState, so the + // dynamic-memory re-allocation rides the same hook every config change already uses. 
+ void onBuildState() override { + engine_.compile(source_); // recompile if source changed + if (!engine_.ok()) { setStatus(engine_.error(), Severity::Error); return; } + engine_.allocForSize(width(), height(), depth()); // (re)alloc script data for the current size + } + + void onUpdate(const char* name) override { // cheap per-control reaction, no full rebuild + engine_.onControlChanged(name); // poke a running script's bound control + } + + void loop() override { + if (engine_.ok()) engine_.run(); // the hot path: native code over our buffer + } + + void teardown() override { // release: free compiled code + script data + engine_.free(); // the "destructor" role — arenas returned to the heap + } +}; +``` + +The same shape gives `MoonLiveLayout` (role `Layout`, emits coordinates), `MoonLiveModifier` (role `Modifier`, remaps positions), `MoonLiveDriver` (role `Driver`, consumes the buffer), and a core `MoonLiveModule` (domain-neutral, e.g. a sensor rule). One engine, many roles — each a thin `MoonModule` subclass whose `loop()` is `engine_.run()`. The UI renders them generically (the `source` text control + the script-declared controls) with **zero per-script UI code** — exactly the module-tree payoff. + +**A scripted module implements the whole `MoonModule` lifecycle, not just `loop()`** — that's what makes it a first-class module and what answers dynamic controls / dynamic memory / cleanup: + +- **`onBuildControls()` — dynamic controls.** Re-runs whenever the module rebuilds, so a script that declares different controls (a new `@control` var) gets a different control set in the UI + persistence, live. The controls are *the script's*, not a fixed list. +- **`onBuildState()` — dynamic memory on size change.** projectMM routes a grid/size change through `onBuildState` (the same rebuild sweep that applies every config change without a reboot), so MoonLive re-allocates its per-size script buffers here (`allocForSize`, PSRAM-first per §3.7). 
There is no bespoke `onSizeChanged` — using the existing hook means a scripted module resizes exactly like a compiled one, and inherits the no-reboot + robustness contracts for free. +- **`setup()` / `teardown()` — acquire / release (the destructor role).** `teardown` frees the compiled code block + the script's data arena back to the heap, so deleting a scripted module returns all its memory — the lifecycle that makes "as many scripts as memory allows" (§3.7) safe to add *and remove* in any order. +- **`onUpdate(name)` — cheap per-control reaction.** A control edit pokes the running script's bound variable without a full recompile (the fast path for a slider drag); only a *source* edit triggers the heavier `onBuildState` recompile. + +So the binding overrides the same hooks any compiled module does; the only difference is that each one delegates to the compiled `MoonLive` instead of hand-written C++. + +**Crucially, all of these lifecycle methods live in the *binding* (`MoonLiveEffect`, `src/light/moonlive/`), not in the engine.** `onBuildControls`/`onBuildState`/`onUpdate`/`teardown`, `EffectBase`, `ModuleRole`, `controls_` — every projectMM type — sit on the binding side of the §3.9 seam. The engine (`MoonLive`, `src/core/moonlive/`) sees none of them; the binding reaches it only through a **neutral public API**: `compile(source)`, `run()`, `free()`, `declaredControls()` → a plain list of `{name, type, min, max, default}` structs the engine owns, and `allocForSize(w, h, d)` → plain ints. The binding *translates* — it reads the engine's neutral `declaredControls()` and calls projectMM's `controls_.addUint8(...)`; it maps a grid resize to `allocForSize`. **The engine never takes a `ControlList`, a `Buffer`, or any projectMM type** — so the rich MoonModule lifecycle is entirely a property of the binding, and the engine stays the domain-neutral core §3.9 describes. 
(This is the seam working as intended: a different host writes its own binding with its own lifecycle against the same neutral engine API.) + +### 3.4 The host binding — script ⇄ MoonModule (decision 7, the value-add) + +Rather than a flat name→pointer registry (the host-binding shape surveyed engines share), projectMM uses a **MoonModule-aware `MoonLiveHost`** that exposes the producer/consumer data the script needs, by reference, no copy — the same pull pattern effects already use (`EffectBase::buffer()/width()/elapsed()`, [Layer.h:499-504](../../src/light/layers/Layer.h)): + +- **Buffer + geometry** — `width()`, `height()`, `depth()`, `channelsPerLight()`, `nrOfLights()`, `elapsed()`, and pixel writers `setRGB(i,c)` / `setRGBXY(x,y,c)` / `setRGBXYZ(x,y,z,c)` (the MoonLight tutorial's exact surface). These compile to direct loads/stores against `layer_->buffer()` — the **identity-mapping fast path** preserved (the script writes the real buffer, no intermediate copy). +- **Controls** — the script's declared variables (§3.5) bind by reference so a UI control edit updates the running script live. +- **Producer structs** — a core or audio script reads `AudioFrame` (level + 16-band spectrum) / sensor structs through the same `const`-pointer pull the C++ effects use (`AudioModule::latestFrame()`), so add/remove in any order returns a live or silent-default frame, never null (robustness). +- **Built-ins** — `hsvToRgb`/`hsv`, `random16`, `sin`/`cos`/`sqrt`/`floor` (the trig Ripples needs), `millis`/`elapsed`, `fill`/`memset`. A small, fixed, recognizable library (FastLED-flavoured — the vocabulary effect authors already know), implemented once in the host and callable from any backend. + +The binding is generated *around* the script body — the script never writes `#include`, never reaches a raw pointer it shouldn't, and the host decides what's in scope per role (an effect sees the buffer; a core sensor script sees the sensor struct, not the LED buffer). 
+ +### 3.5 Controls — minimal ceremony (decision 5) + +A control is a near-plain top-level variable with a range annotation; the engine derives the `MoonModule` control + UI + persistence: + +```c +uint8_t speed = 50; // @control 0..99 → controls_.addUint8("speed", …, 0, 99) +uint8_t interval = 128; // @control 1..254 +``` + +The front-end collects annotated top-level vars during parsing and the engine exposes them as a neutral `declaredControls()` list (`{name, type, min, max, default}` — no projectMM type); the *binding* reads that list and calls the normal `controls_.add(...)` the rest of projectMM uses (§3.3) — so a scripted control is indistinguishable from a compiled one in the UI, persistence, and the live-reconfig sweep, while the engine stays projectMM-agnostic. Lighter than today's explicit `onBuildControls` + `addUint8` (the engine writes that for you), and copy-paste-friendly: the `uint8_t speed = 50;` line is *already* how RipplesEffect.h declares it. (Exact annotation syntax — `@control`, a trailing comment convention, or a `slider(0,99)` initializer — is settled in the spike; the principle is "declare the var, get the control".) + +### 3.6 Live reconfig + tick-atomic hot-swap (decision: sync) + +A re-pushed script must swap in on the next tick with the old one freed, no reboot, no crash mid-render — the no-reboot + robustness principles applied to *code*. The mechanism rides the existing `onBuildState()` rebuild sweep (the same one that makes every config change live): a `source` edit marks the module dirty; the next `onBuildState()` compiles the new source into a *second* `MoonLive`, and only on success swaps the active pointer (the old `MoonLive` freed after the swap), so a failed compile leaves the running effect untouched and surfaces the error in the module's status. `run()` reads the active pointer once per tick — the swap is a single pointer store between ticks, never mid-`loop()`. 
+ +### 3.7 Memory placement + infinite scalability, routed through `platform::` (decisions: smart memory, infinitely scalable) + +Memory placement routes through the existing `platform::` seam, so it's one policy, not scattered per-target branches: + +- **Compiled code** → `platform::allocExec(size)` (a new seam: `MALLOC_CAP_EXEC` IRAM on ESP32; `mmap(PROT_EXEC)` on desktop; the platform decides, the engine doesn't know). PSRAM-capable on S3/P4 where the chip allows executable PSRAM. +- **Script data** (globals, stack arena) → `platform::alloc` (PSRAM-first with internal fallback — already the project's policy). +- **Compile-once** → a portable compiled-artifact format persisted to LittleFS, so a known-good script skips device-side recompile on boot. The native artifact is per-ISA; the portable fallback artifact is one file for all targets. + +**Infinite scalability — as many scripts as memory allows.** Each `MoonLive` (§3.1) is a **self-contained compiled unit**: its own code block (from `allocExec`) and its own data arena (from `alloc`), owned by the `MoonLiveModule` that holds it, freed when that module is deleted. Nothing is shared or fixed-slot — so running N scripts is just N independent `MoonLive`s, and **the only ceiling is free heap**, not an arbitrary limit. This falls out of the architecture for free: + +- **The module tree already hosts N modules.** A scripted module is a `MoonModule` (§3.3); the tree puts no cap on how many effects a Layer holds or how many peripherals System hosts. Ten scripted effects across layers + a scripted modifier + two core sensor rules are just twelve modules — the UI, persistence, and `Scheduler` handle them like any other. 
+- **PSRAM is where it scales.** On an S3/P4 (8 MB PSRAM) the compiled code + data arenas live in PSRAM (`alloc` is PSRAM-first), so the device holds *far* more scripts than internal RAM alone would allow — exploiting PSRAM is exactly what lifts the ceiling from "a handful" to "as many as the script sizes sum to under PSRAM." A non-PSRAM classic ESP32 holds fewer (internal heap only) — correct and honest, the same internal-vs-PSRAM split the rest of the system has. +- **Graceful degradation when full.** When the next script won't fit, the device does what the light pipeline already does at the memory edge ([architecture.md § scaling to available memory](../architecture.md#scaling-to-available-memory)): the compile/bind fails cleanly, the module reports a "not enough memory" status, and everything already running keeps running — no crash, no reboot (the robustness + no-reboot principles). The cap is reached by *degrading*, never by bricking. +- **The hot-path cost is per-*running* script, not per-*loaded* script.** Memory scales with how many scripts are loaded; tick time scales with how many are *enabled and rendering*. A device can hold a large library of scripts in PSRAM and run only the active ones, so "infinitely scalable in memory" doesn't mean "infinitely slow" — a disabled scripted module costs RAM but no tick time (and the disable-releases-resources backlog item, when it lands, lets it cost neither). + +### 3.8 Execution model — inline by default, task as the exception (decision: sync) + +**A script runs inline in the `Scheduler` tick by default — not in its own task.** A scripted effect's `loop()` is called exactly like a compiled effect's `loop()`, on the render task, each tick. The task-per-script model some engines use fits when a script *is* the top-level loop and owns the device; in projectMM a scripted module is one `MoonModule` among many, called from the same single-threaded render loop as every compiled module, so inline is the consistent shape. 
Three reasons make inline the default, not just a choice: + +- **Consistency.** A scripted effect behaves identically to a compiled one — same call site, same hot-path rules, same `Scheduler`. One mental model, and the UI/persistence/lifecycle treat it like any other module. +- **It sidesteps two costs task-per-script can't.** Task stacks *can* live in PSRAM (`xTaskCreateWithCaps`, `MALLOC_CAP_SPIRAM`), so task-per-script is not blocked on *internal* RAM — but it pays two costs inline doesn't: (a) **scheduling overhead** — each task is a TCB + scheduler bookkeeping + a context switch; hundreds of tasks all wanting to run each frame thrash the scheduler instead of rendering, a ceiling that has nothing to do with memory; and (b) **a PSRAM-backed task stack is hot-path-slow** — a per-pixel inner loop touching locals on a PSRAM stack pays PSRAM latency (~12 MB/s vs internal ~80 MB/s) every access, exactly what a 16K×50FPS loop can't afford. An inline script runs on the render task's *internal-RAM* stack, fast, with no per-task scheduler cost. So PSRAM scales the script's *code + data* (§3.7), and inline keeps the per-script *stack* fast and free — the two pull together, where task-per-script would put them in tension. +- **No cross-thread sync.** An inline script reads `buffer()` / `elapsed()` / `AudioFrame` from the thread that owns them — no locks, no race, no memory barriers. A task touching the shared buffer while the render task reads it is exactly the data race the single-threaded hot path avoids by design. + +An inline script obeys the no-blocking-hot-path rule (it can't `delay`); a runaway loop is caught by the instruction-budget watchdog (§4), so a bad inline script degrades, it doesn't wedge the tick. + +**The exception — a pinned task — is narrow and opt-in.** A *core* script that genuinely blocks or runs at its own cadence and must **not** share the render tick (e.g. 
a slow I²C sensor transaction, or a rule that ticks at 1 Hz independent of render) may opt into its own task, pinned opposite the render core. This is a per-module, documented exception for off-hot-path core work — never the default, and never for a script in the render pipeline (effect/layout/modifier/driver), which is always inline. Two execution paths exist, but the inline one is the default and the task one earns its place case by case. + +### 3.9 Layering — a domain-neutral engine core, a thin binding (decision: domain-neutral core) + +The tiers above already separate cleanly along projectMM's own *Domain-neutral core* principle, and the layering is held to it deliberately: + +- **The engine core (MoonLive) is domain-neutral.** "MoonLive" is the engine's *name*, not a coupling — the front-end (`src/core/moonlive/`) and the IR + backends (`src/platform/<target>/moonlive_backend_*`) know the *language* and the *ISA*, never `Buffer`, `EffectBase`, the module tree, or anything light- or projectMM-specific. The core's only outward contract is a tiny injectable platform seam (`platform::allocExec` / `alloc` / `millis`) — a handful of functions, not a reach into projectMM's full platform layer. +- **The binding is the only projectMM-coupled layer**, and it is *thin*. `MoonLiveHost` + `MoonLiveEffect`/`MoonLiveLayout`/… (`src/light/moonlive/`, with a core `MoonLiveModule` for sensor rules) sit **on top of** the engine's public API and consume it; they never reach into engine internals. +- **Dependency direction is one-way:** the binding depends on the engine; the engine never depends on the binding (or on projectMM). The engine does not `#include` projectMM; projectMM `#include`s the engine. + +Why this matters concretely: **it is what makes projectMM-as-a-library optimal.** A clean library needs exactly this — a domain-neutral core with a one-directional dependency and a thin, replaceable binding, so the whole stack composes without circular dependencies or hidden coupling.
So this layering is not extra structure for its own sake; it is the *Domain-neutral core* + *Complexity lives in core, domain modules stay simple* principles applied, and it is the same boundary projectMM needs to be a well-formed library. + +A true property of that boundary, worth stating: because the core (MoonLive) knows only the *language* and the *ISA* — never LEDs, buffers, or projectMM — the same front-end + IR + backends would serve a **wholly different host**: a different output device, or a different application entirely (a script that drives a display, reads a keypad, computes a result). Such a host writes its own thin binding against the same public API + platform seam; nothing in the core changes. The IR seam is what makes that portable, too — the host targets whatever chip it likes by writing one backend behind the unchanged IR. This is a *consequence* of building the core domain-neutral for projectMM, not a goal we design toward — but it is real, and it is the mark of a well-factored core: it doesn't care what you point it at. + +**Hard constraint: the layering is justified entirely by projectMM's optimality — and that optimality is never compromised for the sake of the layering.** The clean engine/binding split is adopted only because it makes projectMM architecturally sound, fast, and CLAUDE.md/architecture.md-compliant (domain-neutral core, data-over-objects on the hot path, the platform boundary). If any separability would cost projectMM's optimality — a slower hot path, a heavier binding, an abstraction the engine doesn't need — it is **not** done. The binding stays thin and the core stays neutral *because that is the optimal projectMM design*, full stop; nothing in the layering is bent toward a use beyond projectMM. + +## 4. Safety — staged (decision 6) + +A user-facing editor means a bad script must degrade, not brick.
There are two distinct failure surfaces, and they need different defenses: + +- **Compile-time (parser / analysis).** A malformed script must be *rejected cleanly at parse*, never crash the compiler. The `fix-warnings` fork's null-deref is exactly this class: a nested external-call argument produced an AST node missing its children, and the compiler dereferenced them unconditionally. The defense is **AST/analysis validation** — the parser checks node shape (children present, types resolved) and emits a diagnostic instead of dereferencing a null, backed by a **regression test** per malformed input (the `fix-warnings` case is the first such test). This is a correctness property of the front-end, not a runtime cost. +- **Runtime — cheap, ship first.** Once a script *compiles*, a bad *value* at run time must degrade, not brick: (a) **Array bounds-checking** — the IR inserts a compare-branch before each indexed access (a clamp or skip on out-of-range), e.g. a `setRGB(i, …)` with `i` past the buffer. Low single-digit % overhead, inserted at the IR so every backend gets it, and **switchable off** in a trusted/fast mode for vetted built-in scripts. (b) **Watchdog / instruction budget** — a per-tick instruction or time budget that aborts a runaway `while(1)` (near-free; the task WDT already does most of it). Together these catch the common *runtime* failures — out-of-range index, infinite loop — at low cost. +- **Expensive, deferred.** A *true* memory sandbox (the script physically cannot touch memory outside its arena) is what native can't cheaply provide. Don't build it first — it's reachable via the IR→WASM fallback (suspect scripts compiled to the sandboxed backend) only if a public editor in the field shows the cheap tier isn't enough. Safety is a ladder climbed on evidence, not a wall built before any script runs. + +## 5. The language — a C-subset as close as possible (decision 4) + +### 5.1 The model + +A C-subset, not full C++, not JS. 
The type model is exactly what real effects use: `uint8_t`/`uint16_t`/`uint32_t`/`int`/`lengthType`/`nrOfLightsType` integers (with 64-bit where overflow matters — Rainbow's `uint64_t` phase), `float` (Ripples' trig), `bool`, `char`, a `RGB`/`CRGB` struct, and arrays (incl. multi-dim). Control flow: `if/else`, ternary, `while`, C-style `for`, `break`/`continue`, `return`; user functions; `static_cast` (or C casts). Grammar: **hand-written recursive-descent** — the recognizable textbook default, what most embedded script languages use; a PEG is the alternative but recursive-descent is simpler to make fast and to emit good errors from. Built-ins: the fixed host library (§3.4). + +### 5.2 What's dropped vs lightened (the pragmatic simplifications) + +- **Dropped** (file ceremony, zero value in a script): `#pragma once`, `#include`, `namespace`. The engine supplies the surrounding module. +- **Lightened** (the C++ object model): no `class : public EffectBase`, no `override`, no `controls_.addUint8(...)` host-object dance. The engine synthesizes the `MoonLiveEffect` wrapper (§3.3) around the script body; the role/`dimensions`/controls come from light annotations (§3.5) and the script's `loop()`. +- **Kept verbatim** (the part you iterate on): types, the `loop()` body, all the math, `static_cast`, `RGB c = hsvToRgb(...)`, the loops. + +**Why not full C++:** supporting `class`/inheritance/`override`/host-method-binding means implementing a C++ object model (vtables, member-reference binding) in the engine — build cost up front, and the object machinery is the very "object graph in the hot path" the architecture forbids. The wrapper has no runtime value; let the engine write it. + +**Why not JS** (the ARTI-FX surface): JS's number model is doubles-everywhere — the per-pixel cost that makes ARTI-FX flexible-but-not-fast — *and* it's further from our C++ effects, so porting an existing effect is harder, not easier. A C-subset is both faster and closer to the source. 
+ +### 5.3 RipplesEffect.h → scripted form (the language-fidelity test) + +`RipplesEffect.h` is the **graduation test** for the language (float trig + 3D + `memset` + two controls — the hard case, deliberately not the hello-world). The body must port near-verbatim: + +**Today (`src/light/effects/RipplesEffect.h`, the C++):** + +```cpp +class RipplesEffect : public EffectBase { // ← dropped (engine supplies) + const char* tags() const override { return "💫🟦🦅"; } // ← lightened → annotation + Dim dimensions() const override { return Dim::D3; } // ← lightened → annotation + uint8_t speed = 50; // ← kept (becomes a control) + uint8_t interval = 128; + void onBuildControls() override { // ← dropped (derived from the vars) + controls_.addUint8("speed", speed, 0, 99); + controls_.addUint8("interval", interval, 1, 254); + } + void loop() override { // ← KEPT VERBATIM (the body) + uint8_t* buf = buffer(); … std::memset(buf, 0, nrOfLights()*cpl); + const float rippleInterval = 1.3f * … * std::sqrt((float)h); + for (lengthType z=0; z=h) continue; + uint8_t hue = elapsed()/50 + x*3 + z*7; + RGB c = hsvToRgb(hue,255,255); + setRGBXYZ(x, y, z, c); // replaces manual idx + px[0..2] + } +} +``` + +The diff is exactly the ceremony: gone are `class`/`override`/`onBuildControls`/`#include`/`namespace`/`std::`-qualification and the manual buffer-index arithmetic (→ `setRGBXYZ`). The math, the trig, the loops, the types — verbatim. **If the engine can compile this at native speed, the C-subset decision is proven.** (Float-trig native codegen is the part to validate — see the perf budget.) + +## 6. Testing — the engine's biggest structural advantage + +A live-script engine is one of the **most testable things projectMM can build**, and projectMM's two-tier test framework ([testing.md](../testing.md): doctest unit tests + JSON scenarios, each run in-process *and* live) maps onto it almost perfectly. 
This is a genuine edge: the bottom-up's structural note that ESPLiveScript ships `.ino` examples and no unit suite isn't a knock on it — it's the gap our framework closes. **Every live-script feature gets pinned by a test, back to back**, because two properties make a compiler exceptionally test-friendly: + +1. **Every compiler stage is a pure input→output function.** Lex, parse, IR-lower, codegen each take a known input and produce a deterministic output — the easiest thing in the world to unit-test, with no hardware and no flakiness. +2. **A script's *result* is deterministic and exactly assertable.** A known script over a known grid at a known `elapsed()` produces an exact buffer — byte-for-byte checkable. There is no "looks about right"; there is a golden buffer. + +### 6.1 Unit tests (`test/unit/core/unit_moonlive_*.cpp`, doctest) + +The compiler front-end and IR are domain-neutral core (§3.9), so they unit-test on the desktop with zero hardware: + +- **Tokenizer / parser** — source string → expected token list / AST shape; every language construct (types, `for`/`if`/ternary, functions, arrays, `static_cast`, struct access) and every *error* (unterminated string, type mismatch, undeclared var) pinned to an expected diagnostic. The fuzz-class bug the `fix-warnings` fork fixed (a nested external-call arg) is exactly a parser unit test — the regression that would have caught it for free. +- **IR** — AST → expected IR for representative snippets; the bounds-check / safety nodes (§4) asserted present where the IR should insert them. +- **Codegen (host backend)** — the **desktop/x86-64 backend** is itself a test asset: it runs in-process, so a compiled script *executes during a unit test* and its output buffer is asserted directly. A script that fills blue → assert every pixel is `(0,0,255)`; Ripples at a fixed `elapsed()` → assert the exact lit-column pattern. No device needed to test *the language*; the device tests only *the native ISA backend*. 
+- **Determinism harness** — the existing `setTestNowMs` clock-override seam (the same one scenarios use) lets a time-dependent script be tested at a fixed tick, so an animated effect is a deterministic assertion, not a guess. + +### 6.2 Scenario tests (`test/scenarios/{core,light}/scenario_moonlive_*.json`) + +Scenarios exercise a scripted module *as a wired `MoonModule`* — the integration layer unit tests don't reach: + +- **The MoonModule binding** — `add_module MoonLiveEffect`, `set_control source=

projectMM Installer Serial monitor

- + diff --git a/docs/install/install.css b/docs/install/install.css new file mode 100644 index 0000000..2ca8067 --- /dev/null +++ b/docs/install/install.css @@ -0,0 +1,472 @@ +/* projectMM web installer styles. Extracted from index.html — a static GitHub Pages + page (not embedded like the device UI), so an external stylesheet is free. */ + :root { + --bg: #1a1a2e; + --card: #16213e; + --fg: #e0e0e0; + --muted: #a0a0b0; + --accent: #a78bfa; + --border: #2a3a6a; + --ok: #57c97a; /* green — "active" capability (supported + a module configured in deviceModels.json) */ + --sup: #e3c84a; /* yellow — "supported" capability (firmware supports it, not pre-configured) */ + --plan: #e8923a; /* orange — "planned" capability (no module yet; greener than red, by design) */ + } + * { box-sizing: border-box; } + body { + margin: 0; + min-height: 100vh; + background: var(--bg); + color: var(--fg); + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; + font-size: 15px; + line-height: 1.55; + display: flex; + flex-direction: column; + align-items: center; + padding: 24px 16px 64px; + } + main { width: 100%; max-width: 640px; } + .help-link { + display: inline-block; + margin-left: 8px; + width: 22px; height: 22px; line-height: 22px; + text-align: center; + font-size: 14px; font-weight: 600; + vertical-align: middle; + color: var(--accent); + border: 1px solid var(--border); + border-radius: 50%; + text-decoration: none; + } + .help-link:hover { border-color: var(--accent); } + .version-chip { + display: inline-block; + margin-left: 8px; + padding: 2px 8px; + background: var(--card); + color: var(--muted); + border: 1px solid var(--border); + border-radius: 4px; + font-size: 13px; + font-weight: normal; + vertical-align: middle; + } + h1 { + margin: 0 0 8px; + font-size: 28px; + color: var(--accent); + } + p.tag { margin: 0 0 24px; color: var(--muted); } + .card { + background: var(--card); + border: 1px solid var(--border); + border-radius: 8px; + 
padding: 20px; + margin-bottom: 16px; + } + label { display: block; font-weight: 600; margin-bottom: 6px; } + select { + width: 100%; + padding: 10px 12px; + background: var(--bg); + color: var(--fg); + border: 1px solid var(--border); + border-radius: 6px; + font: inherit; + } + .button-row { margin-top: 16px; } + .note { color: var(--muted); font-size: 13px; margin-top: 10px; } + /* `.windows-only` elements are `hidden` by default in the HTML; the tiny + userAgent check at the top of below removes `hidden` only on + Windows. Inverse to a CSS-only approach because CSS can't detect the + host OS — `[hidden]` already wins specificity-wise. */ + .erase-row { margin-top: 12px; font-size: 13px; } + .erase-row label { cursor: pointer; } + .erase-row input { vertical-align: middle; margin-right: 6px; } + .erase-note { display: inline; margin-top: 0; } + a { color: var(--accent); } + code { + background: rgba(255,255,255,0.06); + padding: 1px 6px; + border-radius: 3px; + font-size: 13px; + } + .browser-warning { + background: #3a2a1a; + border: 1px solid #6a4a2a; + color: #e6c890; + display: none; + } + ol { padding-left: 22px; } + ol li { margin-bottom: 6px; } + .credits { + max-width: 720px; + margin: 32px auto 24px; + padding: 0 16px; + text-align: center; + border-top: 1px solid var(--border); + padding-top: 16px; + } + .credits .note { margin-top: 0; } + + /* Minimal mirror of the device UI's control-row shape so the shared + install-picker module (src/ui/install-picker.js) renders the same + way on the installer page. The picker emits `.control-row` + child + ` + (#rp-board) — we keep it (so its change-listener wires) but hide its row; + the picture grid above drives it. The row is the .control-row that + contains #rp-board. */ + .control-row:has(#rp-board) { display: none; } + + /* Picture board grid — collapsed by default (a control-row field), expands + on click. 
The summary button is the row's field, so it flexes like the + selects (flex: 1) to line up with USB Port / Release / Firmware. */ + #board-summary { + flex: 1; display: flex; align-items: center; justify-content: space-between; + gap: 12px; padding: 10px 12px; background: var(--bg); color: var(--fg); + border: 1px solid var(--border); border-radius: 6px; font: inherit; + cursor: pointer; text-align: left; + } + #board-summary:hover { border-color: var(--accent); } + .board-summary-left { display: flex; align-items: center; gap: 10px; min-width: 0; } + .board-summary-thumb { + width: 36px; height: 24px; border-radius: 3px; flex-shrink: 0; + background: #0e1020 center/contain no-repeat; border: 1px solid var(--border); + } + #board-summary-label { overflow: hidden; text-overflow: ellipsis; white-space: nowrap; } + .board-summary-caret { color: var(--muted); transition: transform .15s; flex-shrink: 0; } + #board-summary[aria-expanded="true"] .board-summary-caret { transform: rotate(180deg); } + /* The expanded grid breaks out full-width below the row (aligns with the + field column by offsetting the label width + gap). 
*/ + #board-expand { margin: 0 0 10px 92px; } + .board-grid-controls { display: flex; align-items: center; gap: 12px; margin-bottom: 12px; flex-wrap: wrap; } + #board-search { + flex: 1; min-width: 160px; padding: 8px 10px; background: var(--bg); + color: var(--fg); border: 1px solid var(--border); border-radius: 6px; font: inherit; + } + .board-clear { + background: transparent; color: var(--muted); border: 1px solid var(--border); + border-radius: 6px; padding: 8px 12px; font: inherit; font-size: 13px; cursor: pointer; + } + .board-clear:hover { color: var(--fg); border-color: var(--accent); } + .board-filter-notice { color: var(--muted); font-size: 12px; margin-bottom: 10px; } + .board-filter-notice button { + background: none; border: none; color: var(--accent); font: inherit; font-size: 12px; + cursor: pointer; padding: 0; text-decoration: underline; + } + #board-grid { max-height: 420px; overflow-y: auto; } /* expanded grid scrolls, not the page */ + #board-grid { + display: grid; grid-template-columns: repeat(auto-fill, minmax(150px, 1fr)); gap: 10px; + } + .bg-chip-label { + grid-column: 1 / -1; color: var(--muted); font-size: 11px; text-transform: uppercase; + letter-spacing: .06em; margin: 6px 0 0; + } + .bg-card { + background: var(--bg); border: 1px solid var(--border); border-radius: 8px; + overflow: hidden; cursor: pointer; transition: border-color .12s, background .12s; + display: flex; flex-direction: column; + } + .bg-card:hover { border-color: var(--accent); } + .bg-card.selected { border-color: var(--accent); box-shadow: 0 0 0 1px var(--accent) inset; } + .bg-thumb { + aspect-ratio: 16 / 10; background: #0e1020 center/contain no-repeat; + display: flex; align-items: center; justify-content: center; + color: var(--muted); font-size: 10px; border-bottom: 1px solid var(--border); + } + .bg-thumb.noimg::after { content: "no photo"; } + .bg-body { padding: 8px 9px; display: flex; flex-direction: column; gap: 3px; } + .bg-name { font-weight: 600; 
font-size: 12px; line-height: 1.2; } + .bg-meta { color: var(--muted); font-size: 11px; } + /* Capability chips: active (green), supported (yellow), planned (orange) — distinguished by + colour, not by extra text. Labels are kept short in deviceModels.json so every + chip fits the ~150px card; the full label + state is in the chip's title + tooltip. */ + .bg-caps { display: flex; flex-wrap: wrap; gap: 3px; margin-top: 3px; } + .bg-cap { + font-size: 9px; line-height: 1.5; padding: 0 5px; border-radius: 999px; + max-width: 100%; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; + } + .bg-cap.act { background: color-mix(in srgb, var(--ok) 18%, transparent); color: var(--ok); } + .bg-cap.sup { background: color-mix(in srgb, var(--sup) 20%, transparent); color: var(--sup); } + .bg-cap.plan { background: color-mix(in srgb, var(--plan) 20%, transparent); color: var(--plan); } + .bg-link { color: var(--accent); font-size: 11px; text-decoration: none; } + .bg-link:hover { text-decoration: underline; } + + /* Board-details popup — native <dialog> (standard modal pattern: built-in + backdrop, ESC-to-close, focus trap; no bespoke modal JS). Shows the full + deviceModels.json entry as a readable summary plus a collapsible raw-JSON block.
*/ + #board-details::backdrop { background: rgba(0,0,0,0.6); } + #board-details { + background: var(--card); color: var(--fg); + border: 1px solid var(--border); border-radius: 10px; + padding: 0; max-width: 560px; width: calc(100% - 32px); + max-height: 80vh; overflow: auto; + } + .bd-head { + display: flex; align-items: baseline; justify-content: space-between; + gap: 12px; padding: 16px 18px 8px; + } + .bd-title { font-size: 16px; font-weight: 600; } + .bd-close { + background: none; border: none; color: var(--muted); + font-size: 20px; line-height: 1; cursor: pointer; padding: 0 4px; + } + .bd-close:hover { color: var(--fg); } + .bd-body { padding: 0 18px 18px; } + .bd-row { display: flex; gap: 8px; padding: 3px 0; font-size: 13px; } + .bd-key { color: var(--muted); min-width: 92px; } + .bd-val { flex: 1; min-width: 0; overflow-wrap: anywhere; } + .bd-section { margin-top: 14px; font-weight: 600; font-size: 13px; } + .bd-mod { margin-top: 8px; padding-left: 10px; border-left: 2px solid var(--border); } + .bd-mod-name { font-size: 13px; } + .bd-mod-name .bd-mod-id { color: var(--muted); font-weight: normal; } + .bd-ctrl { font-size: 12px; color: var(--muted); padding-left: 8px; } + .bd-ctrl code { font-size: 11px; } + .bd-raw { margin-top: 16px; } + .bd-raw summary { cursor: pointer; color: var(--accent); font-size: 12px; } + .bd-raw pre { + margin: 8px 0 0; padding: 10px; background: var(--bg); + border: 1px solid var(--border); border-radius: 6px; + font-size: 11px; overflow: auto; white-space: pre; + } + .bg-link.bg-details { cursor: pointer; } + + .action-btn { + background: var(--accent); + color: var(--bg); + border: none; + border-radius: 6px; + padding: 10px 20px; + font: inherit; + font-weight: 600; + cursor: pointer; + } + .action-btn:disabled { opacity: 0.5; cursor: not-allowed; } + .rp-status { color: var(--muted); font-size: 13px; } + .rp-status-row { min-height: 1.5em; } + + /* Inline spinner shown in a field while its data is still being fetched + 
(install-picker renderSkeleton). A 1em spinning ring, sized to sit next + to the select's "Loading…" placeholder. */ + .rp-spinner { + display: inline-block; + width: 1em; height: 1em; + vertical-align: -0.15em; + margin-right: 0.4em; + border: 2px solid var(--muted); + border-top-color: transparent; + border-radius: 50%; + animation: rp-spin 0.7s linear infinite; + } + @keyframes rp-spin { to { transform: rotate(360deg); } } + + /* "Your devices" card — one row per provisioned device. The row + is the picker's `.control-row` flex shape with the device info + on the left and action buttons on the right. */ + .device-row { + justify-content: space-between; + padding: 8px 0; + border-top: 1px solid rgba(255,255,255,0.06); + } + .device-row:first-child { border-top: 0; } + .device-info { min-width: 0; flex: 1; } + .device-url { + display: block; + font-family: ui-monospace, monospace; + color: var(--muted); + font-size: 12px; + text-decoration: none; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + } + .device-url:hover { color: var(--accent); text-decoration: underline; } + .device-seen { color: var(--muted); font-size: 12px; margin-top: 2px; } + .device-actions { display: flex; gap: 6px; flex-shrink: 0; } + .device-btn { + background: transparent; + color: var(--accent); + border: 1px solid var(--accent); + border-radius: 4px; + padding: 4px 10px; + font: inherit; + font-size: 12px; + cursor: pointer; + } + .device-model-name { color: var(--fg); font-size: 12px; margin-top: 2px; } + .device-btn:hover { background: rgba(123, 158, 255, 0.08); } + + /* Install modal — backdrop + centered card. Replaces the ESP Web Tools + shadow-DOM dialog. Sections show one at a + time via .install-section.active. 
*/ + .install-backdrop { + position: fixed; inset: 0; + background: rgba(0, 0, 0, 0.65); + display: none; + align-items: center; + justify-content: center; + z-index: 1000; + } + .install-backdrop.open { display: flex; } + .install-modal { + background: var(--card); + border: 1px solid var(--border); + border-radius: 8px; + padding: 24px; + max-width: 480px; + width: calc(100% - 32px); + box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5); + } + .install-modal h2 { + margin: 0 0 16px; + font-size: 20px; + color: var(--accent); + } + .install-section { display: none; } + .install-section.active { display: block; } + .install-status { margin: 8px 0; color: var(--muted); } + .install-done-note { margin: 4px 0 10px; font-size: 13px; color: var(--muted); } + /* Notice variant — for a flashed-OK-but-action-needed outcome (e.g. eth-only firmware + waiting on a cable). Amber, like the "supported" capability chip (var(--sup)): reads as + "do this next", not a plain note and not a red error. */ + .install-done-note.install-done-note--notice { + color: var(--sup); + background: color-mix(in srgb, var(--sup) 12%, transparent); + border-left: 3px solid var(--sup); + padding: 8px 10px; border-radius: 4px; + } + .install-warn { color: #d4a052; font-size: 12px; margin-top: 8px; } + .install-progress { + height: 8px; + background: var(--bg); + border-radius: 4px; + overflow: hidden; + margin: 12px 0; + } + .install-progress-bar { + height: 100%; + background: var(--accent); + width: 0; + transition: width 0.2s; + } + /* Indeterminate state — esptool-js's eraseFlash() doesn't report + progress (12 s of "wait and hope"), so we animate a marquee-style + bar to confirm the page hasn't hung. Toggled by adding the + .indeterminate class to .install-progress-bar; width set to 100% + so the animation has something to clip. 
*/ + .install-progress-bar.indeterminate { + width: 100%; + background: linear-gradient( + 90deg, + var(--bg) 0%, + var(--accent) 40%, + var(--accent) 60%, + var(--bg) 100%); + background-size: 200% 100%; + animation: install-marquee 1.4s linear infinite; + transition: none; + } + @keyframes install-marquee { + from { background-position: 100% 0; } + to { background-position: -100% 0; } + } + .install-form label { + display: block; + margin: 12px 0 4px; + font-size: 13px; + color: var(--muted); + } + .install-form input[type="text"], + .install-form input[type="password"] { + width: 100%; + padding: 8px 12px; + background: var(--bg); + color: var(--fg); + border: 1px solid var(--border); + border-radius: 6px; + font: inherit; + } + .install-actions { + display: flex; + gap: 8px; + justify-content: flex-end; + margin-top: 16px; + } + .install-actions button { + padding: 8px 16px; + font: inherit; + font-weight: 600; + border: 0; + border-radius: 6px; + cursor: pointer; + } + .install-actions button.primary { + background: var(--accent); + color: #1a1a2e; + } + .install-actions button.secondary { + background: transparent; + color: var(--fg); + border: 1px solid var(--border); + } + .install-error { + color: #f8a5a5; + font-size: 13px; + margin: 12px 0; + white-space: pre-wrap; + overflow-wrap: anywhere; + } + .install-success-url { + display: block; /* IP and .local each on their own line */ + width: fit-content; + margin-top: 8px; + color: var(--accent); + text-decoration: none; + font-family: ui-monospace, monospace; + } + .install-success-url:hover { text-decoration: underline; } + .install-log-wrap { + margin-top: 16px; + border-top: 1px solid var(--border); + padding-top: 12px; + } + .install-log-toggle { + background: transparent; + color: var(--muted); + border: 0; + padding: 0; + cursor: pointer; + font: inherit; + font-size: 12px; + text-decoration: underline; + } + .install-log-toggle:hover { color: var(--fg); } + .install-log { + margin-top: 8px; + 
max-height: 240px; + overflow: auto; + background: var(--bg); + color: var(--muted); + font-family: ui-monospace, monospace; + font-size: 11px; + padding: 8px; + border: 1px solid var(--border); + border-radius: 4px; + white-space: pre-wrap; + word-break: break-all; + } diff --git a/docs/install/install.js b/docs/install/install.js new file mode 100644 index 0000000..1e81c1c --- /dev/null +++ b/docs/install/install.js @@ -0,0 +1,1228 @@ +// projectMM web installer logic. Extracted from index.html's inline module script. +// A static GitHub Pages page, so an external module is free. + +// Shared install-picker (release → board → firmware). Same file as the +// on-device OTA UI uses; only the onInstall callback differs: +// - Device UI: POST the chosen .bin URL to /api/firmware/url; device +// fetches the binary directly via esp_https_ota. +// - Web installer (here): hand the manifest URL to the orchestrator, +// which flashes via esptool-js then provisions WiFi via Improv, +// all over the same SerialPort. +// +// Manifests + binaries must be same-origin with this page (Web Serial +// would happily flash from any URL, but the manifest fetch + part +// downloads via fetch() are subject to CORS). The release workflow +// self-hosts the last N releases into pages/install/releases//. +// toLocalUrl rewrites the picker's absolute GitHub URLs to the local +// copies before handing them to the orchestrator. +import { installPicker } from "./install-picker.js"; +import { myDevices } from "./devices.js"; +import { installer } from "./install-orchestrator.js"; +// Board catalog + chip detection — web-installer only, kept out of the +// firmware-embedded install-picker.js and injected here via boardSupport. +import * as boardSupport from "./install-picker-boards.js"; + +// Windows-only hints (was a separate inline