From e6de2b670cc64da021b95f1cb225abe8089bef63 Mon Sep 17 00:00:00 2001 From: sunrisepeak Date: Sun, 24 May 2026 12:32:11 +0800 Subject: [PATCH 1/4] fix: emit absolute `-fprebuilt-module-path` so clangd resolves modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit src/build/flags.cppm was emitting a bare `-fprebuilt-module-path=pcm.cache` (or `=gcm.cache`). This works for `mcpp build` because ninja runs commands with cwd = `/target///` so the relative path resolves correctly. But the same flag is captured verbatim into `compile_commands.json`, whose `directory` field is `plan.projectRoot`. Per CDB spec, clangd does `cd ` before resolving the args, so it looks for `/pcm.cache` — which doesn't exist. Effect: `import` resolution silently fails in clangd ("module 'X' not found"), even when `mcpp build` succeeds. The other `-fmodule-file=` flags in the same block were already absolute (escape_path-wrapped); this one was an oversight. Fix: format the flag with `escape_path(plan.outputDir / traits.bmiDir)`. Single line. ninja still works (absolute paths are cwd-independent), and clangd now finds the BMI cache. Regression test: tests/e2e/47_cdb_prebuilt_module_path_abs.sh creates a fresh project, runs `mcpp build`, parses `compile_commands.json`, asserts every `-fprebuilt-module-path=X`: - is absolute (starts with `/` on POSIX or `:` on Windows) - ends in `/pcm.cache` or `/gcm.cache` - points to an existing directory Pass-through for GCC-only flows that don't emit the flag at all. --- src/build/flags.cppm | 12 +++- tests/e2e/47_cdb_prebuilt_module_path_abs.sh | 68 ++++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) create mode 100755 tests/e2e/47_cdb_prebuilt_module_path_abs.sh diff --git a/src/build/flags.cppm b/src/build/flags.cppm index 0f70de0..6249170 100644 --- a/src/build/flags.cppm +++ b/src/build/flags.cppm @@ -212,7 +212,17 @@ CompileFlags compute_flags(const BuildPlan& plan) { auto traits = mcpp::toolchain::bmi_traits(plan.toolchain); std::string prebuilt_module_flag; if (traits.needsPrebuiltModulePath) { - prebuilt_module_flag = std::format(" -fprebuilt-module-path={}", traits.bmiDir); + // Absolute path: a bare `pcm.cache` / `gcm.cache` works at ninja + // time because ninja runs commands with cwd = outputDir, but the + // same flag ends up verbatim in `compile_commands.json` whose + // `directory` field is the project root. clangd does `cd directory` + // before resolving the flag, so a bare relative path points at + // `/pcm.cache` (which doesn't exist) and `import` + // resolution fails with `module 'X' not found`. The other + // `-fmodule-file=` flags in this block are already escape_path'd + // (absolute) for the same reason — this one was a leftover. + prebuilt_module_flag = std::format(" -fprebuilt-module-path={}", + escape_path(plan.outputDir / traits.bmiDir)); } f.cxx = std::format("-std=c++23{}{}{}{}{}{}{}{}{}{}", module_flag, std_module_flag, std_compat_module_flag, prebuilt_module_flag, diff --git a/tests/e2e/47_cdb_prebuilt_module_path_abs.sh b/tests/e2e/47_cdb_prebuilt_module_path_abs.sh new file mode 100755 index 0000000..72c8f4d --- /dev/null +++ b/tests/e2e/47_cdb_prebuilt_module_path_abs.sh @@ -0,0 +1,68 @@ +#!/usr/bin/env bash +# requires: +# 47_cdb_prebuilt_module_path_abs.sh — `-fprebuilt-module-path` in +# compile_commands.json must be an ABSOLUTE path, not a bare `pcm.cache` / +# `gcm.cache`. Reason: CDB `directory` is the project root, but ninja runs +# from the outputDir; a bare relative path works at build time only and +# silently breaks clangd's module resolution ("module 'X' not found"). +set -e + +TMP=$(mktemp -d) +trap "rm -rf $TMP" EXIT + +cd "$TMP" +"$MCPP" new app > /dev/null +cd app +"$MCPP" build > /dev/null + +cdb=compile_commands.json +[[ -f "$cdb" ]] || { echo "FAIL: no $cdb generated"; exit 1; } + +# Extract every -fprebuilt-module-path= token. +# (No jq dependency — grep is enough and portable on macOS / git-bash.) +vals=$(grep -oE '\-fprebuilt-module-path=[^"]+' "$cdb" | sed 's/^-fprebuilt-module-path=//') +if [[ -z "$vals" ]]; then + # No -fprebuilt-module-path emitted = GCC toolchain that uses -fmodules + # only (gcm.cache). bmi_traits sets needsPrebuiltModulePath=false for + # the GCC path. Nothing to assert — pass. + echo "OK (no prebuilt-module-path flag — GCC toolchain)" + exit 0 +fi + +# Every value must be absolute AND point at the actual build cache dir. +fail=0 +while read -r v; do + [[ -z "$v" ]] && continue + echo " checking: $v" + + # Absolute path test: leading '/' on POSIX or ':' on Windows. + if [[ "$v" =~ ^/ || "$v" =~ ^[A-Za-z]: ]]; then + : + else + echo "FAIL: -fprebuilt-module-path value is relative: '$v'" + echo " this breaks clangd because CDB 'directory' = project root," + echo " but the BMI cache lives under target///." + fail=1 + fi + + # Must end with pcm.cache or gcm.cache (sanity). + case "$v" in + */pcm.cache|*/gcm.cache) ;; + *) echo "FAIL: -fprebuilt-module-path value does not end in pcm.cache/gcm.cache: '$v'" + fail=1 ;; + esac +done <<< "$vals" + +[[ $fail -eq 0 ]] || exit 1 + +# Stronger: clangd would `cd` into CDB's directory then resolve. Verify the +# value, taken at face value (already absolute), points at a real dir. +first=$(echo "$vals" | head -1) +if [[ ! -d "$first" ]]; then + echo "FAIL: -fprebuilt-module-path resolves to a non-existent dir: $first" + echo " (build succeeded, so the BMI dir must exist somewhere — this" + echo " means the flag points to the wrong place even with abs path.)" + exit 1 +fi + +echo "OK" From cc1f12d88b38ef6e0ea57d005dfe0e26860aed24 Mon Sep 17 00:00:00 2001 From: sunrisepeak Date: Sun, 24 May 2026 12:43:43 +0800 Subject: [PATCH 2/4] ci/test: un-escape ninja sequences in CDB args; harden e2e for Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First CI round revealed two layered issues exposed by the new e2e: 1. **The shipped CDB carries ninja-escape artefacts.** `flags.cppm`'s `escape_path` ninja-escapes paths (`$ ` for space, `$:` for colon, `$$` for literal `$`) so they survive embedding in ninja rule strings. But the same escaped strings flow verbatim through `f.cxx` into `compile_commands.json`, whose `arguments` array is exec'd by clangd without any ninja involved. On Windows that makes `C:` appear as `C$:` in CDB → clangd can't resolve the path. POSIX usually escaped nothing in practice, hiding the bug; Windows drive letters always trigger `$:`. Fix in `compile_commands.cppm::split_flags`: same single pass now splits tokens *and* unescapes the three ninja sequences. Splitting was also broken for paths with a literal space (would've broken `-isystem "/path with space/include"` mid-arg) — that's fixed simultaneously since `$ ` is no longer a token separator. 2. **e2e test 47 needed Windows-friendly checks.** The original `[^"]+` grep returned the raw JSON-escaped form (`\\Users\\...`) and the `^[A-Za-z]:` regex didn't match `C$:`. Rewrote using `jq -r` (which does JSON un-escape natively, preinstalled on all GHA runners) and: - explicit ninja-escape sniff (`$:`, `$ `, `$$`) - both POSIX and Windows-drive absolute-path checks - basename check normalised against `\` / `/` Net: CDB now contains plain paths in both runtimes, and the test fails loudly if either layer regresses. --- src/build/compile_commands.cppm | 49 +++++++++--- tests/e2e/47_cdb_prebuilt_module_path_abs.sh | 82 ++++++++++++-------- 2 files changed, 86 insertions(+), 45 deletions(-) diff --git a/src/build/compile_commands.cppm b/src/build/compile_commands.cppm index 29809aa..5f6b4c1 100644 --- a/src/build/compile_commands.cppm +++ b/src/build/compile_commands.cppm @@ -37,20 +37,47 @@ bool is_c_source(const std::filesystem::path& src) { return src.extension() == ".c"; } -// Split a flag string into individual tokens. +// Split a flag string into individual tokens AND un-escape ninja-style +// path escapes (`$ ` → space, `$:` → `:`, `$$` → `$`). +// +// `flags.cppm::escape_path` ninja-escapes path arguments so they survive +// embedding in ninja rule strings. Those escaped strings are then captured +// into `f.cxx` / `f.cc` which is what we receive here. CDB consumers like +// clangd exec the `arguments` array literally — no ninja involved — so +// escaped chars must be undone or paths like `C:\Users\...` come through +// as `C$:\Users\...` and break clangd's path resolution on Windows. (The +// same issue would silently affect any POSIX path containing a space or +// `$` — those just happen to be rare.) +// +// Splitting and un-escaping in one pass is correct: a literal space inside +// a path appears as `$ ` in the input, which we must NOT treat as a token +// separator. std::vector split_flags(std::string_view s) { std::vector out; - std::size_t i = 0; - while (i < s.size()) { - while (i < s.size() && s[i] == ' ') - ++i; - if (i >= s.size()) - break; - std::size_t start = i; - while (i < s.size() && s[i] != ' ') - ++i; - out.emplace_back(s.substr(start, i - start)); + std::string token; + auto flush = [&] { + if (!token.empty()) { + out.push_back(std::move(token)); + token.clear(); + } + }; + for (std::size_t i = 0; i < s.size(); ++i) { + char c = s[i]; + if (c == '$' && i + 1 < s.size()) { + char nc = s[i + 1]; + if (nc == ' ' || nc == ':' || nc == '$') { + token.push_back(nc); + ++i; + continue; + } + } + if (c == ' ') { + flush(); + } else { + token.push_back(c); + } } + flush(); return out; } diff --git a/tests/e2e/47_cdb_prebuilt_module_path_abs.sh b/tests/e2e/47_cdb_prebuilt_module_path_abs.sh index 72c8f4d..83f5e85 100755 --- a/tests/e2e/47_cdb_prebuilt_module_path_abs.sh +++ b/tests/e2e/47_cdb_prebuilt_module_path_abs.sh @@ -1,10 +1,14 @@ #!/usr/bin/env bash # requires: # 47_cdb_prebuilt_module_path_abs.sh — `-fprebuilt-module-path` in -# compile_commands.json must be an ABSOLUTE path, not a bare `pcm.cache` / -# `gcm.cache`. Reason: CDB `directory` is the project root, but ninja runs -# from the outputDir; a bare relative path works at build time only and -# silently breaks clangd's module resolution ("module 'X' not found"). +# compile_commands.json must be an ABSOLUTE path, NOT a bare `pcm.cache`, +# AND must not carry ninja-escape artefacts like `C$:` on Windows. +# Reason: CDB `directory` is the project root and clangd does `cd +# directory` before running the args, so a bare relative path points at +# `/pcm.cache` (missing) and a `C$:` prefix is treated as a +# literal string, not a Windows drive letter. Both modes silently break +# clangd's module resolution while `mcpp build` itself keeps working +# (ninja runs from outputDir AND unescapes its own escape sequences). set -e TMP=$(mktemp -d) @@ -18,51 +22,61 @@ cd app cdb=compile_commands.json [[ -f "$cdb" ]] || { echo "FAIL: no $cdb generated"; exit 1; } -# Extract every -fprebuilt-module-path= token. -# (No jq dependency — grep is enough and portable on macOS / git-bash.) -vals=$(grep -oE '\-fprebuilt-module-path=[^"]+' "$cdb" | sed 's/^-fprebuilt-module-path=//') -if [[ -z "$vals" ]]; then - # No -fprebuilt-module-path emitted = GCC toolchain that uses -fmodules - # only (gcm.cache). bmi_traits sets needsPrebuiltModulePath=false for - # the GCC path. Nothing to assert — pass. +command -v jq >/dev/null 2>&1 || { + echo "SKIP: jq not on PATH (preinstalled on GitHub-hosted runners)" + exit 0 +} + +# jq returns each value JSON-unescaped (\\ → \, etc.). +mapfile -t vals < <( + jq -r ' + .[] | .arguments[]? + | select(type == "string" and startswith("-fprebuilt-module-path=")) + | sub("^-fprebuilt-module-path="; "") + ' "$cdb" +) + +if [[ ${#vals[@]} -eq 0 ]]; then + # GCC's libstdc++ flow uses -fmodules / gcm.cache without the explicit + # -fprebuilt-module-path flag (see bmi_traits.needsPrebuiltModulePath). + # Nothing to assert in that mode. echo "OK (no prebuilt-module-path flag — GCC toolchain)" exit 0 fi -# Every value must be absolute AND point at the actual build cache dir. fail=0 -while read -r v; do - [[ -z "$v" ]] && continue +for v in "${vals[@]}"; do echo " checking: $v" - # Absolute path test: leading '/' on POSIX or ':' on Windows. + # Must NOT carry ninja-escape artefacts. The key signal is `$:` (drive + # letter) or `$ ` / `$$` (path with space / dollar). If any of these + # survives into CDB the JSON-args runtime treats them as literal text + # → clangd fails to find the BMI. + if [[ "$v" == *'$:'* || "$v" == *'$ '* || "$v" == *'$$'* ]]; then + echo "FAIL: value retains ninja escape sequence ('\$:' / '\$ ' / '\$\$') — must be plain path in CDB" + fail=1 + fi + + # Absolute: POSIX (starts with '/') or Windows drive (e.g. 'C:'). if [[ "$v" =~ ^/ || "$v" =~ ^[A-Za-z]: ]]; then : else - echo "FAIL: -fprebuilt-module-path value is relative: '$v'" - echo " this breaks clangd because CDB 'directory' = project root," - echo " but the BMI cache lives under target///." + echo "FAIL: value is relative: '$v'" + echo " CDB 'directory' is the project root, but the BMI cache" + echo " lives under target/// — clangd resolves to" + echo " the wrong location and module imports fail." fail=1 fi - # Must end with pcm.cache or gcm.cache (sanity). - case "$v" in - */pcm.cache|*/gcm.cache) ;; - *) echo "FAIL: -fprebuilt-module-path value does not end in pcm.cache/gcm.cache: '$v'" + # Basename must be pcm.cache or gcm.cache (cross-platform: normalise + # backslashes first so Windows paths like 'C:\foo\pcm.cache' work). + normalised="${v//\\//}" + case "${normalised##*/}" in + pcm.cache|gcm.cache) ;; + *) echo "FAIL: basename is not pcm.cache/gcm.cache: '${normalised##*/}'" fail=1 ;; esac -done <<< "$vals" +done [[ $fail -eq 0 ]] || exit 1 - -# Stronger: clangd would `cd` into CDB's directory then resolve. Verify the -# value, taken at face value (already absolute), points at a real dir. -first=$(echo "$vals" | head -1) -if [[ ! -d "$first" ]]; then - echo "FAIL: -fprebuilt-module-path resolves to a non-existent dir: $first" - echo " (build succeeded, so the BMI dir must exist somewhere — this" - echo " means the flag points to the wrong place even with abs path.)" - exit 1 -fi - echo "OK" From 43af760a5463a397c23ca6b330ec82a78c793141 Mon Sep 17 00:00:00 2001 From: sunrisepeak Date: Sun, 24 May 2026 12:52:51 +0800 Subject: [PATCH 3/4] test(e2e/47): strip trailing CR from jq output on git-bash/Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Windows CI run reported: FAIL: basename is not pcm.cache/gcm.cache: 'pcm.cache ' The closing quote on the next line is the giveaway — the value carries a trailing `\r`. `jq.exe` on git-bash emits CRLF; `mapfile -t` strips LF but leaves CR, so every downstream comparison saw `pcm.cache$'\r'` and failed. Trim with `${v%$'\r'}` per element. The source-code fix (un-escape ninja sequences) is working — the value itself is now a clean `C:\Users\...\pcm.cache`, no `$:` artefact. --- tests/e2e/47_cdb_prebuilt_module_path_abs.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/e2e/47_cdb_prebuilt_module_path_abs.sh b/tests/e2e/47_cdb_prebuilt_module_path_abs.sh index 83f5e85..f2956e9 100755 --- a/tests/e2e/47_cdb_prebuilt_module_path_abs.sh +++ b/tests/e2e/47_cdb_prebuilt_module_path_abs.sh @@ -46,6 +46,10 @@ fi fail=0 for v in "${vals[@]}"; do + # `jq` on git-bash/Windows emits CRLF line endings; mapfile strips the LF + # but leaves a trailing CR which then poisons every downstream string + # comparison (basename ends with `\r`, regex matches go sideways). + v="${v%$'\r'}" echo " checking: $v" # Must NOT carry ninja-escape artefacts. The key signal is `$:` (drive From 098b04f8eb3ea52e06de03ab9c89c00eb29d4a28 Mon Sep 17 00:00:00 2001 From: sunrisepeak Date: Sun, 24 May 2026 13:03:24 +0800 Subject: [PATCH 4/4] test(e2e/47): drop bash-4 `mapfile` for macOS bash 3.2 compat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apple ships GPLv2-era bash 3.2 on macOS, which has no `mapfile` / `readarray`. The previous round failed on ci-macos with: line 31: mapfile: command not found FAIL: 47_cdb_prebuilt_module_path_abs.sh (exit 127) Replace `mapfile -t vals < <(jq …)` with `jq … > tmp.txt` followed by a `while … read … done < tmp.txt`. Using input redirection (not a `|` pipeline) keeps the loop in the current shell so `fail=1` propagates; the temp file lives under $TMP and is cleaned up by the script's trap. Empty-list signal now uses `[[ ! -s "$TMP/vals.txt" ]]` instead of array length — same semantics, no bashism. --- tests/e2e/47_cdb_prebuilt_module_path_abs.sh | 30 +++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/tests/e2e/47_cdb_prebuilt_module_path_abs.sh b/tests/e2e/47_cdb_prebuilt_module_path_abs.sh index f2956e9..6283322 100755 --- a/tests/e2e/47_cdb_prebuilt_module_path_abs.sh +++ b/tests/e2e/47_cdb_prebuilt_module_path_abs.sh @@ -27,16 +27,18 @@ command -v jq >/dev/null 2>&1 || { exit 0 } -# jq returns each value JSON-unescaped (\\ → \, etc.). -mapfile -t vals < <( - jq -r ' - .[] | .arguments[]? - | select(type == "string" and startswith("-fprebuilt-module-path=")) - | sub("^-fprebuilt-module-path="; "") - ' "$cdb" -) +# jq returns each value JSON-unescaped (\\ → \, etc.). Stage to a temp +# file then read with a redirected while loop — bash 3.2 on macOS lacks +# `mapfile`/`readarray`, and a `| while` pipeline puts the loop in a +# subshell so `fail=1` would not propagate. Input redirection runs the +# loop in the current shell, preserving the flag. +jq -r ' + .[] | .arguments[]? + | select(type == "string" and startswith("-fprebuilt-module-path=")) + | sub("^-fprebuilt-module-path="; "") +' "$cdb" > "$TMP/vals.txt" -if [[ ${#vals[@]} -eq 0 ]]; then +if [[ ! -s "$TMP/vals.txt" ]]; then # GCC's libstdc++ flow uses -fmodules / gcm.cache without the explicit # -fprebuilt-module-path flag (see bmi_traits.needsPrebuiltModulePath). # Nothing to assert in that mode. @@ -45,11 +47,11 @@ if [[ ${#vals[@]} -eq 0 ]]; then fi fail=0 -for v in "${vals[@]}"; do - # `jq` on git-bash/Windows emits CRLF line endings; mapfile strips the LF - # but leaves a trailing CR which then poisons every downstream string - # comparison (basename ends with `\r`, regex matches go sideways). +while IFS= read -r v; do + # `jq` on git-bash/Windows emits CRLF; strip the trailing CR so basename + # / regex comparisons don't trip over an invisible `\r`. v="${v%$'\r'}" + [[ -z "$v" ]] && continue echo " checking: $v" # Must NOT carry ninja-escape artefacts. The key signal is `$:` (drive @@ -80,7 +82,7 @@ for v in "${vals[@]}"; do *) echo "FAIL: basename is not pcm.cache/gcm.cache: '${normalised##*/}'" fail=1 ;; esac -done +done < "$TMP/vals.txt" [[ $fail -eq 0 ]] || exit 1 echo "OK"