Skip to content

Commit b7698ce

Browse files
authored
feat: add rebuild-fts command for FTS5 shadow-table corruption (#287) (#288)
Closes #287. ## What this adds - **`msgvault rebuild-fts`** — drops and recreates `messages_fts`, then repopulates it from `messages` / `message_bodies` / `message_recipients` / `participants` via the existing batched backfill. Recovery path for `malformed inverted index for FTS5 table main.messages_fts` in `verify` output — the case where SQLite's own `rebuild` pragma and `delete-all` don't help on a contentful FTS5 table. Wraps `SQLITE_BUSY`/`SQLITE_LOCKED` as "stop msgvault serve / MCP and retry." - **`verify` ordering** — integrity check now runs before OAuth setup, so corrupt databases surface the repair hint even with an expired token. - **`verify` hints** — split between FTS-only corruption (points at `rebuild-fts`) and core-table corruption (points at `.recover`). - **`docs/recovery.md`** — covers both paths and the contentful-FTS5 caveat. ## What this does not cover - Core-table B-tree corruption (e.g., `Rowid out of order` in `messages` / `message_bodies`). `rebuild-fts` is strictly for the derived index; core corruption still needs `.recover`. - `_synchronous=FULL` default. Worth considering as archival-durability hardening in a separate PR; not framed as the fix for #287. ## Usage ``` msgvault rebuild-fts ``` Stop `msgvault serve` and MCP clients first — needs an exclusive write lock. Peak extra disk ≈ size of the FTS5 shadow tables (a few percent of the DB).
1 parent f0bd7bf commit b7698ce

11 files changed

Lines changed: 600 additions & 42 deletions

File tree

cmd/msgvault/cmd/rebuild_fts.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
package cmd
2+
3+
import (
4+
"fmt"
5+
"os"
6+
"strings"
7+
8+
"github.com/spf13/cobra"
9+
"github.com/wesm/msgvault/internal/store"
10+
)
11+
12+
var rebuildFTSCmd = &cobra.Command{
13+
Use: "rebuild-fts",
14+
Short: "Rebuild the full-text search index from scratch",
15+
Long: `Drop and recreate the messages_fts virtual table, then repopulate it
16+
from messages / message_bodies / message_recipients / participants.
17+
18+
Use this to recover from FTS5 shadow-table corruption that surfaces as
19+
"malformed inverted index for FTS5 table main.messages_fts" in
20+
'msgvault verify' output. SQLite's own 'rebuild' pragma reads from the
21+
same corrupt shadow tables and cannot clear this state.
22+
23+
This command only fixes the derived search index. Core-table corruption
24+
(e.g., "Rowid out of order" in messages / message_bodies B-trees) requires
25+
a different recovery path — see 'msgvault verify' output.
26+
27+
Peak extra disk usage is roughly the size of the FTS5 shadow tables
28+
(a few percent of the SQLite database). Stop 'msgvault serve' and any
29+
MCP clients before running this command — it needs an exclusive write lock.`,
30+
RunE: func(cmd *cobra.Command, args []string) error {
31+
dbPath := cfg.DatabaseDSN()
32+
s, err := store.Open(dbPath)
33+
if err != nil {
34+
return fmt.Errorf("open database: %w", err)
35+
}
36+
defer func() { _ = s.Close() }()
37+
38+
if err := s.InitSchema(); err != nil {
39+
return fmt.Errorf("init schema: %w", err)
40+
}
41+
42+
fmt.Fprintln(os.Stderr, "Rebuilding full-text search index...")
43+
n, err := s.RebuildFTS(func(done, total int64) {
44+
if total <= 0 {
45+
return
46+
}
47+
if done > total {
48+
done = total
49+
}
50+
pct := int(done * 100 / total)
51+
barWidth := 30
52+
filled := barWidth * pct / 100
53+
bar := strings.Repeat("=", filled) +
54+
strings.Repeat(" ", barWidth-filled)
55+
fmt.Fprintf(os.Stderr, "\r [%s] %3d%%", bar, pct)
56+
})
57+
if err != nil {
58+
fmt.Fprintln(os.Stderr)
59+
if s.IsBusyError(err) {
60+
return fmt.Errorf(
61+
"database is busy — stop 'msgvault serve' and any MCP " +
62+
"clients, then retry",
63+
)
64+
}
65+
return fmt.Errorf("rebuild FTS: %w", err)
66+
}
67+
fmt.Fprintf(os.Stderr,
68+
"\r [%s] 100%% %d messages indexed.\n",
69+
strings.Repeat("=", 30), n)
70+
return nil
71+
},
72+
}
73+
74+
// init adds rebuildFTSCmd to rootCmd so the rebuild-fts subcommand is
// available from the command line.
func init() {
75+
rootCmd.AddCommand(rebuildFTSCmd)
76+
}

cmd/msgvault/cmd/verify.go

Lines changed: 71 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"fmt"
77
"os"
88
"os/signal"
9+
"strings"
910
"syscall"
1011

1112
"github.com/mattn/go-isatty"
@@ -53,6 +54,33 @@ Examples:
5354
return fmt.Errorf("init schema: %w", err)
5455
}
5556

57+
// Run SQLite integrity check before any Gmail work. Users with a
58+
// corrupt database should see the repair hint even if their OAuth
59+
// token is expired or the network is down.
60+
var dbCorrupt bool
61+
if !verifySkipDBCheck {
62+
fmt.Println("Running database integrity check...")
63+
integrityErrors, err := runIntegrityCheck(s)
64+
if err != nil {
65+
return fmt.Errorf("integrity check failed: %w", err)
66+
}
67+
if len(integrityErrors) == 0 {
68+
fmt.Println(" Database integrity: OK")
69+
} else {
70+
dbCorrupt = true
71+
fmt.Printf(" Database integrity: FAILED (%d errors)\n", len(integrityErrors))
72+
for i, ie := range integrityErrors {
73+
if i >= 10 {
74+
fmt.Printf(" ... and %d more errors\n", len(integrityErrors)-10)
75+
break
76+
}
77+
fmt.Printf(" - %s\n", ie)
78+
}
79+
printIntegrityRecoveryHint(integrityErrors)
80+
}
81+
fmt.Println()
82+
}
83+
5684
// Look up source to get OAuth app binding
5785
appName := ""
5886
src, srcErr := findGmailSource(s, email)
@@ -72,10 +100,6 @@ Examples:
72100
return err
73101
}
74102

75-
if err := s.InitSchema(); err != nil {
76-
return fmt.Errorf("init schema: %w", err)
77-
}
78-
79103
// Create OAuth manager and get token source
80104
oauthMgr, err := oauth.NewManager(clientSecretsPath, cfg.TokensDir(), logger)
81105
if err != nil {
@@ -106,35 +130,6 @@ Examples:
106130
client := gmail.NewClient(tokenSource, gmail.WithLogger(logger))
107131
defer func() { _ = client.Close() }()
108132

109-
// Run SQLite integrity check first (offline, no Gmail needed)
110-
var dbCorrupt bool
111-
if !verifySkipDBCheck {
112-
fmt.Println("Running database integrity check...")
113-
integrityErrors, err := runIntegrityCheck(s)
114-
if err != nil {
115-
return fmt.Errorf("integrity check failed: %w", err)
116-
}
117-
if len(integrityErrors) == 0 {
118-
fmt.Println(" Database integrity: OK")
119-
} else {
120-
dbCorrupt = true
121-
fmt.Printf(" Database integrity: FAILED (%d errors)\n", len(integrityErrors))
122-
for i, ie := range integrityErrors {
123-
if i >= 10 {
124-
fmt.Printf(" ... and %d more errors\n", len(integrityErrors)-10)
125-
break
126-
}
127-
fmt.Printf(" - %s\n", ie)
128-
}
129-
fmt.Println()
130-
fmt.Println(" The database has corruption. Consider:")
131-
fmt.Println(" 1. Back up the database file before any repair attempts")
132-
fmt.Println(" 2. Run: sqlite3 msgvault.db '.recover' | sqlite3 recovered.db")
133-
fmt.Println(" 3. Or export to SQL and reimport: sqlite3 msgvault.db .dump | sqlite3 new.db")
134-
}
135-
fmt.Println()
136-
}
137-
138133
// Get Gmail profile
139134
profile, err := client.GetProfile(ctx)
140135
if err != nil {
@@ -286,6 +281,49 @@ func runIntegrityCheck(s *store.Store) ([]string, error) {
286281
return errors, rows.Err()
287282
}
288283

284+
// printIntegrityRecoveryHint prints repair guidance tailored to the kind of
285+
// corruption reported. FTS5 shadow-table corruption is fixable with the
286+
// lightweight `rebuild-fts` command; core B-tree corruption needs `.recover`,
287+
// which requires free disk roughly equal to the database size.
288+
func printIntegrityRecoveryHint(integrityErrors []string) {
289+
var ftsErrs, coreErrs int
290+
for _, e := range integrityErrors {
291+
if isFTSIntegrityError(e) {
292+
ftsErrs++
293+
} else {
294+
coreErrs++
295+
}
296+
}
297+
298+
fmt.Println()
299+
fmt.Println(" Back up msgvault.db before attempting any repair.")
300+
fmt.Println()
301+
302+
if ftsErrs > 0 {
303+
fmt.Println(" Search index (FTS5) corruption:")
304+
fmt.Println(" Run: msgvault rebuild-fts")
305+
fmt.Println(" Drops and recreates messages_fts from the core tables.")
306+
fmt.Println(" SQLite's 'rebuild' pragma reads from the corrupt shadow")
307+
fmt.Println(" tables and cannot clear this state.")
308+
fmt.Println()
309+
}
310+
311+
if coreErrs > 0 {
312+
fmt.Println(" Core table corruption (e.g., Rowid out of order in messages")
313+
fmt.Println(" or message_bodies):")
314+
fmt.Println(" Run: sqlite3 msgvault.db '.recover' | sqlite3 recovered.db")
315+
fmt.Println(" (requires free disk roughly equal to the database size)")
316+
fmt.Println(" Alternative: sqlite3 msgvault.db .dump | sqlite3 new.db")
317+
}
318+
}
319+
320+
// isFTSIntegrityError reports whether an integrity-check line describes
// corruption in the FTS5 search index rather than the core tables.
//
// A line counts as FTS-related when it mentions the messages_fts table or
// FTS5 itself. Because this is a substring match, derived names such as
// messages_fts_idx are covered too. Matching is case-insensitive, so a
// diagnostic spelled "fts5" is classified the same way as one spelled "FTS5".
// An empty line is never an FTS error.
func isFTSIntegrityError(msg string) bool {
	lower := strings.ToLower(msg)
	return strings.Contains(lower, "messages_fts") ||
		strings.Contains(lower, "fts5")
}
326+
289327
func init() {
290328
verifyCmd.Flags().IntVar(&verifySampleSize, "sample", 100, "Number of messages to sample for MIME verification")
291329
verifyCmd.Flags().BoolVar(&verifySkipDBCheck, "skip-db-check", false, "Skip SQLite integrity check")

cmd/msgvault/cmd/verify_test.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package cmd
2+
3+
import "testing"
4+
5+
// TestIsFTSIntegrityError_Classification verifies that the hint-classifier
6+
// cleanly separates FTS5 shadow-table errors (which rebuild-fts can fix)
7+
// from core-table errors (which need .recover). Messages come from real
8+
// PRAGMA integrity_check output; the shapes below are what users will see.
9+
func TestIsFTSIntegrityError_Classification(t *testing.T) {
10+
tests := []struct {
11+
msg string
12+
wantFT bool
13+
}{
14+
{
15+
msg: "malformed inverted index for FTS5 table main.messages_fts",
16+
wantFT: true,
17+
},
18+
{
19+
msg: "row 42 missing from index messages_fts_idx",
20+
wantFT: true,
21+
},
22+
{
23+
msg: "Tree 26 page 8231140 cell 2: Rowid 421177 out of order",
24+
wantFT: false,
25+
},
26+
{
27+
msg: "non-unique entry in index sqlite_autoindex_messages_1",
28+
wantFT: false,
29+
},
30+
{
31+
msg: "",
32+
wantFT: false,
33+
},
34+
}
35+
36+
for _, tc := range tests {
37+
if got := isFTSIntegrityError(tc.msg); got != tc.wantFT {
38+
t.Errorf("isFTSIntegrityError(%q) = %v, want %v", tc.msg, got, tc.wantFT)
39+
}
40+
}
41+
}

docs/recovery.md

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Database Recovery
2+
3+
`msgvault verify you@gmail.com` runs `PRAGMA integrity_check` against the
4+
SQLite database before the Gmail comparison step. When that check fails,
5+
the recovery path depends on which part of the database is affected.
6+
7+
## Search-index (FTS5) corruption
8+
9+
Symptom (from `msgvault verify` output):
10+
11+
```
12+
malformed inverted index for FTS5 table main.messages_fts
13+
```
14+
15+
Fix:
16+
17+
```
18+
msgvault rebuild-fts
19+
```
20+
21+
This drops `messages_fts` and recreates it from the core tables
22+
(`messages`, `message_bodies`, `message_recipients`, `participants`). Peak
23+
extra disk usage is roughly the size of the FTS5 shadow tables — a few
24+
percent of the SQLite database.
25+
26+
Stop `msgvault serve` and any MCP clients before running; `rebuild-fts`
27+
needs an exclusive write lock and will fail with a "database is busy"
28+
message otherwise.
29+
30+
### Why SQLite's own rebuild pragma does not work
31+
32+
`INSERT INTO messages_fts(messages_fts) VALUES('rebuild')` (strictly an FTS5
33+
special command rather than a `PRAGMA`) regenerates the FTS5 inverted index
34+
from the content held in the table's own shadow tables. If those shadow tables
35+
are already malformed, the rebuild reads the corruption right back out.
36+
37+
`INSERT INTO messages_fts(messages_fts) VALUES('delete-all')` is rejected
38+
with `'delete-all' may only be used with a contentless or external content
39+
fts5 table`. msgvault's `messages_fts` is contentful by design (it stores
40+
its own copy of the searchable text), so `delete-all` is not available.
41+
42+
`rebuild-fts` sidesteps both: it drops the virtual table entirely — which
43+
removes the shadow tables — then recreates it fresh and repopulates from
44+
the core tables.
45+
46+
## Core-table corruption
47+
48+
Symptom:
49+
50+
```
51+
Tree 26 page 8231140 cell 2: Rowid 421177 out of order
52+
non-unique entry in index sqlite_autoindex_messages_1
53+
```
54+
55+
Fix (requires free disk roughly equal to the size of the database):
56+
57+
```
58+
sqlite3 ~/.msgvault/msgvault.db '.recover' | sqlite3 ~/.msgvault/recovered.db
59+
mv ~/.msgvault/msgvault.db ~/.msgvault/msgvault.db.bak
60+
mv ~/.msgvault/recovered.db ~/.msgvault/msgvault.db
61+
msgvault verify you@gmail.com
62+
```
63+
64+
A leaner alternative that works on cleaner corruption:
65+
66+
```
67+
sqlite3 ~/.msgvault/msgvault.db .dump | sqlite3 ~/.msgvault/new.db
68+
```
69+
70+
If free disk is tight, individual corrupt rows can sometimes be repaired
71+
by hand — delete and re-insert the affected row(s) from their source
72+
(MIME blob, etc.). This is a last resort and only advisable if you can
73+
identify the specific rows flagged by `integrity_check`.
74+
75+
## Before any repair
76+
77+
Back up the database file. If the repair tool is interrupted or makes
78+
things worse, the backup is the only way back:
79+
80+
```
81+
cp ~/.msgvault/msgvault.db ~/.msgvault/msgvault.db.bak
82+
```
83+
84+
If the database has any activity, also copy the `-wal` and `-shm` sidecar
85+
files at the same instant (or run `msgvault` once to checkpoint the WAL
86+
into the main file before copying). A bare `.db` copy without its sidecars
87+
can itself be a source of corruption.

internal/store/dialect.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,14 @@ type Dialect interface {
9090
// (e.g., PostgreSQL includes tsvector in its main schema).
9191
SchemaFTS() string
9292

93+
// FTSRebuildSchema tears down and recreates the FTS infrastructure from
94+
// scratch — the caller is expected to follow up with a full backfill.
95+
// Used to recover from malformed FTS shadow-table state that in-place
96+
// rebuild operations (e.g., SQLite's rebuild pragma) cannot clear.
97+
// SQLite: DROP TABLE IF EXISTS messages_fts + re-execute schema_sqlite.sql.
98+
// PostgreSQL: TODO (REINDEX / recompute tsvector column).
99+
FTSRebuildSchema(db *sql.DB) error
100+
93101
// Connection lifecycle
94102

95103
// InitConn performs driver-specific connection initialization.
@@ -128,4 +136,10 @@ type Dialect interface {
128136
// This handles SQLite < 3.35 which doesn't support RETURNING.
129137
// Always false for PostgreSQL (which always supports RETURNING).
130138
IsReturningError(err error) bool
139+
140+
// IsBusyError returns true if the error indicates the database is held
141+
// by another connection, either busy (SQLITE_BUSY) or locked
142+
// (SQLITE_LOCKED). Used to surface actionable errors from maintenance
143+
// commands that need exclusive access.
144+
IsBusyError(err error) bool
131145
}

0 commit comments

Comments
 (0)