From 13063f998a43a9b3904cf8b338c038a51fb80150 Mon Sep 17 00:00:00 2001 From: George Giorgidze Date: Sun, 24 May 2026 23:01:07 -0700 Subject: [PATCH 1/2] preload-index: respect --really-refresh override of assume-unchanged When refresh_index() is invoked with REFRESH_REALLY (e.g. via "git update-index --really-refresh"), the documented behaviour is that the "assume unchanged" bit on cache entries is disregarded so that stale stat data on those entries is still refreshed. The preload pass runs before the single-threaded refresh loop and is intended to mark up-to-date entries quickly so the slow path only has to deal with the leftovers. However, preload_thread() unconditionally called ie_match_stat() with CE_MATCH_RACY_IS_DIRTY|CE_MATCH_IGNORE_FSMONITOR and never with CE_MATCH_IGNORE_VALID, so it honoured the "assume unchanged" bit. When a modified file's entry was marked assume-unchanged, preload would conclude the entry was clean and call ce_mark_uptodate(); the subsequent --really-refresh loop would then skip the entry (because ce_uptodate(ce) is true) and never report it as needing an update. This only manifests when preload is active, so it has been latent in default configurations. It is observable today via GIT_TEST_PRELOAD_INDEX=1. Plumb the refresh flags through to the preload threads via a new refresh_flags field on struct thread_data, and have preload_thread() add CE_MATCH_IGNORE_VALID to its match options when REFRESH_REALLY is in effect. Update refresh_index() to pass "flags & REFRESH_REALLY" to preload_index() instead of a bare 0. Add a regression test under t2106 that forces preload on and confirms that "update-index --really-refresh" reports a modified assume-unchanged entry as needing update. 
Signed-off-by: George Giorgidze --- preload-index.c | 7 ++++++- read-cache.c | 2 +- t/t2106-update-index-assume-unchanged.sh | 11 +++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/preload-index.c b/preload-index.c index b222821b448526..88bb4863293452 100644 --- a/preload-index.c +++ b/preload-index.c @@ -42,6 +42,7 @@ struct thread_data { struct progress_data *progress; int offset, nr; int t2_nr_lstat; + unsigned int refresh_flags; }; static void *preload_thread(void *_data) @@ -60,6 +61,7 @@ static void *preload_thread(void *_data) do { struct cache_entry *ce = *cep++; struct stat st; + unsigned int ce_option = CE_MATCH_RACY_IS_DIRTY | CE_MATCH_IGNORE_FSMONITOR; if (ce_stage(ce)) continue; @@ -87,7 +89,9 @@ static void *preload_thread(void *_data) p->t2_nr_lstat++; if (lstat(ce->name, &st)) continue; - if (ie_match_stat(index, ce, &st, CE_MATCH_RACY_IS_DIRTY|CE_MATCH_IGNORE_FSMONITOR)) + if (p->refresh_flags & REFRESH_REALLY) + ce_option |= CE_MATCH_IGNORE_VALID; + if (ie_match_stat(index, ce, &st, ce_option)) continue; ce_mark_uptodate(ce); mark_fsmonitor_valid(index, ce); @@ -150,6 +154,7 @@ void preload_index(struct index_state *index, copy_pathspec(&p->pathspec, pathspec); p->offset = offset; p->nr = work; + p->refresh_flags = refresh_flags; if (pd.progress) p->progress = &pd; offset += work; diff --git a/read-cache.c b/read-cache.c index 38a04b8de3d7fb..f5023b9a8bf280 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1520,7 +1520,7 @@ int refresh_index(struct index_state *istate, unsigned int flags, * cache entries quickly then in the single threaded loop below, * we only have to do the special cases that are left. 
*/ - preload_index(istate, pathspec, 0); + preload_index(istate, pathspec, flags & REFRESH_REALLY); trace2_region_enter("index", "refresh", NULL); for (i = 0; i < istate->cache_nr; i++) { diff --git a/t/t2106-update-index-assume-unchanged.sh b/t/t2106-update-index-assume-unchanged.sh index 6b2ccc21a937df..266c88d45dc82f 100755 --- a/t/t2106-update-index-assume-unchanged.sh +++ b/t/t2106-update-index-assume-unchanged.sh @@ -24,4 +24,15 @@ test_expect_success 'do not switch branches with dirty file' ' test_grep overwritten err ' +test_expect_success '--really-refresh overrides assume-unchanged under preload' ' + git reset --hard && + test_commit really-refresh really-refresh original && + git update-index --assume-unchanged really-refresh && + printf "modified\n" >really-refresh && + test-tool chmtime -100000 really-refresh && + test_must_fail env GIT_TEST_PRELOAD_INDEX=1 \ + git update-index --really-refresh >out 2>err && + test_grep "needs update" out +' + test_done From a235b9e31d74a04c28701b11dee8e74ff7dc4d9e Mon Sep 17 00:00:00 2001 From: George Giorgidze Date: Sun, 24 May 2026 23:03:27 -0700 Subject: [PATCH 2/2] update-index: add --refresh-stat-only When a working tree is copied from another machine, or restored from a tarball, container image, or CI cache on the same machine, the files may be byte-for-byte identical while cached stat data in the index no longer matches. Backup and sync tools can preserve mtimes, but fields like inode and device numbers are filesystem-local, so large repositories can still end up paying for expensive refresh checks on every "git status". Git already has runtime configuration for reducing which stat fields are checked, such as core.checkStat=minimal. That affects how future checks interpret cached stat data, but it does not provide a one-shot way to update the index's cached stat data to match the current filesystem without also rehashing file contents. 
Setting core.checkStat=minimal is "sticky": it weakens stat checking for every subsequent operation in the repository for as long as the setting remains in effect, rather than performing a single, bounded correction at a well-defined point. A similar idea was discussed on the list in January 2017 under the name "--assume-content-unchanged"; see the thread starting at <20170105112359.GN8116@chrystal.oracle.com>. The concern raised there was that exposing a way to update cached stat data without content comparison opens the index to abuse: an interactive user could skip a slow refresh, lie to Git about the worktree, then file a bug after a later merge corrupts a file. That concern is taken seriously here, and this proposal is deliberately narrower than the 2017 one: * It is a one-shot action, not a sticky configuration or per-entry bit. The name --refresh-stat-only reflects that: it describes what the command does in a single invocation, not a trust state attached to entries (contrast with --assume-unchanged). * The trust assertion is intended for closed-loop callers (CI cache restore, container provisioning, backup/restore tooling) where the worktree and the index were produced or verified together by the same process. It is not a knob for interactive users to reach for when "git status" feels slow. * The failure mode is named directly in the documentation: if the worktree does not in fact match the index, affected entries will appear clean while the recorded object ID remains stale. The user has to request this behaviour explicitly by typing the flag, whose description carries that warning. This is a narrower contract than core.checkStat=minimal, which silently affects every subsequent operation. Container-based CI has become the dominant deployment model in the years since that 2017 discussion. 
The current workaround -- setting core.checkStat=minimal in every job step, or accepting the cost of full content rehashing -- is operationally fragile: it requires every step in every pipeline to set and preserve the configuration, and it permanently weakens stat semantics for every command those steps run. A single explicit invocation at restore time is a tighter, more local fix. Teach git update-index --refresh-stat-only to refresh only cached stat information. It follows the existing refresh machinery, but skips ie_modified() and treats racy entries as dirty by stat instead of resolving them by content. Like --really-refresh, it ignores the "assume unchanged" setting, so stale stat data on those entries is still updated; that behaviour is documented alongside the flag. The preload pass is extended to recognise REFRESH_STAT_ONLY (on top of REFRESH_REALLY, which was wired up in the preceding commit) so that assume-unchanged entries are not marked uptodate before the main refresh path can update them. Add tests covering object ID preservation, missing-file handling with and without --ignore-missing, assume-unchanged override, and quiet output. Signed-off-by: George Giorgidze --- Documentation/git-update-index.adoc | 19 ++++++++ builtin/update-index.c | 12 +++++ preload-index.c | 2 +- read-cache-ll.h | 3 ++ read-cache.c | 24 +++++---- t/meson.build | 1 + t/t2109-update-index-refresh-stat-only.sh | 59 +++++++++++++++++++++++ 7 files changed, 110 insertions(+), 10 deletions(-) create mode 100755 t/t2109-update-index-refresh-stat-only.sh diff --git a/Documentation/git-update-index.adoc b/Documentation/git-update-index.adoc index 9bea9fab9ad1fa..2840a52ae466ad 100644 --- a/Documentation/git-update-index.adoc +++ b/Documentation/git-update-index.adoc @@ -109,6 +109,25 @@ you will need to handle the situation manually. Like `--refresh`, but checks stat information unconditionally, without regard to the "assume unchanged" setting. 
+--refresh-stat-only:: + Like `--refresh`, but updates only the stat information + in the index, without rehashing the file contents. This is + useful for large repositories after a working tree has been + produced or restored by means other than a normal checkout -- + for example, a CI cache restore, container provisioning, or + copying a working tree from another machine -- when the file + contents are known to be correct but the cached stat + information no longer matches. Some backup and syncing tools + preserve mtimes, but inode numbers, device identifiers, and + other filesystem-specific stat fields generally cannot be + preserved across machines or even across mounts on the same + machine. Like `--really-refresh`, this option disregards the + "assume unchanged" setting so that stale stat data on those + entries is still updated. Use with care: if the worktree + content does not actually match what the index records, the + affected entries will appear clean while the recorded object + ID remains stale. 
+ --skip-worktree:: --no-skip-worktree:: When one of these flags is specified, the object names recorded diff --git a/builtin/update-index.c b/builtin/update-index.c index 8a5907767bf297..5e5d2e77c9ee3c 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -766,6 +766,14 @@ static int really_refresh_callback(const struct option *opt, return refresh(opt->value, REFRESH_REALLY); } +static int refresh_stat_only_callback(const struct option *opt, + const char *arg, int unset) +{ + BUG_ON_OPT_NEG(unset); + BUG_ON_OPT_ARG(arg); + return refresh(opt->value, REFRESH_STAT_ONLY); +} + static int chmod_callback(const struct option *opt, const char *arg, int unset) { @@ -957,6 +965,10 @@ int cmd_update_index(int argc, N_("like --refresh, but ignore assume-unchanged setting"), PARSE_OPT_NOARG | PARSE_OPT_NONEG, really_refresh_callback), + OPT_CALLBACK_F(0, "refresh-stat-only", &refresh_args, NULL, + N_("refresh stat information without checking content"), + PARSE_OPT_NOARG | PARSE_OPT_NONEG, + refresh_stat_only_callback), { .type = OPTION_LOWLEVEL_CALLBACK, .long_name = "cacheinfo", diff --git a/preload-index.c b/preload-index.c index 88bb4863293452..9ecc6e4af0d970 100644 --- a/preload-index.c +++ b/preload-index.c @@ -89,7 +89,7 @@ static void *preload_thread(void *_data) p->t2_nr_lstat++; if (lstat(ce->name, &st)) continue; - if (p->refresh_flags & REFRESH_REALLY) + if (p->refresh_flags & (REFRESH_REALLY | REFRESH_STAT_ONLY)) ce_option |= CE_MATCH_IGNORE_VALID; if (ie_match_stat(index, ce, &st, ce_option)) continue; diff --git a/read-cache-ll.h b/read-cache-ll.h index 2c8b4b21b1c7e9..7e4b555a315890 100644 --- a/read-cache-ll.h +++ b/read-cache-ll.h @@ -425,6 +425,8 @@ void *read_blob_data_from_index(struct index_state *, const char *, unsigned lon #define CE_MATCH_REFRESH 0x10 /* don't refresh_fsmonitor state or do stat comparison even if CE_FSMONITOR_VALID is true */ #define CE_MATCH_IGNORE_FSMONITOR 0X20 +/* update stat info without checking content */ 
+#define CE_MATCH_STAT_ONLY 0x40 int is_racy_timestamp(const struct index_state *istate, const struct cache_entry *ce); int has_racy_timestamp(struct index_state *istate); @@ -452,6 +454,7 @@ int fake_lstat(const struct cache_entry *ce, struct stat *st); #define REFRESH_IN_PORCELAIN (1 << 5) /* user friendly output, not "needs update" */ #define REFRESH_PROGRESS (1 << 6) /* show progress bar if stderr is tty */ #define REFRESH_IGNORE_SKIP_WORKTREE (1 << 7) /* ignore skip_worktree entries */ +#define REFRESH_STAT_ONLY (1 << 8) /* update stat info without checking content */ int refresh_index(struct index_state *, unsigned int flags, const struct pathspec *pathspec, char *seen, const char *header_msg); /* * Refresh the index and write it to disk. diff --git a/read-cache.c b/read-cache.c index f5023b9a8bf280..ec9419cbe25f74 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1350,6 +1350,7 @@ static struct cache_entry *refresh_cache_ent(struct index_state *istate, int ignore_skip_worktree = options & CE_MATCH_IGNORE_SKIP_WORKTREE; int ignore_missing = options & CE_MATCH_IGNORE_MISSING; int ignore_fsmonitor = options & CE_MATCH_IGNORE_FSMONITOR; + int stat_only = options & CE_MATCH_STAT_ONLY; if (!refresh || ce_uptodate(ce)) return ce; @@ -1420,12 +1421,14 @@ static struct cache_entry *refresh_cache_ent(struct index_state *istate, } } - if (t2_did_scan) - *t2_did_scan = 1; - if (ie_modified(istate, ce, &st, options)) { - if (err) - *err = EINVAL; - return NULL; + if (!stat_only) { + if (t2_did_scan) + *t2_did_scan = 1; + if (ie_modified(istate, ce, &st, options)) { + if (err) + *err = EINVAL; + return NULL; + } } updated = make_empty_cache_entry(istate, ce_namelen(ce)); @@ -1490,11 +1493,14 @@ int refresh_index(struct index_state *istate, unsigned int flags, int not_new = (flags & REFRESH_IGNORE_MISSING) != 0; int ignore_submodules = (flags & REFRESH_IGNORE_SUBMODULES) != 0; int ignore_skip_worktree = (flags & REFRESH_IGNORE_SKIP_WORKTREE) != 0; + int stat_only = (flags & 
REFRESH_STAT_ONLY) != 0; int first = 1; int in_porcelain = (flags & REFRESH_IN_PORCELAIN); unsigned int options = (CE_MATCH_REFRESH | - (really ? CE_MATCH_IGNORE_VALID : 0) | - (not_new ? CE_MATCH_IGNORE_MISSING : 0)); + ((really || stat_only) ? CE_MATCH_IGNORE_VALID : 0) | + (not_new ? CE_MATCH_IGNORE_MISSING : 0) | + (stat_only ? (CE_MATCH_STAT_ONLY | + CE_MATCH_RACY_IS_DIRTY) : 0)); const char *modified_fmt; const char *deleted_fmt; const char *typechange_fmt; @@ -1520,7 +1526,7 @@ int refresh_index(struct index_state *istate, unsigned int flags, * cache entries quickly then in the single threaded loop below, * we only have to do the special cases that are left. */ - preload_index(istate, pathspec, flags & REFRESH_REALLY); + preload_index(istate, pathspec, flags & (REFRESH_REALLY | REFRESH_STAT_ONLY)); trace2_region_enter("index", "refresh", NULL); for (i = 0; i < istate->cache_nr; i++) { diff --git a/t/meson.build b/t/meson.build index fd955f44efc0be..e1e68921b41c2b 100644 --- a/t/meson.build +++ b/t/meson.build @@ -291,6 +291,7 @@ integration_tests = [ 't2106-update-index-assume-unchanged.sh', 't2107-update-index-basic.sh', 't2108-update-index-refresh-racy.sh', + 't2109-update-index-refresh-stat-only.sh', 't2200-add-update.sh', 't2201-add-update-typechange.sh', 't2202-add-addremove.sh', diff --git a/t/t2109-update-index-refresh-stat-only.sh b/t/t2109-update-index-refresh-stat-only.sh new file mode 100755 index 00000000000000..404d2a6fb29e2e --- /dev/null +++ b/t/t2109-update-index-refresh-stat-only.sh @@ -0,0 +1,59 @@ +#!/bin/sh + +test_description='git update-index --refresh-stat-only' + +. 
./test-lib.sh + +test_expect_success 'setup' ' + test_commit initial base-file base +' + +test_expect_success '--refresh-stat-only updates stat info without rehashing' ' + test_commit refresh-stat refresh-stat original && + git ls-files --stage -- refresh-stat >expect && + git ls-files --debug refresh-stat | grep mtime >before && + printf "modified\n" >refresh-stat && + test-tool chmtime -100000 refresh-stat && + test_must_fail git diff-files --quiet -- refresh-stat && + git update-index --refresh-stat-only && + git ls-files --debug refresh-stat | grep mtime >after && + ! test_cmp before after && + git ls-files --stage -- refresh-stat >actual && + test_cmp expect actual && + git diff-files --quiet -- refresh-stat +' + +test_expect_success '--refresh-stat-only ignores assume-unchanged' ' + test_commit assume-unchanged assume-unchanged old && + git update-index --assume-unchanged assume-unchanged && + printf "new\n" >assume-unchanged && + test-tool chmtime -100000 assume-unchanged && + GIT_TEST_PRELOAD_INDEX=1 git update-index --refresh-stat-only && + git update-index --no-assume-unchanged assume-unchanged && + git diff-files --quiet -- assume-unchanged +' + +test_expect_success '--refresh-stat-only with missing file and --ignore-missing' ' + test_commit missing-ignore missing-ignore content && + rm missing-ignore && + git update-index --ignore-missing --refresh-stat-only && + git checkout -- missing-ignore +' + +test_expect_success '--refresh-stat-only reports error on missing file without --ignore-missing' ' + test_commit missing-error missing-error content && + rm missing-error && + test_must_fail git update-index --refresh-stat-only >out 2>err && + test_grep "needs update" out && + git checkout -- missing-error +' + +test_expect_success '--refresh-stat-only with -q is quiet' ' + test_commit missing-quiet missing-quiet content && + rm missing-quiet && + git update-index -q --ignore-missing --refresh-stat-only >out 2>err && + test_must_be_empty out && + 
test_must_be_empty err +' + +test_done