From 07e72d6821efa850cc4617a384a0807596073d5c Mon Sep 17 00:00:00 2001 From: bdchatham Date: Mon, 15 Jun 2026 15:36:53 -0700 Subject: [PATCH 1/5] feat(stats): schedule_lag self-check + run verdict (PLT-463) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compute schedule_lag = AttemptedSendTime - IntendedSendTime per open-loop tx (bounded reservoir, Algorithm R), expose p99 every run, and render a run VERDICT: VOID when schedule_lag_p99 > threshold x (1/lambda) — a generator-bound run is void, not a footnote. Threshold is a named const (0.10, 'tune from first calibration run'), overridable via config. Gated on the actual arrival model (closed-loop / ramped-lambda => N/A); prewarm and zero-IntendedSendTime txs excluded. Stacked on PLT-459 (#51): needs the inclusion run-summary surface. Co-Authored-By: Claude Opus 4.8 --- config/settings.go | 14 ++++- main.go | 18 ++++++ sender/worker.go | 7 +++ stats/collector.go | 59 ++++++++++++++++++++ stats/metrics.go | 8 +++ stats/run_summary.go | 15 +++++ stats/schedule_lag_test.go | 53 ++++++++++++++++++ stats/verdict.go | 111 +++++++++++++++++++++++++++++++++++++ stats/verdict_test.go | 94 +++++++++++++++++++++++++++++++ 9 files changed, 377 insertions(+), 2 deletions(-) create mode 100644 stats/schedule_lag_test.go create mode 100644 stats/verdict.go create mode 100644 stats/verdict_test.go diff --git a/config/settings.go b/config/settings.go index 6f2e7ab..0298c61 100644 --- a/config/settings.go +++ b/config/settings.go @@ -42,6 +42,11 @@ type Settings struct { // txs that would exceed it at their scheduled instant are dropped and // counted rather than throttling the arrival clock. Ignored in closed-loop. MaxInFlight int `json:"maxInFlight,omitempty"` + // ScheduleLagVoidThreshold is the fraction of the arrival interval (1/λ) that + // schedule_lag_p99 may reach before an open-loop run is VOID (PLT-463). Zero + // uses the provisional built-in default; set via config to retune without a + // rebuild. Ignored in closed-loop. + ScheduleLagVoidThreshold float64 `json:"scheduleLagVoidThreshold,omitempty"` } // Arrival model identifiers for the ArrivalModel setting. @@ -95,6 +100,9 @@ func DefaultSettings() Settings { PostSummaryFlushDelay: Duration(25 * time.Second), ArrivalModel: ArrivalModelClosedLoop, MaxInFlight: 10_000, + // Zero defers to the stats package's provisional default; surfaced here so + // a config file can override without a CLI flag. + ScheduleLagVoidThreshold: 0, } } @@ -150,6 +158,7 @@ func InitializeViper(cmd *cobra.Command) error { viper.SetDefault("postSummaryFlushDelay", defaults.PostSummaryFlushDelay.ToDuration()) viper.SetDefault("arrivalModel", defaults.ArrivalModel) viper.SetDefault("maxInFlight", defaults.MaxInFlight) + viper.SetDefault("scheduleLagVoidThreshold", defaults.ScheduleLagVoidThreshold) return nil } @@ -193,7 +202,8 @@ func ResolveSettings() *Settings { TargetGas: viper.GetUint64("targetGas"), NumBlocksToWrite: viper.GetInt("numBlocksToWrite"), PostSummaryFlushDelay: Duration(viper.GetDuration("postSummaryFlushDelay")), - ArrivalModel: viper.GetString("arrivalModel"), - MaxInFlight: viper.GetInt("maxInFlight"), + ArrivalModel: viper.GetString("arrivalModel"), + MaxInFlight: viper.GetInt("maxInFlight"), + ScheduleLagVoidThreshold: viper.GetFloat64("scheduleLagVoidThreshold"), } } diff --git a/main.go b/main.go index 4b0101c..e86e18f 100644 --- a/main.go +++ b/main.go @@ -420,6 +420,24 @@ func runLoadTest(ctx context.Context, cmd *cobra.Command) error { log.Printf("📦 Inclusion: included=%d expired=%d dropped_at_cap=%d inflight_at_shutdown=%d", incl.Included, incl.Expired, incl.DroppedAtCap, incl.InflightAtShutdown) } + // Open-loop self-check (PLT-463): compute schedule_lag_p99 and the run + // verdict. Gated on the model the run actually used (summary.ArrivalModel, + // not the requested flag — the txs-writer path downgrades to closed_loop). + openLoopRun := summary.ArrivalModel == config.ArrivalModelOpenLoop + verdict := stats.EvaluateScheduleLag( + collector.ScheduleLagSamples(), cfg.Settings.TPS, openLoopRun, cfg.Settings.ScheduleLagVoidThreshold) + summary.ScheduleLagP99 = verdict.ScheduleLagP99 + summary.Verdict = verdict.Verdict + summary.VoidReason = verdict.VoidReason + if verdict.Verdict == stats.VerdictVoid { + log.Printf("⚠️ VOID: %s (schedule_lag_p99=%s, samples=%d)", + verdict.VoidReason, verdict.ScheduleLagP99.Round(time.Microsecond), verdict.SampleCount) + } else { + log.Printf("🧪 Run verdict: %s | schedule_lag_p99=%s (samples=%d, arrival_interval=%s)", + verdict.Verdict, verdict.ScheduleLagP99.Round(time.Microsecond), + verdict.SampleCount, verdict.ArrivalInterval.Round(time.Microsecond)) + } + collector.EmitRunSummary(ctx, summary) if d := cfg.Settings.PostSummaryFlushDelay.ToDuration(); d > 0 { log.Printf("⏳ Holding pod for post-summary scrape window (%s)...", d) diff --git a/sender/worker.go b/sender/worker.go index 21d33e4..a11b690 100644 --- a/sender/worker.go +++ b/sender/worker.go @@ -169,6 +169,13 @@ func (w *Worker) runTxSender(ctx context.Context, client *ethclient.Client) erro startTime := time.Now() // Sole owner between dequeue and hand-off: stamp is race-free (see LoadTx). tx.AttemptedSendTime = startTime + // schedule_lag self-check: only open-loop txs carry a true scheduled + // instant. A zero IntendedSendTime (prewarm) is excluded here; the + // closed-loop enqueue time is excluded at the run level (the verdict gates + // on the arrival model, see stats.EvaluateScheduleLag). + if !tx.IntendedSendTime.IsZero() { + w.cfg.Collector.RecordScheduleLag(startTime.Sub(tx.IntendedSendTime)) + } err = w.sendTransaction(ctx, client, tx) // OnComplete must fire only after the real send returns — that is what // bounds true unacked in-flight (see doc.go). Nil on closed-loop/batch. diff --git a/stats/collector.go b/stats/collector.go index 3c2ea7e..b3a2f5c 100644 --- a/stats/collector.go +++ b/stats/collector.go @@ -2,11 +2,20 @@ package stats import ( "fmt" + "math/rand" "sort" "sync" "time" ) +// scheduleLagReservoirCap bounds the schedule_lag sample set. Open-loop runs can +// emit millions of txs; storing every lag is unbounded memory. A reservoir of +// this size keeps a uniform random sample of the full run (Algorithm R), so the +// p99 stays representative regardless of run length — unlike tail-trimming, +// which would bias the percentile toward the run's final window. ~16k * +// time.Duration (8B) ≈ 128KB, negligible against the tx working set. +const scheduleLagReservoirCap = 16384 + // Collector tracks comprehensive statistics for load testing type Collector struct { mu sync.RWMutex @@ -34,6 +43,14 @@ type Collector struct { totalTxs uint64 lastWindowTime time.Time + // schedule_lag reservoir: a bounded uniform sample of per-tx send lag + // (AttemptedSendTime − IntendedSendTime), used for the open-loop self-check + // verdict. scheduleLagSeen is the total count of recorded samples (the + // reservoir's n), needed for Algorithm R replacement probability. + scheduleLag []time.Duration + scheduleLagSeen uint64 + scheduleLagRand *rand.Rand + // Configuration maxLatencyHistory int // Limit latency history to prevent memory leaks } @@ -56,7 +73,49 @@ func NewCollector() *Collector { startTime: time.Now(), lastWindowTime: time.Now(), maxLatencyHistory: 10000, // Keep last 10k latencies per endpoint + scheduleLag: make([]time.Duration, 0, scheduleLagReservoirCap), + // Local source: this is a self-check sample, not security-sensitive, and a + // per-collector source avoids contending the global rand mutex on the hot + // send path. + scheduleLagRand: rand.New(rand.NewSource(time.Now().UnixNano())), //nolint:gosec // sampling, not crypto + } +} + +// RecordScheduleLag records one open-loop send lag (AttemptedSendTime − +// IntendedSendTime) into the bounded reservoir. The worker calls it right after +// stamping AttemptedSendTime, only when IntendedSendTime is set (open-loop txs; +// closed-loop/prewarm pass a zero IntendedSendTime and are excluded by the +// caller). Negative lags (clock skew between scheduler and worker reads) are +// clamped to zero so they cannot deflate the p99. +func (c *Collector) RecordScheduleLag(lag time.Duration) { + if lag < 0 { + lag = 0 + } + c.mu.Lock() + defer c.mu.Unlock() + + c.scheduleLagSeen++ + if len(c.scheduleLag) < scheduleLagReservoirCap { + c.scheduleLag = append(c.scheduleLag, lag) + return } + // Reservoir full: replace a uniformly random slot with probability + // cap/seen (Algorithm R), keeping the retained set a uniform sample. + j := c.scheduleLagRand.Int63n(int64(c.scheduleLagSeen)) + if j < int64(scheduleLagReservoirCap) { + c.scheduleLag[j] = lag + } +} + +// ScheduleLagSamples returns a copy of the current schedule_lag reservoir. Call +// at run end to feed EvaluateScheduleLag; the copy keeps the caller's percentile +// sort off the live slice. +func (c *Collector) ScheduleLagSamples() []time.Duration { + c.mu.RLock() + defer c.mu.RUnlock() + out := make([]time.Duration, len(c.scheduleLag)) + copy(out, c.scheduleLag) + return out } // RecordTransaction records a transaction attempt diff --git a/stats/metrics.go b/stats/metrics.go index 70ffd46..c8ec9d7 100644 --- a/stats/metrics.go +++ b/stats/metrics.go @@ -87,6 +87,14 @@ var ( "run_inflight_at_shutdown", metric.WithDescription("In-flight inclusion registry size at run end (emitted once at run end)"), metric.WithUnit("{transactions}"))) + + // Open-loop self-check (PLT-463). Emitted once at run end on every run; the + // verdict label distinguishes VALID / VOID / N/A so a generator-bound run is + // queryable, not just a log line. + runScheduleLagP99 = must(meter.Float64Gauge( + "run_schedule_lag_p99", + metric.WithDescription("p99 of per-tx send lag (attempted − intended) over this open-loop run (emitted once at run end)"), + metric.WithUnit("s"))) ) // meteredInclusionTrackers backs the inclusion_inflight gauge: each tracker diff --git a/stats/run_summary.go b/stats/run_summary.go index e222142..77317fe 100644 --- a/stats/run_summary.go +++ b/stats/run_summary.go @@ -41,6 +41,17 @@ type RunSummary struct { DroppedAtCap uint64 // InflightAtShutdown is len(inflight) read after workers and tracker joined. InflightAtShutdown uint64 + + // Open-loop self-check (PLT-463): schedule_lag = AttemptedSendTime − + // IntendedSendTime per tx. A p99 above the threshold fraction of the arrival + // interval (1/λ) means the generator could not keep its own schedule, so the + // run was generator-bound, not open-loop, and is VOID. Reported on every run + // regardless of verdict; Verdict is N/A for closed-loop or non-fixed-λ runs. + ScheduleLagP99 time.Duration + // Verdict is VerdictValid, VerdictVoid, or VerdictNA. + Verdict string + // VoidReason explains a VOID verdict; empty otherwise. + VoidReason string } // EmitRunSummary records the run-summary gauges. Call once at shutdown. @@ -61,4 +72,8 @@ func (c *Collector) EmitRunSummary(ctx context.Context, summary RunSummary) { if summary.InclusionTracked { runInflightAtShutdown.Record(ctx, int64(summary.InflightAtShutdown)) } + runScheduleLagP99.Record(ctx, summary.ScheduleLagP99.Seconds(), + metric.WithAttributes( + attribute.String("arrival_model", summary.ArrivalModel), + attribute.String("verdict", summary.Verdict))) } diff --git a/stats/schedule_lag_test.go b/stats/schedule_lag_test.go new file mode 100644 index 0000000..34b280f --- /dev/null +++ b/stats/schedule_lag_test.go @@ -0,0 +1,53 @@ +package stats + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestRecordScheduleLag_SamplesRoundTrip(t *testing.T) { + c := NewCollector() + c.RecordScheduleLag(1 * time.Millisecond) + c.RecordScheduleLag(2 * time.Millisecond) + c.RecordScheduleLag(3 * time.Millisecond) + + got := c.ScheduleLagSamples() + require.ElementsMatch(t, []time.Duration{1 * time.Millisecond, 2 * time.Millisecond, 3 * time.Millisecond}, got) + + // Returned slice is a copy: mutating it must not affect the collector. + got[0] = 999 * time.Second + require.NotContains(t, c.ScheduleLagSamples(), 999*time.Second) +} + +// Negative lags (scheduler/worker clock-read skew) clamp to zero so they cannot +// deflate the p99. +func TestRecordScheduleLag_NegativeClampsToZero(t *testing.T) { + c := NewCollector() + c.RecordScheduleLag(-5 * time.Millisecond) + require.Equal(t, []time.Duration{0}, c.ScheduleLagSamples()) +} + +// The reservoir is bounded: recording far past the cap never grows the sample +// set beyond scheduleLagReservoirCap. +func TestRecordScheduleLag_ReservoirBounded(t *testing.T) { + c := NewCollector() + for i := range scheduleLagReservoirCap * 4 { + c.RecordScheduleLag(time.Duration(i) * time.Nanosecond) + } + require.Len(t, c.ScheduleLagSamples(), scheduleLagReservoirCap) +} + +// End-to-end through the collector: a known sample set yields the expected p99 +// verdict, proving the record → sample → evaluate path agrees. +func TestRecordScheduleLag_FeedsVerdict(t *testing.T) { + c := NewCollector() + for range 99 { + c.RecordScheduleLag(100 * time.Microsecond) + } + c.RecordScheduleLag(50 * time.Millisecond) + + v := EvaluateScheduleLag(c.ScheduleLagSamples(), 100, true, 0) + require.Equal(t, VerdictVoid, v.Verdict) +} diff --git a/stats/verdict.go b/stats/verdict.go new file mode 100644 index 0000000..9e12cfb --- /dev/null +++ b/stats/verdict.go @@ -0,0 +1,111 @@ +package stats + +import ( + "fmt" + "sort" + "time" +) + +// scheduleLagVoidThreshold is the fraction of the arrival interval (1/λ) that +// schedule_lag_p99 may reach before the run is VOID: a p99 send lag above this +// fraction means the generator could not keep up with its own schedule, so the +// load was generator-bound, not open-loop, and the run does not measure the SUT. +// Provisional value — tune from first calibration run. +const scheduleLagVoidThreshold = 0.10 + +// Verdict labels for a run's open-loop self-check. +const ( + VerdictValid = "VALID" + VerdictVoid = "VOID" + // VerdictNA marks a run where the self-check does not apply (closed-loop, or + // no fixed arrival rate to compare against). schedule_lag_p99 is still + // reported, but no pass/fail gate is rendered. + VerdictNA = "N/A" +) + +// ScheduleLagVerdict is the self-check result that proves an open-loop run was +// actually open-loop. schedule_lag = AttemptedSendTime − IntendedSendTime per +// tx; its p99 is checked against threshold × (1/λ). It is computed on every run +// and reported regardless of outcome. +type ScheduleLagVerdict struct { + // Verdict is VerdictValid, VerdictVoid, or VerdictNA. + Verdict string + // VoidReason is a human-readable explanation, empty unless Verdict is VOID. + VoidReason string + // ScheduleLagP99 is the 99th-percentile send lag across sampled open-loop + // txs; zero when no open-loop samples were recorded. + ScheduleLagP99 time.Duration + // SampleCount is the number of schedule_lag samples the verdict is based on. + SampleCount int + // ArrivalInterval is 1/λ, the bound's reference interval; zero when λ is not + // a single fixed rate (e.g. ramping with no configured TPS). + ArrivalInterval time.Duration + // Threshold is the fraction of ArrivalInterval used as the VOID boundary. + Threshold float64 +} + +// EvaluateScheduleLag computes the open-loop self-check verdict from the +// recorded schedule_lag samples, the configured arrival rate targetTPS (λ), the +// run's arrival model, and the VOID threshold fraction (<=0 falls back to the +// provisional default). p99 is the sorted-slice percentile, matching the repo's +// block-time percentile idiom. +// +// The verdict is N/A — reported, never a gate — when the model is not open-loop +// or when λ is not a single fixed rate (targetTPS <= 0, e.g. a ramping run), +// since there is no single 1/λ to bound against. schedule_lag_p99 is still +// reported in those cases. +func EvaluateScheduleLag(samples []time.Duration, targetTPS float64, openLoop bool, threshold float64) ScheduleLagVerdict { + if threshold <= 0 { + threshold = scheduleLagVoidThreshold + } + + v := ScheduleLagVerdict{ + Verdict: VerdictNA, + ScheduleLagP99: scheduleLagPercentile(samples, 99), + SampleCount: len(samples), + Threshold: threshold, + } + + if !openLoop || targetTPS <= 0 { + return v + } + + arrivalInterval := time.Duration(float64(time.Second) / targetTPS) + v.ArrivalInterval = arrivalInterval + + // No samples: the run scheduled nothing open-loop. Treat as VALID (nothing + // disproves open-loop) rather than VOID — VOID is reserved for an observed + // generator-bound run. + if len(samples) == 0 { + v.Verdict = VerdictValid + return v + } + + bound := time.Duration(threshold * float64(arrivalInterval)) + if v.ScheduleLagP99 > bound { + v.Verdict = VerdictVoid + v.VoidReason = formatVoidReason(v.ScheduleLagP99, bound, threshold, arrivalInterval) + return v + } + v.Verdict = VerdictValid + return v +} + +func formatVoidReason(p99, bound time.Duration, threshold float64, arrivalInterval time.Duration) string { + return fmt.Sprintf( + "generator-bound: schedule_lag_p99 %s exceeds %s (%.0f%% of arrival interval %s) — load was not open-loop", + p99.Round(time.Microsecond), bound.Round(time.Microsecond), threshold*100, arrivalInterval.Round(time.Microsecond)) +} + +// scheduleLagPercentile returns the percentile of a copy-then-sort of samples, +// reusing the repo's calculatePercentile index rule. Copies so the caller's +// slice order is preserved. +func scheduleLagPercentile(samples []time.Duration, percentile int) time.Duration { + if len(samples) == 0 { + return 0 + } + sorted := make([]time.Duration, len(samples)) + copy(sorted, samples) + sort.Slice(sorted, func(i, j int) bool { return sorted[i] < sorted[j] }) + return calculatePercentile(sorted, percentile) +} diff --git a/stats/verdict_test.go b/stats/verdict_test.go new file mode 100644 index 0000000..925f6b4 --- /dev/null +++ b/stats/verdict_test.go @@ -0,0 +1,94 @@ +package stats + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +// lags builds a sample slice from millisecond values for readability. +func lags(ms ...int) []time.Duration { + out := make([]time.Duration, len(ms)) + for i, m := range ms { + out[i] = time.Duration(m) * time.Millisecond + } + return out +} + +// At 100 TPS the arrival interval is 10ms, so the VOID bound at the default 10% +// threshold is 1ms. An over-driven run whose p99 sits well above that is VOID. +func TestEvaluateScheduleLag_OverDrivenIsVoid(t *testing.T) { + // 100 samples mostly small, but the top tail (p99 index = 99) is large. + samples := make([]time.Duration, 0, 100) + for range 99 { + samples = append(samples, 100*time.Microsecond) + } + samples = append(samples, 50*time.Millisecond) // the p99 element + + v := EvaluateScheduleLag(samples, 100, true, 0) + + require.Equal(t, VerdictVoid, v.Verdict) + require.NotEmpty(t, v.VoidReason) + require.Equal(t, 50*time.Millisecond, v.ScheduleLagP99) + require.Equal(t, 10*time.Millisecond, v.ArrivalInterval) +} + +// A healthy run keeps p99 below 10% of the 10ms interval (1ms) → VALID. +func TestEvaluateScheduleLag_HealthyIsValid(t *testing.T) { + samples := lags(0, 0, 0, 0, 0, 0, 0, 0, 0, 0) // all 0ms, p99 = 0 + samples = append(samples, 200*time.Microsecond) + + v := EvaluateScheduleLag(samples, 100, true, 0) + + require.Equal(t, VerdictValid, v.Verdict) + require.Empty(t, v.VoidReason) + require.Less(t, v.ScheduleLagP99, time.Millisecond) +} + +// p99 must match the repo's sorted-slice index rule for a known set. +func TestEvaluateScheduleLag_P99ComputedCorrectly(t *testing.T) { + // 100 samples 1ms..100ms; index = (100*99)/100 = 99 → sorted[99] = 100ms. + samples := make([]time.Duration, 0, 100) + for i := 1; i <= 100; i++ { + samples = append(samples, time.Duration(i)*time.Millisecond) + } + + // targetTPS=0 keeps verdict N/A but still reports p99. + v := EvaluateScheduleLag(samples, 0, true, 0) + require.Equal(t, 100*time.Millisecond, v.ScheduleLagP99) + require.Equal(t, 100, v.SampleCount) +} + +// Closed-loop runs are reported but never gated: N/A regardless of lag size. +func TestEvaluateScheduleLag_ClosedLoopIsNA(t *testing.T) { + samples := lags(500, 500, 500) // huge lag, would be VOID if open-loop + + v := EvaluateScheduleLag(samples, 100, false, 0) + + require.Equal(t, VerdictNA, v.Verdict) + require.Empty(t, v.VoidReason) + require.Equal(t, 500*time.Millisecond, v.ScheduleLagP99) // still reported +} + +// Open-loop with no fixed λ (e.g. ramping, TPS=0) cannot bound against 1/λ → N/A. +func TestEvaluateScheduleLag_NoFixedRateIsNA(t *testing.T) { + v := EvaluateScheduleLag(lags(100, 200, 300), 0, true, 0) + require.Equal(t, VerdictNA, v.Verdict) + require.Equal(t, time.Duration(0), v.ArrivalInterval) +} + +// No open-loop samples is VALID, not VOID: VOID needs an observed bad run. +func TestEvaluateScheduleLag_NoSamplesIsValid(t *testing.T) { + v := EvaluateScheduleLag(nil, 100, true, 0) + require.Equal(t, VerdictValid, v.Verdict) + require.Equal(t, time.Duration(0), v.ScheduleLagP99) +} + +// A configured threshold overrides the default boundary. +func TestEvaluateScheduleLag_ConfiguredThreshold(t *testing.T) { + samples := lags(2) // p99 = 2ms; interval at 100 TPS = 10ms + // 10% bound = 1ms → VOID; 50% bound = 5ms → VALID. + require.Equal(t, VerdictVoid, EvaluateScheduleLag(samples, 100, true, 0.10).Verdict) + require.Equal(t, VerdictValid, EvaluateScheduleLag(samples, 100, true, 0.50).Verdict) +} From b4772759816bae32f169e0a78d5675bffacddaf4 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Mon, 15 Jun 2026 15:48:55 -0700 Subject: [PATCH 2/5] fix(stats): close schedule_lag verdict false-VALID paths (cohort review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - RampUp => N/A (checked before TPS): the ramper drives the live limit via SetLimit so cfg.Settings.TPS is stale; gating against 1/TPS is wrong. - Zero samples on a fixed-λ open-loop run => N/A, never VALID (a trust gate must not bless 'no data' as a clean run). Thread the admitted count from the dispatcher conservation counters; if admitted>0 yet samples==0, log loudly (recorder may be mis-wired) and flag Anomaly. - Drop the redundant inline comment on ScheduleLagVoidThreshold (go-doc keeps the rationale). Cohort: security (false-VALID F1/F2), systems (F2 confirm), idiom (doc dup). Co-Authored-By: Claude Opus 4.8 --- config/settings.go | 4 +-- main.go | 16 ++++++++--- stats/schedule_lag_test.go | 2 +- stats/verdict.go | 54 +++++++++++++++++++++++++++----------- stats/verdict_test.go | 46 +++++++++++++++++++++++--------- 5 files changed, 87 insertions(+), 35 deletions(-) diff --git a/config/settings.go b/config/settings.go index 0298c61..41f1f6f 100644 --- a/config/settings.go +++ b/config/settings.go @@ -99,9 +99,7 @@ func DefaultSettings() Settings { NumBlocksToWrite: 100, PostSummaryFlushDelay: Duration(25 * time.Second), ArrivalModel: ArrivalModelClosedLoop, - MaxInFlight: 10_000, - // Zero defers to the stats package's provisional default; surfaced here so - // a config file can override without a CLI flag. + MaxInFlight: 10_000, ScheduleLagVoidThreshold: 0, } } diff --git a/main.go b/main.go index e86e18f..b4f4cf2 100644 --- a/main.go +++ b/main.go @@ -396,11 +396,13 @@ func runLoadTest(ctx context.Context, cmd *cobra.Command) error { ramper.LogFinalStats() } summary := stats.RunSummary{ArrivalModel: config.ArrivalModelClosedLoop} + var admitted uint64 if dispatcher != nil { summary.ArrivalModel = string(dispatcher.ArrivalModel()) dstats := dispatcher.GetStats() summary.Dropped = dstats.Dropped summary.Failed = dstats.Failed + admitted = dstats.TotalSent + dstats.Failed if summary.Dropped > 0 { log.Printf("⚠️ Open-loop dropped %d txs (in-flight saturated; not throttled)", summary.Dropped) } @@ -425,14 +427,22 @@ func runLoadTest(ctx context.Context, cmd *cobra.Command) error { // not the requested flag — the txs-writer path downgrades to closed_loop). openLoopRun := summary.ArrivalModel == config.ArrivalModelOpenLoop verdict := stats.EvaluateScheduleLag( - collector.ScheduleLagSamples(), cfg.Settings.TPS, openLoopRun, cfg.Settings.ScheduleLagVoidThreshold) + collector.ScheduleLagSamples(), cfg.Settings.TPS, openLoopRun, + cfg.Settings.RampUp, admitted, cfg.Settings.ScheduleLagVoidThreshold) summary.ScheduleLagP99 = verdict.ScheduleLagP99 summary.Verdict = verdict.Verdict summary.VoidReason = verdict.VoidReason - if verdict.Verdict == stats.VerdictVoid { + if verdict.Anomaly { + log.Printf("🚨 no schedule_lag samples despite %d admitted txs — recorder may be mis-wired", admitted) + } + switch verdict.Verdict { + case stats.VerdictVoid: log.Printf("⚠️ VOID: %s (schedule_lag_p99=%s, samples=%d)", verdict.VoidReason, verdict.ScheduleLagP99.Round(time.Microsecond), verdict.SampleCount) - } else { + case stats.VerdictNA: + log.Printf("🧪 Run verdict: N/A — %s | schedule_lag_p99=%s (samples=%d)", + verdict.NAReason, verdict.ScheduleLagP99.Round(time.Microsecond), verdict.SampleCount) + default: log.Printf("🧪 Run verdict: %s | schedule_lag_p99=%s (samples=%d, arrival_interval=%s)", verdict.Verdict, verdict.ScheduleLagP99.Round(time.Microsecond), verdict.SampleCount, verdict.ArrivalInterval.Round(time.Microsecond)) diff --git a/stats/schedule_lag_test.go b/stats/schedule_lag_test.go index 34b280f..b21d815 100644 --- a/stats/schedule_lag_test.go +++ b/stats/schedule_lag_test.go @@ -48,6 +48,6 @@ func TestRecordScheduleLag_FeedsVerdict(t *testing.T) { } c.RecordScheduleLag(50 * time.Millisecond) - v := EvaluateScheduleLag(c.ScheduleLagSamples(), 100, true, 0) + v := EvaluateScheduleLag(c.ScheduleLagSamples(), 100, true, false, 100, 0) require.Equal(t, VerdictVoid, v.Verdict) } diff --git a/stats/verdict.go b/stats/verdict.go index 9e12cfb..2b4540a 100644 --- a/stats/verdict.go +++ b/stats/verdict.go @@ -17,9 +17,10 @@ const scheduleLagVoidThreshold = 0.10 const ( VerdictValid = "VALID" VerdictVoid = "VOID" - // VerdictNA marks a run where the self-check does not apply (closed-loop, or - // no fixed arrival rate to compare against). schedule_lag_p99 is still - // reported, but no pass/fail gate is rendered. + // VerdictNA marks a run where the self-check does not apply: closed-loop, a + // ramped λ (no single 1/λ), no fixed arrival rate, or a fixed-λ run that + // recorded zero schedule_lag samples (cannot prove open-loop either way). + // schedule_lag_p99 is still reported, but no pass/fail gate is rendered. VerdictNA = "N/A" ) @@ -32,6 +33,12 @@ type ScheduleLagVerdict struct { Verdict string // VoidReason is a human-readable explanation, empty unless Verdict is VOID. VoidReason string + // NAReason explains an N/A verdict (why no gate applies); empty otherwise. + NAReason string + // Anomaly is true when the inputs are self-inconsistent — admitted txs but + // zero schedule_lag samples — so the caller can log loudly: the recorder is + // likely mis-wired rather than the run being clean. + Anomaly bool // ScheduleLagP99 is the 99th-percentile send lag across sampled open-loop // txs; zero when no open-loop samples were recorded. ScheduleLagP99 time.Duration @@ -46,15 +53,20 @@ type ScheduleLagVerdict struct { // EvaluateScheduleLag computes the open-loop self-check verdict from the // recorded schedule_lag samples, the configured arrival rate targetTPS (λ), the -// run's arrival model, and the VOID threshold fraction (<=0 falls back to the -// provisional default). p99 is the sorted-slice percentile, matching the repo's -// block-time percentile idiom. +// run's arrival model, whether the run ramped λ, the count of admitted txs, and +// the VOID threshold fraction (<=0 falls back to the provisional default). p99 +// is the sorted-slice percentile, matching the repo's block-time percentile +// idiom. // -// The verdict is N/A — reported, never a gate — when the model is not open-loop -// or when λ is not a single fixed rate (targetTPS <= 0, e.g. a ramping run), -// since there is no single 1/λ to bound against. schedule_lag_p99 is still -// reported in those cases. -func EvaluateScheduleLag(samples []time.Duration, targetTPS float64, openLoop bool, threshold float64) ScheduleLagVerdict { +// The verdict is N/A — reported, never a gate — when the model is not open-loop, +// when the run ramped λ (a ramp has no single 1/λ to bound against, and the +// ramper drives the live limit so targetTPS is stale), or when λ is not a single +// fixed rate (targetTPS <= 0). A fixed-λ open-loop run that recorded zero +// schedule_lag samples is also N/A, not VALID: zero samples cannot distinguish a +// SUT that kept up from a recorder that never fired. When admitted > 0 yet no +// samples landed, Anomaly is set so the caller logs the mis-wiring loudly. +// schedule_lag_p99 is still reported in every case. +func EvaluateScheduleLag(samples []time.Duration, targetTPS float64, openLoop, ramped bool, admitted uint64, threshold float64) ScheduleLagVerdict { if threshold <= 0 { threshold = scheduleLagVoidThreshold } @@ -66,18 +78,28 @@ func EvaluateScheduleLag(samples []time.Duration, targetTPS float64, openLoop bo Threshold: threshold, } - if !openLoop || targetTPS <= 0 { + if !openLoop { + v.NAReason = "closed-loop run: open-loop self-check does not apply" + return v + } + if ramped { + v.NAReason = "ramped λ has no single arrival interval" + return v + } + if targetTPS <= 0 { + v.NAReason = "no fixed arrival rate (λ): nothing to bound against" return v } arrivalInterval := time.Duration(float64(time.Second) / targetTPS) v.ArrivalInterval = arrivalInterval - // No samples: the run scheduled nothing open-loop. Treat as VALID (nothing - // disproves open-loop) rather than VOID — VOID is reserved for an observed - // generator-bound run. + // Zero samples is N/A, not VALID: it cannot tell a SUT that kept up from a + // recorder that never fired or a run that dropped every tick. Admitted txs + // with no samples is an outright anomaly — flag it for the caller. if len(samples) == 0 { - v.Verdict = VerdictValid + v.NAReason = "no schedule_lag samples recorded" + v.Anomaly = admitted > 0 return v } diff --git a/stats/verdict_test.go b/stats/verdict_test.go index 925f6b4..60cf092 100644 --- a/stats/verdict_test.go +++ b/stats/verdict_test.go @@ -26,7 +26,7 @@ func TestEvaluateScheduleLag_OverDrivenIsVoid(t *testing.T) { } samples = append(samples, 50*time.Millisecond) // the p99 element - v := EvaluateScheduleLag(samples, 100, true, 0) + v := EvaluateScheduleLag(samples, 100, true, false, 100, 0) require.Equal(t, VerdictVoid, v.Verdict) require.NotEmpty(t, v.VoidReason) @@ -39,7 +39,7 @@ func TestEvaluateScheduleLag_HealthyIsValid(t *testing.T) { samples := lags(0, 0, 0, 0, 0, 0, 0, 0, 0, 0) // all 0ms, p99 = 0 samples = append(samples, 200*time.Microsecond) - v := EvaluateScheduleLag(samples, 100, true, 0) + v := EvaluateScheduleLag(samples, 100, true, false, 100, 0) require.Equal(t, VerdictValid, v.Verdict) require.Empty(t, v.VoidReason) @@ -55,7 +55,7 @@ func TestEvaluateScheduleLag_P99ComputedCorrectly(t *testing.T) { } // targetTPS=0 keeps verdict N/A but still reports p99. - v := EvaluateScheduleLag(samples, 0, true, 0) + v := EvaluateScheduleLag(samples, 0, true, false, 100, 0) require.Equal(t, 100*time.Millisecond, v.ScheduleLagP99) require.Equal(t, 100, v.SampleCount) } @@ -64,31 +64,53 @@ func TestEvaluateScheduleLag_P99ComputedCorrectly(t *testing.T) { func TestEvaluateScheduleLag_ClosedLoopIsNA(t *testing.T) { samples := lags(500, 500, 500) // huge lag, would be VOID if open-loop - v := EvaluateScheduleLag(samples, 100, false, 0) + v := EvaluateScheduleLag(samples, 100, false, false, 3, 0) require.Equal(t, VerdictNA, v.Verdict) require.Empty(t, v.VoidReason) require.Equal(t, 500*time.Millisecond, v.ScheduleLagP99) // still reported } -// Open-loop with no fixed λ (e.g. ramping, TPS=0) cannot bound against 1/λ → N/A. +// Open-loop with no fixed λ (TPS=0) cannot bound against 1/λ → N/A. func TestEvaluateScheduleLag_NoFixedRateIsNA(t *testing.T) { - v := EvaluateScheduleLag(lags(100, 200, 300), 0, true, 0) + v := EvaluateScheduleLag(lags(100, 200, 300), 0, true, false, 3, 0) require.Equal(t, VerdictNA, v.Verdict) require.Equal(t, time.Duration(0), v.ArrivalInterval) } -// No open-loop samples is VALID, not VOID: VOID needs an observed bad run. -func TestEvaluateScheduleLag_NoSamplesIsValid(t *testing.T) { - v := EvaluateScheduleLag(nil, 100, true, 0) - require.Equal(t, VerdictValid, v.Verdict) +// A ramped run drives λ via the limiter, so the configured TPS is stale and +// there is no single 1/λ to gate against — N/A regardless of TPS. +func TestEvaluateScheduleLag_RampedIsNA(t *testing.T) { + // TPS>0 but ramped: must still be N/A, not gated against the stale 1/TPS. + v := EvaluateScheduleLag(lags(500, 500, 500), 100, true, true, 3, 0) + require.Equal(t, VerdictNA, v.Verdict) + require.Empty(t, v.VoidReason) + require.Equal(t, "ramped λ has no single arrival interval", v.NAReason) + require.Equal(t, time.Duration(0), v.ArrivalInterval) +} + +// No samples on a fixed-λ run is N/A, not VALID: it cannot distinguish a SUT +// that kept up from a recorder that never fired. +func TestEvaluateScheduleLag_NoSamplesIsNA(t *testing.T) { + v := EvaluateScheduleLag(nil, 100, true, false, 0, 0) + require.Equal(t, VerdictNA, v.Verdict) + require.Equal(t, "no schedule_lag samples recorded", v.NAReason) + require.False(t, v.Anomaly) // zero admitted: no anomaly, just an empty run require.Equal(t, time.Duration(0), v.ScheduleLagP99) } +// Admitted txs but zero samples is an anomaly: the recorder likely never fired. +func TestEvaluateScheduleLag_AdmittedButNoSamplesIsAnomaly(t *testing.T) { + v := EvaluateScheduleLag(nil, 100, true, false, 5000, 0) + require.Equal(t, VerdictNA, v.Verdict) + require.Equal(t, "no schedule_lag samples recorded", v.NAReason) + require.True(t, v.Anomaly) +} + // A configured threshold overrides the default boundary. func TestEvaluateScheduleLag_ConfiguredThreshold(t *testing.T) { samples := lags(2) // p99 = 2ms; interval at 100 TPS = 10ms // 10% bound = 1ms → VOID; 50% bound = 5ms → VALID. - require.Equal(t, VerdictVoid, EvaluateScheduleLag(samples, 100, true, 0.10).Verdict) - require.Equal(t, VerdictValid, EvaluateScheduleLag(samples, 100, true, 0.50).Verdict) + require.Equal(t, VerdictVoid, EvaluateScheduleLag(samples, 100, true, false, 1, 0.10).Verdict) + require.Equal(t, VerdictValid, EvaluateScheduleLag(samples, 100, true, false, 1, 0.50).Verdict) } From d20c35af24a833d6fb296f143a4b24bd8014d1b3 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Mon, 15 Jun 2026 16:01:14 -0700 Subject: [PATCH 3/5] feat(stats): tail-degradation guard for schedule_lag verdict (cohort F3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The whole-run p99 (from a uniform reservoir) can dilute a sub-percentile late-run tail blowup → false VALID. Add an UNSAMPLED over-bound counter (incremented per recorded send, not sampled) + max lag: VOID when p99 > bound OR > scheduleLagOverBoundFraction (0.5%, provisional) of sends exceed the bound, with a distinct reason per criterion. Bound is single-sourced (ScheduleLagBound) so run-start arming and verdict-time can't drift; armed only on fixed-λ open-loop runs (inert elsewhere, matching the N/A set). EvaluateScheduleLag now takes ScheduleLagInputs (kills the adjacent-bool positional trap). VOID stays advisory. Co-Authored-By: Claude Opus 4.8 --- main.go | 24 +++++++- stats/collector.go | 38 ++++++++++++ stats/metrics.go | 13 ++++ stats/run_summary.go | 21 +++++++ stats/schedule_lag_test.go | 39 +++++++++++- stats/verdict.go | 121 ++++++++++++++++++++++++++++++------- stats/verdict_test.go | 92 +++++++++++++++++++++++++--- 7 files changed, 312 insertions(+), 36 deletions(-) diff --git a/main.go b/main.go index b4f4cf2..1f8e953 100644 --- a/main.go +++ b/main.go @@ -323,6 +323,12 @@ func runLoadTest(ctx context.Context, cmd *cobra.Command) error { switch { case openLoop && cfg.Settings.TxsDir == "": dispatcher.SetOpenLoop(sharedLimiter, cfg.Settings.MaxInFlight) + // Arm the unsampled over-bound counter for this fixed-λ open-loop run: + // same VOID bound the verdict uses (threshold × 1/λ), known here at run + // start. Inert on non-fixed-λ runs (no SetScheduleLagBound call). + if bound := stats.ScheduleLagBound(cfg.Settings.TPS, cfg.Settings.ScheduleLagVoidThreshold); bound > 0 { + collector.SetScheduleLagBound(bound) + } log.Printf("📤 Arrival model: open_loop (max in-flight: %d)", cfg.Settings.MaxInFlight) case openLoop: // open_loop was requested but the txs-writer path has no arrival clock, @@ -426,10 +432,22 @@ func runLoadTest(ctx context.Context, cmd *cobra.Command) error { // verdict. Gated on the model the run actually used (summary.ArrivalModel, // not the requested flag — the txs-writer path downgrades to closed_loop). openLoopRun := summary.ArrivalModel == config.ArrivalModelOpenLoop - verdict := stats.EvaluateScheduleLag( - collector.ScheduleLagSamples(), cfg.Settings.TPS, openLoopRun, - cfg.Settings.RampUp, admitted, cfg.Settings.ScheduleLagVoidThreshold) + lagTotal, lagOverBound, lagMax := collector.ScheduleLagTail() + verdict := stats.EvaluateScheduleLag(stats.ScheduleLagInputs{ + Samples: collector.ScheduleLagSamples(), + TargetTPS: cfg.Settings.TPS, + OpenLoop: openLoopRun, + Ramped: cfg.Settings.RampUp, + Admitted: admitted, + Threshold: cfg.Settings.ScheduleLagVoidThreshold, + OverBoundCount: lagOverBound, + OverBoundTotal: lagTotal, + MaxLag: lagMax, + }) summary.ScheduleLagP99 = verdict.ScheduleLagP99 + summary.ScheduleLagMax = verdict.MaxLag + summary.ScheduleLagOverBoundCount = verdict.OverBoundCount + summary.ScheduleLagTotal = verdict.OverBoundTotal summary.Verdict = verdict.Verdict summary.VoidReason = verdict.VoidReason if verdict.Anomaly { diff --git a/stats/collector.go b/stats/collector.go index b3a2f5c..9523d7f 100644 --- a/stats/collector.go +++ b/stats/collector.go @@ -51,6 +51,18 @@ type Collector struct { scheduleLagSeen uint64 scheduleLagRand *rand.Rand + // Unsampled schedule_lag tail signal: the reservoir is a uniform sample, so a + // sub-percentile late-run tail can stay under the whole-run p99 yet still mean + // the generator fell behind. These exact (un-sampled) counters give the + // verdict a tail-degradation signal the reservoir cannot dilute. + // scheduleLagBound is the VOID bound (threshold × 1/λ) set once at run start + // for a fixed-λ open-loop run; zero leaves over-bound counting inert (ramped / + // closed-loop / no-λ runs are N/A anyway). scheduleLagMax is the largest lag + // ever recorded, surfaced for diagnostics. + scheduleLagBound time.Duration + scheduleLagOverBound uint64 + scheduleLagMax time.Duration + // Configuration maxLatencyHistory int // Limit latency history to prevent memory leaks } @@ -94,6 +106,13 @@ func (c *Collector) RecordScheduleLag(lag time.Duration) { c.mu.Lock() defer c.mu.Unlock() + if lag > c.scheduleLagMax { + c.scheduleLagMax = lag + } + if c.scheduleLagBound > 0 && lag > c.scheduleLagBound { + c.scheduleLagOverBound++ + } + c.scheduleLagSeen++ if len(c.scheduleLag) < scheduleLagReservoirCap { c.scheduleLag = append(c.scheduleLag, lag) @@ -118,6 +137,25 @@ func (c *Collector) ScheduleLagSamples() []time.Duration { return out } +// SetScheduleLagBound arms the unsampled over-bound counter with the run's VOID +// bound (threshold × 1/λ). Call once at run start, only for a fixed-λ open-loop +// run; left unset (or set to <=0), over-bound counting stays inert. +func (c *Collector) SetScheduleLagBound(bound time.Duration) { + c.mu.Lock() + defer c.mu.Unlock() + c.scheduleLagBound = bound +} + +// ScheduleLagTail reports the exact (un-sampled) tail figures: total lags +// recorded, how many exceeded the VOID bound, and the max lag observed. total is +// the true count, distinct from the bounded reservoir's len. overBound is zero +// when no bound was armed. +func (c *Collector) ScheduleLagTail() (total, overBound uint64, max time.Duration) { + c.mu.RLock() + defer c.mu.RUnlock() + return c.scheduleLagSeen, c.scheduleLagOverBound, c.scheduleLagMax +} + // RecordTransaction records a transaction attempt func (c *Collector) RecordTransaction(scenario, endpoint string, latency time.Duration, success bool) { c.mu.Lock() diff --git a/stats/metrics.go b/stats/metrics.go index c8ec9d7..6b42015 100644 --- a/stats/metrics.go +++ b/stats/metrics.go @@ -95,6 +95,19 @@ var ( "run_schedule_lag_p99", metric.WithDescription("p99 of per-tx send lag (attempted − intended) over this open-loop run (emitted once at run end)"), metric.WithUnit("s"))) + + // Unsampled tail signal: the reservoir p99 above can dilute a sub-percentile + // late-run tail, so the verdict also gates on the exact over-bound fraction. + // Max is diagnostic; fraction is the gate. + runScheduleLagMax = must(meter.Float64Gauge( + "run_schedule_lag_max", + metric.WithDescription("max per-tx send lag (attempted − intended) over this open-loop run, un-sampled (emitted once at run end)"), + metric.WithUnit("s"))) + + runScheduleLagOverBoundFraction = must(meter.Float64Gauge( + "run_schedule_lag_over_bound_fraction", + metric.WithDescription("exact fraction of sends whose lag exceeded the VOID bound over this open-loop run (emitted once at run end)"), + metric.WithUnit("1"))) ) // meteredInclusionTrackers backs the inclusion_inflight gauge: each tracker diff --git a/stats/run_summary.go b/stats/run_summary.go index 77317fe..b985e5e 100644 --- a/stats/run_summary.go +++ b/stats/run_summary.go @@ -48,6 +48,15 @@ type RunSummary struct { // run was generator-bound, not open-loop, and is VOID. Reported on every run // regardless of verdict; Verdict is N/A for closed-loop or non-fixed-λ runs. ScheduleLagP99 time.Duration + // ScheduleLagMax is the largest single send lag recorded (un-sampled), + // surfaced for diagnostics; the verdict gates on a fraction, not this max. + ScheduleLagMax time.Duration + // ScheduleLagOverBoundCount / ScheduleLagTotal are the exact (un-sampled) + // count of sends past the VOID bound and the total recorded; their ratio is + // the tail-degradation signal the sampled p99 cannot dilute. Total zero / no + // bound armed means the figures are inert (non-fixed-λ run). + ScheduleLagOverBoundCount uint64 + ScheduleLagTotal uint64 // Verdict is VerdictValid, VerdictVoid, or VerdictNA. Verdict string // VoidReason explains a VOID verdict; empty otherwise. @@ -76,4 +85,16 @@ func (c *Collector) EmitRunSummary(ctx context.Context, summary RunSummary) { metric.WithAttributes( attribute.String("arrival_model", summary.ArrivalModel), attribute.String("verdict", summary.Verdict))) + runScheduleLagMax.Record(ctx, summary.ScheduleLagMax.Seconds(), + metric.WithAttributes( + attribute.String("arrival_model", summary.ArrivalModel), + attribute.String("verdict", summary.Verdict))) + var overBoundFraction float64 + if summary.ScheduleLagTotal > 0 { + overBoundFraction = float64(summary.ScheduleLagOverBoundCount) / float64(summary.ScheduleLagTotal) + } + runScheduleLagOverBoundFraction.Record(ctx, overBoundFraction, + metric.WithAttributes( + attribute.String("arrival_model", summary.ArrivalModel), + attribute.String("verdict", summary.Verdict))) } diff --git a/stats/schedule_lag_test.go b/stats/schedule_lag_test.go index b21d815..90b96f1 100644 --- a/stats/schedule_lag_test.go +++ b/stats/schedule_lag_test.go @@ -48,6 +48,43 @@ func TestRecordScheduleLag_FeedsVerdict(t *testing.T) { } c.RecordScheduleLag(50 * time.Millisecond) - v := EvaluateScheduleLag(c.ScheduleLagSamples(), 100, true, false, 100, 0) + v := EvaluateScheduleLag(ScheduleLagInputs{ + Samples: c.ScheduleLagSamples(), TargetTPS: 100, OpenLoop: true, Admitted: 100, + }) require.Equal(t, VerdictVoid, v.Verdict) } + +// The unsampled tail counters are exact, not reservoir-diluted: with the bound +// armed, every over-bound lag is counted regardless of reservoir replacement, +// and the max is the true max. +func TestRecordScheduleLag_UnsampledTailCounters(t *testing.T) { + c := NewCollector() + // Bound = 10% of 1/100 = 1ms (matches ScheduleLagBound(100, 0.10)). + c.SetScheduleLagBound(ScheduleLagBound(100, 0.10)) + + // Record far more than the reservoir cap so sampling is in play. + const over = 50 + for range scheduleLagReservoirCap * 2 { + c.RecordScheduleLag(100 * time.Microsecond) // under bound + } + for range over { + c.RecordScheduleLag(5 * time.Millisecond) // over the 1ms bound + } + c.RecordScheduleLag(80 * time.Millisecond) // the max + + total, overBound, max := c.ScheduleLagTail() + require.Equal(t, uint64(scheduleLagReservoirCap*2+over+1), total) + require.Equal(t, uint64(over+1), overBound) // exact, not sampled + require.Equal(t, 80*time.Millisecond, max) +} + +// Without an armed bound the over-bound counter stays inert (ramped / +// closed-loop / no-λ runs), but the max is still tracked for diagnostics. +func TestRecordScheduleLag_OverBoundInertWhenUnset(t *testing.T) { + c := NewCollector() + c.RecordScheduleLag(500 * time.Millisecond) + total, overBound, max := c.ScheduleLagTail() + require.Equal(t, uint64(1), total) + require.Equal(t, uint64(0), overBound) // no bound armed → inert + require.Equal(t, 500*time.Millisecond, max) +} diff --git a/stats/verdict.go b/stats/verdict.go index 2b4540a..3a8c3c7 100644 --- a/stats/verdict.go +++ b/stats/verdict.go @@ -13,6 +13,15 @@ import ( // Provisional value — tune from first calibration run. const scheduleLagVoidThreshold = 0.10 +// scheduleLagOverBoundFraction is the share of recorded sends that may exceed the +// VOID bound before the run is VOID on the unsampled tail signal. The whole-run +// p99 is computed from a uniform reservoir sample, so a sub-percentile late-run +// tail (the generator hiccupping near the end of a long run) can stay under the +// p99 yet still mean the generator fell behind. This exact (un-sampled) fraction +// catches that tail; it is a fraction, not a single max-lag, so a lone GC-pause +// outlier does not trip it. Provisional — tune from first calibration run. +const scheduleLagOverBoundFraction = 0.005 + // Verdict labels for a run's open-loop self-check. const ( VerdictValid = "VALID" @@ -49,76 +58,144 @@ type ScheduleLagVerdict struct { ArrivalInterval time.Duration // Threshold is the fraction of ArrivalInterval used as the VOID boundary. Threshold float64 + // OverBoundCount is the exact (un-sampled) count of sends whose lag exceeded + // the VOID bound; OverBoundTotal is the exact total recorded. Their ratio is + // the tail-degradation signal the reservoir p99 cannot dilute. + OverBoundCount uint64 + OverBoundTotal uint64 + // MaxLag is the largest lag recorded over the run (un-sampled), surfaced for + // diagnostics; it is not a gate on its own (a fraction is, to survive a lone + // outlier). + MaxLag time.Duration +} + +// ScheduleLagInputs carries the verdict inputs. It replaces a long positional +// signature (the tail figures pushed it past the point where adjacent bools and +// uints read clearly at the call site). +type ScheduleLagInputs struct { + // Samples is the reservoir copy used for the p99. + Samples []time.Duration + // TargetTPS is the configured λ; <=0 means no fixed rate → N/A. + TargetTPS float64 + // OpenLoop and Ramped gate applicability: only a fixed-λ open-loop, + // non-ramped run is evaluated. + OpenLoop bool + Ramped bool + // Admitted is the count of admitted txs, used only to flag the + // admitted-but-no-samples anomaly. + Admitted uint64 + // Threshold is the VOID fraction of 1/λ for the p99 bound; <=0 falls back to + // the provisional default. + Threshold float64 + // OverBoundCount / OverBoundTotal / MaxLag are the collector's exact + // (un-sampled) tail figures (see Collector.ScheduleLagTail). + OverBoundCount uint64 + OverBoundTotal uint64 + MaxLag time.Duration } -// EvaluateScheduleLag computes the open-loop self-check verdict from the -// recorded schedule_lag samples, the configured arrival rate targetTPS (λ), the -// run's arrival model, whether the run ramped λ, the count of admitted txs, and -// the VOID threshold fraction (<=0 falls back to the provisional default). p99 -// is the sorted-slice percentile, matching the repo's block-time percentile -// idiom. +// EvaluateScheduleLag computes the open-loop self-check verdict. p99 is the +// sorted-slice percentile of the reservoir sample, matching the repo's block-time +// percentile idiom; the run is also VOID on the exact (un-sampled) over-bound +// fraction, the tail signal the reservoir cannot dilute. // // The verdict is N/A — reported, never a gate — when the model is not open-loop, // when the run ramped λ (a ramp has no single 1/λ to bound against, and the -// ramper drives the live limit so targetTPS is stale), or when λ is not a single -// fixed rate (targetTPS <= 0). A fixed-λ open-loop run that recorded zero +// ramper drives the live limit so the configured λ is stale), or when λ is not a +// single fixed rate (TargetTPS <= 0). A fixed-λ open-loop run that recorded zero // schedule_lag samples is also N/A, not VALID: zero samples cannot distinguish a -// SUT that kept up from a recorder that never fired. When admitted > 0 yet no +// SUT that kept up from a recorder that never fired. When Admitted > 0 yet no // samples landed, Anomaly is set so the caller logs the mis-wiring loudly. // schedule_lag_p99 is still reported in every case. -func EvaluateScheduleLag(samples []time.Duration, targetTPS float64, openLoop, ramped bool, admitted uint64, threshold float64) ScheduleLagVerdict { +func EvaluateScheduleLag(in ScheduleLagInputs) ScheduleLagVerdict { + threshold := in.Threshold if threshold <= 0 { threshold = scheduleLagVoidThreshold } v := ScheduleLagVerdict{ Verdict: VerdictNA, - ScheduleLagP99: scheduleLagPercentile(samples, 99), - SampleCount: len(samples), + ScheduleLagP99: scheduleLagPercentile(in.Samples, 99), + SampleCount: len(in.Samples), Threshold: threshold, + OverBoundCount: in.OverBoundCount, + OverBoundTotal: in.OverBoundTotal, + MaxLag: in.MaxLag, } - if !openLoop { + if !in.OpenLoop { v.NAReason = "closed-loop run: open-loop self-check does not apply" return v } - if ramped { + if in.Ramped { v.NAReason = "ramped λ has no single arrival interval" return v } - if targetTPS <= 0 { + if in.TargetTPS <= 0 { v.NAReason = "no fixed arrival rate (λ): nothing to bound against" return v } - arrivalInterval := time.Duration(float64(time.Second) / targetTPS) + arrivalInterval := time.Duration(float64(time.Second) / in.TargetTPS) v.ArrivalInterval = arrivalInterval // Zero samples is N/A, not VALID: it cannot tell a SUT that kept up from a // recorder that never fired or a run that dropped every tick. Admitted txs // with no samples is an outright anomaly — flag it for the caller. - if len(samples) == 0 { + if len(in.Samples) == 0 { v.NAReason = "no schedule_lag samples recorded" - v.Anomaly = admitted > 0 + v.Anomaly = in.Admitted > 0 return v } - bound := time.Duration(threshold * float64(arrivalInterval)) + bound := ScheduleLagBound(in.TargetTPS, threshold) + + // Whole-run p99 over bound: the run was generator-bound across the sample. if v.ScheduleLagP99 > bound { v.Verdict = VerdictVoid - v.VoidReason = formatVoidReason(v.ScheduleLagP99, bound, threshold, arrivalInterval) + v.VoidReason = formatP99VoidReason(v.ScheduleLagP99, bound, threshold, arrivalInterval) return v } + // Unsampled tail: a sub-percentile share over the bound that the reservoir + // p99 diluted. Checked only when the bound was armed (OverBoundTotal > 0). + if in.OverBoundTotal > 0 { + if frac := float64(in.OverBoundCount) / float64(in.OverBoundTotal); frac > scheduleLagOverBoundFraction { + v.Verdict = VerdictVoid + v.VoidReason = formatTailVoidReason(in.OverBoundCount, in.OverBoundTotal, frac, bound) + return v + } + } v.Verdict = VerdictValid return v } -func formatVoidReason(p99, bound time.Duration, threshold float64, arrivalInterval time.Duration) string { +// ScheduleLagBound is the VOID bound, threshold × 1/λ, for a fixed-λ open-loop +// run. Returns zero when there is no single fixed rate (targetTPS <= 0), so the +// caller leaves the collector's over-bound counter inert. threshold <= 0 falls +// back to the provisional default, matching EvaluateScheduleLag. +func ScheduleLagBound(targetTPS, threshold float64) time.Duration { + if targetTPS <= 0 { + return 0 + } + if threshold <= 0 { + threshold = scheduleLagVoidThreshold + } + arrivalInterval := time.Duration(float64(time.Second) / targetTPS) + return time.Duration(threshold * float64(arrivalInterval)) +} + +func formatP99VoidReason(p99, bound time.Duration, threshold float64, arrivalInterval time.Duration) string { return fmt.Sprintf( - "generator-bound: schedule_lag_p99 %s exceeds %s (%.0f%% of arrival interval %s) — load was not open-loop", + "generator-bound: schedule_lag_p99 %s exceeds %s bound (%.0f%% of arrival interval %s) — load was not open-loop", p99.Round(time.Microsecond), bound.Round(time.Microsecond), threshold*100, arrivalInterval.Round(time.Microsecond)) } +func formatTailVoidReason(overBound, total uint64, frac float64, bound time.Duration) string { + return fmt.Sprintf( + "tail degradation: %.2f%% of sends (%d/%d) exceeded the %s bound — generator fell behind on a sub-percentile tail the p99 missed", + frac*100, overBound, total, bound.Round(time.Microsecond)) +} + // scheduleLagPercentile returns the percentile of a copy-then-sort of samples, // reusing the repo's calculatePercentile index rule. Copies so the caller's // slice order is preserved. diff --git a/stats/verdict_test.go b/stats/verdict_test.go index 60cf092..9bb123b 100644 --- a/stats/verdict_test.go +++ b/stats/verdict_test.go @@ -26,7 +26,9 @@ func TestEvaluateScheduleLag_OverDrivenIsVoid(t *testing.T) { } samples = append(samples, 50*time.Millisecond) // the p99 element - v := EvaluateScheduleLag(samples, 100, true, false, 100, 0) + v := EvaluateScheduleLag(ScheduleLagInputs{ + Samples: samples, TargetTPS: 100, OpenLoop: true, Admitted: 100, + }) require.Equal(t, VerdictVoid, v.Verdict) require.NotEmpty(t, v.VoidReason) @@ -39,7 +41,9 @@ func TestEvaluateScheduleLag_HealthyIsValid(t *testing.T) { samples := lags(0, 0, 0, 0, 0, 0, 0, 0, 0, 0) // all 0ms, p99 = 0 samples = append(samples, 200*time.Microsecond) - v := EvaluateScheduleLag(samples, 100, true, false, 100, 0) + v := EvaluateScheduleLag(ScheduleLagInputs{ + Samples: samples, TargetTPS: 100, OpenLoop: true, Admitted: 100, + }) require.Equal(t, VerdictValid, v.Verdict) require.Empty(t, v.VoidReason) @@ -55,7 +59,9 @@ func TestEvaluateScheduleLag_P99ComputedCorrectly(t *testing.T) { } // targetTPS=0 keeps verdict N/A but still reports p99. - v := EvaluateScheduleLag(samples, 0, true, false, 100, 0) + v := EvaluateScheduleLag(ScheduleLagInputs{ + Samples: samples, TargetTPS: 0, OpenLoop: true, Admitted: 100, + }) require.Equal(t, 100*time.Millisecond, v.ScheduleLagP99) require.Equal(t, 100, v.SampleCount) } @@ -64,7 +70,9 @@ func TestEvaluateScheduleLag_P99ComputedCorrectly(t *testing.T) { func TestEvaluateScheduleLag_ClosedLoopIsNA(t *testing.T) { samples := lags(500, 500, 500) // huge lag, would be VOID if open-loop - v := EvaluateScheduleLag(samples, 100, false, false, 3, 0) + v := EvaluateScheduleLag(ScheduleLagInputs{ + Samples: samples, TargetTPS: 100, OpenLoop: false, Admitted: 3, + }) require.Equal(t, VerdictNA, v.Verdict) require.Empty(t, v.VoidReason) @@ -73,7 +81,9 @@ func TestEvaluateScheduleLag_ClosedLoopIsNA(t *testing.T) { // Open-loop with no fixed λ (TPS=0) cannot bound against 1/λ → N/A. func TestEvaluateScheduleLag_NoFixedRateIsNA(t *testing.T) { - v := EvaluateScheduleLag(lags(100, 200, 300), 0, true, false, 3, 0) + v := EvaluateScheduleLag(ScheduleLagInputs{ + Samples: lags(100, 200, 300), TargetTPS: 0, OpenLoop: true, Admitted: 3, + }) require.Equal(t, VerdictNA, v.Verdict) require.Equal(t, time.Duration(0), v.ArrivalInterval) } @@ -82,7 +92,9 @@ func TestEvaluateScheduleLag_NoFixedRateIsNA(t *testing.T) { // there is no single 1/λ to gate against — N/A regardless of TPS. func TestEvaluateScheduleLag_RampedIsNA(t *testing.T) { // TPS>0 but ramped: must still be N/A, not gated against the stale 1/TPS. - v := EvaluateScheduleLag(lags(500, 500, 500), 100, true, true, 3, 0) + v := EvaluateScheduleLag(ScheduleLagInputs{ + Samples: lags(500, 500, 500), TargetTPS: 100, OpenLoop: true, Ramped: true, Admitted: 3, + }) require.Equal(t, VerdictNA, v.Verdict) require.Empty(t, v.VoidReason) require.Equal(t, "ramped λ has no single arrival interval", v.NAReason) @@ -92,7 +104,9 @@ func TestEvaluateScheduleLag_RampedIsNA(t *testing.T) { // No samples on a fixed-λ run is N/A, not VALID: it cannot distinguish a SUT // that kept up from a recorder that never fired. func TestEvaluateScheduleLag_NoSamplesIsNA(t *testing.T) { - v := EvaluateScheduleLag(nil, 100, true, false, 0, 0) + v := EvaluateScheduleLag(ScheduleLagInputs{ + Samples: nil, TargetTPS: 100, OpenLoop: true, Admitted: 0, + }) require.Equal(t, VerdictNA, v.Verdict) require.Equal(t, "no schedule_lag samples recorded", v.NAReason) require.False(t, v.Anomaly) // zero admitted: no anomaly, just an empty run @@ -101,7 +115,9 @@ func TestEvaluateScheduleLag_NoSamplesIsNA(t *testing.T) { // Admitted txs but zero samples is an anomaly: the recorder likely never fired. func TestEvaluateScheduleLag_AdmittedButNoSamplesIsAnomaly(t *testing.T) { - v := EvaluateScheduleLag(nil, 100, true, false, 5000, 0) + v := EvaluateScheduleLag(ScheduleLagInputs{ + Samples: nil, TargetTPS: 100, OpenLoop: true, Admitted: 5000, + }) require.Equal(t, VerdictNA, v.Verdict) require.Equal(t, "no schedule_lag samples recorded", v.NAReason) require.True(t, v.Anomaly) @@ -111,6 +127,62 @@ func TestEvaluateScheduleLag_AdmittedButNoSamplesIsAnomaly(t *testing.T) { func TestEvaluateScheduleLag_ConfiguredThreshold(t *testing.T) { samples := lags(2) // p99 = 2ms; interval at 100 TPS = 10ms // 10% bound = 1ms → VOID; 50% bound = 5ms → VALID. - require.Equal(t, VerdictVoid, EvaluateScheduleLag(samples, 100, true, false, 1, 0.10).Verdict) - require.Equal(t, VerdictValid, EvaluateScheduleLag(samples, 100, true, false, 1, 0.50).Verdict) + require.Equal(t, VerdictVoid, EvaluateScheduleLag(ScheduleLagInputs{ + Samples: samples, TargetTPS: 100, OpenLoop: true, Admitted: 1, Threshold: 0.10, + }).Verdict) + require.Equal(t, VerdictValid, EvaluateScheduleLag(ScheduleLagInputs{ + Samples: samples, TargetTPS: 100, OpenLoop: true, Admitted: 1, Threshold: 0.50, + }).Verdict) +} + +// A late-run sub-percentile tail: whole-run p99 sits UNDER the bound (the +// reservoir diluted the tail), but the exact over-bound fraction exceeds the +// threshold → VOID with the tail reason. At 100 TPS / 10% the bound is 1ms. +func TestEvaluateScheduleLag_TailDegradationIsVoid(t *testing.T) { + // p99 of the sample is comfortably under the 1ms bound. + samples := lags(0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + // 0.8% of 100k sends exceeded the bound — above the 0.5% fraction. + v := EvaluateScheduleLag(ScheduleLagInputs{ + Samples: samples, TargetTPS: 100, OpenLoop: true, Admitted: 100_000, + OverBoundCount: 800, OverBoundTotal: 100_000, MaxLag: 80 * time.Millisecond, + }) + require.Equal(t, VerdictVoid, v.Verdict) + require.Contains(t, v.VoidReason, "tail degradation") + require.Contains(t, v.VoidReason, "0.80%") + require.Less(t, v.ScheduleLagP99, time.Millisecond) // p99 alone would pass + require.Equal(t, 80*time.Millisecond, v.MaxLag) // surfaced for diagnostics +} + +// A single over-bound outlier (one GC pause) well under the fraction must NOT +// trip the tail gate: the run stays VALID. This is why the gate is a fraction, +// not maxLag alone. +func TestEvaluateScheduleLag_LoneOutlierStaysValid(t *testing.T) { + samples := lags(0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + // 1 / 100k = 0.001% over bound, far below the 0.5% fraction. + v := EvaluateScheduleLag(ScheduleLagInputs{ + Samples: samples, TargetTPS: 100, OpenLoop: true, Admitted: 100_000, + OverBoundCount: 1, OverBoundTotal: 100_000, MaxLag: 200 * time.Millisecond, + }) + require.Equal(t, VerdictValid, v.Verdict) + require.Empty(t, v.VoidReason) + require.Equal(t, 200*time.Millisecond, v.MaxLag) // still surfaced +} + +// A clean run with no over-bound sends is VALID; the tail gate is a no-op. +func TestEvaluateScheduleLag_NoOverBoundIsValid(t *testing.T) { + samples := lags(0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + v := EvaluateScheduleLag(ScheduleLagInputs{ + Samples: samples, TargetTPS: 100, OpenLoop: true, Admitted: 100_000, + OverBoundCount: 0, OverBoundTotal: 100_000, MaxLag: 500 * time.Microsecond, + }) + require.Equal(t, VerdictValid, v.Verdict) + require.Empty(t, v.VoidReason) +} + +// ScheduleLagBound returns threshold × 1/λ, zero when λ is not fixed, and falls +// back to the default threshold — the single source the collector arms from. +func TestScheduleLagBound(t *testing.T) { + require.Equal(t, time.Millisecond, ScheduleLagBound(100, 0.10)) // 10% of 10ms + require.Equal(t, time.Millisecond, ScheduleLagBound(100, 0)) // default 0.10 + require.Equal(t, time.Duration(0), ScheduleLagBound(0, 0.10)) // no fixed λ } From c1da595ec67d99c6bff60d5779eaef533c1141ef Mon Sep 17 00:00:00 2001 From: bdchatham Date: Mon, 15 Jun 2026 16:04:45 -0700 Subject: [PATCH 4/5] fix(stats): don't arm over-bound counter on ramped runs (re-review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both re-reviewers flagged that the over-bound counter was armed whenever TPS>0, including ramped open-loop runs (RampUp+TPS>0 is a valid config) — the verdict is N/A there so it was never a false-VOID, but it emitted a meaningless over_bound_fraction and contradicted the 'inert on ramped runs' comment. Gate arming on !RampUp so the counter stays inert exactly where the verdict is N/A. Co-Authored-By: Claude Opus 4.8 --- main.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/main.go b/main.go index 1f8e953..5f6d7aa 100644 --- a/main.go +++ b/main.go @@ -325,8 +325,9 @@ func runLoadTest(ctx context.Context, cmd *cobra.Command) error { dispatcher.SetOpenLoop(sharedLimiter, cfg.Settings.MaxInFlight) // Arm the unsampled over-bound counter for this fixed-λ open-loop run: // same VOID bound the verdict uses (threshold × 1/λ), known here at run - // start. Inert on non-fixed-λ runs (no SetScheduleLagBound call). - if bound := stats.ScheduleLagBound(cfg.Settings.TPS, cfg.Settings.ScheduleLagVoidThreshold); bound > 0 { + // start. Skipped under RampUp (verdict is N/A — no single 1/λ) and on + // non-fixed-λ runs, so the counter stays inert where it isn't judged. + if bound := stats.ScheduleLagBound(cfg.Settings.TPS, cfg.Settings.ScheduleLagVoidThreshold); bound > 0 && !cfg.Settings.RampUp { collector.SetScheduleLagBound(bound) } log.Printf("📤 Arrival model: open_loop (max in-flight: %d)", cfg.Settings.MaxInFlight) From 7caba67809cfc727a770b65caa2322153c636a50 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Mon, 15 Jun 2026 16:32:44 -0700 Subject: [PATCH 5/5] docs(stats): comment-discipline sweep (PLT-463) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Strip bare (PLT-463) self-labels and a was-X-now-Y changelog line, drop a standalone TODO and a few what-comments. Keep load-bearing why/invariant comments (reservoir-dilution rationale, Little's-law sizing, registered ⊆ succeeded, negative-lag clamp) and forward-pointing cross-refs. Comment-only. Co-Authored-By: Claude Opus 4.8 --- config/settings.go | 2 +- main.go | 9 ++++----- sender/worker.go | 4 ++-- stats/metrics.go | 2 +- stats/run_summary.go | 3 +-- 5 files changed, 9 insertions(+), 11 deletions(-) diff --git a/config/settings.go b/config/settings.go index 41f1f6f..06f085d 100644 --- a/config/settings.go +++ b/config/settings.go @@ -43,7 +43,7 @@ type Settings struct { // counted rather than throttling the arrival clock. Ignored in closed-loop. MaxInFlight int `json:"maxInFlight,omitempty"` // ScheduleLagVoidThreshold is the fraction of the arrival interval (1/λ) that - // schedule_lag_p99 may reach before an open-loop run is VOID (PLT-463). Zero + // schedule_lag_p99 may reach before an open-loop run is VOID. Zero // uses the provisional built-in default; set via config to retune without a // rebuild. Ignored in closed-loop. ScheduleLagVoidThreshold float64 `json:"scheduleLagVoidThreshold,omitempty"` diff --git a/main.go b/main.go index 5f6d7aa..0d3eccc 100644 --- a/main.go +++ b/main.go @@ -261,8 +261,7 @@ func runLoadTest(ctx context.Context, cmd *cobra.Command) error { }) } - // The --track-receipts flag now enables the block-indexed inclusion - // tracker (the lossy per-tx receipt path is retired). + // --track-receipts enables the block-indexed inclusion tracker. // Not wired under --dry-run: simulated sends never hit the chain, so they // would all reap as expired and pollute the inclusion stats. inclusion := utils.None[*stats.InclusionTracker]() @@ -429,9 +428,9 @@ func runLoadTest(ctx context.Context, cmd *cobra.Command) error { log.Printf("📦 Inclusion: included=%d expired=%d dropped_at_cap=%d inflight_at_shutdown=%d", incl.Included, incl.Expired, incl.DroppedAtCap, incl.InflightAtShutdown) } - // Open-loop self-check (PLT-463): compute schedule_lag_p99 and the run - // verdict. Gated on the model the run actually used (summary.ArrivalModel, - // not the requested flag — the txs-writer path downgrades to closed_loop). + // Open-loop self-check: compute schedule_lag_p99 and the run verdict. Gated + // on the model the run actually used (summary.ArrivalModel, not the requested + // flag — the txs-writer path downgrades to closed_loop). openLoopRun := summary.ArrivalModel == config.ArrivalModelOpenLoop lagTotal, lagOverBound, lagMax := collector.ScheduleLagTail() verdict := stats.EvaluateScheduleLag(stats.ScheduleLagInputs{ diff --git a/sender/worker.go b/sender/worker.go index a11b690..d26e2c0 100644 --- a/sender/worker.go +++ b/sender/worker.go @@ -184,8 +184,8 @@ func (w *Worker) runTxSender(ctx context.Context, client *ethclient.Client) erro } w.cfg.Collector.RecordTransaction(tx.Scenario.Name, w.cfg.Endpoint, time.Since(startTime), err == nil) // Register at send-completion, only on success: registered ⊆ succeeded. - // (The tracker is wired only for live runs — see main.go; DryRun never - // gets a tracker, so simulated sends are not inclusion-tracked.) + // The tracker is present only for live runs (wired in main.go; never under + // DryRun). if err == nil { if t, ok := w.cfg.Inclusion.Get(); ok { t.Register(tx) diff --git a/stats/metrics.go b/stats/metrics.go index 6b42015..5b27dab 100644 --- a/stats/metrics.go +++ b/stats/metrics.go @@ -88,7 +88,7 @@ var ( metric.WithDescription("In-flight inclusion registry size at run end (emitted once at run end)"), metric.WithUnit("{transactions}"))) - // Open-loop self-check (PLT-463). Emitted once at run end on every run; the + // Open-loop self-check. Emitted once at run end on every run; the // verdict label distinguishes VALID / VOID / N/A so a generator-bound run is // queryable, not just a log line. runScheduleLagP99 = must(meter.Float64Gauge( diff --git a/stats/run_summary.go b/stats/run_summary.go index b985e5e..591d211 100644 --- a/stats/run_summary.go +++ b/stats/run_summary.go @@ -30,7 +30,6 @@ type RunSummary struct { // registered == Included + Expired + InflightAtShutdown, with // registered ⊆ succeeded. InclusionTracked disambiguates a not-tracked run // (all zero, flag false) from a tracked run with no inclusions yet. - // TODO(PLT-467): owns run-summary schema versioning for these fields. InclusionTracked bool // Included is the count of txs the tracker observed on-chain (stamped). Included uint64 @@ -42,7 +41,7 @@ type RunSummary struct { // InflightAtShutdown is len(inflight) read after workers and tracker joined. InflightAtShutdown uint64 - // Open-loop self-check (PLT-463): schedule_lag = AttemptedSendTime − + // Open-loop self-check: schedule_lag = AttemptedSendTime − // IntendedSendTime per tx. A p99 above the threshold fraction of the arrival // interval (1/λ) means the generator could not keep its own schedule, so the // run was generator-bound, not open-loop, and is VOID. Reported on every run