Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 93 additions & 53 deletions explorer.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -2858,7 +2858,6 @@ zoomWatcher = {
let heatmapDebounce = null;
let heatmapLastKey = null;
const HEATMAP_CANVAS_SIZE = 512;
const HEATMAP_LIMIT = 100000;

function heatmapEnabled() {
return document.getElementById('heatmapToggle')?.checked === true;
Expand Down Expand Up @@ -2890,13 +2889,35 @@ zoomWatcher = {
function getHeatmapInstance() {
if (heatmapInstance) return heatmapInstance;
if (!window.h337) throw new Error('heatmap.js did not load');
// maxOpacity caps the rendered alpha so dense areas don't fully
// wash out the satellite imagery underneath. Without this, world
// view (35k+ pixel cells with overlapping blur radii) saturates
// to solid red. RY feedback 2026-05-27 on PR #240 follow-up.
heatmapInstance = window.h337.create({
container: ensureHeatmapContainer(),
radius: 25,
maxOpacity: 0.6,
});
return heatmapInstance;
}

// Adaptive per-point radius. heatmap.js applies a Gaussian blur of
// size `radius` around each data point; overlapping blurs add
// linearly, so at high cell density (world view: 35k cells on 512²
// canvas, each cell's default 25-pixel blur covering ~1% of canvas)
// the sum exceeds 1.0 across most of the canvas and everything
// saturates to full red regardless of underlying density.
//
// Empirical scaling: at world view (35k cells) want ~6 px; at small
// viewports (~300 cells) want ~30 px to fill space smoothly.
// sqrt(canvas_pixels / cell_count) gives ~3 at world, ~30 at small —
// double it and clamp to [6, 30].
function heatmapRadiusFor(cellCount) {
const canvasPx = HEATMAP_CANVAS_SIZE * HEATMAP_CANVAS_SIZE;
const raw = Math.sqrt(canvasPx / Math.max(1, cellCount)) * 2;
return Math.max(6, Math.min(30, Math.round(raw)));
}

function heatmapFilterHash() {
return JSON.stringify({
sources: getActiveSources().slice().sort(),
Expand Down Expand Up @@ -2950,57 +2971,79 @@ zoomWatcher = {
if (!heatmapEnabled()) return;
setHeatmapStatus('Rendering heatmap...');
try {
const rows = await db.query(`
SELECT latitude, longitude
FROM read_parquet('${lite_url}')
WHERE ${heatmapBboxPredicate(bounds, 'latitude', 'longitude')}
${sourceFilterSQL('source')}
${facetFilterSQL()}
LIMIT ${HEATMAP_LIMIT}
`);
if (myReq !== heatmapReqId || !heatmapEnabled()) return;

// SQL pre-aggregation at pixel resolution (issue #233 phase 1.5).
//
// Previous approach: SELECT latitude, longitude LIMIT 100000 then
// bin per pixel in JS. Two problems:
// (1) LIMIT 100000 picks an arbitrary first 100k rows in parquet
// storage order — NOT geographic random. At world view, the
// heatmap silently showed whichever source happened to be
// physically first in the file (likely SESAR).
// (2) For sample sets above the cap, the density was unfaithful.
//
// This approach: push the binning into DuckDB. The SQL groups by
// pixel-cell coordinates derived from the bbox + canvas size, so
// each row returned is one (x, y, count) tuple. Result cardinality
// is bounded by canvas pixels (≤ 512² = 262k), independent of how
// many samples the bbox contains. No LIMIT needed — every sample
// counted into its true pixel bucket.
//
// Antimeridian handling: when bbox wraps (west > east), the SQL
// shifts longitudes < west by +360 so the pixel arithmetic works
// in a continuous coordinate space, matching what the old JS loop
// did at line 2976. Same `eastForRectangle` adjustment downstream.
const width = HEATMAP_CANVAS_SIZE;
const height = HEATMAP_CANVAS_SIZE;
const west = bounds.west;
const eastNorm = bounds.west > bounds.east ? bounds.east + 360 : bounds.east;
const lngSpan = Math.max(1e-9, eastNorm - west);
const latSpan = Math.max(1e-9, bounds.north - bounds.south);
const bins = new Map();
let max = 1;

for (const row of rows) {
let lng = Number(row.longitude);
const lat = Number(row.latitude);
if (!Number.isFinite(lat) || !Number.isFinite(lng)) continue;
if (bounds.west > bounds.east && lng < west) lng += 360;
const x = Math.max(0, Math.min(width - 1, Math.floor(((lng - west) / lngSpan) * width)));
const y = Math.max(0, Math.min(height - 1, Math.floor(((bounds.north - lat) / latSpan) * height)));
const binKey = `${x},${y}`;
const next = (bins.get(binKey) || 0) + 1;
bins.set(binKey, next);
if (next > max) max = next;
}
const wraps = bounds.west > bounds.east;
// SQL-side pixel coordinate computation. CAST(... AS INTEGER) is
// explicit so DuckDB groups by integer keys, not floats.
const lngExprBase = `(longitude ${wraps ? `+ CASE WHEN longitude < ${west} THEN 360 ELSE 0 END` : ``})`;
const xExpr = `CAST(LEAST(${width - 1}, GREATEST(0, FLOOR((${lngExprBase} - ${west}) / ${lngSpan} * ${width}))) AS INTEGER)`;
const yExpr = `CAST(LEAST(${height - 1}, GREATEST(0, FLOOR((${bounds.north} - latitude) / ${latSpan} * ${height}))) AS INTEGER)`;
const aggregated = await db.query(`
SELECT
${xExpr} AS x,
${yExpr} AS y,
COUNT(*) AS n
FROM read_parquet('${lite_url}')
WHERE ${heatmapBboxPredicate(bounds, 'latitude', 'longitude')}
${sourceFilterSQL('source')}
${facetFilterSQL()}
GROUP BY x, y
`);
if (myReq !== heatmapReqId || !heatmapEnabled()) return;

// Log-scale bin weights to defeat supersite max-bias.
// iSamples data has extreme power-law spatial distribution: at
// Cyprus medium zoom, one position carries 52,252 co-located
// samples (likely a museum aggregation) while the median
// position has 2 — a 26,000× ratio. Linear heatmap.js
// max-normalization makes the supersite bin full red and
// everything else essentially invisible (2/52252 = 0.004%
// intensity). log(1+n) compresses the supersite (log(52253) ≈
// 10.86) and lifts the median (log(3) ≈ 1.10), bringing the
// ratio to ~10× and revealing the actual density distribution
// the user expects to see. RY feedback 2026-05-27 on PR #240.
const points = [];
// SQL did the binning. Convert each row to a heatmap.js point.
// Log-scale bin weights to defeat supersite max-bias. iSamples
// data has extreme power-law spatial distribution: at Cyprus
// medium zoom, one position carries 52,252 co-located samples
// (likely a museum aggregation) while the median position has
// 2 — a 26,000× ratio. Linear heatmap.js max-normalization
// makes the supersite bin full red and everything else
// essentially invisible (2/52252 = 0.004% intensity). log(1+n)
// compresses the supersite (log(52253) ≈ 10.86) and lifts the
// median (log(3) ≈ 1.10), bringing the ratio to ~10× and
// revealing the actual density distribution the user expects
// to see. RY feedback 2026-05-27 on PR #240.
const pointsRaw = [];
let logMax = 0;
for (const [binKey, value] of bins) {
const [x, y] = binKey.split(',').map(Number);
const logVal = Math.log1p(value);
let totalSamples = 0;
for (const row of aggregated) {
const n = Number(row.n);
totalSamples += n;
const logVal = Math.log1p(n);
if (logVal > logMax) logMax = logVal;
points.push({ x, y, value: logVal });
pointsRaw.push({ x: Number(row.x), y: Number(row.y), value: logVal });
}
// Adaptive radius: tight at high cell counts (world view) to
// avoid blur-overlap saturation; wide at low cell counts to
// fill space smoothly.
const radius = heatmapRadiusFor(pointsRaw.length);
const points = pointsRaw.map(p => ({ ...p, radius }));

const hm = getHeatmapInstance();
hm.setData({ min: 0, max: logMax, data: points });
Expand All @@ -3022,25 +3065,22 @@ zoomWatcher = {
heatmapImageryLayer = nextLayer;
heatmapLastKey = key; // success-only — see refreshHeatmap()
const refreshedAt = Date.now();
const capped = rows.length >= HEATMAP_LIMIT;
// With SQL pre-aggregation, every sample in the bbox is counted
// into its pixel cell — no more arbitrary LIMIT cap. `capped` is
// kept on the state shape (for spec back-compat) but always
// false. `lastPointCount` is now the true sample total, not the
// capped raw-row count.
viewer._heatmapOverlay = {
enabled: true,
layer: heatmapImageryLayer,
lastRefreshAt: refreshedAt,
lastPointCount: rows.length,
lastPointCount: totalSamples,
lastBinnedPointCount: points.length,
lastImageHash: heatmapStringHash(url),
lastKey: key,
capped,
capped: false,
};
// Codex round-1 review of #240: silent cap is misleading on
// global views (lite parquet has ~6M rows; LIMIT 100k shows an
// arbitrary first 100k, not honest density). Phase 2 progressive
// refinement removes the cap; for phase 1, warn explicitly so
// the user knows the heatmap is a sample, not the full density.
setHeatmapStatus(capped
? `Heatmap rendered from first ${HEATMAP_LIMIT.toLocaleString()} samples (capped — zoom or filter for full density).`
: `Heatmap rendered from ${rows.length.toLocaleString()} samples.`);
setHeatmapStatus(`Heatmap rendered from ${totalSamples.toLocaleString()} samples.`);
} catch (err) {
if (myReq !== heatmapReqId) return;
console.warn('Heatmap refresh failed:', err);
Expand Down
32 changes: 32 additions & 0 deletions tests/playwright/heatmap-overlay.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -135,4 +135,36 @@ test.describe('Heatmap overlay (#233 phase 1)', () => {
// Also assert the toggle DOM reflects the hydrated state.
await expect(page.locator('#heatmapToggle')).toBeChecked();
});

test('world view counts every sample (no LIMIT cap — phase 1.5)', async ({ page }) => {
// PR #241 (SQL pre-aggregation) removed the 100k LIMIT that PR #240
// had. This test pins that property: world view at alt=15Mkm should
// see > 100k samples (true count is ~6M) AND `capped` must be false.
// Codex round-1 review of #241 suggested this assertion to lock in
// the architectural promise that LIMIT is gone for good.
const WORLD_HASH = '#v=1&lat=20&lng=0&alt=15000000';
await page.goto(explorerUrl(WORLD_HASH + '&heatmap=1'), {
waitUntil: 'domcontentloaded',
timeout: 60000,
});
await page.waitForSelector('#cesiumContainer', { timeout: 30000 });
await page.waitForFunction(() => !!window._ojs?.ojsConnector?.mainModule, null, { timeout: 60000 });
await expect.poll(async () => {
const state = await heatmapState(page);
return state.enabled && state.hasLayer && state.lastPointCount > 0;
}, {
timeout: 120000,
intervals: [500, 1000, 2000],
}).toBeTruthy();
const state = await heatmapState(page);
expect(state.lastPointCount).toBeGreaterThan(100000);
// Codex round-2 polish: assert the raw `capped` field value (must be
// strictly false), not just "not-true" (which would also pass for
// undefined / null / etc).
const cappedRaw = await page.evaluate(async () => {
const v = await window._ojs.ojsConnector.mainModule.value('viewer');
return v?._heatmapOverlay?.capped;
});
expect(cappedRaw).toBe(false);
});
});
Loading