Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 7 additions & 80 deletions .github/workflows/refresh-counts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,86 +72,13 @@ jobs:
node-version: '20'

- name: Refresh registry counts (npm / PyPI / crates)
# Intentionally NO `set -e`/`-o pipefail` here — matches the original
# update-installs.yml behavior. Single-package curl failures fall
# through (curl exits non-zero, jq emits nothing, `$(())` treats the
# empty capture as 0, NPM_TOTAL just doesn't grow that iteration),
# and per-source HWM logic ensures we never regress. With pipefail,
# one transient registry blip would abort the whole daily refresh.
run: |
CACHE_FILE=".vitepress/theme/installs-cache.json"
TODAY=$(date +%Y-%m-%d)

# ── Fetch fresh registry counts ──
NPM_TOTAL=0
for pkg in runcycles @runcycles/mcp-server @runcycles/openclaw-budget-guard; do
COUNT=$(curl -sf "https://api.npmjs.org/downloads/point/2020-01-01:${TODAY}/${pkg}" | jq '.downloads // 0')
NPM_TOTAL=$((NPM_TOTAL + COUNT))
done
echo "npm=$NPM_TOTAL"

# PyPI: cumulative non-mirror downloads. /overall returns a daily
# series; sum it. Previously /recent .data.last_month was used, which
# is rolling and broke HWM semantics (a quiet month silently froze
# the displayed count above actual cumulative installs).
PYPI_TOTAL=0
for pkg in runcycles runcycles-openai-agents; do
COUNT=$(curl -sf "https://pypistats.org/api/packages/${pkg}/overall?mirrors=false" | jq '[.data[].downloads] | add // 0')
PYPI_TOTAL=$((PYPI_TOTAL + COUNT))
done
echo "pypi=$PYPI_TOTAL"

CRATES_TOTAL=0
for pkg in runcycles; do
COUNT=$(curl -sf -H 'User-Agent: runcycles-docs (https://github.com/runcycles/docs)' "https://crates.io/api/v1/crates/${pkg}" | jq '.crate.downloads // 0')
CRATES_TOTAL=$((CRATES_TOTAL + COUNT))
done
echo "crates=$CRATES_TOTAL"

# ── Read existing cache ──
if [ -f "$CACHE_FILE" ]; then
CACHED=$(cat "$CACHE_FILE")
else
# Cold start with the current schema — empty everything.
CACHED='{"npm":0,"pypi":0,"crates":0,"clones":0,"clonesByRepo":{},"releases":0,"releasesByRepo":{},"ghPackages":0,"maven":0,"total":0,"fetchedAt":""}'
fi

# ── Schema-regression guard: refuse to operate on a pre-PR-#515 cache. ──
# If the file is missing `clones` / `clonesByRepo` AND has the legacy
# `ghcr` field, this workflow would silently propagate the wipe by
# merging registry updates over the top. Fail loudly instead.
HAS_CLONES=$(echo "$CACHED" | jq 'has("clones") and has("clonesByRepo")')
HAS_GHCR=$(echo "$CACHED" | jq 'has("ghcr")')
if [ "$HAS_CLONES" = "false" ] && [ "$HAS_GHCR" = "true" ]; then
echo "::error::cache schema regression at $CACHE_FILE: pre-PR-#515 shape detected."
echo "Re-seed via 'GITHUB_TOKEN=... npm run build' locally and commit. See .outreach/installs-cache-runbook.md."
exit 1
fi

CACHED_NPM=$(echo "$CACHED" | jq '.npm // 0')
CACHED_PYPI=$(echo "$CACHED" | jq '.pypi // 0')
CACHED_CRATES=$(echo "$CACHED" | jq '.crates // 0')
echo "cached: npm=$CACHED_NPM pypi=$CACHED_PYPI crates=$CACHED_CRATES"

# ── Per-source HWMs (registry sources only) ──
NPM_HWM=$(( NPM_TOTAL > CACHED_NPM ? NPM_TOTAL : CACHED_NPM ))
PYPI_HWM=$(( PYPI_TOTAL > CACHED_PYPI ? PYPI_TOTAL : CACHED_PYPI ))
CRATES_HWM=$(( CRATES_TOTAL > CACHED_CRATES ? CRATES_TOTAL : CACHED_CRATES ))
echo "hwm: npm=$NPM_HWM pypi=$PYPI_HWM crates=$CRATES_HWM"

# ── Merge fresh registry HWMs into the cache. `total` and `fetchedAt`
# are intentionally NOT written here — the Node step that follows
# owns those fields, so there is a single canonical writer for the
# displayed total (matching the installs.data.ts loader formula).
NEW=$(echo "$CACHED" | jq --indent 2 \
--argjson npm "$NPM_HWM" \
--argjson pypi "$PYPI_HWM" \
--argjson crates "$CRATES_HWM" \
'. + { npm: $npm, pypi: $pypi, crates: $crates }')
# Preserve trailing newline to match installs.data.ts's writeFileSync
# (`JSON.stringify(...) + '\n'`); avoids a one-line cosmetic diff
# whenever the build runs after this workflow.
printf '%s\n' "$NEW" > "$CACHE_FILE"
# Per-package HWMs in scripts/update-registry-counts.mjs. Replaces
# the prior inline bash + curl + jq, which used aggregate-only HWMs
# and silently masked legitimate growth in one package whenever
# another package's API call failed on the same run (pypistats.org
# has known intermittent CDN issues — verified 2026-05-09 when two
# consecutive runs returned just one of two packages each).
run: node scripts/update-registry-counts.mjs

- name: Refresh GitHub-side counts (clones / releases / ghPackages)
env:
Expand Down
99 changes: 99 additions & 0 deletions .vitepress/theme/__tests__/installs.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,102 @@ describe('clones day-cursor accumulator', () => {
expect(result.lastSeenDay).toBe('2026-04-17')
})
})

// ── Per-package HWM (registry counts) ────────────────────────────────
//
// Mirrors hwmPerPackage in installs.data.ts and scripts/update-registry-counts.mjs.
// Per-package HWMs solve the failure mode where one package's API call
// fails on the same run as another's legit growth — aggregate-only HWMs
// would mask that growth; per-package HWMs preserve each independently.
function hwmPerPackage(
  packages: readonly string[],
  fetched: Record<string, number | null>,
  cachedByPackage: Record<string, number>,
): { byPackage: Record<string, number>; aggregate: number } {
  // Seed from the cached map so packages no longer in `packages` keep
  // their prior counts — a shrinking source list never regresses the
  // aggregate.
  const byPackage: Record<string, number> = { ...cachedByPackage }
  for (const name of packages) {
    const freshCount = fetched[name]
    const priorHwm = byPackage[name] ?? 0
    // `null` (or a missing key) marks a failed fetch: hold the cached
    // HWM. A real number — including a legitimate 0 — competes with the
    // cached value via max(), so successful zeros are not failures.
    byPackage[name] = freshCount == null ? priorHwm : Math.max(freshCount, priorHwm)
  }
  // Sum defensively: skip any non-numeric residue that might survive in
  // a hand-edited cached map.
  let aggregate = 0
  for (const value of Object.values(byPackage)) {
    if (typeof value === 'number') aggregate += value
  }
  return { byPackage, aggregate }
}

describe('per-package HWM', () => {
  const PACKAGES = ['runcycles', 'runcycles-openai-agents'] as const
  // hwmPerPackage never mutates its inputs, but each case gets its own
  // clone of the cached fixture anyway, for isolation.
  const baseCache = () => ({ 'runcycles': 1620, 'runcycles-openai-agents': 447 })

  it('cold start: empty cached map, all fetched values become per-package HWMs', () => {
    const { byPackage, aggregate } = hwmPerPackage(
      PACKAGES,
      { 'runcycles': 1620, 'runcycles-openai-agents': 447 },
      {},
    )
    expect(byPackage).toEqual({ 'runcycles': 1620, 'runcycles-openai-agents': 447 })
    expect(aggregate).toBe(2067)
  })

  it('all packages succeed and grow: each package HWMs to its new value', () => {
    const { byPackage, aggregate } = hwmPerPackage(
      PACKAGES,
      { 'runcycles': 1700, 'runcycles-openai-agents': 500 },
      baseCache(),
    )
    expect(byPackage).toEqual({ 'runcycles': 1700, 'runcycles-openai-agents': 500 })
    expect(aggregate).toBe(2200)
  })

  it('one package API fails (null), other succeeds and grows: failed package preserved, growth captured', () => {
    // The exact scenario from 2026-05-09: pypistats CDN flakiness
    // returned data for one package but errored on the other. An
    // aggregate-only HWM would have read the combined dip as a
    // "regression" and masked the surviving package's growth.
    const { byPackage, aggregate } = hwmPerPackage(
      PACKAGES,
      { 'runcycles': 1700, 'runcycles-openai-agents': null },
      baseCache(),
    )
    expect(byPackage).toEqual({ 'runcycles': 1700, 'runcycles-openai-agents': 447 })
    expect(aggregate).toBe(2147) // captured the +80 growth
  })

  it('all packages fail (all null): aggregate frozen at cached values', () => {
    const { byPackage, aggregate } = hwmPerPackage(
      PACKAGES,
      { 'runcycles': null, 'runcycles-openai-agents': null },
      baseCache(),
    )
    expect(byPackage).toEqual({ 'runcycles': 1620, 'runcycles-openai-agents': 447 })
    expect(aggregate).toBe(2067)
  })

  it('one package returns lower than cached (rolling-window blip): preserved at cached HWM', () => {
    // PyPI Stats /overall sometimes returns a truncated daily series, so
    // today's sum can land below the cached HWM. Per-package HWM holds
    // the cached value, matching the aggregate-only version's behavior.
    const { byPackage, aggregate } = hwmPerPackage(
      PACKAGES,
      { 'runcycles': 1620, 'runcycles-openai-agents': 100 }, // dropped from 447
      baseCache(),
    )
    expect(byPackage).toEqual({ 'runcycles': 1620, 'runcycles-openai-agents': 447 })
    expect(aggregate).toBe(2067)
  })

  it('successful zero is treated as a real value, not a failure', () => {
    // A brand-new package may legitimately report 0 downloads. A numeric
    // fetched=0 is distinct from fetched=null (fetch failure): both hold
    // a higher cached value, but only null is "no data" semantics.
    const cached = { 'pkg-a': 100 }
    // Successful 0 vs cached 100 → max(0, 100) = 100 (HWM holds)
    expect(hwmPerPackage(['pkg-a'], { 'pkg-a': 0 }, cached).byPackage).toEqual({ 'pkg-a': 100 })
    // null vs cached 100 → 100 preserved (same outcome here)
    expect(hwmPerPackage(['pkg-a'], { 'pkg-a': null }, cached).byPackage).toEqual({ 'pkg-a': 100 })
    // Cold start with a successful 0 → 0 (real value, nothing to fall back on)
    expect(hwmPerPackage(['pkg-a'], { 'pkg-a': 0 }, {}).byPackage).toEqual({ 'pkg-a': 0 })
    // Cold start with null → 0 (no fresh, no cached)
    expect(hwmPerPackage(['pkg-a'], { 'pkg-a': null }, {}).byPackage).toEqual({ 'pkg-a': 0 })
  })

  it('package removed from declared list but present in cached map: preserved', () => {
    // An un-declared package's prior count stays in the cached map, so
    // the displayed aggregate never regresses when the list shrinks.
    const { byPackage, aggregate } = hwmPerPackage(
      ['runcycles'],
      { 'runcycles': 1700 }, // 'old-package' not in fetched
      { 'runcycles': 1620, 'old-package': 999 },
    )
    expect(byPackage).toEqual({ 'runcycles': 1700, 'old-package': 999 })
    expect(aggregate).toBe(2699)
  })
})
Loading