#!/usr/bin/env node import { promises as fs } from 'node:fs'; import path from 'node:path'; import { fileURLToPath, pathToFileURL } from 'node:url'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); const REPO_ROOT = path.resolve(__dirname, '..'); const API_ROOT = 'https://api.github.com'; const GITHUB_API_VERSION = '2022-11-28'; const ARCHIVE_VERSION = 1; const DAY_MS = 24 * 60 * 60 * 1000; const SUMMARY_WINDOWS = [ ['last_14_days', 14], ['last_30_days', 30], ['last_90_days', 90], ['last_365_days', 365], ]; const toIsoString = (value, label) => { const date = new Date(value); if (Number.isNaN(date.getTime())) { throw new Error(`Invalid ${label}: ${value}`); } return date.toISOString(); }; const toDailyTimestamp = (value) => `${toIsoString(value, 'traffic timestamp').slice(0, 10)}T00:00:00Z`; const toDateKey = (value) => toIsoString(value, 'capture timestamp').slice(0, 10); const toNonNegativeInteger = (value, label) => { const number = Number(value); if (!Number.isFinite(number) || number < 0) { throw new Error(`Invalid ${label}: ${value}`); } return Math.trunc(number); }; const toRequiredString = (value, label) => { if (typeof value !== 'string') { throw new Error(`${label} must be a non-empty string`); } const trimmed = value.trim(); if (!trimmed) { throw new Error(`${label} must be a non-empty string`); } return trimmed; }; const normalizeRepository = (repo) => { const normalized = String(repo || '').trim(); if (!/^[A-Za-z0-9_.-]+\/[A-Za-z0-9_.-]+$/.test(normalized)) { throw new Error(`Repository must be in owner/name form, received: ${repo || '(empty)'}`); } return normalized; }; const normalizeDailyEntries = (entries, label) => { if (!Array.isArray(entries)) { throw new Error(`${label} must be an array`); } return entries .map((entry) => ({ timestamp: toDailyTimestamp(entry.timestamp), count: toNonNegativeInteger(entry.count, `${label}.count`), uniques: toNonNegativeInteger(entry.uniques, `${label}.uniques`), })) .sort((a, b) => a.timestamp.localeCompare(b.timestamp)); }; const normalizeReferrers = (entries) => { if (!Array.isArray(entries)) { throw new Error('referrers must be an array'); } return entries.map((entry) => ({ referrer: toRequiredString(entry.referrer, 'referrers.referrer'), count: toNonNegativeInteger(entry.count, 'referrers.count'), uniques: toNonNegativeInteger(entry.uniques, 'referrers.uniques'), })); }; const normalizePaths = (entries) => { if (!Array.isArray(entries)) { throw new Error('paths must be an array'); } return entries.map((entry) => ({ path: toRequiredString(entry.path, 'paths.path'), title: toRequiredString(entry.title, 'paths.title'), count: toNonNegativeInteger(entry.count, 'paths.count'), uniques: toNonNegativeInteger(entry.uniques, 'paths.uniques'), })); }; const upsertByKey = (existing, incoming, key) => { const entriesByKey = new Map(); for (const entry of existing || []) { entriesByKey.set(entry[key], entry); } for (const entry of incoming || []) { entriesByKey.set(entry[key], entry); } return [...entriesByKey.values()].sort((a, b) => String(a[key]).localeCompare(String(b[key]))); }; const latestEntry = (entries) => { if (!entries?.length) { return null; } return entries[entries.length - 1]; }; const sumSeries = (entries) => entries.reduce( (totals, entry) => ({ count: totals.count + entry.count, sum_daily_uniques: totals.sum_daily_uniques + entry.uniques, }), { count: 0, sum_daily_uniques: 0 }, ); const startOfUtcDay = (date) => Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()); const summarizeWindow = (entries, days, now) => { const cutoff = new Date(startOfUtcDay(now) - ((days - 1) * DAY_MS)); const filtered = entries.filter((entry) => new Date(entry.timestamp) >= cutoff); const totals = sumSeries(filtered); return { days, count: totals.count, sum_daily_uniques: totals.sum_daily_uniques, unique_semantics: 'sum_of_daily_uniques', first_date: filtered[0]?.timestamp.slice(0, 10) ?? null, last_date: filtered.at(-1)?.timestamp.slice(0, 10) ?? null, }; }; const summarizeAllTime = (entries) => { const totals = sumSeries(entries); return { count: totals.count, sum_daily_uniques: totals.sum_daily_uniques, unique_semantics: 'sum_of_daily_uniques', first_date: entries[0]?.timestamp.slice(0, 10) ?? null, last_date: entries.at(-1)?.timestamp.slice(0, 10) ?? null, }; }; const normalizeExistingArchive = (archive, repository, capturedAt) => { if (!archive) { return { version: ARCHIVE_VERSION, repository, archive_started_at: capturedAt, updated_at: capturedAt, daily: { views: [], clones: [], }, snapshots: { referrers: [], paths: [], }, captures: [], }; } if (archive.repository && archive.repository !== repository) { throw new Error(`Archive repository mismatch: ${archive.repository} != ${repository}`); } return { version: ARCHIVE_VERSION, repository, archive_started_at: archive.archive_started_at || capturedAt, updated_at: archive.updated_at || capturedAt, daily: { views: normalizeDailyEntries(archive.daily?.views || [], 'daily.views'), clones: normalizeDailyEntries(archive.daily?.clones || [], 'daily.clones'), }, snapshots: { referrers: (archive.snapshots?.referrers || []).map((snapshot) => ({ captured_at: toIsoString(snapshot.captured_at, 'referrer snapshot timestamp'), date: snapshot.date || toDateKey(snapshot.captured_at), entries: normalizeReferrers(snapshot.entries || []), })), paths: (archive.snapshots?.paths || []).map((snapshot) => ({ captured_at: toIsoString(snapshot.captured_at, 'path snapshot timestamp'), date: snapshot.date || toDateKey(snapshot.captured_at), entries: normalizePaths(snapshot.entries || []), })), }, captures: (archive.captures || []).map((capture) => ({ captured_at: toIsoString(capture.captured_at, 'capture timestamp'), date: capture.date || toDateKey(capture.captured_at), views_window: { count: toNonNegativeInteger(capture.views_window?.count || 0, 'captures.views_window.count'), uniques: toNonNegativeInteger(capture.views_window?.uniques || 0, 'captures.views_window.uniques'), }, clones_window: { count: toNonNegativeInteger(capture.clones_window?.count || 0, 'captures.clones_window.count'), uniques: toNonNegativeInteger(capture.clones_window?.uniques || 0, 'captures.clones_window.uniques'), }, })), }; }; export const mergeTrafficArchive = (existingArchive, snapshot) => { const repository = normalizeRepository(snapshot.repository); const capturedAt = toIsoString(snapshot.captured_at, 'capture timestamp'); const captureDate = toDateKey(capturedAt); const archive = normalizeExistingArchive(existingArchive, repository, capturedAt); const views = normalizeDailyEntries(snapshot.views?.views || [], 'views'); const clones = normalizeDailyEntries(snapshot.clones?.clones || [], 'clones'); const referrerSnapshot = { captured_at: capturedAt, date: captureDate, entries: normalizeReferrers(snapshot.referrers || []), }; const pathSnapshot = { captured_at: capturedAt, date: captureDate, entries: normalizePaths(snapshot.paths || []), }; const capture = { captured_at: capturedAt, date: captureDate, views_window: { count: toNonNegativeInteger(snapshot.views?.count ?? sumSeries(views).count, 'views.count'), uniques: toNonNegativeInteger(snapshot.views?.uniques ?? sumSeries(views).sum_daily_uniques, 'views.uniques'), }, clones_window: { count: toNonNegativeInteger(snapshot.clones?.count ?? sumSeries(clones).count, 'clones.count'), uniques: toNonNegativeInteger(snapshot.clones?.uniques ?? sumSeries(clones).sum_daily_uniques, 'clones.uniques'), }, }; return { ...archive, updated_at: capturedAt, daily: { views: upsertByKey(archive.daily.views, views, 'timestamp'), clones: upsertByKey(archive.daily.clones, clones, 'timestamp'), }, snapshots: { referrers: upsertByKey(archive.snapshots.referrers, [referrerSnapshot], 'date'), paths: upsertByKey(archive.snapshots.paths, [pathSnapshot], 'date'), }, captures: upsertByKey(archive.captures, [capture], 'date'), }; }; export const buildTrafficSummary = (archive, options = {}) => { const now = new Date(options.now || new Date().toISOString()); if (Number.isNaN(now.getTime())) { throw new Error(`Invalid summary date: ${options.now}`); } const views = archive.daily?.views || []; const clones = archive.daily?.clones || []; const buildMetrics = (entries) => { const metrics = Object.fromEntries(SUMMARY_WINDOWS.map(([key, days]) => [ key, summarizeWindow(entries, days, now), ])); metrics.all_time = summarizeAllTime(entries); return metrics; }; return { version: ARCHIVE_VERSION, repository: archive.repository, generated_at: now.toISOString(), archive_started_at: archive.archive_started_at || null, updated_at: archive.updated_at || null, source: { api: 'GitHub REST repository traffic endpoints', retention_limit: 'GitHub exposes roughly the last 14 days; this archive keeps daily snapshots long term.', unique_semantics: 'GitHub daily unique values are retained as sum_daily_uniques for longer windows, not deduplicated visitors.', }, metrics: { views: buildMetrics(views), clones: buildMetrics(clones), }, daily: { views, clones, }, latest_snapshots: { referrers: latestEntry(archive.snapshots?.referrers || []), paths: latestEntry(archive.snapshots?.paths || []), }, snapshot_counts: { referrers: archive.snapshots?.referrers?.length || 0, paths: archive.snapshots?.paths?.length || 0, captures: archive.captures?.length || 0, }, }; }; const fetchJson = async ({ repo, token, pathname, fetchImpl }) => { const url = new URL(pathname, API_ROOT); const response = await fetchImpl(url, { headers: { Accept: 'application/vnd.github+json', Authorization: `Bearer ${token}`, 'User-Agent': 'clawsec-traffic-archive', 'X-GitHub-Api-Version': GITHUB_API_VERSION, }, }); if (!response.ok) { const body = await response.text().catch(() => ''); const suffix = body ? ` ${body.slice(0, 500)}` : ''; const lacksPushAccess = response.status === 403 && /resource not accessible|must have push access/i.test(body); const hint = lacksPushAccess ? ' Traffic endpoints require a token with push access to the repository; the Actions GITHUB_TOKEN is always rejected. Use a classic PAT with the repo scope or a fine-grained PAT with read access to Administration.' : response.status === 401 ? ' The token was rejected as invalid — it may be expired or revoked. Rotate the TRAFFIC_ARCHIVE_TOKEN secret.' : ''; throw new Error(`GitHub traffic API request failed for ${repo}: ${url.pathname}${url.search} returned ${response.status}.${suffix}${hint}`); } return response.json(); }; export const fetchGitHubTraffic = async ({ repo, token, capturedAt = new Date().toISOString(), fetchImpl = globalThis.fetch, }) => { const repository = normalizeRepository(repo); if (!token) { throw new Error('A GitHub token is required to read repository traffic.'); } if (typeof fetchImpl !== 'function') { throw new Error('fetch is not available in this Node runtime.'); } const encodedRepo = repository.split('/').map(encodeURIComponent).join('/'); const request = (pathname) => fetchJson({ repo: repository, token, pathname: `/repos/${encodedRepo}${pathname}`, fetchImpl, }); const [views, clones, referrers, paths] = await Promise.all([ request('/traffic/views?per=day'), request('/traffic/clones?per=day'), request('/traffic/popular/referrers'), request('/traffic/popular/paths'), ]); return { repository, captured_at: toIsoString(capturedAt, 'capture timestamp'), views, clones, referrers, paths, }; }; const readJsonIfPresent = async (file) => { try { return JSON.parse(await fs.readFile(file, 'utf8')); } catch (error) { if (error?.code === 'ENOENT') { return undefined; } throw error; } }; const writeTextAtomic = async (file, content) => { const dir = path.dirname(file); const tempFile = path.join(dir, `.${path.basename(file)}.${process.pid}.${Date.now()}.tmp`); let handle; await fs.mkdir(dir, { recursive: true }); try { handle = await fs.open(tempFile, 'w'); await handle.writeFile(content, 'utf8'); await handle.sync(); await handle.close(); handle = undefined; await fs.rename(tempFile, file); } catch (error) { if (handle) { await handle.close().catch(() => {}); } await fs.unlink(tempFile).catch(() => {}); throw error; } }; export const writeJson = async (file, value) => { await writeTextAtomic(file, `${JSON.stringify(value, null, 2)}\n`); }; const parseArgs = (args) => { const options = {}; for (let index = 0; index < args.length; index += 1) { const arg = args[index]; if (arg === '--archive-dir') { options.archiveDir = args[index + 1]; index += 1; } else if (arg === '--repo') { options.repo = args[index + 1]; index += 1; } else if (arg === '--captured-at') { options.capturedAt = args[index + 1]; index += 1; } else if (arg === '--help' || arg === '-h') { options.help = true; } else { throw new Error(`Unknown argument: ${arg}`); } } return options; }; const printHelp = () => { console.log(`Usage: node scripts/archive-github-traffic.mjs [options] Options: --archive-dir