/**
* @module database/repositories/activityRepo
* @description Activity log CRUD backed by SQLite.
*/
import crypto from "node:crypto";
import { getDatabase } from "../sqlite.js";
/**
* SEC-007: Canonical hash-input shape for an activity row — everything that
* is persisted EXCEPT `prevHash` itself. Both the INSERT path and the
* verification path must use this identical helper so the computed hash
* round-trips deterministically. Key order is stable because object
* literal property order is preserved in V8.
*
* `meta` is normalised to its JSON-string form (or null) so the hash input
* matches the exact byte sequence stored in the TEXT column — the
* verification path reads the column back as TEXT and must hash it without
* re-parsing.
*
* Note on `id`: the primary key is included in the hashed payload deliberately
* so any re-numbering of activity rows (counter compaction, partial-backup
* restore, manual edit) is detected by `verifyAuditChain` as tampering. The
* tradeoff is that legitimate ID migrations require regenerating the chain
* from scratch; we accept that cost because in a SOC 2 / PCI-DSS audit log
* "the IDs changed but the content is the same" is exactly the class of
* change a verifier should flag.
*
* @param {Object} row
* @returns {Object}
* @private
*/
function rowMinusHash(row) {
return {
id: row.id,
type: row.type,
projectId: row.projectId || null,
projectName: row.projectName || null,
testId: row.testId || null,
testName: row.testName || null,
detail: row.detail || null,
status: row.status || "completed",
createdAt: row.createdAt,
userId: row.userId || null,
userName: row.userName || null,
workspaceId: row.workspaceId || null,
meta: row.meta == null
? null
: (typeof row.meta === "string" ? row.meta : JSON.stringify(row.meta)),
ipAddress: row.ipAddress || null,
userAgent: row.userAgent || null,
};
}
/**
* SEC-007: Compute the next `prevHash` from the previous row's hash and the
* current row's content. Exported for tests and re-used by `verifyAuditChain`
* so a single implementation is the source of truth (any drift between
* INSERT and verify breaks the chain immediately rather than silently).
*
* prevHash_i = sha256(prevHash_{i-1} ++ JSON.stringify(rowMinusHash_i))
*
* The leading hash for the very first row is the empty string.
*
* @param {string|null} previousHash
* @param {Object} row
* @returns {string} 64-char lowercase hex digest.
*/
export function computePrevHash(previousHash, row) {
const input = (previousHash || "") + JSON.stringify(rowMinusHash(row));
return crypto.createHash("sha256").update(input).digest("hex");
}
/**
* SEC-007: activity types eligible for compliance dedup.
*
* Industry-standard audit logs (Splunk, AWS CloudTrail, Auth0, Datadog)
* collapse high-volume *system-emitted* events (read access, polling)
* into a single row with a `count` + `lastAt`. They do NOT collapse
* user-initiated state-change events because each one is an independent
* attributable action.
*
* The allowlist captures exactly the event types where collapse is a
* noise-reduction win, not an attribution loss:
*
* - `audit.read` / `audit.export` — page reloads + stats fetches emit
* bursts of identical reads. PCI-DSS 10.5.3 permits summarisation
* provided every event remains attributable; `count` + `lastAt`
* preserve attribution.
* - `auth.login.failed` — credential-stuffing probes against a single
* account look like a burst of identical events. Collapsing makes the
* pattern MORE visible to a SOC 2 reviewer (one row "200 attempts"
* beats 200 noise rows).
*
* Every other event type (`auth.login`, `auth.role.change`, `test.*`,
* `audit.purge`) is excluded — those are single user actions that must
* each show as their own row for forensic reconstruction.
*
* @private
*/
const DEDUP_ELIGIBLE_TYPES = new Set([
"audit.read",
"audit.export",
"auth.login.failed",
]);
/**
* SEC-007: compute the dedup signature for an activity row.
*
* Two rows collapse only when this signature matches exactly — workspace,
* actor, event type, AND meta-filter shape. Different filter shapes on
* `audit.read` (e.g. one filtered by `dateFrom`, another with no filter)
* stay as separate rows; they're semantically different audit access
* patterns and a SOC 2 reviewer needs to see them as distinct events.
*
* `rowCount` is INTENTIONALLY excluded from the signature so two reads
* with the same filter shape but different counts (a new audit row
* appeared between reads) still collapse — the collapsed row's
* `meta.rowCount` reflects the most recent read's count.
*
* @param {Object} row
* @returns {string}
* @private
*/
function dedupSignature(row) {
const meta = row.meta || {};
const sigMeta = {
format: meta.format ?? null,
filters: meta.filters
? {
userId: meta.filters.userId ?? null,
projectId: meta.filters.projectId ?? null,
types: Array.isArray(meta.filters.types) ? [...meta.filters.types].sort() : null,
dateFrom: meta.filters.dateFrom ?? null,
dateTo: meta.filters.dateTo ?? null,
ipAddress: meta.filters.ipAddress ?? null,
}
: null,
};
return JSON.stringify({
workspaceId: row.workspaceId || null,
userId: row.userId || null,
type: row.type,
meta: sigMeta,
});
}
/**
* SEC-007: dedup window in ms. Default 60s matches Auth0 / Datadog
* industry-standard collapse window. `AUDIT_DEDUP_WINDOW_SEC=0` disables.
* Read at call time so test `setupEnv()` flips it correctly.
*
* @returns {number}
* @private
*/
function dedupWindowMs() {
const raw = process.env.AUDIT_DEDUP_WINDOW_SEC;
if (raw === "0") return 0;
const sec = Number.parseInt(raw ?? "", 10);
return Number.isFinite(sec) && sec > 0 ? sec * 1000 : 60_000;
}
/**
* Create an activity entry.
*
* ### Dedup (SEC-007)
* For high-volume read-shaped events (`audit.read`, `audit.export`,
* `auth.login.failed`), consecutive identical events within the dedup
* window collapse into a single row with `count++` and `lastAt = now`.
* Matches Splunk / CloudTrail / Auth0 industry practice — every event
* is still counted (anti-exfiltration signals intact) but the feed
* isn't dominated by duplicate noise. PCI-DSS 10.5.3 permits this
* provided attribution is preserved. Window is `AUDIT_DEDUP_WINDOW_SEC`
* (default 60, `0` disables).
*
* Dedup is automatically DISABLED when `AUDIT_HASH_CHAIN=true` —
* mutating `lastAt`/`count` on a persisted row would invalidate its
* `prevHash` and break the verification walk. With chain on, every
* event is its own row (the chain is the higher-priority compliance
* signal).
*
* When `AUDIT_HASH_CHAIN=true`, computes `prevHash` from the last row in
* the same workspace and stores it in the same transaction as the INSERT.
* Otherwise `prevHash` is persisted as null (chain disabled — default).
*
* @param {Object} activity — { id, type, projectId, projectName, testId, testName, detail, status, createdAt, userId?, userName?, workspaceId?, meta?, ipAddress?, userAgent?, prevHash? }
*/
export function create(activity) {
const db = getDatabase();
// `meta` is JSON-encoded TEXT (migration 018) so callers can pass a plain
// object; readers below re-parse it. Null when absent so the column is
// genuinely empty rather than the string "null".
const metaStr = activity.meta != null ? JSON.stringify(activity.meta) : null;
const insert = db.prepare(`
INSERT INTO activities (id, type, projectId, projectName, testId, testName, detail, status, createdAt, userId, userName, workspaceId, meta, ipAddress, userAgent, prevHash)
VALUES (@id, @type, @projectId, @projectName, @testId, @testName, @detail, @status, @createdAt, @userId, @userName, @workspaceId, @meta, @ipAddress, @userAgent, @prevHash)
`);
// SEC-007: hash-chain compute. Wrapped in a transaction so the lookup of
// the previous row and the INSERT are atomic — without this, two parallel
// logActivity() calls could both read the same "previous" row and chain
// off it, producing two siblings with identical prevHash that break the
// verification walk. The feature is opt-in (chained writes serialise
// INSERTs under contention) and gated by AUDIT_HASH_CHAIN=true.
const chainEnabled = process.env.AUDIT_HASH_CHAIN === "true";
const params = {
id: activity.id,
type: activity.type,
projectId: activity.projectId || null,
projectName: activity.projectName || null,
testId: activity.testId || null,
testName: activity.testName || null,
detail: activity.detail || null,
status: activity.status || "completed",
createdAt: activity.createdAt,
userId: activity.userId || null,
userName: activity.userName || null,
workspaceId: activity.workspaceId || null,
meta: metaStr,
ipAddress: activity.ipAddress || null,
userAgent: activity.userAgent || null,
prevHash: activity.prevHash || null,
};
if (chainEnabled) {
// Hash-chain path takes precedence over dedup. See the docstring above
// for why these features are mutually exclusive at the row level.
const tx = db.transaction((row) => {
// Order by createdAt DESC then numerically by the trailing integer of
// the `ACT-N` id (see idGenerator.js) as a tiebreaker for sub-
// millisecond bursts. A plain `id DESC` would sort lexicographically
// (ACT-10 < ACT-2), so at every digit boundary (9→10, 99→100, …) the
// INSERT side would chain off ACT-9 while the verify-side ASC walk
// visits ACT-10 right after ACT-1 — producing a spurious chain break.
const prev = db.prepare(
"SELECT prevHash FROM activities WHERE workspaceId = ? ORDER BY createdAt DESC, CAST(SUBSTR(id, 5) AS INTEGER) DESC LIMIT 1"
).get(row.workspaceId);
// Pass the stringified meta (matching what's about to be persisted) so
// computePrevHash's normaliser sees the same bytes the verifier will.
row.prevHash = computePrevHash(prev?.prevHash || null, { ...row, meta: metaStr });
insert.run(row);
});
tx(params);
// SEC-007: mirror the computed prevHash back onto the caller's object so
// downstream consumers (notably the SIEM forwarder in `activityLogger`)
// dispatch the row with the same `prevHash` that's persisted in SQLite.
// Without this mutation, every SIEM-forwarded event carries `prevHash: null`
// and a SIEM-side integrity verifier can never reconstruct the chain.
activity.prevHash = params.prevHash;
return;
}
// SEC-007 dedup path. Try to fold this event into the most-recent
// matching row within the dedup window before falling back to INSERT.
// Wrapped in a transaction so the SELECT-then-UPDATE-or-INSERT race is
// closed — two parallel logActivity calls cannot both INSERT siblings
// because the UPDATE arm acquires the row lock first.
const windowMs = dedupWindowMs();
const eligible = windowMs > 0 && DEDUP_ELIGIBLE_TYPES.has(activity.type);
if (!eligible) {
insert.run(params);
return;
}
const sig = dedupSignature({ ...activity, meta: activity.meta });
// Hash the sig so the LIKE match works reliably — the raw JSON sig
// contains quotes that get backslash-escaped inside the outer JSON
// meta column, breaking substring matching. A hex hash is safe for
// LIKE and deterministic.
const sigHash = crypto.createHash("sha256").update(sig).digest("hex");
const cutoff = new Date(Date.now() - windowMs).toISOString();
// `_dedupSig` flows in `meta._dedupSig` so the existing TEXT column carries
// it without a schema change — but we strip it from the user-facing meta
// in `hydrate()` below so consumers never see the internal field.
const metaWithSig = activity.meta != null
? JSON.stringify({ ...activity.meta, _dedupSig: sigHash })
: JSON.stringify({ _dedupSig: sigHash });
params.meta = metaWithSig;
const dedupTx = db.transaction(() => {
// Look for a recent matching row. Cast to lexicographic-safe comparison
// on ISO timestamps so the window is portable across SQLite + Postgres.
// Match on the full _dedupSig substring inside meta — same portable
// LIKE pattern used by `countDistinctTestIds.metaIsAutoApproved` above.
const recent = db.prepare(`
SELECT id, count, createdAt
FROM activities
WHERE type = ?
AND (userId = ? OR (userId IS NULL AND ? IS NULL))
AND (workspaceId = ? OR (workspaceId IS NULL AND ? IS NULL))
AND createdAt >= ?
AND meta LIKE ?
ORDER BY createdAt DESC, CAST(SUBSTR(id, 5) AS INTEGER) DESC
LIMIT 1
`).get(
activity.type,
activity.userId || null, activity.userId || null,
activity.workspaceId || null, activity.workspaceId || null,
cutoff,
`%${sigHash}%`,
);
if (recent) {
// Update the existing row: bump count + lastAt, refresh meta to the
// new event's full payload (so the rendered `rowCount`, `ipAddress`,
// etc. always reflect the most recent event in the collapse).
db.prepare(`
UPDATE activities
SET count = count + 1,
lastAt = ?,
meta = ?,
ipAddress = ?,
userAgent = ?
WHERE id = ?
`).run(activity.createdAt, metaWithSig, params.ipAddress, params.userAgent, recent.id);
// Mutate the caller's activity so the SIEM forwarder sees the merged
// row (same pattern as the prevHash mirror-back above).
activity.id = recent.id;
activity.count = (recent.count || 1) + 1;
activity.lastAt = activity.createdAt;
activity.createdAt = recent.createdAt;
activity.deduped = true;
return;
}
// No match in window — fresh row.
insert.run(params);
activity.count = 1;
activity.lastAt = null;
activity.deduped = false;
});
dedupTx();
}
/**
* Re-parse the JSON `meta` column into a plain object for callers. Tolerant
* of legacy rows where the column is null/empty/non-JSON — those rows
* predate migration 018 and surface as `meta: null`.
* @param {Object} row
* @returns {Object}
*/
function hydrate(row) {
if (!row) return row;
if (typeof row.meta === "string" && row.meta.length > 0) {
try { row.meta = JSON.parse(row.meta); } catch { row.meta = null; }
} else if (row.meta === undefined) {
row.meta = null;
}
// SEC-007: `_dedupSig` is a write-time-only internal field used by the
// dedup matcher in `create()`. Strip it from the user-facing payload so
// the UI, CSV/NDJSON exports, and SIEM forwarder never see it — only the
// dedup matcher reads it (and only via raw LIKE on the TEXT column).
if (row.meta && typeof row.meta === "object" && "_dedupSig" in row.meta) {
const { _dedupSig: _omit, ...rest } = row.meta;
void _omit;
row.meta = Object.keys(rest).length > 0 ? rest : null;
}
return row;
}
/**
* Get all activities.
* @returns {Object[]}
*/
export function getAll() {
const db = getDatabase();
return db.prepare("SELECT * FROM activities ORDER BY createdAt DESC").all().map(hydrate);
}
/**
* Get all activities as a dictionary keyed by ID.
* @returns {Object<string, Object>}
*/
export function getAllAsDict() {
const all = getAll();
const dict = {};
for (const a of all) dict[a.id] = a;
return dict;
}
/**
* Get filtered activities.
*
* `after` / `before` accept ISO-8601 strings (matching the column's storage
* format). The comparison is lexicographic on ISO strings, which is correct
* for the YYYY-MM-DDTHH:MM:SS.sssZ shape `Date.toISOString()` produces.
*
* `offset` pairs with `limit` for cursor-style "Load more" — clients pass
* the count of rows they've already rendered. Combined with the default
* `ORDER BY createdAt DESC`, this gives a stable forward window even
* when new rows arrive between fetches (the ordering key is the row's
* own timestamp, so a new row only shifts the cursor on the *first*
* page, not subsequent pages).
*
* @param {Object} [filters]
* @param {string} [filters.type]
* @param {string} [filters.projectId]
* @param {string} [filters.workspaceId] — Scope to workspace (ACL-001).
* @param {string} [filters.after] — ISO timestamp; only rows with
* `createdAt >= after` are returned. Powers the AUTO-003b approvals
* timeline date-range picker (This week / Last 30 days / Custom).
* @param {string} [filters.before] — ISO timestamp; only rows with
* `createdAt < before` are returned. Pairs with `after` for bounded
* ranges; either bound is optional.
* @param {number} [filters.limit=200]
* @param {number} [filters.offset] — Skip the first N rows of the
* result set; used by paginated UIs (Load more) to fetch the next page.
* @returns {Object[]}
*/
export function getFiltered({ type, projectId, workspaceId, after, before, limit, offset } = {}) {
const db = getDatabase();
let sql = "SELECT * FROM activities WHERE 1=1";
const params = [];
if (workspaceId) {
sql += " AND workspaceId = ?";
params.push(workspaceId);
}
if (type) {
sql += " AND type = ?";
params.push(type);
}
if (projectId) {
sql += " AND projectId = ?";
params.push(projectId);
}
if (after) {
sql += " AND createdAt >= ?";
params.push(after);
}
if (before) {
sql += " AND createdAt < ?";
params.push(before);
}
sql += " ORDER BY createdAt DESC LIMIT ?";
// Honour an explicit `limit: 0` (legit "count-only / probe" value) and
// reject non-finite inputs (NaN, Infinity) by falling back to the default
// only for `undefined` / `null`. `limit || 200` would coerce both `0` and
// `NaN` to 200 — the first silently returns 200 rows when the caller
// asked for none; the second hides a bad input behind a full page.
params.push(Number.isFinite(limit) ? limit : 200);
if (Number.isFinite(offset) && offset > 0) {
sql += " OFFSET ?";
params.push(offset);
}
return db.prepare(sql).all(...params).map(hydrate);
}
/**
* Count `DISTINCT testId` across activity rows matching the filter (AUTO-003b).
*
* Used by the approval-stats 7-day revert-rate calculation, which asks
* *"how many distinct tests were auto-approved in the window?"* and
* *"how many distinct tests were revoked in the window?"* — set sizes,
* not row counts, because a test that auto-approved twice in the window
* should still count as one.
*
* Previously computed by fetching up to 10,000 rows via `getFiltered`
* and building two `Set`s in JS; at ~1 KB per row that's ~10 MB of
* transferred data per project per call. This query returns a single
* integer, and the `activities(type, projectId, createdAt)` access
* pattern is index-friendly on both adapters.
*
* The `metaIsAutoApproved` filter matches the JSON-encoded flag
* `meta.wasAutoApproved = true` via a portable `LIKE` on the serialised
* `meta` TEXT column (migration 018). LIKE is case-sensitive on SQLite
* and case-insensitive on PostgreSQL (the adapter rewrites LIKE→ILIKE)
* — fine here because `logActivity` always writes the lowercase
* `"wasAutoApproved":true` shape, so case-variation isn't possible on
* real data. Using LIKE instead of `json_extract` keeps the query
* portable across the SQLite/PostgreSQL adapters without a dialect
* branch (INF-001).
*
* @param {Object} filters
* @param {string} filters.type — required, exact match on `activities.type`.
* @param {string} [filters.projectId] — scope to project.
* @param {string} [filters.workspaceId] — scope to workspace (ACL).
* @param {string} [filters.after] — ISO timestamp lower bound (inclusive).
* @param {string} [filters.before] — ISO timestamp upper bound (exclusive).
* @param {boolean} [filters.metaIsAutoApproved] — match rows whose `meta`
* column encodes `{ ..., "wasAutoApproved": true }`. Used to filter revoke
* rows down to "was the revoked test originally auto-approved?" without
* reading 10k rows into memory.
* @returns {number} Count of distinct non-null `testId` values among matching rows.
*/
export function countDistinctTestIds({ type, projectId, workspaceId, after, before, metaIsAutoApproved } = {}) {
const db = getDatabase();
let sql = "SELECT COUNT(DISTINCT testId) AS cnt FROM activities WHERE testId IS NOT NULL";
const params = [];
if (type) {
sql += " AND type = ?";
params.push(type);
}
if (projectId) {
sql += " AND projectId = ?";
params.push(projectId);
}
if (workspaceId) {
sql += " AND workspaceId = ?";
params.push(workspaceId);
}
if (after) {
sql += " AND createdAt >= ?";
params.push(after);
}
if (before) {
sql += " AND createdAt < ?";
params.push(before);
}
if (metaIsAutoApproved) {
// Portable JSON-in-TEXT probe. Matches the exact substring
// `"wasAutoApproved":true` (no spaces — `JSON.stringify` omits them)
// so the filter is stable across both adapters. A dialect-specific
// `json_extract(meta, '$.wasAutoApproved') = 1` would be nicer on
// SQLite but breaks on PostgreSQL (`jsonb_extract_path` / `->>`),
// and the LIKE is already bounded by the indexed `type + projectId`
// predicates above.
sql += " AND meta LIKE ?";
params.push('%"wasAutoApproved":true%');
}
return db.prepare(sql).get(...params)?.cnt || 0;
}
/**
* Count activities with optional workspace/project scope.
* @param {Object} [filters]
* @param {string} [filters.workspaceId]
* @param {string} [filters.projectId]
* @returns {number}
*/
export function countFiltered({ workspaceId, projectId } = {}) {
const db = getDatabase();
let sql = "SELECT COUNT(*) as cnt FROM activities WHERE 1=1";
const params = [];
if (workspaceId) {
sql += " AND workspaceId = ?";
params.push(workspaceId);
}
if (projectId) {
sql += " AND projectId = ?";
params.push(projectId);
}
return db.prepare(sql).get(...params).cnt;
}
/**
* Get activities filtered by type for dashboard analytics.
* Only returns type, status, createdAt — skips detail, names, etc.
* @param {string[]} types — Activity types to include.
* @param {Object} [opts]
* @param {string} [opts.workspaceId] — Optional workspace scope.
* @returns {Object[]}
*/
export function getByTypes(types, opts = {}) {
const db = getDatabase();
const { workspaceId } = opts;
const placeholders = types.map(() => "?").join(", ");
const workspaceClause = workspaceId ? " AND workspaceId = ?" : "";
const params = workspaceId ? [...types, workspaceId] : types;
return db.prepare(
`SELECT type, status, createdAt FROM activities WHERE type IN (${placeholders})${workspaceClause} ORDER BY createdAt DESC`
).all(...params);
}
/**
* Delete all activities for a project.
* @param {string} projectId
* @returns {number} Number of deleted rows.
*/
export function deleteByProjectId(projectId) {
const db = getDatabase();
const info = db.prepare("DELETE FROM activities WHERE projectId = ?").run(projectId);
return info.changes;
}
/**
* Delete all activities in a workspace.
* @param {string} workspaceId
* @returns {number} Number of deleted rows.
*/
export function clearByWorkspaceId(workspaceId) {
const db = getDatabase();
const info = db.prepare("DELETE FROM activities WHERE workspaceId = ?").run(workspaceId);
return info.changes;
}
/**
* SEC-007: retention sweep — delete activity rows older than `days` days.
*
* Called from the scheduler's daily retention task. The cutoff is computed
* in JS (ISO 8601 UTC) and passed as a parameterised string so the
* comparison stays lexicographic — matching the column's storage format
* and the existing `after` / `before` filters in `getFiltered`.
*
* Caller is responsible for the SOC-2 floor (≥ 90 days or 0 to disable);
* this function simply does what it's told. Boot-time validation in
* `backend/src/index.js` rejects `AUDIT_RETENTION_DAYS` values below 90.
*
* @param {number} days — Retention window in days. Must be > 0; rows with
* `createdAt < (now - days)` are deleted. Callers that want to disable
* the sweep entirely should not invoke this function at all (the
* scheduler honours `AUDIT_RETENTION_DAYS=0` by skipping).
* @returns {number} Number of deleted rows.
*/
export function purgeOlderThan(days) {
if (!Number.isFinite(days) || days <= 0) return 0;
const db = getDatabase();
const cutoff = new Date(Date.now() - days * 24 * 60 * 60 * 1000).toISOString();
const info = db.prepare("DELETE FROM activities WHERE createdAt < ?").run(cutoff);
return info.changes;
}
/**
* SEC-007: workspace-scoped audit-log fetch with cursor pagination (ENH-010).
*
* Cursor pagination — vs. LIMIT/OFFSET — is stable under concurrent writes:
* a new row arriving between two pages cannot shift the window because the
* cursor is the row's own `createdAt`, not its ordinal position. The cursor
* is opaque to the caller (an ISO string), so the API surface can later
* change to a composite cursor without a breaking change.
*
* The ordering tuple is `(createdAt DESC, id DESC)` — matches the INSERT-
* side tiebreaker in `create()` so sub-millisecond bursts stay deterministic.
*
* Filters are AND-combined and all parameterised. `types` is an array; an
* empty array means "no type filter".
*
* @param {string} workspaceId — Required. The authenticated workspace; the
* route handler MUST pass `req.workspaceId`, not a user-supplied URL param,
* so cross-workspace reads are impossible.
* @param {Object} [filters]
* @param {string} [filters.userId]
* @param {string[]} [filters.types]
* @param {string} [filters.dateFrom] — ISO 8601 lower bound (inclusive).
* @param {string} [filters.dateTo] — ISO 8601 upper bound (inclusive).
* @param {string} [filters.ipAddress]
* @param {string} [filters.cursor] — Opaque cursor from the previous
* page's `nextCursor`. Internally the last row's `createdAt`. Pass
* `undefined` (or omit) for the first page.
* @param {number} [filters.limit=200] — Hard-capped at 1000 by the
* caller to bound memory.
* @returns {{ rows: Object[], nextCursor: string|null }} Rows in newest-
* first order; `nextCursor` is the timestamp to pass to the next call,
* or `null` when there are no more rows.
*/
export function getWorkspaceAuditLog(workspaceId, { userId, projectId, types = [], dateFrom, dateTo, ipAddress, cursor, limit = 200 } = {}) {
const db = getDatabase();
// Hard-cap defensively even though the route handler also clamps — a
// direct repo caller (test, future internal job) shouldn't be able to
// accidentally pull the whole table.
const cap = Math.max(1, Math.min(1000, Number.isFinite(limit) ? limit : 200));
let sql = "SELECT * FROM activities WHERE workspaceId = ?";
const params = [workspaceId];
if (userId) { sql += " AND userId = ?"; params.push(userId); }
if (projectId) { sql += " AND projectId = ?"; params.push(projectId); }
if (types?.length) { sql += ` AND type IN (${types.map(() => "?").join(",")})`; params.push(...types); }
if (dateFrom) { sql += " AND createdAt >= ?"; params.push(dateFrom); }
if (dateTo) { sql += " AND createdAt <= ?"; params.push(dateTo); }
if (ipAddress) { sql += " AND ipAddress = ?"; params.push(ipAddress); }
// Composite cursor — `createdAt|id`. Strict `<` on createdAt with an
// equal-timestamp tiebreaker on the numeric id avoids the data-loss case
// where multiple rows share the same `createdAt` and a page boundary falls
// within them: a plain `createdAt < ?` filter would skip every remaining
// row at that exact timestamp. The id comparison is numeric (CAST) to
// match the ORDER BY tiebreaker and the INSERT-side chain ordering.
if (cursor) {
const sep = cursor.lastIndexOf("|");
const cursorTs = sep >= 0 ? cursor.slice(0, sep) : cursor;
const cursorIdRaw = sep >= 0 ? cursor.slice(sep + 1) : null;
const cursorIdNum = cursorIdRaw && cursorIdRaw.startsWith("ACT-")
? Number.parseInt(cursorIdRaw.slice(4), 10)
: Number.parseInt(cursorIdRaw, 10);
if (Number.isFinite(cursorIdNum)) {
sql += " AND (createdAt < ? OR (createdAt = ? AND CAST(SUBSTR(id, 5) AS INTEGER) < ?))";
params.push(cursorTs, cursorTs, cursorIdNum);
} else {
// Back-compat: legacy timestamp-only cursors still work, just with the
// known edge-case of skipping rows that share the boundary timestamp.
sql += " AND createdAt < ?";
params.push(cursorTs);
}
}
// Fetch one extra row so we can tell whether a next page exists without
// a second COUNT query. Numeric id tiebreaker matches the INSERT-side
// ordering in `create()` so sub-millisecond bursts stay deterministic.
sql += " ORDER BY createdAt DESC, CAST(SUBSTR(id, 5) AS INTEGER) DESC LIMIT ?";
params.push(cap + 1);
const all = db.prepare(sql).all(...params).map(hydrate);
const hasMore = all.length > cap;
const rows = hasMore ? all.slice(0, cap) : all;
// Encode the composite (createdAt, id) cursor so the next page can skip
// past the last row even when several rows share its timestamp.
const last = rows[rows.length - 1];
const nextCursor = hasMore && last ? `${last.createdAt}|${last.id}` : null;
return { rows, nextCursor };
}
/**
* SEC-007: Walk the audit-log chain for a workspace in chronological order
* and verify each row's `prevHash` matches the recomputed
* `sha256(prev.prevHash + JSON.stringify(rowMinusHash(row)))`.
*
* Uses the shared `computePrevHash` helper so any drift between the INSERT
* path and the verification path is impossible — both call the same function.
*
* Returns `{ verified: true, total, chainStartedAt }` on a clean walk, or
* `{ verified: false, firstBrokenRowId, total }` at the first mismatch.
* An empty workspace is trivially verified.
*
* ### Pre-chain rows (`prevHash IS NULL`)
* When `AUDIT_HASH_CHAIN=true` is enabled on a deployment with pre-existing
* activity rows, those legacy rows have `prevHash = null` and were never
* hash-linked. Reporting a tamper break at the first such row would be a
* false positive — the rows aren't tampered, they predate the feature. So
* the verifier skips the leading `prevHash IS NULL` rows and begins the
* walk at the first row with a non-null `prevHash`. `chainStartedAt`
* surfaces the `createdAt` of that first chained row so operators can
* see how much history is outside the cryptographic coverage.
*
* Caller is expected to gate this behind `AUDIT_HASH_CHAIN=true` — when
* the chain is disabled, every row has `prevHash = null` and the walk
* would skip everything (returning `{ verified: true, total, chainStartedAt: null }`).
* The route handler in `backend/src/routes/system.js` short-circuits to
* `{ chainDisabled: true }` in that case to avoid the empty-walk ambiguity.
*
* @param {string} workspaceId
* @returns {Object} `{ verified: boolean, firstBrokenRowId?: string, total: number, chainStartedAt?: string|null }`
*/
export function verifyAuditChain(workspaceId) {
const db = getDatabase();
// SELECT * so we have every column needed to reproduce `rowMinusHash`.
// Tiebreaker `id ASC` matches the INSERT-side `id DESC` ordering for
// sub-millisecond bursts where two rows share the same createdAt.
// Numeric tiebreaker on the trailing integer of the `ACT-N` id — see the
// matching DESC ordering in `create()`. Lexicographic `id ASC` would
// visit ACT-10 immediately after ACT-1, mis-pairing it with ACT-1's hash
// instead of ACT-9's and breaking the verification walk at every
// digit-boundary burst.
const rows = db.prepare(
"SELECT * FROM activities WHERE workspaceId = ? ORDER BY createdAt ASC, CAST(SUBSTR(id, 5) AS INTEGER) ASC"
).all(workspaceId);
// Skip leading rows that predate chain mode (prevHash IS NULL). These
// are not tampered — they simply existed before AUDIT_HASH_CHAIN=true was
// flipped on. See the docstring for the false-positive scenario.
let startIdx = 0;
while (startIdx < rows.length && rows[startIdx].prevHash == null) startIdx++;
const chainStartedAt = startIdx < rows.length ? rows[startIdx].createdAt : null;
let previousHash = null;
for (let i = startIdx; i < rows.length; i++) {
const expected = computePrevHash(previousHash, rows[i]);
if (rows[i].prevHash !== expected) {
return { verified: false, firstBrokenRowId: rows[i].id, total: rows.length };
}
previousHash = rows[i].prevHash;
}
return { verified: true, total: rows.length, chainStartedAt };
}