/**
* @module testRunner
* @description Thin orchestrator for Playwright test execution with parallel
* worker support.
*
* Owns the browser lifecycle, per-test loop (sequential or parallel), trace
* management, and final status transition. Delegates heavy sub-tasks to
* focused modules:
*
* | Module | Responsibility |
* |---------------------------------|---------------------------------------|
* | `runner/config.js` | Env constants, artifact dir setup |
* | `runner/codeParsing.js` | `extractTestBody` (hasCode check) |
* | `runner/executeTest.js` | Single-test execution |
* | `runner/feedbackIntegration.js` | Post-run AI feedback loop |
*
* ### Parallel execution
* When `parallelWorkers > 1`, tests run in concurrent browser contexts within
* a single Chromium instance. Each worker picks the next queued test, executes
* it in its own isolated `BrowserContext`, and reports back. The shared browser
* process keeps memory usage lower than launching N separate browsers.
*
* Concurrency is controlled by:
* 1. `PARALLEL_WORKERS` env var (default for all runs)
* 2. Per-run override via `options.parallelWorkers` (from Test Dials / API)
*
* ### Exports
* - {@link runTests} — Execute an array of approved tests against a project.
*/
import { extractTestBody, isApiTest } from "./runner/codeParsing.js";
import { executeTest } from "./runner/executeTest.js";
import { runFeedbackLoop } from "./runner/feedbackIntegration.js";
import { TRACES_DIR, DEFAULT_PARALLEL_WORKERS, launchBrowser, resolveBrowser, BROWSER_HEADLESS } from "./runner/config.js";
import { finalizeRunIfNotAborted, isRunAborted } from "./utils/abortHelper.js";
import { emitRunEvent, log, logWarn, logError, logSuccess } from "./utils/runLogger.js";
import { classifyError } from "./utils/errorClassifier.js";
import { structuredLog, formatLogLine } from "./utils/logFormatter.js";
import * as testRepo from "./database/repositories/testRepo.js";
import * as runRepo from "./database/repositories/runRepo.js";
import { signRunArtifacts, signArtifactUrl } from "./middleware/appSetup.js";
// ── Concurrency helper ────────────────────────────────────────────────────────
// Lightweight promise pool — no external dependencies. Runs `fn` for each item
// in `items` with at most `concurrency` in-flight at once. Results are returned
// in the original item order.
async function poolMap(items, concurrency, fn, signal) {
const results = new Array(items.length);
let nextIndex = 0;
async function worker() {
while (nextIndex < items.length) {
if (signal?.aborted) break;
const idx = nextIndex++;
if (idx >= items.length) break;
results[idx] = await fn(items[idx], idx);
}
}
const workers = [];
for (let w = 0; w < Math.min(concurrency, items.length); w++) {
workers.push(worker());
}
await Promise.all(workers);
return results;
}
/**
 * Execute an array of approved tests against a project using Playwright.
 * Launches Chromium, runs each test with self-healing (optionally in parallel),
 * collects results, saves traces/videos, runs the AI feedback loop, and
 * finalises the run.
 *
 * @param {Object} project - The project `{ id, name, url }`.
 * @param {Object[]} tests - Array of test objects to execute.
 * @param {Object} run - The run record (mutated in place).
 * @param {Object} [options]
 * @param {number} [options.parallelWorkers] - Concurrent browser contexts (1–10). Overrides env default.
 * @param {string} [options.browser] - `"chromium" | "firefox" | "webkit"` (DIF-002). Defaults to chromium.
 * @param {string} [options.device] - Playwright device preset name (DIF-003).
 * @param {string} [options.locale] - BCP 47 locale (AUTO-007).
 * @param {string} [options.timezoneId] - IANA timezone (AUTO-007).
 * @param {Object} [options.geolocation] - `{ latitude, longitude }` (AUTO-007).
 * @param {AbortSignal} [options.signal] - Abort signal for cancellation.
 * @returns {Promise<void>}
 * @throws Rethrows browser-launch and trace-context creation errors after
 *   marking `run` as failed (any launched browser is closed first).
 */
export async function runTests(project, tests, run, { parallelWorkers, browser: browserName, device, locale, timezoneId, geolocation, signal } = {}) {
  const runId = run.id;
  const tracePath = `${TRACES_DIR}/${runId}.zip`;
  // Resolve concurrency: per-run override → env default → 1 (sequential),
  // clamped to 1–10. Note `||` (not `??`): an explicit 0 also falls back to
  // the env default before clamping.
  const workers = Math.max(1, Math.min(10, parallelWorkers || DEFAULT_PARALLEL_WORKERS));
  // Classify each test once upfront and cache the result on the test object.
  // This avoids re-parsing the code body via isApiTest() multiple times per
  // test (previously called 4× each: allApiOnly, apiCount, logging, executeTest).
  // executeTest reads test._isApi instead of re-calling isApiTest().
  for (const t of tests) {
    t._isApi = !!(t.playwrightCode && isApiTest(t.playwrightCode));
    // Persist the classification on the test object so the frontend can read
    // test.isApiTest directly without reimplementing the detection heuristic.
    testRepo.update(t.id, { isApiTest: t._isApi });
  }
  // If every test is API-only, skip the entire browser launch + trace context
  // to save ~100-200MB of RAM. (Vacuously true for an empty `tests` array, so
  // an empty run also skips the browser.)
  const allApiOnly = tests.every(t => t._isApi);
  let browser = null;
  let traceContext = null;
  // DIF-002: resolve the requested browser once so we can log + persist a
  // canonical name (invalid / unknown values fall back to chromium).
  const { name: resolvedBrowser } = resolveBrowser(browserName);
  run.browser = resolvedBrowser;
  structuredLog("run.start", { runId, projectId: project.id, tests: tests.length, workers, allApiOnly, browser: resolvedBrowser });
  if (!allApiOnly) {
    try {
      browser = await launchBrowser({ browser: resolvedBrowser });
    } catch (launchErr) {
      const classified = classifyError(launchErr, "run");
      run.status = "failed";
      run.error = classified.message;
      run.errorCategory = classified.category;
      run.finishedAt = new Date().toISOString();
      logError(run, classified.message);
      structuredLog("browser.launch_failed", { runId, error: classified.message });
      // NOTE(review): run state is mutated but runRepo.save(run) is not called
      // before rethrowing — presumably the caller persists/handles the failed
      // run; confirm against the call site.
      throw launchErr;
    }
    structuredLog("browser.launched", { runId });
    // Shared tracing context (separate from per-test video contexts)
    try {
      traceContext = await browser.newContext({
        userAgent: "Mozilla/5.0 (compatible; AutonomousQA/1.0)",
        viewport: { width: 1280, height: 720 },
      });
      await traceContext.tracing.start({ screenshots: true, snapshots: true, sources: false });
    } catch (ctxErr) {
      // Context creation failed after a successful launch — close the browser
      // before rethrowing so we don't leak the process.
      await browser.close().catch(() => {});
      const classified = classifyError(ctxErr, "run");
      run.status = "failed";
      run.error = classified.message;
      run.errorCategory = classified.category;
      run.finishedAt = new Date().toISOString();
      logError(run, classified.message);
      throw ctxErr;
    }
  }
  const apiCount = tests.filter(t => t._isApi).length;
  const modeLabel = workers > 1 ? `${workers} parallel workers` : "sequential";
  log(run, `🚀 Starting test run: ${tests.length} tests (${modeLabel})`);
  log(run, `⚙️ Run config:`);
  log(run, `Execution mode: ${workers > 1 ? `⚡ Parallel (${workers} workers)` : "▶ Sequential (1 worker)"}`);
  log(run, `Tests queued: ${tests.length}${apiCount > 0 ? ` (${apiCount} API, ${tests.length - apiCount} browser)` : ""}`);
  log(run, `Project URL: ${project.url}`);
  log(run, allApiOnly
    ? `Browser: ⏭️ Skipped (all tests are API-only)`
    : `Browser: ${resolvedBrowser} (${BROWSER_HEADLESS ? "headless" : "headed"})`);
  const runStart = Date.now();
  const allVideoSegments = [];
  // ── Process a single test result — shared by the pool worker callback ────
  // Mutates the shared `run` record. Safe under parallel workers because each
  // invocation runs synchronously to completion in single-threaded JS — no
  // await inside, so results cannot interleave mid-update.
  function processResult(test, result) {
    run.results.push(result);
    if (result.videoPath) allVideoSegments.push(result.videoPath);
    if (result.status === "passed") {
      run.passed++;
      logSuccess(run, `PASSED (${result.durationMs}ms)`);
    } else if (result.status === "warning") {
      // Warnings count toward `passed` — only the log line differs.
      run.passed++;
      logWarn(run, `WARNING: ${result.error}`);
    } else {
      run.failed++;
      logError(run, `FAILED: ${result.error}`);
    }
    // Emit result event (without the heavy base64 screenshot)
    const { screenshot: _ss, ...resultLean } = result;
    const signedResult = { ...resultLean };
    if (signedResult.screenshotPath) signedResult.screenshotPath = signArtifactUrl(signedResult.screenshotPath);
    if (signedResult.videoPath) signedResult.videoPath = signArtifactUrl(signedResult.videoPath);
    emitRunEvent(run.id, "result", { result: signedResult });
    if (result.screenshotPath) {
      emitRunEvent(run.id, "screenshot", {
        testId: test.id,
        screenshotPath: signArtifactUrl(result.screenshotPath),
      });
    }
    testRepo.update(test.id, {
      lastResult: result.status,
      lastRunAt: new Date().toISOString(),
    });
    // Flush run state to SQLite after each result so a crash mid-run
    // doesn't lose all results collected so far. SQLite writes are
    // synchronous (~1ms) so this adds negligible overhead per test.
    runRepo.save(run);
    // Broadcast a snapshot after each result so the frontend progress bar
    // updates in real time (especially important during parallel execution
    // where multiple results arrive in quick succession).
    if (!isRunAborted(run, signal)) {
      emitRunEvent(run.id, "snapshot", { run: signRunArtifacts(run) });
    }
  }
  try {
    await poolMap(tests, workers, async (test, i) => {
      // Per-test abort check: tests already claimed by a worker still finish,
      // but no new work starts after abort.
      if (signal?.aborted) return;
      const hasCode = !!(test.playwrightCode && extractTestBody(test.playwrightCode));
      const workerTag = workers > 1 ? ` [w${(i % workers) + 1}]` : "";
      const typeTag = test._isApi ? "🌐 API" : hasCode ? "executing generated code" : "fallback smoke test";
      structuredLog("test.start", { runId, testId: test.id, index: i + 1, total: tests.length, isApi: !!test._isApi });
      log(run, `▶ [${i + 1}/${tests.length}]${workerTag} ${test.name} (${typeTag})`);
      try {
        const result = await executeTest(test, browser, runId, i, runStart, { browser: resolvedBrowser, device, locale, timezoneId, geolocation });
        structuredLog("test.result", { runId, testId: test.id, status: result.status, durationMs: result.durationMs });
        processResult(test, result);
      } catch (err) {
        // Build a synthetic result and route through processResult so SSE
        // `result` and `snapshot` events are emitted — otherwise the
        // frontend progress bar stalls during parallel execution.
        // (The synthetic result carries no screenshot/video/trace artifacts.)
        structuredLog("test.crash", { runId, testId: test.id, error: err.message?.slice(0, 200) });
        const errorResult = {
          testId: test.id, testName: test.name,
          status: "failed", error: err.message,
          durationMs: 0, network: [], consoleLogs: [],
        };
        processResult(test, errorResult);
      }
    }, signal);
  } finally {
    // Always clean up browser resources — even if the loop threw unexpectedly.
    // browser/traceContext are null when all tests are API-only.
    if (traceContext) {
      try {
        await traceContext.tracing.stop({ path: tracePath });
        // Store the public artifact URL path, not the filesystem path.
        run.tracePath = `/artifacts/traces/${runId}.zip`;
        log(run, `📊 Trace saved`);
      } catch (e) {
        // Best-effort: a failed trace save must not fail the whole run.
        logWarn(run, `Trace save failed: ${e.message}`);
      }
      await traceContext.close().catch(() => {});
    }
    if (browser) {
      await browser.close().catch((err) => {
        console.warn(formatLogLine("warn", null, `[testRunner] browser.close() failed: ${err.message}`));
      });
    }
  }
  if (allVideoSegments.length > 0) {
    // First segment doubles as the run's headline video; the full list is
    // kept alongside for per-test playback.
    run.videoPath = allVideoSegments[0];
    run.videoSegments = allVideoSegments;
    log(run, ` 🎬 ${allVideoSegments.length} video segment(s) saved`);
  }
  // NOTE: We intentionally keep run.status === "running" here so that:
  // 1. The abort endpoint (POST /api/runs/:id/abort) still works during the
  //    feedback loop — it checks run.status === "running".
  // 2. SSE reconnections don't prematurely close — the /events endpoint sends
  //    an immediate "done" + res.end() when run.status !== "running", which
  //    would cut off the client while the feedback loop is still active.
  // The status is set to "completed" only after the feedback loop finishes.
  const elapsed = ((Date.now() - runStart) / 1000).toFixed(1);
  structuredLog("run.execution_done", { runId, passed: run.passed, failed: run.failed, total: run.total, elapsedSec: parseFloat(elapsed) });
  log(run, `📋 Test execution done: ${run.passed} passed, ${run.failed} failed out of ${run.total} in ${elapsed}s${workers > 1 ? ` (${workers}x parallel)` : ""} — starting post-run analysis…`);
  // Broadcast a final snapshot so the frontend sees the complete pass/fail
  // counts before the feedback loop starts its long-running AI calls.
  // (processResult already emits per-result snapshots, but this ensures the
  // frontend has the final state even if the last result's snapshot was lost.)
  if (!isRunAborted(run, signal)) {
    emitRunEvent(run.id, "snapshot", { run: signRunArtifacts(run) });
  }
  // ── Feedback loop: auto-regenerate high-priority failing tests ──────────
  // Delegated to runner/feedbackIntegration.js — no-ops when no failures,
  // aborted, or no AI provider configured.
  await runFeedbackLoop(run, tests, signal);
  // Now that the feedback loop is done, finalize the run status.
  // This is the single place where status transitions to "completed".
  // Guard the log() call inside the callback so it only fires when the run
  // actually transitions to "completed". After an abort, the SSE "done" event
  // has already been emitted and the stream is closed — logging here would
  // append to run.logs but the SSE broadcast would be silently lost.
  finalizeRunIfNotAborted(run, () => {
    run.finishedAt = new Date().toISOString();
    run.duration = Date.now() - runStart;
    logSuccess(run, `Run complete: ${run.passed} passed, ${run.failed} failed out of ${run.total}`);
    structuredLog("run.complete", {
      runId, projectId: project.id,
      passed: run.passed, failed: run.failed, total: run.total,
      durationMs: run.duration,
    });
  });
  // Emit "done" only now — after the feedback loop — so the frontend's
  // fetchRun() always sees the final, stable completed state.
  // Skip if already aborted — the abort endpoint already emitted the done event.
  if (!isRunAborted(run, signal)) {
    emitRunEvent(run.id, "done", { status: run.status, passed: run.passed, failed: run.failed, total: run.total });
  }
}