/**
* codeExecutor.js — Sandboxed execution of AI-generated Playwright test bodies
*
* Responsibilities:
* 1. Parse, clean, and patch the AI-generated code (via codeParsing.js)
* 2. Inject self-healing runtime helpers (via selfHealing.js)
* 3. Execute the code in a **vm sandbox** with a restricted global context
* 4. Lazy-load Playwright's `expect` at runtime
* 5. Provide a real Playwright `request` fixture for API tests
*
* ### Security model
* AI-generated code runs inside a vm context that sets `process: undefined`
* in the global scope. However, any injected host object (page, expect,
* Buffer, etc.) exposes the host's Function constructor via
* `.constructor.constructor`, which can be used to escape the sandbox:
*
* `page.constructor.constructor('return process')()`
*
* Node.js docs explicitly warn: "The vm module is not a security mechanism.
* Do not use it to run untrusted code."
*
* We block `process.exit()`, `process.kill()`, and `process.abort()` so
* escaped code cannot crash the server. We do NOT strip `process.env` because
* doing so breaks concurrent Express handlers (JWT verification, AI provider
* calls, SQLite operations) that read env vars between await points during
* async test execution.
*
* For true env isolation (preventing sandbox-escaped code from reading API
* keys), use worker_threads with `env: {}` — see NEXT_STEPS.md S1-02.
*
* Exports:
* runGeneratedCode(page, context, playwrightCode, expect, healingHints, { onStepCapture? })
* runApiTestCode(playwrightCode, expect, { signal? })
* getExpect()
*/
import vm from "vm";
import { extractTestBody, patchNetworkIdle, stripPlaywrightImports, stripHallucinatedPageAssertions, repairBrokenStringLiterals } from "./codeParsing.js";
import { getSelfHealingHelperCode, applyHealingTransforms } from "../selfHealing.js";
import playwright from "playwright";
// ─── Sandbox helpers ──────────────────────────────────────────────────────────
/**
 * Create the vm context in which AI-generated Playwright code is evaluated.
 *
 * The context's globals are assembled in this order (later entries win, so a
 * caller cannot re-introduce a blocked name through `exposed`):
 *   1. everything in `exposed` (page, context, expect, ...)
 *   2. a frozen console facade and timer wrappers
 *   3. Node.js globals that the test code may need (URL, Buffer, ...)
 *   4. a blocklist of names (process, require, fetch, ...) set to `undefined`
 *
 * NOTE: this context is not a security boundary. Any injected host object can
 * be used to reach the host's Function constructor via
 * `.constructor.constructor`, so the blocklist only hides these names from
 * well-behaved code.
 *
 * @param {Object} exposed — caller-provided objects to inject
 * @returns {Object} A contextified object usable as a vm `parsingContext`
 */
function buildSandboxContext(exposed) {
  // Frozen facade: the sandbox gets forwarding functions, never the host
  // `console` object itself.
  const consoleFacade = Object.freeze({
    log: (...items) => console.log(...items),
    warn: (...items) => console.warn(...items),
    error: (...items) => console.error(...items),
    info: (...items) => console.info(...items),
  });

  // Timers are forwarded through thin arrow wrappers rather than handed over
  // as the original host function references.
  const timerWrappers = {
    setTimeout: (...params) => setTimeout(...params),
    clearTimeout: (...params) => clearTimeout(...params),
    setInterval: (...params) => setInterval(...params),
    clearInterval: (...params) => clearInterval(...params),
  };

  // Node.js-specific globals are not part of a fresh vm context, so they are
  // injected explicitly.
  const nodeGlobals = {
    URL,
    URLSearchParams,
    TextEncoder,
    TextDecoder,
    DOMException,
    Buffer,
  };

  // Numeric constants plus parsing/encoding helpers. The last three are only
  // wrapped when the host runtime actually provides them.
  const valueHelpers = {
    NaN,
    Infinity,
    undefined,
    isNaN: (...params) => isNaN(...params),
    isFinite: (...params) => isFinite(...params),
    parseInt: (...params) => parseInt(...params),
    parseFloat: (...params) => parseFloat(...params),
    encodeURIComponent: (...params) => encodeURIComponent(...params),
    decodeURIComponent: (...params) => decodeURIComponent(...params),
    encodeURI: (...params) => encodeURI(...params),
    decodeURI: (...params) => decodeURI(...params),
    atob: typeof atob === "function" ? (...params) => atob(...params) : undefined,
    btoa: typeof btoa === "function" ? (...params) => btoa(...params) : undefined,
    structuredClone: typeof structuredClone === "function" ? (...params) => structuredClone(...params) : undefined,
  };

  // Names that must resolve to `undefined` inside the sandbox.
  const blockedNames = [
    "process",
    "require",
    "module",
    "exports",
    "__filename",
    "__dirname",
    "global",
    "globalThis",
    "fetch",
    "XMLHttpRequest",
    "WebSocket",
    "Deno",
    "Bun",
  ];
  const blockedGlobals = Object.fromEntries(blockedNames.map((name) => [name, undefined]));

  return vm.createContext({
    ...exposed,
    console: consoleFacade,
    ...timerWrappers,
    ...nodeGlobals,
    ...valueHelpers,
    ...blockedGlobals,
  });
}
// ─── Process guard (concurrency-safe) ─────────────────────────────────────────
// Blocks process.exit / process.kill / process.abort while sandboxed code runs.
// Uses reference counting so parallel workers all stay protected — the first
// entering test installs the guards, the last exiting test removes them.
//
// NOTE: We intentionally do NOT strip or replace process.env. The previous
// implementation replaced process.env with {} during async sandbox execution,
// which broke concurrent Express handlers that read env vars between await
// points (JWT verification, AI provider calls, SQLite config, etc.). Since
// Node.js is single-threaded but test execution is async, the event loop
// processes other tasks (including HTTP requests) while page actions await,
// and those tasks would find process.env empty.
//
// The vm sandbox already sets `process: undefined` in its context. The only
// way to reach the host process is via .constructor.constructor('return process')().
// For true env isolation, use worker_threads with `env: {}` (see NEXT_STEPS.md
// S1-02). The current approach blocks destructive operations (exit/kill/abort)
// without breaking the server.
// Guard bookkeeping shared by every in-flight runWithStrippedEnv() call.
let _envGuardCount = 0; // number of callers currently inside the guard
let _savedExit = null; // original process.exit while guards are installed
let _savedKill = null; // original process.kill while guards are installed
let _savedAbort = null; // original process.abort while guards are installed

/** Remember the real process.exit/kill/abort and swap in throwing stand-ins. */
function installProcessGuards() {
  _savedExit = process.exit;
  _savedKill = process.kill;
  _savedAbort = process.abort;
  process.exit = () => { throw new Error("process.exit() is blocked"); };
  process.kill = () => { throw new Error("process.kill() is blocked"); };
  process.abort = () => { throw new Error("process.abort() is blocked"); };
}

/** Put the remembered process methods back and clear the saved references. */
function restoreProcessMethods() {
  process.exit = _savedExit;
  process.kill = _savedKill;
  process.abort = _savedAbort;
  _savedExit = null;
  _savedKill = null;
  _savedAbort = null;
}

/**
 * Run `fn` while `process.exit()`, `process.kill()` and `process.abort()`
 * are replaced by functions that throw.
 *
 * This is a defense-in-depth layer for code that escapes the vm sandbox via
 * `.constructor.constructor('return process')()`: it can still reach the
 * host `process`, but it cannot use these three methods to take the server
 * down.
 *
 * Despite the function's name, `process.env` is left untouched. Replacing it
 * would also affect every other task sharing the event loop while `fn` is
 * awaiting. For env isolation, use worker_threads with `env: {}`.
 *
 * Concurrency: calls are reference-counted. The first caller to enter
 * installs the guards; they are removed only when the last caller leaves,
 * whether `fn` resolved, rejected, or threw synchronously.
 *
 * @param {Function} fn — (possibly async) function to execute under the guards
 * @returns {Promise<*>} whatever `fn` returns / resolves to
 */
async function runWithStrippedEnv(fn) {
  if (_envGuardCount === 0) {
    installProcessGuards();
  }
  _envGuardCount += 1;
  try {
    return await fn();
  } finally {
    _envGuardCount -= 1;
    if (_envGuardCount === 0) {
      restoreProcessMethods();
    }
  }
}
/**
 * Compile `code` as a function body against a fresh sandbox context and run
 * it while the destructive process methods are guarded.
 *
 * `code` is compiled with no parameters, so it must `return` the value (or
 * promise) it wants to hand back — callers pass a `return (async () => {…})();`
 * wrapper.
 *
 * @param {string} code — The full async IIFE source to execute
 * @param {Object} exposed — Objects to inject into the sandbox context
 * @param {string} [filename] — Virtual filename for stack traces
 * @returns {Promise<*>} The return value of the executed code
 */
async function runInSandbox(code, exposed, filename = "generated-test.js") {
  const compileOptions = {
    parsingContext: buildSandboxContext(exposed),
    filename,
  };
  const compiled = vm.compileFunction(code, [], compileOptions);
  const outcome = await runWithStrippedEnv(() => compiled());
  return outcome;
}
/**
 * Instrument a test body with `await __captureStep(N)` calls so a screenshot
 * and timing can be recorded once each logical step has finished.
 *
 * A step starts at a `// Step N:` comment line (case-insensitive, leading
 * whitespace allowed) and ends just before the next such comment, or at the
 * end of the code. The capture call for step N is therefore emitted
 * immediately before the following step marker, and once more after the
 * final line for the last step.
 *
 * When no step marker is present the input string is returned untouched, and
 * the caller falls back to a single end-of-test screenshot.
 *
 * @param {string} code — cleaned test body
 * @returns {string} instrumented code
 */
function injectStepCaptures(code) {
  const stepMarker = /^\s*\/\/\s*Step\s+(\d+)\s*:/i;
  const captureCallFor = (stepNumber) => ` await __captureStep(${stepNumber});`;

  const output = [];
  let openStep = null; // number of the step whose body is currently being copied

  code.split("\n").forEach((sourceLine) => {
    const marker = stepMarker.exec(sourceLine);
    if (marker !== null) {
      // A new step begins here, so close the one that was open (if any).
      if (openStep !== null) {
        output.push(captureCallFor(openStep));
      }
      openStep = parseInt(marker[1], 10);
    }
    output.push(sourceLine);
  });

  // No marker was ever seen: hand back the original string as-is.
  if (openStep === null) {
    return code;
  }

  // Close the final step.
  output.push(captureCallFor(openStep));
  return output.join("\n");
}
/**
 * runGeneratedCode(page, context, playwrightCode, expect, healingHints, opts)
 *
 * Dynamically executes the AI-generated test body against the live page.
 *
 * The body is extracted from `playwrightCode`, cleaned (imports stripped,
 * network-idle patched, healing transforms applied, broken string literals
 * repaired), instrumented with per-step capture calls, and then run inside
 * the vm sandbox together with the self-healing helper code.
 *
 * healingHints is an optional map of "action::label" → strategyIndex from
 * previous runs, injected into the runtime helpers so the winning strategy
 * is tried first (adaptive self-healing).
 *
 * @param {Object} page
 * @param {Object} context
 * @param {string} playwrightCode
 * @param {Function} expect
 * @param {Object} [healingHints]
 * @param {Object} [opts]
 * @param {Function} [opts.onStepCapture] — async (stepNumber, page) => captureData.
 *   Called after each `// Step N:` block completes. Should return a serialisable
 *   object (e.g. { screenshot, artifactPath }) or null. Errors are swallowed.
 * @returns {Promise<{passed: true, healingEvents: Array, stepCaptures: Array, stepTimings: Array}>}
 * @throws {Error} "Could not parse test body from generated code" when no body
 *   can be extracted. Any failure of the test itself is rethrown with
 *   `__healingEvents`, `__stepCaptures` and `__stepTimings` attached. A thrown
 *   non-object value (string, number, null, …) is wrapped in an Error first so
 *   that metadata can be attached and the failure is never mistaken for a pass.
 */
export async function runGeneratedCode(page, context, playwrightCode, expect, healingHints, opts = {}) {
  const body = extractTestBody(playwrightCode);
  if (!body) {
    throw new Error("Could not parse test body from generated code");
  }
  const cleaned = repairBrokenStringLiterals(
    applyHealingTransforms(
      patchNetworkIdle(stripPlaywrightImports(body))
    )
  );
  // Inject per-step screenshot capture points
  const instrumented = injectStepCaptures(cleaned);
  const helpers = getSelfHealingHelperCode(healingHints);

  // Every APIRequestContext created on behalf of the test is tracked here so
  // it can be disposed, either by the test itself or by the `finally` below.
  const browserRequestContexts = [];
  // The default context is cached as a *promise* so that concurrent
  // request.<method>() calls (e.g. inside Promise.all) share one context
  // instead of each creating their own.
  let defaultRequestContextPromise = null;

  const __newRequestContext = async (options) => {
    const ctx = await playwright.request.newContext({ ignoreHTTPSErrors: true, ...options });
    browserRequestContexts.push(ctx);
    return ctx;
  };

  const __getDefaultRequestContext = () => {
    if (defaultRequestContextPromise === null) {
      const pending = __newRequestContext();
      // A failed creation must not poison the cache: let the next call retry.
      pending.catch(() => {
        if (defaultRequestContextPromise === pending) {
          defaultRequestContextPromise = null;
        }
      });
      defaultRequestContextPromise = pending;
    }
    return defaultRequestContextPromise;
  };

  // Drain-and-dispose: splice out the tracked contexts on every call so a
  // mid-test `request.dispose()` followed by a fresh `request.newContext()`
  // still has its new context disposed by the `finally` block.
  const __disposeRequestContexts = async () => {
    // Forget the cached default so the next request.<method>() call lazily
    // creates a fresh context instead of reusing a disposed one.
    defaultRequestContextPromise = null;
    const toDispose = browserRequestContexts.splice(0);
    for (const ctx of toDispose) {
      // Best-effort cleanup: a context that fails to dispose must not mask
      // the test result.
      await ctx.dispose().catch(() => {});
    }
  };

  // Hybrid browser tests may call both request.newContext() and request.get/post.
  // Provide a fixture-like shim that supports both patterns.
  const __requestShim = {
    newContext: (...args) => __newRequestContext(...args),
    dispose: () => __disposeRequestContexts(),
  };
  for (const method of ["get", "post", "put", "patch", "delete", "fetch", "head"]) {
    __requestShim[method] = async (...args) => {
      const ctx = await __getDefaultRequestContext();
      return ctx[method](...args);
    };
  }

  // Step capture state — collected by __captureStep inside the sandbox,
  // populated by the onStepCapture callback provided by the caller.
  const stepCaptures = [];
  const stepTimings = [];
  let lastStepTime = Date.now();

  // The __captureStep function is injected into the sandbox context.
  // It records timing and calls the external onStepCapture callback.
  const __captureStep = async (stepNumber) => {
    const now = Date.now();
    const durationMs = now - lastStepTime;
    stepTimings.push({ step: stepNumber, durationMs, completedAt: now });
    // `opts?.` tolerates an explicit `null` passed for opts.
    if (opts?.onStepCapture) {
      try {
        const capture = await opts.onStepCapture(stepNumber, page);
        if (capture) stepCaptures.push({ step: stepNumber, ...capture });
      } catch { /* swallow — step capture must never fail the test */ }
    }
    // Update lastStepTime AFTER the screenshot so the next step's duration
    // does not include the screenshot overhead from this step (DIF-016).
    lastStepTime = Date.now();
  };

  // Build the code string that will run inside the vm sandbox.
  // The sandbox context provides page, context, expect as globals.
  const code = `
    return (async () => {
      ${helpers}
      // Stubs for Playwright fixtures that some LLMs hallucinate in the function
      // signature but are not valid in our eval context (e.g. 'run', 'browser',
      // 'request'). Defining them as undefined prevents ReferenceError crashes.
      const run = undefined;
      const browser = context?.browser?.() ?? undefined;
      // Hybrid UI+API flows legitimately use request.newContext().
      // Expose a scoped request fixture instead of undefined.
      const request = __requestShim;
      // A separate flag records failure: the thrown value itself may be falsy
      // (throw null / 0 / ""), which must still count as a failed test.
      let __testFailed = false;
      let __testError;
      try {
        ${instrumented}
      } catch (e) {
        __testFailed = true;
        // Wrap non-object throws so healing events can be attached to them.
        __testError = (e !== null && (typeof e === "object" || typeof e === "function"))
          ? e
          : new Error("Test threw a non-Error value: " + String(e));
      }
      // Always return healing events, even on failure, so the runner can
      // persist what we learned from earlier steps.
      if (__testFailed) {
        __testError.__healingEvents = __healingEvents;
        throw __testError;
      }
      return { __healingEvents };
    })();
  `;

  const isObjectLike = (value) =>
    value !== null && (typeof value === "object" || typeof value === "function");

  try {
    const result = await runInSandbox(
      code,
      { page, context, expect, __captureStep, __requestShim },
      "browser-test.js",
    );
    return { passed: true, healingEvents: result?.__healingEvents || [], stepCaptures, stepTimings };
  } catch (err) {
    // This module is strict-mode code, so assigning a property to a primitive
    // would throw a TypeError and hide the real failure. Wrap it first.
    const failure = isObjectLike(err)
      ? err
      : new Error(`Test threw a non-Error value: ${String(err)}`);
    failure.__healingEvents = failure.__healingEvents || [];
    failure.__stepCaptures = stepCaptures;
    failure.__stepTimings = stepTimings;
    throw failure;
  } finally {
    await __disposeRequestContexts();
  }
}
/**
 * runApiTestCode(playwrightCode, expect)
 *
 * Executes an API-only test that uses Playwright's `request.newContext()`
 * instead of a browser page. Creates a real APIRequestContext, runs the
 * generated code, and cleans up afterwards.
 *
 * Every call made through `get/post/put/patch/delete/head` on the default
 * context, or on any context the test creates via `request.newContext()`, is
 * recorded in `apiLogs` (method, url, timing, status, headers, bodies).
 *
 * @param {string} playwrightCode - The AI-generated Playwright test code.
 * @param {Function} expect - Playwright's expect function.
 * @param {Object} [options]
 * @param {AbortSignal} [options.signal] - When aborted, all Playwright request
 *   contexts are forcibly disposed so the caller (e.g. a timeout race) doesn't
 *   leave HTTP connections lingering in the background.
 * @returns {Promise<{passed: true, apiLogs: Array}>}
 * @throws {Error} "Could not parse test body from generated code" when no body
 *   can be extracted; a SyntaxError when the generated code does not compile;
 *   `signal.reason` (or an "Aborted" Error) when the signal is already aborted.
 *   Any failure of the test itself is rethrown with `__apiLogs` attached. A
 *   thrown non-object value is wrapped in an Error first.
 */
export async function runApiTestCode(playwrightCode, expect, { signal } = {}) {
  const body = extractTestBody(playwrightCode);
  if (!body) {
    throw new Error("Could not parse test body from generated code");
  }
  const cleaned = repairBrokenStringLiterals(
    stripHallucinatedPageAssertions(
      patchNetworkIdle(stripPlaywrightImports(body))
    )
  );

  // Build the code string. We validate syntax eagerly (before creating the
  // request context) by compiling once with a throwaway context. If the AI
  // generated invalid JS, this throws SyntaxError without leaking an HTTP
  // context. The actual execution happens later with the real request object.
  const apiCode = `
    return (async () => {
      // API tests don't use page/context — provide stubs to prevent ReferenceError
      const page = undefined;
      const context = undefined;
      const run = undefined;
      const browser = undefined;
      // A separate flag records failure: the thrown value itself may be falsy
      // (throw null / 0 / ""), which must still count as a failed test.
      let __testFailed = false;
      let __testError;
      try {
        ${cleaned}
      } catch (e) {
        __testFailed = true;
        __testError = e;
      }
      if (__testFailed) {
        throw __testError;
      }
      return { passed: true };
    })();
  `;
  // Eagerly validate syntax — throws SyntaxError before we allocate HTTP resources.
  vm.compileFunction(apiCode, [], { parsingContext: buildSandboxContext({}) });

  // Now that we know the code is syntactically valid, create the context.
  const apiLogs = [];
  const request = await playwright.request.newContext({
    ignoreHTTPSErrors: true,
  });

  // Turn a request payload into a loggable string. Logging must never break
  // the request itself, so a payload JSON cannot represent (circular
  // structure, BigInt, …) is replaced by a placeholder instead of throwing.
  const serializeRequestBody = (data) => {
    if (data == null) return null;
    if (typeof data === "string") return data;
    try {
      return JSON.stringify(data) ?? null;
    } catch {
      return "[unserializable request body]";
    }
  };

  // Helper: wrap HTTP methods on an APIRequestContext to capture logs.
  // NOTE: We intentionally exclude "fetch" from instrumentation. Playwright's
  // named methods (get, post, put, …) internally delegate to fetch(), so
  // instrumenting both would double-log every request. If the AI code calls
  // fetch() directly, it still works — it just won't appear in the API logs
  // (the named method wrappers cover 99% of AI-generated patterns).
  function instrumentContext(ctx) {
    for (const method of ["get", "post", "put", "patch", "delete", "head"]) {
      if (typeof ctx[method] !== "function") continue;
      const original = ctx[method].bind(ctx);
      ctx[method] = async (...args) => {
        const start = Date.now();
        const entry = {
          method: method.toUpperCase(),
          url: String(args[0]),
          startTime: start,
          status: null,
          duration: null,
          size: null,
          requestHeaders: args[1]?.headers || null,
          requestBody: serializeRequestBody(args[1]?.data),
          responseHeaders: null,
          responseBody: null,
        };
        try {
          const resp = await original(...args);
          entry.status = resp.status();
          entry.duration = Date.now() - start;
          try {
            const bodyBuf = await resp.body();
            entry.size = bodyBuf.length;
            // Capture response body (text) — cap at 32KB to avoid bloating run results
            const bodyText = bodyBuf.toString("utf-8");
            entry.responseBody = bodyText.length > 32768 ? bodyText.slice(0, 32768) + "\n…(truncated)" : bodyText;
          } catch { entry.size = 0; }
          try { entry.responseHeaders = resp.headers(); } catch { /* ignore */ }
          apiLogs.push(entry);
          return resp;
        } catch (err) {
          // Transport-level failure: log it with status 0, then let the test see it.
          entry.duration = Date.now() - start;
          entry.status = 0;
          apiLogs.push(entry);
          throw err;
        }
      };
    }
  }
  instrumentContext(request);

  // AI-generated code may call request.newContext({ baseURL: '...' }) which
  // requires the APIRequest factory (playwright.request), not the
  // APIRequestContext we created above. Add a shim so both patterns work.
  const subContexts = [];
  request.newContext = async (options) => {
    const ctx = await playwright.request.newContext({ ignoreHTTPSErrors: true, ...options });
    subContexts.push(ctx);
    instrumentContext(ctx);
    return ctx;
  };

  // Helper to forcibly dispose all request contexts (used by both normal
  // cleanup and external abort signals). Each dispose swallows its own error,
  // so this never rejects.
  async function disposeAllContexts() {
    for (const ctx of subContexts) {
      await ctx.dispose().catch(() => {});
    }
    await request.dispose().catch(() => {});
  }

  // If the caller provides an AbortSignal (e.g. from a timeout race),
  // dispose all contexts immediately when it fires. This ensures that
  // even if the test is still running in the background, the underlying
  // HTTP connections are torn down promptly.
  let onAbort;
  if (signal) {
    if (signal.aborted) {
      await disposeAllContexts();
      throw signal.reason || new Error("Aborted");
    }
    // Fire-and-forget is intentional: an event listener cannot be awaited,
    // and disposeAllContexts() handles its own errors.
    onAbort = () => { void disposeAllContexts(); };
    signal.addEventListener("abort", onAbort, { once: true });
  }

  const isObjectLike = (value) =>
    value !== null && (typeof value === "object" || typeof value === "function");

  try {
    await runInSandbox(apiCode, { request, expect, __apiLogs: apiLogs }, "api-test.js");
    return { passed: true, apiLogs };
  } catch (err) {
    // This module is strict-mode code, so assigning a property to a primitive
    // (or to null) would throw a TypeError and hide the real failure.
    const failure = isObjectLike(err)
      ? err
      : new Error(`Test threw a non-Error value: ${String(err)}`);
    failure.__apiLogs = apiLogs;
    throw failure;
  } finally {
    if (signal && onAbort) {
      signal.removeEventListener("abort", onAbort);
    }
    await disposeAllContexts();
  }
}
/**
 * getExpect()
 *
 * Resolves to Playwright's `expect` function. The `@playwright/test` module
 * is imported dynamically on each call rather than at the top of this file,
 * so it is only loaded when a caller actually asks for `expect`.
 *
 * @returns {Promise<Function>} the `expect` export of `@playwright/test`
 */
export async function getExpect() {
  const playwrightTest = await import("@playwright/test");
  return playwrightTest.expect;
}