/**
* codeParsing.js — Pure string transforms for AI-generated Playwright code
*
* These functions are stateless, side-effect-free, and have zero external
* dependencies — making them trivially unit-testable.
*
* Exports:
* extractTestBody(code) — pull the async arrow-fn body from a test()
* patchNetworkIdle(code) — rewrite networkidle → domcontentloaded
* stripPlaywrightImports(code) — remove import/require of @playwright/test
* repairBrokenStringLiterals(code) — collapse accidental newlines inside quoted strings
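* isApiTest(code) — detect API-only tests (request.newContext / request.get etc.)
* stripHallucinatedPageAssertions(code) — drop expect(page) assertion lines from API tests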
*/
/**
 * extractTestBody(playwrightCode)
 *
 * Returns the source text found between the braces of the first
 * `async ({ ... }) => { ... }` arrow function in the given code, trimmed.
 * The surrounding file uses this to obtain the body of a generated
 * `test('name', async ({ page }) => { ... })` call.
 *
 * Recognised shapes (the parameter must be a destructured object):
 *   test('name', async ({ page }) => { ... })
 *   test('name', async ({ page, context }) => { ... })
 *
 * Limitations: braces are counted textually, so a `{` or `}` inside a string
 * literal, template literal, regex, or comment is counted as well.
 *
 * @param {string} playwrightCode - Generated Playwright test source.
 * @returns {string|null} The trimmed body text, or null when the input is
 *   empty, not a string, or contains no matching async arrow function. When
 *   the closing brace is missing (e.g. truncated output), everything after
 *   the opening brace is returned.
 */
export function extractTestBody(playwrightCode) {
  if (!playwrightCode || typeof playwrightCode !== "string") return null;

  // Capture everything that follows the opening `{` of the arrow-function body.
  const arrowMatch = playwrightCode.match(/async\s*\(\s*\{[^}]*\}\s*\)\s*=>\s*\{([\s\S]*)/);
  if (!arrowMatch) return null;

  const bodyAndRest = arrowMatch[1];

  // Depth starts at 1 because the opening brace was consumed by the regex.
  let depth = 1;
  for (let i = 0; i < bodyAndRest.length; i++) {
    const ch = bodyAndRest[i];
    if (ch === "{") {
      depth++;
    } else if (ch === "}") {
      depth--;
      // The brace that brings depth back to 0 closes the test body.
      if (depth === 0) return bodyAndRest.slice(0, i).trim();
    }
  }

  // No matching closing brace: keep the whole remainder instead of chopping
  // off its final character.
  return bodyAndRest.trim();
}
/**
 * patchNetworkIdle(code)
 *
 * Rewrites `networkidle` waits in generated Playwright code to
 * `domcontentloaded`:
 *
 *   - `waitForLoadState('networkidle')` (single, double or backtick quotes,
 *     with or without a trailing `{ ... }` options object) becomes
 *     `waitForLoadState('domcontentloaded', { timeout: 30000 })`. Any options
 *     object in the original call is replaced, not merged.
 *   - `waitUntil: 'networkidle'` (e.g. inside goto / waitForNavigation
 *     options) becomes `waitUntil: 'domcontentloaded'`.
 *
 * Rationale: pages that keep issuing background requests (ads, tracking,
 * personalisation) may never become network-idle, so waiting for that state
 * tends to end in a timeout.
 *
 * @param {string} code - Playwright source to patch.
 * @returns {string} The patched source. Empty or non-string input is returned
 *   unchanged.
 */
export function patchNetworkIdle(code) {
  if (!code || typeof code !== "string") return code;

  return code
    // waitForLoadState('networkidle'[, { ... }])
    .replace(
      /waitForLoadState\s*\(\s*['"`]networkidle['"`]\s*(,\s*\{[^}]*\})?\s*\)/g,
      "waitForLoadState('domcontentloaded', { timeout: 30000 })"
    )
    // waitUntil: 'networkidle'
    .replace(
      /waitUntil\s*:\s*['"`]networkidle['"`]/g,
      "waitUntil: 'domcontentloaded'"
    );
}
/**
 * stripPlaywrightImports(code)
 *
 * Removes every line that imports or requires `@playwright/test`, e.g.
 *   import { test, expect } from '@playwright/test';
 *   import * as pw from '@playwright/test';
 *   const { test, expect } = require('@playwright/test');
 * so the statements do not cause parse errors when the test body is
 * evaluated on its own.
 *
 * Named imports and destructured requires that span several lines are first
 * joined onto one line, so the whole statement is removed rather than only
 * its last line.
 *
 * @param {string} code - Playwright source to clean.
 * @returns {string} The source without `@playwright/test` import/require
 *   lines. Empty or non-string input is returned unchanged.
 */
export function stripPlaywrightImports(code) {
  if (!code || typeof code !== "string") return code;

  // import { ... } from '@playwright/test' anywhere on a line.
  const namedImport = /import\s*\{.*\}\s*from\s*['"]@playwright\/test['"]/;
  // Default, namespace, type-only and side-effect imports at the start of a line.
  const otherImport = /^\s*import\b[^'"]*['"]@playwright\/test['"]/;
  // require('@playwright/test') anywhere on a line.
  const requireCall = /require\s*\(\s*['"]@playwright\/test['"]\s*\)/;

  // Collapses the line breaks inside one matched statement into single spaces.
  const joinLines = (statement) => statement.replace(/\s*\r?\n\s*/g, " ");

  return code
    // Multi-line `import [type] [dflt,] { ... } from '@playwright/test'`.
    .replace(
      /\bimport\s*(?:type\s+)?(?:[\w$]+\s*,\s*)?\{[^{}]*\}\s*from\s*['"]@playwright\/test['"]/g,
      joinLines
    )
    // Multi-line `{ ... } = require('@playwright/test')`.
    .replace(
      /\{[^{}]*\}\s*=\s*require\s*\(\s*['"]@playwright\/test['"]\s*\)/g,
      joinLines
    )
    .split("\n")
    .filter(
      (line) =>
        !namedImport.test(line) && !otherImport.test(line) && !requireCall.test(line)
    )
    .join("\n");
}
/**
 * repairBrokenStringLiterals(code)
 *
 * AI output occasionally breaks CSS/XPath selectors across lines inside
 * single/double-quoted literals, creating invalid JavaScript:
 *   page.$('button[name=btnI]
 *   [type=submit]')
 *
 * JavaScript does not allow raw newlines in single/double-quoted strings, so
 * this pass scans the code once and replaces each raw line break (LF, CR or
 * CRLF) found inside such a string with a single space.
 *
 * Left untouched:
 *   - template literals (raw newlines are legal there),
 *   - line and block comments (quotes inside them do not open strings),
 *   - backslash-escaped characters, including `\` + newline continuations.
 *
 * Regex literals are not tracked as such; only backslash escapes inside them
 * are honoured.
 *
 * @param {string} code - JavaScript source to repair.
 * @returns {string} The repaired source. Empty or non-string input is
 *   returned unchanged.
 */
export function repairBrokenStringLiterals(code) {
  if (!code || typeof code !== "string") return code;

  let out = "";
  let inSingle = false;
  let inDouble = false;
  let inTemplate = false;
  let inLineComment = false;
  let inBlockComment = false;
  let escaped = false;

  for (let i = 0; i < code.length; i++) {
    const ch = code[i];
    const next = code[i + 1];

    // ── Inside a comment: copy verbatim until the comment ends ─────────────
    if (inLineComment) {
      out += ch;
      if (ch === "\n") inLineComment = false;
      continue;
    }
    if (inBlockComment) {
      out += ch;
      if (ch === "*" && next === "/") {
        out += next;
        i++;
        inBlockComment = false;
      }
      continue;
    }

    // ── Escapes: the character after a backslash is always copied as-is ────
    // Checked before comment detection so an escaped "/" (as in /a\//) can
    // never be mistaken for the start of a comment.
    if (escaped) {
      out += ch;
      // Keep a `\` + CRLF line continuation intact as one unit.
      if (ch === "\r" && next === "\n") {
        out += next;
        i++;
      }
      escaped = false;
      continue;
    }
    if (ch === "\\") {
      out += ch;
      escaped = true;
      continue;
    }

    // ── Comment openers (only outside string/template literals) ────────────
    // Both characters are consumed together so the "*" of "/*" cannot be
    // re-read as the first half of a closing "*/" (the "/*/" case).
    const inString = inSingle || inDouble || inTemplate;
    if (!inString && ch === "/" && (next === "/" || next === "*")) {
      out += ch + next;
      i++;
      if (next === "/") inLineComment = true;
      else inBlockComment = true;
      continue;
    }

    // ── Quote tracking: a quote only toggles its own kind of literal ───────
    if (ch === "'" && !inDouble && !inTemplate) {
      inSingle = !inSingle;
      out += ch;
      continue;
    }
    if (ch === "\"" && !inSingle && !inTemplate) {
      inDouble = !inDouble;
      out += ch;
      continue;
    }
    if (ch === "`" && !inSingle && !inDouble) {
      inTemplate = !inTemplate;
      out += ch;
      continue;
    }

    // ── The repair itself: raw line break inside '...' or "..." ────────────
    if ((inSingle || inDouble) && (ch === "\n" || ch === "\r")) {
      // Treat CRLF as one line break so it yields one space, not two.
      if (ch === "\r" && next === "\n") i++;
      out += " ";
      continue;
    }

    out += ch;
  }

  return out;
}
/**
 * isApiTest(playwrightCode)
 *
 * Returns true when the body of the generated test (as returned by
 * extractTestBody) calls the Playwright request API — `request.newContext()`
 * or `request.get/post/put/patch/delete/head/fetch()` — and contains no real
 * page interaction such as `page.goto()`, `page.click()`, `page.fill()` or a
 * `page.getBy*()` / `page.locator()` lookup.
 *
 * `expect(page)...` assertions are deliberately NOT treated as page
 * interactions: they are a common AI hallucination in API tests and are
 * removed separately by stripHallucinatedPageAssertions().
 *
 * NOTE(review): callers presumably use the result to run API tests without a
 * browser page and to skip browser-only artifacts — confirm at the call site.
 *
 * @param {string} playwrightCode - Full generated Playwright test source.
 * @returns {boolean} true for an API-only test; false otherwise, including
 *   when the input is empty or no test body can be extracted.
 */
export function isApiTest(playwrightCode) {
  if (!playwrightCode) return false;

  const body = extractTestBody(playwrightCode);
  if (!body) return false;

  // request.newContext(...) or a direct HTTP verb on the request fixture.
  const usesRequestContext =
    /request\s*\.\s*(?:newContext|get|post|put|patch|delete|head|fetch)\s*\(/.test(body);

  // Real page interactions. The getBy* family is listed in full so that a
  // browser test driven only by e.g. page.getByTestId(...) is not mistaken
  // for an API test.
  const usesPage =
    /page\s*\.\s*(goto|click|dblclick|hover|press|locator|getByRole|getByText|getByLabel|getByPlaceholder|getByTestId|getByAltText|getByTitle|fill|type|check|uncheck|selectOption|waitForSelector|waitForLoadState)\s*\(/.test(body);

  return usesRequestContext && !usesPage;
}
/**
 * stripHallucinatedPageAssertions(code)
 *
 * Removes statements that start with `expect(page)` (with or without a
 * leading `await`), such as `await expect(page).toHaveURL(...)`, which the AI
 * sometimes hallucinates in API-only tests.
 *
 * When the assertion is written as a method chain across lines,
 *   await expect(page)
 *     .toHaveURL('/x');
 * the continuation lines beginning with `.` are removed together with the
 * first line, so no dangling `.toHaveURL(...)` fragment is left behind.
 *
 * NOTE(review): intended for code already classified as an API test by
 * isApiTest(); this function does not check that itself.
 *
 * @param {string} code - Test source to clean.
 * @returns {string} The source without `expect(page)` assertion statements.
 *   Empty or non-string input is returned unchanged.
 */
export function stripHallucinatedPageAssertions(code) {
  if (!code || typeof code !== "string") return code;

  const pageAssertion = /^\s*(?:await\s+)?expect\s*\(\s*page\s*\)/;
  const chainContinuation = /^\s*\./;

  const kept = [];
  // True while the lines being read still belong to a removed assertion.
  let droppingStatement = false;

  for (const line of code.split("\n")) {
    if (pageAssertion.test(line)) {
      droppingStatement = true;
      continue;
    }
    // A line starting with "." can only continue the previous expression.
    if (droppingStatement && chainContinuation.test(line)) continue;

    droppingStatement = false;
    kept.push(line);
  }

  return kept.join("\n");
}