Source: runner/executeTest.js

/**
 * executeTest.js — Single-test execution against a live browser
 *
 * Orchestrates a single test case: opens a browser context, attaches
 * network/console listeners, runs the AI-generated code (or a fallback
 * smoke test), captures artifacts, persists healing events, and cleans up.
 *
 * Heavy sub-tasks are delegated to focused modules:
 *   - codeParsing.js / codeExecutor.js  — parse & run generated code
 *   - screencast.js                     — CDP live-stream lifecycle
 *   - pageCapture.js                    — DOM snapshot, screenshots, boxes
 *   - healingPersistence.js             — write healing events to DB
 *
 * Exports:
 *   executeTest(test, browser, runId, stepIndex, runStart)
 */

import { v4 as uuidv4 } from "uuid";
import path from "path";
import fs from "fs";
import { getHealingHistoryForTest } from "../selfHealing.js";
import { extractTestBody, isApiTest } from "./codeParsing.js";
import { runGeneratedCode, runApiTestCode, getExpect } from "./codeExecutor.js";
import { startScreencast } from "./screencast.js";
import { waitForStable, captureDomSnapshot, captureScreenshot, captureBoundingBoxes } from "./pageCapture.js";
import { persistHealingEvents } from "./healingPersistence.js";
import { VIEWPORT_WIDTH, VIEWPORT_HEIGHT, NAVIGATION_TIMEOUT, API_TEST_TIMEOUT, BROWSER_TEST_TIMEOUT, VIDEOS_DIR, SHOTS_DIR, resolveDevice } from "./config.js";
import { formatLogLine } from "../utils/logFormatter.js";
import { injectCursorOverlay } from "./cursorOverlay.js";
import { diffScreenshot } from "./visualDiff.js";


// ─── Non-visual action detection (S3-06) ──────────────────────────────────────
// When a test's last meaningful line is a non-visual action (assertion, wait,
// console logging), we skip the post-test screenshot / DOM snapshot /
// bounding-box capture on success. These artifacts are redundant for
// non-visual endings and each capture adds 50-200ms of overhead per test.

/**
 * Regexes recognising Playwright statements that do not change what is on
 * screen: assertions, explicit waits, sleeps and console logging.
 *
 * `endsWithNonVisualAction` tests the last meaningful line of a test body
 * against this list; a hit means the success-path screenshot, DOM snapshot
 * and bounding-box capture are skipped.
 */
const NON_VISUAL_PATTERNS = (() => {
  // Assertion entry points: plain `expect(...)` and the self-healing wrapper.
  const assertionCalls = [
    /\bexpect\s*\(/,
    /\bsafeExpect\s*\(/,
  ];

  // Individual matcher calls (visibility, URL, title, text, value, state, count).
  const matcherCalls = [
    /\.toBeVisible\s*\(/,
    /\.toHaveURL\s*\(/,
    /\.toHaveTitle\s*\(/,
    /\.toContainText\s*\(/,
    /\.toHaveText\s*\(/,
    /\.toHaveValue\s*\(/,
    /\.toBeEnabled\s*\(/,
    /\.toBeDisabled\s*\(/,
    /\.toBeChecked\s*\(/,
    /\.toHaveCount\s*\(/,
  ];

  // Explicit waits on the page object, plus the custom `sleep` helper.
  const waitCalls = [
    /\bpage\.waitForTimeout\s*\(/,
    /\bpage\.waitForSelector\s*\(/,
    /\bpage\.waitForLoadState\s*\(/,
    /\bpage\.waitForURL\s*\(/,
    /\bawait\s+sleep\s*\(/,
  ];

  // Any console method call (log, warn, error, ...).
  const loggingCalls = [
    /\bconsole\.\w+\s*\(/,
  ];

  return [...assertionCalls, ...matcherCalls, ...waitCalls, ...loggingCalls];
})();

/**
 * Decides whether a test finishes on a non-visual statement.
 *
 * The body extracted from `playwrightCode` is scanned from the bottom for the
 * first line that is not blank, not a `//` comment, and not a bare closing
 * `}` / `});`. The result is true when that line matches any entry of
 * NON_VISUAL_PATTERNS (assertions, waits, console logging).
 *
 * @param {string|null} playwrightCode - The raw AI-generated code.
 * @returns {boolean} false when there is no code, no extractable body, or no
 *   meaningful line; otherwise whether the last meaningful line is non-visual.
 */
function endsWithNonVisualAction(playwrightCode) {
  if (!playwrightCode) return false;

  const body = extractTestBody(playwrightCode);
  if (!body) return false;

  // Lines that carry no action of their own and are skipped by the scan.
  const isFiller = (line) =>
    line === "" || line.startsWith("//") || line === "}" || line === "});";

  const lastMeaningful = body
    .split("\n")
    .map((line) => line.trim())
    .reverse()
    .find((line) => !isFiller(line));

  if (lastMeaningful === undefined) return false;
  return NON_VISUAL_PATTERNS.some((pattern) => pattern.test(lastMeaningful));
}

/**
 * Attach network & console listeners to a page.
 *
 * Returns { networkLogs, consoleLogs, dispose } — the arrays are mutated
 * in-place as events arrive. Call `dispose()` before closing the page to
 * prevent async response handlers from accessing a closed page (which
 * throws unhandled rejections that crash Node.js).
 *
 * Each "request" event appends one entry to `networkLogs` with `status`,
 * `size` and `duration` set to null; the matching "response" event fills
 * them in. A response is paired with its entry through the Request object
 * returned by `res.request()`, so concurrent requests to the same URL
 * (e.g. a GET and a POST, or overlapping polls) cannot pick up each other's
 * status. If no entry is registered for that Request, the handler falls back
 * to the first still-pending entry with the same URL.
 *
 * @param {Object} page - Object exposing `on(eventName, handler)`; in
 *   production a Playwright Page.
 * @returns {{ networkLogs: Object[], consoleLogs: Object[], dispose: Function }}
 */
function attachPageListeners(page) {
  const networkLogs = [];
  const consoleLogs = [];
  // Request object → its log entry. Weak so an unanswered request does not
  // keep its entry reachable through this map.
  const entryByRequest = new WeakMap();
  let closed = false;

  // Find the log entry a response belongs to: exact Request identity first,
  // URL-based lookup only when the Request was never seen by this listener.
  const findEntryFor = (res) => {
    const req = typeof res.request === "function" ? res.request() : null;
    const exact = req ? entryByRequest.get(req) : undefined;
    if (exact && exact.status === null) return exact;
    const url = res.url();
    return networkLogs.find((n) => n.url === url && n.status === null);
  };

  page.on("request", (req) => {
    if (closed) return;
    try {
      const entry = {
        id: uuidv4(),
        method: req.method(),
        url: req.url(),
        startTime: Date.now(),
        status: null,
        size: null,
        duration: null,
      };
      networkLogs.push(entry);
      entryByRequest.set(req, entry);
    } catch { /* page may be closing */ }
  });

  page.on("response", async (res) => {
    if (closed) return;
    try {
      const entry = findEntryFor(res);
      if (entry) {
        entry.status = res.status();
        entry.duration = Date.now() - entry.startTime;
        try {
          // A body that cannot be read is recorded as size 0.
          const body = await res.body().catch(() => Buffer.alloc(0));
          entry.size = body.length;
        } catch { entry.size = 0; }
      }
    } catch { /* page closed mid-handler — safe to ignore */ }
  });

  page.on("console", (msg) => {
    if (closed) return;
    try {
      consoleLogs.push({ time: new Date().toISOString(), level: msg.type(), text: msg.text() });
    } catch { /* page may be closing */ }
  });

  page.on("pageerror", (err) => {
    if (closed) return;
    try {
      consoleLogs.push({ time: new Date().toISOString(), level: "error", text: err.message });
    } catch { /* page may be closing */ }
  });

  return {
    networkLogs,
    consoleLogs,
    /** Call before page.close() to stop handlers from accessing the closed page. */
    dispose() { closed = true; },
  };
}

/**
 * Extract a clean, UI-safe error message from whatever a test threw.
 *
 * Generated test code can throw anything, so this never assumes an Error:
 *   - `null` / `undefined`          → "Unknown error"
 *   - primitives (string, number…)  → their string form
 *   - objects                       → `message` (coerced to a string); when it
 *     is empty or the literal "AggregateError" and `errors` is a non-empty
 *     array, the inner messages joined with "; "
 *
 * ANSI colour/cursor sequences and other control characters are stripped and
 * the result is trimmed.
 *
 * @param {*} err - The thrown value.
 * @returns {string} Sanitised message; may be "" for an object that carries no message.
 */
function formatTestError(err) {
  // `throw null` / `throw undefined` carry no information at all.
  if (err == null) return "Unknown error";

  let rawMsg;
  if (typeof err === "object" || typeof err === "function") {
    rawMsg = err.message ? String(err.message) : "";
    const inner = Array.isArray(err.errors) ? err.errors : [];
    if ((!rawMsg || rawMsg === "AggregateError") && inner.length) {
      rawMsg = inner.map((e) => e?.message || String(e)).join("; ");
    }
  } else {
    // Thrown primitive, e.g. `throw "boom"`.
    rawMsg = String(err);
  }

  // Strip ANSI escape codes so the UI shows clean text
  return rawMsg
    .replace(/\x1B\[[0-9;]*[mGKHF]/g, "")
    .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "")
    .trim();
}

/**
 * executeTest(test, browser, runId, stepIndex, runStart, opts) → result object
 *
 * Runs a single test case inside a fresh browser context and returns a
 * result object suitable for pushing into run.results. API-only tests are
 * delegated to executeApiTest() and never touch the browser.
 *
 * Failures of the test itself (including the per-test timeout) are normally
 * reported through `result.status === "failed"` and `result.error` rather
 * than by rejecting. The function rejects when no browser was supplied, when
 * the context cannot be created, or when page / overlay / screencast setup
 * fails — in that last case the freshly created context is closed and the
 * per-step video directory removed before the error is rethrown.
 *
 * @param {Object}  test
 * @param {Object}  browser      - Playwright Browser instance.
 * @param {string}  runId
 * @param {number}  stepIndex
 * @param {number}  runStart     - `Date.now()` when the run started.
 * @param {Object}  [opts]
 * @param {string}  [opts.browser]    - DIF-002: `"chromium" | "firefox" | "webkit"` (used only to stamp `result.browser`; the Playwright Browser is already launched by the caller).
 * @param {string}  [opts.device]     - DIF-003: Playwright device name (e.g. `"iPhone 14"`).
 * @param {string}  [opts.locale]     - AUTO-007: BCP 47 locale (e.g. `"fr-FR"`).
 * @param {string}  [opts.timezoneId] - AUTO-007: IANA timezone (e.g. `"Europe/Paris"`).
 * @param {Object}  [opts.geolocation] - AUTO-007: `{ latitude, longitude }`.
 * @returns {Promise<Object>} The populated result object.
 */
export async function executeTest(test, browser, runId, stepIndex, runStart, opts = {}) {
  // ── API-only test path: no browser context needed ──────────────────────
  // Use the cached _isApi flag set by testRunner.js (avoids re-parsing).
  // Fall back to isApiTest() for callers that bypass the runner (e.g. tests).
  const isApi = test._isApi ?? (test.playwrightCode && isApiTest(test.playwrightCode));
  if (isApi) {
    return executeApiTest(test, runId, stepIndex, runStart);
  }

  // ── Browser-based test path — browser must be available ────────────────
  if (!browser) {
    throw new Error(
      `Browser test "${test.name}" requires a browser instance but none was launched. ` +
      `This can happen if the test was misclassified as API-only during batch setup.`
    );
  }

  const testVideoDir = path.join(VIDEOS_DIR, runId, `step${stepIndex}`);
  // `recursive: true` makes this a no-op when the directory already exists,
  // so no separate existence check is needed.
  fs.mkdirSync(testVideoDir, { recursive: true });

  // DIF-003: Resolve device emulation descriptor (viewport, userAgent, touch, etc.)
  const deviceDescriptor = resolveDevice(opts.device);
  const effectiveViewport = deviceDescriptor?.viewport || { width: VIEWPORT_WIDTH, height: VIEWPORT_HEIGHT };

  // AUTO-007: Resolve locale, timezone, and geolocation from run config
  const contextLocale = opts.locale || deviceDescriptor?.locale || undefined;
  const contextTimezone = opts.timezoneId || undefined;
  const contextGeolocation = opts.geolocation || undefined;

  // Build shared context options (everything except recordVideo)
  const contextOpts = {
    // Spread device descriptor first so explicit overrides below take precedence
    ...(deviceDescriptor || {}),
    // Always override these regardless of device profile
    userAgent: deviceDescriptor?.userAgent || "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    viewport: effectiveViewport,
    permissions: ["geolocation", "notifications"],
    ignoreHTTPSErrors: true,
    // Enable downloads so page.waitForEvent('download') works (#42)
    acceptDownloads: true,
    // AUTO-007: Locale, timezone, and geolocation context options
    ...(contextLocale ? { locale: contextLocale } : {}),
    ...(contextTimezone ? { timezoneId: contextTimezone } : {}),
    ...(contextGeolocation ? { geolocation: contextGeolocation } : {}),
  };

  // Try creating context with video recording first. If ffmpeg is missing,
  // Playwright throws "Executable doesn't exist at …/ffmpeg-linux" on
  // newContext(). Fall back to no video so the test can still run — a missing
  // ffmpeg should degrade gracefully, not crash the entire test.
  let context;
  let videoEnabled = true;
  try {
    context = await browser.newContext({
      ...contextOpts,
      recordVideo: { dir: testVideoDir, size: { width: effectiveViewport.width, height: effectiveViewport.height } },
    });
  } catch (ctxErr) {
    if (ctxErr.message && ctxErr.message.includes("ffmpeg")) {
      console.warn(formatLogLine("warn", null,
        `[executeTest] ffmpeg not found — video recording disabled. Run "npx playwright install ffmpeg" to enable.`));
      videoEnabled = false;
      context = await browser.newContext(contextOpts);
    } else {
      throw ctxErr;
    }
  }

  // ── Page setup ─────────────────────────────────────────────────────────
  // Everything from here until the main try/finally can throw while the
  // context is already open. Release the context (and the per-step video
  // directory) before rethrowing so a failed setup does not leak it.
  let page;
  let stopScreencast = null;
  let listeners;
  try {
    page = await context.newPage();

    // Auto-accept dialogs (window.alert, confirm, prompt) so they don't hang
    // the test until timeout. Tests that need to dismiss can override with
    // page.on('dialog', d => d.dismiss()) before the triggering action. (#40)
    page.on("dialog", (dialog) => {
      dialog.accept().catch(() => {});
    });

    // DIF-014: Inject animated cursor overlay so the live CDP screencast shows
    // what the test is doing (click ripple, keystroke toast, hover dot).
    // Re-injected after each navigation via the page "load" event.
    await injectCursorOverlay(page);
    page.on("load", () => { injectCursorOverlay(page).catch(() => {}); });

    // Start CDP screencast (returns cleanup fn or null)
    const screencastResult = await startScreencast(page, runId);
    stopScreencast = screencastResult?.stop ?? null;

    // Attach network / console listeners — dispose() must be called before
    // page.close() to prevent async response handlers from crashing Node.
    listeners = attachPageListeners(page);
  } catch (setupErr) {
    if (stopScreencast) {
      try { await stopScreencast(); } catch { /* best-effort during teardown */ }
    }
    await context.close().catch(() => {});
    try { fs.rmSync(testVideoDir, { recursive: true, force: true }); } catch { /* ignore */ }
    throw setupErr;
  }
  const { networkLogs, consoleLogs, dispose: disposeListeners } = listeners;

  const browserName = opts.browser || "chromium";
  // Healing history and healing events are scoped to one version of one test.
  const healingScopeId = `${test.id}@v${test.codeVersion || 0}`;

  const result = {
    testId: test.id,
    testName: test.name,
    steps: test.steps || [],
    status: "passed",
    durationMs: 0,
    error: null,
    screenshot: null,
    screenshotPath: null,
    videoPath: null,
    runTimestamp: 0,
    network: [],
    consoleLogs: [],
    domSnapshot: null,
    boundingBoxes: [],
    stepCaptures: [],   // DIF-016: per-step screenshots
    stepTimings: [],    // DIF-016: per-step timing data
    visualDiff: null,   // DIF-001: final-screenshot visual-regression result
    browser: browserName, // DIF-002: browser engine this test ran under
  };

  const start = Date.now();
  result.startedAt = start;

  // Per-test timeout guard — prevents a single hanging test from blocking
  // the worker slot indefinitely during parallel execution.
  // When the timeout fires, we proactively close the page to interrupt any
  // hung Playwright operations (navigation, waitFor, click, etc.). Without
  // this, the Promise.race only detects the timeout but the in-flight
  // Playwright call continues running until the finally block — which may
  // itself hang if Chromium is unresponsive.
  let testTimeoutHandle;
  const testTimeoutPromise = new Promise((_, reject) => {
    testTimeoutHandle = setTimeout(() => {
      // Force-close the page to unblock any hung Playwright operation.
      // This triggers errors inside the testExecution IIFE which are
      // swallowed by the testExecution.catch(() => {}) registered below.
      page.close().catch(() => {});
      reject(new Error(`Browser test timed out after ${BROWSER_TEST_TIMEOUT}ms`));
    }, BROWSER_TEST_TIMEOUT);
  });

  try {
    const expect = await getExpect();

    const testExecution = (async () => {
      const body = test.playwrightCode ? extractTestBody(test.playwrightCode) : null;

      if (body) {
        // ── PRIMARY PATH: Execute the actual AI-generated Playwright code ──
        const codeAlreadyNavigates = body.includes("page.goto(");

        if (!codeAlreadyNavigates) {
          await page.goto(test.sourceUrl, { waitUntil: "domcontentloaded", timeout: NAVIGATION_TIMEOUT });
          await page.waitForTimeout(800);
        }

        const healingHints = getHealingHistoryForTest(healingScopeId);
        const codeResult = await runGeneratedCode(page, context, test.playwrightCode, expect, healingHints, {
          onStepCapture: async (stepNumber, _page) => {
            try {
              const shot = await captureScreenshot(_page, runId, stepIndex, { stepNumber });
              // DIF-001: per-step visual regression check against the stored baseline.
              // Best-effort — any failure (missing baseline dir, decode error) is swallowed
              // because a step capture must never break test execution.
              let visualDiff = null;
              try {
                visualDiff = diffScreenshot({
                  runId,
                  testId: test.id,
                  browser: browserName,
                  stepNumber,
                  pngBuffer: Buffer.from(shot.base64, "base64"),
                });
              } catch { /* ignore */ }
              return { screenshot: shot.base64, screenshotPath: shot.artifactPath, visualDiff };
            } catch { return null; }
          },
        });
        persistHealingEvents(healingScopeId, codeResult.healingEvents);

        // Collect per-step captures and timings from the instrumented run
        result.stepCaptures = codeResult.stepCaptures || [];
        result.stepTimings = codeResult.stepTimings || [];

      } else {
        // ── FALLBACK: No parseable code — run a basic smoke test ───────────
        await page.goto(test.sourceUrl, { waitUntil: "domcontentloaded", timeout: NAVIGATION_TIMEOUT });
        await page.waitForTimeout(500);

        const title = await page.title();
        if (!title) throw new Error("Page has no title — possible load failure");

        const url = page.url();
        if (!url.startsWith("http")) throw new Error("Invalid URL after navigation");
      }

      // S3-02: Wait for DOM to settle before capturing artifacts or asserting.
      // SPAs, streaming responses, and skeleton screens mutate the DOM
      // unpredictably after the last interaction. waitForStable() uses a
      // MutationObserver to detect when the page has gone quiet for 2 s,
      // preventing screenshots and assertions from running on half-rendered UIs.
      // On timeout (30 s) it returns gracefully — the test can still pass.
      await waitForStable(page);

      // Capture artifacts on success.
      // Skip screenshot / DOM snapshot / bounding boxes when the test ends
      // with a non-visual action (assertion, wait, console logging) — the page
      // hasn't visually changed so these artifacts are redundant. This saves
      // ~50-200ms per test. Failure screenshots are always captured regardless.
      const skipVisualArtifacts = endsWithNonVisualAction(test.playwrightCode);

      if (!skipVisualArtifacts) {
        result.domSnapshot = await captureDomSnapshot(page);

        const shot = await captureScreenshot(page, runId, stepIndex);
        result.screenshot = shot.base64;
        result.screenshotPath = shot.artifactPath;

        // DIF-001: Diff the final screenshot against the test's baseline
        // (stepNumber 0 is reserved for the end-of-test capture).
        try {
          result.visualDiff = diffScreenshot({
            runId,
            testId: test.id,
            browser: browserName,
            stepNumber: 0,
            pngBuffer: Buffer.from(shot.base64, "base64"),
          });
        } catch { /* visual diff is best-effort */ }

        result.boundingBoxes = await captureBoundingBoxes(page);
      }
    })();

    // Swallow the losing promise to prevent unhandled rejection
    testExecution.catch(() => {});
    await Promise.race([testExecution, testTimeoutPromise]);

  } catch (err) {
    result.status = "failed";
    result.error = formatTestError(err);

    // Generated code may throw non-Error values (even null), so every read
    // of the attached metadata goes through optional chaining.

    // Persist healing events from the failed run
    persistHealingEvents(healingScopeId, err?.__healingEvents);

    // Collect any per-step captures/timings gathered before the failure
    result.stepCaptures = err?.__stepCaptures || [];
    result.stepTimings = err?.__stepTimings || [];

    // Screenshot the failure state
    try {
      const shot = await captureScreenshot(page, runId, stepIndex, { failed: true });
      result.screenshot = shot.base64;
      result.screenshotPath = shot.artifactPath;
    } catch { /* page may be closed */ }

  } finally {
    clearTimeout(testTimeoutHandle);

    // Capture the final page URL for the frontend BrowserChrome
    try { result.url = page.url(); } catch { /* page already closed */ }
    if (!result.url || result.url === "about:blank") result.url = test.sourceUrl || "";

    result.durationMs = Date.now() - start;
    result.runTimestamp = start - runStart;
    result.network = networkLogs;
    result.consoleLogs = consoleLogs;

    // Stop CDP screencast before closing the page. A failure here must not
    // skip the page/context cleanup below or replace the finished result
    // with a rejection, so it is logged and otherwise ignored.
    if (stopScreencast) {
      try {
        await stopScreencast();
      } catch (screencastErr) {
        console.warn(formatLogLine("warn", null,
          `[executeTest] Screencast stop failed for step ${stepIndex}: ${screencastErr?.message ?? screencastErr}`));
      }
    }

    // Signal listeners to stop before closing — prevents async response
    // handlers from calling res.url()/res.status() on a closed page,
    // which would throw an unhandled rejection and crash Node.js.
    disposeListeners();

    // Close any popup / new-tab pages opened during the test so they don't
    // leak browser memory. context.pages() includes the main page — skip it
    // and close everything else. (#41)
    for (const p of context.pages()) {
      if (p !== page) await p.close().catch(() => {});
    }

    // Close page first then context — this flushes video to disk
    await page.close().catch(() => {});
    await context.close().catch(() => {});

    // Move the video to a stable named path (skip when ffmpeg was missing)
    if (videoEnabled) {
      try {
        const files = fs.readdirSync(testVideoDir).filter(f => f.endsWith(".webm"));
        if (files.length > 0) {
          const src = path.join(testVideoDir, files[0]);
          const videoName = `${runId}-step${stepIndex}.webm`;
          const dst = path.join(VIDEOS_DIR, videoName);
          fs.renameSync(src, dst);
          result.videoPath = `/artifacts/videos/${videoName}`;
        }
        fs.rmSync(testVideoDir, { recursive: true, force: true });
      } catch (videoErr) {
        console.warn(formatLogLine("warn", null, `[executeTest] Video move failed for step ${stepIndex}: ${videoErr.message}`));
      }
    } else {
      // No video was recorded — clean up the empty directory
      try { fs.rmSync(testVideoDir, { recursive: true, force: true }); } catch { /* ignore */ }
    }
  }

  return result;
}

/**
 * executeApiTest(test, runId, stepIndex, runStart) → result object
 *
 * Runs an API-only test (one that uses `request.newContext()`) without
 * spinning up a browser page. Skips screenshots, video, DOM snapshots,
 * and screencast — none of which apply to API tests.
 *
 * The result carries the same step/visual-diff fields as a browser result
 * (`stepCaptures`, `stepTimings`, `visualDiff`), left at their empty defaults,
 * so consumers can treat both result kinds uniformly. `isApiTest: true`
 * marks the result as coming from this path.
 *
 * A failing or timed-out test is reported through `result.status === "failed"`
 * and `result.error`; the timeout is API_TEST_TIMEOUT milliseconds.
 *
 * @param {Object} test      - Test record; `playwrightCode` is the code to run.
 * @param {string} runId     - Accepted for signature parity with executeTest; not read here.
 * @param {number} stepIndex - Accepted for signature parity with executeTest; not read here.
 * @param {number} runStart  - `Date.now()` when the run started.
 * @returns {Promise<Object>} The populated result object.
 */
async function executeApiTest(test, runId, stepIndex, runStart) {
  const result = {
    testId: test.id,
    testName: test.name,
    steps: test.steps || [],
    status: "passed",
    durationMs: 0,
    error: null,
    screenshot: null,
    screenshotPath: null,
    videoPath: null,
    runTimestamp: 0,
    network: [],
    consoleLogs: [],
    domSnapshot: null,
    boundingBoxes: [],
    stepCaptures: [],   // kept for shape parity with browser results
    stepTimings: [],    // kept for shape parity with browser results
    visualDiff: null,   // kept for shape parity with browser results
    url: test.sourceUrl || "",
    isApiTest: true,
  };

  const start = Date.now();
  result.startedAt = start;

  // AbortController lets us forcibly dispose Playwright request contexts
  // inside runApiTestCode when the timeout fires, preventing lingering
  // HTTP connections from leaking in the background.
  const ac = new AbortController();
  let timeoutHandle;

  try {
    const expect = await getExpect();
    const apiPromise = runApiTestCode(test.playwrightCode, expect, { signal: ac.signal });
    const timeoutPromise = new Promise((_, reject) => {
      timeoutHandle = setTimeout(() => {
        ac.abort(new Error(`API test timed out after ${API_TEST_TIMEOUT}ms`));
        reject(new Error(`API test timed out after ${API_TEST_TIMEOUT}ms`));
      }, API_TEST_TIMEOUT);
    });
    // Swallow the losing promise's rejection to prevent unhandled rejection
    // crashes in Node.js v15+. When the timeout wins, apiPromise continues
    // running until the abort signal disposes its contexts — its eventual
    // rejection must be caught here so it doesn't crash the process.
    apiPromise.catch(() => {});
    const apiResult = await Promise.race([apiPromise, timeoutPromise]);
    // Populate network logs from the instrumented API request context.
    // Optional chaining: a run that resolves without a result object is
    // still a pass, just one with no logs.
    result.network = apiResult?.apiLogs || [];
  } catch (err) {
    result.status = "failed";
    result.error = formatTestError(err);
    // Capture any API logs collected before the failure. The thrown value is
    // not guaranteed to be an object, hence the optional chaining.
    result.network = err?.__apiLogs || [];
  } finally {
    clearTimeout(timeoutHandle);
    result.durationMs = Date.now() - start;
    result.runTimestamp = start - runStart;
  }

  return result;
}