src/lib/ai/prompts/field-classifier.prompt.ts

Total Symbols
4
Lines of Code
177
Avg Complexity
4.3
Avg Coverage
96.2%

Symbols by Kind

interface 2
method 2

All Symbols

Name Kind Visibility Status Lines Signature
FieldClassifierInput interface exported- 19-26 interface FieldClassifierInput
FieldClassifierOutput interface exported- 29-33 interface FieldClassifierOutput
buildPrompt method - 113-139 buildPrompt(input: FieldClassifierInput): : string
parseResponse method - 141-175 parseResponse(raw: string): : FieldClassifierOutput | null

Full Source

/**
 * Field Classifier Prompt
 *
 * Structured prompt for classifying form fields via Chrome AI (Gemini Nano).
 * Determines the semantic type of an HTML form field and which data generator
 * should be used to fill it.
 *
 * This prompt is consumed by the `chromeAiClassifier` strategy and can be
 * reused in any context that needs field classification via an LLM.
 */

import { FIELD_TYPES, type FieldType } from "@/types";
import type { StructuredPrompt } from "./prompt.interface";
import { renderPromptBase } from "./prompt-renderer";

// ── Input / Output types ──────────────────────────────────────────────────────

/** Input context extracted from a form field for classification. */
export interface FieldClassifierInput {
  /** Raw `outerHTML` of the form element (truncated to limit). */
  readonly elementHtml: string;
  /** Surrounding container HTML (label, fieldset, wrapper). */
  readonly contextHtml?: string;
  /** Normalised text signals (name + id + label + placeholder). */
  readonly signals?: string;
}

/** Parsed classification result from the AI response. */
export interface FieldClassifierOutput {
  readonly fieldType: FieldType;
  readonly confidence: number;
  readonly generatorType: FieldType;
}

// ── Constants ─────────────────────────────────────────────────────────────────

export const CLASSIFIER_MIN_CONFIDENCE = 0.6;

const MAX_ELEMENT_HTML_CHARS = 1000;
const MAX_CONTEXT_HTML_CHARS = 1000;

const VALID_FIELD_TYPES = new Set<string>(FIELD_TYPES);
const VALID_FIELD_TYPES_TEXT = FIELD_TYPES.join(", ");

// ── Prompt definition ─────────────────────────────────────────────────────────

export const fieldClassifierPrompt: StructuredPrompt<
  FieldClassifierInput,
  FieldClassifierOutput
> = {
  id: "field-classifier",
  version: "2.0.0",

  role: "system",
  persona: "You are a form field type classifier for Brazilian web forms.",

  task: "Given the raw HTML of an input element and its surrounding container, determine the semantic type of the field and which data generator should fill it.",

  rules: [
    "Return ONLY the JSON object, nothing else. No introductory text, no markdown.",
    `confidence is a float between 0.0 and 1.0. Only return >= ${CLASSIFIER_MIN_CONFIDENCE} when reasonably sure.`,
    'Use "unknown" when the field purpose is unclear.',
    "generatorType should be the most specific generator for the field (e.g. birth-date instead of date).",
    "Analyse HTML attributes (name, id, placeholder, class, aria-label, autocomplete) and visible label text.",
    `Valid types: ${VALID_FIELD_TYPES_TEXT}`,
  ],

  outputSchema: [
    {
      name: "fieldType",
      type: "string",
      description: "semantic field type",
      required: true,
      enum: FIELD_TYPES,
    },
    {
      name: "confidence",
      type: "number",
      description: "0.0–1.0",
      required: true,
      range: { min: 0, max: 1 },
    },
    {
      name: "generatorType",
      type: "string",
      description: "most specific generator type",
      required: true,
      enum: FIELD_TYPES,
    },
  ],

  examples: [
    {
      input:
        '<input type="email" name="user_email" placeholder="seu@email.com">',
      output:
        '{"fieldType": "email", "confidence": 0.98, "generatorType": "email"}',
    },
    {
      input:
        '<input type="text" name="cpf" id="cpf" maxlength="14" placeholder="000.000.000-00">',
      output:
        '{"fieldType": "cpf", "confidence": 0.95, "generatorType": "cpf"}',
    },
    {
      input:
        '<input type="text" name="dt_nascimento" placeholder="DD/MM/AAAA">',
      output:
        '{"fieldType": "birth-date", "confidence": 0.90, "generatorType": "birth-date"}',
    },
  ],

  buildPrompt(input: FieldClassifierInput): string {
    const base = renderPromptBase(this);
    const sections: string[] = [];

    // Element HTML (truncated for token economy)
    const html =
      input.elementHtml.length > MAX_ELEMENT_HTML_CHARS
        ? input.elementHtml.slice(0, MAX_ELEMENT_HTML_CHARS) + "…"
        : input.elementHtml;
    sections.push(`=== Element HTML ===\n${html}`);

    // Surrounding container HTML
    if (input.contextHtml) {
      const ctx =
        input.contextHtml.length > MAX_CONTEXT_HTML_CHARS
          ? input.contextHtml.slice(0, MAX_CONTEXT_HTML_CHARS) + "…"
          : input.contextHtml;
      sections.push(`=== Surrounding HTML ===\n${ctx}`);
    }

    // Pre-computed text signals
    if (input.signals?.trim()) {
      sections.push(`=== Field Signals ===\n${input.signals}`);
    }

    return `${base}\n\nClassify this form field:\n\n${sections.join("\n\n")}`;
  },

  parseResponse(raw: string): FieldClassifierOutput | null {
    try {
      const match = raw.match(/\{[^{}]+\}/);
      if (!match) return null;

      const parsed = JSON.parse(match[0]) as Record<string, unknown>;

      if (
        typeof parsed.fieldType !== "string" ||
        !VALID_FIELD_TYPES.has(parsed.fieldType) ||
        typeof parsed.confidence !== "number"
      ) {
        return null;
      }

      const fieldType = parsed.fieldType as FieldType;
      const confidence = Math.max(0, Math.min(1, parsed.confidence));

      // Discard low-confidence and "unknown" results
      if (fieldType === "unknown" || confidence < CLASSIFIER_MIN_CONFIDENCE) {
        return null;
      }

      // generatorType — must be a valid FieldType; falls back to fieldType
      const generatorType =
        typeof parsed.generatorType === "string" &&
        VALID_FIELD_TYPES.has(parsed.generatorType)
          ? (parsed.generatorType as FieldType)
          : fieldType;

      return { fieldType, confidence, generatorType };
    } catch {
      return null;
    }
  },
};