src/lib/form/detectors/classifiers.ts

Total Symbols
10
Lines of Code
344
Avg Complexity
3.4
Avg Coverage
100.0%

File Relationships

graph LR buildClassificationChain["buildClassificationChain"] getActiveClassifiers["getActiveClassifiers"] detectNativeFieldsAsync["detectNativeFieldsAsync"] collectNativeFields["collectNativeFields"] reclassifyFieldBySelector["reclassifyFieldBySelector"] detect["detect"] classifyCustomFieldsAsync["classifyCustomFieldsAsync"] buildClassificationChain -->|calls| getActiveClassifiers detectNativeFieldsAsync -->|calls| buildClassificationChain detectNativeFieldsAsync -->|calls| collectNativeFields reclassifyFieldBySelector -->|calls| buildClassificationChain getActiveClassifiers -->|calls| buildClassificationChain getActiveClassifiers -->|calls| collectNativeFields detect -->|calls| collectNativeFields detect -->|calls| getActiveClassifiers classifyCustomFieldsAsync -->|calls| getActiveClassifiers click buildClassificationChain "../symbols/66b48c3d68622e6a.html" click getActiveClassifiers "../symbols/94c3286cfdb569c3.html" click detectNativeFieldsAsync "../symbols/5423f52b3a4d371e.html" click collectNativeFields "../symbols/ce8fa7f6620f4bb2.html" click reclassifyFieldBySelector "../symbols/7794dcddf31eccff.html" click detect "../symbols/327f71678956ca9e.html" click classifyCustomFieldsAsync "../symbols/d6918a7c3b58c267.html"

Symbols by Kind

function 9
method 1

All Symbols

Name Kind Visibility Status Lines Signature
getActiveClassifiers function exported- 106-108 getActiveClassifiers(): : ReadonlyArray<FieldClassifier>
setActiveClassifiers function exported- 114-116 setActiveClassifiers(classifiers: FieldClassifier[]): : void
buildClassifiersFromSettings function exported- 123-136 buildClassifiersFromSettings( config: Array<{ name: string; enabled: boolean }>, ): : FieldClassifier[]
collectNativeFields function - 145-149 collectNativeFields(): : FormField[]
buildClassificationChain function - 158-160 buildClassificationChain(): : FieldProcessingChain
detectNativeFieldsAsync function exported- 168-170 detectNativeFieldsAsync(): : Promise<FormField[]>
reclassifyFieldBySelector function exported- 177-185 reclassifyFieldBySelector( selector: string, ): : Promise<FormField | null>
detect method - 204-223 detect(): : FormField[]
classifyCustomFieldsSync function exported- 260-281 classifyCustomFieldsSync(fields: FormField[]): : FormField[]
classifyCustomFieldsAsync function exported- 299-343 classifyCustomFieldsAsync( fields: FormField[], ): : Promise<FormField[]>

Full Source

/**
 * Classifier Registry & Page-Level Detectors
 *
 * Central module that wires field classifiers into pipelines and
 * exposes the scanners used by form-detector.ts and content-script.ts.
 *
 * Classifier implementations live in ./strategies/ — add / edit there.
 *
 * Exported classifiers (in default priority order inside ALL_CLASSIFIERS):
 *
 *   htmlTypeClassifier     — deterministic mapping from input[type] / tagName
 *   keywordClassifier      — Portuguese keyword / label matching rules
 *   tensorflowClassifier   — TF.js cosine-similarity soft match (pre-trained model)
 *   chromeAiClassifier     — Gemini Nano via Chrome Built-in AI (async only)
 *   htmlFallbackClassifier — last-resort input[type] → FieldType mapping
 *
 * Runtime injection:
 *
 *   ALL_CLASSIFIERS            — default ordered list of all classifiers
 *   getActiveClassifiers()     — returns current active list (may be customised)
 *   setActiveClassifiers()     — overrides active list (called by content-script)
 *   buildClassifiersFromSettings() — builds list from user settings
 *
 * Exported pipelines:
 *
 *   DEFAULT_PIPELINE           — static DetectionPipeline over ALL_CLASSIFIERS
 *   DEFAULT_COLLECTION_PIPELINE— page-level: native-inputs → custom-selects → interactive-fields
 */

import type { FormField } from "@/types";
import {
  htmlTypeClassifier,
  htmlFallbackClassifier,
  keywordClassifier,
  tensorflowClassifier,
  chromeAiClassifier,
} from "./strategies";
import { FieldProcessingChain } from "../extractors/field-processing-chain";
import {
  INPUT_SELECTOR,
  isVisible,
  isNotCustomSelect,
  buildNativeField,
  type NativeElement,
} from "./native-input-config";
import {
  DetectionPipeline,
  FieldCollectionPipeline,
  type FieldClassifier,
  type PageDetector,
} from "./pipeline";

// ── All classifiers (canonical ordered list) ─────────────────────────────────

/**
 * All available field classifiers in default priority order:
 *   html-type → keyword → tensorflow → chrome-ai (async only) → html-fallback
 *
 * This is the source of truth for the default classification sequence.
 * Inject a subset or reordered list via setActiveClassifiers().
 *
 * The ReadonlyArray annotation is a compile-time guard only — the array is
 * not frozen at runtime. Both DEFAULT_PIPELINE and the initial active list
 * are built from shallow copies (spread) of this array.
 */
export const ALL_CLASSIFIERS: ReadonlyArray<FieldClassifier> = [
  htmlTypeClassifier,
  keywordClassifier,
  tensorflowClassifier,
  chromeAiClassifier,
  htmlFallbackClassifier,
];

// ── Re-export classifiers for external consumers ──────────────────────────────
export {
  htmlTypeClassifier,
  htmlFallbackClassifier,
  keywordClassifier,
  tensorflowClassifier,
  chromeAiClassifier,
} from "./strategies";

// ── Default pipeline (static, for field-icon and other direct consumers) ──────

/**
 * Static DetectionPipeline wrapping ALL_CLASSIFIERS.
 * Used by field-icon.ts and field-icon-utils.ts for single-field re-classification.
 * For page-level scanning, use the native-input scanners below.
 *
 * Constructed once at module load from a copy of ALL_CLASSIFIERS, so it is
 * NOT affected by later calls to setActiveClassifiers().
 */
export const DEFAULT_PIPELINE = new DetectionPipeline([...ALL_CLASSIFIERS]);

// ── Configurable active classifiers ──────────────────────────────────────────

/**
 * Runtime-mutable list — overridden by content-script based on user settings.
 * Starts as a shallow copy of ALL_CLASSIFIERS; read through
 * getActiveClassifiers() and replaced wholesale by setActiveClassifiers().
 */
let _activeClassifiers: FieldClassifier[] = [...ALL_CLASSIFIERS];

/**
 * All named classifiers available for dynamic composition.
 * Keys are the setting names accepted by buildClassifiersFromSettings().
 *
 * NOTE: this is a plain object, so indexing it with an arbitrary string can
 * also reach inherited Object.prototype members (e.g. "constructor").
 * Use an own-property check when the key comes from external configuration.
 */
const NAMED_CLASSIFIERS: Record<string, FieldClassifier> = {
  "html-type": htmlTypeClassifier,
  keyword: keywordClassifier,
  tensorflow: tensorflowClassifier,
  "chrome-ai": chromeAiClassifier,
  "html-fallback": htmlFallbackClassifier,
};

/**
 * Returns the currently active classifier list.
 * Defaults to ALL_CLASSIFIERS unless overridden via setActiveClassifiers().
 *
 * The internal array is returned by reference (no copy); the ReadonlyArray
 * return type only prevents mutation at compile time.
 *
 * @returns The classifier list currently stored in the module-level state.
 */
export function getActiveClassifiers(): ReadonlyArray<FieldClassifier> {
  return _activeClassifiers;
}

/**
 * Overrides the active classifier list used by the native-input chain.
 * Called by the content script on startup based on user settings.
 *
 * A shallow copy of the given array is stored, so mutating the caller's
 * array afterwards (push/splice/sort) cannot silently change which
 * classifiers subsequent scans run.
 *
 * @param classifiers - Classifiers to activate, in priority order.
 */
export function setActiveClassifiers(classifiers: FieldClassifier[]): void {
  _activeClassifiers = [...classifiers];
}

/**
 * Builds a classifier list from a user-defined ordered config.
 *
 * - Entries that are disabled or whose name is not a registered classifier
 *   are skipped.
 * - A classifier listed more than once is only included at its first position.
 * - html-fallback is always appended last when not already present.
 *
 * @param config - Ordered strategy settings (`name` + `enabled` flag).
 * @returns A new array of classifiers in the configured order.
 */
export function buildClassifiersFromSettings(
  config: Array<{ name: string; enabled: boolean }>,
): FieldClassifier[] {
  const ordered: FieldClassifier[] = [];

  for (const { name, enabled } of config) {
    if (!enabled) continue;

    // Own-property lookup only: a bare NAMED_CLASSIFIERS[name] is truthy for
    // inherited keys such as "constructor" or "toString", which would inject
    // Object.prototype members into the classifier chain.
    if (!Object.prototype.hasOwnProperty.call(NAMED_CLASSIFIERS, name)) continue;

    const classifier = NAMED_CLASSIFIERS[name];
    if (classifier && !ordered.includes(classifier)) {
      ordered.push(classifier);
    }
  }

  // Ensure there's always a fallback terminator
  if (!ordered.some((c) => c.name === "html-fallback")) {
    ordered.push(htmlFallbackClassifier);
  }

  return ordered;
}

// ── Native field collection (Steps 1–3) ─────────────────────────────────────

/**
 * Gathers the page's native form controls and turns them into FormField stubs.
 *
 * Every element matching INPUT_SELECTOR is considered; elements that are not
 * visible, or that are rejected by isNotCustomSelect, are dropped. Each
 * remaining element is passed to buildNativeField. No classification happens
 * here — that is the job of the classifier chain.
 *
 * @returns One stub per eligible element, in document order.
 */
function collectNativeFields(): FormField[] {
  const matches = document.querySelectorAll<NativeElement>(INPUT_SELECTOR);

  const eligible = Array.from(matches).filter((candidate) => {
    if (!isVisible(candidate)) return false;
    return isNotCustomSelect(candidate);
  });

  return eligible.map(buildNativeField);
}

// ── Classification chain factory (Step 4) ────────────────────────────────────

/**
 * Creates a fresh FieldProcessingChain wired with whatever classifiers are
 * active right now.
 *
 * The active list is read on every call, so a chain built after
 * setActiveClassifiers() picks up the new configuration.
 *
 * @returns The value returned by `classify(...)` on the new chain.
 */
function buildClassificationChain(): FieldProcessingChain {
  const chain = new FieldProcessingChain();
  const classifiers = getActiveClassifiers();
  return chain.classify(...classifiers);
}

// ── Native-input scanners ─────────────────────────────────────────────────────

/**
 * Batch scan: collects the page's native fields and runs them through the
 * asynchronous classification chain, resolving once the chain's `runAsync`
 * has settled. For incremental results use streamNativeFieldsAsync().
 *
 * @returns The fields produced by the chain's `runAsync`.
 */
export async function detectNativeFieldsAsync(): Promise<FormField[]> {
  const chain = buildClassificationChain();
  const candidates = collectNativeFields();
  return chain.runAsync(candidates);
}

/**
 * Re-runs classification for one element, looked up by CSS selector.
 *
 * The element is converted to a field stub and sent through a freshly built
 * async classification chain as a single-item batch.
 *
 * @param selector - CSS selector passed to `document.querySelector`.
 * @returns The first field returned by the chain, or `null` when no element
 *          matches the selector or the chain returns nothing.
 */
export async function reclassifyFieldBySelector(
  selector: string,
): Promise<FormField | null> {
  const target = document.querySelector<NativeElement>(selector);

  if (target) {
    const stub = buildNativeField(target);
    const results = await buildClassificationChain().runAsync([stub]);
    return results[0] ?? null;
  }

  return null;
}

/**
 * Incremental scan: delegates to the classification chain's `stream(...)`
 * over the collected native fields, so consumers receive each FormField as
 * the chain yields it instead of waiting for the whole batch.
 */
export async function* streamNativeFieldsAsync(): AsyncGenerator<FormField> {
  const chain = buildClassificationChain();
  const candidates = collectNativeFields();
  yield* chain.stream(candidates);
}

// ── Page-level detectors ──────────────────────────────────────────────────────

/**
 * Synchronous page detector for native input/select/textarea elements.
 *
 * For every collected field the active classifiers are tried in order via
 * their synchronous `detect()` entry point; the first one returning a
 * non-null result whose type is not "unknown" decides fieldType,
 * detectionMethod and detectionConfidence. Fields whose fieldType is still
 * "unknown" afterwards are stamped as "html-fallback" with confidence 0.1.
 *
 * Only `detect()` is called here, so async-only classifiers such as Chrome AI
 * presumably contribute nothing (assumes their `detect()` returns null —
 * confirm in ./strategies). Used by dom-watcher and detectAllFields().
 */
export const nativeInputDetector: PageDetector = {
  name: "native-inputs",
  detect(): FormField[] {
    const candidates = collectNativeFields();
    const strategies = getActiveClassifiers();

    candidates.forEach((candidate) => {
      // First classifier with a concrete answer wins; the rest are skipped.
      for (const strategy of strategies) {
        const outcome = strategy.detect(candidate);
        if (outcome === null || outcome.type === "unknown") continue;

        candidate.fieldType = outcome.type;
        candidate.detectionMethod = strategy.name;
        candidate.detectionConfidence = outcome.confidence;
        break;
      }

      const stillUnknown = candidate.fieldType === "unknown";
      if (stillUnknown) {
        candidate.detectionMethod = "html-fallback";
        candidate.detectionConfidence = 0.1;
      }
    });

    return candidates;
  },
};

// ── Default collection pipeline ───────────────────────────────────────────────

/**
 * Default page-level collection pipeline.
 * Runs all three scanners in order: native-inputs → custom-selects → interactive-fields.
 *
 * NOTE(review): only nativeInputDetector is registered in this declaration.
 * The custom-selects and interactive-fields scanners named above are not
 * added here — confirm whether they are attached elsewhere or whether this
 * description is out of date.
 *
 * Create variants with:
 *   DEFAULT_COLLECTION_PIPELINE.without("interactive-fields")
 *   DEFAULT_COLLECTION_PIPELINE.with(myPageDetector)
 */
export const DEFAULT_COLLECTION_PIPELINE = new FieldCollectionPipeline([
  nativeInputDetector,
]);

/** Types that are too generic to override a concrete adapter-set type. */
const GENERIC_TYPES = new Set<FormField["fieldType"]>(["text", "unknown"]);

/**
 * Applies keyword classification to custom component fields in-place.
 *
 * Custom component adapters know the semantic widget type (select, checkbox,
 * datepicker, etc.) but cannot infer domain-level context (e.g. "CPF", "email")
 * from the DOM structure alone. Running keywordClassifier here allows
 * label-based patterns ("cpf", "e-mail", "data de nascimento" …) to upgrade the
 * fieldType even for Ant Design / Select2 components.
 *
 * A keyword result is applied — and the field stamped with
 * detectionMethod "keyword" and the keyword confidence — only when it is
 * actually used as the field's type:
 *
 *  - an "unknown" result is treated as "no match" and never replaces anything;
 *  - a generic result ("text") only replaces another generic adapter type,
 *    never a concrete one such as "select" — that would cause the fill logic
 *    to generate lorem-ipsum and type it into a dropdown search box.
 *
 * When the keyword result is absent or not applied, the adapter-set fieldType
 * is preserved. In that case detectionMethod/detectionConfidence are stamped
 * as "custom-select" / 0.9 unless the adapter already set a detectionMethod.
 *
 * @param fields - Custom component fields; mutated in place.
 * @returns The same `fields` array that was passed in.
 */
export function classifyCustomFieldsSync(fields: FormField[]): FormField[] {
  for (const field of fields) {
    const result = keywordClassifier.detect(field);

    if (
      result &&
      result.type !== "unknown" &&
      (!GENERIC_TYPES.has(result.type) || GENERIC_TYPES.has(field.fieldType))
    ) {
      // Keyword result is the source of the final type — record it as such.
      field.fieldType = result.type;
      field.detectionMethod = "keyword";
      field.detectionConfidence = result.confidence;
    } else if (!field.detectionMethod) {
      // Type comes from the adapter; do not attribute it to the keyword pass.
      field.detectionMethod = "custom-select";
      field.detectionConfidence = 0.9;
    }
  }
  return fields;
}

/**
 * Async classification for custom component fields.
 *
 * Custom adapters populate label / name / placeholder / autocomplete but
 * cannot determine the domain-level fieldType (e.g. "company", "cpf"). Each
 * field is therefore run through the active classifier chain, minus the two
 * classifiers that only make sense for native inputs.
 *
 * Rules:
 *  - Classifiers named "html-type" and "html-fallback" are excluded.
 *  - A classifier's `detectAsync` is preferred when it exists; otherwise its
 *    synchronous `detect` is used.
 *  - Null results and generic results ("text", "unknown") never end the
 *    search — the next classifier gets its turn.
 *  - The first concrete result sets fieldType, detectionMethod and
 *    detectionConfidence.
 *  - When nothing matches, the adapter-set fieldType is left untouched and
 *    the field is stamped "custom-select" with confidence 0.5 if the adapter
 *    type was itself generic, 0.9 otherwise.
 *  - detectionDurationMs records the time spent on the field in every case.
 *
 * @param fields - Custom component fields; mutated in place.
 * @returns The same `fields` array that was passed in.
 */
export async function classifyCustomFieldsAsync(
  fields: FormField[],
): Promise<FormField[]> {
  // html-type inspects native input[type]; html-fallback is the native
  // last resort. Neither is meaningful for custom wrappers.
  const semanticClassifiers = getActiveClassifiers().filter(
    ({ name }) => !(name === "html-type" || name === "html-fallback"),
  );

  for (const field of fields) {
    const adapterType = field.fieldType; // as provided by the adapter (may be "unknown")
    const startedAt = performance.now();
    let matched = false;

    for (const candidate of semanticClassifiers) {
      const outcome = candidate.detectAsync
        ? await candidate.detectAsync(field)
        : candidate.detect(field);

      // No answer, or an answer too generic to be useful: try the next one.
      if (outcome === null || GENERIC_TYPES.has(outcome.type)) continue;

      field.fieldType = outcome.type;
      field.detectionMethod = candidate.name;
      field.detectionConfidence = outcome.confidence;
      matched = true;
      break;
    }

    if (!matched) {
      // Keep the adapter's type; only record how (un)certain that leaves us.
      field.detectionMethod = "custom-select";
      field.detectionConfidence = GENERIC_TYPES.has(adapterType) ? 0.5 : 0.9;
    }

    field.detectionDurationMs = performance.now() - startedAt;
  }

  return fields;
}