src/lib/form/detectors/__tests__/classifiers.test.ts

Total Symbols
2
Lines of Code
581
Avg Complexity
1.0
Symbol Types
1

File Relationships

graph LR classifierNames["classifierNames"] makeCustomField["makeCustomField"] classifierNames -->|calls| classifierNames classifierNames -->|calls| makeCustomField click classifierNames "../symbols/a00f1b67db4c0c6d.html" click makeCustomField "../symbols/0b037aae4dc33a68.html"

Symbols by Kind

function 2

All Symbols

Name Kind Visibility Status Lines Signature
classifierNames function - 32-34 classifierNames(list: ReadonlyArray<{ name: string }>)
makeCustomField function - 342-356 makeCustomField(overrides: Partial<FormField> = {}): FormField

Full Source

// @vitest-environment happy-dom
import { describe, expect, it, beforeEach, afterEach, vi } from "vitest";
import type { FormField } from "@/types";

vi.mock("@/lib/form/extractors", () => ({
  getUniqueSelector: vi.fn().mockReturnValue("#mock-field"),
  findLabelWithStrategy: vi
    .fn()
    .mockReturnValue({ text: "Mock Label", strategy: "aria-label" }),
  buildSignals: vi.fn().mockReturnValue("mock label"),
}));

import {
  ALL_CLASSIFIERS,
  DEFAULT_PIPELINE,
  DEFAULT_COLLECTION_PIPELINE,
  getActiveClassifiers,
  setActiveClassifiers,
  buildClassifiersFromSettings,
  nativeInputDetector,
  classifyCustomFieldsSync,
  classifyCustomFieldsAsync,
  detectNativeFieldsAsync,
  streamNativeFieldsAsync,
  keywordClassifier,
  tensorflowClassifier,
  chromeAiClassifier,
} from "../classifiers";

// ── Helpers ───────────────────────────────────────────────────────────────────

/**
 * Projects a list of named entries onto their `name` values.
 *
 * @param list - Entries that expose a `name` string.
 * @returns A new array holding each entry's name, in the same order as `list`.
 */
function classifierNames(list: ReadonlyArray<{ name: string }>) {
  const names: string[] = [];
  for (const entry of list) {
    names.push(entry.name);
  }
  return names;
}

// ── ALL_CLASSIFIERS ───────────────────────────────────────────────────────────

// Pins both the size of the classifier registry and the name found at each
// position, plus the presence of a `detect` function on every entry.
describe("ALL_CLASSIFIERS", () => {
  it("contains 5 classifiers", () => {
    const registry = ALL_CLASSIFIERS;
    expect(registry).toHaveLength(5);
  });

  it("starts with html-type", () => {
    const first = ALL_CLASSIFIERS[0];
    expect(first.name).toBe("html-type");
  });

  it("has keyword second", () => {
    const second = ALL_CLASSIFIERS[1];
    expect(second.name).toBe("keyword");
  });

  it("has tensorflow third", () => {
    const third = ALL_CLASSIFIERS[2];
    expect(third.name).toBe("tensorflow");
  });

  it("has chrome-ai fourth", () => {
    const fourth = ALL_CLASSIFIERS[3];
    expect(fourth.name).toBe("chrome-ai");
  });

  it("ends with html-fallback", () => {
    const fifth = ALL_CLASSIFIERS[4];
    expect(fifth.name).toBe("html-fallback");
  });

  it("every classifier has a detect function", () => {
    ALL_CLASSIFIERS.forEach((classifier) => {
      expect(typeof classifier.detect).toBe("function");
    });
  });
});

// ── DEFAULT_PIPELINE ──────────────────────────────────────────────────────────

describe("DEFAULT_PIPELINE", () => {
  // Only the classifier names (and their order) are compared here; the test
  // does not perform an `instanceof` check despite its title.
  it("is a DetectionPipeline instance with all classifiers", () => {
    const pipelineNames = classifierNames(DEFAULT_PIPELINE.classifiers);
    const registryNames = classifierNames(ALL_CLASSIFIERS);
    expect(pipelineNames).toEqual(registryNames);
  });
});

// ── getActiveClassifiers / setActiveClassifiers ───────────────────────────────

// Exercises the getter/setter pair for the shared active-classifier list.
describe("getActiveClassifiers / setActiveClassifiers", () => {
  /**
   * Points the active list back at a fresh copy of the full registry.
   * The `as any` cast is kept from the original test: the declared parameter
   * type of `setActiveClassifiers` is not visible from this file.
   */
  const resetActiveClassifiers = (): void => {
    setActiveClassifiers([...ALL_CLASSIFIERS] as any);
  };

  // Start every test from the default list...
  beforeEach(resetActiveClassifiers);

  // ...and restore it afterwards, so the empty/custom arrays installed by the
  // tests below never leak into suites that run after this one.
  afterEach(resetActiveClassifiers);

  it("returns all classifiers by default", () => {
    const active = getActiveClassifiers();
    expect(classifierNames(active)).toEqual(classifierNames(ALL_CLASSIFIERS));
  });

  it("reflects changes made via setActiveClassifiers", () => {
    const subset = ALL_CLASSIFIERS.filter((c) =>
      ["html-type", "html-fallback"].includes(c.name),
    ) as any[];
    setActiveClassifiers(subset);
    const active = getActiveClassifiers();
    expect(classifierNames(active)).toEqual(["html-type", "html-fallback"]);
  });

  it("returns empty array when set to empty", () => {
    setActiveClassifiers([]);
    expect(getActiveClassifiers()).toHaveLength(0);
  });

  it("returns the exact array reference passed in", () => {
    // `toBe` checks identity: the getter must hand back the very same array.
    const custom: any[] = [];
    setActiveClassifiers(custom);
    expect(getActiveClassifiers()).toBe(custom);
  });
});

// ── buildClassifiersFromSettings ──────────────────────────────────────────────

// Verifies how a list of `{ name, enabled }` settings is turned into a
// classifier list: filtering, ordering, and the trailing html-fallback entry.
describe("buildClassifiersFromSettings", () => {
  it("returns only enabled classifiers", () => {
    const names = classifierNames(
      buildClassifiersFromSettings([
        { name: "html-type", enabled: true },
        { name: "keyword", enabled: false },
        { name: "tensorflow", enabled: true },
      ]),
    );
    expect(names).toContain("html-type");
    expect(names).not.toContain("keyword");
    expect(names).toContain("tensorflow");
  });

  it("always appends html-fallback when not explicitly included", () => {
    const names = classifierNames(
      buildClassifiersFromSettings([{ name: "html-type", enabled: true }]),
    );
    const lastName = names[names.length - 1];
    expect(lastName).toBe("html-fallback");
  });

  it("does not duplicate html-fallback when already included", () => {
    const built = buildClassifiersFromSettings([
      { name: "html-type", enabled: true },
      { name: "html-fallback", enabled: true },
    ]);
    const fallbacks = built.filter(
      (classifier) => classifier.name === "html-fallback",
    );
    expect(fallbacks.length).toBe(1);
  });

  it("respects the order of config entries", () => {
    const names = classifierNames(
      buildClassifiersFromSettings([
        { name: "tensorflow", enabled: true },
        { name: "keyword", enabled: true },
        { name: "html-type", enabled: true },
      ]),
    );
    // The first three slots follow the config order...
    const [first, second, third] = names;
    expect(first).toBe("tensorflow");
    expect(second).toBe("keyword");
    expect(third).toBe("html-type");
    // ...and html-fallback occupies the final slot.
    const lastName = names[names.length - 1];
    expect(lastName).toBe("html-fallback");
  });

  it("ignores unknown classifier names", () => {
    const names = classifierNames(
      buildClassifiersFromSettings([
        { name: "nonexistent-classifier", enabled: true },
        { name: "html-type", enabled: true },
      ]),
    );
    expect(names).not.toContain("nonexistent-classifier");
    expect(names).toContain("html-type");
  });

  it("returns only html-fallback when all configs are disabled", () => {
    const names = classifierNames(
      buildClassifiersFromSettings([
        { name: "html-type", enabled: false },
        { name: "keyword", enabled: false },
      ]),
    );
    expect(names).toEqual(["html-fallback"]);
  });

  it("returns only html-fallback for empty config", () => {
    const names = classifierNames(buildClassifiersFromSettings([]));
    expect(names).toEqual(["html-fallback"]);
  });
});

// ── DEFAULT_COLLECTION_PIPELINE ──────────────────────────────────────────────

// Checks that the default collection pipeline registers the native-input
// detector, and that it sits in the first position.
describe("DEFAULT_COLLECTION_PIPELINE", () => {
  it("contains the nativeInputDetector", () => {
    const detectorNames: string[] = [];
    for (const detector of DEFAULT_COLLECTION_PIPELINE.detectors) {
      detectorNames.push(detector.name);
    }
    expect(detectorNames).toContain("native-inputs");
  });

  it("has nativeInputDetector as first detector", () => {
    const leading = DEFAULT_COLLECTION_PIPELINE.detectors[0];
    expect(leading.name).toBe("native-inputs");
  });
});

// ── nativeInputDetector ──────────────────────────────────────────────────────

// Synchronous detector tests, run against a DOM that is wiped around each case.
describe("nativeInputDetector", () => {
  /** Replaces the page body with `markup` and runs the detector on it. */
  const detectIn = (markup: string) => {
    document.body.innerHTML = markup;
    return nativeInputDetector.detect();
  };

  beforeEach(() => {
    document.body.innerHTML = "";
    setActiveClassifiers([...ALL_CLASSIFIERS] as any);
  });

  afterEach(() => {
    document.body.innerHTML = "";
  });

  it('has name "native-inputs"', () => {
    const { name } = nativeInputDetector;
    expect(name).toBe("native-inputs");
  });

  it("returns empty array when page has no native inputs", () => {
    const fields = detectIn("<div>no inputs here</div>");
    expect(fields).toEqual([]);
  });

  it("skips hidden and submit inputs", () => {
    const fields = detectIn(`
      <input type="hidden" value="secret" />
      <input type="submit" value="Submit" />
      <input type="button" value="Click" />
    `);
    expect(fields).toEqual([]);
  });

  it("collects visible text inputs via getBoundingClientRect mock", () => {
    document.body.innerHTML = `<input type="text" id="name-field" />`;
    const textInput = document.getElementById("name-field") as HTMLInputElement;
    // Report a non-zero 200x40 box for the input.
    const visibleRect = {
      width: 200,
      height: 40,
      top: 0,
      left: 0,
      bottom: 40,
      right: 200,
      x: 0,
      y: 0,
      toJSON: vi.fn(),
    } as DOMRect;
    vi.spyOn(textInput, "getBoundingClientRect").mockReturnValue(visibleRect);
    const detected = nativeInputDetector.detect();
    expect(detected.length).toBeGreaterThanOrEqual(1);
  });

  it("skips disabled elements", () => {
    const fields = detectIn(`<input type="text" disabled />`);
    expect(fields).toEqual([]);
  });
});

// ── detectNativeFieldsAsync ──────────────────────────────────────────────────

// Async detection tests; the DOM and the active classifier list are reset
// before each case and the DOM is wiped again afterwards.
describe("detectNativeFieldsAsync", () => {
  const clearBody = (): void => {
    document.body.innerHTML = "";
  };

  beforeEach(() => {
    clearBody();
    setActiveClassifiers([...ALL_CLASSIFIERS] as any);
  });

  afterEach(() => {
    clearBody();
  });

  it("returns empty array when page has no visible inputs", async () => {
    document.body.innerHTML = "<div>nothing here</div>";
    const detected = await detectNativeFieldsAsync();
    expect(Array.isArray(detected)).toBe(true);
    expect(detected).toHaveLength(0);
  });

  it("returns FormField array with visible inputs", async () => {
    document.body.innerHTML = `<input type="text" id="async-field" />`;
    const textInput = document.getElementById(
      "async-field",
    ) as HTMLInputElement;
    // Report a non-zero 200x40 box for the input.
    const visibleRect = {
      width: 200,
      height: 40,
      top: 0,
      left: 0,
      bottom: 40,
      right: 200,
      x: 0,
      y: 0,
      toJSON: vi.fn(),
    } as DOMRect;
    vi.spyOn(textInput, "getBoundingClientRect").mockReturnValue(visibleRect);
    const detected = await detectNativeFieldsAsync();
    expect(detected.length).toBeGreaterThanOrEqual(1);
  });
});

// ── streamNativeFieldsAsync ──────────────────────────────────────────────────

// Streaming detection tests: the async iterable is fully consumed and the
// number of yielded fields is asserted.
describe("streamNativeFieldsAsync", () => {
  /** Consumes the whole stream and returns everything it yielded, in order. */
  const drainStream = async (): Promise<FormField[]> => {
    const seen: FormField[] = [];
    for await (const yielded of streamNativeFieldsAsync()) {
      seen.push(yielded);
    }
    return seen;
  };

  beforeEach(() => {
    document.body.innerHTML = "";
    setActiveClassifiers([...ALL_CLASSIFIERS] as any);
  });

  afterEach(() => {
    document.body.innerHTML = "";
  });

  it("yields nothing when page has no visible inputs", async () => {
    document.body.innerHTML = "<p>empty page</p>";
    const seen = await drainStream();
    expect(seen).toHaveLength(0);
  });

  it("yields FormField for each visible input", async () => {
    document.body.innerHTML = `<input type="text" id="stream-field" />`;
    const textInput = document.getElementById(
      "stream-field",
    ) as HTMLInputElement;
    // Report a non-zero 200x40 box for the input.
    const visibleRect = {
      width: 200,
      height: 40,
      top: 0,
      left: 0,
      bottom: 40,
      right: 200,
      x: 0,
      y: 0,
      toJSON: vi.fn(),
    } as DOMRect;
    vi.spyOn(textInput, "getBoundingClientRect").mockReturnValue(visibleRect);
    const seen = await drainStream();
    expect(seen.length).toBeGreaterThanOrEqual(1);
  });
});

// ── classifyCustomFieldsSync ─────────────────────────────────────────────────

/**
 * Builds a `FormField` fixture for the custom-field classification tests.
 *
 * Each call creates a fresh `<input>` element and a fresh object, so tests can
 * mutate the result freely.
 *
 * @param overrides - Properties that replace the fixture defaults.
 * @returns The default fixture with `overrides` applied on top.
 */
function makeCustomField(overrides: Partial<FormField> = {}): FormField {
  const baseline: FormField = {
    element: document.createElement("input"),
    selector: "#custom-field",
    category: "generic",
    fieldType: "select",
    label: "Test",
    name: "test",
    id: "custom-field",
    placeholder: "",
    required: false,
    options: [],
  };
  // Later spreads win, so any key present in `overrides` replaces the default.
  return { ...baseline, ...overrides };
}

// Synchronous classification of custom fields. Each test queues one-shot
// return values on `keywordClassifier.detect` and then inspects how the field
// object was mutated in place.
describe("classifyCustomFieldsSync", () => {
  // Undo every `vi.spyOn` after each test so a spy (or an unconsumed queued
  // return value) cannot leak into the next test. This mirrors the cleanup
  // already used by the classifyCustomFieldsAsync suite.
  afterEach(() => {
    vi.restoreAllMocks();
  });

  it("returns same array reference", () => {
    const fields: FormField[] = [];
    const result = classifyCustomFieldsSync(fields);
    expect(result).toBe(fields);
  });

  it("returns empty array unchanged", () => {
    const result = classifyCustomFieldsSync([]);
    expect(result).toHaveLength(0);
  });

  it("applies keyword result when non-generic type is returned", () => {
    const field = makeCustomField({ fieldType: "select", label: "Email" });
    vi.spyOn(keywordClassifier, "detect").mockReturnValueOnce({
      type: "email",
      confidence: 0.95,
    });
    classifyCustomFieldsSync([field]);
    expect(field.fieldType).toBe("email");
    expect(field.detectionMethod).toBe("keyword");
    expect(field.detectionConfidence).toBe(0.95);
  });

  it("does NOT override concrete type when keyword returns generic 'text'", () => {
    const field = makeCustomField({ fieldType: "select", label: "Escolha" });
    vi.spyOn(keywordClassifier, "detect").mockReturnValueOnce({
      type: "text",
      confidence: 0.5,
    });
    classifyCustomFieldsSync([field]);
    // The type is kept, but method and confidence are still stamped from keyword.
    expect(field.fieldType).toBe("select"); // not overridden
    expect(field.detectionMethod).toBe("keyword");
    expect(field.detectionConfidence).toBe(0.5);
  });

  it("overrides when both keyword and field have generic types", () => {
    const field = makeCustomField({ fieldType: "text", label: "Texto" });
    vi.spyOn(keywordClassifier, "detect").mockReturnValueOnce({
      type: "unknown",
      confidence: 0.3,
    });
    classifyCustomFieldsSync([field]);
    expect(field.fieldType).toBe("unknown"); // both generic → override
    expect(field.detectionMethod).toBe("keyword");
  });

  it("stamps custom-select when keyword returns null and no detectionMethod", () => {
    const field = makeCustomField({
      fieldType: "select",
      detectionMethod: undefined,
    });
    vi.spyOn(keywordClassifier, "detect").mockReturnValueOnce(null);
    classifyCustomFieldsSync([field]);
    expect(field.detectionMethod).toBe("custom-select");
    expect(field.detectionConfidence).toBe(0.9);
  });

  it("does not overwrite existing detectionMethod when keyword returns null", () => {
    const field = makeCustomField({
      fieldType: "select",
      detectionMethod: "html-type",
      detectionConfidence: 1.0,
    });
    vi.spyOn(keywordClassifier, "detect").mockReturnValueOnce(null);
    classifyCustomFieldsSync([field]);
    expect(field.detectionMethod).toBe("html-type"); // unchanged
    expect(field.detectionConfidence).toBe(1.0); // unchanged
  });

  it("processes multiple fields independently", () => {
    const field1 = makeCustomField({
      id: "f1",
      fieldType: "select",
      label: "CPF",
    });
    const field2 = makeCustomField({
      id: "f2",
      fieldType: "text",
      label: "Unknown",
    });
    // Queued values are consumed in call order: first call → cpf, second → null.
    vi.spyOn(keywordClassifier, "detect")
      .mockReturnValueOnce({ type: "cpf", confidence: 0.99 })
      .mockReturnValueOnce(null);
    classifyCustomFieldsSync([field1, field2]);
    expect(field1.fieldType).toBe("cpf");
    expect(field2.detectionMethod).toBe("custom-select");
  });
});

// ── classifyCustomFieldsAsync ─────────────────────────────────────────────────

// Async classification of custom fields. Each test queues one-shot results on
// the keyword, tensorflow and chrome-ai classifiers and then checks which one
// ended up stamped on the field (type, detectionMethod, detectionConfidence).
describe("classifyCustomFieldsAsync", () => {
  // Restore every spy after each test so queued one-shot values never carry
  // over into the next case.
  afterEach(() => {
    vi.restoreAllMocks();
  });

  it("returns same array reference", async () => {
    const fields: FormField[] = [];
    const result = await classifyCustomFieldsAsync(fields);
    // Identity check: the input array itself is returned, not a copy.
    expect(result).toBe(fields);
  });

  it("classifies via keyword when it returns a concrete type", async () => {
    const field = makeCustomField({ fieldType: "unknown", label: "CPF" });
    vi.spyOn(keywordClassifier, "detect").mockReturnValueOnce({
      type: "cpf",
      confidence: 0.99,
    });
    await classifyCustomFieldsAsync([field]);
    expect(field.fieldType).toBe("cpf");
    expect(field.detectionMethod).toBe("keyword");
    expect(field.detectionConfidence).toBe(0.99);
  });

  it("skips keyword generic result and falls through to tensorflow", async () => {
    const field = makeCustomField({
      fieldType: "unknown",
      label: "Razão Social",
      contextSignals: "razão social companyname organization",
    });
    vi.spyOn(keywordClassifier, "detect").mockReturnValueOnce({
      type: "text", // generic — should be skipped
      confidence: 0.4,
    });
    vi.spyOn(tensorflowClassifier, "detect").mockReturnValueOnce({
      type: "company",
      confidence: 0.87,
    });
    await classifyCustomFieldsAsync([field]);
    // The tensorflow result wins, including its confidence value.
    expect(field.fieldType).toBe("company");
    expect(field.detectionMethod).toBe("tensorflow");
    expect(field.detectionConfidence).toBe(0.87);
  });

  it("skips keyword 'unknown' result and falls through to tensorflow", async () => {
    const field = makeCustomField({
      fieldType: "unknown",
      label: "Empresa",
    });
    // Same fall-through as above, but with keyword answering "unknown".
    vi.spyOn(keywordClassifier, "detect").mockReturnValueOnce({
      type: "unknown",
      confidence: 0.1,
    });
    vi.spyOn(tensorflowClassifier, "detect").mockReturnValueOnce({
      type: "company",
      confidence: 0.82,
    });
    await classifyCustomFieldsAsync([field]);
    expect(field.fieldType).toBe("company");
    expect(field.detectionMethod).toBe("tensorflow");
  });

  it("falls through to chrome-ai when keyword and tensorflow return generic/null", async () => {
    const field = makeCustomField({ fieldType: "unknown", label: "Empresa" });
    vi.spyOn(keywordClassifier, "detect").mockReturnValueOnce(null);
    vi.spyOn(tensorflowClassifier, "detect").mockReturnValueOnce(null);
    // chrome-ai is mocked through `detectAsync`, with a resolved promise value.
    vi.spyOn(chromeAiClassifier, "detectAsync").mockResolvedValueOnce({
      type: "company",
      confidence: 0.75,
    });
    await classifyCustomFieldsAsync([field]);
    expect(field.fieldType).toBe("company");
    expect(field.detectionMethod).toBe("chrome-ai");
  });

  it("stamps custom-select with low confidence when fieldType stays unknown", async () => {
    const field = makeCustomField({ fieldType: "unknown", label: "???" });
    // All three classifiers come back empty.
    vi.spyOn(keywordClassifier, "detect").mockReturnValueOnce(null);
    vi.spyOn(tensorflowClassifier, "detect").mockReturnValueOnce(null);
    vi.spyOn(chromeAiClassifier, "detectAsync").mockResolvedValueOnce(null);
    await classifyCustomFieldsAsync([field]);
    expect(field.fieldType).toBe("unknown");
    expect(field.detectionMethod).toBe("custom-select");
    expect(field.detectionConfidence).toBe(0.5);
  });

  it("stamps custom-select with high confidence when adapter has concrete type but classifiers fail", async () => {
    const field = makeCustomField({ fieldType: "select", label: "Opções" });
    // Same empty results as the previous test, but the field starts as "select".
    vi.spyOn(keywordClassifier, "detect").mockReturnValueOnce(null);
    vi.spyOn(tensorflowClassifier, "detect").mockReturnValueOnce(null);
    vi.spyOn(chromeAiClassifier, "detectAsync").mockResolvedValueOnce(null);
    await classifyCustomFieldsAsync([field]);
    expect(field.fieldType).toBe("select"); // preserved from adapter
    expect(field.detectionMethod).toBe("custom-select");
    expect(field.detectionConfidence).toBe(0.9); // high — we know it's a select
  });

  it("does NOT use html-type or html-fallback classifiers", async () => {
    const field = makeCustomField({ fieldType: "unknown", label: "Campo" });
    vi.spyOn(keywordClassifier, "detect").mockReturnValueOnce(null);
    vi.spyOn(tensorflowClassifier, "detect").mockReturnValueOnce(null);
    vi.spyOn(chromeAiClassifier, "detectAsync").mockResolvedValueOnce(null);
    // If html-type or html-fallback ran, they'd set detectionMethod to their names
    await classifyCustomFieldsAsync([field]);
    // Indirect check: only the resulting detectionMethod is inspected.
    expect(field.detectionMethod).toBe("custom-select");
  });

  it("processes multiple fields independently", async () => {
    const field1 = makeCustomField({
      id: "f1",
      fieldType: "unknown",
      label: "CPF",
    });
    const field2 = makeCustomField({
      id: "f2",
      fieldType: "unknown",
      label: "Empresa",
    });
    // keyword has two queued answers (cpf, then null); tensorflow has a single
    // queued answer. The assertions below expect that answer to land on field2.
    vi.spyOn(keywordClassifier, "detect")
      .mockReturnValueOnce({ type: "cpf", confidence: 0.99 })
      .mockReturnValueOnce(null);
    vi.spyOn(tensorflowClassifier, "detect").mockReturnValueOnce({
      type: "company",
      confidence: 0.85,
    });
    await classifyCustomFieldsAsync([field1, field2]);
    expect(field1.fieldType).toBe("cpf");
    expect(field1.detectionMethod).toBe("keyword");
    expect(field2.fieldType).toBe("company");
    expect(field2.detectionMethod).toBe("tensorflow");
  });
});