/**
 * Builds a complete `ProfileData` fixture from a partial one.
 *
 * The three required collections fall back to empty arrays. `suppliers` is
 * passed through untouched, so it stays `undefined` unless the test sets it.
 *
 * @param partial - Only the collections the test cares about.
 * @returns A `ProfileData` safe to hand to `computeMigrationPlan`.
 */
function makeProfile(partial: Partial<ProfileData>): ProfileData {
  return {
    v2Categories: partial.v2Categories ?? [],
    keywords: partial.keywords ?? [],
    transactions: partial.transactions ?? [],
    suppliers: partial.suppliers,
  };
}
// ---------------------------------------------------------------------------
// Pass 1 — keyword match
// ---------------------------------------------------------------------------

// Each case gives a seeded v2 category one or more user keywords and checks
// that the row is resolved with reason "keyword" and confidence "high".
describe("computeMigrationPlan — Pass 1 (keyword)", () => {
  it("maps Transport en commun → 1521 (Autobus & métro) via STM keyword", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(28, "Transport en commun", 2)],
        keywords: [kw(28, "STM")],
      })
    );
    expect(plan.rows).toHaveLength(1);
    expect(plan.rows[0]).toMatchObject({
      v2CategoryId: 28,
      v1TargetId: 1521,
      v1TargetName: "Autobus & métro",
      confidence: "high",
      reason: "keyword",
    });
  });

  it("maps Voiture → 1512 (Essence) via SHELL keyword", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(40, "Voiture", 3)],
        keywords: [kw(40, "SHELL")],
      })
    );
    expect(plan.rows[0]).toMatchObject({
      v1TargetId: 1512,
      confidence: "high",
      reason: "keyword",
    });
  });

  it("picks the first matching KEYWORD_TO_V1 rule when multiple apply", () => {
    // The order of the user's keywords decides, not the order of the rule
    // table: the keyword pass walks the user's keywords in its outer loop and
    // returns on the first one that hits any rule. SAAQ (→ 1514) is listed
    // first here, so it wins even though the "shell" rule comes before the
    // "saaq" rule in KEYWORD_TO_V1.
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(40, "Voiture", 3)],
        keywords: [kw(40, "SAAQ"), kw(40, "SHELL")],
      })
    );
    expect(plan.rows[0].v1TargetId).toBe(1514);
  });
});
// ---------------------------------------------------------------------------
// Pass 3 — default fallback
// ---------------------------------------------------------------------------

// None of these profiles carries keywords or transactions, so passes 1 and 2
// have nothing to match and every row comes from the default mapping table.
// The expected confidence is whatever that table entry declares.
describe("computeMigrationPlan — Pass 3 (default)", () => {
  it("maps Loyer (20) → 1211 with high confidence (direct)", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(20, "Loyer", 2)],
      })
    );
    expect(plan.rows[0]).toMatchObject({
      v1TargetId: 1211,
      v1TargetName: "Loyer",
      confidence: "high",
      reason: "default",
    });
  });

  it("maps Restaurant (24) → 1121 with medium confidence", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(24, "Restaurant", 2)],
      })
    );
    expect(plan.rows[0]).toMatchObject({
      v1TargetId: 1121,
      confidence: "medium",
      reason: "default",
    });
  });

  it("exposes splits for Transport en commun (28) when no keyword/supplier resolves it", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(28, "Transport en commun", 2)],
      })
    );
    // Split entries list every candidate leaf, in table order.
    expect(plan.rows[0].splits).toEqual([
      { v1TargetId: 1521, v1TargetName: "Autobus & métro" },
      { v1TargetId: 1522, v1TargetName: "Train de banlieue" },
    ]);
    expect(plan.rows[0].confidence).toBe("medium");
    expect(plan.rows[0].reason).toBe("default");
    // The row's own target is the split's primary leaf, i.e. the
    // "everything else goes here by default" target (1521 for category 28).
    expect(plan.rows[0].v1TargetId).toBe(1521);
  });

  it("exposes 4-way splits for Voiture (40)", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(40, "Voiture", 3)],
      })
    );
    expect(plan.rows[0].splits?.map((s) => s.v1TargetId)).toEqual([1512, 1513, 1514, 1515]);
    expect(plan.rows[0].v1TargetId).toBe(1513); // maintenance is the default leaf
    expect(plan.rows[0].confidence).toBe("low");
  });
});
expect(plan.stats).toEqual({ + total: 4, + high: 1, + medium: 1, + low: 1, + none: 1, + }); + expect(plan.unresolved).toHaveLength(1); + expect(plan.unresolved[0].v2CategoryId).toBe(73); + }); + + it("returns empty structures for an empty profile", () => { + const plan = computeMigrationPlan(makeProfile({})); + expect(plan.rows).toEqual([]); + expect(plan.preserved).toEqual([]); + expect(plan.unresolved).toEqual([]); + expect(plan.stats).toEqual({ total: 0, high: 0, medium: 0, low: 0, none: 0 }); + }); + + it("handles a mixed profile with seeded + custom categories in one call", () => { + const plan = computeMigrationPlan( + makeProfile({ + v2Categories: [ + cat(1, "Revenus"), // structural → skipped + cat(22, "Épicerie", 2), // high default + cat(9002, "Dépenses projet X", 3), // custom → preserved + ], + }) + ); + expect(plan.rows).toHaveLength(1); + expect(plan.rows[0].v2CategoryId).toBe(22); + expect(plan.preserved).toHaveLength(1); + expect(plan.preserved[0].v2CategoryId).toBe(9002); + }); +}); + +// --------------------------------------------------------------------------- +// Pass priority — keyword beats default +// --------------------------------------------------------------------------- + +describe("computeMigrationPlan — pass priority", () => { + it("Pass 1 (keyword) wins over Pass 3 (default) on split categories", () => { + // Transport en commun (28) default is 1521; with a GARE CENTRALE keyword + // Pass 1 should push to 1522 (Train) instead. + const plan = computeMigrationPlan( + makeProfile({ + v2Categories: [cat(28, "Transport en commun", 2)], + keywords: [kw(28, "GARE CENTRALE")], + }) + ); + expect(plan.rows[0]).toMatchObject({ + v1TargetId: 1522, + confidence: "high", + reason: "keyword", + }); + // Splits are NOT exposed when Pass 1 resolves the row — only Pass 3 + // attaches them. 
+ expect(plan.rows[0].splits).toBeUndefined(); + }); + + it("Pass 2 (supplier) wins over Pass 3 (default)", () => { + const plan = computeMigrationPlan( + makeProfile({ + v2Categories: [cat(40, "Voiture", 3)], + // No v2 keyword row on cat 40 — description drives it. + transactions: [tx(1, "PETRO-CANADA #1234 MTL", 40)], + }) + ); + expect(plan.rows[0]).toMatchObject({ + v1TargetId: 1512, // Essence + confidence: "medium", + reason: "supplier", + }); + }); +}); diff --git a/src/services/categoryMappingService.ts b/src/services/categoryMappingService.ts new file mode 100644 index 0000000..361e9db --- /dev/null +++ b/src/services/categoryMappingService.ts @@ -0,0 +1,529 @@ +/** + * categoryMappingService — pure function computing a v2 → v1 category migration + * plan from a snapshot of the profile data. + * + * Algorithm (4 passes, applied per v2 category): + * Pass 1 — keyword match : if a keyword attached to the v2 category + * points to a v1 leaf (by name/normalized match + * or by a hard-coded keyword→leaf rule derived + * from the spike `mapping-old-to-new.md`), take + * that v1 leaf. Confidence: high. + * Pass 2 — supplier propag. : for every seeded v2 category, walk the + * transactions attached to it, in order. Each + * raw description, then the linked supplier's + * name, is tested against the same keyword→leaf + * rules as pass 1 and the first hit is taken. + * Not restricted to split categories; no + * majority vote across transactions is made. + * Confidence: medium. + * Pass 3 — default fallback : use the v2→v1 default rule encoded in + * DEFAULT_MAPPINGS (one per seeded v2 category). + * Confidence: per entry (high, medium or low). + * Pass 4 — needs review : nothing matched, or no usable default. Confidence: none. + * + * Custom v2 categories (id absent from the v2 seed) are preserved — they go + * into `plan.preserved` with v1TargetId=null and will later be placed under + * the "Catégories personnalisées (migration)" parent by the migration writer. 
/**
 * One line of the migration plan: where a single v2 category should land in
 * the v1 taxonomy and how that decision was reached.
 */
export interface MappingRow {
  /** v2 category id as it exists today in the user's DB. */
  v2CategoryId: number;
  /** v2 category display name (as stored in DB). */
  v2CategoryName: string;
  /** Null when unresolved (preserved or needs review). */
  v1TargetId: number | null;
  /** Display name of the v1 target; null exactly when `v1TargetId` is null. */
  v1TargetName: string | null;
  /** How much the plan trusts this row; "none" means no target was chosen. */
  confidence: ConfidenceBadge;
  /** Which pass (or the preserved bucket) produced this row. */
  reason: MappingReason;
  /** Split targets when the default rule ventilates into multiple v1 leaves. */
  splits?: V1Target[];
  /** Human-readable reasoning (English — internal). */
  notes?: string;
}
/**
 * Canonical form used for text comparisons in this service.
 *
 * Decomposes the input (NFD) so accents become separate combining marks,
 * drops the marks in U+0300–U+036F, lower-cases the result, and joins the
 * whitespace-separated words with single spaces (no leading or trailing
 * space). Kept local to this file so the service does not depend on any
 * DB-coupled module.
 *
 * @param input - Free text such as a keyword, description or supplier name.
 * @returns The normalized string; empty or blank input yields "".
 */
export function normalizeForMatch(input: string): string {
  const withoutAccents = input.normalize("NFD").replace(/[\u0300-\u036f]/g, "");
  const words = withoutAccents.toLowerCase().trim().split(/\s+/);
  return words.join(" ");
}
/**
 * Turns a v1 leaf id into an `{ id, name }` target.
 *
 * @param id - v1 taxonomy id taken from one of the mapping tables.
 * @returns The target carrying the id and its taxonomy display name.
 * @throws Error when the id cannot be resolved to a name. That means a
 *   mapping table references an id the taxonomy does not contain, so we
 *   fail loudly rather than emit a broken plan.
 */
function toV1Target(id: number): V1Target {
  const resolvedName = leafNameOrNull(id);
  if (resolvedName !== null) {
    return { v1TargetId: id, v1TargetName: resolvedName };
  }
  throw new Error(`categoryMappingService: v1 leaf id ${id} not found in taxonomy`);
}
/**
 * Keyword fragment → v1 leaf id rules, consulted by the keyword pass (user
 * keywords) and by the supplier pass (transaction descriptions and supplier
 * names).
 *
 * `match` is compared against text that has already been through
 * `normalizeForMatch`, so every fragment here must itself be lower-case and
 * accent-free (e.g. "aeroports de montreal", "la cordee").
 *
 * Array order matters only among rules that hit the same piece of text: the
 * pass runners return on the first rule that matches.
 *
 * The group headings below name the v2 category a fragment was derived from.
 * They are informational only — nothing in this table restricts a rule to
 * that v2 category.
 *
 * NOTE(review): very short fragments ("rem", "stm", "esso", "lego") are easy
 * to hit by accident if the consumer matches raw substrings — "virement"
 * contains "rem", for instance. Confirm the matching strategy of each
 * consumer before adding more short fragments.
 */
const KEYWORD_TO_V1: ReadonlyArray<{ match: string; v1Id: number }> = [
  // Jeux, Films & Livres (26) — rules below target 1711 / 1712 / 1713 / 1715 / 1741
  { match: "steamgames", v1Id: 1712 },
  { match: "playstation", v1Id: 1712 },
  { match: "nintendo", v1Id: 1712 },
  { match: "primevideo", v1Id: 1713 },
  { match: "renaud-bray", v1Id: 1741 },
  { match: "cinema du parc", v1Id: 1711 },
  { match: "lego", v1Id: 1715 },

  // Transport en commun (28) split → 1521 / 1522
  { match: "stm", v1Id: 1521 },
  { match: "gare mont-saint", v1Id: 1522 },
  { match: "gare saint-hubert", v1Id: 1522 },
  { match: "gare centrale", v1Id: 1522 },
  { match: "rem", v1Id: 1522 },

  // Internet & Télécom (29) — default 1231 is in DEFAULT_MAPPINGS

  // Voiture (40) — rules below only reach 1512 and 1514; the other split
  // leaves are reachable through the default mapping only.
  { match: "shell", v1Id: 1512 },
  { match: "esso", v1Id: 1512 },
  { match: "ultramar", v1Id: 1512 },
  { match: "petro-canada", v1Id: 1512 },
  { match: "crevier", v1Id: 1512 },
  { match: "saaq", v1Id: 1514 },

  // Assurances (31) — v2 intermediate parent; its 3 v2 children map by name
  // but legacy keywords on id 31 (pre-split profiles) also land here.
  { match: "belair", v1Id: 1250 },
  { match: "prysm", v1Id: 1250 },
  { match: "ins/ass", v1Id: 1630 },

  // Voyage (47) — rules below reach 1531 and 1533
  { match: "norwegian cruise", v1Id: 1533 },
  { match: "aeroports de montreal", v1Id: 1531 },
  { match: "hilton", v1Id: 1533 },

  // Sports & Plein air (48) split → 1721 / 1722 / 1723
  // ("physioactif" deliberately leaves the group and targets 1615.)
  { match: "sepaq", v1Id: 1723 },
  { match: "bloc shop", v1Id: 1723 },
  { match: "mountain equipment", v1Id: 1722 },
  { match: "decathlon", v1Id: 1722 },
  { match: "la cordee", v1Id: 1722 },
  { match: "physioactif", v1Id: 1615 },

  // Électroménagers & Meubles (53) split → 1311 / 1312 (rules only reach 1311)
  { match: "tanguay", v1Id: 1311 },
  { match: "bouclair", v1Id: 1311 },

  // Projets (73) — no good default; "CLAUDE.AI" / "NAME-CHEAP" → 1734
  { match: "claude.ai", v1Id: 1734 },
  { match: "name-cheap", v1Id: 1734 },
];
/**
 * Default mapping per v2 seed category id, used when neither a keyword nor a
 * transaction/supplier resolves the category.
 *
 * Keys are the v2 category ids from
 * `src-tauri/src/database/seed_categories.sql` /
 * `categoryService.reinitializeCategories()`. The key set also defines which
 * v2 ids count as "seeded": `V2_SEEDED_IDS` is derived from it.
 *
 * Confidence follows mapping-old-to-new.md:
 *   🟢 High   → high
 *   🟡 Medium → medium
 *   🟠 Low    → low
 *   🔴 None   → none
 *
 * For `kind: "split"` entries, `primaryV1Id` is the "everything else goes
 * here by default" target from the rationale column and `splitV1Ids` lists
 * the candidate leaves exposed to the user.
 */
const DEFAULT_MAPPINGS: Readonly<Record<number, DefaultMappingEntry>> = {
  // Revenus
  10: { kind: "single", v1Id: 1011, confidence: "high" }, // Paie → Paie régulière
  11: { kind: "single", v1Id: 1090, confidence: "high" }, // Autres revenus

  // Dépenses récurrentes
  20: { kind: "single", v1Id: 1211, confidence: "high" }, // Loyer
  21: { kind: "single", v1Id: 1221, confidence: "high" }, // Électricité
  22: { kind: "single", v1Id: 1111, confidence: "high" }, // Épicerie → régulière
  23: { kind: "single", v1Id: 1931, confidence: "high" }, // Dons → Dons de charité
  24: { kind: "single", v1Id: 1121, confidence: "medium" }, // Restaurant
  25: { kind: "single", v1Id: 1911, confidence: "high" }, // Frais bancaires → compte
  // NOTE(review): 26 is the only split whose primary (1710) is not one of its
  // own splitV1Ids — confirm 1710 is the intended default target.
  26: { kind: "split", primaryV1Id: 1710, splitV1Ids: [1711, 1712, 1713, 1714, 1715, 1741], confidence: "low" }, // Jeux, Films & Livres
  27: { kind: "single", v1Id: 1714, confidence: "high" }, // Abonnements Musique
  28: { kind: "split", primaryV1Id: 1521, splitV1Ids: [1521, 1522], confidence: "medium" }, // Transport en commun
  29: { kind: "split", primaryV1Id: 1231, splitV1Ids: [1231, 1232, 1234], confidence: "medium" }, // Internet & Télécom
  30: { kind: "single", v1Id: 1751, confidence: "medium" }, // Animaux
  31: { kind: "split", primaryV1Id: 1250, splitV1Ids: [1250, 1516, 1616, 1630], confidence: "low" }, // Assurances
  32: { kind: "single", v1Id: 1611, confidence: "high" }, // Pharmacie
  33: { kind: "single", v1Id: 1213, confidence: "high" }, // Taxes municipales

  // Dépenses ponctuelles
  40: { kind: "split", primaryV1Id: 1513, splitV1Ids: [1512, 1513, 1514, 1515], confidence: "low" }, // Voiture
  41: { kind: "single", v1Id: 1946, confidence: "medium" }, // Amazon → divers
  42: { kind: "single", v1Id: 1312, confidence: "low" }, // Électroniques → électroménagers
  43: { kind: "single", v1Id: 1810, confidence: "high" }, // Alcool
  44: { kind: "single", v1Id: 1940, confidence: "high" }, // Cadeaux
  45: { kind: "single", v1Id: 1410, confidence: "medium" }, // Vêtements → adultes
  46: { kind: "single", v1Id: 1932, confidence: "high" }, // CPA
  47: { kind: "split", primaryV1Id: 1533, splitV1Ids: [1531, 1532, 1533, 1534], confidence: "medium" }, // Voyage
  48: { kind: "split", primaryV1Id: 1722, splitV1Ids: [1721, 1722, 1723], confidence: "medium" }, // Sports & Plein air
  49: { kind: "single", v1Id: 1711, confidence: "high" }, // Spectacles & sorties

  // Maison
  50: { kind: "single", v1Id: 1212, confidence: "high" }, // Hypothèque
  51: { kind: "single", v1Id: 1243, confidence: "medium" }, // Achats maison → matériaux & outils
  52: { kind: "single", v1Id: 1241, confidence: "high" }, // Entretien maison
  53: { kind: "split", primaryV1Id: 1311, splitV1Ids: [1311, 1312], confidence: "medium" }, // Électroménagers & Meubles
  54: { kind: "single", v1Id: 1243, confidence: "high" }, // Outils → matériaux & outils

  // Placements
  60: { kind: "single", v1Id: 1964, confidence: "medium" }, // Placements → non-enregistré
  61: { kind: "single", v1Id: 1980, confidence: "high" }, // Transferts internes

  // Autres
  70: { kind: "single", v1Id: 1922, confidence: "medium" }, // Impôts → provincial (default)
  71: { kind: "single", v1Id: 1971, confidence: "high" }, // Paiement CC
  72: { kind: "single", v1Id: 1945, confidence: "high" }, // Retrait cash
  73: { kind: "none", confidence: "none", notes: "No direct v1 equivalent; user must decide or keep as custom" },

  // Level-3 children of Assurances (31) — for profiles that already split them
  310: { kind: "single", v1Id: 1516, confidence: "high" }, // Assurance-auto
  311: { kind: "single", v1Id: 1250, confidence: "high" }, // Assurance-habitation
  312: { kind: "single", v1Id: 1630, confidence: "high" }, // Assurance-vie
};
/**
 * Computes the v2 → v1 migration plan for one profile snapshot.
 *
 * Every entry of `profileData.v2Categories` is handled in input order:
 *   - structural v2 parents (ids in `V2_STRUCTURAL_PARENT_IDS`) are skipped;
 *   - ids outside `V2_SEEDED_IDS` are user-created categories and go to
 *     `preserved` with no v1 target;
 *   - everything else is resolved by `resolveSeededCategory` and goes to
 *     `rows`.
 *
 * `unresolved` is the subset of `rows` with confidence "none", and `stats`
 * counts `rows` per confidence level (preserved rows are not counted).
 *
 * @param profileData - Categories, keywords, transactions and (optionally)
 *   suppliers of the profile. Not mutated.
 * @returns The plan; all collections are empty for an empty profile.
 * @throws Error if a mapping table references a v1 id that the taxonomy does
 *   not contain (raised while resolving a seeded category).
 */
export function computeMigrationPlan(profileData: ProfileData): MigrationPlan {
  const rows: MappingRow[] = [];
  const preserved: MappingRow[] = [];

  const keywordsByCat = groupBy(profileData.keywords, (k) => k.category_id);
  // Uncategorised transactions are parked under -1, a key no category lookup
  // below ever asks for.
  const txsByCat = groupBy(profileData.transactions, (t) => t.category_id ?? -1);
  const suppliersById = new Map<number, V2SupplierInput>();
  for (const s of profileData.suppliers ?? []) suppliersById.set(s.id, s);

  for (const cat of profileData.v2Categories) {
    // Skip structural v2 parents — they have no direct v1 equivalent. The
    // migration writer creates the v1 roots fresh from the taxonomy.
    if (V2_STRUCTURAL_PARENT_IDS.has(cat.id)) continue;

    if (!V2_SEEDED_IDS.has(cat.id)) {
      // Custom category → preserved bucket.
      preserved.push({
        v2CategoryId: cat.id,
        v2CategoryName: cat.name,
        v1TargetId: null,
        v1TargetName: null,
        confidence: "none",
        reason: "preserved",
        notes: "User-created category, not in v2 seed. Kept under 'Catégories personnalisées (migration)'.",
      });
      continue;
    }

    rows.push(
      resolveSeededCategory(
        cat,
        keywordsByCat.get(cat.id) ?? [],
        txsByCat.get(cat.id) ?? [],
        suppliersById
      )
    );
  }

  // One pass over the rows fills every counter; the confidence value doubles
  // as the counter name.
  const stats: MigrationPlanStats = { total: rows.length, high: 0, medium: 0, low: 0, none: 0 };
  for (const row of rows) stats[row.confidence] += 1;

  const unresolved = rows.filter((r) => r.confidence === "none");

  return { rows, preserved, unresolved, stats };
}
/** Letters and digits: characters that may NOT sit directly next to a matched fragment. */
const SUPPLIER_PASS_WORD_CHAR = /[\p{L}\p{N}]/u;

/**
 * True when `fragment` occurs in `haystack` as a delimited run, i.e. the
 * characters immediately before and after the occurrence are either absent
 * (string edge) or not a letter/digit. An empty fragment never matches.
 */
function containsDelimitedFragment(haystack: string, fragment: string): boolean {
  if (fragment.length === 0) return false;
  let from = 0;
  while (from <= haystack.length - fragment.length) {
    const at = haystack.indexOf(fragment, from);
    if (at === -1) return false;
    // charAt returns "" outside the string, which the regex never matches —
    // so both string edges count as delimiters.
    const before = haystack.charAt(at - 1);
    const after = haystack.charAt(at + fragment.length);
    if (!SUPPLIER_PASS_WORD_CHAR.test(before) && !SUPPLIER_PASS_WORD_CHAR.test(after)) {
      return true;
    }
    from = at + 1;
  }
  return false;
}

/**
 * Pass 2 — resolves a category from the free text attached to its
 * transactions.
 *
 * Transactions are visited in input order. For each one, the raw description
 * (when non-empty) and then the name of the linked supplier (when
 * `supplier_id` resolves in `suppliers`) are normalized and tested against
 * `KEYWORD_TO_V1` in table order. The first hit wins.
 *
 * A rule fragment must appear as a delimited run, not as an arbitrary
 * substring. Free-text bank descriptions collide with short fragments
 * otherwise: "VIREMENT" contains "rem", which would route any transfer to the
 * commuter-train leaf. The trade-off is that a merchant name glued to other
 * letters or digits no longer matches and falls through to the default pass.
 *
 * @param transactions - Transactions already filtered to one v2 category.
 * @param suppliers - Supplier lookup by id.
 * @returns The first matching v1 target, or null when nothing matches.
 * @throws Error if the matching rule points at a v1 id missing from the
 *   taxonomy (raised by `toV1Target`).
 */
function runSupplierPass(
  transactions: V2TransactionInput[],
  suppliers: Map<number, V2SupplierInput>
): V1Target | null {
  for (const tx of transactions) {
    const sources: string[] = [];
    if (tx.description) sources.push(tx.description);
    if (tx.supplier_id !== undefined && tx.supplier_id !== null) {
      const sup = suppliers.get(tx.supplier_id);
      if (sup) sources.push(sup.name);
    }
    for (const src of sources) {
      const normalized = normalizeForMatch(src);
      for (const rule of KEYWORD_TO_V1) {
        if (containsDelimitedFragment(normalized, rule.match)) {
          return toV1Target(rule.v1Id);
        }
      }
    }
  }
  return null;
}
/**
 * Buckets `items` by the key `keyFn` returns for each of them.
 *
 * Buckets appear in the map in order of first occurrence of their key, and
 * items keep their input order inside a bucket. Keys are compared the way
 * `Map` compares them, so object keys group by identity. The input array is
 * not modified.
 *
 * @param items - Values to group; may be empty.
 * @param keyFn - Derives the grouping key of one item.
 * @returns A new map from key to the items sharing that key.
 */
function groupBy<T, K>(items: readonly T[], keyFn: (item: T) => K): Map<K, T[]> {
  const map = new Map<K, T[]>();
  for (const item of items) {
    const key = keyFn(item);
    const bucket = map.get(key);
    if (bucket) bucket.push(item);
    else map.set(key, [item]);
  }
  return map;
}