feat(categories): categoryMappingService 4-pass algo (#119) #128

Merged
maximus merged 1 commit from issue-119-category-mapping-service into main 2026-04-21 01:07:16 +00:00
2 changed files with 895 additions and 0 deletions

View file

@ -0,0 +1,366 @@
import { describe, it, expect, beforeEach } from "vitest";
import {
computeMigrationPlan,
normalizeForMatch,
__resetMappingServiceCachesForTests,
type ProfileData,
type V2CategoryInput,
type V2KeywordInput,
type V2TransactionInput,
type V2SupplierInput,
} from "./categoryMappingService";
import { resetTaxonomyCache } from "./categoryTaxonomyService";
// Runs before every test case: calls the reset helpers exported by both
// services (presumably so cached state cannot leak from one case to the next).
beforeEach(() => {
resetTaxonomyCache();
__resetMappingServiceCachesForTests();
});
// ---------------------------------------------------------------------------
// Fixture helpers — we build just enough of ProfileData per test to stay
// readable; everything defaults to empty arrays.
// ---------------------------------------------------------------------------
/**
 * Builds a complete ProfileData fixture from a partial one.
 *
 * `v2Categories`, `keywords` and `transactions` fall back to empty arrays when
 * absent; `suppliers` is passed through untouched (it may stay undefined).
 */
function makeProfile(partial: Partial<ProfileData>): ProfileData {
  const profile: ProfileData = {
    v2Categories: [],
    keywords: [],
    transactions: [],
    suppliers: partial.suppliers,
  };
  if (partial.v2Categories != null) profile.v2Categories = partial.v2Categories;
  if (partial.keywords != null) profile.keywords = partial.keywords;
  if (partial.transactions != null) profile.transactions = partial.transactions;
  return profile;
}
/** Fixture builder for a v2 category row; `parent_id` defaults to null. */
function cat(id: number, name: string, parent_id: number | null = null): V2CategoryInput {
  const category: V2CategoryInput = {
    id: id,
    name: name,
    parent_id: parent_id,
  };
  return category;
}
/** Fixture builder for a keyword row attached to the given v2 category id. */
function kw(category_id: number, keyword: string): V2KeywordInput {
  const row: V2KeywordInput = {
    category_id: category_id,
    keyword: keyword,
  };
  return row;
}
/**
 * Fixture builder for a transaction row. When no supplier id is given the
 * row carries `supplier_id: null`.
 */
function tx(id: number, description: string, category_id: number | null, supplier_id?: number): V2TransactionInput {
  const supplier = supplier_id ?? null;
  const row: V2TransactionInput = {
    id: id,
    description: description,
    category_id: category_id,
    supplier_id: supplier,
  };
  return row;
}
/** Fixture builder for a supplier row. */
function sup(id: number, name: string): V2SupplierInput {
  const supplier: V2SupplierInput = {
    id: id,
    name: name,
  };
  return supplier;
}
// ---------------------------------------------------------------------------
// normalizeForMatch
// ---------------------------------------------------------------------------
// Pins the text normalization applied before any rule matching.
describe("normalizeForMatch", () => {
it("lowercases, strips accents, and collapses spaces", () => {
// Surrounding blanks are trimmed; accents and upper case are removed.
expect(normalizeForMatch(" Épicerie Régulière ")).toBe("epicerie reguliere");
});
it("handles already-normalized text", () => {
// Input that is already lower-case ASCII must come back unchanged.
expect(normalizeForMatch("stm")).toBe("stm");
});
});
// ---------------------------------------------------------------------------
// Custom categories → preserved bucket
// ---------------------------------------------------------------------------
// Categories outside the v2 seed go to `plan.preserved`; structural parents
// are dropped from the plan entirely.
describe("computeMigrationPlan — preserved (custom)", () => {
it("moves a non-seeded v2 category into the preserved bucket", () => {
// Id 9001 is not a seeded v2 id, so it must not appear in `rows`.
const plan = computeMigrationPlan(
makeProfile({
v2Categories: [cat(9001, "Ma catégorie perso", 2)],
})
);
expect(plan.rows).toHaveLength(0);
expect(plan.preserved).toHaveLength(1);
expect(plan.preserved[0]).toMatchObject({
v2CategoryId: 9001,
v2CategoryName: "Ma catégorie perso",
v1TargetId: null,
confidence: "none",
reason: "preserved",
});
});
it("ignores structural v2 parents (16)", () => {
// Ids 1 and 2 are structural parents: they land in none of the three lists.
const plan = computeMigrationPlan(
makeProfile({
v2Categories: [cat(1, "Revenus"), cat(2, "Dépenses récurrentes")],
})
);
expect(plan.rows).toHaveLength(0);
expect(plan.preserved).toHaveLength(0);
expect(plan.unresolved).toHaveLength(0);
});
});
// ---------------------------------------------------------------------------
// Pass 1 — keyword match
// ---------------------------------------------------------------------------
// Pass 1: a keyword stored on the v2 category selects the v1 leaf with
// confidence "high" and reason "keyword".
describe("computeMigrationPlan — Pass 1 (keyword)", () => {
it("maps Transport en commun → 1521 (Autobus & métro) via STM keyword", () => {
const plan = computeMigrationPlan(
makeProfile({
v2Categories: [cat(28, "Transport en commun", 2)],
keywords: [kw(28, "STM")],
})
);
expect(plan.rows).toHaveLength(1);
expect(plan.rows[0]).toMatchObject({
v2CategoryId: 28,
v1TargetId: 1521,
v1TargetName: "Autobus & métro",
confidence: "high",
reason: "keyword",
});
});
it("maps Voiture → 1512 (Essence) via SHELL keyword", () => {
const plan = computeMigrationPlan(
makeProfile({
v2Categories: [cat(40, "Voiture", 3)],
keywords: [kw(40, "SHELL")],
})
);
expect(plan.rows[0]).toMatchObject({
v1TargetId: 1512,
confidence: "high",
reason: "keyword",
});
});
it("picks the first matching KEYWORD_TO_V1 rule when multiple apply", () => {
// The order of the user's keywords drives the result: SAAQ is listed first,
// so its rule (1514) wins even though the SHELL keyword would map to 1512.
const plan = computeMigrationPlan(
makeProfile({
v2Categories: [cat(40, "Voiture", 3)],
keywords: [kw(40, "SAAQ"), kw(40, "SHELL")],
})
);
expect(plan.rows[0].v1TargetId).toBe(1514);
});
});
// ---------------------------------------------------------------------------
// Pass 2 — supplier propagation
// ---------------------------------------------------------------------------
// Pass 2: with no keyword rows, the transaction description or the supplier
// name selects the v1 leaf with confidence "medium" and reason "supplier".
describe("computeMigrationPlan — Pass 2 (supplier)", () => {
it("propagates via a transaction description when no v2 keyword matches", () => {
const plan = computeMigrationPlan(
makeProfile({
v2Categories: [cat(28, "Transport en commun", 2)],
// No keyword rows for cat 28.
transactions: [tx(1, "PAIEMENT STM CARTE OPUS", 28)],
})
);
expect(plan.rows[0]).toMatchObject({
v1TargetId: 1521,
confidence: "medium",
reason: "supplier",
});
});
it("propagates via a supplier name when the description has no hit", () => {
// The description "CARTE 1234" matches nothing; supplier 42 ("Hilton
// Montreal") is what resolves the row.
const plan = computeMigrationPlan(
makeProfile({
v2Categories: [cat(47, "Voyage", 3)],
transactions: [tx(1, "CARTE 1234", 47, 42)],
suppliers: [sup(42, "Hilton Montreal")],
})
);
expect(plan.rows[0]).toMatchObject({
v1TargetId: 1533,
confidence: "medium",
reason: "supplier",
});
});
});
// ---------------------------------------------------------------------------
// Pass 3 — default fallback
// ---------------------------------------------------------------------------
// Pass 3: without keywords or transactions the per-category default applies,
// with the confidence recorded for that category and reason "default".
describe("computeMigrationPlan — Pass 3 (default)", () => {
it("maps Loyer (20) → 1211 with high confidence (direct)", () => {
const plan = computeMigrationPlan(
makeProfile({
v2Categories: [cat(20, "Loyer", 2)],
})
);
expect(plan.rows[0]).toMatchObject({
v1TargetId: 1211,
v1TargetName: "Loyer",
confidence: "high",
reason: "default",
});
});
it("maps Restaurant (24) → 1121 with medium confidence", () => {
const plan = computeMigrationPlan(
makeProfile({
v2Categories: [cat(24, "Restaurant", 2)],
})
);
expect(plan.rows[0]).toMatchObject({
v1TargetId: 1121,
confidence: "medium",
reason: "default",
});
});
it("exposes splits for Transport en commun (28) when no keyword/supplier resolves it", () => {
const plan = computeMigrationPlan(
makeProfile({
v2Categories: [cat(28, "Transport en commun", 2)],
})
);
// A split default lists every candidate leaf in `splits`.
expect(plan.rows[0].splits).toEqual([
{ v1TargetId: 1521, v1TargetName: "Autobus & métro" },
{ v1TargetId: 1522, v1TargetName: "Train de banlieue" },
]);
expect(plan.rows[0].confidence).toBe("medium");
expect(plan.rows[0].reason).toBe("default");
// Primary target is the "reste → X par défaut" (1521 per rationale).
expect(plan.rows[0].v1TargetId).toBe(1521);
});
it("exposes 4-way splits for Voiture (40)", () => {
const plan = computeMigrationPlan(
makeProfile({
v2Categories: [cat(40, "Voiture", 3)],
})
);
expect(plan.rows[0].splits?.map((s) => s.v1TargetId)).toEqual([1512, 1513, 1514, 1515]);
expect(plan.rows[0].v1TargetId).toBe(1513); // maintenance by default
expect(plan.rows[0].confidence).toBe("low");
});
});
// ---------------------------------------------------------------------------
// Pass 4 — review
// ---------------------------------------------------------------------------
// Pass 4: a seeded category without any usable rule ends up unresolved
// (null target, confidence "none", reason "review").
describe("computeMigrationPlan — Pass 4 (review)", () => {
it("flags Projets (73) for review (no direct v1 equivalent)", () => {
const plan = computeMigrationPlan(
makeProfile({
v2Categories: [cat(73, "Projets", 6)],
})
);
expect(plan.rows[0]).toMatchObject({
v1TargetId: null,
v1TargetName: null,
confidence: "none",
reason: "review",
});
// The same row is also surfaced through `plan.unresolved`.
expect(plan.unresolved).toHaveLength(1);
});
it("escapes Pass 4 for Projets when a CLAUDE.AI keyword is present", () => {
// A keyword hit resolves the row before the "review" fallback is reached.
const plan = computeMigrationPlan(
makeProfile({
v2Categories: [cat(73, "Projets", 6)],
keywords: [kw(73, "CLAUDE.AI")],
})
);
expect(plan.rows[0]).toMatchObject({
v1TargetId: 1734, // Professional subscriptions
confidence: "high",
reason: "keyword",
});
});
});
// ---------------------------------------------------------------------------
// Stats & aggregation
// ---------------------------------------------------------------------------
// Aggregates: `stats` counts rows per confidence, `unresolved` lists the
// "none" rows, and custom categories are reported separately in `preserved`.
describe("computeMigrationPlan — stats", () => {
it("reports per-confidence counts matching the rows", () => {
// One category per confidence level, all resolved by their default rule.
const plan = computeMigrationPlan(
makeProfile({
v2Categories: [
cat(20, "Loyer", 2), // high (default direct)
cat(24, "Restaurant", 2), // medium (default)
cat(40, "Voiture", 3), // low (split, low confidence)
cat(73, "Projets", 6), // none
],
})
);
expect(plan.stats).toEqual({
total: 4,
high: 1,
medium: 1,
low: 1,
none: 1,
});
expect(plan.unresolved).toHaveLength(1);
expect(plan.unresolved[0].v2CategoryId).toBe(73);
});
it("returns empty structures for an empty profile", () => {
const plan = computeMigrationPlan(makeProfile({}));
expect(plan.rows).toEqual([]);
expect(plan.preserved).toEqual([]);
expect(plan.unresolved).toEqual([]);
expect(plan.stats).toEqual({ total: 0, high: 0, medium: 0, low: 0, none: 0 });
});
it("handles a mixed profile with seeded + custom categories in one call", () => {
// Each of the three kinds of category takes a different route.
const plan = computeMigrationPlan(
makeProfile({
v2Categories: [
cat(1, "Revenus"), // structural → skipped
cat(22, "Épicerie", 2), // high default
cat(9002, "Dépenses projet X", 3), // custom → preserved
],
})
);
expect(plan.rows).toHaveLength(1);
expect(plan.rows[0].v2CategoryId).toBe(22);
expect(plan.preserved).toHaveLength(1);
expect(plan.preserved[0].v2CategoryId).toBe(9002);
});
});
// ---------------------------------------------------------------------------
// Pass priority — keyword beats default
// ---------------------------------------------------------------------------
// Ordering between passes: an earlier pass that resolves the row prevents the
// default rule (Pass 3) from being used.
describe("computeMigrationPlan — pass priority", () => {
it("Pass 1 (keyword) wins over Pass 3 (default) on split categories", () => {
// Transport en commun (28) default is 1521; with a GARE CENTRALE keyword
// Pass 1 should push to 1522 (Train) instead.
const plan = computeMigrationPlan(
makeProfile({
v2Categories: [cat(28, "Transport en commun", 2)],
keywords: [kw(28, "GARE CENTRALE")],
})
);
expect(plan.rows[0]).toMatchObject({
v1TargetId: 1522,
confidence: "high",
reason: "keyword",
});
// Splits are NOT exposed when Pass 1 resolves the row — only Pass 3
// attaches them.
expect(plan.rows[0].splits).toBeUndefined();
});
it("Pass 2 (supplier) wins over Pass 3 (default)", () => {
// Voiture (40) would default to 1513 with low confidence; the transaction
// description lifts it to 1512 with medium confidence.
const plan = computeMigrationPlan(
makeProfile({
v2Categories: [cat(40, "Voiture", 3)],
// No v2 keyword row on cat 40 — description drives it.
transactions: [tx(1, "PETRO-CANADA #1234 MTL", 40)],
})
);
expect(plan.rows[0]).toMatchObject({
v1TargetId: 1512, // Fuel
confidence: "medium",
reason: "supplier",
});
});
});

View file

@ -0,0 +1,529 @@
/**
* categoryMappingService — pure function computing a v2 → v1 category migration
* plan from a snapshot of the profile data.
*
* Algorithm (4 passes, applied per seeded v2 category):
* Pass 1 — keyword match : if a keyword attached to the v2 category
* points to a v1 leaf (by normalized leaf-name match
* or by a hard-coded keyword→leaf rule derived
* from the spike `mapping-old-to-new.md`), take
* that v1 leaf. Confidence: high.
* Pass 2 — supplier propag. : look at the transactions attached to the v2
* category (e.g. Transport en commun → Bus/Train).
* If a transaction description, or the name of
* the supplier attached to it, matches one of the
* pass-1 keyword→leaf rules, we propagate that
* leaf. The first matching transaction wins; no
* majority vote across transactions is performed.
* Confidence: medium.
* Pass 3 — default fallback : use the v2→v1 default rule encoded in
* DEFAULT_MAPPINGS (one per seeded v2 category).
* Confidence: the one recorded on that entry
* (high, medium or low).
* Pass 4 — needs review : nothing matched, or the default entry is an
* explicit "none". Confidence: none.
*
* Custom v2 categories (id absent from the v2 seed) are preserved — they go
* into `plan.preserved` with v1TargetId=null and will later be placed under
* the "Catégories personnalisées (migration)" parent by the migration writer.
*
* The service is a PURE function: no DB, no I/O, no Tauri calls. The caller
* (categoryMigrationService or the migration preview UI) is responsible for
* fetching `profileData` from SQLite.
*/
import { findById, getLeaves, type TaxonomyNode } from "./categoryTaxonomyService";
// ---------------------------------------------------------------------------
// Public types
// ---------------------------------------------------------------------------
/** Confidence level shown for a mapping row; "none" is used for rows without a v1 target. */
export type ConfidenceBadge = "high" | "medium" | "low" | "none";
/**
 * Why a row got its target: "keyword" (Pass 1), "supplier" (Pass 2),
 * "default" (Pass 3), "review" (no usable rule) or "preserved" (custom
 * category kept as-is).
 */
export type MappingReason =
| "keyword"
| "supplier"
| "default"
| "review"
| "preserved";
/** A v1 taxonomy node referenced by id together with its display name. */
export interface V1Target {
v1TargetId: number;
v1TargetName: string;
}
/** One line of the migration plan: a v2 category and where it should go. */
export interface MappingRow {
/** v2 category id as it exists today in the user's DB. */
v2CategoryId: number;
/** v2 category display name (as stored in DB). */
v2CategoryName: string;
/** Null when unresolved (preserved or needs review). */
v1TargetId: number | null;
/** Display name of the v1 target; null whenever `v1TargetId` is null. */
v1TargetName: string | null;
confidence: ConfidenceBadge;
reason: MappingReason;
/** Split targets when the default rule ventilates into multiple v1 leaves. */
splits?: V1Target[];
/** Human-readable reasoning (English — internal). */
notes?: string;
}
/** Row counts per confidence level; `total` is the number of entries in `rows`. */
export interface MigrationPlanStats {
total: number;
high: number;
medium: number;
low: number;
none: number;
}
/** Result of `computeMigrationPlan`. */
export interface MigrationPlan {
/** Every mapped v2 category (seeded, non-custom) — includes resolved AND unresolved. */
rows: MappingRow[];
/** Custom categories (not in v2 seed): kept under "Catégories personnalisées (migration)". */
preserved: MappingRow[];
/** Subset of `rows` with confidence=none — requires user review. */
unresolved: MappingRow[];
stats: MigrationPlanStats;
}
// ---------------------------------------------------------------------------
// Minimal input shapes — purposely kept loose so the caller can hand us
// projections from SQL rows without forcing the full `Category`/`Transaction`
// types from `src/shared/types/index.ts`.
// ---------------------------------------------------------------------------
/** Projection of a v2 category row. */
export interface V2CategoryInput {
id: number;
name: string;
/** Parent category id; optional and not read by the mapping passes in this file. */
parent_id?: number | null;
}
/** Projection of a keyword row attached to a v2 category. */
export interface V2KeywordInput {
id?: number;
/** Raw keyword text; it is normalized before matching. */
keyword: string;
/** Id of the v2 category the keyword belongs to. */
category_id: number;
}
/** Projection of a transaction row. */
export interface V2TransactionInput {
id: number;
/** Raw description; it is normalized before matching. */
description: string;
/** v2 category of the transaction; null when uncategorized. */
category_id: number | null;
/** Optional link to a supplier, looked up in `ProfileData.suppliers`. */
supplier_id?: number | null;
}
/** Projection of a supplier row. */
export interface V2SupplierInput {
id: number;
name: string;
}
/** Snapshot of the profile handed to `computeMigrationPlan`. */
export interface ProfileData {
v2Categories: V2CategoryInput[];
keywords: V2KeywordInput[];
transactions: V2TransactionInput[];
/** Optional — only used when a transaction has a supplier_id set. */
suppliers?: V2SupplierInput[];
}
// ---------------------------------------------------------------------------
// Internal helpers
// ---------------------------------------------------------------------------
/**
 * Canonical form used for every text comparison in this service.
 *
 * The input is decomposed (NFD) so accents become separate combining marks,
 * those marks are removed, the text is lower-cased, each run of whitespace
 * becomes a single space, and surrounding whitespace is trimmed.
 *
 * Kept local (instead of importing the equivalent helper from
 * categorizationService) so this service stays free of any DB coupling.
 *
 * @param input Free-text string (keyword, description, supplier or leaf name).
 * @returns The normalized string.
 */
export function normalizeForMatch(input: string): string {
  const decomposed = input.normalize("NFD");
  const withoutAccents = decomposed.replace(/[\u0300-\u036f]/g, "");
  const lowered = withoutAccents.toLowerCase();
  const singleSpaced = lowered.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
/**
 * Looks up a v1 taxonomy node by id and returns its name.
 *
 * @param id v1 taxonomy id.
 * @returns The node's `name`, or null when `findById` finds no node. Callers
 *   must treat null as a broken mapping-table entry.
 */
function leafNameOrNull(id: number): string | null {
  const match = findById(id);
  if (!match) return null;
  return match.name;
}
/**
 * Resolves a v1 id into an `{ v1TargetId, v1TargetName }` pair.
 *
 * @param id v1 taxonomy id taken from one of the mapping tables.
 * @returns The id together with its taxonomy name.
 * @throws Error when the id has no name in the taxonomy. That can only happen
 *   if DEFAULT_MAPPINGS / KEYWORD_TO_V1 drifted away from
 *   categoryTaxonomyV1.json, so we fail loudly instead of emitting a broken
 *   plan.
 */
function toV1Target(id: number): V1Target {
  const name = leafNameOrNull(id);
  if (name !== null) {
    return { v1TargetId: id, v1TargetName: name };
  }
  throw new Error(`categoryMappingService: v1 leaf id ${id} not found in taxonomy`);
}
// ---------------------------------------------------------------------------
// Mapping tables — encoded from .spikes/archived/seed-standard/code/mapping-old-to-new.md
// ---------------------------------------------------------------------------
/**
* Keyword fragment (normalized) → v1 leaf id. Used by Pass 1 (keywords stored
* on the v2 category) and by Pass 2 (transaction descriptions and supplier
* names). Keys are match keys — they are compared against the text after it
* went through `normalizeForMatch`, so they must be written lower-case and
* without accents.
*
* Rules are tried in array order and the first hit wins, so order matters
* when a text could satisfy several rules.
*
* Values are v1 leaf ids from `categoryTaxonomyV1.json`.
*
* Confidence badge for Pass 1 matches is always "high" per the mapping doc.
*/
const KEYWORD_TO_V1: ReadonlyArray<{ match: string; v1Id: number }> = [
// Jeux, Films & Livres (26) — rules below target 1711 / 1712 / 1713 / 1715 / 1741
{ match: "steamgames", v1Id: 1712 },
{ match: "playstation", v1Id: 1712 },
{ match: "nintendo", v1Id: 1712 },
{ match: "primevideo", v1Id: 1713 },
{ match: "renaud-bray", v1Id: 1741 },
{ match: "cinema du parc", v1Id: 1711 },
{ match: "lego", v1Id: 1715 },
// Transport en commun (28) split → 1521 / 1522
{ match: "stm", v1Id: 1521 },
{ match: "gare mont-saint", v1Id: 1522 },
{ match: "gare saint-hubert", v1Id: 1522 },
{ match: "gare centrale", v1Id: 1522 },
{ match: "rem", v1Id: 1522 },
// Internet & Télécom (29) — no keyword rule; default 1231 is in DEFAULT_MAPPINGS
// Voiture (40) — rules below target 1512 / 1514 (1513 and 1515 have no keyword rule)
{ match: "shell", v1Id: 1512 },
{ match: "esso", v1Id: 1512 },
{ match: "ultramar", v1Id: 1512 },
{ match: "petro-canada", v1Id: 1512 },
{ match: "crevier", v1Id: 1512 },
{ match: "saaq", v1Id: 1514 },
// Assurances (31) — v2 intermediate parent; its 3 v2 children map by name
// but legacy keywords on id 31 (pre-split profiles) also land here.
{ match: "belair", v1Id: 1250 },
{ match: "prysm", v1Id: 1250 },
{ match: "ins/ass", v1Id: 1630 },
// Voyage (47) — rules below target 1531 / 1533 (1532 and 1534 have no keyword rule)
{ match: "norwegian cruise", v1Id: 1533 },
{ match: "aeroports de montreal", v1Id: 1531 },
{ match: "hilton", v1Id: 1533 },
// Sports & Plein air (48) — rules below target 1722 / 1723, plus 1615 for physioactif
{ match: "sepaq", v1Id: 1723 },
{ match: "bloc shop", v1Id: 1723 },
{ match: "mountain equipment", v1Id: 1722 },
{ match: "decathlon", v1Id: 1722 },
{ match: "la cordee", v1Id: 1722 },
{ match: "physioactif", v1Id: 1615 },
// Électroménagers & Meubles (53) — rules below target 1311 only
{ match: "tanguay", v1Id: 1311 },
{ match: "bouclair", v1Id: 1311 },
// Projets (73) — no good default; "CLAUDE.AI" / "NAME-CHEAP" → 1734
{ match: "claude.ai", v1Id: 1734 },
{ match: "name-cheap", v1Id: 1734 },
];
/**
 * Shape of one DEFAULT_MAPPINGS entry, discriminated by `kind`:
 *  - "single": one v1 target.
 *  - "split" : several candidate v1 targets (`splitV1Ids`) plus the one used
 *              as the row's primary target (`primaryV1Id`).
 *  - "none"  : explicitly no default; the row must be reviewed by the user.
 */
type DefaultMappingEntry =
| { kind: "single"; v1Id: number; confidence: ConfidenceBadge; notes?: string }
| { kind: "split"; primaryV1Id: number; splitV1Ids: number[]; confidence: ConfidenceBadge; notes?: string }
| { kind: "none"; confidence: "none"; notes?: string };
/**
* Default mapping per v2 seed category id. Keys are the v2 category ids
* from `src-tauri/src/database/seed_categories.sql` / `categoryService.reinitializeCategories()`.
*
* Confidence follows mapping-old-to-new.md:
* 🟢 Haute → high
* 🟡 Moyenne → medium
* 🟠 Basse → low
* 🔴 Aucune → none
*
* Note: these defaults are only the "last resort" — Pass 1/2 can overwrite
* them via keyword matches (high confidence for Pass 1, medium for Pass 2).
* The `primaryV1Id` of a split is the "reste → X par défaut" target from the
* rationale column.
*
* The keys of this table also define which v2 ids count as "seeded": any id
* missing here (and not a structural parent) is treated as a custom category.
*/
const DEFAULT_MAPPINGS: Readonly<Record<number, DefaultMappingEntry>> = {
// Revenus
10: { kind: "single", v1Id: 1011, confidence: "high" }, // Paie → Paie régulière
11: { kind: "single", v1Id: 1090, confidence: "high" }, // Autres revenus
// Dépenses récurrentes
20: { kind: "single", v1Id: 1211, confidence: "high" }, // Loyer
21: { kind: "single", v1Id: 1221, confidence: "high" }, // Électricité
22: { kind: "single", v1Id: 1111, confidence: "high" }, // Épicerie → régulière
23: { kind: "single", v1Id: 1931, confidence: "high" }, // Dons → Dons de charité
24: { kind: "single", v1Id: 1121, confidence: "medium" }, // Restaurant
25: { kind: "single", v1Id: 1911, confidence: "high" }, // Frais bancaires → compte
// NOTE(review): 1710 is the only split primary that is not listed in its own
// `splitV1Ids`; it looks like the parent of 1711–1715 rather than a leaf —
// confirm against categoryTaxonomyV1.json that this is intended.
26: { kind: "split", primaryV1Id: 1710, splitV1Ids: [1711, 1712, 1713, 1714, 1715, 1741], confidence: "low" }, // Jeux, Films & Livres
27: { kind: "single", v1Id: 1714, confidence: "high" }, // Abonnements Musique
28: { kind: "split", primaryV1Id: 1521, splitV1Ids: [1521, 1522], confidence: "medium" }, // Transport en commun
29: { kind: "split", primaryV1Id: 1231, splitV1Ids: [1231, 1232, 1234], confidence: "medium" }, // Internet & Télécom
30: { kind: "single", v1Id: 1751, confidence: "medium" }, // Animaux
31: { kind: "split", primaryV1Id: 1250, splitV1Ids: [1250, 1516, 1616, 1630], confidence: "low" }, // Assurances
32: { kind: "single", v1Id: 1611, confidence: "high" }, // Pharmacie
33: { kind: "single", v1Id: 1213, confidence: "high" }, // Taxes municipales
// Dépenses ponctuelles
40: { kind: "split", primaryV1Id: 1513, splitV1Ids: [1512, 1513, 1514, 1515], confidence: "low" }, // Voiture
41: { kind: "single", v1Id: 1946, confidence: "medium" }, // Amazon → divers
42: { kind: "single", v1Id: 1312, confidence: "low" }, // Électroniques → électroménagers
43: { kind: "single", v1Id: 1810, confidence: "high" }, // Alcool
44: { kind: "single", v1Id: 1940, confidence: "high" }, // Cadeaux
45: { kind: "single", v1Id: 1410, confidence: "medium" }, // Vêtements → adultes
46: { kind: "single", v1Id: 1932, confidence: "high" }, // CPA
47: { kind: "split", primaryV1Id: 1533, splitV1Ids: [1531, 1532, 1533, 1534], confidence: "medium" }, // Voyage
48: { kind: "split", primaryV1Id: 1722, splitV1Ids: [1721, 1722, 1723], confidence: "medium" }, // Sports & Plein air
49: { kind: "single", v1Id: 1711, confidence: "high" }, // Spectacles & sorties
// Maison
50: { kind: "single", v1Id: 1212, confidence: "high" }, // Hypothèque
51: { kind: "single", v1Id: 1243, confidence: "medium" }, // Achats maison → matériaux & outils
52: { kind: "single", v1Id: 1241, confidence: "high" }, // Entretien maison
53: { kind: "split", primaryV1Id: 1311, splitV1Ids: [1311, 1312], confidence: "medium" }, // Électroménagers & Meubles
54: { kind: "single", v1Id: 1243, confidence: "high" }, // Outils → matériaux & outils
// Placements
60: { kind: "single", v1Id: 1964, confidence: "medium" }, // Placements → non-enregistré
61: { kind: "single", v1Id: 1980, confidence: "high" }, // Transferts internes
// Autres
70: { kind: "single", v1Id: 1922, confidence: "medium" }, // Impôts → provincial (default)
71: { kind: "single", v1Id: 1971, confidence: "high" }, // Paiement CC
72: { kind: "single", v1Id: 1945, confidence: "high" }, // Retrait cash
73: { kind: "none", confidence: "none", notes: "No direct v1 equivalent; user must decide or keep as custom" },
// Level-3 children of Assurances (31) — for profiles that already split them
310: { kind: "single", v1Id: 1516, confidence: "high" }, // Assurance-auto
311: { kind: "single", v1Id: 1250, confidence: "high" }, // Assurance-habitation
312: { kind: "single", v1Id: 1630, confidence: "high" }, // Assurance-vie
};
/** v2 parent category ids (Revenus, Dépenses récurrentes, ...). Never mapped
* directly — they are structural containers and are replaced by v1 roots. */
const V2_STRUCTURAL_PARENT_IDS = new Set<number>([1, 2, 3, 4, 5, 6]);
/** All v2 category ids that are part of the standard v2 seed, derived from
* the keys of DEFAULT_MAPPINGS. Anything outside this set (and not a
* structural parent) is a user-custom category and goes to `preserved`. */
const V2_SEEDED_IDS = new Set<number>(Object.keys(DEFAULT_MAPPINGS).map(Number));
// ---------------------------------------------------------------------------
// Main entry point
// ---------------------------------------------------------------------------
/**
 * Computes the v2 → v1 migration plan for one profile snapshot.
 *
 * Each v2 category takes exactly one of three routes:
 *  - structural parent → ignored;
 *  - seeded category   → resolved through the pass chain, appended to `rows`;
 *  - anything else     → custom category, appended to `preserved`.
 *
 * @param profileData Categories, keywords, transactions and (optionally)
 *   suppliers of the profile.
 * @returns The plan: `rows`, `preserved`, `unresolved` (rows whose confidence
 *   is "none") and per-confidence `stats` computed over `rows` only.
 * @throws Error when a mapping table refers to a v1 id that is missing from
 *   the taxonomy (raised by `toV1Target`).
 */
export function computeMigrationPlan(profileData: ProfileData): MigrationPlan {
  // Index the inputs once so the per-category work is a lookup.
  const keywordsByCat = groupBy(profileData.keywords, (k) => k.category_id);
  // Uncategorized transactions are parked under the key -1.
  const txsByCat = groupBy(profileData.transactions, (t) => t.category_id ?? -1);
  const suppliersById = new Map<number, V2SupplierInput>();
  for (const supplier of profileData.suppliers ?? []) {
    suppliersById.set(supplier.id, supplier);
  }

  const rows: MappingRow[] = [];
  const preserved: MappingRow[] = [];

  for (const category of profileData.v2Categories) {
    // Structural v2 parents have no v1 counterpart; the migration writer
    // creates the v1 roots fresh from the taxonomy.
    if (V2_STRUCTURAL_PARENT_IDS.has(category.id)) continue;

    if (V2_SEEDED_IDS.has(category.id)) {
      rows.push(
        resolveSeededCategory(
          category,
          keywordsByCat.get(category.id) ?? [],
          txsByCat.get(category.id) ?? [],
          suppliersById
        )
      );
    } else {
      // Not part of the v2 seed: user-created, kept as-is.
      preserved.push({
        v2CategoryId: category.id,
        v2CategoryName: category.name,
        v1TargetId: null,
        v1TargetName: null,
        confidence: "none",
        reason: "preserved",
        notes: "User-created category, not in v2 seed. Kept under 'Catégories personnalisées (migration)'.",
      });
    }
  }

  // Tally the rows per confidence level in one sweep.
  const stats: MigrationPlanStats = { total: rows.length, high: 0, medium: 0, low: 0, none: 0 };
  for (const row of rows) {
    stats[row.confidence] += 1;
  }
  const unresolved = rows.filter((row) => row.confidence === "none");

  return { rows, preserved, unresolved, stats };
}
// ---------------------------------------------------------------------------
// Pass runners
// ---------------------------------------------------------------------------
/**
 * Runs the pass chain for one seeded v2 category and returns its plan row.
 * Passes are evaluated lazily in order; the first one that yields a result
 * decides the row.
 *
 * @param cat The v2 category being resolved.
 * @param keywords Keyword rows attached to that category.
 * @param transactions Transactions currently assigned to that category.
 * @param suppliers Supplier lookup by id.
 * @returns The mapping row for the category.
 */
function resolveSeededCategory(
  cat: V2CategoryInput,
  keywords: V2KeywordInput[],
  transactions: V2TransactionInput[],
  suppliers: Map<number, V2SupplierInput>
): MappingRow {
  const identity = { v2CategoryId: cat.id, v2CategoryName: cat.name };

  // Pass 1 — one of the user's v2 keywords for this category hits a rule.
  const fromKeyword = runKeywordPass(keywords);
  if (fromKeyword !== null) {
    return {
      ...identity,
      v1TargetId: fromKeyword.v1TargetId,
      v1TargetName: fromKeyword.v1TargetName,
      confidence: "high",
      reason: "keyword",
    };
  }

  // Pass 2 — a transaction description or supplier name hits a rule; this is
  // weaker evidence than an explicit keyword, hence medium confidence.
  const fromSupplier = runSupplierPass(transactions, suppliers);
  if (fromSupplier !== null) {
    return {
      ...identity,
      v1TargetId: fromSupplier.v1TargetId,
      v1TargetName: fromSupplier.v1TargetName,
      confidence: "medium",
      reason: "supplier",
    };
  }

  // Pass 3 — default rule from the mapping table (carries its own confidence).
  const fromDefault = runDefaultPass(cat.id);
  if (fromDefault !== null) {
    return { ...identity, ...fromDefault };
  }

  // Pass 4 — nothing applied; the user has to decide.
  return {
    ...identity,
    v1TargetId: null,
    v1TargetName: null,
    confidence: "none",
    reason: "review",
    notes: "No mapping rule matched; needs manual review.",
  };
}
/** Matches any Unicode letter; used to detect word boundaries around a rule hit. */
const LETTER_RE = /\p{L}/u;

/**
 * Tells whether `needle` occurs in `haystack` as a standalone word or phrase,
 * i.e. with no letter directly before or after the occurrence. Digits,
 * punctuation, spaces and the string edges all count as boundaries, so
 * "esso #4567", "stm*opus" and "claude.ai" still match their rules.
 *
 * A plain substring test is not good enough for the short rule keys: "rem"
 * would fire inside "remboursement" or "premium", "stm" inside "postmates",
 * "esso" inside "accessoires" — each producing a confident but wrong mapping.
 *
 * Consequence: a rule no longer fires when the brand is glued to other
 * letters (e.g. "belairdirect" for the key "belair"); such spellings need
 * their own KEYWORD_TO_V1 entry.
 *
 * @param haystack Normalized text to search in.
 * @param needle Normalized rule key; an empty key never matches.
 */
function containsAsWholePhrase(haystack: string, needle: string): boolean {
  if (needle.length === 0) return false;
  let at = haystack.indexOf(needle);
  while (at !== -1) {
    // charAt() yields "" outside the string, which is not a letter, so the
    // start and the end of the text behave as boundaries.
    const before = haystack.charAt(at - 1);
    const after = haystack.charAt(at + needle.length);
    if (!LETTER_RE.test(before) && !LETTER_RE.test(after)) return true;
    // This occurrence sits inside a longer word; look for a later one.
    at = haystack.indexOf(needle, at + 1);
  }
  return false;
}

/**
 * Returns the target of the first KEYWORD_TO_V1 rule (in table order) whose
 * key occurs in `normalized` as a whole word/phrase, or null when none does.
 *
 * @throws Error when the matching rule points to an id missing from the
 *   taxonomy (raised by `toV1Target`).
 */
function matchKeywordRule(normalized: string): V1Target | null {
  for (const rule of KEYWORD_TO_V1) {
    if (containsAsWholePhrase(normalized, rule.match)) {
      return toV1Target(rule.v1Id);
    }
  }
  return null;
}

/**
 * Pass 1 — resolves a category from the keywords stored on it.
 *
 * Keywords are examined in the order given. For each keyword the rule table
 * is tried first, then an exact (normalized) v1 leaf-name match — covering
 * the rare case where a keyword literally names a v1 leaf. The first keyword
 * that resolves wins.
 *
 * @param keywords Keyword rows attached to the v2 category.
 * @returns The v1 target, or null when no keyword resolves.
 */
function runKeywordPass(keywords: V2KeywordInput[]): V1Target | null {
  for (const kw of keywords) {
    const normalized = normalizeForMatch(kw.keyword);
    const ruleHit = matchKeywordRule(normalized);
    if (ruleHit !== null) return ruleHit;
    const leafHit = findLeafByNormalizedName(normalized);
    if (leafHit !== null) {
      return toV1Target(leafHit.id);
    }
  }
  return null;
}

/**
 * Pass 2 — resolves a category from the transactions assigned to it.
 *
 * For each transaction, in order, the description is checked first and then
 * the name of its supplier (when `supplier_id` is set and known). The first
 * text that hits a KEYWORD_TO_V1 rule decides the target.
 *
 * @param transactions Transactions currently assigned to the v2 category.
 * @param suppliers Supplier lookup by id.
 * @returns The v1 target, or null when no transaction resolves.
 */
function runSupplierPass(
  transactions: V2TransactionInput[],
  suppliers: Map<number, V2SupplierInput>
): V1Target | null {
  for (const tx of transactions) {
    const sources: string[] = [];
    if (tx.description) sources.push(tx.description);
    if (tx.supplier_id !== undefined && tx.supplier_id !== null) {
      const sup = suppliers.get(tx.supplier_id);
      if (sup) sources.push(sup.name);
    }
    for (const src of sources) {
      const hit = matchKeywordRule(normalizeForMatch(src));
      if (hit !== null) return hit;
    }
  }
  return null;
}
/**
 * Pass 3 — looks up the default rule for a v2 category id.
 *
 * @param v2Id v2 category id.
 * @returns The target/confidence/reason fields for the row, or null when
 *   DEFAULT_MAPPINGS has no entry for the id. A "split" entry also carries
 *   `splits`; a "none" entry yields a null target with reason "review".
 * @throws Error when the entry refers to an id missing from the taxonomy
 *   (raised by `toV1Target`).
 */
function runDefaultPass(
  v2Id: number
): Pick<MappingRow, "v1TargetId" | "v1TargetName" | "confidence" | "reason" | "splits" | "notes"> | null {
  const entry = DEFAULT_MAPPINGS[v2Id];
  if (!entry) return null;

  switch (entry.kind) {
    case "single": {
      const { v1TargetId, v1TargetName } = toV1Target(entry.v1Id);
      return {
        v1TargetId,
        v1TargetName,
        confidence: entry.confidence,
        reason: "default",
        notes: entry.notes,
      };
    }
    case "split": {
      // The primary is resolved before the candidates, so a broken primary id
      // is the first thing reported.
      const primary = toV1Target(entry.primaryV1Id);
      const splits = entry.splitV1Ids.map((candidateId) => toV1Target(candidateId));
      return {
        v1TargetId: primary.v1TargetId,
        v1TargetName: primary.v1TargetName,
        confidence: entry.confidence,
        reason: "default",
        splits,
        notes: entry.notes,
      };
    }
    default:
      // kind === "none" — the table explicitly says there is no good default.
      return {
        v1TargetId: null,
        v1TargetName: null,
        confidence: "none",
        reason: "review",
        notes: entry.notes,
      };
  }
}
// ---------------------------------------------------------------------------
// Utilities
// ---------------------------------------------------------------------------
/**
 * Buckets `items` by the key computed for each of them.
 *
 * @param items Items to group; their relative order is kept inside a bucket.
 * @param keyFn Computes the grouping key of an item.
 * @returns A Map from key to the items sharing it; keys appear in the order
 *   they were first seen.
 */
function groupBy<T, K>(items: T[], keyFn: (item: T) => K): Map<K, T[]> {
  const grouped = new Map<K, T[]>();
  for (const element of items) {
    const groupKey = keyFn(element);
    let members = grouped.get(groupKey);
    if (members === undefined) {
      // First item seen for this key: open its bucket.
      members = [];
      grouped.set(groupKey, members);
    }
    members.push(element);
  }
  return grouped;
}
/** Lazily built lookup: normalized v1 leaf name → leaf. Null until first use. */
let leafIndex: Map<string, TaxonomyNode> | null = null;

/** Builds the normalized-name lookup over every leaf returned by `getLeaves()`. */
function buildLeafIndex(): Map<string, TaxonomyNode> {
  const index = new Map<string, TaxonomyNode>();
  for (const leaf of getLeaves()) {
    // When two leaves normalize to the same name, the later one wins.
    index.set(normalizeForMatch(leaf.name), leaf);
  }
  return index;
}

/**
 * Finds the v1 leaf whose normalized name equals `normalized` exactly.
 *
 * @param normalized Text already passed through `normalizeForMatch`.
 * @returns The matching leaf, or null when there is none.
 */
function findLeafByNormalizedName(normalized: string): TaxonomyNode | null {
  const index = leafIndex ?? buildLeafIndex();
  leafIndex = index;
  return index.get(normalized) ?? null;
}

/** Test helper — drops the leaf-name lookup so the next call rebuilds it (useful only in tests). */
export function __resetMappingServiceCachesForTests(): void {
  leafIndex = null;
}