Merge pull request 'feat(categories): categoryMappingService 4-pass algo (#119)' (#128) from issue-119-category-mapping-service into main
This commit is contained in:
commit
1640a73499
2 changed files with 895 additions and 0 deletions
366
src/services/categoryMappingService.test.ts
Normal file
366
src/services/categoryMappingService.test.ts
Normal file
|
|
@ -0,0 +1,366 @@
|
|||
import { describe, it, expect, beforeEach } from "vitest";
|
||||
import {
|
||||
computeMigrationPlan,
|
||||
normalizeForMatch,
|
||||
__resetMappingServiceCachesForTests,
|
||||
type ProfileData,
|
||||
type V2CategoryInput,
|
||||
type V2KeywordInput,
|
||||
type V2TransactionInput,
|
||||
type V2SupplierInput,
|
||||
} from "./categoryMappingService";
|
||||
import { resetTaxonomyCache } from "./categoryTaxonomyService";
|
||||
|
||||
// Reset the state exposed by both services' reset hooks before every test so
// no test observes data left behind by a previous one.
beforeEach(() => {
  resetTaxonomyCache();
  __resetMappingServiceCachesForTests();
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Fixture helpers — we build just enough of ProfileData per test to stay
|
||||
// readable; the required collections default to empty arrays (suppliers stays undefined).
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build a ProfileData fixture from a partial override.
 *
 * `v2Categories`, `keywords` and `transactions` fall back to empty arrays;
 * `suppliers` is passed through unchanged, so it stays undefined when omitted.
 */
function makeProfile(partial: Partial<ProfileData>): ProfileData {
  return {
    v2Categories: partial.v2Categories ?? [],
    keywords: partial.keywords ?? [],
    transactions: partial.transactions ?? [],
    suppliers: partial.suppliers,
  };
}
|
||||
|
||||
/** Build a v2 category fixture; `parent_id` defaults to null (top level). */
function cat(id: number, name: string, parent_id: number | null = null): V2CategoryInput {
  return { id, name, parent_id };
}
|
||||
|
||||
/** Build a keyword fixture attached to the given v2 category id. */
function kw(category_id: number, keyword: string): V2KeywordInput {
  return { category_id, keyword };
}
|
||||
|
||||
/** Build a transaction fixture; an omitted `supplier_id` is stored as null. */
function tx(id: number, description: string, category_id: number | null, supplier_id?: number): V2TransactionInput {
  return { id, description, category_id, supplier_id: supplier_id ?? null };
}
|
||||
|
||||
/** Build a supplier fixture. */
function sup(id: number, name: string): V2SupplierInput {
  return { id, name };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// normalizeForMatch
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("normalizeForMatch", () => {
  it("lowercases, strips accents, and collapses spaces", () => {
    expect(normalizeForMatch(" Épicerie Régulière ")).toBe("epicerie reguliere");
  });

  it("handles already-normalized text", () => {
    expect(normalizeForMatch("stm")).toBe("stm");
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Custom categories → preserved bucket
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("computeMigrationPlan — preserved (custom)", () => {
  it("moves a non-seeded v2 category into the preserved bucket", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(9001, "Ma catégorie perso", 2)],
      })
    );
    // Custom categories never appear in `rows`, only in `preserved`.
    expect(plan.rows).toHaveLength(0);
    expect(plan.preserved).toHaveLength(1);
    expect(plan.preserved[0]).toMatchObject({
      v2CategoryId: 9001,
      v2CategoryName: "Ma catégorie perso",
      v1TargetId: null,
      confidence: "none",
      reason: "preserved",
    });
  });

  it("ignores structural v2 parents (1–6)", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(1, "Revenus"), cat(2, "Dépenses récurrentes")],
      })
    );
    expect(plan.rows).toHaveLength(0);
    expect(plan.preserved).toHaveLength(0);
    expect(plan.unresolved).toHaveLength(0);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pass 1 — keyword match
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("computeMigrationPlan — Pass 1 (keyword)", () => {
  it("maps Transport en commun → 1521 (Autobus & métro) via STM keyword", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(28, "Transport en commun", 2)],
        keywords: [kw(28, "STM")],
      })
    );
    expect(plan.rows).toHaveLength(1);
    expect(plan.rows[0]).toMatchObject({
      v2CategoryId: 28,
      v1TargetId: 1521,
      v1TargetName: "Autobus & métro",
      confidence: "high",
      reason: "keyword",
    });
  });

  it("maps Voiture → 1512 (Essence) via SHELL keyword", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(40, "Voiture", 3)],
        keywords: [kw(40, "SHELL")],
      })
    );
    expect(plan.rows[0]).toMatchObject({
      v1TargetId: 1512,
      confidence: "high",
      reason: "keyword",
    });
  });

  it("picks the first matching KEYWORD_TO_V1 rule when multiple apply", () => {
    // Keywords are scanned in the user's order, so SAAQ (1514) is matched
    // first even though the SHELL rule appears earlier in KEYWORD_TO_V1.
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(40, "Voiture", 3)],
        keywords: [kw(40, "SAAQ"), kw(40, "SHELL")],
      })
    );
    expect(plan.rows[0].v1TargetId).toBe(1514);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pass 2 — supplier propagation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("computeMigrationPlan — Pass 2 (supplier)", () => {
  it("propagates via a transaction description when no v2 keyword matches", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(28, "Transport en commun", 2)],
        // No keyword rows for cat 28.
        transactions: [tx(1, "PAIEMENT STM CARTE OPUS", 28)],
      })
    );
    expect(plan.rows[0]).toMatchObject({
      v1TargetId: 1521,
      confidence: "medium",
      reason: "supplier",
    });
  });

  it("propagates via a supplier name when the description has no hit", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(47, "Voyage", 3)],
        transactions: [tx(1, "CARTE 1234", 47, 42)],
        suppliers: [sup(42, "Hilton Montreal")],
      })
    );
    expect(plan.rows[0]).toMatchObject({
      v1TargetId: 1533,
      confidence: "medium",
      reason: "supplier",
    });
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pass 3 — default fallback
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("computeMigrationPlan — Pass 3 (default)", () => {
  it("maps Loyer (20) → 1211 with high confidence (direct)", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(20, "Loyer", 2)],
      })
    );
    expect(plan.rows[0]).toMatchObject({
      v1TargetId: 1211,
      v1TargetName: "Loyer",
      confidence: "high",
      reason: "default",
    });
  });

  it("maps Restaurant (24) → 1121 with medium confidence", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(24, "Restaurant", 2)],
      })
    );
    expect(plan.rows[0]).toMatchObject({
      v1TargetId: 1121,
      confidence: "medium",
      reason: "default",
    });
  });

  it("exposes splits for Transport en commun (28) when no keyword/supplier resolves it", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(28, "Transport en commun", 2)],
      })
    );
    expect(plan.rows[0].splits).toEqual([
      { v1TargetId: 1521, v1TargetName: "Autobus & métro" },
      { v1TargetId: 1522, v1TargetName: "Train de banlieue" },
    ]);
    expect(plan.rows[0].confidence).toBe("medium");
    expect(plan.rows[0].reason).toBe("default");
    // Primary target is the "remainder → X by default" leaf (1521 per rationale).
    expect(plan.rows[0].v1TargetId).toBe(1521);
  });

  it("exposes 4-way splits for Voiture (40)", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(40, "Voiture", 3)],
      })
    );
    expect(plan.rows[0].splits?.map((s) => s.v1TargetId)).toEqual([1512, 1513, 1514, 1515]);
    expect(plan.rows[0].v1TargetId).toBe(1513); // maintenance is the default leaf
    expect(plan.rows[0].confidence).toBe("low");
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pass 4 — review
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("computeMigrationPlan — Pass 4 (review)", () => {
  it("flags Projets (73) for review (no direct v1 equivalent)", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(73, "Projets", 6)],
      })
    );
    expect(plan.rows[0]).toMatchObject({
      v1TargetId: null,
      v1TargetName: null,
      confidence: "none",
      reason: "review",
    });
    expect(plan.unresolved).toHaveLength(1);
  });

  it("escapes Pass 4 for Projets when a CLAUDE.AI keyword is present", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(73, "Projets", 6)],
        keywords: [kw(73, "CLAUDE.AI")],
      })
    );
    expect(plan.rows[0]).toMatchObject({
      v1TargetId: 1734, // Abonnements professionnels
      confidence: "high",
      reason: "keyword",
    });
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Stats & aggregation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("computeMigrationPlan — stats", () => {
  it("reports per-confidence counts matching the rows", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [
          cat(20, "Loyer", 2), // high (default direct)
          cat(24, "Restaurant", 2), // medium (default)
          cat(40, "Voiture", 3), // low (split, low confidence)
          cat(73, "Projets", 6), // none
        ],
      })
    );
    expect(plan.stats).toEqual({
      total: 4,
      high: 1,
      medium: 1,
      low: 1,
      none: 1,
    });
    expect(plan.unresolved).toHaveLength(1);
    expect(plan.unresolved[0].v2CategoryId).toBe(73);
  });

  it("returns empty structures for an empty profile", () => {
    const plan = computeMigrationPlan(makeProfile({}));
    expect(plan.rows).toEqual([]);
    expect(plan.preserved).toEqual([]);
    expect(plan.unresolved).toEqual([]);
    expect(plan.stats).toEqual({ total: 0, high: 0, medium: 0, low: 0, none: 0 });
  });

  it("handles a mixed profile with seeded + custom categories in one call", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [
          cat(1, "Revenus"), // structural → skipped
          cat(22, "Épicerie", 2), // high default
          cat(9002, "Dépenses projet X", 3), // custom → preserved
        ],
      })
    );
    expect(plan.rows).toHaveLength(1);
    expect(plan.rows[0].v2CategoryId).toBe(22);
    expect(plan.preserved).toHaveLength(1);
    expect(plan.preserved[0].v2CategoryId).toBe(9002);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pass priority — keyword beats default
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("computeMigrationPlan — pass priority", () => {
  it("Pass 1 (keyword) wins over Pass 3 (default) on split categories", () => {
    // Transport en commun (28) default is 1521; with a GARE CENTRALE keyword
    // Pass 1 should push to 1522 (Train) instead.
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(28, "Transport en commun", 2)],
        keywords: [kw(28, "GARE CENTRALE")],
      })
    );
    expect(plan.rows[0]).toMatchObject({
      v1TargetId: 1522,
      confidence: "high",
      reason: "keyword",
    });
    // Splits are NOT exposed when Pass 1 resolves the row — only Pass 3
    // attaches them.
    expect(plan.rows[0].splits).toBeUndefined();
  });

  it("Pass 2 (supplier) wins over Pass 3 (default)", () => {
    const plan = computeMigrationPlan(
      makeProfile({
        v2Categories: [cat(40, "Voiture", 3)],
        // No v2 keyword row on cat 40 — description drives it.
        transactions: [tx(1, "PETRO-CANADA #1234 MTL", 40)],
      })
    );
    expect(plan.rows[0]).toMatchObject({
      v1TargetId: 1512, // Essence
      confidence: "medium",
      reason: "supplier",
    });
  });
});
|
||||
529
src/services/categoryMappingService.ts
Normal file
529
src/services/categoryMappingService.ts
Normal file
|
|
@ -0,0 +1,529 @@
|
|||
/**
|
||||
* categoryMappingService — pure function computing a v2 → v1 category migration
|
||||
* plan from a snapshot of the profile data.
|
||||
*
|
||||
* Algorithm (4 passes, applied per v2 category):
|
||||
* Pass 1 — keyword match : if a keyword attached to the v2 category
|
||||
* points to a v1 leaf (by name/normalized match
|
||||
* or by a hard-coded keyword→leaf rule derived
|
||||
* from the spike `mapping-old-to-new.md`), take
|
||||
* that v1 leaf. Confidence: high.
|
||||
* Pass 2 — supplier propag. : when a v2 category has split targets (e.g.
|
||||
* Transport en commun → Bus/Train), look at
|
||||
* the suppliers attached to its transactions.
|
||||
* If a supplier description matches one of the
|
||||
* pass-1 keyword→leaf rules we propagate. Else,
|
||||
* if a majority of transactions are already
|
||||
* assigned to a single split leaf via pass 1,
|
||||
* take that leaf. Confidence: medium.
|
||||
* Pass 3 — default fallback : use the v2→v1 default rule encoded in
|
||||
* DEFAULT_MAPPINGS (one per seeded v2 category).
|
||||
 *                              Confidence: taken from the table entry (high / medium / low).
|
||||
* Pass 4 — needs review : nothing matched. Confidence: none.
|
||||
*
|
||||
* Custom v2 categories (id absent from the v2 seed) are preserved — they go
|
||||
* into `plan.preserved` with v1TargetId=null and will later be placed under
|
||||
* the "Catégories personnalisées (migration)" parent by the migration writer.
|
||||
*
|
||||
* The service is a PURE function: no DB, no I/O, no Tauri calls. The caller
|
||||
* (categoryMigrationService or the migration preview UI) is responsible for
|
||||
* fetching `profileData` from SQLite.
|
||||
*/
|
||||
|
||||
import { findById, getLeaves, type TaxonomyNode } from "./categoryTaxonomyService";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Confidence level attached to a mapping row, from strongest to "unresolved". */
export type ConfidenceBadge =
  | "high"
  | "medium"
  | "low"
  | "none";
|
||||
|
||||
/**
 * Why a row ended up with its target: "keyword", "supplier" and "default"
 * are produced by passes 1–3, "review" by pass 4, and "preserved" marks a
 * custom (non-seeded) category.
 */
export type MappingReason =
  | "keyword"
  | "supplier"
  | "default"
  | "review"
  | "preserved";
|
||||
|
||||
/** A resolved v1 taxonomy node: its id together with its display name. */
export interface V1Target {
  v1TargetId: number;
  v1TargetName: string;
}
|
||||
|
||||
/** One line of the migration plan: a v2 category and where it should go in v1. */
export interface MappingRow {
  /** v2 category id as it exists today in the user's DB. */
  v2CategoryId: number;
  /** v2 category display name (as stored in DB). */
  v2CategoryName: string;
  /** Null when unresolved (preserved or needs review). */
  v1TargetId: number | null;
  /** Display name of the v1 target; null whenever `v1TargetId` is null. */
  v1TargetName: string | null;
  confidence: ConfidenceBadge;
  reason: MappingReason;
  /** Split targets when the default rule ventilates into multiple v1 leaves. */
  splits?: V1Target[];
  /** Human-readable reasoning (English — internal). */
  notes?: string;
}
|
||||
|
||||
/** Row counts for a plan: the overall total plus one counter per confidence level. */
export interface MigrationPlanStats {
  total: number;
  high: number;
  medium: number;
  low: number;
  none: number;
}
|
||||
|
||||
/** Result of `computeMigrationPlan`. */
export interface MigrationPlan {
  /** Every mapped v2 category (seeded, non-custom) — includes resolved AND unresolved. */
  rows: MappingRow[];
  /** Custom categories (not in v2 seed): kept under "Catégories personnalisées (migration)". */
  preserved: MappingRow[];
  /** Subset of `rows` with confidence=none — requires user review. */
  unresolved: MappingRow[];
  /** Counters computed over `rows` only (preserved categories are not counted). */
  stats: MigrationPlanStats;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Minimal input shapes — purposely kept loose so the caller can hand us
|
||||
// projections from SQL rows without forcing the full `Category`/`Transaction`
|
||||
// types from `src/shared/types/index.ts`.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Minimal projection of a v2 category row. */
export interface V2CategoryInput {
  id: number;
  name: string;
  /** Parent category id; may be omitted or null. */
  parent_id?: number | null;
}
|
||||
|
||||
/** Minimal projection of a v2 keyword row. */
export interface V2KeywordInput {
  id?: number;
  /** Raw keyword text as stored; it is normalized before matching. */
  keyword: string;
  /** Id of the v2 category this keyword is attached to. */
  category_id: number;
}
|
||||
|
||||
/** Minimal projection of a v2 transaction row. */
export interface V2TransactionInput {
  id: number;
  description: string;
  /** Null for transactions without a category. */
  category_id: number | null;
  /** Optional link to a supplier; may be omitted or null. */
  supplier_id?: number | null;
}
|
||||
|
||||
/** Minimal projection of a v2 supplier row. */
export interface V2SupplierInput {
  id: number;
  name: string;
}
|
||||
|
||||
/** Snapshot of a profile handed to `computeMigrationPlan`. */
export interface ProfileData {
  v2Categories: V2CategoryInput[];
  keywords: V2KeywordInput[];
  transactions: V2TransactionInput[];
  /** Optional — only used when a transaction has a supplier_id set. */
  suppliers?: V2SupplierInput[];
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Normalize a free-text string for matching: lower case, strip diacritics,
 * collapse whitespace. Duplicated here (rather than importing from
 * categorizationService) to keep this service free of any DB coupling.
 *
 * @param input Raw text (keyword, transaction description, supplier name…).
 * @returns The text NFD-decomposed with combining marks U+0300–U+036F
 *          removed, lower-cased, with every whitespace run collapsed to a
 *          single space and leading/trailing whitespace trimmed.
 */
export function normalizeForMatch(input: string): string {
  return input
    .normalize("NFD")
    // After NFD decomposition the accents are separate combining marks.
    .replace(/[\u0300-\u036f]/g, "")
    .toLowerCase()
    .replace(/\s+/g, " ")
    .trim();
}
|
||||
|
||||
/**
 * Map a v1 taxonomy id back to its display name as stored on the node that
 * `findById` returns.
 *
 * @returns The node's name, or null if the id is unknown in the v1 taxonomy —
 *          callers must treat that as a broken mapping table entry.
 */
function leafNameOrNull(id: number): string | null {
  return findById(id)?.name ?? null;
}
|
||||
|
||||
/**
 * Resolve a v1 id from the mapping tables into an `{ id, name }` target.
 *
 * @throws Error when the id cannot be found in the v1 taxonomy.
 */
function toV1Target(id: number): V1Target {
  const name = leafNameOrNull(id);
  if (name === null) {
    // This would only fire if DEFAULT_MAPPINGS / KEYWORD_TO_V1 fell out of
    // sync with categoryTaxonomyV1.json. Throw loudly rather than silently
    // produce a broken plan.
    throw new Error(`categoryMappingService: v1 leaf id ${id} not found in taxonomy`);
  }
  return { v1TargetId: id, v1TargetName: name };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mapping tables — encoded from .spikes/archived/seed-standard/code/mapping-old-to-new.md
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Keyword fragment (normalized) → v1 leaf id. Consulted by Pass 1 against the
 * user's v2 keywords and by Pass 2 against raw transaction descriptions and
 * supplier names. Keys are match keys: after `normalizeForMatch`, the
 * candidate text must CONTAIN the key (substring test), so every key has to
 * be stored already normalized (lower case, no diacritics, single spaces).
 *
 * Values are v1 leaf ids from `categoryTaxonomyV1.json`.
 *
 * Confidence badge for Pass 1 matches is always "high" per the mapping doc.
 *
 * NOTE(review): because matching is a plain substring test, very short keys
 * ("rem", "stm", "esso", "lego") also hit unrelated text such as "premium",
 * "investment" or "espresso". Consider token-boundary matching in both pass
 * runners — confirm against the mapping spec before changing.
 */
const KEYWORD_TO_V1: ReadonlyArray<{ match: string; v1Id: number }> = [
  // Jeux, Films & Livres (26) → 1711 / 1712 / 1713 / 1715 / 1741
  { match: "steamgames", v1Id: 1712 },
  { match: "playstation", v1Id: 1712 },
  { match: "nintendo", v1Id: 1712 },
  { match: "primevideo", v1Id: 1713 },
  { match: "renaud-bray", v1Id: 1741 },
  { match: "cinema du parc", v1Id: 1711 },
  { match: "lego", v1Id: 1715 },

  // Transport en commun (28) split → 1521 / 1522
  { match: "stm", v1Id: 1521 },
  { match: "gare mont-saint", v1Id: 1522 },
  { match: "gare saint-hubert", v1Id: 1522 },
  { match: "gare centrale", v1Id: 1522 },
  { match: "rem", v1Id: 1522 },

  // Internet & Télécom (29) — default 1231 is in DEFAULT_MAPPINGS

  // Voiture (40) split → 1512 / 1513 / 1514 / 1515 (keyword rules only for 1512 and 1514)
  { match: "shell", v1Id: 1512 },
  { match: "esso", v1Id: 1512 },
  { match: "ultramar", v1Id: 1512 },
  { match: "petro-canada", v1Id: 1512 },
  { match: "crevier", v1Id: 1512 },
  { match: "saaq", v1Id: 1514 },

  // Assurances (31) — v2 intermediate parent; its 3 v2 children map by name
  // but legacy keywords on id 31 (pre-split profiles) also land here.
  { match: "belair", v1Id: 1250 },
  { match: "prysm", v1Id: 1250 },
  { match: "ins/ass", v1Id: 1630 },

  // Voyage (47) split → 1531 / 1532 / 1533 / 1534
  { match: "norwegian cruise", v1Id: 1533 },
  { match: "aeroports de montreal", v1Id: 1531 },
  { match: "hilton", v1Id: 1533 },

  // Sports & Plein air (48) split → 1721 / 1722 / 1723 (plus 1615 for physio)
  { match: "sepaq", v1Id: 1723 },
  { match: "bloc shop", v1Id: 1723 },
  { match: "mountain equipment", v1Id: 1722 },
  { match: "decathlon", v1Id: 1722 },
  { match: "la cordee", v1Id: 1722 },
  { match: "physioactif", v1Id: 1615 },

  // Électroménagers & Meubles (53) split → 1311 / 1312
  { match: "tanguay", v1Id: 1311 },
  { match: "bouclair", v1Id: 1311 },

  // Projets (73) — no good default; "CLAUDE.AI" / "NAME-CHEAP" → 1734
  { match: "claude.ai", v1Id: 1734 },
  { match: "name-cheap", v1Id: 1734 },
];
|
||||
|
||||
/**
 * Shape of one DEFAULT_MAPPINGS entry, discriminated on `kind`:
 *  - "single": exactly one v1 target;
 *  - "split" : several candidate v1 targets plus the primary one used by default;
 *  - "none"  : no v1 equivalent (confidence is always "none").
 */
type DefaultMappingEntry =
  | { kind: "single"; v1Id: number; confidence: ConfidenceBadge; notes?: string }
  | { kind: "split"; primaryV1Id: number; splitV1Ids: number[]; confidence: ConfidenceBadge; notes?: string }
  | { kind: "none"; confidence: "none"; notes?: string };
|
||||
|
||||
/**
 * Default mapping per v2 seed category id. Keys are the v2 category ids
 * from `src-tauri/src/database/seed_categories.sql` / `categoryService.reinitializeCategories()`.
 *
 * Confidence follows mapping-old-to-new.md:
 *   🟢 Haute   → high
 *   🟡 Moyenne → medium
 *   🟠 Basse   → low
 *   🔴 Aucune  → none
 *
 * Note: these defaults are only the "last resort" — Pass 1/2 can overwrite
 * them via keyword matches. The `primaryV1Id` of a split is the
 * "remainder → X by default" target from the rationale column.
 *
 * The key set of this table also defines which v2 ids count as "seeded"
 * (see V2_SEEDED_IDS), so a seeded category without a v1 equivalent still
 * needs a `kind: "none"` entry here.
 */
const DEFAULT_MAPPINGS: Readonly<Record<number, DefaultMappingEntry>> = {
  // Revenus
  10: { kind: "single", v1Id: 1011, confidence: "high" }, // Paie → Paie régulière
  11: { kind: "single", v1Id: 1090, confidence: "high" }, // Autres revenus

  // Dépenses récurrentes
  20: { kind: "single", v1Id: 1211, confidence: "high" }, // Loyer
  21: { kind: "single", v1Id: 1221, confidence: "high" }, // Électricité
  22: { kind: "single", v1Id: 1111, confidence: "high" }, // Épicerie → régulière
  23: { kind: "single", v1Id: 1931, confidence: "high" }, // Dons → Dons de charité
  24: { kind: "single", v1Id: 1121, confidence: "medium" }, // Restaurant
  25: { kind: "single", v1Id: 1911, confidence: "high" }, // Frais bancaires → compte
  // NOTE(review): 1710 is the only split primary that is not listed in its own
  // splitV1Ids — confirm it is a valid target in categoryTaxonomyV1.json.
  26: { kind: "split", primaryV1Id: 1710, splitV1Ids: [1711, 1712, 1713, 1714, 1715, 1741], confidence: "low" }, // Jeux, Films & Livres
  27: { kind: "single", v1Id: 1714, confidence: "high" }, // Abonnements Musique
  28: { kind: "split", primaryV1Id: 1521, splitV1Ids: [1521, 1522], confidence: "medium" }, // Transport en commun
  29: { kind: "split", primaryV1Id: 1231, splitV1Ids: [1231, 1232, 1234], confidence: "medium" }, // Internet & Télécom
  30: { kind: "single", v1Id: 1751, confidence: "medium" }, // Animaux
  31: { kind: "split", primaryV1Id: 1250, splitV1Ids: [1250, 1516, 1616, 1630], confidence: "low" }, // Assurances
  32: { kind: "single", v1Id: 1611, confidence: "high" }, // Pharmacie
  33: { kind: "single", v1Id: 1213, confidence: "high" }, // Taxes municipales

  // Dépenses ponctuelles
  40: { kind: "split", primaryV1Id: 1513, splitV1Ids: [1512, 1513, 1514, 1515], confidence: "low" }, // Voiture
  41: { kind: "single", v1Id: 1946, confidence: "medium" }, // Amazon → divers
  42: { kind: "single", v1Id: 1312, confidence: "low" }, // Électroniques → électroménagers
  43: { kind: "single", v1Id: 1810, confidence: "high" }, // Alcool
  44: { kind: "single", v1Id: 1940, confidence: "high" }, // Cadeaux
  45: { kind: "single", v1Id: 1410, confidence: "medium" }, // Vêtements → adultes
  46: { kind: "single", v1Id: 1932, confidence: "high" }, // CPA
  47: { kind: "split", primaryV1Id: 1533, splitV1Ids: [1531, 1532, 1533, 1534], confidence: "medium" }, // Voyage
  48: { kind: "split", primaryV1Id: 1722, splitV1Ids: [1721, 1722, 1723], confidence: "medium" }, // Sports & Plein air
  49: { kind: "single", v1Id: 1711, confidence: "high" }, // Spectacles & sorties

  // Maison
  50: { kind: "single", v1Id: 1212, confidence: "high" }, // Hypothèque
  51: { kind: "single", v1Id: 1243, confidence: "medium" }, // Achats maison → matériaux & outils
  52: { kind: "single", v1Id: 1241, confidence: "high" }, // Entretien maison
  53: { kind: "split", primaryV1Id: 1311, splitV1Ids: [1311, 1312], confidence: "medium" }, // Électroménagers & Meubles
  54: { kind: "single", v1Id: 1243, confidence: "high" }, // Outils → matériaux & outils

  // Placements
  60: { kind: "single", v1Id: 1964, confidence: "medium" }, // Placements → non-enregistré
  61: { kind: "single", v1Id: 1980, confidence: "high" }, // Transferts internes

  // Autres
  70: { kind: "single", v1Id: 1922, confidence: "medium" }, // Impôts → provincial (default)
  71: { kind: "single", v1Id: 1971, confidence: "high" }, // Paiement CC
  72: { kind: "single", v1Id: 1945, confidence: "high" }, // Retrait cash
  73: { kind: "none", confidence: "none", notes: "No direct v1 equivalent; user must decide or keep as custom" },

  // Level-3 children of Assurances (31) — for profiles that already split them
  310: { kind: "single", v1Id: 1516, confidence: "high" }, // Assurance-auto
  311: { kind: "single", v1Id: 1250, confidence: "high" }, // Assurance-habitation
  312: { kind: "single", v1Id: 1630, confidence: "high" }, // Assurance-vie
};
|
||||
|
||||
/**
 * v2 parent category ids (Revenus, Dépenses récurrentes, ...). Never mapped
 * directly — they are structural containers and are replaced by v1 roots.
 * Typed as ReadonlySet so the lookup table cannot be mutated by accident.
 */
const V2_STRUCTURAL_PARENT_IDS: ReadonlySet<number> = new Set<number>([1, 2, 3, 4, 5, 6]);
|
||||
|
||||
/**
 * All v2 category ids that are part of the standard v2 seed, derived from the
 * keys of DEFAULT_MAPPINGS. Anything outside this set (and not a structural
 * parent) is a user-custom category → goes to `preserved`.
 */
const V2_SEEDED_IDS: ReadonlySet<number> = new Set<number>(Object.keys(DEFAULT_MAPPINGS).map(Number));
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Main entry point
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Compute the v2 → v1 migration plan for one profile snapshot.
 *
 * Each v2 category is handled in input order:
 *  - structural parents (V2_STRUCTURAL_PARENT_IDS) are skipped entirely;
 *  - ids outside V2_SEEDED_IDS are custom categories and go to `preserved`;
 *  - every other id is resolved by `resolveSeededCategory` and goes to `rows`.
 *
 * @param profileData Categories, keywords, transactions and (optionally)
 *                    suppliers of the profile.
 * @returns The plan; `unresolved` and `stats` are derived from `rows` only.
 */
export function computeMigrationPlan(profileData: ProfileData): MigrationPlan {
  const rows: MappingRow[] = [];
  const preserved: MappingRow[] = [];

  // Index the inputs once so each category lookup below is a map access.
  const keywordsByCat = groupBy(profileData.keywords, (k) => k.category_id);
  // Transactions without a category are grouped under the sentinel key -1.
  const txsByCat = groupBy(profileData.transactions, (t) => t.category_id ?? -1);
  const suppliersById = new Map<number, V2SupplierInput>();
  for (const s of profileData.suppliers ?? []) suppliersById.set(s.id, s);

  for (const cat of profileData.v2Categories) {
    // Skip structural v2 parents — they have no direct v1 equivalent. The
    // migration writer creates the v1 roots fresh from the taxonomy.
    if (V2_STRUCTURAL_PARENT_IDS.has(cat.id)) continue;

    if (!V2_SEEDED_IDS.has(cat.id)) {
      // Custom category → preserved bucket.
      preserved.push({
        v2CategoryId: cat.id,
        v2CategoryName: cat.name,
        v1TargetId: null,
        v1TargetName: null,
        confidence: "none",
        reason: "preserved",
        notes: "User-created category, not in v2 seed. Kept under 'Catégories personnalisées (migration)'.",
      });
      continue;
    }

    rows.push(
      resolveSeededCategory(
        cat,
        keywordsByCat.get(cat.id) ?? [],
        txsByCat.get(cat.id) ?? [],
        suppliersById
      )
    );
  }

  // Tally confidences and collect unresolved rows in a single pass.
  const stats: MigrationPlanStats = { total: rows.length, high: 0, medium: 0, low: 0, none: 0 };
  const unresolved: MappingRow[] = [];
  for (const row of rows) {
    stats[row.confidence] += 1;
    if (row.confidence === "none") unresolved.push(row);
  }

  return { rows, preserved, unresolved, stats };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pass runners
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Resolve one seeded v2 category by trying the passes in priority order and
 * returning the row produced by the first pass that yields a result.
 *
 * NOTE(review): Pass 1 and Pass 2 run for every seeded category, not only for
 * "split" ones, so a keyword or transaction hit replaces even a
 * high-confidence single default. Confirm this is intended by the spec.
 *
 * @param cat          The v2 category being resolved.
 * @param keywords     Keywords attached to this category.
 * @param transactions Transactions currently assigned to this category.
 * @param suppliers    Supplier lookup by id, used by Pass 2.
 */
function resolveSeededCategory(
  cat: V2CategoryInput,
  keywords: V2KeywordInput[],
  transactions: V2TransactionInput[],
  suppliers: Map<number, V2SupplierInput>
): MappingRow {
  const base: Omit<MappingRow, "v1TargetId" | "v1TargetName" | "confidence" | "reason"> = {
    v2CategoryId: cat.id,
    v2CategoryName: cat.name,
  };

  // Pass 1 — keyword match. We look at the user's v2 keywords for this
  // category and see if any of them matches a KEYWORD_TO_V1 rule.
  const pass1 = runKeywordPass(keywords);
  if (pass1 !== null) {
    return { ...base, v1TargetId: pass1.v1TargetId, v1TargetName: pass1.v1TargetName, confidence: "high", reason: "keyword" };
  }

  // Pass 2 — supplier propagation. For categories that have transactions
  // attached, look at the raw description or supplier name. If one of the
  // KEYWORD_TO_V1 rules matches an attached transaction / supplier, we
  // propagate with medium confidence.
  const pass2 = runSupplierPass(transactions, suppliers);
  if (pass2 !== null) {
    return { ...base, v1TargetId: pass2.v1TargetId, v1TargetName: pass2.v1TargetName, confidence: "medium", reason: "supplier" };
  }

  // Pass 3 — default fallback from the mapping table. The pass result supplies
  // the target, confidence and reason fields merged into the row.
  const pass3 = runDefaultPass(cat.id);
  if (pass3 !== null) {
    return { ...base, ...pass3 };
  }

  // Pass 4 — needs review.
  return {
    ...base,
    v1TargetId: null,
    v1TargetName: null,
    confidence: "none",
    reason: "review",
    notes: "No mapping rule matched; needs manual review.",
  };
}
|
||||
|
||||
/**
 * Pass 1 — tries to derive a v1 target from a category's keywords.
 *
 * Keywords are examined in the order given. For each one, the normalized
 * keyword is first tested against every KEYWORD_TO_V1 rule (substring match,
 * first rule wins); if no rule matches, the normalized keyword is looked up
 * as an exact v1 leaf name. The first keyword that yields a target ends the
 * search.
 *
 * @param keywords Keywords belonging to one v2 category.
 * @returns The matched v1 target, or null when no keyword matches anything.
 */
function runKeywordPass(keywords: V2KeywordInput[]): V1Target | null {
  for (const { keyword } of keywords) {
    const haystack = normalizeForMatch(keyword);

    // Rule table first: a rule fires when its pattern occurs anywhere inside
    // the normalized keyword.
    for (const rule of KEYWORD_TO_V1) {
      if (haystack.includes(rule.match)) return toV1Target(rule.v1Id);
    }

    // Fallback: the keyword may literally be the name of a v1 leaf.
    const leaf = findLeafByNormalizedName(haystack);
    if (leaf !== null) return toV1Target(leaf.id);
  }

  return null;
}
|
||||
|
||||
/**
 * Pass 2 — tries to derive a v1 target from the transactions attached to a
 * category.
 *
 * For each transaction (in order) the candidate texts are its description,
 * when non-empty, followed by the name of its supplier, when the transaction
 * carries a supplier id that is present in `suppliers`. Each candidate is
 * normalized and tested against every KEYWORD_TO_V1 rule by substring match;
 * the first match found is returned.
 *
 * @param transactions Transactions belonging to one v2 category.
 * @param suppliers    Supplier lookup by id.
 * @returns The matched v1 target, or null when nothing matches.
 */
function runSupplierPass(
  transactions: V2TransactionInput[],
  suppliers: Map<number, V2SupplierInput>
): V1Target | null {
  for (const { description, supplier_id } of transactions) {
    // Description comes first so it takes precedence over the supplier name.
    const candidates: string[] = description ? [description] : [];

    // `== null` covers both null and undefined supplier ids.
    const supplier = supplier_id == null ? undefined : suppliers.get(supplier_id);
    if (supplier) candidates.push(supplier.name);

    for (const text of candidates) {
      const haystack = normalizeForMatch(text);
      for (const rule of KEYWORD_TO_V1) {
        if (haystack.includes(rule.match)) return toV1Target(rule.v1Id);
      }
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Pass 3 — looks the v2 category id up in DEFAULT_MAPPINGS.
 *
 * The shape of the result depends on the entry's `kind`:
 *   - "single": one v1 target, with the entry's own confidence.
 *   - "split":  a primary v1 target plus the list of split targets.
 *   - otherwise (the explicit "no good default" entry): null target,
 *     confidence "none", reason "review".
 *
 * @param v2Id Id of the v2 category to look up.
 * @returns The target-related fields of a mapping row, or null when the table
 *          has no entry for `v2Id`.
 */
function runDefaultPass(
  v2Id: number
): Pick<MappingRow, "v1TargetId" | "v1TargetName" | "confidence" | "reason" | "splits" | "notes"> | null {
  const entry = DEFAULT_MAPPINGS[v2Id];
  if (!entry) return null;

  switch (entry.kind) {
    case "single": {
      const { v1TargetId, v1TargetName } = toV1Target(entry.v1Id);
      return {
        v1TargetId,
        v1TargetName,
        confidence: entry.confidence,
        reason: "default",
        notes: entry.notes,
      };
    }

    case "split": {
      // The primary target is resolved before the split targets.
      const { v1TargetId, v1TargetName } = toV1Target(entry.primaryV1Id);
      return {
        v1TargetId,
        v1TargetName,
        confidence: entry.confidence,
        reason: "default",
        splits: entry.splitV1Ids.map(toV1Target),
        notes: entry.notes,
      };
    }

    default:
      // kind === "none" — the table explicitly says there is no good default.
      return {
        v1TargetId: null,
        v1TargetName: null,
        confidence: "none",
        reason: "review",
        notes: entry.notes,
      };
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Utilities
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Buckets `items` by the key that `keyFn` derives for each element.
 *
 * Accepts any iterable (arrays, readonly arrays, Sets, generators, ...) and
 * never mutates it. Keys are compared using `Map` key equality, and both the
 * keys and the elements inside each bucket keep their first-seen order.
 *
 * @param items Elements to group; consumed exactly once.
 * @param keyFn Computes the grouping key for one element.
 * @returns A new Map from key to the elements that produced that key. An
 *          empty input yields an empty Map.
 */
function groupBy<T, K>(items: Iterable<T>, keyFn: (item: T) => K): Map<K, T[]> {
  const groups = new Map<K, T[]>();

  for (const item of items) {
    const key = keyFn(item);
    const bucket = groups.get(key);
    if (bucket === undefined) {
      // First element for this key: start a fresh bucket.
      groups.set(key, [item]);
    } else {
      bucket.push(item);
    }
  }

  return groups;
}
|
||||
|
||||
/**
 * Lazily built lookup from normalized leaf name to taxonomy leaf.
 * `null` means the index has not been built yet.
 */
let leafIndex: Map<string, TaxonomyNode> | null = null;

/**
 * Finds the taxonomy leaf whose normalized name is exactly `normalized`.
 *
 * The name index is built on first use from `getLeaves()` and then reused.
 * When two leaves normalize to the same name, the one that `getLeaves()`
 * yields last is the one kept in the index.
 *
 * @param normalized A string already passed through `normalizeForMatch`.
 * @returns The matching leaf, or null when no leaf has that normalized name.
 */
function findLeafByNormalizedName(normalized: string): TaxonomyNode | null {
  let index = leafIndex;

  if (index === null) {
    // Build into a local and publish only once complete: if getLeaves() or
    // normalizeForMatch() throws part-way through, the module-level cache
    // stays null and the next call retries instead of silently answering
    // from an empty or half-filled index.
    index = new Map<string, TaxonomyNode>();
    for (const leaf of getLeaves()) {
      index.set(normalizeForMatch(leaf.name), leaf);
    }
    leafIndex = index;
  }

  return index.get(normalized) ?? null;
}
|
||||
|
||||
/**
 * Test-only hook: drops the cached leaf-name index by setting it back to
 * null, so the next call to `findLeafByNormalizedName` rebuilds it.
 */
export function __resetMappingServiceCachesForTests(): void {
  leafIndex = null;
}
|
||||
Loading…
Reference in a new issue