Simpl-Resultat/src/services/categorizationService.ts
Le-King-Fu ca531262f7 feat: add auto-categorize button and fix keyword word-boundary matching
Replace substring matching (.includes) with \b word-boundary regex so
keywords like "Pay" no longer match "Payment". Add an auto-categorize
button on the transactions page that re-runs keyword matching on
uncategorized transactions and displays the result count.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 00:02:51 +00:00

78 lines
2.2 KiB
TypeScript

import { getDb } from "./db";
import type { Keyword } from "../shared/types";
/**
 * Produce the canonical form of a description used for keyword matching.
 *
 * Steps, in order:
 * 1. decompose to NFD and drop combining diacritical marks (U+0300-U+036F),
 *    so accented letters compare equal to their base letters;
 * 2. lowercase;
 * 3. collapse every whitespace run to a single space and trim both ends.
 *
 * @param desc - Raw description text.
 * @returns The accent-free, lowercase, single-spaced, trimmed text.
 */
function normalizeDescription(desc: string): string {
  const withoutAccents = desc.normalize("NFD").replace(/[\u0300-\u036f]/g, "");
  const lowered = withoutAccents.toLowerCase();
  const singleSpaced = lowered.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
/**
 * Outcome of keyword-based categorization for one transaction description.
 * Both fields are null when no active keyword matched.
 */
interface CategorizationResult {
// Category id of the matched keyword, or null when nothing matched.
category_id: number | null;
// Supplier id of the matched keyword; null when the keyword carries no
// supplier or when nothing matched.
supplier_id: number | null;
}
/**
 * Auto-categorize a single transaction description.
 *
 * Loads the active keywords ordered by descending priority and returns the
 * category/supplier of the first keyword that occurs in the description as a
 * whole word or phrase. Both the description and each keyword are normalized
 * (accents stripped, lowercased, whitespace collapsed) before comparison.
 *
 * @param description - Raw transaction description.
 * @returns The first matching keyword's `category_id` and `supplier_id`
 *   (`supplier_id` falls back to `null`), or both `null` when nothing matches.
 */
export async function categorizeDescription(
  description: string
): Promise<CategorizationResult> {
  const db = await getDb();
  const keywords = await db.select<Keyword[]>(
    "SELECT * FROM keywords WHERE is_active = 1 ORDER BY priority DESC"
  );
  const normalized = normalizeDescription(description);

  for (const kw of keywords) {
    const normalizedKeyword = normalizeDescription(kw.keyword);
    // A blank keyword would compile to a pattern that matches virtually every
    // description, so it must never be allowed to categorize anything.
    if (normalizedKeyword === "") {
      continue;
    }

    const escaped = normalizedKeyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // `\b` only holds between a word and a non-word character. Anchoring a
    // keyword edge that is itself a non-word character (e.g. "c++", "*amazon")
    // would make the keyword unmatchable next to whitespace or the string
    // boundary, so the anchor is only emitted for word-character edges.
    // (Conditional `\b` is used instead of lookbehind for engine compatibility.)
    const leading = /^\w/.test(normalizedKeyword) ? "\\b" : "";
    const trailing = /\w$/.test(normalizedKeyword) ? "\\b" : "";

    if (new RegExp(`${leading}${escaped}${trailing}`).test(normalized)) {
      return {
        category_id: kw.category_id,
        supplier_id: kw.supplier_id ?? null,
      };
    }
  }

  return { category_id: null, supplier_id: null };
}
/**
 * Auto-categorize a batch of transactions (by their descriptions).
 *
 * Loads the active keywords once, ordered by descending priority, compiles
 * one pattern per keyword, and then resolves every description against those
 * patterns. The first keyword that occurs in a description as a whole word or
 * phrase wins. Descriptions and keywords are normalized (accents stripped,
 * lowercased, whitespace collapsed) before comparison.
 *
 * @param descriptions - Raw transaction descriptions.
 * @returns One result per input description, in the same order; both fields
 *   are `null` for descriptions that no keyword matched.
 */
export async function categorizeBatch(
  descriptions: string[]
): Promise<CategorizationResult[]> {
  const db = await getDb();
  const keywords = await db.select<Keyword[]>(
    "SELECT * FROM keywords WHERE is_active = 1 ORDER BY priority DESC"
  );

  // Compile each keyword's pattern once up front instead of once per
  // (description, keyword) pair; the array keeps the priority order.
  const matchers: { pattern: RegExp; result: CategorizationResult }[] = [];
  for (const kw of keywords) {
    const normalizedKeyword = normalizeDescription(kw.keyword);
    // A blank keyword would compile to a pattern that matches virtually every
    // description, so it must never be allowed to categorize anything.
    if (normalizedKeyword === "") {
      continue;
    }

    const escaped = normalizedKeyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // `\b` only holds between a word and a non-word character. Anchoring a
    // keyword edge that is itself a non-word character (e.g. "c++", "*amazon")
    // would make the keyword unmatchable next to whitespace or the string
    // boundary, so the anchor is only emitted for word-character edges.
    // (Conditional `\b` is used instead of lookbehind for engine compatibility.)
    const leading = /^\w/.test(normalizedKeyword) ? "\\b" : "";
    const trailing = /\w$/.test(normalizedKeyword) ? "\\b" : "";

    matchers.push({
      // No `g`/`y` flag, so `test` is stateless and the regex can be reused.
      pattern: new RegExp(`${leading}${escaped}${trailing}`),
      result: {
        category_id: kw.category_id,
        supplier_id: kw.supplier_id ?? null,
      },
    });
  }

  return descriptions.map((desc) => {
    const normalized = normalizeDescription(desc);
    const hit = matchers.find(({ pattern }) => pattern.test(normalized));
    // Hand out a fresh object per description so callers may mutate one
    // result without affecting the others.
    return hit ? { ...hit.result } : { category_id: null, supplier_id: null };
  });
}