Simpl-Resultat/src/services/categorizationService.ts
le king fu 62430c63dc
Some checks failed
PR Check / rust (push) Has been cancelled
PR Check / frontend (push) Has been cancelled
PR Check / rust (pull_request) Has been cancelled
PR Check / frontend (pull_request) Has been cancelled
feat: category zoom + secure AddKeywordDialog with context menu (#74)
Service layer
- New reportService.getCategoryZoom(categoryId, from, to, includeChildren) —
  bounded recursive CTE (WHERE ct.depth < 5) protects against parent_id cycles;
  direct-only path skips the CTE; every binding is parameterised
- Export categorizationService helpers normalizeDescription / buildKeywordRegex /
  compileKeywords so the dialog can reuse them
- New validateKeyword() enforces 2–64 char length (anti-ReDoS), whitespace-only
  rejection, returns discriminated result
- New previewKeywordMatches(keyword, limit=50) uses parameterised LIKE + regex
  filter in memory; caps candidate scan at 1000 rows to protect against
  catastrophic backtracking
- New applyKeywordWithReassignment wraps INSERT (or UPDATE-reassign) +
  per-transaction UPDATEs in an explicit BEGIN/COMMIT/ROLLBACK; rejects
  existing keyword reassignment unless allowReplaceExisting is set; never
  recategorises historical transactions beyond the ids the caller supplied

Hook
- Flesh out useCategoryZoom with reducer + fetch + refetch hook

Components (flat under src/components/reports/)
- CategoryZoomHeader — category combobox + include/direct toggle
- CategoryDonutChart — template'd from dashboard/CategoryPieChart with
  innerRadius=55 and ChartPatternDefs for SVG patterns
- CategoryEvolutionChart — AreaChart with Intl-formatted axes
- CategoryTransactionsTable — sortable table with per-row onContextMenu
  → ContextMenu → "Add as keyword" action

AddKeywordDialog — src/components/categories/AddKeywordDialog.tsx
- Lives in categories/ (not reports/) because it is a keyword-editing widget
  consumed from multiple sections
- Renders transaction descriptions as React children only (no
  dangerouslySetInnerHTML); CSS truncation (CWE-79 safe)
- Per-row checkboxes for applying recategorisation; cap visible rows at 50;
  explicit opt-in checkbox to extend to N-50 non-displayed matches
- Surfaces apply errors + "keyword already exists" replace prompt
- Re-runs category zoom fetch on success so the zoomed view updates

Page
- ReportsCategoryPage composes header + donut + evolution + transactions
  + AddKeywordDialog, fetches from useCategoryZoom, preserves query string
  for back navigation

i18n
- New keys reports.category.* and reports.keyword.* in FR + EN
- Plural forms use i18next v25 _one / _other suffixes (nMatches)

Tests
- 3 reportService tests cover bounded CTE, cycle-guard depth check, direct-only fallthrough
- New categorizationService.test.ts: 13 tests covering validation boundaries,
  parameterised LIKE preview, regex word-boundary filter, explicit BEGIN/COMMIT
  wrapping, rollback on failure, existing keyword reassignment policy
- 62 total tests passing

Fixes #74

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 15:09:17 -04:00

273 lines
8.6 KiB
TypeScript

import { getDb } from "./db";
import type { Keyword, RecentTransaction } from "../shared/types";
/**
 * Canonicalise a free-text description so keyword matching is insensitive
 * to letter case, diacritics and spacing.
 *
 * Steps, in order:
 *  1. NFD-decompose the string,
 *  2. drop combining marks in U+0300–U+036F (this is what removes accents),
 *  3. lowercase,
 *  4. squeeze every whitespace run into a single space,
 *  5. trim both ends.
 *
 * @param desc Raw description text.
 * @returns The normalized form used on both sides of a keyword comparison.
 */
export function normalizeDescription(desc: string): string {
  const decomposed = desc.normalize("NFD");
  const withoutMarks = decomposed.replace(/[\u0300-\u036f]/g, "");
  const lowered = withoutMarks.toLowerCase();
  const singleSpaced = lowered.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
const WORD_CHAR = /\w/;
/**
 * Build a regex for a keyword with boundaries chosen per edge.
 *
 * - When an edge of the keyword is a word character (a-z, 0-9, _), that side
 *   is anchored with `\b`.
 * - When an edge is a non-word character (brackets, parentheses, dashes, ...),
 *   `\b` would not behave as a delimiter there, so that side is anchored with
 *   a lookaround requiring whitespace or the string boundary:
 *   `(?<=\s|^)` on the left, `(?=\s|$)` on the right. This lets keywords such
 *   as "[VIREMENT]" or "(INTERAC)" match.
 *
 * All regex metacharacters inside the keyword are escaped, so the keyword is
 * always matched literally.
 *
 * @param normalizedKeyword Keyword text, expected to be already normalized.
 * @returns A flag-less RegExp. For an empty keyword the returned pattern
 *          never matches anything.
 */
export function buildKeywordRegex(normalizedKeyword: string): RegExp {
  // An empty keyword has no edges to inspect. Without this guard the edge
  // lookups yield `undefined`, which RegExp#test coerces to the string
  // "undefined" (a word character), producing /\b\b/ — a pattern that
  // matches nearly every description. Return a never-matching pattern instead.
  if (normalizedKeyword.length === 0) {
    return new RegExp("(?!)");
  }

  const escaped = normalizedKeyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const firstChar = normalizedKeyword.charAt(0);
  const lastChar = normalizedKeyword.charAt(normalizedKeyword.length - 1);

  const left = WORD_CHAR.test(firstChar) ? "\\b" : "(?<=\\s|^)";
  const right = WORD_CHAR.test(lastChar) ? "\\b" : "(?=\\s|$)";

  return new RegExp(`${left}${escaped}${right}`);
}
/**
 * Outcome of matching one description against the keyword list.
 * Both fields are null when no keyword matched.
 */
interface CategorizationResult {
/** Category of the first keyword that matched, or null when nothing matched. */
category_id: number | null;
/** Supplier carried by the matching keyword; null when it has none or nothing matched. */
supplier_id: number | null;
}
/**
 * A keyword row paired with its pre-built matching regex, as produced by
 * compileKeywords so the pattern is built once and reused across descriptions.
 */
interface CompiledKeyword {
/** Pattern built by buildKeywordRegex from the normalized keyword text. */
regex: RegExp;
/** Category assigned when the pattern matches. */
category_id: number;
/** Supplier assigned when the pattern matches; null when the keyword has none. */
supplier_id: number | null;
}
/**
 * Pre-build the matching regex for every keyword row so the same compiled
 * list can be tested against many descriptions without rebuilding patterns.
 *
 * @param keywords Keyword rows; order is preserved in the output.
 * @returns One compiled entry per row. A missing `supplier_id` becomes null.
 */
export function compileKeywords(keywords: Keyword[]): CompiledKeyword[] {
  const toCompiled = ({ keyword, category_id, supplier_id }: Keyword): CompiledKeyword => {
    // Normalize the keyword the same way descriptions are normalized before matching.
    const pattern = buildKeywordRegex(normalizeDescription(keyword));
    return {
      regex: pattern,
      category_id,
      supplier_id: supplier_id ?? null,
    };
  };
  return keywords.map(toCompiled);
}
/**
 * Find the first compiled keyword whose pattern matches an already
 * normalized description.
 *
 * @param normalized Description text, already normalized by the caller.
 * @param compiled Compiled keywords, tested in array order (first hit wins).
 * @returns The hit's category/supplier, or both null when nothing matches.
 */
function matchDescription(
  normalized: string,
  compiled: CompiledKeyword[]
): CategorizationResult {
  const hit = compiled.find((entry) => entry.regex.test(normalized));
  if (hit === undefined) {
    return { category_id: null, supplier_id: null };
  }
  return {
    category_id: hit.category_id,
    supplier_id: hit.supplier_id,
  };
}
/**
 * Auto-categorize one transaction description.
 *
 * Loads the active keywords ordered by descending priority, compiles them,
 * and returns the category/supplier of the first keyword that matches the
 * normalized description.
 *
 * @param description Raw transaction description.
 * @returns The matching ids, or both null when no keyword matches.
 */
export async function categorizeDescription(
  description: string
): Promise<CategorizationResult> {
  const connection = await getDb();
  const activeKeywords = await connection.select<Keyword[]>(
    "SELECT * FROM keywords WHERE is_active = 1 ORDER BY priority DESC"
  );
  const matchers = compileKeywords(activeKeywords);
  return matchDescription(normalizeDescription(description), matchers);
}
/**
 * Auto-categorize many transaction descriptions in one pass.
 *
 * The active keywords are fetched (descending priority) and compiled once,
 * then every description is normalized and matched against that same list.
 *
 * @param descriptions Raw transaction descriptions.
 * @returns One result per input, in the same order.
 */
export async function categorizeBatch(
  descriptions: string[]
): Promise<CategorizationResult[]> {
  const connection = await getDb();
  const activeKeywords = await connection.select<Keyword[]>(
    "SELECT * FROM keywords WHERE is_active = 1 ORDER BY priority DESC"
  );
  const matchers = compileKeywords(activeKeywords);

  const results: CategorizationResult[] = [];
  for (const text of descriptions) {
    results.push(matchDescription(normalizeDescription(text), matchers));
  }
  return results;
}
// --- AddKeywordDialog support (Issue #74) ---

/** Shortest keyword length (after trimming) accepted by validateKeyword. */
export const KEYWORD_MIN_LENGTH = 2;
/** Longest keyword length (after trimming) accepted by validateKeyword. */
export const KEYWORD_MAX_LENGTH = 64;
/** Default cap on the number of rows a keyword preview returns as visible. */
export const KEYWORD_PREVIEW_LIMIT = 50;

/**
 * Check a user-supplied keyword before it is turned into a regex.
 *
 * The input is trimmed first, so whitespace-only input is reported as
 * `tooShort`. The upper bound keeps the compiled pattern small, since it is
 * later replayed across many transactions.
 *
 * @param raw Keyword exactly as typed by the user.
 * @returns `{ ok: true, value }` with the trimmed keyword, or
 *          `{ ok: false, reason }` where reason is `"tooShort"` or `"tooLong"`.
 */
export function validateKeyword(raw: string): { ok: true; value: string } | { ok: false; reason: "tooShort" | "tooLong" } {
  const value = raw.trim();
  const size = value.length;
  if (size >= KEYWORD_MIN_LENGTH && size <= KEYWORD_MAX_LENGTH) {
    return { ok: true, value };
  }
  return { ok: false, reason: size < KEYWORD_MIN_LENGTH ? "tooShort" : "tooLong" };
}
/**
 * Preview the transactions that would be recategorised if the user commits
 * the given keyword.
 *
 * Two-stage filter:
 *  1. a coarse, parameterised `LIKE $1` pre-filter in SQL, hard-capped at the
 *     1000 most recent candidate rows;
 *  2. an exact in-memory filter using `buildKeywordRegex` on each candidate's
 *     normalized description.
 *
 * SECURITY: the keyword is never interpolated into the SQL string. `$1` is
 * the only binding and the `%...%` wrapping happens inside the bound value.
 *
 * @param keyword Raw keyword as typed by the user; validated here.
 * @param limit Maximum number of rows returned in `visible`
 *              (defaults to KEYWORD_PREVIEW_LIMIT; negative values act as 0).
 * @returns `visible` — the first `limit` matches (most recent first);
 *          `totalMatches` — how many of the scanned candidates matched. This
 *          may exceed `visible.length`, and is itself bounded by the
 *          1000-row candidate cap.
 *          Both are empty/zero when the keyword is invalid or normalizes to
 *          an empty string.
 */
export async function previewKeywordMatches(
  keyword: string,
  limit: number = KEYWORD_PREVIEW_LIMIT,
): Promise<{ visible: RecentTransaction[]; totalMatches: number }> {
  const validation = validateKeyword(keyword);
  if (!validation.ok) {
    return { visible: [], totalMatches: 0 };
  }

  const normalized = normalizeDescription(validation.value);
  // A keyword made only of characters that normalization strips (e.g. bare
  // combining marks) would become "%%" in SQL and a match-all pattern in the
  // regex stage. Treat it as matching nothing.
  if (normalized.length === 0) {
    return { visible: [], totalMatches: 0 };
  }
  const regex = buildKeywordRegex(normalized);
  const db = await getDb();

  // The pre-filter must return a superset of what the regex accepts.
  // Descriptions are compared to the regex only after whitespace runs are
  // collapsed, but the SQL side sees the raw text, so a literal single space
  // in the LIKE pattern would miss "FOO   BAR" for the keyword "foo bar".
  // Turning each space into `%` keeps the pre-filter permissive; the regex
  // below still enforces the exact match.
  // NOTE(review): the SQL side does not strip accents either, so a
  // description whose accents differ from the normalized keyword can still be
  // missed by this pre-filter — confirm against the DB engine's LOWER/LIKE
  // semantics before relying on the preview being exhaustive.
  const likePattern = `%${normalized.replace(/ /g, "%")}%`;
  const candidates = await db.select<RecentTransaction[]>(
    `SELECT t.id, t.date, t.description, t.amount,
            c.name AS category_name,
            c.color AS category_color
     FROM transactions t
     LEFT JOIN categories c ON t.category_id = c.id
     WHERE LOWER(t.description) LIKE $1
     ORDER BY t.date DESC
     LIMIT 1000`,
    [likePattern],
  );

  const matched = candidates.filter((tx) =>
    regex.test(normalizeDescription(tx.description)),
  );

  return {
    // Clamp so a negative limit yields no rows instead of slicing from the tail.
    visible: matched.slice(0, Math.max(0, limit)),
    totalMatches: matched.length,
  };
}
/** Arguments accepted by applyKeywordWithReassignment. */
export interface ApplyKeywordInput {
/** Raw keyword text; it is validated and trimmed before any write happens. */
keyword: string;
/** Category the keyword row and every listed transaction are assigned to. */
categoryId: number;
/** ids of transactions to recategorise (only those the user checked). */
transactionIds: number[];
/**
* When true, and a keyword with the same spelling already exists for a
* different category, that existing keyword is **reassigned** to the new
* category rather than creating a duplicate. When false, that situation
* makes the call fail with a `keyword_already_exists` error instead.
* Matches the spec decision that history is never touched — only the
* transactions listed in `transactionIds` are recategorised.
*/
allowReplaceExisting: boolean;
}
/** Summary returned by applyKeywordWithReassignment after a successful commit. */
export interface ApplyKeywordResult {
/** id of the keyword row that was reused, or the id reported for the new insert (0 when none is reported). */
keywordId: number;
/** Count of transaction UPDATE statements issued. */
updatedTransactions: number;
/** True when an existing keyword row was moved over from a different category. */
replacedExisting: boolean;
}
/**
 * INSERTs (or reassigns) a keyword and recategorises the given transaction
 * ids between an explicit BEGIN and COMMIT; any failure triggers ROLLBACK
 * and the original error is rethrown.
 *
 * Behaviour:
 *  - If a row with the same keyword text exists, it is updated to
 *    `input.categoryId` and re-activated. When it belongs to a different
 *    category and `allowReplaceExisting` is false, the call fails instead.
 *  - Otherwise a new keyword row is inserted with priority 100.
 *  - Each distinct id in `input.transactionIds` gets one UPDATE that sets
 *    its category and marks it as manually categorized. No other
 *    transactions are touched.
 *
 * SECURITY: every query is parameterised. The caller is expected to have
 * vetted `transactionIds` from a preview window that the user confirmed.
 *
 * NOTE(review): the existing-keyword lookup compares the trimmed spelling
 * exactly, while matching elsewhere is done on normalized text — confirm
 * whether case/accent variants should count as "the same keyword".
 * NOTE(review): BEGIN/COMMIT are issued as separate `execute` calls;
 * atomicity assumes the handle from getDb() runs them on one connection —
 * TODO confirm.
 *
 * @param input Keyword, target category, transaction ids and replace policy.
 * @returns The keyword id, the number of transaction UPDATEs issued, and
 *          whether an existing keyword changed category.
 * @throws Error `invalid_keyword:<reason>` when validation fails (before any
 *         database access).
 * @throws Error `keyword_already_exists` when the keyword exists under a
 *         different category and replacement was not allowed.
 * @throws Whatever the database layer throws; the transaction is rolled back
 *         first.
 */
export async function applyKeywordWithReassignment(
  input: ApplyKeywordInput,
): Promise<ApplyKeywordResult> {
  const validation = validateKeyword(input.keyword);
  if (!validation.ok) {
    throw new Error(`invalid_keyword:${validation.reason}`);
  }
  const keyword = validation.value;

  // Drop repeated ids so each transaction is updated (and counted) once.
  const transactionIds = Array.from(new Set(input.transactionIds));

  const db = await getDb();
  await db.execute("BEGIN");
  try {
    // Is there already a row for this keyword spelling?
    const existing = await db.select<Array<{ id: number; category_id: number }>>(
      `SELECT id, category_id FROM keywords WHERE keyword = $1 LIMIT 1`,
      [keyword],
    );

    let keywordId: number;
    let replacedExisting = false;

    if (existing.length > 0) {
      const current = existing[0];
      const changesCategory = current.category_id !== input.categoryId;
      if (!input.allowReplaceExisting && changesCategory) {
        throw new Error("keyword_already_exists");
      }
      // Also re-activates a keyword that had been disabled.
      await db.execute(
        `UPDATE keywords SET category_id = $1, is_active = 1 WHERE id = $2`,
        [input.categoryId, current.id],
      );
      keywordId = current.id;
      replacedExisting = changesCategory;
    } else {
      const result = await db.execute(
        `INSERT INTO keywords (keyword, category_id, priority) VALUES ($1, $2, $3)`,
        [keyword, input.categoryId, 100],
      );
      keywordId = Number(result.lastInsertId ?? 0);
    }

    let updatedTransactions = 0;
    // Sequential on purpose: the statements belong to the open transaction.
    for (const txId of transactionIds) {
      await db.execute(
        `UPDATE transactions
         SET category_id = $1,
             is_manually_categorized = 1,
             updated_at = CURRENT_TIMESTAMP
         WHERE id = $2`,
        [input.categoryId, txId],
      );
      updatedTransactions++;
    }

    await db.execute("COMMIT");
    return { keywordId, updatedTransactions, replacedExisting };
  } catch (e) {
    try {
      await db.execute("ROLLBACK");
    } catch {
      // A failing ROLLBACK (e.g. the transaction was already aborted) must
      // not replace the error that caused it; the original is rethrown below.
    }
    throw e;
  }
}