Service layer - New reportService.getCategoryZoom(categoryId, from, to, includeChildren) — bounded recursive CTE (WHERE ct.depth < 5) protects against parent_id cycles; direct-only path skips the CTE; every binding is parameterised - Export categorizationService helpers normalizeDescription / buildKeywordRegex / compileKeywords so the dialog can reuse them - New validateKeyword() enforces 2–64 char length (anti-ReDoS), whitespace-only rejection, returns discriminated result - New previewKeywordMatches(keyword, limit=50) uses parameterised LIKE + regex filter in memory; caps candidate scan at 1000 rows to protect against catastrophic backtracking - New applyKeywordWithReassignment wraps INSERT (or UPDATE-reassign) + per-transaction UPDATEs in an explicit BEGIN/COMMIT/ROLLBACK; rejects existing keyword reassignment unless allowReplaceExisting is set; never recategorises historical transactions beyond the ids the caller supplied Hook - Flesh out useCategoryZoom with reducer + fetch + refetch hook Components (flat under src/components/reports/) - CategoryZoomHeader — category combobox + include/direct toggle - CategoryDonutChart — template'd from dashboard/CategoryPieChart with innerRadius=55 and ChartPatternDefs for SVG patterns - CategoryEvolutionChart — AreaChart with Intl-formatted axes - CategoryTransactionsTable — sortable table with per-row onContextMenu → ContextMenu → "Add as keyword" action AddKeywordDialog — src/components/categories/AddKeywordDialog.tsx - Lives in categories/ (not reports/) because it is a keyword-editing widget consumed from multiple sections - Renders transaction descriptions as React children only (no dangerouslySetInnerHTML); CSS truncation (CWE-79 safe) - Per-row checkboxes for applying recategorisation; cap visible rows at 50; explicit opt-in checkbox to extend to N-50 non-displayed matches - Surfaces apply errors + "keyword already exists" replace prompt - Re-runs category zoom fetch on success so the zoomed view updates Page - 
ReportsCategoryPage composes header + donut + evolution + transactions + AddKeywordDialog, fetches from useCategoryZoom, preserves query string for back navigation i18n - New keys reports.category.* and reports.keyword.* in FR + EN - Plural forms use i18next v25 _one / _other suffixes (nMatches) Tests - 3 reportService tests cover bounded CTE, cycle-guard depth check, direct-only fallthrough - New categorizationService.test.ts: 13 tests covering validation boundaries, parameterised LIKE preview, regex word-boundary filter, explicit BEGIN/COMMIT wrapping, rollback on failure, existing keyword reassignment policy - 62 total tests passing Fixes #74 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
273 lines
8.6 KiB
TypeScript
273 lines
8.6 KiB
TypeScript
import { getDb } from "./db";
|
|
import type { Keyword, RecentTransaction } from "../shared/types";
|
|
|
|
/**
 * Canonicalise a transaction description so that keyword matching is
 * insensitive to letter case, diacritics and spacing.
 *
 * Steps, applied in this order:
 * 1. NFD-decompose, so accented letters split into base letter + combining mark.
 * 2. Remove the combining marks (U+0300–U+036F).
 * 3. Lowercase.
 * 4. Collapse every whitespace run into one space and trim both ends.
 *
 * @param desc - Raw description text.
 * @returns The normalized description.
 */
export function normalizeDescription(desc: string): string {
  const decomposed = desc.normalize("NFD");
  const unaccented = decomposed.replace(/[\u0300-\u036f]/g, "");
  const lowered = unaccented.toLowerCase();
  const singleSpaced = lowered.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
|
|
|
|
/** Matches one regex "word" character: ASCII letter, digit or underscore. */
const WORD_CHAR = /\w/;

/**
 * Build the matcher for a single, already-normalized keyword.
 *
 * Every regex metacharacter in the keyword is escaped, so the keyword is
 * always matched literally. The boundary on each side depends on the
 * keyword's own edge character:
 * - word character (a-z, 0-9, _): `\b`;
 * - anything else (brackets, parentheses, dashes, ...): a whitespace-or-edge
 *   lookaround, `(?<=\s|^)` on the left and `(?=\s|$)` on the right, because
 *   `\b` cannot anchor next to a non-word character. This is what lets
 *   keywords such as "[virement]" or "(interac)" match.
 *
 * @param normalizedKeyword - Keyword text, expected to be normalized already.
 * @returns A non-global regex. For an empty keyword, a regex that never
 *   matches (an empty keyword must not behave as a catch-all).
 */
export function buildKeywordRegex(normalizedKeyword: string): RegExp {
  // Without this guard the edge lookups below would read `undefined`, which
  // RegExp#test coerces to the string "undefined" (a word character); the
  // result would be `\b\b`, matching any text that contains a word.
  if (normalizedKeyword.length === 0) {
    return /(?!)/;
  }

  const escaped = normalizedKeyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const firstChar = normalizedKeyword.charAt(0);
  const lastChar = normalizedKeyword.charAt(normalizedKeyword.length - 1);
  const leftBoundary = WORD_CHAR.test(firstChar) ? "\\b" : "(?<=\\s|^)";
  const rightBoundary = WORD_CHAR.test(lastChar) ? "\\b" : "(?=\\s|$)";
  return new RegExp(`${leftBoundary}${escaped}${rightBoundary}`);
}
|
|
|
|
/** Outcome of matching one description against the keyword list. */
interface CategorizationResult {
  /** Category of the first matching keyword, or `null` when nothing matched. */
  category_id: number | null;
  /** Supplier of the first matching keyword, or `null` when absent or unmatched. */
  supplier_id: number | null;
}
|
|
|
|
/** A keyword row paired with its pre-built regex, ready for repeated matching. */
interface CompiledKeyword {
  /** Matcher built from the normalized keyword text. */
  regex: RegExp;
  /** Category assigned when `regex` matches. */
  category_id: number;
  /** Supplier assigned when `regex` matches; `null` when the keyword has none. */
  supplier_id: number | null;
}
|
|
|
|
/**
 * Turn keyword rows into reusable matchers.
 *
 * Each keyword's text is normalized and compiled to a regex exactly once, so
 * a batch of descriptions can be tested without rebuilding patterns.
 *
 * @param keywords - Keyword rows; the output keeps their order.
 * @returns One compiled entry per input row. A missing `supplier_id`
 *   becomes `null`.
 */
export function compileKeywords(keywords: Keyword[]): CompiledKeyword[] {
  const toCompiled = (entry: Keyword): CompiledKeyword => {
    const pattern = normalizeDescription(entry.keyword);
    return {
      regex: buildKeywordRegex(pattern),
      category_id: entry.category_id,
      supplier_id: entry.supplier_id ?? null,
    };
  };
  return keywords.map(toCompiled);
}
|
|
|
|
/**
 * Find the first compiled keyword whose regex matches the description.
 *
 * Entries are tried in array order and the first hit wins, so the caller's
 * ordering decides precedence.
 *
 * @param normalized - Description that has already been normalized.
 * @param compiled - Compiled keywords, highest precedence first.
 * @returns The ids of the first match, or both ids `null` when none matches.
 */
function matchDescription(
  normalized: string,
  compiled: CompiledKeyword[]
): CategorizationResult {
  const hit = compiled.find((entry) => entry.regex.test(normalized));
  if (hit === undefined) {
    return { category_id: null, supplier_id: null };
  }
  return { category_id: hit.category_id, supplier_id: hit.supplier_id };
}
|
|
|
|
/**
 * Auto-categorize one transaction description.
 *
 * Loads the active keywords ordered by descending priority, compiles them,
 * and returns the ids of the first keyword that matches the normalized
 * description.
 *
 * @param description - Raw transaction description.
 * @returns The matching `category_id` / `supplier_id`, or both `null` when
 *   no keyword matches.
 */
export async function categorizeDescription(
  description: string
): Promise<CategorizationResult> {
  const connection = await getDb();
  const activeKeywords = await connection.select<Keyword[]>(
    "SELECT * FROM keywords WHERE is_active = 1 ORDER BY priority DESC"
  );

  const matchers = compileKeywords(activeKeywords);
  const cleaned = normalizeDescription(description);
  return matchDescription(cleaned, matchers);
}
|
|
|
|
/**
 * Auto-categorize many transaction descriptions in one pass.
 *
 * The active keywords are fetched and compiled once, then reused for every
 * description.
 *
 * @param descriptions - Raw transaction descriptions.
 * @returns One result per description, in the same order as the input.
 */
export async function categorizeBatch(
  descriptions: string[]
): Promise<CategorizationResult[]> {
  const connection = await getDb();
  const activeKeywords = await connection.select<Keyword[]>(
    "SELECT * FROM keywords WHERE is_active = 1 ORDER BY priority DESC"
  );
  const matchers = compileKeywords(activeKeywords);

  const results: CategorizationResult[] = [];
  for (const description of descriptions) {
    const cleaned = normalizeDescription(description);
    results.push(matchDescription(cleaned, matchers));
  }
  return results;
}
|
|
|
|
// --- AddKeywordDialog support (Issue #74) ---
|
|
|
|
/** Shortest accepted keyword, measured after trimming. */
export const KEYWORD_MIN_LENGTH = 2;
/** Longest accepted keyword, measured after trimming. */
export const KEYWORD_MAX_LENGTH = 64;
/** Default number of matching transactions shown in a keyword preview. */
export const KEYWORD_PREVIEW_LIMIT = 50;

/**
 * Check a user-entered keyword before it is compiled into a regex.
 *
 * The input is trimmed first, so whitespace-only input is reported as too
 * short. The upper bound keeps the compiled pattern small, limiting ReDoS
 * exposure (CWE-1333) when the regex is later replayed across many
 * transactions.
 *
 * @param raw - Keyword exactly as typed.
 * @returns `{ ok: true, value }` carrying the trimmed keyword, or
 *   `{ ok: false, reason }` naming which length bound was violated.
 */
export function validateKeyword(
  raw: string,
): { ok: true; value: string } | { ok: false; reason: "tooShort" | "tooLong" } {
  const candidate = raw.trim();
  const size = candidate.length;
  if (size >= KEYWORD_MIN_LENGTH && size <= KEYWORD_MAX_LENGTH) {
    return { ok: true, value: candidate };
  }
  return { ok: false, reason: size < KEYWORD_MIN_LENGTH ? "tooShort" : "tooLong" };
}
|
|
|
|
/**
 * Preview the transactions that would be recategorised if the user commits
 * the given keyword.
 *
 * A parameterised `LIKE` narrows the candidates in SQL (most recent first,
 * hard-capped at 1000 rows); each candidate is then re-checked in memory with
 * `buildKeywordRegex` against its normalized description, so the final list
 * follows the same boundary rules as real categorization.
 *
 * SECURITY: the keyword is never interpolated into the SQL text. It reaches
 * the database only as the single bound parameter, and the `%...%` wrapping
 * happens inside that parameter value.
 *
 * NOTE(review): the SQL side compares against `LOWER(t.description)`, which
 * still carries any diacritics, while the pattern has them stripped. Accented
 * descriptions may therefore be dropped by the pre-filter before the regex
 * sees them — confirm whether the database folds accents.
 *
 * @param keyword - Keyword as typed by the user; validated with `validateKeyword`.
 * @param limit - Maximum number of rows returned in `visible`. Negative
 *   values are treated as 0.
 * @returns `visible`: the first `limit` matches. `totalMatches`: the number
 *   of matches among the scanned candidates, so it can exceed
 *   `visible.length` but never the 1000-row candidate cap. An invalid keyword
 *   yields an empty preview.
 */
export async function previewKeywordMatches(
  keyword: string,
  limit: number = KEYWORD_PREVIEW_LIMIT,
): Promise<{ visible: RecentTransaction[]; totalMatches: number }> {
  const validation = validateKeyword(keyword);
  if (!validation.ok) {
    return { visible: [], totalMatches: 0 };
  }
  const normalized = normalizeDescription(validation.value);
  // A keyword made only of combining marks passes the raw length check but
  // normalizes to "". The LIKE pattern would then be "%%" (every row), so
  // stop here instead of scanning the table for a keyword with no content.
  if (normalized.length === 0) {
    return { visible: [], totalMatches: 0 };
  }
  const regex = buildKeywordRegex(normalized);
  const db = await getDb();

  // Coarse pre-filter. The normalized keyword contains single spaces only,
  // whereas a raw description may contain whitespace runs that collapse only
  // on the JS side, so each space becomes `%` to keep those rows in the
  // candidate set. Over-matching is harmless: the regex below decides.
  const likePattern = `%${normalized.replace(/ /g, "%")}%`;
  const candidates = await db.select<RecentTransaction[]>(
    `SELECT t.id, t.date, t.description, t.amount,
            c.name AS category_name,
            c.color AS category_color
     FROM transactions t
     LEFT JOIN categories c ON t.category_id = c.id
     WHERE LOWER(t.description) LIKE $1
     ORDER BY t.date DESC
     LIMIT 1000`,
    [likePattern],
  );

  const matched = candidates.filter((tx) =>
    regex.test(normalizeDescription(tx.description)),
  );

  // slice(0, negative) would count from the end of the list; clamp to zero.
  const visibleCount = Math.max(0, limit);
  return {
    visible: matched.slice(0, visibleCount),
    totalMatches: matched.length,
  };
}
|
|
|
|
/** Arguments accepted by `applyKeywordWithReassignment`. */
export interface ApplyKeywordInput {
  /** Keyword text as entered by the user. */
  keyword: string;
  /** Category the keyword (and the selected transactions) should point to. */
  categoryId: number;
  /** Ids of the transactions to recategorise — only those the user checked. */
  transactionIds: number[];
  /**
   * Controls what happens when a keyword with the same spelling already
   * exists under a different category. When true, the existing keyword is
   * **reassigned** to the new category instead of a duplicate being created.
   * This follows the spec decision that history is never touched: only the
   * visible transactions are recategorised.
   */
  allowReplaceExisting: boolean;
}
|
|
|
|
/** Summary returned by `applyKeywordWithReassignment` once the writes commit. */
export interface ApplyKeywordResult {
  /** Id of the keyword row that was inserted or reassigned. */
  keywordId: number;
  /** Number of transaction UPDATE statements issued (one per supplied id). */
  updatedTransactions: number;
  /** True when an existing keyword was moved from a different category. */
  replacedExisting: boolean;
}
|
|
|
|
/**
 * Insert (or reassign) a keyword and recategorise the given transaction ids
 * between an explicit BEGIN and COMMIT, issuing ROLLBACK if any step fails.
 *
 * Flow:
 * 1. Validate the keyword; invalid input throws before any SQL runs.
 * 2. Look up a keyword row with exactly the same (trimmed) spelling.
 *    - Found under another category and `allowReplaceExisting` is false:
 *      throw `keyword_already_exists`.
 *    - Found otherwise: point it at `categoryId` and re-activate it.
 *    - Not found: insert a new row with priority 100.
 * 3. For each supplied transaction id, set its category and mark it as
 *    manually categorized. No other transaction is touched.
 *
 * SECURITY: every query is parameterised. The caller is expected to have
 * vetted `transactionIds` from a preview window that the user confirmed.
 *
 * NOTE(review): atomicity relies on BEGIN/COMMIT/ROLLBACK and all statements
 * in between running on the same database connection — confirm the driver
 * behind `getDb()` guarantees that.
 *
 * @param input - Keyword, target category, transaction ids and replace policy.
 * @returns The keyword id, the number of transaction updates issued, and
 *   whether an existing keyword changed category.
 * @throws Error `invalid_keyword:<reason>` when validation fails.
 * @throws Error `keyword_already_exists` when the keyword belongs to another
 *   category and replacing was not allowed.
 * @throws Any database error from the statements above, rethrown after the
 *   rollback attempt.
 */
export async function applyKeywordWithReassignment(
  input: ApplyKeywordInput,
): Promise<ApplyKeywordResult> {
  const validation = validateKeyword(input.keyword);
  if (!validation.ok) {
    throw new Error(`invalid_keyword:${validation.reason}`);
  }
  const keyword = validation.value;

  const db = await getDb();
  // BEGIN stays outside the try block: if it fails there is nothing to roll back.
  await db.execute("BEGIN");
  try {
    // Is there already a row for this keyword spelling?
    const existing = await db.select<Array<{ id: number; category_id: number }>>(
      `SELECT id, category_id FROM keywords WHERE keyword = $1 LIMIT 1`,
      [keyword],
    );
    const [current] = existing;

    let keywordId: number;
    let replacedExisting = false;
    if (current !== undefined) {
      const movesCategory = current.category_id !== input.categoryId;
      if (movesCategory && !input.allowReplaceExisting) {
        throw new Error("keyword_already_exists");
      }
      // Also runs when the category is unchanged, so an inactive keyword is
      // switched back on.
      await db.execute(
        `UPDATE keywords SET category_id = $1, is_active = 1 WHERE id = $2`,
        [input.categoryId, current.id],
      );
      keywordId = current.id;
      replacedExisting = movesCategory;
    } else {
      const inserted = await db.execute(
        `INSERT INTO keywords (keyword, category_id, priority) VALUES ($1, $2, $3)`,
        [keyword, input.categoryId, 100],
      );
      keywordId = Number(inserted.lastInsertId ?? 0);
    }

    let updatedTransactions = 0;
    for (const txId of input.transactionIds) {
      await db.execute(
        `UPDATE transactions
         SET category_id = $1,
             is_manually_categorized = 1,
             updated_at = CURRENT_TIMESTAMP
         WHERE id = $2`,
        [input.categoryId, txId],
      );
      updatedTransactions++;
    }

    await db.execute("COMMIT");
    return { keywordId, updatedTransactions, replacedExisting };
  } catch (e) {
    // The error that aborted the transaction is the one callers act on. A
    // failing ROLLBACK must not replace it, so log that failure and rethrow
    // the original.
    try {
      await db.execute("ROLLBACK");
    } catch (rollbackError) {
      console.error("applyKeywordWithReassignment: ROLLBACK failed", rollbackError);
    }
    throw e;
  }
}
|