diff --git a/src/components/import/SourceConfigPanel.tsx b/src/components/import/SourceConfigPanel.tsx index ad61d11..0a32a4c 100644 --- a/src/components/import/SourceConfigPanel.tsx +++ b/src/components/import/SourceConfigPanel.tsx @@ -1,4 +1,5 @@ import { useTranslation } from "react-i18next"; +import { Wand2 } from "lucide-react"; import type { ScannedSource, ScannedFile, @@ -16,6 +17,8 @@ interface SourceConfigPanelProps { onConfigChange: (config: SourceConfig) => void; onFileToggle: (file: ScannedFile) => void; onSelectAllFiles: () => void; + onAutoDetect: () => void; + isLoading?: boolean; } export default function SourceConfigPanel({ @@ -26,6 +29,8 @@ export default function SourceConfigPanel({ onConfigChange, onFileToggle, onSelectAllFiles, + onAutoDetect, + isLoading, }: SourceConfigPanelProps) { const { t } = useTranslation(); @@ -39,9 +44,19 @@ export default function SourceConfigPanel({ return (
-

- {t("import.config.title")} — {source.folder_name} -

+
+

+ {t("import.config.title")} — {source.folder_name} +

+ +
{/* Source name */}
@@ -102,6 +117,7 @@ export default function SourceConfigPanel({ + diff --git a/src/hooks/useImportWizard.ts b/src/hooks/useImportWizard.ts index 74ca12d..06da0a5 100644 --- a/src/hooks/useImportWizard.ts +++ b/src/hooks/useImportWizard.ts @@ -35,6 +35,10 @@ import { import { categorizeBatch } from "../services/categorizationService"; import { parseDate } from "../utils/dateParser"; import { parseFrenchAmount } from "../utils/amountParser"; +import { + preprocessQuotedCSV, + autoDetectConfig as runAutoDetect, +} from "../utils/csvAutoDetect"; interface WizardState { step: ImportWizardStep; @@ -429,7 +433,9 @@ export function useImportWizard() { encoding: config.encoding, }); - const parsed = Papa.parse(content, { + const preprocessed = preprocessQuotedCSV(content); + + const parsed = Papa.parse(preprocessed, { delimiter: config.delimiter, skipEmptyLines: true, }); @@ -772,6 +778,57 @@ export function useImportWizard() { dispatch({ type: "RESET" }); }, []); + const autoDetectConfig = useCallback(async () => { + if (state.selectedFiles.length === 0) return; + + dispatch({ type: "SET_LOADING", payload: true }); + dispatch({ type: "SET_ERROR", payload: null }); + + try { + const content = await invoke("read_file_content", { + filePath: state.selectedFiles[0].file_path, + encoding: state.sourceConfig.encoding, + }); + + const preprocessed = preprocessQuotedCSV(content); + const result = runAutoDetect(preprocessed); + + if (result) { + const newConfig = { + ...state.sourceConfig, + delimiter: result.delimiter, + hasHeader: result.hasHeader, + skipLines: result.skipLines, + dateFormat: result.dateFormat, + columnMapping: result.columnMapping, + amountMode: result.amountMode, + signConvention: result.signConvention, + }; + dispatch({ type: "SET_SOURCE_CONFIG", payload: newConfig }); + dispatch({ type: "SET_LOADING", payload: false }); + + // Refresh column headers with new config + await loadHeadersWithConfig( + state.selectedFiles[0].file_path, + newConfig.delimiter, + 
newConfig.encoding, + newConfig.skipLines, + newConfig.hasHeader + ); + } else { + dispatch({ + type: "SET_ERROR", + payload: "Auto-detection failed. Please configure manually.", + }); + } + } catch (e) { + dispatch({ + type: "SET_ERROR", + payload: e instanceof Error ? e.message : String(e), + }); + } + }, [state.selectedFiles, state.sourceConfig, loadHeadersWithConfig]); + return { state, browseFolder, @@ -785,6 +842,7 @@ export function useImportWizard() { executeImport, goToStep, reset, + autoDetectConfig, toggleDuplicateRow: (index: number) => dispatch({ type: "TOGGLE_DUPLICATE_ROW", payload: index }), setSkipAllDuplicates: (skipAll: boolean) => diff --git a/src/i18n/locales/en.json b/src/i18n/locales/en.json index 4294a64..1510fb2 100644 --- a/src/i18n/locales/en.json +++ b/src/i18n/locales/en.json @@ -82,7 +82,8 @@ "debitColumn": "Debit column", "creditColumn": "Credit column", "selectFiles": "Files to import", - "selectAll": "Select all" + "selectAll": "Select all", + "autoDetect": "Auto-detect" }, "preview": { "title": "Data Preview", diff --git a/src/i18n/locales/fr.json b/src/i18n/locales/fr.json index 442fc1c..52160e4 100644 --- a/src/i18n/locales/fr.json +++ b/src/i18n/locales/fr.json @@ -82,7 +82,8 @@ "debitColumn": "Colonne débit", "creditColumn": "Colonne crédit", "selectFiles": "Fichiers à importer", - "selectAll": "Tout sélectionner" + "selectAll": "Tout sélectionner", + "autoDetect": "Auto-détecter" }, "preview": { "title": "Aperçu des données", diff --git a/src/pages/ImportPage.tsx b/src/pages/ImportPage.tsx index b3055ed..6e41341 100644 --- a/src/pages/ImportPage.tsx +++ b/src/pages/ImportPage.tsx @@ -28,6 +28,7 @@ export default function ImportPage() { executeImport, goToStep, reset, + autoDetectConfig, toggleDuplicateRow, setSkipAllDuplicates, } = useImportWizard(); @@ -80,6 +81,8 @@ export default function ImportPage() { onConfigChange={updateConfig} onFileToggle={toggleFile} onSelectAllFiles={selectAllFiles} + 
// Patch context carried over from the preceding hunk (src/pages/ImportPage.tsx):
//   + onAutoDetect={autoDetectConfig}
//   + isLoading={state.isLoading}
// ---- src/utils/csvAutoDetect.ts (new file) ----
import Papa from "papaparse";
import { parseDate } from "./dateParser";
import { parseFrenchAmount } from "./amountParser";
import type { ColumnMapping, AmountMode, SignConvention } from "../shared/types";

/** Import configuration suggested by {@link autoDetectConfig}. */
export interface AutoDetectResult {
  delimiter: string;
  hasHeader: boolean;
  /** Always 0 in the current implementation: leading junk lines are not detected. */
  skipLines: number;
  dateFormat: string;
  columnMapping: ColumnMapping;
  amountMode: AmountMode;
  signConvention: SignConvention;
}

/**
 * Date formats tried by the detector. Order matters: when two formats parse a
 * column equally well, the one listed first wins (comparison is strictly
 * greater-than).
 */
const DATE_FORMATS = [
  "DD/MM/YYYY",
  "MM/DD/YYYY",
  "YYYY-MM-DD",
  "YYYY/MM/DD",
  "DD-MM-YYYY",
  "DD.MM.YYYY",
  "YYYYMMDD",
];

/** Candidate delimiters; earlier entries win ties. */
const DELIMITERS = [",", ";", "\t"];

/** Tolerance used when comparing running-balance differences to amounts. */
const BALANCE_TOLERANCE = 0.015;
/** Minimum number of comparable row pairs before a balance verdict is accepted. */
const MIN_BALANCE_SAMPLES = 2;
/** Share of comparable row pairs that must agree for a column to be a balance. */
const BALANCE_MATCH_RATE = 0.8;

/**
 * Detect and unwrap Desjardins-style CSVs where each entire line is
 * wrapped in quotes with "" escaping inside.
 *
 * The content is returned untouched unless EVERY non-empty line starts and
 * ends with a double quote and contains the sequence `,""`. When unwrapping,
 * each line is trimmed, its outer quotes are removed, doubled quotes are
 * collapsed to single quotes, and lines are re-joined with "\n".
 *
 * @param content Raw file content.
 * @returns The unwrapped content, or `content` itself when the pattern does not apply.
 */
export function preprocessQuotedCSV(content: string): string {
  const lines = content.split(/\r?\n/);
  const nonEmpty = lines.filter((l) => l.trim());
  if (nonEmpty.length === 0) return content;

  const isLineQuoted = nonEmpty.every((l) => {
    const t = l.trim();
    return t.startsWith('"') && t.endsWith('"') && t.includes(',""');
  });

  if (!isLineQuoted) return content;

  return lines
    .map((l) => {
      const t = l.trim();
      if (!t) return "";
      return t.slice(1, -1).replace(/""/g, '"');
    })
    .join("\n");
}

/**
 * Analyze raw CSV content and return a suggested configuration,
 * or null if detection fails.
 *
 * Detection fails (returns null) when there are fewer than two non-empty
 * lines or parsed rows, when no delimiter yields more than one column, when
 * no column parses as a date in at least 80% of sampled rows, or when no
 * amount column remains after excluding the date and balance columns.
 *
 * @param rawContent File content; line-quoted files are unwrapped here via
 *   {@link preprocessQuotedCSV}, so callers do not need to preprocess first.
 */
export function autoDetectConfig(rawContent: string): AutoDetectResult | null {
  const content = preprocessQuotedCSV(rawContent);
  const lines = content.split(/\r?\n/).filter((l) => l.trim());
  if (lines.length < 2) return null;

  // Step 1: Detect delimiter from the first few lines only.
  const delimiter = detectDelimiter(lines.slice(0, 10));
  if (!delimiter) return null;

  const parsed = Papa.parse(content, { delimiter, skipEmptyLines: true });
  const data = parsed.data as string[][];
  if (data.length < 2) return null;

  // Step 2: Detect header
  const hasHeader = detectHeader(data[0] ?? []);

  const dataStartIdx = hasHeader ? 1 : 0;
  const sampleRows = data.slice(dataStartIdx, dataStartIdx + 20);
  if (sampleRows.length === 0) return null;

  const colCount = Math.max(...data.slice(0, 10).map((r) => r.length));

  // Step 3: Detect date column + format
  const dateResult = detectDateColumn(sampleRows, colCount);
  if (!dateResult) return null;

  // Step 4: Detect numeric columns
  const numericCols = detectNumericColumns(sampleRows, colCount);

  // The date column must never be treated as a money column. Date cells may
  // also parse as amounts (e.g. YYYYMMDD values are plain digit strings), in
  // which case the date column would otherwise become an amount candidate.
  const moneyCols = numericCols.filter((c) => c !== dateResult.column);

  // Step 5: Detect balance columns and exclude them
  const balanceCols = detectBalanceColumns(sampleRows, moneyCols);
  const amountCandidates = moneyCols.filter((c) => !balanceCols.has(c));

  // Step 6: Detect description column (any non-date, non-numeric column)
  const descriptionCol = detectDescriptionColumn(
    sampleRows,
    colCount,
    dateResult.column,
    new Set(numericCols)
  );

  // Step 7: Determine amount mode
  const amountResult = detectAmountMode(sampleRows, amountCandidates);
  if (!amountResult) return null;

  const mapping: ColumnMapping = {
    date: dateResult.column,
    description: descriptionCol,
  };

  // Default used for debit/credit mode, where the sign is implied by the column.
  let signConvention: SignConvention = "negative_expense";

  if (amountResult.mode === "debit_credit") {
    mapping.debitAmount = amountResult.debitCol;
    mapping.creditAmount = amountResult.creditCol;
  } else {
    mapping.amount = amountResult.amountCol;
    signConvention = amountResult.signConvention;
  }

  return {
    delimiter,
    hasHeader,
    skipLines: 0,
    dateFormat: dateResult.format,
    columnMapping: mapping,
    amountMode: amountResult.mode,
    signConvention,
  };
}

/** True when the (already trimmed) cell parses as a date in any known format. */
function parsesAsDate(cell: string): boolean {
  return DATE_FORMATS.some((fmt) => Boolean(parseDate(cell, fmt)));
}

/** True when the (already trimmed) cell parses to a non-NaN amount. */
function parsesAsAmount(cell: string): boolean {
  return !isNaN(parseFrenchAmount(cell));
}

/**
 * Pick the delimiter that splits the sample lines into the most columns most
 * consistently. Score = (share of lines with the same column count as the
 * first line) * (column count of the first line).
 *
 * @returns The best delimiter, or null when none produces more than one column.
 */
function detectDelimiter(lines: string[]): string | null {
  let bestDelimiter: string | null = null;
  let bestScore = 0;

  for (const delim of DELIMITERS) {
    const counts = lines.map((line) => {
      const firstRow = Papa.parse(line, { delimiter: delim }).data[0] as
        | string[]
        | undefined;
      // A line that yields no parsed row counts as zero columns instead of throwing.
      return firstRow?.length ?? 0;
    });

    // The first line must split into more than one column.
    const firstCount = counts[0];
    if (firstCount === undefined || firstCount <= 1) continue;

    const consistent = counts.filter((c) => c === firstCount).length;
    const score = (consistent / counts.length) * firstCount;

    if (score > bestScore) {
      bestScore = score;
      bestDelimiter = delim;
    }
  }

  return bestDelimiter;
}

/**
 * Decide whether the first row is a header: it is one when none of its
 * non-empty cells parses as a date or as a number.
 */
function detectHeader(firstRow: string[]): boolean {
  for (const cell of firstRow) {
    const trimmed = cell?.trim();
    if (!trimmed) continue;
    if (parsesAsAmount(trimmed) || parsesAsDate(trimmed)) return false;
  }
  return true;
}

/**
 * Find the column/format pair with the highest parse success rate over the
 * non-empty cells of the sample rows.
 *
 * @returns The best pair, or null when the best rate is below 80%.
 */
function detectDateColumn(
  rows: string[][],
  colCount: number
): { column: number; format: string } | null {
  let bestCol = -1;
  let bestFormat = "";
  let bestRate = 0;

  for (let col = 0; col < colCount; col++) {
    for (const fmt of DATE_FORMATS) {
      let success = 0;
      let total = 0;

      for (const row of rows) {
        const cell = row[col]?.trim();
        if (!cell) continue;
        total++;
        if (parseDate(cell, fmt)) success++;
      }

      if (total === 0) continue;
      const rate = success / total;
      if (rate > bestRate) {
        bestRate = rate;
        bestCol = col;
        bestFormat = fmt;
      }
    }
  }

  if (bestRate < 0.8 || bestCol < 0) return null;

  return { column: bestCol, format: bestFormat };
}

/**
 * Return the indexes of columns where at least half of the non-empty sample
 * cells parse as amounts. Columns with no non-empty cells are excluded.
 */
function detectNumericColumns(rows: string[][], colCount: number): number[] {
  const result: number[] = [];

  for (let col = 0; col < colCount; col++) {
    let numericCount = 0;
    let nonEmpty = 0;

    for (const row of rows) {
      const cell = row[col]?.trim();
      if (!cell) continue;
      nonEmpty++;
      if (parsesAsAmount(cell)) numericCount++;
    }

    if (nonEmpty > 0 && numericCount / nonEmpty >= 0.5) {
      result.push(col);
    }
  }

  return result;
}

/** Accept a balance hypothesis only with enough samples and a high match rate. */
function isBalanceVerdict(matches: number, tested: number): boolean {
  return tested >= MIN_BALANCE_SAMPLES && matches / tested >= BALANCE_MATCH_RATE;
}

/**
 * Single-amount test: balance[i] ≈ balance[i-1] ± amount[i].
 * Rows where either balance or the amount is missing are not counted.
 */
function tracksSingleAmount(
  balVals: (number | null)[],
  amtVals: (number | null)[]
): boolean {
  let matches = 0;
  let tested = 0;

  for (let i = 1; i < balVals.length; i++) {
    const cur = balVals[i];
    const prev = balVals[i - 1];
    const amt = amtVals[i];
    if (cur == null || prev == null || amt == null) continue;
    tested++;

    const diff = cur - prev;
    if (
      Math.abs(diff - amt) < BALANCE_TOLERANCE ||
      Math.abs(diff + amt) < BALANCE_TOLERANCE
    ) {
      matches++;
    }
  }

  return isBalanceVerdict(matches, tested);
}

/**
 * Two-column test: balance[i] ≈ balance[i-1] - debit[i] + credit[i], trying
 * both assignments of the two columns. A missing debit/credit cell counts as 0.
 */
function tracksDebitCredit(
  balVals: (number | null)[],
  valsA: (number | null)[],
  valsB: (number | null)[]
): boolean {
  let matches = 0;
  let tested = 0;

  for (let i = 1; i < balVals.length; i++) {
    const cur = balVals[i];
    const prev = balVals[i - 1];
    if (cur == null || prev == null) continue;
    const da = valsA[i] ?? 0;
    const db = valsB[i] ?? 0;
    tested++;

    const diff = cur - prev;
    if (
      Math.abs(diff - (-da + db)) < BALANCE_TOLERANCE ||
      Math.abs(diff - (da - db)) < BALANCE_TOLERANCE
    ) {
      matches++;
    }
  }

  return isBalanceVerdict(matches, tested);
}

/**
 * Identify running-balance columns among the numeric columns: a column is a
 * balance when its row-to-row difference matches another numeric column
 * (single amount) or a pair of other numeric columns (debit/credit).
 *
 * @returns The set of balance column indexes; empty when there are fewer than
 *   two numeric columns or fewer than three rows.
 */
function detectBalanceColumns(
  rows: string[][],
  numericCols: number[]
): Set<number> {
  const balanceCols = new Set<number>();
  if (numericCols.length < 2 || rows.length < 3) return balanceCols;

  // Parse every numeric column once; unparseable or empty cells become null.
  const values = new Map<number, (number | null)[]>();
  for (const col of numericCols) {
    values.set(
      col,
      rows.map((row) => {
        const cell = row[col]?.trim();
        if (!cell) return null;
        const v = parseFrenchAmount(cell);
        return isNaN(v) ? null : v;
      })
    );
  }

  for (const balCol of numericCols) {
    const balVals = values.get(balCol);
    if (!balVals) continue;

    const others = numericCols.filter((c) => c !== balCol);

    const matchesSingle = others.some((amtCol) => {
      const amtVals = values.get(amtCol);
      return amtVals !== undefined && tracksSingleAmount(balVals, amtVals);
    });
    if (matchesSingle) {
      balanceCols.add(balCol);
      continue;
    }

    const matchesPair = others.some((colA, idx) =>
      others.slice(idx + 1).some((colB) => {
        const valsA = values.get(colA);
        const valsB = values.get(colB);
        return (
          valsA !== undefined &&
          valsB !== undefined &&
          tracksDebitCredit(balVals, valsA, valsB)
        );
      })
    );
    if (matchesPair) balanceCols.add(balCol);
  }

  return balanceCols;
}

/**
 * Choose the description column: the non-date, non-numeric column whose
 * non-empty cells are longest on average. Falls back to column 0 when no
 * eligible column has any text.
 */
function detectDescriptionColumn(
  rows: string[][],
  colCount: number,
  dateCol: number,
  numericCols: Set<number>
): number {
  let bestCol = 0;
  let bestAvgLen = 0;

  for (let col = 0; col < colCount; col++) {
    if (col === dateCol || numericCols.has(col)) continue;

    let totalLen = 0;
    let count = 0;

    for (const row of rows) {
      const cell = row[col]?.trim();
      if (!cell) continue;
      totalLen += cell.length;
      count++;
    }

    const avgLen = count > 0 ? totalLen / count : 0;
    if (avgLen > bestAvgLen) {
      bestAvgLen = avgLen;
      bestCol = col;
    }
  }

  return bestCol;
}

interface SingleAmountResult {
  mode: "single";
  amountCol: number;
  signConvention: SignConvention;
}

interface DebitCreditResult {
  mode: "debit_credit";
  debitCol: number;
  creditCol: number;
}

type AmountModeResult = SingleAmountResult | DebitCreditResult;

/**
 * Decide between a single signed amount column and a debit/credit pair.
 *
 * With several candidates, the first sparse-complementary pair found is
 * reported as debit (lower index in the candidate list) and credit; otherwise
 * the first candidate is used as a single amount column.
 *
 * @returns null when there is no candidate column at all.
 */
function detectAmountMode(
  rows: string[][],
  amountCandidates: number[]
): AmountModeResult | null {
  const first = amountCandidates[0];
  if (first === undefined) return null;

  if (amountCandidates.length === 1) {
    return detectSingleAmount(rows, first);
  }

  // Check for sparse-complementary pair (debit/credit pattern)
  for (const [idx, colA] of amountCandidates.entries()) {
    for (const colB of amountCandidates.slice(idx + 1)) {
      if (isSparseComplementary(rows, colA, colB)) {
        return { mode: "debit_credit", debitCol: colA, creditCol: colB };
      }
    }
  }

  // No complementary pair found — use first candidate as single amount
  return detectSingleAmount(rows, first);
}

/**
 * Build the single-amount result for `col`, choosing "negative_expense" when
 * more than half of the parseable values are negative and "positive_expense"
 * otherwise (including when no value parses).
 */
function detectSingleAmount(
  rows: string[][],
  col: number
): SingleAmountResult {
  let negCount = 0;
  let total = 0;

  for (const row of rows) {
    const cell = row[col]?.trim();
    if (!cell) continue;
    const val = parseFrenchAmount(cell);
    if (isNaN(val)) continue;
    total++;
    if (val < 0) negCount++;
  }

  const signConvention: SignConvention =
    total > 0 && negCount / total > 0.5
      ? "negative_expense"
      : "positive_expense";

  return { mode: "single", amountCol: col, signConvention };
}

/**
 * True when, among rows where at least one of the two columns holds a
 * parseable amount, at least 70% have a value in exactly one of them.
 */
function isSparseComplementary(
  rows: string[][],
  colA: number,
  colB: number
): boolean {
  let complementary = 0;
  let total = 0;

  for (const row of rows) {
    const cellA = row[colA]?.trim();
    const cellB = row[colB]?.trim();
    const hasA = !!cellA && parsesAsAmount(cellA);
    const hasB = !!cellB && parsesAsAmount(cellB);

    if (!hasA && !hasB) continue;
    total++;

    // Complementary: exactly one has a value
    if (hasA !== hasB) complementary++;
  }

  return total > 0 && complementary / total >= 0.7;
}