diff --git a/src/components/import/SourceConfigPanel.tsx b/src/components/import/SourceConfigPanel.tsx
index ad61d11..0a32a4c 100644
--- a/src/components/import/SourceConfigPanel.tsx
+++ b/src/components/import/SourceConfigPanel.tsx
@@ -1,4 +1,5 @@
import { useTranslation } from "react-i18next";
+import { Wand2 } from "lucide-react";
import type {
ScannedSource,
ScannedFile,
@@ -16,6 +17,8 @@ interface SourceConfigPanelProps {
onConfigChange: (config: SourceConfig) => void;
onFileToggle: (file: ScannedFile) => void;
onSelectAllFiles: () => void;
+ onAutoDetect: () => void;
+ isLoading?: boolean;
}
export default function SourceConfigPanel({
@@ -26,6 +29,8 @@ export default function SourceConfigPanel({
onConfigChange,
onFileToggle,
onSelectAllFiles,
+ onAutoDetect,
+ isLoading,
}: SourceConfigPanelProps) {
const { t } = useTranslation();
@@ -39,9 +44,19 @@ export default function SourceConfigPanel({
return (
-
- {t("import.config.title")} — {source.folder_name}
-
+
+
+ {t("import.config.title")} — {source.folder_name}
+
+
+
{/* Source name */}
@@ -102,6 +117,7 @@ export default function SourceConfigPanel({
+
diff --git a/src/hooks/useImportWizard.ts b/src/hooks/useImportWizard.ts
index 74ca12d..06da0a5 100644
--- a/src/hooks/useImportWizard.ts
+++ b/src/hooks/useImportWizard.ts
@@ -35,6 +35,10 @@ import {
import { categorizeBatch } from "../services/categorizationService";
import { parseDate } from "../utils/dateParser";
import { parseFrenchAmount } from "../utils/amountParser";
+import {
+ preprocessQuotedCSV,
+ autoDetectConfig as runAutoDetect,
+} from "../utils/csvAutoDetect";
interface WizardState {
step: ImportWizardStep;
@@ -429,7 +433,9 @@ export function useImportWizard() {
encoding: config.encoding,
});
- const parsed = Papa.parse(content, {
+ const preprocessed = preprocessQuotedCSV(content);
+
+ const parsed = Papa.parse(preprocessed, {
delimiter: config.delimiter,
skipEmptyLines: true,
});
@@ -772,6 +778,57 @@ export function useImportWizard() {
dispatch({ type: "RESET" });
}, []);
+ /**
+  * Auto-detect the CSV configuration from the first selected file.
+  *
+  * Reads the file with the currently configured encoding, runs the
+  * detection heuristics and, on success, merges the detected fields into
+  * the current source config and reloads the column headers. On failure an
+  * error message is stored in the wizard state. The loading flag is cleared
+  * on every exit path.
+  */
+ const autoDetectConfig = useCallback(async () => {
+   if (state.selectedFiles.length === 0) return;
+
+   dispatch({ type: "SET_LOADING", payload: true });
+   dispatch({ type: "SET_ERROR", payload: null });
+
+   try {
+     const filePath = state.selectedFiles[0].file_path;
+     const content = await invoke<string>("read_file_content", {
+       filePath,
+       encoding: state.sourceConfig.encoding,
+     });
+
+     // runAutoDetect unwraps line-quoted exports itself. Unwrapping here as
+     // well would run the heuristic twice and could strip legitimate quotes
+     // from rows whose first and last fields are quoted.
+     const result = runAutoDetect(content);
+
+     if (!result) {
+       dispatch({ type: "SET_LOADING", payload: false });
+       dispatch({
+         type: "SET_ERROR",
+         payload: "Auto-detection failed. Please configure manually.",
+       });
+       return;
+     }
+
+     const newConfig = {
+       ...state.sourceConfig,
+       delimiter: result.delimiter,
+       hasHeader: result.hasHeader,
+       skipLines: result.skipLines,
+       dateFormat: result.dateFormat,
+       columnMapping: result.columnMapping,
+       amountMode: result.amountMode,
+       signConvention: result.signConvention,
+     };
+     dispatch({ type: "SET_SOURCE_CONFIG", payload: newConfig });
+     // NOTE(review): the flag is cleared before the header refresh, as in the
+     // original; presumably loadHeadersWithConfig tracks its own loading
+     // state - confirm.
+     dispatch({ type: "SET_LOADING", payload: false });
+
+     // Refresh column headers with new config
+     await loadHeadersWithConfig(
+       filePath,
+       newConfig.delimiter,
+       newConfig.encoding,
+       newConfig.skipLines,
+       newConfig.hasHeader
+     );
+   } catch (e) {
+     // Without this reset a failed read would leave the wizard stuck in its
+     // loading state.
+     dispatch({ type: "SET_LOADING", payload: false });
+     dispatch({
+       type: "SET_ERROR",
+       payload: e instanceof Error ? e.message : String(e),
+     });
+   }
+ }, [state.selectedFiles, state.sourceConfig, loadHeadersWithConfig]);
+
return {
state,
browseFolder,
@@ -785,6 +842,7 @@ export function useImportWizard() {
executeImport,
goToStep,
reset,
+ autoDetectConfig,
toggleDuplicateRow: (index: number) =>
dispatch({ type: "TOGGLE_DUPLICATE_ROW", payload: index }),
setSkipAllDuplicates: (skipAll: boolean) =>
diff --git a/src/i18n/locales/en.json b/src/i18n/locales/en.json
index 4294a64..1510fb2 100644
--- a/src/i18n/locales/en.json
+++ b/src/i18n/locales/en.json
@@ -82,7 +82,8 @@
"debitColumn": "Debit column",
"creditColumn": "Credit column",
"selectFiles": "Files to import",
- "selectAll": "Select all"
+ "selectAll": "Select all",
+ "autoDetect": "Auto-detect"
},
"preview": {
"title": "Data Preview",
diff --git a/src/i18n/locales/fr.json b/src/i18n/locales/fr.json
index 442fc1c..52160e4 100644
--- a/src/i18n/locales/fr.json
+++ b/src/i18n/locales/fr.json
@@ -82,7 +82,8 @@
"debitColumn": "Colonne débit",
"creditColumn": "Colonne crédit",
"selectFiles": "Fichiers à importer",
- "selectAll": "Tout sélectionner"
+ "selectAll": "Tout sélectionner",
+ "autoDetect": "Auto-détecter"
},
"preview": {
"title": "Aperçu des données",
diff --git a/src/pages/ImportPage.tsx b/src/pages/ImportPage.tsx
index b3055ed..6e41341 100644
--- a/src/pages/ImportPage.tsx
+++ b/src/pages/ImportPage.tsx
@@ -28,6 +28,7 @@ export default function ImportPage() {
executeImport,
goToStep,
reset,
+ autoDetectConfig,
toggleDuplicateRow,
setSkipAllDuplicates,
} = useImportWizard();
@@ -80,6 +81,8 @@ export default function ImportPage() {
onConfigChange={updateConfig}
onFileToggle={toggleFile}
onSelectAllFiles={selectAllFiles}
+ onAutoDetect={autoDetectConfig}
+ isLoading={state.isLoading}
/>
goToStep("source-list")}
diff --git a/src/utils/csvAutoDetect.ts b/src/utils/csvAutoDetect.ts
new file mode 100644
index 0000000..d0a8896
--- /dev/null
+++ b/src/utils/csvAutoDetect.ts
@@ -0,0 +1,460 @@
+import Papa from "papaparse";
+import { parseDate } from "./dateParser";
+import { parseFrenchAmount } from "./amountParser";
+import type { ColumnMapping, AmountMode, SignConvention } from "../shared/types";
+
+/**
+ * Import configuration suggested by `autoDetectConfig` for one CSV file.
+ */
+export interface AutoDetectResult {
+ /** Field separator selected among the `DELIMITERS` candidates. */
+ delimiter: string;
+ /** True when the first parsed row contains neither a parseable date nor a parseable number. */
+ hasHeader: boolean;
+ /** Number of leading lines to skip; `autoDetectConfig` currently always reports 0. */
+ skipLines: number;
+ /** Entry of `DATE_FORMATS` that parsed the detected date column best. */
+ dateFormat: string;
+ /** Column indices of the date, description and amount (or debit/credit) fields. */
+ columnMapping: ColumnMapping;
+ /** "single" for one signed amount column, "debit_credit" for a complementary pair. */
+ amountMode: AmountMode;
+ /** Sign convention of the amount column; "negative_expense" in debit/credit mode. */
+ signConvention: SignConvention;
+}
+
+// Candidate date formats, tried in this order. `detectDateColumn` only
+// replaces its best match on a strictly higher success rate, so when two
+// formats parse a column equally well (e.g. DD/MM/YYYY vs MM/DD/YYYY on
+// ambiguous dates) the earlier entry wins.
+const DATE_FORMATS = [
+ "DD/MM/YYYY",
+ "MM/DD/YYYY",
+ "YYYY-MM-DD",
+ "YYYY/MM/DD",
+ "DD-MM-YYYY",
+ "DD.MM.YYYY",
+ "YYYYMMDD",
+];
+
+// Candidate field separators. `detectDelimiter` keeps the earlier entry when
+// two candidates reach the same score.
+const DELIMITERS = [",", ";", "\t"];
+
+/**
+ * Unwrap CSV exports (Desjardins-style) in which each whole line is wrapped
+ * in one pair of quotes and inner quotes are escaped as "".
+ *
+ * The content is treated as line-quoted only when every non-blank line,
+ * once trimmed, starts with a quote, ends with a quote and contains `,""`.
+ * Otherwise the input is returned untouched.
+ *
+ * @param content Raw file content.
+ * @returns The unwrapped content joined with "\n", or `content` unchanged.
+ */
+export function preprocessQuotedCSV(content: string): string {
+  const trimmedLines = content.split(/\r?\n/).map((line) => line.trim());
+  const populated = trimmedLines.filter((line) => line.length > 0);
+  if (populated.length === 0) {
+    return content;
+  }
+
+  for (const line of populated) {
+    const wrapped =
+      line.startsWith('"') && line.endsWith('"') && line.includes(',""');
+    if (!wrapped) {
+      return content;
+    }
+  }
+
+  const unwrapped: string[] = [];
+  for (const line of trimmedLines) {
+    // Blank lines are kept as empty strings so the line count is preserved.
+    unwrapped.push(line === "" ? "" : line.slice(1, -1).replace(/""/g, '"'));
+  }
+  return unwrapped.join("\n");
+}
+
+/**
+ * Analyze raw CSV content and return a suggested configuration,
+ * or null if detection fails.
+ *
+ * Detection fails (null) when there are fewer than two non-blank lines or
+ * parsed rows, when no delimiter yields more than one column, when no
+ * column parses as a date in at least 80% of the sampled rows, or when no
+ * amount column can be found.
+ *
+ * @param rawContent File content; line-quoted exports are unwrapped first.
+ * @returns The suggested configuration, or null.
+ */
+export function autoDetectConfig(rawContent: string): AutoDetectResult | null {
+  const content = preprocessQuotedCSV(rawContent);
+  const lines = content.split(/\r?\n/).filter((l) => l.trim());
+  if (lines.length < 2) return null;
+
+  // Step 1: Detect delimiter (first 10 non-blank lines)
+  const delimiter = detectDelimiter(lines.slice(0, 10));
+  if (!delimiter) return null;
+
+  const parsed = Papa.parse(content, { delimiter, skipEmptyLines: true });
+  const data = parsed.data as string[][];
+  if (data.length < 2) return null;
+
+  // Step 2: Detect header
+  const hasHeader = detectHeader(data[0]);
+
+  // Sample up to 20 data rows, leaving the header out when there is one
+  const dataStartIdx = hasHeader ? 1 : 0;
+  const sampleRows = data.slice(dataStartIdx, dataStartIdx + 20);
+  if (sampleRows.length === 0) return null;
+
+  const colCount = Math.max(...data.slice(0, 10).map((r) => r.length));
+
+  // Step 3: Detect date column + format
+  const dateResult = detectDateColumn(sampleRows, colCount);
+  if (!dateResult) return null;
+
+  // Step 4: Detect numeric columns. The date column is excluded: a
+  // digit-only date (e.g. YYYYMMDD) also parses as a number and would
+  // otherwise become an amount candidate, typically the first one.
+  const numericCols = detectNumericColumns(sampleRows, colCount).filter(
+    (col) => col !== dateResult.column
+  );
+
+  // Step 5: Detect balance columns and exclude them
+  const balanceCols = detectBalanceColumns(sampleRows, numericCols);
+  const amountCandidates = numericCols.filter((c) => !balanceCols.has(c));
+
+  // Step 6: Detect description column (never the date or a numeric column)
+  const descriptionCol = detectDescriptionColumn(
+    sampleRows,
+    colCount,
+    dateResult.column,
+    new Set(numericCols)
+  );
+
+  // Step 7: Determine amount mode
+  const amountResult = detectAmountMode(sampleRows, amountCandidates);
+  if (!amountResult) return null;
+
+  const mapping: ColumnMapping = {
+    date: dateResult.column,
+    description: descriptionCol,
+  };
+
+  // Debit/credit mode keeps the default sign convention
+  let signConvention: SignConvention = "negative_expense";
+
+  if (amountResult.mode === "debit_credit") {
+    mapping.debitAmount = amountResult.debitCol;
+    mapping.creditAmount = amountResult.creditCol;
+  } else {
+    mapping.amount = amountResult.amountCol;
+    signConvention = amountResult.signConvention;
+  }
+
+  return {
+    delimiter,
+    hasHeader,
+    skipLines: 0,
+    dateFormat: dateResult.format,
+    columnMapping: mapping,
+    amountMode: amountResult.mode,
+    signConvention,
+  };
+}
+
+/**
+ * Pick the delimiter that splits the sample lines most consistently.
+ *
+ * Each candidate is scored as (share of lines whose column count equals
+ * that of the first line) x (column count of the first line). Candidates
+ * that leave the first line in a single column are ignored.
+ *
+ * @param lines Sample of non-blank lines.
+ * @returns The best-scoring delimiter, or null when none qualifies.
+ */
+function detectDelimiter(lines: string[]): string | null {
+  let winner: string | null = null;
+  let topScore = 0;
+
+  for (const candidate of DELIMITERS) {
+    const widths: number[] = [];
+    for (const line of lines) {
+      const firstRow = Papa.parse(line, { delimiter: candidate })
+        .data[0] as string[];
+      widths.push(firstRow.length);
+    }
+
+    // The first line sets the reference width and must have several columns
+    if (widths.length === 0 || widths[0] <= 1) {
+      continue;
+    }
+
+    const referenceWidth = widths[0];
+    let agreeing = 0;
+    for (const width of widths) {
+      if (width === referenceWidth) agreeing += 1;
+    }
+
+    const score = (agreeing / widths.length) * referenceWidth;
+    if (score > topScore) {
+      topScore = score;
+      winner = candidate;
+    }
+  }
+
+  return winner;
+}
+
+/**
+ * Decide whether the first row is a header row.
+ *
+ * A row counts as a header when none of its non-blank cells parses as an
+ * amount and none parses as a date in any of the `DATE_FORMATS`.
+ *
+ * @param firstRow Cells of the first parsed row.
+ * @returns True when the row looks like a header.
+ */
+function detectHeader(firstRow: string[]): boolean {
+  let sawNumber = false;
+  let sawDate = false;
+
+  for (let i = 0; i < firstRow.length; i++) {
+    const text = firstRow[i]?.trim();
+    if (!text) continue;
+
+    if (!isNaN(parseFrenchAmount(text))) {
+      sawNumber = true;
+    }
+
+    // `some` stops at the first format that parses the cell
+    if (DATE_FORMATS.some((fmt) => Boolean(parseDate(text, fmt)))) {
+      sawDate = true;
+    }
+  }
+
+  return !(sawDate || sawNumber);
+}
+
+/**
+ * Find the column, and the date format, that parse as dates most reliably.
+ *
+ * Every (column, format) pair is scored by the share of non-blank cells
+ * that `parseDate` accepts. Ties keep the first pair encountered (lower
+ * column index, then earlier entry of `DATE_FORMATS`).
+ *
+ * @param rows Sample data rows.
+ * @param colCount Number of columns to inspect.
+ * @returns The best column/format, or null when the best rate is below 80%.
+ */
+function detectDateColumn(
+  rows: string[][],
+  colCount: number
+): { column: number; format: string } | null {
+  let best = { column: -1, format: "", rate: 0 };
+
+  for (let column = 0; column < colCount; column++) {
+    const cells = rows
+      .map((row) => row[column]?.trim())
+      .filter((cell): cell is string => Boolean(cell));
+    // A column without any value cannot be scored
+    if (cells.length === 0) continue;
+
+    for (const format of DATE_FORMATS) {
+      const parsedCount = cells.filter((cell) =>
+        Boolean(parseDate(cell, format))
+      ).length;
+      const rate = parsedCount / cells.length;
+      if (rate > best.rate) {
+        best = { column, format, rate };
+      }
+    }
+  }
+
+  if (best.column < 0 || best.rate < 0.8) {
+    return null;
+  }
+  return { column: best.column, format: best.format };
+}
+
+/**
+ * List the columns in which at least half of the non-blank cells parse as
+ * an amount with `parseFrenchAmount`.
+ *
+ * @param rows Sample data rows.
+ * @param colCount Number of columns to inspect.
+ * @returns Indices of the numeric columns, in ascending order.
+ */
+function detectNumericColumns(rows: string[][], colCount: number): number[] {
+  const numericCols: number[] = [];
+
+  for (let col = 0; col < colCount; col++) {
+    const cells = rows
+      .map((row) => row[col]?.trim())
+      .filter((cell): cell is string => Boolean(cell));
+    // Entirely blank columns are never numeric
+    if (cells.length === 0) continue;
+
+    const parseable = cells.filter(
+      (cell) => !isNaN(parseFrenchAmount(cell))
+    ).length;
+    if (parseable / cells.length >= 0.5) {
+      numericCols.push(col);
+    }
+  }
+
+  return numericCols;
+}
+
+/**
+ * Identify running-balance columns among the numeric columns so that they
+ * can be excluded from the amount candidates.
+ *
+ * A column is flagged as a balance when, for at least 80% of the testable
+ * consecutive row pairs (and at least two of them), its row-to-row change
+ * matches either
+ *  - plus or minus the value of one other numeric column, or
+ *  - the difference, in either order, of two other numeric columns
+ *    (a blank or unparseable cell counting as 0).
+ *
+ * @param rows Sample data rows.
+ * @param numericCols Indices of the numeric columns.
+ * @returns The set of column indices that behave like a running balance;
+ *   empty when there are fewer than two numeric columns or three rows.
+ */
+function detectBalanceColumns(
+  rows: string[][],
+  numericCols: number[]
+): Set<number> {
+  const balanceCols = new Set<number>();
+  if (numericCols.length < 2 || rows.length < 3) return balanceCols;
+
+  const TOLERANCE = 0.015; // tolerance for floating-point comparison
+
+  // Parse all numeric values once; null marks a blank or unparseable cell
+  const values = new Map<number, (number | null)[]>();
+  for (const col of numericCols) {
+    values.set(
+      col,
+      rows.map((row) => {
+        const cell = row[col]?.trim();
+        if (!cell) return null;
+        const v = parseFrenchAmount(cell);
+        return isNaN(v) ? null : v;
+      })
+    );
+  }
+
+  for (const balCol of numericCols) {
+    const balVals = values.get(balCol);
+    if (!balVals) continue;
+
+    // Test single-column balance: balance[i] ≈ balance[i-1] ± amount[i]
+    for (const amtCol of numericCols) {
+      if (amtCol === balCol) continue;
+      const amtVals = values.get(amtCol);
+      if (!amtVals) continue;
+
+      let matches = 0;
+      let tested = 0;
+
+      for (let i = 1; i < rows.length; i++) {
+        const current = balVals[i];
+        const previous = balVals[i - 1];
+        const amount = amtVals[i];
+        // Only pairs where all three values are present can be tested
+        if (current == null || previous == null || amount == null) continue;
+        tested++;
+
+        const diff = current - previous;
+        // balance[i] = balance[i-1] + amount[i] OR balance[i] = balance[i-1] - amount[i]
+        if (
+          Math.abs(diff - amount) < TOLERANCE ||
+          Math.abs(diff + amount) < TOLERANCE
+        ) {
+          matches++;
+        }
+      }
+
+      if (tested >= 2 && matches / tested >= 0.8) {
+        balanceCols.add(balCol);
+        break;
+      }
+    }
+
+    if (balanceCols.has(balCol)) continue;
+
+    // Test two-column balance: balance[i] ≈ balance[i-1] - debit[i] + credit[i]
+    for (let a = 0; a < numericCols.length; a++) {
+      for (let b = a + 1; b < numericCols.length; b++) {
+        const colA = numericCols[a];
+        const colB = numericCols[b];
+        if (colA === balCol || colB === balCol) continue;
+
+        const valsA = values.get(colA);
+        const valsB = values.get(colB);
+        if (!valsA || !valsB) continue;
+
+        let matches = 0;
+        let tested = 0;
+
+        for (let i = 1; i < rows.length; i++) {
+          const current = balVals[i];
+          const previous = balVals[i - 1];
+          if (current == null || previous == null) continue;
+          // A missing debit or credit contributes nothing to the change
+          const da = valsA[i] ?? 0;
+          const db = valsB[i] ?? 0;
+          tested++;
+
+          const diff = current - previous;
+          // Try both orderings: diff ≈ -colA + colB or diff ≈ colA - colB
+          if (
+            Math.abs(diff - (-da + db)) < TOLERANCE ||
+            Math.abs(diff - (da - db)) < TOLERANCE
+          ) {
+            matches++;
+          }
+        }
+
+        if (tested >= 2 && matches / tested >= 0.8) {
+          balanceCols.add(balCol);
+          break;
+        }
+      }
+      if (balanceCols.has(balCol)) break;
+    }
+  }
+
+  return balanceCols;
+}
+
+/**
+ * Pick the description column: the column, other than the date column and
+ * the numeric columns, whose non-blank cells are longest on average.
+ *
+ * @param rows Sample data rows.
+ * @param colCount Number of columns to inspect.
+ * @param dateCol Index of the date column, never selected.
+ * @param numericCols Indices of the numeric columns, never selected.
+ * @returns The index of the best column, or 0 when no eligible column
+ *   contains any text.
+ */
+function detectDescriptionColumn(
+  rows: string[][],
+  colCount: number,
+  dateCol: number,
+  numericCols: Set<number>
+): number {
+  let bestCol = 0;
+  let bestAvgLen = 0;
+
+  for (let col = 0; col < colCount; col++) {
+    if (col === dateCol || numericCols.has(col)) continue;
+
+    let totalLen = 0;
+    let count = 0;
+
+    for (const row of rows) {
+      // Short rows simply have no cell in this column
+      const cell = row[col]?.trim();
+      if (!cell) continue;
+      totalLen += cell.length;
+      count++;
+    }
+
+    const avgLen = count > 0 ? totalLen / count : 0;
+    // Strict comparison: on equal averages the leftmost column is kept
+    if (avgLen > bestAvgLen) {
+      bestAvgLen = avgLen;
+      bestCol = col;
+    }
+  }
+
+  return bestCol;
+}
+
+/** Outcome of amount detection when one column holds signed amounts. */
+interface SingleAmountResult {
+ mode: "single";
+ /** Index of the amount column. */
+ amountCol: number;
+ /** "negative_expense" when more than half of the parsed values are negative, else "positive_expense". */
+ signConvention: SignConvention;
+}
+
+/** Outcome of amount detection when two sparse, complementary columns were found. */
+interface DebitCreditResult {
+ mode: "debit_credit";
+ /** Index of the first column of the pair (lower candidate position). */
+ debitCol: number;
+ /** Index of the second column of the pair. */
+ creditCol: number;
+}
+
+/** Discriminated on `mode`. */
+type AmountModeResult = SingleAmountResult | DebitCreditResult;
+
+/**
+ * Decide between single-amount and debit/credit mode.
+ *
+ * The first pair of candidates (in candidate order) that is sparse and
+ * complementary is reported as debit/credit, the earlier column being the
+ * debit. Otherwise the first candidate is used as a single amount column.
+ *
+ * @param rows Sample data rows.
+ * @param amountCandidates Numeric columns that are not balances.
+ * @returns The detected mode, or null when there is no candidate.
+ */
+function detectAmountMode(
+  rows: string[][],
+  amountCandidates: number[]
+): AmountModeResult | null {
+  if (amountCandidates.length === 0) {
+    return null;
+  }
+
+  // With a single candidate there is no pair to examine, so the loops
+  // below are skipped and the fallback applies.
+  for (const [position, debitCol] of amountCandidates.entries()) {
+    for (const creditCol of amountCandidates.slice(position + 1)) {
+      if (isSparseComplementary(rows, debitCol, creditCol)) {
+        return { mode: "debit_credit", debitCol, creditCol };
+      }
+    }
+  }
+
+  // No complementary pair found: use first candidate as single amount
+  return detectSingleAmount(rows, amountCandidates[0]);
+}
+
+/**
+ * Describe a single amount column and infer its sign convention.
+ *
+ * @param rows Sample data rows.
+ * @param col Index of the amount column.
+ * @returns A single-mode result; "negative_expense" when more than half of
+ *   the parseable values are negative, "positive_expense" otherwise
+ *   (including when nothing could be parsed).
+ */
+function detectSingleAmount(
+  rows: string[][],
+  col: number
+): SingleAmountResult {
+  const amounts = rows
+    .map((row) => row[col]?.trim())
+    .filter((cell): cell is string => Boolean(cell))
+    .map((cell) => parseFrenchAmount(cell))
+    .filter((value) => !isNaN(value));
+
+  const negatives = amounts.filter((value) => value < 0).length;
+  const mostlyNegative =
+    amounts.length > 0 && negatives / amounts.length > 0.5;
+
+  let signConvention: SignConvention;
+  if (mostlyNegative) {
+    // Mostly negative values suggest expenses are recorded as negatives
+    signConvention = "negative_expense";
+  } else {
+    signConvention = "positive_expense";
+  }
+
+  return { mode: "single", amountCol: col, signConvention };
+}
+
+/**
+ * Tell whether two columns look like a debit/credit pair: among the rows
+ * where at least one of them holds a parseable amount, at least 70% hold
+ * an amount in exactly one of the two.
+ *
+ * @param rows Sample data rows.
+ * @param colA Index of the first column.
+ * @param colB Index of the second column.
+ * @returns True when the columns are sparse and complementary.
+ */
+function isSparseComplementary(
+  rows: string[][],
+  colA: number,
+  colB: number
+): boolean {
+  // A cell holds an amount when it is non-blank and parses to a number
+  const holdsAmount = (raw: string | undefined): boolean => {
+    const text = raw?.trim();
+    if (text == null || text === "") return false;
+    return !isNaN(parseFrenchAmount(text));
+  };
+
+  let populated = 0;
+  let exclusive = 0;
+
+  for (const row of rows) {
+    const inA = holdsAmount(row[colA]);
+    const inB = holdsAmount(row[colB]);
+
+    if (inA || inB) {
+      populated += 1;
+      // Complementary: exactly one has a value
+      if (inA !== inB) {
+        exclusive += 1;
+      }
+    }
+  }
+
+  return populated > 0 && exclusive / populated >= 0.7;
+}