feat: add CSV auto-detect configuration button

Analyzes imported CSV files to automatically detect the delimiter, header row, date format, column mapping, amount mode, and sign convention. Running-balance columns are excluded from the amount mapping by checking consecutive-row arithmetic.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 18:09:35 +00:00 · 2026-02-11 18:09:35 +00:00 · bd6ff3deac
commit bd6ff3deac
parent 6037c87846
6 changed files with 545 additions and 6 deletions
--- a/src/components/import/SourceConfigPanel.tsx
+++ b/src/components/import/SourceConfigPanel.tsx
@ -1,4 +1,5 @@
 import { useTranslation } from "react-i18next";
+import { Wand2 } from "lucide-react";
 import type {
  ScannedSource,
  ScannedFile,
@ -16,6 +17,8 @@ interface SourceConfigPanelProps {
  onConfigChange: (config: SourceConfig) => void;
  onFileToggle: (file: ScannedFile) => void;
  onSelectAllFiles: () => void;
+  onAutoDetect: () => void;
+  isLoading?: boolean;
 }

 export default function SourceConfigPanel({
@ -26,6 +29,8 @@ export default function SourceConfigPanel({
  onConfigChange,
  onFileToggle,
  onSelectAllFiles,
+  onAutoDetect,
+  isLoading,
 }: SourceConfigPanelProps) {
  const { t } = useTranslation();

@ -39,9 +44,19 @@ export default function SourceConfigPanel({

  return (
    <div className="space-y-6">
-      <h2 className="text-lg font-semibold">
-        {t("import.config.title")} — {source.folder_name}
-      </h2>
+      <div className="flex items-center justify-between">
+        <h2 className="text-lg font-semibold">
+          {t("import.config.title")} — {source.folder_name}
+        </h2>
+        <button
+          onClick={onAutoDetect}
+          disabled={isLoading || selectedFiles.length === 0}
+          className="flex items-center gap-1.5 px-3 py-1.5 text-sm rounded-lg bg-[var(--primary)] text-white hover:opacity-90 disabled:opacity-50 transition-opacity"
+        >
+          <Wand2 size={16} />
+          {t("import.config.autoDetect")}
+        </button>
+      </div>

      {/* Source name */}
      <div>
@ -102,6 +117,7 @@ export default function SourceConfigPanel({
            <option value="DD/MM/YYYY">DD/MM/YYYY</option>
            <option value="MM/DD/YYYY">MM/DD/YYYY</option>
            <option value="YYYY-MM-DD">YYYY-MM-DD</option>
+            <option value="YYYY/MM/DD">YYYY/MM/DD</option>
            <option value="DD-MM-YYYY">DD-MM-YYYY</option>
            <option value="DD.MM.YYYY">DD.MM.YYYY</option>
            <option value="YYYYMMDD">YYYYMMDD</option>
--- a/src/hooks/useImportWizard.ts
+++ b/src/hooks/useImportWizard.ts
@ -35,6 +35,10 @@ import {
 import { categorizeBatch } from "../services/categorizationService";
 import { parseDate } from "../utils/dateParser";
 import { parseFrenchAmount } from "../utils/amountParser";
+import {
+  preprocessQuotedCSV,
+  autoDetectConfig as runAutoDetect,
+} from "../utils/csvAutoDetect";

 interface WizardState {
  step: ImportWizardStep;
@ -429,7 +433,9 @@ export function useImportWizard() {
          encoding: config.encoding,
        });

-        const parsed = Papa.parse(content, {
+        const preprocessed = preprocessQuotedCSV(content);
+
+        const parsed = Papa.parse(preprocessed, {
          delimiter: config.delimiter,
          skipEmptyLines: true,
        });
@ -772,6 +778,57 @@ export function useImportWizard() {
    dispatch({ type: "RESET" });
  }, []);

+  /**
+   * Runs CSV auto-detection on the first selected file and applies the
+   * detected settings on top of the current source config.
+   *
+   * On success the column headers are reloaded with the new config. When
+   * detection returns nothing, or reading/parsing throws, an error message
+   * is stored in the wizard state instead. The loading flag is always
+   * cleared on exit so the UI cannot get stuck in a loading state.
+   */
+  const autoDetectConfig = useCallback(async () => {
+    const firstFile = state.selectedFiles[0];
+    if (!firstFile) return;
+
+    dispatch({ type: "SET_LOADING", payload: true });
+    dispatch({ type: "SET_ERROR", payload: null });
+
+    try {
+      const content = await invoke<string>("read_file_content", {
+        filePath: firstFile.file_path,
+        encoding: state.sourceConfig.encoding,
+      });
+
+      // runAutoDetect unwraps whole-line-quoted files itself, so the raw
+      // content is passed as-is rather than being unwrapped twice.
+      const result = runAutoDetect(content);
+
+      if (!result) {
+        dispatch({
+          type: "SET_ERROR",
+          payload: "Auto-detection failed. Please configure manually.",
+        });
+        return;
+      }
+
+      const newConfig = {
+        ...state.sourceConfig,
+        delimiter: result.delimiter,
+        hasHeader: result.hasHeader,
+        skipLines: result.skipLines,
+        dateFormat: result.dateFormat,
+        columnMapping: result.columnMapping,
+        amountMode: result.amountMode,
+        signConvention: result.signConvention,
+      };
+      dispatch({ type: "SET_SOURCE_CONFIG", payload: newConfig });
+
+      // Refresh column headers with new config
+      await loadHeadersWithConfig(
+        firstFile.file_path,
+        newConfig.delimiter,
+        newConfig.encoding,
+        newConfig.skipLines,
+        newConfig.hasHeader
+      );
+    } catch (e) {
+      dispatch({
+        type: "SET_ERROR",
+        payload: e instanceof Error ? e.message : String(e),
+      });
+    } finally {
+      // Runs on success, on the early "no result" return, and after errors.
+      dispatch({ type: "SET_LOADING", payload: false });
+    }
+  }, [state.selectedFiles, state.sourceConfig, loadHeadersWithConfig]);
+
  return {
    state,
    browseFolder,
@ -785,6 +842,7 @@ export function useImportWizard() {
    executeImport,
    goToStep,
    reset,
+    autoDetectConfig,
    toggleDuplicateRow: (index: number) =>
      dispatch({ type: "TOGGLE_DUPLICATE_ROW", payload: index }),
    setSkipAllDuplicates: (skipAll: boolean) =>
--- a/src/i18n/locales/en.json
+++ b/src/i18n/locales/en.json
@ -82,7 +82,8 @@
      "debitColumn": "Debit column",
      "creditColumn": "Credit column",
      "selectFiles": "Files to import",
-      "selectAll": "Select all"
+      "selectAll": "Select all",
+      "autoDetect": "Auto-detect"
    },
    "preview": {
      "title": "Data Preview",
--- a/src/i18n/locales/fr.json
+++ b/src/i18n/locales/fr.json
@ -82,7 +82,8 @@
      "debitColumn": "Colonne débit",
      "creditColumn": "Colonne crédit",
      "selectFiles": "Fichiers à importer",
-      "selectAll": "Tout sélectionner"
+      "selectAll": "Tout sélectionner",
+      "autoDetect": "Auto-détecter"
    },
    "preview": {
      "title": "Aperçu des données",
--- a/src/pages/ImportPage.tsx
+++ b/src/pages/ImportPage.tsx
@ -28,6 +28,7 @@ export default function ImportPage() {
    executeImport,
    goToStep,
    reset,
+    autoDetectConfig,
    toggleDuplicateRow,
    setSkipAllDuplicates,
  } = useImportWizard();
@ -80,6 +81,8 @@ export default function ImportPage() {
            onConfigChange={updateConfig}
            onFileToggle={toggleFile}
            onSelectAllFiles={selectAllFiles}
+            onAutoDetect={autoDetectConfig}
+            isLoading={state.isLoading}
          />
          <WizardNavigation
            onBack={() => goToStep("source-list")}
--- a/src/utils/csvAutoDetect.ts
+++ b/src/utils/csvAutoDetect.ts
@ -0,0 +1,460 @@
+import Papa from "papaparse";
+import { parseDate } from "./dateParser";
+import { parseFrenchAmount } from "./amountParser";
+import type { ColumnMapping, AmountMode, SignConvention } from "../shared/types";
+
+/** Configuration suggested by autoDetectConfig for a CSV file. */
+export interface AutoDetectResult {
+  /** Field separator picked from the candidate delimiters. */
+  delimiter: string;
+  /** True when the first parsed row contains no parseable date or number. */
+  hasHeader: boolean;
+  /** Leading lines to skip; the detector currently always reports 0. */
+  skipLines: number;
+  /** Date format string that parsed the detected date column best. */
+  dateFormat: string;
+  /** Column indices for date, description, and amount (or debit/credit). */
+  columnMapping: ColumnMapping;
+  /** "single" for one amount column, "debit_credit" for a split pair. */
+  amountMode: AmountMode;
+  /** Sign convention; stays "negative_expense" in debit/credit mode. */
+  signConvention: SignConvention;
+}
+
+// Date formats tried when testing cells for dates. Order matters:
+// detectDateColumn only replaces its best match on a strictly higher
+// success rate, so on a tie the earlier format in this list wins.
+const DATE_FORMATS = [
+  "DD/MM/YYYY",
+  "MM/DD/YYYY",
+  "YYYY-MM-DD",
+  "YYYY/MM/DD",
+  "DD-MM-YYYY",
+  "DD.MM.YYYY",
+  "YYYYMMDD",
+];
+
+// Candidate field separators tried by detectDelimiter, in priority order
+// (an earlier entry wins when two delimiters reach the same score).
+const DELIMITERS = [",", ";", "\t"];
+
+/**
+ * Detect and unwrap Desjardins-style CSVs where each entire line is
+ * wrapped in quotes with "" escaping inside.
+ *
+ * A file is treated as line-wrapped only when every non-empty line
+ * starts and ends with a quote, contains `,""`, and has all of its inner
+ * quotes doubled. The last condition keeps ordinary fully quoted CSVs with
+ * empty fields (e.g. `"a","","b"`) from being mistaken for wrapped lines.
+ *
+ * @param content - Raw file content.
+ * @returns The unwrapped content joined with "\n" (each line trimmed), or
+ *   `content` unchanged when the file is not line-wrapped.
+ */
+export function preprocessQuotedCSV(content: string): string {
+  const lines = content.split(/\r?\n/);
+  const nonEmpty = lines.map((l) => l.trim()).filter((l) => l !== "");
+  if (nonEmpty.length === 0) return content;
+
+  const isLineWrapped = (t: string): boolean => {
+    if (!t.startsWith('"') || !t.endsWith('"') || !t.includes(',""')) {
+      return false;
+    }
+    // Wrapping doubles every original quote, so once all "" pairs are
+    // removed from the inner text no quote character may remain.
+    return !t.slice(1, -1).replace(/""/g, "").includes('"');
+  };
+
+  if (!nonEmpty.every(isLineWrapped)) return content;
+
+  return lines
+    .map((l) => {
+      const t = l.trim();
+      if (!t) return "";
+      // Drop the outer quotes, then collapse the doubled inner quotes.
+      return t.slice(1, -1).replace(/""/g, '"');
+    })
+    .join("\n");
+}
+
+/**
+ * Analyze raw CSV content and return a suggested configuration,
+ * or null if detection fails.
+ *
+ * Only the start of the file is sampled: the first 10 non-empty lines for
+ * the delimiter and up to 20 data rows for everything else. Detection runs
+ * in steps: delimiter, header row, date column/format, numeric columns,
+ * running-balance exclusion, description column, amount mode.
+ *
+ * @param rawContent - File content; whole-line-quoted files are unwrapped
+ *   with preprocessQuotedCSV before analysis.
+ * @returns The suggested settings, or null when there are fewer than two
+ *   non-empty lines/rows, or no delimiter, date column, or amount column
+ *   can be determined.
+ */
+export function autoDetectConfig(rawContent: string): AutoDetectResult | null {
+  const content = preprocessQuotedCSV(rawContent);
+  const lines = content.split(/\r?\n/).filter((l) => l.trim());
+  if (lines.length < 2) return null;
+
+  // Step 1: Detect delimiter
+  const delimiter = detectDelimiter(lines.slice(0, 10));
+  if (!delimiter) return null;
+
+  const parsed = Papa.parse(content, { delimiter, skipEmptyLines: true });
+  const data = parsed.data as string[][];
+  if (data.length < 2) return null;
+
+  // Step 2: Detect header
+  const hasHeader = detectHeader(data[0]);
+
+  const dataStartIdx = hasHeader ? 1 : 0;
+  const sampleRows = data.slice(dataStartIdx, dataStartIdx + 20);
+  if (sampleRows.length === 0) return null;
+
+  // Widest of the first 10 rows, so ragged rows do not hide columns.
+  const colCount = Math.max(...data.slice(0, 10).map((r) => r.length));
+
+  // Step 3: Detect date column + format
+  const dateResult = detectDateColumn(sampleRows, colCount);
+  if (!dateResult) return null;
+
+  // Step 4: Detect numeric columns. The date column is removed because a
+  // digits-only date (e.g. YYYYMMDD) can also look numeric and would
+  // otherwise compete as an amount or balance candidate.
+  const numericCols = detectNumericColumns(sampleRows, colCount).filter(
+    (c) => c !== dateResult.column
+  );
+
+  // Step 5: Detect balance columns and exclude them
+  const balanceCols = detectBalanceColumns(sampleRows, numericCols);
+  const amountCandidates = numericCols.filter((c) => !balanceCols.has(c));
+
+  // Step 6: Detect description column (all numeric columns, including
+  // balance columns, are ruled out)
+  const descriptionCol = detectDescriptionColumn(
+    sampleRows,
+    colCount,
+    dateResult.column,
+    new Set(numericCols)
+  );
+
+  // Step 7: Determine amount mode
+  const amountResult = detectAmountMode(sampleRows, amountCandidates);
+  if (!amountResult) return null;
+
+  const mapping: ColumnMapping = {
+    date: dateResult.column,
+    description: descriptionCol,
+  };
+
+  // Debit/credit mode keeps this default; single mode overrides it below.
+  let signConvention: SignConvention = "negative_expense";
+
+  if (amountResult.mode === "debit_credit") {
+    mapping.debitAmount = amountResult.debitCol;
+    mapping.creditAmount = amountResult.creditCol;
+  } else {
+    mapping.amount = amountResult.amountCol;
+    signConvention = amountResult.signConvention;
+  }
+
+  return {
+    delimiter,
+    hasHeader,
+    skipLines: 0,
+    dateFormat: dateResult.format,
+    columnMapping: mapping,
+    amountMode: amountResult.mode,
+    signConvention,
+  };
+}
+
+/**
+ * Picks the delimiter that splits the sample lines most consistently.
+ *
+ * Each candidate is scored as (share of lines whose column count equals the
+ * first line's) x (first line's column count). Candidates that leave the
+ * first line as a single column are skipped.
+ *
+ * @returns The best-scoring delimiter, or null when no candidate qualifies.
+ */
+function detectDelimiter(lines: string[]): string | null {
+  let winner: string | null = null;
+  let winnerScore = 0;
+
+  for (const candidate of DELIMITERS) {
+    const widths: number[] = [];
+    for (const line of lines) {
+      const firstRow = Papa.parse(line, { delimiter: candidate })
+        .data[0] as string[];
+      widths.push(firstRow.length);
+    }
+
+    // The first line must split into more than one column
+    if (widths.length === 0 || widths[0] <= 1) continue;
+
+    const reference = widths[0];
+    let agreeing = 0;
+    for (const width of widths) {
+      if (width === reference) agreeing++;
+    }
+
+    const candidateScore = (agreeing / widths.length) * reference;
+    if (candidateScore > winnerScore) {
+      winnerScore = candidateScore;
+      winner = candidate;
+    }
+  }
+
+  return winner;
+}
+
+/**
+ * Decides whether the first row is a header.
+ *
+ * The row counts as a header when none of its non-empty cells parses as an
+ * amount or as a date in any of the known formats.
+ */
+function detectHeader(firstRow: string[]): boolean {
+  const looksLikeDate = (text: string): boolean =>
+    DATE_FORMATS.some((fmt) => Boolean(parseDate(text, fmt)));
+
+  let sawDataValue = false;
+
+  for (const rawCell of firstRow) {
+    const text = rawCell?.trim();
+    if (!text) continue;
+
+    const numeric = !isNaN(parseFrenchAmount(text));
+    const dated = looksLikeDate(text);
+    if (numeric || dated) {
+      sawDataValue = true;
+    }
+  }
+
+  return !sawDataValue;
+}
+
+/**
+ * Finds the column/format pair whose non-empty cells parse as dates most
+ * often.
+ *
+ * Columns are scanned left to right and formats in DATE_FORMATS order; a
+ * later pair only replaces the current best on a strictly higher success
+ * rate.
+ *
+ * @returns The best pair, or null when the best success rate is below 80%.
+ */
+function detectDateColumn(
+  rows: string[][],
+  colCount: number
+): { column: number; format: string } | null {
+  let best = { column: -1, format: "", rate: 0 };
+
+  for (let col = 0; col < colCount; col++) {
+    // Non-empty trimmed cells of this column; empty columns are skipped
+    const cells: string[] = [];
+    for (const row of rows) {
+      const text = row[col]?.trim();
+      if (text) cells.push(text);
+    }
+    if (cells.length === 0) continue;
+
+    for (const fmt of DATE_FORMATS) {
+      const hits = cells.filter((text) => Boolean(parseDate(text, fmt))).length;
+      const rate = hits / cells.length;
+      if (rate > best.rate) {
+        best = { column: col, format: fmt, rate };
+      }
+    }
+  }
+
+  if (best.rate < 0.8 || best.column < 0) return null;
+
+  return { column: best.column, format: best.format };
+}
+
+/**
+ * Lists the columns in which at least half of the non-empty cells parse as
+ * an amount. Columns with no non-empty cells are never included.
+ *
+ * @returns Matching column indices in ascending order.
+ */
+function detectNumericColumns(rows: string[][], colCount: number): number[] {
+  const numericColumns: number[] = [];
+
+  for (let col = 0; col < colCount; col++) {
+    const cells: string[] = [];
+    for (const row of rows) {
+      const text = row[col]?.trim();
+      if (text) cells.push(text);
+    }
+
+    const parsable = cells.filter(
+      (text) => !isNaN(parseFrenchAmount(text))
+    ).length;
+
+    if (cells.length > 0 && parsable / cells.length >= 0.5) {
+      numericColumns.push(col);
+    }
+  }
+
+  return numericColumns;
+}
+
+/**
+ * Finds numeric columns that behave like a running balance.
+ *
+ * A column is flagged when its row-to-row difference matches, within
+ * TOLERANCE, for at least 80% of the tested consecutive row pairs (minimum
+ * 2 pairs), either:
+ *   - plus or minus the value of one other numeric column, or
+ *   - plus or minus (A - B) for some pair of other numeric columns.
+ * The amount is always read from the later row of each pair (index i).
+ *
+ * @param rows - Sample data rows (header excluded by the caller).
+ * @param numericCols - Indices of the columns considered numeric.
+ * @returns Indices of the columns detected as balances; empty when there
+ *   are fewer than 2 numeric columns or fewer than 3 rows.
+ */
+function detectBalanceColumns(
+  rows: string[][],
+  numericCols: number[]
+): Set<number> {
+  const balanceCols = new Set<number>();
+  if (numericCols.length < 2 || rows.length < 3) return balanceCols;
+
+  const TOLERANCE = 0.015; // tolerance for floating-point comparison
+
+  // Parse all numeric values once
+  // (empty or unparseable cells are stored as null)
+  const values: Map<number, (number | null)[]> = new Map();
+  for (const col of numericCols) {
+    values.set(
+      col,
+      rows.map((row) => {
+        const cell = row[col]?.trim();
+        if (!cell) return null;
+        const v = parseFrenchAmount(cell);
+        return isNaN(v) ? null : v;
+      })
+    );
+  }
+
+  for (const balCol of numericCols) {
+    const balVals = values.get(balCol)!;
+
+    // Test single-column balance: balance[i] ≈ balance[i-1] ± amount[i]
+    for (const amtCol of numericCols) {
+      if (amtCol === balCol) continue;
+      const amtVals = values.get(amtCol)!;
+
+      let matches = 0;
+      let tested = 0;
+
+      for (let i = 1; i < rows.length; i++) {
+        // Row pairs with any missing value are not counted as tested
+        if (balVals[i] === null || balVals[i - 1] === null || amtVals[i] === null)
+          continue;
+        tested++;
+
+        const diff = balVals[i]! - balVals[i - 1]!;
+        // balance[i] = balance[i-1] + amount[i] OR balance[i] = balance[i-1] - amount[i]
+        if (
+          Math.abs(diff - amtVals[i]!) < TOLERANCE ||
+          Math.abs(diff + amtVals[i]!) < TOLERANCE
+        ) {
+          matches++;
+        }
+      }
+
+      if (tested >= 2 && matches / tested >= 0.8) {
+        balanceCols.add(balCol);
+        break;
+      }
+    }
+
+    // Already flagged by the single-column test; skip the pair test
+    if (balanceCols.has(balCol)) continue;
+
+    // Test two-column balance: balance[i] ≈ balance[i-1] - debit[i] + credit[i]
+    for (let a = 0; a < numericCols.length; a++) {
+      for (let b = a + 1; b < numericCols.length; b++) {
+        const colA = numericCols[a];
+        const colB = numericCols[b];
+        if (colA === balCol || colB === balCol) continue;
+
+        const valsA = values.get(colA)!;
+        const valsB = values.get(colB)!;
+
+        let matches = 0;
+        let tested = 0;
+
+        for (let i = 1; i < rows.length; i++) {
+          if (balVals[i] === null || balVals[i - 1] === null) continue;
+          // A missing debit/credit cell counts as 0 here
+          const da = valsA[i] ?? 0;
+          const db = valsB[i] ?? 0;
+          tested++;
+
+          const diff = balVals[i]! - balVals[i - 1]!;
+          // Try both orderings: diff ≈ -colA + colB or diff ≈ colA - colB
+          if (
+            Math.abs(diff - (-da + db)) < TOLERANCE ||
+            Math.abs(diff - (da - db)) < TOLERANCE
+          ) {
+            matches++;
+          }
+        }
+
+        if (tested >= 2 && matches / tested >= 0.8) {
+          balanceCols.add(balCol);
+          break;
+        }
+      }
+      // Propagate the inner break out of the pair search
+      if (balanceCols.has(balCol)) break;
+    }
+  }
+
+  return balanceCols;
+}
+
+/**
+ * Picks the description column: among columns that are neither the date
+ * column nor numeric, the one whose non-empty trimmed cells are longest on
+ * average.
+ *
+ * @returns The winning column index, or 0 when no column qualifies. On a
+ *   tie the leftmost column wins.
+ */
+function detectDescriptionColumn(
+  rows: string[][],
+  colCount: number,
+  dateCol: number,
+  numericCols: Set<number>
+): number {
+  let winner = 0;
+  let winnerMeanLength = 0;
+
+  for (let col = 0; col < colCount; col++) {
+    const excluded = col === dateCol || numericCols.has(col);
+    if (excluded) continue;
+
+    const lengths: number[] = [];
+    for (const row of rows) {
+      const text = row[col]?.trim();
+      if (text) lengths.push(text.length);
+    }
+
+    let meanLength = 0;
+    if (lengths.length > 0) {
+      const sum = lengths.reduce((acc, len) => acc + len, 0);
+      meanLength = sum / lengths.length;
+    }
+
+    if (meanLength > winnerMeanLength) {
+      winnerMeanLength = meanLength;
+      winner = col;
+    }
+  }
+
+  return winner;
+}
+
+/** Outcome when a single column holds the signed amounts. */
+interface SingleAmountResult {
+  mode: "single";
+  /** Index of the amount column. */
+  amountCol: number;
+  /** Sign convention inferred from the share of negative values. */
+  signConvention: SignConvention;
+}
+
+/** Outcome when debits and credits live in two separate columns. */
+interface DebitCreditResult {
+  mode: "debit_credit";
+  /** Index of the column mapped as debit. */
+  debitCol: number;
+  /** Index of the column mapped as credit. */
+  creditCol: number;
+}
+
+/** Result of detectAmountMode, discriminated by `mode`. */
+type AmountModeResult = SingleAmountResult | DebitCreditResult;
+
+/**
+ * Chooses between a single amount column and a debit/credit pair.
+ *
+ * With several candidates, the first pair (in index order) that is sparse
+ * and complementary becomes debit (left column) and credit (right column).
+ * Otherwise the first candidate is used as a single amount column.
+ *
+ * @returns The detected mode, or null when there are no candidates.
+ */
+function detectAmountMode(
+  rows: string[][],
+  amountCandidates: number[]
+): AmountModeResult | null {
+  if (amountCandidates.length === 0) return null;
+
+  if (amountCandidates.length > 1) {
+    // Look for a debit/credit pair: rows fill one column or the other
+    for (let i = 0; i < amountCandidates.length; i++) {
+      const left = amountCandidates[i];
+      for (const right of amountCandidates.slice(i + 1)) {
+        if (isSparseComplementary(rows, left, right)) {
+          return { mode: "debit_credit", debitCol: left, creditCol: right };
+        }
+      }
+    }
+  }
+
+  // Single candidate, or no complementary pair: use the first candidate
+  return detectSingleAmount(rows, amountCandidates[0]);
+}
+
+/**
+ * Builds the single-amount result for a column and infers its sign
+ * convention: "negative_expense" when more than half of the parseable
+ * values are negative, "positive_expense" otherwise (including when no
+ * value parses).
+ */
+function detectSingleAmount(
+  rows: string[][],
+  col: number
+): SingleAmountResult {
+  const amounts: number[] = [];
+
+  for (const row of rows) {
+    const text = row[col]?.trim();
+    if (!text) continue;
+    const parsedValue = parseFrenchAmount(text);
+    if (!isNaN(parsedValue)) amounts.push(parsedValue);
+  }
+
+  const negatives = amounts.filter((value) => value < 0).length;
+  const mostlyNegative =
+    amounts.length > 0 && negatives / amounts.length > 0.5;
+
+  let signConvention: SignConvention;
+  if (mostlyNegative) {
+    signConvention = "negative_expense";
+  } else {
+    signConvention = "positive_expense";
+  }
+
+  return { mode: "single", amountCol: col, signConvention };
+}
+
+/**
+ * Tells whether two columns look like a debit/credit pair.
+ *
+ * Rows where neither column holds a parseable amount are ignored. The pair
+ * qualifies when, in at least 70% of the remaining rows, exactly one of the
+ * two columns holds an amount.
+ */
+function isSparseComplementary(
+  rows: string[][],
+  colA: number,
+  colB: number
+): boolean {
+  const holdsAmount = (text: string | undefined): boolean => {
+    if (text == null || text === "") return false;
+    return !isNaN(parseFrenchAmount(text));
+  };
+
+  let exclusiveRows = 0;
+  let consideredRows = 0;
+
+  for (const row of rows) {
+    const inA = holdsAmount(row[colA]?.trim());
+    const inB = holdsAmount(row[colB]?.trim());
+
+    if (inA || inB) {
+      consideredRows++;
+      // Exactly one side filled
+      if (inA !== inB) exclusiveRows++;
+    }
+  }
+
+  if (consideredRows === 0) return false;
+  return exclusiveRows / consideredRows >= 0.7;
+}