Files
trader-ml/docs/BACKTESTING_GUIDE.md
Tika da30ef19ed Initial commit — Trading AI Secure project complet
Architecture Docker (8 services), FastAPI, TimescaleDB, Redis, Streamlit.
Stratégies : scalping, intraday, swing. MLEngine + RegimeDetector (HMM).
BacktestEngine + WalkForwardAnalyzer + Optuna optimizer.
Routes API complètes dont /optimize async.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-08 17:38:09 +00:00

19 KiB

🧪 Guide de Backtesting - Trading AI Secure

📋 Table des Matières

  1. Philosophie Anti-Overfitting
  2. Walk-Forward Analysis
  3. Out-of-Sample Testing
  4. Monte Carlo Simulation
  5. Paper Trading
  6. Métriques de Validation
  7. Implémentation

🎯 Philosophie Anti-Overfitting

Principe Fondamental

"Une stratégie qui performe parfaitement en backtest est probablement sur-optimisée"

Les 7 Règles d'Or du Backtesting

  1. Toujours réserver 30% des données pour out-of-sample
  2. Utiliser la walk-forward analysis (pas de look-ahead bias)
  3. Valider avec Monte Carlo (10,000+ simulations)
  4. Paper trading obligatoire 30 jours minimum
  5. Inclure coûts réalistes (slippage, commissions)
  6. Tester sur plusieurs marchés et plusieurs périodes
  7. Documenter tous les paramètres testés (éviter cherry-picking)

🔄 Walk-Forward Analysis

Concept

La walk-forward analysis simule le trading réel en :

  1. Entraînant sur une fenêtre passée
  2. Testant sur la fenêtre suivante
  3. Glissant les fenêtres progressivement
┌────────────────────────────────────────────────────────────┐
│              WALK-FORWARD ANALYSIS                         │
├────────────────────────────────────────────────────────────┤
│                                                            │
│  Période 1:                                               │
│  [====TRAIN====][TEST]                                    │
│                                                            │
│  Période 2:                                               │
│      [====TRAIN====][TEST]                                │
│                                                            │
│  Période 3:                                               │
│          [====TRAIN====][TEST]                            │
│                                                            │
│  Période 4:                                               │
│              [====TRAIN====][TEST]                        │
│                                                            │
└────────────────────────────────────────────────────────────┘

Implémentation

# src/backtesting/walk_forward.py

from typing import Dict, List, Tuple
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

class WalkForwardAnalyzer:
    """
    Walk-forward analysis engine.

    Repeatedly optimizes strategy parameters on a training window, evaluates
    them on the window that immediately follows, then slides both windows
    forward by ``step_size`` rows.

    Parameters:
    - train_window: Training window size in rows (e.g. 252 days = 1 year)
    - test_window: Test window size in rows (e.g. 63 days = 3 months)
    - step_size: Sliding step in rows (e.g. 21 days = 1 month)
    - initial_capital: Starting equity of every test-window backtest
    - slippage: Fractional slippage applied against the trade on entry and exit
    - max_holding_bars: Maximum number of bars a simulated trade may stay open

    Note: ``self.results`` is appended to by every ``run()`` call and is never
    cleared, so calling ``run()`` twice on one instance aggregates both runs.
    """

    def __init__(
        self,
        train_window: int = 252,
        test_window: int = 63,
        step_size: int = 21,
        initial_capital: float = 10000,
        slippage: float = 0.001,
        max_holding_bars: int = 100
    ):
        if train_window <= 0 or test_window <= 0:
            raise ValueError("train_window and test_window must be positive")
        if step_size <= 0:
            # A non-positive step would never advance the window in run().
            raise ValueError("step_size must be positive")
        if initial_capital <= 0:
            raise ValueError("initial_capital must be positive")
        if max_holding_bars <= 0:
            raise ValueError("max_holding_bars must be positive")

        self.train_window = train_window
        self.test_window = test_window
        self.step_size = step_size
        self.initial_capital = initial_capital
        self.slippage = slippage
        self.max_holding_bars = max_holding_bars

        self.results = []

    def run(
        self,
        data: pd.DataFrame,
        strategy_class,
        optimization_func
    ) -> Dict:
        """
        Run the walk-forward analysis.

        Args:
            data: Full historical data set, ordered oldest to newest
            strategy_class: Strategy class; instantiated as
                ``strategy_class(optimal_params)`` for every walk
            optimization_func: Callable invoked as
                ``optimization_func(train_data, strategy_class)``; its return
                value is passed to the strategy constructor

        Returns:
            Aggregated results of all walks (empty dict when ``data`` is too
            short for a single train + test window)
        """
        total_length = len(data)
        current_pos = 0

        while current_pos + self.train_window + self.test_window <= total_length:
            # Training window
            train_end = current_pos + self.train_window
            train_data = data.iloc[current_pos:train_end]

            # Test window starts exactly where the training window ends
            test_end = train_end + self.test_window
            test_data = data.iloc[train_end:test_end]

            # Optimize on the training window only
            optimal_params = optimization_func(train_data, strategy_class)

            # Evaluate the optimized parameters on unseen data
            strategy = strategy_class(optimal_params)
            test_results = self._backtest_strategy(strategy, test_data)

            self.results.append({
                'train_period': (train_data.index[0], train_data.index[-1]),
                'test_period': (test_data.index[0], test_data.index[-1]),
                'optimal_params': optimal_params,
                'test_sharpe': test_results['sharpe'],
                'test_returns': test_results['total_return'],
                'test_max_dd': test_results['max_drawdown'],
                'test_win_rate': test_results['win_rate'],
                'num_trades': test_results['num_trades']
            })

            # Slide both windows forward
            current_pos += self.step_size

        return self._aggregate_results()

    def _backtest_strategy(
        self,
        strategy,
        data: pd.DataFrame
    ) -> Dict:
        """
        Backtest a strategy bar by bar on ``data``.

        The strategy only ever sees rows up to and including the current bar.
        Each trade's fractional ``pnl`` is compounded into the equity curve.

        Returns:
            Dict with 'total_return', 'sharpe', 'max_drawdown', 'win_rate',
            'num_trades' and 'equity_curve'.
        """
        trades = []
        equity_curve = [float(self.initial_capital)]

        for i in range(len(data)):
            signal = strategy.analyze(data.iloc[:i + 1])

            if signal:
                # NOTE(review): the exit scan starts on the signal bar itself,
                # so that bar's high/low can trigger an exit. Confirm this is
                # consistent with how signal.entry_price is defined.
                trade_result = self._simulate_trade(
                    signal,
                    data.iloc[i:i + self.max_holding_bars]
                )
                trades.append(trade_result)
                # pnl is a fractional return, so it scales the current equity
                # rather than being added to it as a currency amount.
                equity_curve.append(equity_curve[-1] * (1 + trade_result['pnl']))

        returns = pd.Series(equity_curve).pct_change().dropna()
        winners = sum(1 for t in trades if t['pnl'] > 0)

        return {
            'total_return': (equity_curve[-1] - equity_curve[0]) / equity_curve[0],
            'sharpe': self._calculate_sharpe(returns),
            'max_drawdown': self._calculate_max_drawdown(equity_curve),
            'win_rate': winners / len(trades) if trades else 0,
            'num_trades': len(trades),
            'equity_curve': equity_curve
        }

    def _simulate_trade(
        self,
        signal: 'Signal',
        future_data: pd.DataFrame
    ) -> Dict:
        """
        Simulate the execution of one trade.

        Args:
            signal: Object exposing ``entry_price``, ``stop_loss``,
                ``take_profit`` and ``direction`` ('LONG', anything else is
                treated as short)
            future_data: Bars available to the trade, with 'high', 'low' and
                'close' columns

        Returns:
            Dict with the fractional 'pnl', the 'exit_reason' and
            'holding_bars' (number of bars consumed, the exit bar included).

        Raises:
            ValueError: if ``future_data`` has no rows.
        """
        if len(future_data) == 0:
            raise ValueError("future_data must contain at least one bar")

        is_long = signal.direction == 'LONG'
        stop_loss = signal.stop_loss
        take_profit = signal.take_profit
        slippage = self.slippage

        # Slippage always works against the trader: pay more to buy,
        # receive less to sell.
        entry_price = signal.entry_price * (1 + slippage if is_long else 1 - slippage)
        exit_factor = 1 - slippage if is_long else 1 + slippage

        def closed(exit_price, reason, bars):
            if is_long:
                pnl = (exit_price - entry_price) / entry_price
            else:
                pnl = (entry_price - exit_price) / entry_price
            return {'pnl': pnl, 'exit_reason': reason, 'holding_bars': bars}

        highs = future_data['high'].to_numpy()
        lows = future_data['low'].to_numpy()

        # Count bars positionally; the DataFrame index may hold timestamps.
        for bars_held, (high, low) in enumerate(zip(highs, lows), start=1):
            # The stop-loss is tested first, so a bar that touches both
            # levels is booked as a loss.
            if is_long:
                if low <= stop_loss:
                    return closed(stop_loss * exit_factor, 'stop_loss', bars_held)
                if high >= take_profit:
                    return closed(take_profit * exit_factor, 'take_profit', bars_held)
            else:
                if high >= stop_loss:
                    return closed(stop_loss * exit_factor, 'stop_loss', bars_held)
                if low <= take_profit:
                    return closed(take_profit * exit_factor, 'take_profit', bars_held)

        # Timeout: neither level was hit, close at the last available close
        # (no exit slippage is applied here).
        return closed(future_data.iloc[-1]['close'], 'timeout', len(future_data))

    def _aggregate_results(self) -> Dict:
        """
        Aggregate the results of all recorded walks.

        Returns:
            Empty dict when no walk was recorded, otherwise summary
            statistics plus the raw per-walk records under 'all_walks'.
        """
        if not self.results:
            return {}

        sharpes = [r['test_sharpe'] for r in self.results]
        returns = [r['test_returns'] for r in self.results]
        drawdowns = [r['test_max_dd'] for r in self.results]
        win_rates = [r['test_win_rate'] for r in self.results]

        return {
            'num_walks': len(self.results),
            'avg_sharpe': np.mean(sharpes),
            'std_sharpe': np.std(sharpes),
            'min_sharpe': np.min(sharpes),
            'max_sharpe': np.max(sharpes),
            'avg_return': np.mean(returns),
            'avg_max_dd': np.mean(drawdowns),
            # Drawdowns are <= 0, so the worst one is the minimum.
            'worst_max_dd': np.min(drawdowns),
            'avg_win_rate': np.mean(win_rates),
            # Share of walks whose test Sharpe exceeds 1.0
            'consistency': sum(1 for s in sharpes if s > 1.0) / len(sharpes),
            'all_walks': self.results
        }

    def _calculate_sharpe(self, returns: pd.Series, risk_free=0.02) -> float:
        """
        Compute the Sharpe ratio, annualized with a factor of sqrt(252).

        ``risk_free`` is an annual rate and is divided by 252 before being
        subtracted from each return. Returns 0.0 when the ratio is undefined
        (no returns, or no variance in the returns).
        """
        values = np.asarray(returns, dtype=float)
        if values.size == 0:
            return 0.0

        excess_returns = values - risk_free / 252
        volatility = np.std(excess_returns)
        # Written so that a NaN volatility also takes the fallback.
        if not volatility > 1e-12:
            return 0.0
        return float(np.mean(excess_returns) / volatility * np.sqrt(252))

    def _calculate_max_drawdown(self, equity_curve: List[float]) -> float:
        """
        Compute the maximum drawdown of an equity curve.

        Returns a value <= 0 (e.g. -0.10 for a 10% drawdown); 0.0 for an
        empty curve.
        """
        curve = np.asarray(equity_curve, dtype=float)
        if curve.size == 0:
            return 0.0
        peak = np.maximum.accumulate(curve)
        return float(np.min((curve - peak) / peak))

📊 Out-of-Sample Testing

Principe

Ne jamais optimiser sur 100 % des données !

┌────────────────────────────────────────────────────────────┐
│           SPLIT IN-SAMPLE / OUT-OF-SAMPLE                  │
├────────────────────────────────────────────────────────────┤
│                                                            │
│  [========== IN-SAMPLE 70% ==========][OUT-SAMPLE 30%]    │
│                                                            │
│  Optimisation paramètres              Validation finale   │
│  Walk-forward analysis                Performance réelle  │
│  Tuning hyperparamètres               JAMAIS touché       │
│                                                            │
└────────────────────────────────────────────────────────────┘

Implémentation

class OutOfSampleValidator:
    """
    Strict out-of-sample validation.

    Splits a data set chronologically into an in-sample part (used for
    optimization) and an out-of-sample part (used only for final validation),
    then measures how much performance degrades between the two.

    NOTE(review): ``validate`` calls ``self._backtest(strategy, data)``, which
    is not defined in this class as shown. It presumably has to be supplied by
    a subclass and return a dict with 'sharpe', 'total_return' and 'win_rate'
    keys (the keys read by ``_calculate_degradation``) - confirm.
    """

    def __init__(self, oos_ratio=0.30):
        """
        Args:
            oos_ratio: Fraction of the data reserved for out-of-sample
                testing; must be strictly between 0 and 1.

        Raises:
            ValueError: if ``oos_ratio`` is outside (0, 1).
        """
        if not 0 < oos_ratio < 1:
            raise ValueError("oos_ratio must be strictly between 0 and 1")
        self.oos_ratio = oos_ratio

    def split_data(
        self,
        data: pd.DataFrame
    ) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Split the data into (in-sample, out-of-sample) by row position.

        The out-of-sample part is always the tail of ``data``; rows are not
        shuffled.
        """
        split_point = int(len(data) * (1 - self.oos_ratio))

        in_sample = data.iloc[:split_point]
        out_of_sample = data.iloc[split_point:]

        return in_sample, out_of_sample

    def validate(
        self,
        strategy,
        in_sample_data: pd.DataFrame,
        out_of_sample_data: pd.DataFrame
    ) -> Dict:
        """
        Validate a strategy on out-of-sample data.

        Returns:
            Dict with the 'in_sample' and 'out_of_sample' backtest results,
            the 'degradation' between them and the 'is_valid' verdict.
        """
        # In-sample performance
        is_results = self._backtest(strategy, in_sample_data)

        # Out-of-sample performance (the one that matters)
        oos_results = self._backtest(strategy, out_of_sample_data)

        degradation = self._calculate_degradation(is_results, oos_results)

        return {
            'in_sample': is_results,
            'out_of_sample': oos_results,
            'degradation': degradation,
            'is_valid': self._is_valid_strategy(degradation, oos_results)
        }

    @staticmethod
    def _relative_drop(baseline, observed):
        """
        Return the drop from ``baseline`` to ``observed`` relative to
        ``abs(baseline)``; positive means ``observed`` is worse.

        Dividing by the absolute value keeps the sign meaningful for a
        negative baseline. With a zero baseline the result is 0.0 when
        nothing changed, +inf / -inf when the metric got worse / better, and
        NaN when the comparison is undefined.
        """
        gap = baseline - observed
        if baseline != 0:
            return gap / abs(baseline)
        if gap == 0:
            return 0.0
        if gap > 0:
            return float('inf')
        if gap < 0:
            return float('-inf')
        return float('nan')

    def _calculate_degradation(
        self,
        is_results: Dict,
        oos_results: Dict
    ) -> Dict:
        """
        Compute the relative performance degradation from IS to OOS.

        Each value is ``(IS - OOS) / abs(IS)``, so 0.30 means the OOS metric
        is 30% worse than its in-sample counterpart.
        """
        return {
            'sharpe_degradation': self._relative_drop(
                is_results['sharpe'], oos_results['sharpe']),
            'return_degradation': self._relative_drop(
                is_results['total_return'], oos_results['total_return']),
            'winrate_degradation': self._relative_drop(
                is_results['win_rate'], oos_results['win_rate']),
        }

    def _is_valid_strategy(self, degradation: Dict, oos_results: Dict = None) -> bool:
        """
        Apply the validation criteria.

        A strategy is valid if:
        - Sharpe degradation <= 30%
        - Return degradation <= 40%
        - OOS Sharpe > 1.0 (only checked when ``oos_results`` is given)

        A NaN metric never satisfies a criterion, so it invalidates the
        strategy.
        """
        # Each test is phrased as "not (good condition)" so that NaN fails it.
        if not degradation['sharpe_degradation'] <= 0.30:
            return False
        if not degradation['return_degradation'] <= 0.40:
            return False
        if oos_results is not None and not oos_results['sharpe'] > 1.0:
            return False

        return True

🎲 Monte Carlo Simulation

Objectif

Tester la robustesse en simulant des milliers de scénarios aléatoires

class MonteCarloSimulator:
    """
    Monte Carlo simulation used to check the robustness of a trade history.

    Every simulation replays the same trades in a random order and records
    the resulting Sharpe ratio, maximum drawdown and total return.
    """

    def __init__(self, n_simulations=10000, seed=None):
        """
        Args:
            n_simulations: Number of random orderings to evaluate (>= 1)
            seed: Optional seed for reproducible runs. When omitted, NumPy's
                global random state is used.

        Raises:
            ValueError: if ``n_simulations`` is smaller than 1.
        """
        if n_simulations < 1:
            raise ValueError("n_simulations must be at least 1")
        self.n_simulations = n_simulations
        self._rng = np.random.default_rng(seed) if seed is not None else None

    def simulate(
        self,
        historical_trades: List[Dict]
    ) -> Dict:
        """
        Simulate N scenarios by reordering the trades.

        Principle:
        - Same trades, different order
        - Tests sensitivity to the trade sequence

        Because each trade's return is compounded, the final return and the
        Sharpe ratio do not depend on the order; only the drawdown statistics
        vary between simulations.

        Args:
            historical_trades: Trades as dicts holding a fractional 'pnl'

        Returns:
            Distribution statistics (see ``_analyze_distribution``), or an
            empty dict when there are no trades to simulate.
        """
        trades = list(historical_trades)
        if not trades:
            return {}

        permute = self._rng.permutation if self._rng is not None else np.random.permutation
        results = []

        for _ in range(self.n_simulations):
            # Shuffle positions rather than the dicts themselves
            order = permute(len(trades))
            equity = self._calculate_equity_curve([trades[j] for j in order])

            results.append({
                'sharpe': self._calculate_sharpe(equity),
                'max_dd': self._calculate_max_drawdown(equity),
                'return': (equity[-1] - equity[0]) / equity[0]
            })

        return self._analyze_distribution(results)

    def _analyze_distribution(self, results: List[Dict]) -> Dict:
        """
        Summarize the distribution of the Monte Carlo results.

        Args:
            results: One dict per simulation with 'sharpe', 'max_dd'
                (<= 0) and 'return'
        """
        sharpes = np.array([r['sharpe'] for r in results], dtype=float)
        returns = np.array([r['return'] for r in results], dtype=float)
        drawdowns = np.array([r['max_dd'] for r in results], dtype=float)

        return {
            # Sharpe
            'sharpe_mean': np.mean(sharpes),
            'sharpe_median': np.median(sharpes),
            'sharpe_5th_percentile': np.percentile(sharpes, 5),
            'sharpe_95th_percentile': np.percentile(sharpes, 95),
            'prob_sharpe_positive': np.mean(sharpes > 0),
            'prob_sharpe_above_1': np.mean(sharpes > 1.0),

            # Returns
            'return_mean': np.mean(returns),
            'return_5th_percentile': np.percentile(returns, 5),
            'return_95th_percentile': np.percentile(returns, 95),
            'prob_positive_return': np.mean(returns > 0),

            # Drawdown
            'max_dd_mean': np.mean(drawdowns),
            # Drawdowns are negative, so the drawdown that 95% of the
            # simulations stay above is the 5th percentile of the signed
            # values.
            'max_dd_95th_percentile': np.percentile(drawdowns, 5),
            'prob_dd_below_10pct': np.mean(drawdowns > -0.10),
        }

    def _calculate_equity_curve(self, trades: List[Dict]) -> List[float]:
        """Build a compounded equity curve from trades, starting at 10000."""
        equity = [10000]  # Initial capital

        for trade in trades:
            # 'pnl' is a fraction of the equity held when the trade is taken
            equity.append(equity[-1] * (1 + trade['pnl']))

        return equity

    def _calculate_sharpe(self, equity: List[float], risk_free=0.02) -> float:
        """
        Compute the Sharpe ratio of an equity curve, annualized with a factor
        of sqrt(252).

        Returns 0.0 when the ratio is undefined (fewer than two points, or
        no variance in the step-to-step returns).
        """
        curve = np.asarray(equity, dtype=float)
        if curve.size < 2:
            return 0.0

        step_returns = np.diff(curve) / curve[:-1]
        excess_returns = step_returns - risk_free / 252
        volatility = np.std(excess_returns)
        # Written so that a NaN volatility also takes the fallback.
        if not volatility > 1e-12:
            return 0.0
        return float(np.mean(excess_returns) / volatility * np.sqrt(252))

    def _calculate_max_drawdown(self, equity: List[float]) -> float:
        """
        Compute the maximum drawdown of an equity curve.

        Returns a value <= 0 (e.g. -0.10 for a 10% drawdown); 0.0 for an
        empty curve.
        """
        curve = np.asarray(equity, dtype=float)
        if curve.size == 0:
            return 0.0
        peak = np.maximum.accumulate(curve)
        return float(np.min((curve - peak) / peak))

📝 Paper Trading

Protocole Strict

class PaperTradingEngine:
    """
    Paper trading under realistic conditions.

    Rules:
    - Minimum 30 days of trading
    - Real-time data (not historical)
    - Realistic slippage and commissions
    - Simulated latency
    - Daily validation

    Only the go-live gate is implemented here. Nothing in this class records
    trades, so ``self.trades`` has to be filled by the caller.
    """

    def __init__(self, min_days=30):
        self.min_days = min_days
        self.start_date = None   # set by start()
        self.trades = []         # assumes dicts with a fractional 'pnl' - TODO confirm
        self.daily_metrics = []

    def start(self):
        """Start paper trading and record the start time."""
        # Imported locally: the surrounding file defines no module logger.
        import logging

        self.start_date = datetime.now()
        logging.getLogger(__name__).info(
            "Paper trading started. Minimum duration: %s days", self.min_days
        )

    def can_go_live(self) -> Tuple[bool, str]:
        """
        Check whether the strategy may be promoted to live trading.

        Criteria:
        - Minimum ``min_days`` days elapsed since ``start()``
        - Minimum 50 trades
        - Sharpe >= 1.5
        - Max DD <= 10%
        - Win rate >= 55%

        Returns:
            ``(True, message)`` when every criterion is met, otherwise
            ``(False, reason)`` for the first criterion that fails.
        """
        if not self.start_date:
            return False, "Paper trading not started"

        days_elapsed = (datetime.now() - self.start_date).days

        if days_elapsed < self.min_days:
            return False, f"Only {days_elapsed}/{self.min_days} days completed"

        # Checked before the metrics: statistics on too few trades are
        # meaningless.
        if len(self.trades) < 50:
            return False, f"Only {len(self.trades)} trades (minimum 50)"

        metrics = self._calculate_metrics()

        if metrics['sharpe'] < 1.5:
            return False, f"Sharpe {metrics['sharpe']:.2f} below 1.5"

        # Compare the magnitude so the check holds whether the drawdown is
        # reported as 0.12 or as -0.12.
        if abs(metrics['max_dd']) > 0.10:
            return False, f"Max DD {metrics['max_dd']:.2%} above 10%"

        if metrics['win_rate'] < 0.55:
            return False, f"Win rate {metrics['win_rate']:.2%} below 55%"

        return True, "All criteria met. Ready for live trading."

    def _calculate_metrics(self) -> Dict:
        """
        Compute the paper-trading metrics from ``self.trades``.

        Assumes every trade is a mapping whose 'pnl' is a fractional return
        (0.01 = +1%) - TODO confirm against the code that records trades.

        Returns:
            Dict with:
            - 'sharpe': mean excess trade return / standard deviation,
              scaled by sqrt(252) with a 2% annual risk-free rate (the same
              formula as WalkForwardAnalyzer._calculate_sharpe); 0.0 when
              undefined
            - 'max_dd': maximum drawdown of the compounded equity curve, as a
              positive fraction (0.10 = 10%)
            - 'win_rate': share of trades with a positive 'pnl'
            - 'num_trades': number of trades
        """
        pnls = np.array([trade['pnl'] for trade in self.trades], dtype=float)
        if pnls.size == 0:
            return {'sharpe': 0.0, 'max_dd': 0.0, 'win_rate': 0.0, 'num_trades': 0}

        # Compounded equity curve, normalized to a starting value of 1
        equity = np.concatenate(([1.0], np.cumprod(1 + pnls)))
        peak = np.maximum.accumulate(equity)
        max_dd = float(np.max((peak - equity) / peak))

        excess = pnls - 0.02 / 252
        volatility = np.std(excess)
        # Written so that a NaN volatility also takes the fallback.
        if volatility > 1e-12:
            sharpe = float(np.mean(excess) / volatility * np.sqrt(252))
        else:
            sharpe = 0.0

        return {
            'sharpe': sharpe,
            'max_dd': max_dd,
            'win_rate': float(np.mean(pnls > 0)),
            'num_trades': int(pnls.size)
        }

📊 Métriques de Validation

Seuils Minimaux

# Minimum thresholds a strategy must meet at each validation stage.
validation_criteria:
  # Performance: minimum Sharpe ratio per stage
  sharpe_ratio:
    in_sample: 1.8
    out_of_sample: 1.5
    paper_trading: 1.5
  
  # Risk: maximum tolerated drawdown per stage, as a fraction (0.10 = 10%)
  max_drawdown:
    in_sample: 0.08
    out_of_sample: 0.10
    paper_trading: 0.10
  
  # Consistency: share of winning trades, as a fraction (0.55 = 55%)
  win_rate:
    minimum: 0.55
    target: 0.60
  
  # NOTE(review): presumably gross profit / gross loss; no code in this
  # guide computes it - confirm the definition.
  profit_factor:
    minimum: 1.3
    target: 1.5
  
  # Robustness: acceptance levels for the Monte Carlo results
  # NOTE(review): MonteCarloSimulator reports the first value under the key
  # 'prob_sharpe_positive' - confirm how these keys are mapped.
  monte_carlo:
    prob_positive_sharpe: 0.95
    sharpe_5th_percentile: 0.8
  
  # Sample size: minimum number of trades per stage
  minimum_trades:
    in_sample: 100
    out_of_sample: 30
    paper_trading: 50

Documentation complète du backtesting terminée !