# 🧪 Guide de Backtesting - Trading AI Secure

## 📋 Table des Matières

1. [Philosophie Anti-Overfitting](#philosophie-anti-overfitting)
2. [Walk-Forward Analysis](#walk-forward-analysis)
3. [Out-of-Sample Testing](#out-of-sample-testing)
4. [Monte Carlo Simulation](#monte-carlo-simulation)
5. [Paper Trading](#paper-trading)
6. [Métriques de Validation](#métriques-de-validation)
7. [Implémentation](#implémentation)

---

## 🎯 Philosophie Anti-Overfitting

### Principe Fondamental

**"Une stratégie qui performe parfaitement en backtest est probablement sur-optimisée"**

### Les 7 Règles d'Or du Backtesting

1. ✅ **Toujours réserver 30% des données pour out-of-sample**
2. ✅ **Utiliser walk-forward analysis (pas de look-ahead bias)**
3. ✅ **Valider avec Monte Carlo (10,000+ simulations)**
4. ✅ **Paper trading obligatoire 30 jours minimum**
5. ✅ **Inclure coûts réalistes (slippage, commissions)**
6. ✅ **Tester sur de multiples marchés/périodes**
7. ✅ **Documenter tous les paramètres testés (éviter cherry-picking)**

---

## 🔄 Walk-Forward Analysis

### Concept

La walk-forward analysis simule le trading réel en :

1. Entraînant sur une fenêtre passée
2. Testant sur la fenêtre suivante
3. 
Glissant les fenêtres progressivement

```
┌────────────────────────────────────────────────────────────┐
│                   WALK-FORWARD ANALYSIS                    │
├────────────────────────────────────────────────────────────┤
│                                                            │
│  Période 1:                                                │
│  [====TRAIN====][TEST]                                     │
│                                                            │
│  Période 2:                                                │
│       [====TRAIN====][TEST]                                │
│                                                            │
│  Période 3:                                                │
│            [====TRAIN====][TEST]                           │
│                                                            │
│  Période 4:                                                │
│                 [====TRAIN====][TEST]                      │
│                                                            │
└────────────────────────────────────────────────────────────┘
```

### Implémentation

```python
# src/backtesting/walk_forward.py

from datetime import datetime, timedelta
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd


class WalkForwardAnalyzer:
    """
    Walk-forward analysis engine.

    Optimizes strategy parameters on a training window, evaluates them on
    the window that immediately follows, then slides both windows forward.

    Parameters:
    - train_window: Training window size in bars (e.g. 252 days = 1 year)
    - test_window: Test window size in bars (e.g. 63 days = 3 months)
    - step_size: Sliding step in bars (e.g. 21 days = 1 month)
    - initial_capital: Starting equity of every backtest
    - max_holding_bars: Maximum number of bars a simulated trade stays open
    """

    def __init__(
        self,
        train_window: int = 252,
        test_window: int = 63,
        step_size: int = 21,
        initial_capital: float = 10000,
        max_holding_bars: int = 100
    ):
        # A non-positive step would make run() loop forever; the other sizes
        # would produce empty windows.
        if min(train_window, test_window, step_size, max_holding_bars) <= 0:
            raise ValueError(
                "train_window, test_window, step_size and max_holding_bars "
                "must all be positive"
            )

        self.train_window = train_window
        self.test_window = test_window
        self.step_size = step_size
        self.initial_capital = initial_capital
        self.max_holding_bars = max_holding_bars
        self.results = []

    def run(
        self,
        data: pd.DataFrame,
        strategy_class,
        optimization_func
    ) -> Dict:
        """
        Run the walk-forward analysis.

        Args:
            data: Complete historical data, oldest row first
            strategy_class: Strategy class; instantiated as
                strategy_class(optimal_params)
            optimization_func: Called as
                optimization_func(train_data, strategy_class); returns the
                parameters to test

        Returns:
            Aggregated results of all walks ({} when data is too short for
            a single train + test window)
        """
        # Start from a clean slate so a second run() on the same analyzer
        # does not mix its walks with those of the previous run.
        self.results = []

        total_length = len(data)
        current_pos = 0

        while current_pos + self.train_window + self.test_window <= total_length:
            # Training window
            train_end = current_pos + self.train_window
            train_data = data.iloc[current_pos:train_end]

            # Test window: starts exactly where training ends (no overlap)
            test_end = train_end + self.test_window
            test_data = data.iloc[train_end:test_end]

            # Optimize on train only, evaluate on unseen test data
            optimal_params = optimization_func(train_data, strategy_class)
            strategy = strategy_class(optimal_params)
            test_results = self._backtest_strategy(strategy, test_data)

            self.results.append({
                'train_period': (train_data.index[0], train_data.index[-1]),
                'test_period': (test_data.index[0], test_data.index[-1]),
                'optimal_params': optimal_params,
                'test_sharpe': test_results['sharpe'],
                'test_returns': test_results['total_return'],
                'test_max_dd': test_results['max_drawdown'],
                'test_win_rate': test_results['win_rate'],
                'num_trades': test_results['num_trades']
            })

            # Slide the windows
            current_pos += self.step_size

        return self._aggregate_results()

    def _backtest_strategy(
        self,
        strategy,
        data: pd.DataFrame
    ) -> Dict:
        """
        Backtest a strategy bar by bar on the given data.

        Args:
            strategy: Object exposing analyze(data) that returns a signal
                (or a falsy value when there is nothing to trade)
            data: Price data; _simulate_trade reads the 'high', 'low' and
                'close' columns

        Returns:
            Dict with total_return, sharpe, max_drawdown (<= 0), win_rate,
            num_trades and equity_curve
        """
        trades = []
        equity_curve = [self.initial_capital]

        # NOTE(review): a new trade may be opened on every bar even while an
        # earlier one is still running; positions are not tracked.
        for i in range(len(data)):
            # The strategy only ever sees bars up to and including bar i
            signal = strategy.analyze(data.iloc[:i + 1])
            if not signal:
                continue

            # Exits are evaluated on the bars AFTER the signal bar. Using bar
            # i's own high/low would be look-ahead bias.
            future_data = data.iloc[i + 1:i + 1 + self.max_holding_bars]
            if future_data.empty:
                continue  # signal on the last bar: nothing left to trade

            trade_result = self._simulate_trade(signal, future_data)
            trades.append(trade_result)

            # 'pnl' is a fractional return, so compound it on current equity
            # (adding it directly would add 0.02 currency units to 10000).
            equity_curve.append(equity_curve[-1] * (1 + trade_result['pnl']))

        returns = pd.Series(equity_curve).pct_change().dropna()
        winners = sum(1 for t in trades if t['pnl'] > 0)

        return {
            'total_return': (equity_curve[-1] - equity_curve[0]) / equity_curve[0],
            'sharpe': self._calculate_sharpe(returns),
            'max_drawdown': self._calculate_max_drawdown(equity_curve),
            'win_rate': winners / len(trades) if trades else 0,
            'num_trades': len(trades),
            'equity_curve': equity_curve
        }

    def _simulate_trade(
        self,
        signal: 'Signal',
        future_data: pd.DataFrame,
        slippage: float = 0.001
    ) -> Dict:
        """
        Simulate the execution of a single trade.

        Args:
            signal: Object with entry_price, stop_loss, take_profit and
                direction ('LONG'; anything else is treated as SHORT)
            future_data: Bars following the entry, with 'high', 'low' and
                'close' columns
            slippage: Fractional slippage applied on entry and on
                stop-loss / take-profit exits (0.001 = 0.1%)

        Returns:
            Dict with 'pnl' (fractional return on the entry price),
            'exit_reason' ('stop_loss', 'take_profit' or 'timeout') and
            'holding_bars' (number of bars held, starting at 1)

        Raises:
            ValueError: If future_data is empty
        """
        if future_data.empty:
            raise ValueError("future_data must contain at least one bar")

        is_long = signal.direction == 'LONG'
        stop_loss = signal.stop_loss
        take_profit = signal.take_profit

        # Slippage always works against us: buy higher, sell lower
        entry_price = signal.entry_price * (1 + slippage if is_long else 1 - slippage)

        def closed(exit_price: float, reason: str, bars: int) -> Dict:
            if is_long:
                pnl = (exit_price - entry_price) / entry_price
            else:
                pnl = (entry_price - exit_price) / entry_price
            return {'pnl': pnl, 'exit_reason': reason, 'holding_bars': bars}

        # Count bars positionally: the iterrows() label is the DataFrame
        # index (often a timestamp), not a bar count.
        for bars_held, (_, row) in enumerate(future_data.iterrows(), start=1):
            if is_long:
                # Stop-loss is checked first: conservative when both levels
                # are touched within the same bar.
                if row['low'] <= stop_loss:
                    return closed(stop_loss * (1 - slippage), 'stop_loss', bars_held)
                if row['high'] >= take_profit:
                    return closed(take_profit * (1 - slippage), 'take_profit', bars_held)
            else:  # SHORT
                if row['high'] >= stop_loss:
                    return closed(stop_loss * (1 + slippage), 'stop_loss', bars_held)
                if row['low'] <= take_profit:
                    return closed(take_profit * (1 + slippage), 'take_profit', bars_held)

        # Timeout (max holding time): exit at the last available close
        return closed(future_data.iloc[-1]['close'], 'timeout', len(future_data))

    def _aggregate_results(self) -> Dict:
        """
        Aggregate the results of all walks.

        Returns:
            Summary statistics over self.results, or {} when there are none
        """
        if not self.results:
            return {}

        sharpes = [r['test_sharpe'] for r in self.results]
        returns = [r['test_returns'] for r in self.results]
        drawdowns = [r['test_max_dd'] for r in self.results]
        win_rates = [r['test_win_rate'] for r in self.results]

        return {
            'num_walks': len(self.results),
            'avg_sharpe': np.mean(sharpes),
            'std_sharpe': np.std(sharpes),
            'min_sharpe': np.min(sharpes),
            'max_sharpe': np.max(sharpes),
            'avg_return': np.mean(returns),
            'avg_max_dd': np.mean(drawdowns),
            # Drawdowns are <= 0, so the worst one is the minimum
            'worst_max_dd': np.min(drawdowns),
            'avg_win_rate': np.mean(win_rates),
            'consistency': len([s for s in sharpes if s > 1.0]) / len(sharpes),
            'all_walks': self.results
        }

    def _calculate_sharpe(self, returns: pd.Series, risk_free=0.02) -> float:
        """
        Compute the annualized Sharpe ratio.

        Returns 0.0 when there are no returns or their dispersion is
        (numerically) zero, instead of dividing by zero.

        NOTE(review): the risk-free rate is de-annualized with 252 and the
        result is scaled by sqrt(252), which presumes one return per trading
        day - confirm this matches the series being passed in.
        """
        excess_returns = np.asarray(returns, dtype=float) - risk_free / 252
        if excess_returns.size == 0:
            return 0.0

        volatility = np.std(excess_returns)
        if np.isclose(volatility, 0.0):
            return 0.0

        return float(np.mean(excess_returns) / volatility * np.sqrt(252))

    def _calculate_max_drawdown(self, equity_curve: List[float]) -> float:
        """
        Compute the maximum drawdown.

        Returns:
            Largest peak-to-trough decline as a fraction <= 0 (e.g. -0.15
            for a 15% drawdown); 0.0 for an empty curve
        """
        equity = np.asarray(equity_curve, dtype=float)
        if equity.size == 0:
            return 0.0

        peak = np.maximum.accumulate(equity)
        drawdown = (equity - peak) / peak
        return float(np.min(drawdown))
```

---

## 📊 Out-of-Sample Testing

### Principe

**Ne jamais optimiser sur 100% des données !**

```
┌────────────────────────────────────────────────────────────┐
│              SPLIT IN-SAMPLE / OUT-OF-SAMPLE               │
├────────────────────────────────────────────────────────────┤
│                                                            │
│  [========== IN-SAMPLE 70% ==========][OUT-SAMPLE 30%]     │
│                                                            │
│  Optimisation paramètres              Validation finale    │
│  Walk-forward analysis                Performance réelle   │
│  Tuning hyperparamètres               JAMAIS touché        │
│                                                            │
└────────────────────────────────────────────────────────────┘
```

### Implémentation

```python
from typing import Callable, Dict, Optional, Tuple

import pandas as pd


class OutOfSampleValidator:
    """
    Strict out-of-sample validation.

    Parameters:
    - oos_ratio: Fraction of the data reserved for out-of-sample (0 < r < 1)
    - backtest_func: Callable (strategy, data) -> Dict with at least the
      keys 'sharpe', 'total_return' and 'win_rate', for example
      WalkForwardAnalyzer()._backtest_strategy
    """

    def __init__(self, oos_ratio=0.30, backtest_func: Optional[Callable] = None):
        if not 0 < oos_ratio < 1:
            raise ValueError("oos_ratio must be strictly between 0 and 1")

        self.oos_ratio = oos_ratio
        self.backtest_func = backtest_func

    def split_data(
        self,
        data: pd.DataFrame
    ) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Split the data chronologically into in-sample / out-of-sample.

        Returns:
            (in_sample, out_of_sample); the out-of-sample part is the most
            recent oos_ratio share of the rows
        """
        split_point = int(len(data) * (1 - self.oos_ratio))

        in_sample = data.iloc[:split_point]
        out_of_sample = data.iloc[split_point:]

        return in_sample, out_of_sample

    def validate(
        self,
        strategy,
        in_sample_data: pd.DataFrame,
        out_of_sample_data: pd.DataFrame
    ) -> Dict:
        """
        Validate a strategy on out-of-sample data.

        Returns:
            Dict with 'in_sample' and 'out_of_sample' backtest results,
            the relative 'degradation' and the 'is_valid' verdict

        Raises:
            NotImplementedError: If no backtest function was configured
        """
        # In-sample performance
        is_results = self._backtest(strategy, in_sample_data)

        # Out-of-sample performance (CRITICAL)
        oos_results = self._backtest(strategy, out_of_sample_data)

        # Compare performances
        degradation = self._calculate_degradation(is_results, oos_results)

        return {
            'in_sample': is_results,
            'out_of_sample': oos_results,
            'degradation': degradation,
            'is_valid': self._is_valid_strategy(degradation, oos_results)
        }

    def _backtest(self, strategy, data: pd.DataFrame) -> Dict:
        """
        Run the configured backtest function on the given data.

        Raises:
            NotImplementedError: If backtest_func was not provided
        """
        if self.backtest_func is None:
            raise NotImplementedError(
                "No backtest function configured: pass backtest_func=... "
                "(e.g. WalkForwardAnalyzer()._backtest_strategy)"
            )
        return self.backtest_func(strategy, data)

    @staticmethod
    def _relative_degradation(is_value: float, oos_value: float) -> float:
        """
        Relative drop from in-sample to out-of-sample (0.25 = 25% worse).

        The drop is divided by |is_value| so the sign stays meaningful for
        negative in-sample metrics. A zero in-sample metric yields 0.0 when
        nothing changed and +/-inf otherwise, instead of ZeroDivisionError.
        """
        drop = is_value - oos_value
        if is_value == 0:
            if drop == 0:
                return 0.0
            return float('inf') if drop > 0 else float('-inf')
        return drop / abs(is_value)

    def _calculate_degradation(
        self,
        is_results: Dict,
        oos_results: Dict
    ) -> Dict:
        """
        Compute the performance degradation IS -> OOS.

        Returns:
            Dict with sharpe_degradation, return_degradation and
            winrate_degradation (positive = OOS worse than IS)
        """
        return {
            'sharpe_degradation': self._relative_degradation(
                is_results['sharpe'], oos_results['sharpe']
            ),
            'return_degradation': self._relative_degradation(
                is_results['total_return'], oos_results['total_return']
            ),
            'winrate_degradation': self._relative_degradation(
                is_results['win_rate'], oos_results['win_rate']
            ),
        }

    def _is_valid_strategy(
        self,
        degradation: Dict,
        oos_results: Optional[Dict] = None
    ) -> bool:
        """
        Validation criteria.

        A strategy is valid if:
        - OOS Sharpe > 1.0 (only checked when oos_results is given)
        - Sharpe degradation < 30%
        - Return degradation < 40%
        """
        if oos_results is not None and oos_results['sharpe'] <= 1.0:
            return False

        if degradation['sharpe_degradation'] > 0.30:
            return False

        if degradation['return_degradation'] > 0.40:
            return False

        return True
```

---

## 🎲 Monte Carlo Simulation

### Objectif

Tester la robustesse en simulant des milliers de scénarios aléatoires

```python
from typing import Dict, List

import numpy as np


class MonteCarloSimulator:
    """
    Monte Carlo simulation for robustness validation.

    Parameters:
    - n_simulations: Number of simulated scenarios
    - bootstrap: False (default) reorders the same trades; True resamples
      them with replacement
    - seed: Optional seed for reproducible simulations

    Note: with bootstrap=False every scenario contains exactly the same
    trades, so the compounded return and the per-trade Sharpe are the same
    in every scenario (up to rounding); only the drawdown depends on the
    order. Use bootstrap=True to obtain a real distribution of Sharpe and
    return.
    """

    def __init__(self, n_simulations=10000, bootstrap=False, seed=None):
        self.n_simulations = n_simulations
        self.bootstrap = bootstrap
        self._rng = np.random.default_rng(seed)

    def simulate(
        self,
        historical_trades: List[Dict]
    ) -> Dict:
        """
        Simulate N scenarios from the historical trades.

        Principle:
        - Same trades, different order (or resampled when bootstrap=True)
        - Tests sensitivity to the sequence
        - Separates lucky streaks from a real edge

        Args:
            historical_trades: Trades as dicts with a fractional 'pnl' key

        Returns:
            Distribution statistics, or {} when there is nothing to simulate
        """
        if not historical_trades or self.n_simulations <= 0:
            return {}

        n_trades = len(historical_trades)
        results = []

        for _ in range(self.n_simulations):
            # Draw indices rather than shuffling the dicts themselves, so
            # the caller's list is never modified.
            if self.bootstrap:
                order = self._rng.integers(0, n_trades, size=n_trades)
            else:
                order = self._rng.permutation(n_trades)

            equity = self._calculate_equity_curve(
                [historical_trades[j] for j in order]
            )

            results.append({
                'sharpe': self._calculate_sharpe(equity),
                'max_dd': self._calculate_max_drawdown(equity),
                'return': (equity[-1] - equity[0]) / equity[0]
            })

        return self._analyze_distribution(results)

    def _analyze_distribution(self, results: List[Dict]) -> Dict:
        """
        Analyze the distribution of the Monte Carlo results.

        Drawdowns are fractions <= 0 (-0.15 = 15% drawdown).
        """
        sharpes = np.array([r['sharpe'] for r in results])
        returns = np.array([r['return'] for r in results])
        drawdowns = np.array([r['max_dd'] for r in results])

        return {
            # Sharpe
            'sharpe_mean': np.mean(sharpes),
            'sharpe_median': np.median(sharpes),
            'sharpe_5th_percentile': np.percentile(sharpes, 5),
            'sharpe_95th_percentile': np.percentile(sharpes, 95),
            'prob_sharpe_positive': np.mean(sharpes > 0),
            'prob_sharpe_above_1': np.mean(sharpes > 1.0),

            # Returns
            'return_mean': np.mean(returns),
            'return_5th_percentile': np.percentile(returns, 5),
            'return_95th_percentile': np.percentile(returns, 95),
            'prob_positive_return': np.mean(returns > 0),

            # Drawdown
            'max_dd_mean': np.mean(drawdowns),
            # 95th percentile of drawdown DEPTH: values are negative, so the
            # severe tail is the 5th percentile of the signed values.
            'max_dd_95th_percentile': np.percentile(drawdowns, 5),
            'prob_dd_below_10pct': np.mean(drawdowns > -0.10),
        }

    def _calculate_equity_curve(self, trades: List[Dict]) -> List[float]:
        """Build a compounded equity curve from fractional trade pnls."""
        equity = [10000]  # Initial capital

        for trade in trades:
            pnl = trade['pnl'] * equity[-1]
            equity.append(equity[-1] + pnl)

        return equity

    def _calculate_sharpe(self, equity: List[float], risk_free=0.02) -> float:
        """
        Compute the annualized Sharpe ratio of an equity curve.

        Returns 0.0 when the curve has fewer than two points or the returns
        have (numerically) zero dispersion.

        NOTE(review): scaling by 252 presumes one trade per trading day.
        """
        curve = np.asarray(equity, dtype=float)
        if curve.size < 2:
            return 0.0

        excess_returns = np.diff(curve) / curve[:-1] - risk_free / 252
        volatility = np.std(excess_returns)
        if np.isclose(volatility, 0.0):
            return 0.0

        return float(np.mean(excess_returns) / volatility * np.sqrt(252))

    def _calculate_max_drawdown(self, equity: List[float]) -> float:
        """Return the maximum drawdown as a fraction <= 0."""
        curve = np.asarray(equity, dtype=float)
        peak = np.maximum.accumulate(curve)
        return float(np.min((curve - peak) / peak))
```

---

## 📝 Paper Trading

### Protocole Strict

```python
import logging
from datetime import datetime
from typing import Dict, Tuple

import numpy as np

logger = logging.getLogger(__name__)


class PaperTradingEngine:
    """
    Paper trading under real conditions.

    Rules:
    - Minimum 30 days of trading
    - Real-time data (not historical)
    - Realistic slippage and commissions
    - Simulated latency
    - Daily validation
    """

    def __init__(self, min_days=30):
        self.min_days = min_days
        self.start_date = None
        self.trades = []
        self.daily_metrics = []

    def start(self):
        """Start paper trading."""
        self.start_date = datetime.now()
        logger.info(f"Paper trading started. Minimum duration: {self.min_days} days")

    def can_go_live(self) -> Tuple[bool, str]:
        """
        Check whether the strategy may go live.

        Criteria:
        - Minimum 30 days
        - Sharpe > 1.5
        - Max DD < 10%
        - Win rate > 55%
        - Minimum 50 trades

        Returns:
            (allowed, human-readable reason)
        """
        if not self.start_date:
            return False, "Paper trading not started"

        days_elapsed = (datetime.now() - self.start_date).days

        if days_elapsed < self.min_days:
            return False, f"Only {days_elapsed}/{self.min_days} days completed"

        # Compute metrics
        metrics = self._calculate_metrics()

        # Check criteria
        if metrics['sharpe'] < 1.5:
            return False, f"Sharpe {metrics['sharpe']:.2f} below 1.5"

        if metrics['max_dd'] > 0.10:
            return False, f"Max DD {metrics['max_dd']:.2%} above 10%"

        if metrics['win_rate'] < 0.55:
            return False, f"Win rate {metrics['win_rate']:.2%} below 55%"

        if len(self.trades) < 50:
            return False, f"Only {len(self.trades)} trades (minimum 50)"

        return True, "All criteria met. Ready for live trading."

    def _calculate_metrics(self) -> Dict:
        """
        Compute paper-trading metrics from the recorded trades.

        NOTE(review): assumes every entry of self.trades is a dict with a
        fractional 'pnl' key, like the trades used elsewhere in this guide -
        confirm against the code that fills self.trades.

        Returns:
            Dict with 'sharpe' (annualized, presuming one trade per trading
            day), 'max_dd' (positive fraction, 0.10 = 10%) and 'win_rate';
            all 0.0 when no trade has been recorded
        """
        pnls = np.array([t['pnl'] for t in self.trades], dtype=float)
        if pnls.size == 0:
            return {'sharpe': 0.0, 'max_dd': 0.0, 'win_rate': 0.0}

        # Compounded equity starting from 1.0
        equity = np.concatenate(([1.0], np.cumprod(1.0 + pnls)))
        peak = np.maximum.accumulate(equity)
        max_dd = float(np.max((peak - equity) / peak))

        excess_returns = pnls - 0.02 / 252
        volatility = np.std(excess_returns)
        if np.isclose(volatility, 0.0):
            sharpe = 0.0
        else:
            sharpe = float(np.mean(excess_returns) / volatility * np.sqrt(252))

        return {
            'sharpe': sharpe,
            'max_dd': max_dd,
            'win_rate': float(np.mean(pnls > 0))
        }
```

---

## 📊 Métriques de Validation

### Seuils Minimaux

```yaml
validation_criteria:
  # Performance
  sharpe_ratio:
    in_sample: 1.8
    out_of_sample: 1.5
    paper_trading: 1.5

  # Risk
  max_drawdown:
    in_sample: 0.08
    out_of_sample: 0.10
    paper_trading: 0.10

  # Consistency
  win_rate:
    minimum: 0.55
    target: 0.60
  profit_factor:
    minimum: 1.3
    target: 1.5

  # Robustness
  monte_carlo:
    prob_positive_sharpe: 0.95
    sharpe_5th_percentile: 0.8

  # Sample size
  minimum_trades:
    in_sample: 100
    out_of_sample: 30
    paper_trading: 50
```

---

**Documentation complète du backtesting terminée !**