Files
trader-ml/docs/BACKTESTING_GUIDE.md
Tika da30ef19ed Initial commit — Trading AI Secure project complet
Architecture Docker (8 services), FastAPI, TimescaleDB, Redis, Streamlit.
Stratégies : scalping, intraday, swing. MLEngine + RegimeDetector (HMM).
BacktestEngine + WalkForwardAnalyzer + Optuna optimizer.
Routes API complètes dont /optimize async.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-08 17:38:09 +00:00

586 lines
19 KiB
Markdown

# 🧪 Guide de Backtesting - Trading AI Secure
## 📋 Table des Matières
1. [Philosophie Anti-Overfitting](#philosophie-anti-overfitting)
2. [Walk-Forward Analysis](#walk-forward-analysis)
3. [Out-of-Sample Testing](#out-of-sample-testing)
4. [Monte Carlo Simulation](#monte-carlo-simulation)
5. [Paper Trading](#paper-trading)
6. [Métriques de Validation](#métriques-de-validation)
7. [Implémentation](#implémentation)
---
## 🎯 Philosophie Anti-Overfitting
### Principe Fondamental
**"Une stratégie qui performe parfaitement en backtest est probablement sur-optimisée"**
### Les 7 Règles d'Or du Backtesting
1. **Toujours réserver 30% des données pour l'out-of-sample**
2. **Utiliser la walk-forward analysis (pas de look-ahead bias)**
3. **Valider avec Monte Carlo (10 000+ simulations)**
4. **Paper trading obligatoire pendant 30 jours minimum**
5. **Inclure des coûts réalistes (slippage, commissions)**
6. **Tester sur plusieurs marchés/périodes**
7. **Documenter tous les paramètres testés (éviter le cherry-picking)**
---
## 🔄 Walk-Forward Analysis
### Concept
La walk-forward analysis simule le trading réel en :
1. Entraînant sur une fenêtre passée
2. Testant sur la fenêtre suivante
3. Glissant les fenêtres progressivement
```
┌────────────────────────────────────────────────────────────┐
│                   WALK-FORWARD ANALYSIS                    │
├────────────────────────────────────────────────────────────┤
│                                                            │
│  Période 1:                                                │
│  [====TRAIN====][TEST]                                     │
│                                                            │
│  Période 2:                                                │
│        [====TRAIN====][TEST]                               │
│                                                            │
│  Période 3:                                                │
│              [====TRAIN====][TEST]                         │
│                                                            │
│  Période 4:                                                │
│                    [====TRAIN====][TEST]                   │
│                                                            │
└────────────────────────────────────────────────────────────┘
```
### Implémentation
```python
# src/backtesting/walk_forward.py
from typing import Dict, List, Tuple
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
class WalkForwardAnalyzer:
    """
    Walk-forward analysis engine.

    Repeatedly optimizes a strategy on a training window, evaluates it on the
    window that immediately follows, then slides both windows forward.

    Parameters:
    - train_window: Training window size in rows (e.g. 252 days = 1 year)
    - test_window: Test window size in rows (e.g. 63 days = 3 months)
    - step_size: Rows the windows advance per walk (e.g. 21 days = 1 month)
    """

    INITIAL_CAPITAL = 10000  # starting equity of every backtest
    MAX_HOLDING_BARS = 100   # exit window length; longer trades are closed at the last close
    SLIPPAGE = 0.001         # 0.1% adverse slippage on entry and on stop/target exits

    def __init__(
        self,
        train_window: int = 252,
        test_window: int = 63,
        step_size: int = 21
    ):
        # A non-positive step would make run() loop forever; empty windows
        # would fail later with an opaque IndexError.
        if train_window <= 0 or test_window <= 0 or step_size <= 0:
            raise ValueError(
                "train_window, test_window and step_size must be positive"
            )
        self.train_window = train_window
        self.test_window = test_window
        self.step_size = step_size
        self.results = []

    def run(
        self,
        data: pd.DataFrame,
        strategy_class,
        optimization_func
    ) -> Dict:
        """
        Run the walk-forward analysis.

        Args:
            data: Complete historical data set
            strategy_class: Strategy class to test; instantiated as
                ``strategy_class(optimal_params)``
            optimization_func: Called as ``optimization_func(train_data,
                strategy_class)``; returns the parameters to test

        Returns:
            Aggregated results of all walks ({} when the data is too short
            for a single train + test window)
        """
        # Start from a clean slate so that calling run() twice does not mix
        # the walks of two different analyses.
        self.results = []
        total_length = len(data)
        current_pos = 0

        while current_pos + self.train_window + self.test_window <= total_length:
            # Training window
            train_end = current_pos + self.train_window
            train_data = data.iloc[current_pos:train_end]

            # Test window: starts exactly where training ends
            test_end = train_end + self.test_window
            test_data = data.iloc[train_end:test_end]

            # Optimize parameters on the training window only
            optimal_params = optimization_func(train_data, strategy_class)

            # Evaluate on the unseen test window
            strategy = strategy_class(optimal_params)
            test_results = self._backtest_strategy(strategy, test_data)

            self.results.append({
                'train_period': (train_data.index[0], train_data.index[-1]),
                'test_period': (test_data.index[0], test_data.index[-1]),
                'optimal_params': optimal_params,
                'test_sharpe': test_results['sharpe'],
                'test_returns': test_results['total_return'],
                'test_max_dd': test_results['max_drawdown'],
                'test_win_rate': test_results['win_rate'],
                'num_trades': test_results['num_trades']
            })

            # Slide the windows
            current_pos += self.step_size

        return self._aggregate_results()

    def _backtest_strategy(
        self,
        strategy,
        data: pd.DataFrame
    ) -> Dict:
        """
        Backtest a strategy on the given data.

        The strategy sees only rows up to and including the current bar.
        Each truthy signal is simulated as one trade whose fractional PnL is
        compounded into the equity curve.
        """
        trades = []
        equity_curve = [self.INITIAL_CAPITAL]

        for i in range(len(data)):
            signal = strategy.analyze(data.iloc[:i + 1])
            if not signal:
                continue

            # NOTE(review): the exit window starts on the signal bar itself,
            # so that bar's high/low can trigger the exit - confirm intended.
            exit_window = data.iloc[i:i + self.MAX_HOLDING_BARS]
            trade_result = self._simulate_trade(signal, exit_window)
            trades.append(trade_result)
            # 'pnl' is a fractional return, so it must be compounded rather
            # than added to the capital as if it were a currency amount.
            equity_curve.append(equity_curve[-1] * (1 + trade_result['pnl']))

        returns = pd.Series(equity_curve).pct_change().dropna()
        winners = sum(1 for t in trades if t['pnl'] > 0)

        return {
            'total_return': (equity_curve[-1] - equity_curve[0]) / equity_curve[0],
            'sharpe': self._calculate_sharpe(returns),
            'max_drawdown': self._calculate_max_drawdown(equity_curve),
            'win_rate': winners / len(trades) if trades else 0,
            'num_trades': len(trades),
            'equity_curve': equity_curve
        }

    def _simulate_trade(
        self,
        signal: 'Signal',
        future_data: pd.DataFrame
    ) -> Dict:
        """
        Simulate the execution of one trade.

        Reads ``entry_price``, ``stop_loss``, ``take_profit`` and
        ``direction`` from the signal; any direction other than 'LONG' is
        treated as a short. Within a bar the stop-loss is checked before the
        take-profit. If neither level is reached, the trade is closed at the
        last close of ``future_data`` (no slippage applied on that exit).

        Returns:
            Dict with 'pnl' (fractional return on the entry price),
            'exit_reason' and 'holding_bars' (1-based count of bars up to and
            including the exit bar).
        """
        is_long = signal.direction == 'LONG'
        side = 1 if is_long else -1
        stop_loss = signal.stop_loss
        take_profit = signal.take_profit

        # Slippage always works against the trader: buy higher, sell lower.
        entry_price = signal.entry_price * (1 + side * self.SLIPPAGE)

        # enumerate() gives a real bar count; the iterrows() key is the index
        # label (often a timestamp), not a position.
        for bars_held, (_, row) in enumerate(future_data.iterrows(), start=1):
            if is_long:
                stop_hit = row['low'] <= stop_loss
                target_hit = row['high'] >= take_profit
            else:
                stop_hit = row['high'] >= stop_loss
                target_hit = row['low'] <= take_profit

            if stop_hit:
                exit_level, exit_reason = stop_loss, 'stop_loss'
            elif target_hit:
                exit_level, exit_reason = take_profit, 'take_profit'
            else:
                continue

            exit_price = exit_level * (1 - side * self.SLIPPAGE)
            pnl = side * (exit_price - entry_price) / entry_price
            return {'pnl': pnl, 'exit_reason': exit_reason, 'holding_bars': bars_held}

        # Timeout (maximum holding time reached)
        exit_price = future_data.iloc[-1]['close']
        pnl = side * (exit_price - entry_price) / entry_price
        return {'pnl': pnl, 'exit_reason': 'timeout', 'holding_bars': len(future_data)}

    def _aggregate_results(self) -> Dict:
        """
        Aggregate the results of all walks.

        Returns {} when no walk was recorded.
        """
        if not self.results:
            return {}

        sharpes = [r['test_sharpe'] for r in self.results]
        returns = [r['test_returns'] for r in self.results]
        drawdowns = [r['test_max_dd'] for r in self.results]
        win_rates = [r['test_win_rate'] for r in self.results]

        return {
            'num_walks': len(self.results),
            'avg_sharpe': np.mean(sharpes),
            'std_sharpe': np.std(sharpes),
            'min_sharpe': np.min(sharpes),
            'max_sharpe': np.max(sharpes),
            'avg_return': np.mean(returns),
            'avg_max_dd': np.mean(drawdowns),
            # Drawdowns are <= 0, so the worst one is the minimum.
            'worst_max_dd': np.min(drawdowns),
            'avg_win_rate': np.mean(win_rates),
            # Share of walks whose test Sharpe exceeds 1.0
            'consistency': len([s for s in sharpes if s > 1.0]) / len(sharpes),
            'all_walks': self.results
        }

    def _calculate_sharpe(self, returns: pd.Series, risk_free=0.02) -> float:
        """
        Compute the Sharpe ratio, scaled by sqrt(252).

        Returns 0.0 when fewer than two returns are available or when their
        dispersion is (numerically) zero, instead of NaN/inf.
        """
        excess_returns = np.asarray(returns, dtype=float) - risk_free / 252
        if excess_returns.size < 2:
            return 0.0
        volatility = excess_returns.std()
        # Tolerance rather than == 0: the std of identical floats can be a
        # tiny non-zero rounding residue.
        if volatility < 1e-12:
            return 0.0
        return float(excess_returns.mean() / volatility * np.sqrt(252))

    def _calculate_max_drawdown(self, equity_curve: List[float]) -> float:
        """
        Compute the maximum drawdown.

        Returns a value <= 0: the largest relative drop from a running peak
        (e.g. -0.25 for a 25% drawdown).
        """
        equity = np.asarray(equity_curve, dtype=float)
        peak = np.maximum.accumulate(equity)
        drawdown = (equity - peak) / peak
        return np.min(drawdown)
```
---
## 📊 Out-of-Sample Testing
### Principe
**Ne jamais optimiser sur 100% des données !**
```
┌────────────────────────────────────────────────────────────┐
│              SPLIT IN-SAMPLE / OUT-OF-SAMPLE               │
├────────────────────────────────────────────────────────────┤
│                                                            │
│  [========== IN-SAMPLE 70% ==========][OUT-SAMPLE 30%]     │
│                                                            │
│  Optimisation paramètres              Validation finale    │
│  Walk-forward analysis                Performance réelle   │
│  Tuning hyperparamètres               JAMAIS touché        │
│                                                            │
└────────────────────────────────────────────────────────────┘
```
### Implémentation
```python
class OutOfSampleValidator:
    """
    Strict out-of-sample validation.

    Splits a data set chronologically into an in-sample part (used for
    optimization) and a trailing out-of-sample part, then measures how much
    performance degrades from one to the other.

    The backtest itself is not implemented here: provide ``_backtest`` in a
    subclass.
    """

    def __init__(self, oos_ratio=0.30):
        # A ratio outside (0, 1) would leave one of the two parts empty.
        if not 0 < oos_ratio < 1:
            raise ValueError("oos_ratio must be strictly between 0 and 1")
        self.oos_ratio = oos_ratio

    def split_data(
        self,
        data: pd.DataFrame
    ) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Split the data into in-sample / out-of-sample parts.

        The split is positional: the first ``1 - oos_ratio`` share of the
        rows is in-sample, the remainder is out-of-sample.
        """
        split_point = int(len(data) * (1 - self.oos_ratio))
        return data.iloc[:split_point], data.iloc[split_point:]

    def validate(
        self,
        strategy,
        in_sample_data: pd.DataFrame,
        out_of_sample_data: pd.DataFrame
    ) -> Dict:
        """
        Validate a strategy on out-of-sample data.

        Returns:
            Dict with the 'in_sample' and 'out_of_sample' backtest results,
            the 'degradation' between them and the 'is_valid' verdict.
        """
        # In-sample performance
        is_results = self._backtest(strategy, in_sample_data)
        # Out-of-sample performance (the one that matters)
        oos_results = self._backtest(strategy, out_of_sample_data)

        degradation = self._calculate_degradation(is_results, oos_results)
        return {
            'in_sample': is_results,
            'out_of_sample': oos_results,
            'degradation': degradation,
            'is_valid': self._is_valid_strategy(degradation, oos_results)
        }

    def _backtest(self, strategy, data: pd.DataFrame) -> Dict:
        """
        Backtest hook; must be provided by a subclass.

        Must return a dict containing at least 'sharpe', 'total_return' and
        'win_rate', which are the keys read by ``_calculate_degradation``.
        """
        raise NotImplementedError(
            "OutOfSampleValidator._backtest must be implemented by a subclass"
        )

    @staticmethod
    def _relative_drop(is_value: float, oos_value: float) -> float:
        """
        Relative drop from the in-sample to the out-of-sample value.

        Positive means the out-of-sample value is worse. The denominator is
        the magnitude of the in-sample value so that the sign stays correct
        when that value is negative. With a zero in-sample value the result
        is 0.0 for no change, otherwise +inf (worse) or -inf (better).
        """
        drop = is_value - oos_value
        if is_value == 0:
            if drop == 0:
                return 0.0
            return float('inf') if drop > 0 else float('-inf')
        return drop / abs(is_value)

    def _calculate_degradation(
        self,
        is_results: Dict,
        oos_results: Dict
    ) -> Dict:
        """
        Compute the performance degradation IS -> OOS for the Sharpe ratio,
        the total return and the win rate.
        """
        return {
            'sharpe_degradation': self._relative_drop(
                is_results['sharpe'], oos_results['sharpe']),
            'return_degradation': self._relative_drop(
                is_results['total_return'], oos_results['total_return']),
            'winrate_degradation': self._relative_drop(
                is_results['win_rate'], oos_results['win_rate']),
        }

    def _is_valid_strategy(self, degradation: Dict, oos_results: Dict = None) -> bool:
        """
        Validation criteria.

        The strategy is valid when:
        - OOS Sharpe > 1.0 (checked only when ``oos_results`` is supplied)
        - Sharpe degradation <= 30%
        - Return degradation <= 40%
        """
        if oos_results is not None and oos_results['sharpe'] <= 1.0:
            return False
        if degradation['sharpe_degradation'] > 0.30:
            return False
        if degradation['return_degradation'] > 0.40:
            return False
        return True
```
---
## 🎲 Monte Carlo Simulation
### Objectif
Tester la robustesse en simulant des milliers de scénarios aléatoires.
```python
class MonteCarloSimulator:
    """
    Monte Carlo simulation for robustness validation.

    Each simulation replays the same historical trades in a random order and
    measures the resulting equity curve.
    """

    def __init__(self, n_simulations=10000):
        self.n_simulations = n_simulations

    def simulate(
        self,
        historical_trades: List[Dict]
    ) -> Dict:
        """
        Simulate N scenarios by reordering the trades.

        Principle:
        - Same trades, different order
        - Tests sensitivity to the sequence of wins and losses

        Each trade is a dict whose 'pnl' is a fractional return that is
        compounded into the equity curve. Because the same returns are
        compounded in every scenario, the final return and the Sharpe ratio
        are identical across scenarios (up to rounding); only the drawdown
        statistics actually vary with the ordering.

        Returns:
            Distribution statistics, or {} when there are no trades or no
            simulations to run.
        """
        trades = list(historical_trades)
        if not trades:
            return {}

        results = []
        for _ in range(self.n_simulations):
            # Shuffle positions rather than the dicts themselves so numpy
            # never has to build an array out of the trade records.
            order = np.random.permutation(len(trades))
            equity = self._calculate_equity_curve([trades[k] for k in order])

            curve = np.asarray(equity, dtype=float)
            step_returns = np.diff(curve) / curve[:-1]

            results.append({
                'sharpe': self._calculate_sharpe(step_returns),
                'max_dd': self._calculate_max_drawdown(equity),
                'return': (equity[-1] - equity[0]) / equity[0]
            })

        return self._analyze_distribution(results)

    def _analyze_distribution(self, results: List[Dict]) -> Dict:
        """
        Analyze the distribution of the Monte Carlo results.

        Drawdowns are expected as values <= 0 (e.g. -0.10 for a 10% drop).
        """
        if not results:
            return {}

        sharpes = np.array([r['sharpe'] for r in results])
        returns = np.array([r['return'] for r in results])
        drawdowns = np.array([r['max_dd'] for r in results])

        return {
            # Sharpe
            'sharpe_mean': np.mean(sharpes),
            'sharpe_median': np.median(sharpes),
            'sharpe_5th_percentile': np.percentile(sharpes, 5),
            'sharpe_95th_percentile': np.percentile(sharpes, 95),
            'prob_sharpe_positive': np.mean(sharpes > 0),
            'prob_sharpe_above_1': np.mean(sharpes > 1.0),
            # Returns
            'return_mean': np.mean(returns),
            'return_5th_percentile': np.percentile(returns, 5),
            'return_95th_percentile': np.percentile(returns, 95),
            'prob_positive_return': np.mean(returns > 0),
            # Drawdown
            'max_dd_mean': np.mean(drawdowns),
            # Drawdowns are negative, so the level that 95% of scenarios do
            # not breach is the 5th percentile of the raw values.
            'max_dd_95th_percentile': np.percentile(drawdowns, 5),
            'prob_dd_below_10pct': np.mean(drawdowns > -0.10),
        }

    def _calculate_equity_curve(self, trades: List[Dict]) -> List[float]:
        """Build the compounded equity curve from a sequence of trades."""
        equity = [10000]  # Initial capital
        for trade in trades:
            equity.append(equity[-1] + trade['pnl'] * equity[-1])
        return equity

    def _calculate_sharpe(self, returns, risk_free=0.02) -> float:
        """
        Compute the Sharpe ratio of a return series, scaled by sqrt(252).

        Returns 0.0 when fewer than two returns are available or when their
        dispersion is (numerically) zero, instead of NaN/inf.
        """
        excess_returns = np.asarray(returns, dtype=float) - risk_free / 252
        if excess_returns.size < 2:
            return 0.0
        volatility = excess_returns.std()
        # Tolerance rather than == 0: the std of identical floats can be a
        # tiny non-zero rounding residue.
        if volatility < 1e-12:
            return 0.0
        return float(excess_returns.mean() / volatility * np.sqrt(252))

    def _calculate_max_drawdown(self, equity_curve: List[float]) -> float:
        """
        Compute the maximum drawdown as a value <= 0 (largest relative drop
        from a running peak).
        """
        equity = np.asarray(equity_curve, dtype=float)
        peak = np.maximum.accumulate(equity)
        return float(np.min((equity - peak) / peak))
```
---
## 📝 Paper Trading
### Protocole Strict
```python
class PaperTradingEngine:
    """
    Paper trading under realistic conditions.

    Rules:
    - Minimum 30 days of trading
    - Real-time data (not historical)
    - Realistic slippage and commissions
    - Simulated latency
    - Daily validation

    Only the go-live gate is implemented here; recording trades into
    ``self.trades`` is left to the caller.
    """

    def __init__(self, min_days=30):
        self.min_days = min_days
        self.start_date = None   # set by start()
        self.trades = []         # trade records read by _calculate_metrics()
        self.daily_metrics = []

    def start(self):
        """Start paper trading and record the start time."""
        # Local import: no module-level logger is defined alongside this class.
        import logging

        self.start_date = datetime.now()
        logging.getLogger(__name__).info(
            "Paper trading started. Minimum duration: %s days", self.min_days
        )

    def can_go_live(self) -> Tuple[bool, str]:
        """
        Check whether the strategy may go live.

        Criteria (checked in this order; the first failure is reported):
        - Minimum ``min_days`` days elapsed since start()
        - Sharpe >= 1.5
        - Max drawdown <= 10%
        - Win rate >= 55%
        - Minimum 50 trades

        Returns:
            (verdict, human-readable reason)
        """
        if not self.start_date:
            return False, "Paper trading not started"

        days_elapsed = (datetime.now() - self.start_date).days
        if days_elapsed < self.min_days:
            return False, f"Only {days_elapsed}/{self.min_days} days completed"

        metrics = self._calculate_metrics()
        # Compare the magnitude so that a drawdown reported as a negative
        # number (e.g. -0.25) cannot slip under the 10% limit.
        max_dd = abs(metrics['max_dd'])

        if metrics['sharpe'] < 1.5:
            return False, f"Sharpe {metrics['sharpe']:.2f} below 1.5"
        if max_dd > 0.10:
            return False, f"Max DD {max_dd:.2%} above 10%"
        if metrics['win_rate'] < 0.55:
            return False, f"Win rate {metrics['win_rate']:.2%} below 55%"
        if len(self.trades) < 50:
            return False, f"Only {len(self.trades)} trades (minimum 50)"

        return True, "All criteria met. Ready for live trading."

    def _calculate_metrics(self) -> Dict:
        """
        Compute the paper-trading metrics from ``self.trades``.

        NOTE(review): assumes every trade is a dict whose 'pnl' is a
        fractional return per trade - confirm against the code that fills
        ``self.trades``. The Sharpe ratio uses a 2% annual risk-free rate
        and sqrt(252) scaling on per-trade returns.

        Returns:
            Dict with 'sharpe', 'max_dd' (positive magnitude, 0.10 = 10%)
            and 'win_rate'. All three are 0.0 when there are no trades.
        """
        pnls = [trade['pnl'] for trade in self.trades]
        if not pnls:
            return {'sharpe': 0.0, 'max_dd': 0.0, 'win_rate': 0.0}

        # Compounded equity curve, normalized to a starting value of 1.0
        equity = [1.0]
        for pnl in pnls:
            equity.append(equity[-1] * (1 + pnl))

        # Largest relative drop from a running peak
        peak = equity[0]
        max_dd = 0.0
        for value in equity:
            peak = max(peak, value)
            max_dd = max(max_dd, (peak - value) / peak)

        excess = [pnl - 0.02 / 252 for pnl in pnls]
        mean_excess = sum(excess) / len(excess)
        variance = sum((x - mean_excess) ** 2 for x in excess) / len(excess)
        volatility = variance ** 0.5
        # Undefined Sharpe (single trade or constant returns) is reported as
        # 0.0 so the go-live gate fails cleanly instead of dividing by zero.
        if len(excess) < 2 or volatility < 1e-12:
            sharpe = 0.0
        else:
            sharpe = mean_excess / volatility * 252 ** 0.5

        return {
            'sharpe': sharpe,
            'max_dd': max_dd,
            'win_rate': sum(1 for pnl in pnls if pnl > 0) / len(pnls),
        }
```
---
## 📊 Métriques de Validation
### Seuils Minimaux
```yaml
validation_criteria:
  # Performance
  sharpe_ratio:
    in_sample: 1.8
    out_of_sample: 1.5
    paper_trading: 1.5
  # Risk
  max_drawdown:
    in_sample: 0.08
    out_of_sample: 0.10
    paper_trading: 0.10
  # Consistency
  win_rate:
    minimum: 0.55
    target: 0.60
  profit_factor:
    minimum: 1.3
    target: 1.5
  # Robustness
  monte_carlo:
    prob_positive_sharpe: 0.95
    sharpe_5th_percentile: 0.8
  # Sample size
  minimum_trades:
    in_sample: 100
    out_of_sample: 30
    paper_trading: 50
```
---
**Documentation complète du backtesting terminée !**