Architecture Docker (8 services), FastAPI, TimescaleDB, Redis, Streamlit. Stratégies : scalping, intraday, swing. MLEngine + RegimeDetector (HMM). BacktestEngine + WalkForwardAnalyzer + Optuna optimizer. Routes API complètes dont /optimize async. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
586 lines
19 KiB
Markdown
586 lines
19 KiB
Markdown
# 🧪 Guide de Backtesting - Trading AI Secure
|
|
|
|
## 📋 Table des Matières
|
|
1. [Philosophie Anti-Overfitting](#philosophie-anti-overfitting)
|
|
2. [Walk-Forward Analysis](#walk-forward-analysis)
|
|
3. [Out-of-Sample Testing](#out-of-sample-testing)
|
|
4. [Monte Carlo Simulation](#monte-carlo-simulation)
|
|
5. [Paper Trading](#paper-trading)
|
|
6. [Métriques de Validation](#métriques-de-validation)
|
|
7. [Implémentation](#implémentation)
|
|
|
|
---
|
|
|
|
## 🎯 Philosophie Anti-Overfitting
|
|
|
|
### Principe Fondamental
|
|
|
|
**"Une stratégie qui performe parfaitement en backtest est probablement sur-optimisée"**
|
|
|
|
### Les 7 Règles d'Or du Backtesting
|
|
|
|
1. ✅ **Toujours réserver 30% des données pour out-of-sample**
|
|
2. ✅ **Utiliser walk-forward analysis (pas de look-ahead bias)**
|
|
3. ✅ **Valider avec Monte Carlo (10,000+ simulations)**
|
|
4. ✅ **Paper trading obligatoire 30 jours minimum**
|
|
5. ✅ **Inclure coûts réalistes (slippage, commissions)**
|
|
6. ✅ **Tester sur plusieurs marchés et périodes**
|
|
7. ✅ **Documenter tous les paramètres testés (éviter cherry-picking)**
|
|
|
|
---
|
|
|
|
## 🔄 Walk-Forward Analysis
|
|
|
|
### Concept
|
|
|
|
La walk-forward analysis simule le trading réel en :
|
|
1. Entraînant sur une fenêtre passée
|
|
2. Testant sur la fenêtre suivante
|
|
3. Glissant les fenêtres progressivement
|
|
|
|
```
┌────────────────────────────────────────────────────────────┐
│                   WALK-FORWARD ANALYSIS                    │
├────────────────────────────────────────────────────────────┤
│                                                            │
│  Période 1:                                                │
│  [====TRAIN====][TEST]                                     │
│                                                            │
│  Période 2:                                                │
│       [====TRAIN====][TEST]                                │
│                                                            │
│  Période 3:                                                │
│            [====TRAIN====][TEST]                           │
│                                                            │
│  Période 4:                                                │
│                 [====TRAIN====][TEST]                      │
│                                                            │
└────────────────────────────────────────────────────────────┘
```
|
|
|
|
### Implémentation
|
|
|
|
```python
|
|
# src/backtesting/walk_forward.py
|
|
|
|
from typing import Dict, List, Tuple
|
|
import pandas as pd
|
|
import numpy as np
|
|
from datetime import datetime, timedelta
|
|
|
|
class WalkForwardAnalyzer:
    """
    Walk-forward analysis engine.

    Optimises strategy parameters on a training window, evaluates them on the
    window that immediately follows, then slides both windows forward and
    repeats until the data is exhausted.

    Parameters:
        - train_window: Training window size in rows (e.g. 252 days = 1 year)
        - test_window: Test window size in rows (e.g. 63 days = 3 months)
        - step_size: Sliding step in rows (e.g. 21 days = 1 month)
        - max_holding_bars: Maximum number of bars scanned for an exit before
          a simulated trade is closed at the last scanned bar's close
        - slippage: Adverse fractional slippage applied to every fill
          (0.001 = 0.1%)
    """

    # Starting equity of every simulated test window.
    INITIAL_CAPITAL = 10000

    def __init__(
        self,
        train_window: int = 252,
        test_window: int = 63,
        step_size: int = 21,
        max_holding_bars: int = 100,
        slippage: float = 0.001
    ):
        # A non-positive step would make run() loop forever; non-positive
        # windows would produce empty slices. Fail early instead.
        for label, value in (
            ('train_window', train_window),
            ('test_window', test_window),
            ('step_size', step_size),
            ('max_holding_bars', max_holding_bars),
        ):
            if value <= 0:
                raise ValueError(f"{label} must be positive, got {value!r}")
        if slippage < 0:
            raise ValueError(f"slippage must be non-negative, got {slippage!r}")

        self.train_window = train_window
        self.test_window = test_window
        self.step_size = step_size
        self.max_holding_bars = max_holding_bars
        self.slippage = slippage

        self.results = []

    def run(
        self,
        data: pd.DataFrame,
        strategy_class,
        optimization_func
    ) -> Dict:
        """
        Run the walk-forward analysis.

        Args:
            data: Complete historical data set
            strategy_class: Callable invoked as ``strategy_class(params)`` to
                build the strategy under test
            optimization_func: Callable invoked as
                ``optimization_func(train_data, strategy_class)``; its return
                value is passed to ``strategy_class``

        Returns:
            Aggregated results over all walks (empty dict when the data is
            shorter than one train + test window)
        """
        # Start from a clean slate so that a second call does not mix walks
        # from different runs into the aggregate.
        self.results = []

        window_span = self.train_window + self.test_window
        total_length = len(data)
        current_pos = 0

        while current_pos + window_span <= total_length:
            # Training window
            train_end = current_pos + self.train_window
            train_data = data.iloc[current_pos:train_end]

            # Test window starts exactly where training stops
            test_data = data.iloc[train_end:train_end + self.test_window]

            # Optimise on the training window only
            optimal_params = optimization_func(train_data, strategy_class)

            # Evaluate on the unseen test window
            strategy = strategy_class(optimal_params)
            test_results = self._backtest_strategy(strategy, test_data)

            self.results.append({
                'train_period': (train_data.index[0], train_data.index[-1]),
                'test_period': (test_data.index[0], test_data.index[-1]),
                'optimal_params': optimal_params,
                'test_sharpe': test_results['sharpe'],
                'test_returns': test_results['total_return'],
                'test_max_dd': test_results['max_drawdown'],
                'test_win_rate': test_results['win_rate'],
                'num_trades': test_results['num_trades']
            })

            # Slide the windows
            current_pos += self.step_size

        return self._aggregate_results()

    def _backtest_strategy(
        self,
        strategy,
        data: pd.DataFrame
    ) -> Dict:
        """
        Backtest a strategy on the given data.

        ``strategy.analyze`` is called on every bar with the data seen so far
        (``data.iloc[:i+1]``); each truthy signal is simulated immediately,
        so trades may overlap. The exit scan starts on the signal bar itself.

        Returns:
            Dict with 'total_return', 'sharpe', 'max_drawdown' (<= 0),
            'win_rate', 'num_trades' and 'equity_curve'.
        """
        trades = []
        equity_curve = [self.INITIAL_CAPITAL]

        for i in range(len(data)):
            signal = strategy.analyze(data.iloc[:i + 1])
            if not signal:
                continue

            trade_result = self._simulate_trade(
                signal,
                data.iloc[i:min(i + self.max_holding_bars, len(data))]
            )
            trades.append(trade_result)
            # 'pnl' is a fractional return, so it must be compounded onto the
            # current equity rather than added to it as a currency amount.
            equity_curve.append(equity_curve[-1] * (1 + trade_result['pnl']))

        returns = pd.Series(equity_curve).pct_change().dropna()
        winners = sum(1 for t in trades if t['pnl'] > 0)

        return {
            'total_return': (equity_curve[-1] - equity_curve[0]) / equity_curve[0],
            'sharpe': self._calculate_sharpe(returns),
            'max_drawdown': self._calculate_max_drawdown(equity_curve),
            'win_rate': winners / len(trades) if trades else 0,
            'num_trades': len(trades),
            'equity_curve': equity_curve
        }

    def _simulate_trade(
        self,
        signal: 'Signal',
        future_data: pd.DataFrame
    ) -> Dict:
        """
        Simulate the execution of one trade.

        Reads ``entry_price``, ``stop_loss``, ``take_profit`` and
        ``direction`` from the signal ('LONG', anything else is treated as
        short) and the 'high', 'low' and 'close' columns of ``future_data``.
        When a bar touches both levels the stop-loss wins (pessimistic).

        Returns:
            Dict with 'pnl' (fractional return net of slippage),
            'exit_reason' ('stop_loss', 'take_profit' or 'timeout') and
            'holding_bars' (1-based count of bars scanned up to the exit).

        Raises:
            ValueError: if ``future_data`` is empty.
        """
        if len(future_data) == 0:
            raise ValueError("future_data must contain at least one bar")

        is_long = signal.direction == 'LONG'
        slippage = self.slippage
        stop_loss = signal.stop_loss
        take_profit = signal.take_profit

        # Slippage always works against the trader: buy higher, sell lower.
        entry_price = signal.entry_price * (1 + slippage if is_long else 1 - slippage)

        def close_at(level, reason, bars):
            if is_long:
                exit_price = level * (1 - slippage)
                pnl = (exit_price - entry_price) / entry_price
            else:
                exit_price = level * (1 + slippage)
                pnl = (entry_price - exit_price) / entry_price
            return {'pnl': pnl, 'exit_reason': reason, 'holding_bars': bars}

        # Count bars positionally: the iterrows() label is the DataFrame
        # index (often a timestamp), not a number of bars.
        for bars_held, (_, row) in enumerate(future_data.iterrows(), start=1):
            if is_long:
                stop_hit = row['low'] <= stop_loss
                target_hit = row['high'] >= take_profit
            else:
                stop_hit = row['high'] >= stop_loss
                target_hit = row['low'] <= take_profit

            if stop_hit:
                return close_at(stop_loss, 'stop_loss', bars_held)
            if target_hit:
                return close_at(take_profit, 'take_profit', bars_held)

        # Timeout: close at the last available close, paying slippage like
        # every other exit.
        return close_at(future_data.iloc[-1]['close'], 'timeout', len(future_data))

    def _aggregate_results(self) -> Dict:
        """
        Aggregate the results of all walks.

        Returns:
            Empty dict when no walk was recorded, otherwise summary
            statistics plus the raw walks under 'all_walks'. 'consistency'
            is the fraction of walks whose test Sharpe exceeds 1.0.
        """
        if not self.results:
            return {}

        sharpes = [r['test_sharpe'] for r in self.results]
        returns = [r['test_returns'] for r in self.results]
        drawdowns = [r['test_max_dd'] for r in self.results]
        win_rates = [r['test_win_rate'] for r in self.results]

        return {
            'num_walks': len(self.results),
            'avg_sharpe': float(np.mean(sharpes)),
            'std_sharpe': float(np.std(sharpes)),
            'min_sharpe': float(np.min(sharpes)),
            'max_sharpe': float(np.max(sharpes)),
            'avg_return': float(np.mean(returns)),
            'avg_max_dd': float(np.mean(drawdowns)),
            # Drawdowns are negative fractions, so the worst is the minimum.
            'worst_max_dd': float(np.min(drawdowns)),
            'avg_win_rate': float(np.mean(win_rates)),
            'consistency': sum(1 for s in sharpes if s > 1.0) / len(sharpes),
            'all_walks': self.results
        }

    def _calculate_sharpe(self, returns: pd.Series, risk_free=0.02) -> float:
        """
        Compute the Sharpe ratio, annualised with a factor of sqrt(252).

        NOTE(review): the 252 factor assumes one return per trading day,
        while the caller passes one return per trade — confirm.

        Returns 0.0 when fewer than two returns are available or when the
        returns have no dispersion (the ratio is undefined).
        """
        values = np.asarray(returns, dtype=float)
        values = values[~np.isnan(values)]
        if values.size < 2:
            return 0.0

        excess_returns = values - risk_free / 252
        dispersion = excess_returns.std()
        if dispersion < 1e-12:
            return 0.0
        return float(excess_returns.mean() / dispersion * np.sqrt(252))

    def _calculate_max_drawdown(self, equity_curve: List[float]) -> float:
        """
        Compute the maximum drawdown as a negative fraction of the running
        peak (0.0 for an empty or never-declining curve).
        """
        curve = np.asarray(equity_curve, dtype=float)
        if curve.size == 0:
            return 0.0
        peak = np.maximum.accumulate(curve)
        return float(np.min((curve - peak) / peak))
|
|
```
|
|
|
|
---
|
|
|
|
## 📊 Out-of-Sample Testing
|
|
|
|
### Principe
|
|
|
|
**Ne jamais optimiser sur 100 % des données !**
|
|
|
|
```
┌────────────────────────────────────────────────────────────┐
│              SPLIT IN-SAMPLE / OUT-OF-SAMPLE               │
├────────────────────────────────────────────────────────────┤
│                                                            │
│  [========== IN-SAMPLE 70% ==========][OUT-SAMPLE 30%]     │
│                                                            │
│  Optimisation paramètres              Validation finale    │
│  Walk-forward analysis                Performance réelle   │
│  Tuning hyperparamètres               JAMAIS touché        │
│                                                            │
└────────────────────────────────────────────────────────────┘
```
|
|
|
|
### Implémentation
|
|
|
|
```python
|
|
class OutOfSampleValidator:
    """
    Strict out-of-sample validation.

    The last ``oos_ratio`` share of the data is held back and only used to
    compare a strategy's performance against its in-sample performance.

    NOTE(review): ``validate`` calls ``self._backtest(strategy, data)``,
    which is not defined in this snippet — it must be supplied elsewhere
    (subclass or mixin) and return a dict with 'sharpe', 'total_return' and
    'win_rate'.
    """

    # Acceptance thresholds used by _is_valid_strategy.
    MAX_SHARPE_DEGRADATION = 0.30
    MAX_RETURN_DEGRADATION = 0.40
    MIN_OOS_SHARPE = 1.0

    def __init__(self, oos_ratio=0.30):
        # 0 or 1 would leave one of the two partitions empty.
        if not 0 < oos_ratio < 1:
            raise ValueError(f"oos_ratio must be strictly between 0 and 1, got {oos_ratio!r}")
        self.oos_ratio = oos_ratio

    def split_data(
        self,
        data: pd.DataFrame
    ) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Split the data chronologically into in-sample / out-of-sample.

        Returns:
            ``(in_sample, out_of_sample)``: the first ``1 - oos_ratio`` share
            of the rows and the remaining rows, in their original order.
        """
        split_point = int(len(data) * (1 - self.oos_ratio))
        return data.iloc[:split_point], data.iloc[split_point:]

    def validate(
        self,
        strategy,
        in_sample_data: pd.DataFrame,
        out_of_sample_data: pd.DataFrame
    ) -> Dict:
        """
        Validate a strategy on the out-of-sample data.

        Returns:
            Dict with the raw 'in_sample' and 'out_of_sample' backtest
            results, the relative 'degradation' between them and the
            'is_valid' verdict.
        """
        # In-sample performance
        is_results = self._backtest(strategy, in_sample_data)

        # Out-of-sample performance (the critical one)
        oos_results = self._backtest(strategy, out_of_sample_data)

        degradation = self._calculate_degradation(is_results, oos_results)

        return {
            'in_sample': is_results,
            'out_of_sample': oos_results,
            'degradation': degradation,
            'is_valid': self._is_valid_strategy(degradation, oos_results)
        }

    @staticmethod
    def _relative_drop(in_sample_value, out_of_sample_value) -> float:
        """Return the IS -> OOS drop as a fraction of the in-sample magnitude."""
        drop = in_sample_value - out_of_sample_value
        if in_sample_value == 0:
            # No baseline to scale by: report "no change", or an unbounded
            # degradation / improvement according to the sign of the drop.
            if drop == 0:
                return 0.0
            return float('inf') if drop > 0 else float('-inf')
        # Divide by the magnitude so that a negative in-sample metric does
        # not flip the sign and turn a deterioration into an "improvement".
        return drop / abs(in_sample_value)

    def _calculate_degradation(
        self,
        is_results: Dict,
        oos_results: Dict
    ) -> Dict:
        """
        Compute the relative performance degradation IS -> OOS.

        Positive values mean the out-of-sample metric is worse than the
        in-sample one (0.25 = 25% lower).
        """
        return {
            'sharpe_degradation': self._relative_drop(
                is_results['sharpe'], oos_results['sharpe']),
            'return_degradation': self._relative_drop(
                is_results['total_return'], oos_results['total_return']),
            'winrate_degradation': self._relative_drop(
                is_results['win_rate'], oos_results['win_rate']),
        }

    def _is_valid_strategy(self, degradation: Dict, oos_results: Dict = None) -> bool:
        """
        Validation criteria.

        A strategy is valid when:
            - OOS Sharpe > 1.0 (only checked when ``oos_results`` is given)
            - Sharpe degradation <= 30%
            - Return degradation <= 40%
        """
        # Written as "not <=" so that a NaN degradation is rejected instead
        # of slipping through a ">" comparison.
        if not degradation['sharpe_degradation'] <= self.MAX_SHARPE_DEGRADATION:
            return False
        if not degradation['return_degradation'] <= self.MAX_RETURN_DEGRADATION:
            return False
        if oos_results is not None and not oos_results['sharpe'] > self.MIN_OOS_SHARPE:
            return False

        return True
|
|
```
|
|
|
|
---
|
|
|
|
## 🎲 Monte Carlo Simulation
|
|
|
|
### Objectif
|
|
|
|
Tester la robustesse en simulant des milliers de scénarios aléatoires.
|
|
|
|
```python
|
|
class MonteCarloSimulator:
    """
    Monte Carlo simulation for robustness validation.

    Parameters:
        - n_simulations: Number of random scenarios to generate (>= 1)
        - seed: Optional seed for reproducible runs; when omitted the global
          ``np.random`` state is used
        - bootstrap: When False (default) every scenario is a reordering of
          the same trades; when True trades are drawn with replacement
    """

    # Starting equity of every simulated scenario.
    INITIAL_CAPITAL = 10000

    def __init__(self, n_simulations=10000, seed=None, bootstrap=False):
        if n_simulations < 1:
            raise ValueError(f"n_simulations must be at least 1, got {n_simulations!r}")
        self.n_simulations = n_simulations
        self.bootstrap = bootstrap
        self._random = np.random if seed is None else np.random.RandomState(seed)

    def simulate(
        self,
        historical_trades: List[Dict]
    ) -> Dict:
        """
        Simulate N scenarios from the historical trades.

        Principle:
            - Same trades, different order (or resampled when ``bootstrap``)
            - Tests sensitivity to the trade sequence
            - Separates lucky streaks from a real edge

        Because equity is compounded, a pure reordering leaves the final
        return and the per-trade Sharpe unchanged (up to floating-point
        rounding); only the drawdown path varies. Enable ``bootstrap`` to get
        a meaningful distribution for return and Sharpe as well.

        Args:
            historical_trades: Trades as dicts with a fractional 'pnl' entry

        Returns:
            Distribution statistics, see ``_analyze_distribution``.
        """
        trades = list(historical_trades)
        results = []

        for _ in range(self.n_simulations):
            equity = self._calculate_equity_curve(self._draw_scenario(trades))
            results.append({
                'sharpe': self._calculate_sharpe(equity),
                'max_dd': self._calculate_max_drawdown(equity),
                'return': (equity[-1] - equity[0]) / equity[0]
            })

        return self._analyze_distribution(results)

    def _draw_scenario(self, trades: List[Dict]) -> List[Dict]:
        """Return one random scenario: a permutation or a bootstrap sample."""
        count = len(trades)
        if count == 0:
            return []
        if self.bootstrap:
            picks = self._random.randint(0, count, size=count)
        else:
            # Permute positions rather than the dicts themselves so NumPy
            # never has to coerce the trade objects into an array.
            picks = self._random.permutation(count)
        return [trades[i] for i in picks]

    def _analyze_distribution(self, results: List[Dict]) -> Dict:
        """
        Analyse the distribution of the Monte Carlo results.

        Drawdowns are negative fractions, so 'max_dd_95th_percentile' is the
        drawdown that 95% of the scenarios stay above (the 5th percentile of
        the raw values).
        """
        sharpes = np.array([r['sharpe'] for r in results], dtype=float)
        returns = np.array([r['return'] for r in results], dtype=float)
        drawdowns = np.array([r['max_dd'] for r in results], dtype=float)

        return {
            # Sharpe
            'sharpe_mean': float(np.mean(sharpes)),
            'sharpe_median': float(np.median(sharpes)),
            'sharpe_5th_percentile': float(np.percentile(sharpes, 5)),
            'sharpe_95th_percentile': float(np.percentile(sharpes, 95)),
            'prob_sharpe_positive': float(np.mean(sharpes > 0)),
            'prob_sharpe_above_1': float(np.mean(sharpes > 1.0)),

            # Returns
            'return_mean': float(np.mean(returns)),
            'return_5th_percentile': float(np.percentile(returns, 5)),
            'return_95th_percentile': float(np.percentile(returns, 95)),
            'prob_positive_return': float(np.mean(returns > 0)),

            # Drawdown
            'max_dd_mean': float(np.mean(drawdowns)),
            'max_dd_95th_percentile': float(np.percentile(drawdowns, 5)),
            'prob_dd_below_10pct': float(np.mean(drawdowns > -0.10)),
        }

    def _calculate_equity_curve(self, trades: List[Dict]) -> List[float]:
        """Build the compounded equity curve from fractional trade P&Ls."""
        equity = [self.INITIAL_CAPITAL]

        for trade in trades:
            pnl = trade['pnl'] * equity[-1]
            equity.append(equity[-1] + pnl)

        return equity

    def _calculate_sharpe(self, equity: List[float], risk_free=0.02) -> float:
        """
        Compute the Sharpe ratio of an equity curve, annualised with a
        factor of sqrt(252).

        NOTE(review): the 252 factor assumes one step per trading day, while
        the curve has one step per trade — confirm.

        Returns 0.0 when the curve has fewer than two steps or when the step
        returns have no dispersion.
        """
        curve = np.asarray(equity, dtype=float)
        if curve.size < 3:
            return 0.0

        excess = np.diff(curve) / curve[:-1] - risk_free / 252
        dispersion = excess.std()
        if dispersion < 1e-12:
            return 0.0
        return float(excess.mean() / dispersion * np.sqrt(252))

    def _calculate_max_drawdown(self, equity: List[float]) -> float:
        """Compute the maximum drawdown as a negative fraction of the running peak."""
        curve = np.asarray(equity, dtype=float)
        if curve.size == 0:
            return 0.0
        peak = np.maximum.accumulate(curve)
        return float(np.min((curve - peak) / peak))
|
|
```
|
|
|
|
---
|
|
|
|
## 📝 Paper Trading
|
|
|
|
### Protocole Strict
|
|
|
|
```python
|
|
class PaperTradingEngine:
    """
    Paper trading under live-like conditions.

    Rules:
        - Minimum 30 days of trading
        - Real-time data (not historical)
        - Realistic slippage and commissions
        - Simulated latency
        - Daily validation

    Only the minimum duration and the go-live criteria are enforced by the
    code below.
    """

    # Go-live thresholds checked by can_go_live.
    MIN_SHARPE = 1.5
    MAX_DRAWDOWN = 0.10
    MIN_WIN_RATE = 0.55
    MIN_TRADES = 50
    # Starting equity used to rebuild the equity curve from the trades.
    INITIAL_CAPITAL = 10000

    def __init__(self, min_days=30):
        self.min_days = min_days
        self.start_date = None
        self.trades = []
        self.daily_metrics = []

    def start(self):
        """Start paper trading and record the start time."""
        # Imported locally: the snippet never defined the logger it used.
        import logging

        self.start_date = datetime.now()
        logging.getLogger(__name__).info(
            "Paper trading started. Minimum duration: %s days", self.min_days
        )

    def can_go_live(self) -> Tuple[bool, str]:
        """
        Check whether the strategy may be promoted to live trading.

        Criteria:
            - Minimum 30 days (``min_days``)
            - Sharpe >= 1.5
            - Max DD <= 10%
            - Win rate >= 55%
            - Minimum 50 trades

        Returns:
            ``(ok, reason)`` where ``reason`` names the first failed
            criterion, or confirms that all criteria are met.
        """
        if not self.start_date:
            return False, "Paper trading not started"

        days_elapsed = (datetime.now() - self.start_date).days

        if days_elapsed < self.min_days:
            return False, f"Only {days_elapsed}/{self.min_days} days completed"

        metrics = self._calculate_metrics()

        if metrics['sharpe'] < self.MIN_SHARPE:
            return False, f"Sharpe {metrics['sharpe']:.2f} below 1.5"

        if metrics['max_dd'] > self.MAX_DRAWDOWN:
            return False, f"Max DD {metrics['max_dd']:.2%} above 10%"

        if metrics['win_rate'] < self.MIN_WIN_RATE:
            return False, f"Win rate {metrics['win_rate']:.2%} below 55%"

        if len(self.trades) < self.MIN_TRADES:
            return False, f"Only {len(self.trades)} trades (minimum 50)"

        return True, "All criteria met. Ready for live trading."

    def _calculate_metrics(self) -> Dict:
        """
        Compute the paper-trading metrics from ``self.trades``.

        NOTE(review): assumes every trade is a dict with a fractional 'pnl'
        entry, as in the other snippets of this guide — confirm against the
        code that fills ``self.trades``.

        Returns:
            Dict with 'sharpe' (annualised with sqrt(252)), 'max_dd' (as a
            positive fraction, matching the ``> 0.10`` check in
            ``can_go_live``) and 'win_rate'. All zeros when there are no
            trades.
        """
        pnls = np.array([trade['pnl'] for trade in self.trades], dtype=float)
        if pnls.size == 0:
            return {'sharpe': 0.0, 'max_dd': 0.0, 'win_rate': 0.0}

        # Compounded equity curve, including the starting capital.
        equity = self.INITIAL_CAPITAL * np.concatenate(([1.0], np.cumprod(1 + pnls)))
        peak = np.maximum.accumulate(equity)
        max_dd = float(np.max((peak - equity) / peak))

        excess = pnls - 0.02 / 252
        dispersion = excess.std()
        # The ratio is undefined without dispersion; report a neutral 0.0.
        if pnls.size < 2 or dispersion < 1e-12:
            sharpe = 0.0
        else:
            sharpe = float(excess.mean() / dispersion * np.sqrt(252))

        return {
            'sharpe': sharpe,
            'max_dd': max_dd,
            'win_rate': float(np.mean(pnls > 0)),
        }
|
|
```
|
|
|
|
---
|
|
|
|
## 📊 Métriques de Validation
|
|
|
|
### Seuils Minimaux
|
|
|
|
```yaml
validation_criteria:
  # Performance
  sharpe_ratio:
    in_sample: 1.8
    out_of_sample: 1.5
    paper_trading: 1.5

  # Risk
  max_drawdown:
    in_sample: 0.08
    out_of_sample: 0.10
    paper_trading: 0.10

  # Consistency
  win_rate:
    minimum: 0.55
    target: 0.60

  profit_factor:
    minimum: 1.3
    target: 1.5

  # Robustness
  monte_carlo:
    prob_positive_sharpe: 0.95
    sharpe_5th_percentile: 0.8

  # Sample size
  minimum_trades:
    in_sample: 100
    out_of_sample: 30
    paper_trading: 50
```
|
|
|
|
---
|
|
|
|
**Documentation complète du backtesting terminée !**
|