Architecture Docker (8 services), FastAPI, TimescaleDB, Redis, Streamlit. Stratégies : scalping, intraday, swing. MLEngine + RegimeDetector (HMM). BacktestEngine + WalkForwardAnalyzer + Optuna optimizer. Routes API complètes dont /optimize async. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
524 lines
17 KiB
Python
524 lines
17 KiB
Python
"""
Unit tests - FeatureEngineering.

Tests for the creation of features for ML.
"""
|
|
|
|
import pytest
|
|
import pandas as pd
|
|
import numpy as np
|
|
from datetime import datetime, timedelta
|
|
|
|
from src.ml.feature_engineering import FeatureEngineering
|
|
|
|
|
|
class TestFeatureEngineeringInitialization:
    """Construction tests for ``FeatureEngineering``."""

    def test_initialization_default(self):
        """With no arguments, ``config`` is an empty dict and no feature names exist."""
        engine = FeatureEngineering()

        assert engine.config == {}
        assert len(engine.feature_names) == 0

    def test_initialization_with_config(self):
        """A config passed to the constructor is exposed through ``config``."""
        settings = {'param1': 'value1'}

        engine = FeatureEngineering(settings)

        assert engine.config == settings
|
|
|
|
|
|
class TestFeatureCreation:
    """Tests for building the full feature set with ``create_all_features``."""

    @pytest.fixture
    def sample_data(self):
        """Return a 300-row hourly OHLCV frame built from a seeded random walk.

        ``close`` is ``1.1 * exp(cumsum(returns))`` with normally distributed
        returns; ``open`` is the previous close; ``high``/``low`` widen the
        open/close extremes by a random factor of up to 0.1%.
        """
        # Lowercase 'h' is the hourly alias; the uppercase 'H' spelling is
        # deprecated since pandas 2.2 and emits a FutureWarning.
        dates = pd.date_range(start='2024-01-01', periods=300, freq='h')

        # Fixed seed so every run sees the same synthetic prices.
        np.random.seed(42)
        returns = np.random.normal(0.0001, 0.01, 300)
        prices = 1.1000 * np.exp(np.cumsum(returns))

        df = pd.DataFrame(index=dates)
        df['close'] = prices
        # The first bar has no previous close, so it opens at its own close.
        df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0])
        df['high'] = df[['open', 'close']].max(axis=1) * (1 + np.random.uniform(0, 0.001, 300))
        df['low'] = df[['open', 'close']].min(axis=1) * (1 - np.random.uniform(0, 0.001, 300))
        df['volume'] = np.random.randint(1000, 10000, 300)

        return df

    def test_create_all_features(self, sample_data):
        """``create_all_features`` returns a non-empty DataFrame and records feature names."""
        fe = FeatureEngineering()

        features_df = fe.create_all_features(sample_data)

        assert isinstance(features_df, pd.DataFrame)
        assert len(features_df) > 0
        assert len(fe.feature_names) > 0

    def test_features_count(self, sample_data):
        """At least 100 feature names are registered after feature creation."""
        fe = FeatureEngineering()

        # Only the side effect on ``fe.feature_names`` matters here, so the
        # returned frame is intentionally not bound to a name.
        fe.create_all_features(sample_data)

        # Should create 100+ features.
        assert len(fe.feature_names) >= 100

    def test_no_nan_in_features(self, sample_data):
        """The frame returned by ``create_all_features`` contains no NaN."""
        fe = FeatureEngineering()

        features_df = fe.create_all_features(sample_data)

        # Presumably create_all_features drops incomplete rows itself
        # (the original comment mentions a dropna) - verify against the
        # implementation.
        assert features_df.isna().sum().sum() == 0
|
|
|
|
|
|
class TestPriceFeatures:
    """Tests for the price-based features built by ``_create_price_features``."""

    @pytest.fixture
    def sample_data(self):
        """Return a 300-row hourly OHLCV frame built from a seeded random walk.

        ``high`` and ``low`` sit a fixed 0.1% above/below the open/close
        extremes.
        """
        # Lowercase 'h' is the hourly alias; the uppercase 'H' spelling is
        # deprecated since pandas 2.2 and emits a FutureWarning.
        dates = pd.date_range(start='2024-01-01', periods=300, freq='h')

        # Fixed seed so every run sees the same synthetic prices.
        np.random.seed(42)
        returns = np.random.normal(0.0001, 0.01, 300)
        prices = 1.1000 * np.exp(np.cumsum(returns))

        df = pd.DataFrame(index=dates)
        df['close'] = prices
        # The first bar has no previous close, so it opens at its own close.
        df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0])
        df['high'] = df[['open', 'close']].max(axis=1) * 1.001
        df['low'] = df[['open', 'close']].min(axis=1) * 0.999
        df['volume'] = np.random.randint(1000, 10000, 300)

        return df

    def test_price_features_created(self, sample_data):
        """All expected price feature columns are present."""
        fe = FeatureEngineering()

        df = fe._create_price_features(sample_data.copy())

        assert 'returns' in df.columns
        assert 'log_returns' in df.columns
        assert 'high_low_ratio' in df.columns
        assert 'close_open_ratio' in df.columns
        assert 'price_position' in df.columns

    def test_returns_calculation(self, sample_data):
        """``returns`` equals the percentage change of ``close``."""
        fe = FeatureEngineering()

        df = fe._create_price_features(sample_data.copy())

        # Compare against pandas' own pct_change, ignoring the leading NaN
        # and the Series names.
        expected_returns = sample_data['close'].pct_change()
        pd.testing.assert_series_equal(
            df['returns'].dropna(),
            expected_returns.dropna(),
            check_names=False
        )

    def test_price_position_range(self, sample_data):
        """``price_position`` stays within [0, 1]."""
        fe = FeatureEngineering()

        df = fe._create_price_features(sample_data.copy())

        price_pos = df['price_position'].dropna()
        assert (price_pos >= 0).all()
        assert (price_pos <= 1).all()
|
|
|
|
|
|
class TestTechnicalIndicators:
    """Tests for the indicators built by ``_create_technical_indicators``."""

    @pytest.fixture
    def sample_data(self):
        """Return a 300-row hourly OHLCV frame built from a seeded random walk.

        ``high`` and ``low`` sit a fixed 0.1% above/below the open/close
        extremes.
        """
        # Lowercase 'h' is the hourly alias; the uppercase 'H' spelling is
        # deprecated since pandas 2.2 and emits a FutureWarning.
        dates = pd.date_range(start='2024-01-01', periods=300, freq='h')

        # Fixed seed so every run sees the same synthetic prices.
        np.random.seed(42)
        returns = np.random.normal(0.0001, 0.01, 300)
        prices = 1.1000 * np.exp(np.cumsum(returns))

        df = pd.DataFrame(index=dates)
        df['close'] = prices
        # The first bar has no previous close, so it opens at its own close.
        df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0])
        df['high'] = df[['open', 'close']].max(axis=1) * 1.001
        df['low'] = df[['open', 'close']].min(axis=1) * 0.999
        df['volume'] = np.random.randint(1000, 10000, 300)

        return df

    def test_moving_averages_created(self, sample_data):
        """SMA and EMA columns exist for every expected period."""
        fe = FeatureEngineering()

        df = fe._create_technical_indicators(sample_data.copy())

        # Check both the simple and the exponential moving averages.
        for period in [5, 10, 20, 50, 100, 200]:
            assert f'sma_{period}' in df.columns
            assert f'ema_{period}' in df.columns

    def test_rsi_calculation(self, sample_data):
        """RSI columns exist for each period and stay within [0, 100]."""
        fe = FeatureEngineering()

        df = fe._create_technical_indicators(sample_data.copy())

        for period in [7, 14, 21]:
            assert f'rsi_{period}' in df.columns

            # RSI should be between 0 and 100 (NaN rows are ignored).
            rsi = df[f'rsi_{period}'].dropna()
            assert (rsi >= 0).all()
            assert (rsi <= 100).all()

    def test_macd_calculation(self, sample_data):
        """MACD line, signal and histogram columns are present."""
        fe = FeatureEngineering()

        df = fe._create_technical_indicators(sample_data.copy())

        assert 'macd' in df.columns
        assert 'macd_signal' in df.columns
        assert 'macd_hist' in df.columns

    def test_bollinger_bands(self, sample_data):
        """Bollinger columns exist and are ordered upper >= middle >= lower."""
        fe = FeatureEngineering()

        df = fe._create_technical_indicators(sample_data.copy())

        for period in [20, 50]:
            upper_col = f'bb_upper_{period}'
            middle_col = f'bb_middle_{period}'
            lower_col = f'bb_lower_{period}'

            assert upper_col in df.columns
            assert middle_col in df.columns
            assert lower_col in df.columns
            assert f'bb_width_{period}' in df.columns
            assert f'bb_position_{period}' in df.columns

            # Drop NaN rows jointly so the three bands share one index.
            # Dropping per column would make the comparison raise on
            # differently-labelled Series if the bands ever had different
            # NaN rows.
            bands = df[[upper_col, middle_col, lower_col]].dropna()

            # Check ordering: upper >= middle >= lower.
            assert (bands[upper_col] >= bands[middle_col]).all()
            assert (bands[middle_col] >= bands[lower_col]).all()

    def test_atr_calculation(self, sample_data):
        """ATR columns exist for each period and are strictly positive."""
        fe = FeatureEngineering()

        df = fe._create_technical_indicators(sample_data.copy())

        for period in [7, 14, 21]:
            assert f'atr_{period}' in df.columns

            # ATR should be positive (NaN rows are ignored).
            atr = df[f'atr_{period}'].dropna()
            assert (atr > 0).all()
|
|
|
|
|
|
class TestStatisticalFeatures:
    """Tests for the features built by ``_create_statistical_features``."""

    @pytest.fixture
    def sample_data(self):
        """Return a 300-row hourly OHLCV frame built from a seeded random walk.

        ``high`` and ``low`` sit a fixed 0.1% above/below the open/close
        extremes.
        """
        # Lowercase 'h' is the hourly alias; the uppercase 'H' spelling is
        # deprecated since pandas 2.2 and emits a FutureWarning.
        dates = pd.date_range(start='2024-01-01', periods=300, freq='h')

        # Fixed seed so every run sees the same synthetic prices.
        np.random.seed(42)
        returns = np.random.normal(0.0001, 0.01, 300)
        prices = 1.1000 * np.exp(np.cumsum(returns))

        df = pd.DataFrame(index=dates)
        df['close'] = prices
        # The first bar has no previous close, so it opens at its own close.
        df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0])
        df['high'] = df[['open', 'close']].max(axis=1) * 1.001
        df['low'] = df[['open', 'close']].min(axis=1) * 0.999
        df['volume'] = np.random.randint(1000, 10000, 300)

        return df

    def test_statistical_features_created(self, sample_data):
        """Mean, std, skew, kurtosis and z-score columns exist for each period."""
        fe = FeatureEngineering()

        df = fe._create_statistical_features(sample_data.copy())

        for period in [10, 20, 50]:
            assert f'mean_{period}' in df.columns
            assert f'std_{period}' in df.columns
            assert f'skew_{period}' in df.columns
            assert f'kurt_{period}' in df.columns
            assert f'zscore_{period}' in df.columns

    def test_zscore_calculation(self, sample_data):
        """The 20-period z-score has mean near 0 and std near 1."""
        fe = FeatureEngineering()

        df = fe._create_statistical_features(sample_data.copy())

        # Z-score should have mean ~0 and std ~1; a tolerance of 0.5 is
        # allowed on both.
        zscore = df['zscore_20'].dropna()
        assert abs(zscore.mean()) < 0.5
        assert abs(zscore.std() - 1.0) < 0.5
|
|
|
|
|
|
class TestVolatilityFeatures:
    """Tests for the features built by ``_create_volatility_features``."""

    @pytest.fixture
    def sample_data(self):
        """Return a 300-row hourly OHLCV frame built from a seeded random walk.

        ``high`` and ``low`` sit a fixed 0.1% above/below the open/close
        extremes.
        """
        # Lowercase 'h' is the hourly alias; the uppercase 'H' spelling is
        # deprecated since pandas 2.2 and emits a FutureWarning.
        dates = pd.date_range(start='2024-01-01', periods=300, freq='h')

        # Fixed seed so every run sees the same synthetic prices.
        np.random.seed(42)
        returns = np.random.normal(0.0001, 0.01, 300)
        prices = 1.1000 * np.exp(np.cumsum(returns))

        df = pd.DataFrame(index=dates)
        df['close'] = prices
        # The first bar has no previous close, so it opens at its own close.
        df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0])
        df['high'] = df[['open', 'close']].max(axis=1) * 1.001
        df['low'] = df[['open', 'close']].min(axis=1) * 0.999
        df['volume'] = np.random.randint(1000, 10000, 300)

        return df

    def test_volatility_features_created(self, sample_data):
        """Rolling volatility columns and the range-based estimators are present."""
        fe = FeatureEngineering()

        # Add a 'returns' column before calling the volatility step.
        df = sample_data.copy()
        df['returns'] = df['close'].pct_change()

        df = fe._create_volatility_features(df)

        for period in [10, 20, 50]:
            assert f'volatility_{period}' in df.columns

        assert 'parkinson_vol' in df.columns
        assert 'gk_vol' in df.columns
        assert 'vol_ratio' in df.columns

    def test_volatility_positive(self, sample_data):
        """The 20-period volatility is strictly positive wherever it is defined."""
        fe = FeatureEngineering()

        # Add a 'returns' column before calling the volatility step.
        df = sample_data.copy()
        df['returns'] = df['close'].pct_change()

        df = fe._create_volatility_features(df)

        vol = df['volatility_20'].dropna()
        assert (vol > 0).all()
|
|
|
|
|
|
class TestVolumeFeatures:
    """Tests for the features built by ``_create_volume_features``."""

    @pytest.fixture
    def sample_data(self):
        """Return a 300-row hourly OHLCV frame built from a seeded random walk.

        ``high`` and ``low`` sit a fixed 0.1% above/below the open/close
        extremes.
        """
        # Lowercase 'h' is the hourly alias; the uppercase 'H' spelling is
        # deprecated since pandas 2.2 and emits a FutureWarning.
        dates = pd.date_range(start='2024-01-01', periods=300, freq='h')

        # Fixed seed so every run sees the same synthetic prices.
        np.random.seed(42)
        returns = np.random.normal(0.0001, 0.01, 300)
        prices = 1.1000 * np.exp(np.cumsum(returns))

        df = pd.DataFrame(index=dates)
        df['close'] = prices
        # The first bar has no previous close, so it opens at its own close.
        df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0])
        df['high'] = df[['open', 'close']].max(axis=1) * 1.001
        df['low'] = df[['open', 'close']].min(axis=1) * 0.999
        df['volume'] = np.random.randint(1000, 10000, 300)

        return df

    def test_volume_features_created(self, sample_data):
        """Volume moving averages, ratio, change, OBV and VWAP columns are present."""
        fe = FeatureEngineering()

        df = fe._create_volume_features(sample_data.copy())

        for period in [5, 10, 20]:
            assert f'volume_ma_{period}' in df.columns

        assert 'volume_ratio' in df.columns
        assert 'volume_change' in df.columns
        assert 'obv' in df.columns
        assert 'vwap' in df.columns
|
|
|
|
|
|
class TestTimeFeatures:
    """Tests for the calendar features built by ``_create_time_features``."""

    @pytest.fixture
    def sample_data(self):
        """Return a 300-row OHLCV frame with an hourly datetime index.

        Prices come from a seeded random walk; ``high`` and ``low`` sit a
        fixed 0.1% above/below the open/close extremes.
        """
        # Lowercase 'h' is the hourly alias; the uppercase 'H' spelling is
        # deprecated since pandas 2.2 and emits a FutureWarning.
        dates = pd.date_range(start='2024-01-01', periods=300, freq='h')

        # Fixed seed so every run sees the same synthetic prices.
        np.random.seed(42)
        returns = np.random.normal(0.0001, 0.01, 300)
        prices = 1.1000 * np.exp(np.cumsum(returns))

        df = pd.DataFrame(index=dates)
        df['close'] = prices
        # The first bar has no previous close, so it opens at its own close.
        df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0])
        df['high'] = df[['open', 'close']].max(axis=1) * 1.001
        df['low'] = df[['open', 'close']].min(axis=1) * 0.999
        df['volume'] = np.random.randint(1000, 10000, 300)

        return df

    def test_time_features_created(self, sample_data):
        """Raw and sin/cos-encoded hour, day-of-week and month columns are present."""
        fe = FeatureEngineering()

        df = fe._create_time_features(sample_data.copy())

        assert 'hour' in df.columns
        assert 'hour_sin' in df.columns
        assert 'hour_cos' in df.columns
        assert 'day_of_week' in df.columns
        assert 'dow_sin' in df.columns
        assert 'dow_cos' in df.columns
        assert 'month' in df.columns
        assert 'month_sin' in df.columns
        assert 'month_cos' in df.columns

    def test_cyclic_encoding_range(self, sample_data):
        """Every sin/cos encoded column stays within [-1, 1]."""
        fe = FeatureEngineering()

        df = fe._create_time_features(sample_data.copy())

        for col in ['hour_sin', 'hour_cos', 'dow_sin', 'dow_cos', 'month_sin', 'month_cos']:
            values = df[col].dropna()
            assert (values >= -1).all()
            assert (values <= 1).all()
|
|
|
|
|
|
class TestFeatureImportance:
    """Tests for feature-importance scoring and top-feature selection."""

    @pytest.fixture
    def sample_features(self):
        """Return a seeded 1000 x 20 frame of standard-normal draws.

        Columns are named ``feature_0`` to ``feature_19``.
        """
        np.random.seed(42)

        n_samples = 1000
        n_features = 20

        values = np.random.randn(n_samples, n_features)
        column_labels = [f'feature_{i}' for i in range(n_features)]

        return pd.DataFrame(values, columns=column_labels)

    @pytest.fixture
    def sample_target(self):
        """Return a seeded Series of 1000 standard-normal draws."""
        np.random.seed(42)
        draws = np.random.randn(1000)
        return pd.Series(draws)

    def test_get_feature_importance(self, sample_features, sample_target):
        """``get_feature_importance`` returns one row per input feature.

        The result must be a DataFrame with ``feature`` and ``importance``
        columns.
        """
        engine = FeatureEngineering()

        ranking = engine.get_feature_importance(
            sample_features,
            sample_target,
            method='mutual_info'
        )

        assert isinstance(ranking, pd.DataFrame)
        assert 'feature' in ranking.columns
        assert 'importance' in ranking.columns
        assert len(ranking) == len(sample_features.columns)

    def test_select_top_features(self, sample_features, sample_target):
        """``select_top_features`` returns a list of 10 existing column names."""
        engine = FeatureEngineering()

        selected = engine.select_top_features(
            sample_features,
            sample_target,
            n_features=10
        )

        assert isinstance(selected, list)
        assert len(selected) == 10
        # Every selected name must be a column of the input frame.
        assert all(name in sample_features.columns for name in selected)
|
|
|
|
|
|
class TestFeatureEngineeringIntegration:
    """Integration tests chaining feature creation, importance and selection."""

    @pytest.fixture
    def sample_data(self):
        """Return a 500-row hourly OHLCV frame built from a seeded random walk.

        ``high``/``low`` widen the open/close extremes by a random factor of
        up to 0.1%.
        """
        # Lowercase 'h' is the hourly alias; the uppercase 'H' spelling is
        # deprecated since pandas 2.2 and emits a FutureWarning.
        dates = pd.date_range(start='2024-01-01', periods=500, freq='h')

        # Fixed seed so every run sees the same synthetic prices.
        np.random.seed(42)
        returns = np.random.normal(0.0001, 0.01, 500)
        prices = 1.1000 * np.exp(np.cumsum(returns))

        df = pd.DataFrame(index=dates)
        df['close'] = prices
        # The first bar has no previous close, so it opens at its own close.
        df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0])
        df['high'] = df[['open', 'close']].max(axis=1) * (1 + np.random.uniform(0, 0.001, 500))
        df['low'] = df[['open', 'close']].min(axis=1) * (1 - np.random.uniform(0, 0.001, 500))
        df['volume'] = np.random.randint(1000, 10000, 500)

        return df

    def test_full_workflow(self, sample_data):
        """Run the full pipeline: create features, rank them, select the top 50."""
        fe = FeatureEngineering()

        # 1. Create all features.
        features_df = fe.create_all_features(sample_data)

        assert len(features_df) > 0
        assert len(fe.feature_names) >= 100

        # 2. Check there is no NaN.
        assert features_df.isna().sum().sum() == 0

        # 3. Build the target: the next row's return. The shift leaves a NaN
        # on the last row, so that row is dropped from the target and the
        # last feature row is sliced off to keep both the same length.
        target = features_df['returns'].shift(-1).dropna()
        features_for_ml = features_df.iloc[:-1]

        # 4. Feature importance.
        importance = fe.get_feature_importance(
            features_for_ml[fe.feature_names],
            target,
            method='correlation'
        )

        assert len(importance) > 0

        # 5. Select the top features.
        top_features = fe.select_top_features(
            features_for_ml[fe.feature_names],
            target,
            n_features=50
        )

        assert len(top_features) == 50
|