Files
trader-ml/tests/unit/test_ml/test_feature_engineering.py
Tika da30ef19ed Initial commit — Trading AI Secure project complet
Architecture Docker (8 services), FastAPI, TimescaleDB, Redis, Streamlit.
Stratégies : scalping, intraday, swing. MLEngine + RegimeDetector (HMM).
BacktestEngine + WalkForwardAnalyzer + Optuna optimizer.
Routes API complètes dont /optimize async.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-08 17:38:09 +00:00

524 lines
17 KiB
Python

"""
Tests Unitaires - FeatureEngineering.
Tests de la création de features pour ML.
"""
import pytest
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from src.ml.feature_engineering import FeatureEngineering
class TestFeatureEngineeringInitialization:
    """Constructor behaviour of ``FeatureEngineering``."""

    def test_initialization_default(self):
        """Without arguments, the config is an empty dict and no feature names exist."""
        engineer = FeatureEngineering()

        assert engineer.config == {}
        registered_names = engineer.feature_names
        assert len(registered_names) == 0

    def test_initialization_with_config(self):
        """A config passed to the constructor is exposed unchanged as ``config``."""
        settings = {'param1': 'value1'}

        engineer = FeatureEngineering(settings)

        assert engineer.config == settings
class TestFeatureCreation:
    """End-to-end checks on ``FeatureEngineering.create_all_features``."""

    @pytest.fixture
    def sample_data(self):
        """Return a deterministic 300-bar hourly OHLCV frame.

        Closes follow a seeded geometric random walk starting near 1.1,
        each open is the previous close (the first open equals the first
        close), highs/lows widen the open/close envelope by a random
        factor of up to 0.1 %, and volumes are random integers in
        [1000, 10000).
        """
        n_bars = 300
        # 'h' is the hourly alias; the upper-case 'H' spelling has been
        # deprecated since pandas 2.2.
        dates = pd.date_range(start='2024-01-01', periods=n_bars, freq='h')
        # Global seeding makes every draw below reproducible; the order of
        # the draws must stay as-is or the generated data changes.
        np.random.seed(42)
        returns = np.random.normal(0.0001, 0.01, n_bars)
        prices = 1.1000 * np.exp(np.cumsum(returns))
        df = pd.DataFrame(index=dates)
        df['close'] = prices
        df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0])
        df['high'] = df[['open', 'close']].max(axis=1) * (1 + np.random.uniform(0, 0.001, n_bars))
        df['low'] = df[['open', 'close']].min(axis=1) * (1 - np.random.uniform(0, 0.001, n_bars))
        df['volume'] = np.random.randint(1000, 10000, n_bars)
        return df

    def test_create_all_features(self, sample_data):
        """The result is a non-empty DataFrame and feature names are recorded."""
        fe = FeatureEngineering()
        features_df = fe.create_all_features(sample_data)
        assert isinstance(features_df, pd.DataFrame)
        assert len(features_df) > 0
        assert len(fe.feature_names) > 0

    def test_features_count(self, sample_data):
        """At least 100 feature names are recorded after a full run."""
        fe = FeatureEngineering()
        # Only the recorded feature names are inspected, so the returned
        # frame is intentionally not bound to a name.
        fe.create_all_features(sample_data)
        assert len(fe.feature_names) >= 100

    def test_no_nan_in_features(self, sample_data):
        """The returned frame contains no NaN values at all."""
        fe = FeatureEngineering()
        features_df = fe.create_all_features(sample_data)
        # NOTE(review): presumably create_all_features drops incomplete
        # rows itself; this test only verifies the outcome.
        nan_count = features_df.isna().sum().sum()
        assert nan_count == 0
class TestPriceFeatures:
    """Checks on the price-based features from ``_create_price_features``."""

    @pytest.fixture
    def sample_data(self):
        """Return a deterministic 300-bar hourly OHLCV frame.

        Closes follow a seeded geometric random walk starting near 1.1,
        each open is the previous close (the first open equals the first
        close), high/low sit a fixed 0.1 % outside the open/close
        envelope, and volumes are random integers in [1000, 10000).
        """
        n_bars = 300
        # 'h' is the hourly alias; the upper-case 'H' spelling has been
        # deprecated since pandas 2.2.
        dates = pd.date_range(start='2024-01-01', periods=n_bars, freq='h')
        np.random.seed(42)
        returns = np.random.normal(0.0001, 0.01, n_bars)
        prices = 1.1000 * np.exp(np.cumsum(returns))
        df = pd.DataFrame(index=dates)
        df['close'] = prices
        df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0])
        df['high'] = df[['open', 'close']].max(axis=1) * 1.001
        df['low'] = df[['open', 'close']].min(axis=1) * 0.999
        df['volume'] = np.random.randint(1000, 10000, n_bars)
        return df

    def test_price_features_created(self, sample_data):
        """All expected price feature columns are present."""
        fe = FeatureEngineering()
        # A copy is passed so the fixture frame itself is never mutated.
        df = fe._create_price_features(sample_data.copy())
        expected_columns = (
            'returns',
            'log_returns',
            'high_low_ratio',
            'close_open_ratio',
            'price_position',
        )
        for column in expected_columns:
            assert column in df.columns

    def test_returns_calculation(self, sample_data):
        """``returns`` equals the percentage change of the close price."""
        fe = FeatureEngineering()
        df = fe._create_price_features(sample_data.copy())
        expected_returns = sample_data['close'].pct_change()
        # Series names are ignored: only values and index have to match.
        pd.testing.assert_series_equal(
            df['returns'].dropna(),
            expected_returns.dropna(),
            check_names=False,
        )

    def test_price_position_range(self, sample_data):
        """Every non-NaN ``price_position`` value lies in [0, 1]."""
        fe = FeatureEngineering()
        df = fe._create_price_features(sample_data.copy())
        price_pos = df['price_position'].dropna()
        assert (price_pos >= 0).all()
        assert (price_pos <= 1).all()
class TestTechnicalIndicators:
    """Checks on the indicators produced by ``_create_technical_indicators``."""

    @pytest.fixture
    def sample_data(self):
        """Return a deterministic 300-bar hourly OHLCV frame.

        Closes follow a seeded geometric random walk starting near 1.1,
        each open is the previous close (the first open equals the first
        close), high/low sit a fixed 0.1 % outside the open/close
        envelope, and volumes are random integers in [1000, 10000).
        """
        n_bars = 300
        # 'h' is the hourly alias; the upper-case 'H' spelling has been
        # deprecated since pandas 2.2.
        dates = pd.date_range(start='2024-01-01', periods=n_bars, freq='h')
        np.random.seed(42)
        returns = np.random.normal(0.0001, 0.01, n_bars)
        prices = 1.1000 * np.exp(np.cumsum(returns))
        df = pd.DataFrame(index=dates)
        df['close'] = prices
        df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0])
        df['high'] = df[['open', 'close']].max(axis=1) * 1.001
        df['low'] = df[['open', 'close']].min(axis=1) * 0.999
        df['volume'] = np.random.randint(1000, 10000, n_bars)
        return df

    def test_moving_averages_created(self, sample_data):
        """SMA and EMA columns exist for every expected period."""
        fe = FeatureEngineering()
        df = fe._create_technical_indicators(sample_data.copy())
        # Both the simple and the exponential average are checked per period.
        for period in [5, 10, 20, 50, 100, 200]:
            assert f'sma_{period}' in df.columns
            assert f'ema_{period}' in df.columns

    def test_rsi_calculation(self, sample_data):
        """RSI columns exist and their non-NaN values lie in [0, 100]."""
        fe = FeatureEngineering()
        df = fe._create_technical_indicators(sample_data.copy())
        for period in [7, 14, 21]:
            assert f'rsi_{period}' in df.columns
            rsi = df[f'rsi_{period}'].dropna()
            assert (rsi >= 0).all()
            assert (rsi <= 100).all()

    def test_macd_calculation(self, sample_data):
        """The MACD line, signal line and histogram columns exist."""
        fe = FeatureEngineering()
        df = fe._create_technical_indicators(sample_data.copy())
        for column in ('macd', 'macd_signal', 'macd_hist'):
            assert column in df.columns

    def test_bollinger_bands(self, sample_data):
        """Bollinger columns exist and satisfy upper >= middle >= lower."""
        fe = FeatureEngineering()
        df = fe._create_technical_indicators(sample_data.copy())
        for period in [20, 50]:
            upper_col = f'bb_upper_{period}'
            middle_col = f'bb_middle_{period}'
            lower_col = f'bb_lower_{period}'
            band_columns = [upper_col, middle_col, lower_col]
            extra_columns = [f'bb_width_{period}', f'bb_position_{period}']
            for column in band_columns + extra_columns:
                assert column in df.columns
            # Drop rows where any band is NaN in one step, so the three
            # series keep identical index labels. Comparing Series that
            # were filtered independently raises ValueError as soon as
            # their labels differ, which would hide the real ordering check.
            bands = df[band_columns].dropna()
            assert (bands[upper_col] >= bands[middle_col]).all()
            assert (bands[middle_col] >= bands[lower_col]).all()

    def test_atr_calculation(self, sample_data):
        """ATR columns exist and their non-NaN values are strictly positive."""
        fe = FeatureEngineering()
        df = fe._create_technical_indicators(sample_data.copy())
        for period in [7, 14, 21]:
            assert f'atr_{period}' in df.columns
            atr = df[f'atr_{period}'].dropna()
            assert (atr > 0).all()
class TestStatisticalFeatures:
    """Checks on the features produced by ``_create_statistical_features``."""

    @pytest.fixture
    def sample_data(self):
        """Return a deterministic 300-bar hourly OHLCV frame.

        Closes follow a seeded geometric random walk starting near 1.1,
        each open is the previous close (the first open equals the first
        close), high/low sit a fixed 0.1 % outside the open/close
        envelope, and volumes are random integers in [1000, 10000).
        """
        n_bars = 300
        # 'h' is the hourly alias; the upper-case 'H' spelling has been
        # deprecated since pandas 2.2.
        dates = pd.date_range(start='2024-01-01', periods=n_bars, freq='h')
        np.random.seed(42)
        returns = np.random.normal(0.0001, 0.01, n_bars)
        prices = 1.1000 * np.exp(np.cumsum(returns))
        df = pd.DataFrame(index=dates)
        df['close'] = prices
        df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0])
        df['high'] = df[['open', 'close']].max(axis=1) * 1.001
        df['low'] = df[['open', 'close']].min(axis=1) * 0.999
        df['volume'] = np.random.randint(1000, 10000, n_bars)
        return df

    def test_statistical_features_created(self, sample_data):
        """Mean, std, skew, kurtosis and z-score columns exist per period."""
        fe = FeatureEngineering()
        df = fe._create_statistical_features(sample_data.copy())
        for period in [10, 20, 50]:
            for prefix in ('mean', 'std', 'skew', 'kurt', 'zscore'):
                assert f'{prefix}_{period}' in df.columns

    def test_zscore_calculation(self, sample_data):
        """The 20-period z-score has mean near 0 and std near 1."""
        fe = FeatureEngineering()
        df = fe._create_statistical_features(sample_data.copy())
        zscore = df['zscore_20'].dropna()
        # Loose tolerances (0.5 each): this is a sanity check on the
        # overall scale, not an exact numerical comparison.
        assert abs(zscore.mean()) < 0.5
        assert abs(zscore.std() - 1.0) < 0.5
class TestVolatilityFeatures:
    """Checks on the features produced by ``_create_volatility_features``."""

    @pytest.fixture
    def sample_data(self):
        """Return a deterministic 300-bar hourly OHLCV frame.

        Closes follow a seeded geometric random walk starting near 1.1,
        each open is the previous close (the first open equals the first
        close), high/low sit a fixed 0.1 % outside the open/close
        envelope, and volumes are random integers in [1000, 10000).
        """
        n_bars = 300
        # 'h' is the hourly alias; the upper-case 'H' spelling has been
        # deprecated since pandas 2.2.
        dates = pd.date_range(start='2024-01-01', periods=n_bars, freq='h')
        np.random.seed(42)
        returns = np.random.normal(0.0001, 0.01, n_bars)
        prices = 1.1000 * np.exp(np.cumsum(returns))
        df = pd.DataFrame(index=dates)
        df['close'] = prices
        df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0])
        df['high'] = df[['open', 'close']].max(axis=1) * 1.001
        df['low'] = df[['open', 'close']].min(axis=1) * 0.999
        df['volume'] = np.random.randint(1000, 10000, n_bars)
        return df

    def test_volatility_features_created(self, sample_data):
        """Rolling, Parkinson, Garman-Klass-named and ratio columns exist."""
        fe = FeatureEngineering()
        df = sample_data.copy()
        # The tests supply a 'returns' column up front.
        # NOTE(review): presumably _create_volatility_features requires it
        # as input - confirm against the implementation.
        df['returns'] = df['close'].pct_change()
        df = fe._create_volatility_features(df)
        for period in [10, 20, 50]:
            assert f'volatility_{period}' in df.columns
        for column in ('parkinson_vol', 'gk_vol', 'vol_ratio'):
            assert column in df.columns

    def test_volatility_positive(self, sample_data):
        """Every non-NaN 20-period volatility value is strictly positive."""
        fe = FeatureEngineering()
        df = sample_data.copy()
        df['returns'] = df['close'].pct_change()
        df = fe._create_volatility_features(df)
        vol = df['volatility_20'].dropna()
        assert (vol > 0).all()
class TestVolumeFeatures:
    """Checks on the features produced by ``_create_volume_features``."""

    @pytest.fixture
    def sample_data(self):
        """Return a deterministic 300-bar hourly OHLCV frame.

        Closes follow a seeded geometric random walk starting near 1.1,
        each open is the previous close (the first open equals the first
        close), high/low sit a fixed 0.1 % outside the open/close
        envelope, and volumes are random integers in [1000, 10000).
        """
        n_bars = 300
        # 'h' is the hourly alias; the upper-case 'H' spelling has been
        # deprecated since pandas 2.2.
        dates = pd.date_range(start='2024-01-01', periods=n_bars, freq='h')
        np.random.seed(42)
        returns = np.random.normal(0.0001, 0.01, n_bars)
        prices = 1.1000 * np.exp(np.cumsum(returns))
        df = pd.DataFrame(index=dates)
        df['close'] = prices
        df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0])
        df['high'] = df[['open', 'close']].max(axis=1) * 1.001
        df['low'] = df[['open', 'close']].min(axis=1) * 0.999
        df['volume'] = np.random.randint(1000, 10000, n_bars)
        return df

    def test_volume_features_created(self, sample_data):
        """Volume moving averages, ratio, change, OBV and VWAP columns exist."""
        fe = FeatureEngineering()
        # A copy is passed so the fixture frame itself is never mutated.
        df = fe._create_volume_features(sample_data.copy())
        for period in [5, 10, 20]:
            assert f'volume_ma_{period}' in df.columns
        for column in ('volume_ratio', 'volume_change', 'obv', 'vwap'):
            assert column in df.columns
class TestTimeFeatures:
    """Checks on the calendar features produced by ``_create_time_features``."""

    @pytest.fixture
    def sample_data(self):
        """Return a deterministic 300-bar OHLCV frame on an hourly DatetimeIndex.

        Closes follow a seeded geometric random walk starting near 1.1,
        each open is the previous close (the first open equals the first
        close), high/low sit a fixed 0.1 % outside the open/close
        envelope, and volumes are random integers in [1000, 10000).
        """
        n_bars = 300
        # 'h' is the hourly alias; the upper-case 'H' spelling has been
        # deprecated since pandas 2.2.
        dates = pd.date_range(start='2024-01-01', periods=n_bars, freq='h')
        np.random.seed(42)
        returns = np.random.normal(0.0001, 0.01, n_bars)
        prices = 1.1000 * np.exp(np.cumsum(returns))
        df = pd.DataFrame(index=dates)
        df['close'] = prices
        df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0])
        df['high'] = df[['open', 'close']].max(axis=1) * 1.001
        df['low'] = df[['open', 'close']].min(axis=1) * 0.999
        df['volume'] = np.random.randint(1000, 10000, n_bars)
        return df

    def test_time_features_created(self, sample_data):
        """Raw and sin/cos-encoded hour, weekday and month columns exist."""
        fe = FeatureEngineering()
        df = fe._create_time_features(sample_data.copy())
        expected_columns = (
            'hour',
            'hour_sin',
            'hour_cos',
            'day_of_week',
            'dow_sin',
            'dow_cos',
            'month',
            'month_sin',
            'month_cos',
        )
        for column in expected_columns:
            assert column in df.columns

    def test_cyclic_encoding_range(self, sample_data):
        """Every sin/cos-encoded column stays within [-1, 1]."""
        fe = FeatureEngineering()
        df = fe._create_time_features(sample_data.copy())
        for col in ['hour_sin', 'hour_cos', 'dow_sin', 'dow_cos', 'month_sin', 'month_cos']:
            values = df[col].dropna()
            assert (values >= -1).all()
            assert (values <= 1).all()
class TestFeatureImportance:
    """Checks on feature ranking and top-N feature selection."""

    @pytest.fixture
    def sample_features(self):
        """Return a seeded 1000 x 20 frame of standard-normal values.

        Columns are named ``feature_0`` through ``feature_19``.
        """
        row_count, column_count = 1000, 20
        np.random.seed(42)
        column_names = [f'feature_{idx}' for idx in range(column_count)]
        values = np.random.randn(row_count, column_count)
        return pd.DataFrame(values, columns=column_names)

    @pytest.fixture
    def sample_target(self):
        """Return a seeded Series of 1000 standard-normal values."""
        np.random.seed(42)
        draws = np.random.randn(1000)
        return pd.Series(draws)

    def test_get_feature_importance(self, sample_features, sample_target):
        """The ranking is a DataFrame with one row per input feature."""
        engineer = FeatureEngineering()

        ranking = engineer.get_feature_importance(
            sample_features,
            sample_target,
            method='mutual_info',
        )

        assert isinstance(ranking, pd.DataFrame)
        for required_column in ('feature', 'importance'):
            assert required_column in ranking.columns
        assert len(ranking) == len(sample_features.columns)

    def test_select_top_features(self, sample_features, sample_target):
        """Selecting 10 features yields a list of 10 known column names."""
        engineer = FeatureEngineering()

        selected = engineer.select_top_features(
            sample_features,
            sample_target,
            n_features=10,
        )

        assert isinstance(selected, list)
        assert len(selected) == 10
        # Every selected name must be one of the input columns.
        unknown = [name for name in selected if name not in sample_features.columns]
        assert not unknown
class TestFeatureEngineeringIntegration:
    """Integration test chaining feature creation, ranking and selection."""

    @pytest.fixture
    def sample_data(self):
        """Return a deterministic 500-bar hourly OHLCV frame.

        Closes follow a seeded geometric random walk starting near 1.1,
        each open is the previous close (the first open equals the first
        close), highs/lows widen the open/close envelope by a random
        factor of up to 0.1 %, and volumes are random integers in
        [1000, 10000).
        """
        n_bars = 500
        # 'h' is the hourly alias; the upper-case 'H' spelling has been
        # deprecated since pandas 2.2.
        dates = pd.date_range(start='2024-01-01', periods=n_bars, freq='h')
        # Global seeding makes every draw below reproducible; the order of
        # the draws must stay as-is or the generated data changes.
        np.random.seed(42)
        returns = np.random.normal(0.0001, 0.01, n_bars)
        prices = 1.1000 * np.exp(np.cumsum(returns))
        df = pd.DataFrame(index=dates)
        df['close'] = prices
        df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0])
        df['high'] = df[['open', 'close']].max(axis=1) * (1 + np.random.uniform(0, 0.001, n_bars))
        df['low'] = df[['open', 'close']].min(axis=1) * (1 - np.random.uniform(0, 0.001, n_bars))
        df['volume'] = np.random.randint(1000, 10000, n_bars)
        return df

    def test_full_workflow(self, sample_data):
        """Run the whole pipeline: create, validate, rank, then select features."""
        fe = FeatureEngineering()

        # 1. Create all features.
        features_df = fe.create_all_features(sample_data)
        assert len(features_df) > 0
        assert len(fe.feature_names) >= 100

        # 2. The feature frame must be free of NaN.
        assert features_df.isna().sum().sum() == 0

        # 3. Build the target: the next bar's return. shift(-1) leaves a
        #    NaN on the last row only (step 2 guarantees no others), so
        #    dropping it and trimming the last feature row keeps features
        #    and target aligned on the same index.
        target = features_df['returns'].shift(-1).dropna()
        features_for_ml = features_df.iloc[:-1]

        # 4. Rank the features.
        importance = fe.get_feature_importance(
            features_for_ml[fe.feature_names],
            target,
            method='correlation',
        )
        assert len(importance) > 0

        # 5. Keep the 50 best features.
        top_features = fe.select_top_features(
            features_for_ml[fe.feature_names],
            target,
            n_features=50,
        )
        assert len(top_features) == 50