""" Tests Unitaires - FeatureEngineering. Tests de la création de features pour ML. """ import pytest import pandas as pd import numpy as np from datetime import datetime, timedelta from src.ml.feature_engineering import FeatureEngineering class TestFeatureEngineeringInitialization: """Tests d'initialisation.""" def test_initialization_default(self): """Test initialisation par défaut.""" fe = FeatureEngineering() assert fe.config == {} assert len(fe.feature_names) == 0 def test_initialization_with_config(self): """Test initialisation avec config.""" config = {'param1': 'value1'} fe = FeatureEngineering(config) assert fe.config == config class TestFeatureCreation: """Tests de création de features.""" @pytest.fixture def sample_data(self): """Génère des données de test.""" dates = pd.date_range(start='2024-01-01', periods=300, freq='1H') np.random.seed(42) returns = np.random.normal(0.0001, 0.01, 300) prices = 1.1000 * np.exp(np.cumsum(returns)) df = pd.DataFrame(index=dates) df['close'] = prices df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0]) df['high'] = df[['open', 'close']].max(axis=1) * (1 + np.random.uniform(0, 0.001, 300)) df['low'] = df[['open', 'close']].min(axis=1) * (1 - np.random.uniform(0, 0.001, 300)) df['volume'] = np.random.randint(1000, 10000, 300) return df def test_create_all_features(self, sample_data): """Test création de toutes les features.""" fe = FeatureEngineering() features_df = fe.create_all_features(sample_data) assert isinstance(features_df, pd.DataFrame) assert len(features_df) > 0 assert len(fe.feature_names) > 0 def test_features_count(self, sample_data): """Test que le nombre de features est correct.""" fe = FeatureEngineering() features_df = fe.create_all_features(sample_data) # Devrait créer 100+ features assert len(fe.feature_names) >= 100 def test_no_nan_in_features(self, sample_data): """Test qu'il n'y a pas de NaN dans les features.""" fe = FeatureEngineering() features_df = fe.create_all_features(sample_data) # Après dropna, ne devrait pas y avoir de NaN assert features_df.isna().sum().sum() == 0 class TestPriceFeatures: """Tests des features basées sur les prix.""" @pytest.fixture def sample_data(self): """Génère des données de test.""" dates = pd.date_range(start='2024-01-01', periods=300, freq='1H') np.random.seed(42) returns = np.random.normal(0.0001, 0.01, 300) prices = 1.1000 * np.exp(np.cumsum(returns)) df = pd.DataFrame(index=dates) df['close'] = prices df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0]) df['high'] = df[['open', 'close']].max(axis=1) * 1.001 df['low'] = df[['open', 'close']].min(axis=1) * 0.999 df['volume'] = np.random.randint(1000, 10000, 300) return df def test_price_features_created(self, sample_data): """Test que les features de prix sont créées.""" fe = FeatureEngineering() df = fe._create_price_features(sample_data.copy()) assert 'returns' in df.columns assert 'log_returns' in df.columns assert 'high_low_ratio' in df.columns assert 'close_open_ratio' in df.columns assert 'price_position' in df.columns def test_returns_calculation(self, sample_data): """Test calcul des returns.""" fe = FeatureEngineering() df = fe._create_price_features(sample_data.copy()) # Vérifier que returns est calculé correctement expected_returns = sample_data['close'].pct_change() pd.testing.assert_series_equal( df['returns'].dropna(), expected_returns.dropna(), check_names=False ) def test_price_position_range(self, sample_data): """Test que price_position est entre 0 et 1.""" fe = FeatureEngineering() df = fe._create_price_features(sample_data.copy()) price_pos = df['price_position'].dropna() assert (price_pos >= 0).all() assert (price_pos <= 1).all() class TestTechnicalIndicators: """Tests des indicateurs techniques.""" @pytest.fixture def sample_data(self): """Génère des données de test.""" dates = pd.date_range(start='2024-01-01', periods=300, freq='1H') np.random.seed(42) returns = np.random.normal(0.0001, 0.01, 300) prices = 1.1000 * np.exp(np.cumsum(returns)) df = pd.DataFrame(index=dates) df['close'] = prices df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0]) df['high'] = df[['open', 'close']].max(axis=1) * 1.001 df['low'] = df[['open', 'close']].min(axis=1) * 0.999 df['volume'] = np.random.randint(1000, 10000, 300) return df def test_moving_averages_created(self, sample_data): """Test création des moyennes mobiles.""" fe = FeatureEngineering() df = fe._create_technical_indicators(sample_data.copy()) # Vérifier SMA for period in [5, 10, 20, 50, 100, 200]: assert f'sma_{period}' in df.columns assert f'ema_{period}' in df.columns def test_rsi_calculation(self, sample_data): """Test calcul RSI.""" fe = FeatureEngineering() df = fe._create_technical_indicators(sample_data.copy()) # Vérifier RSI for period in [7, 14, 21]: assert f'rsi_{period}' in df.columns # RSI devrait être entre 0 et 100 rsi = df[f'rsi_{period}'].dropna() assert (rsi >= 0).all() assert (rsi <= 100).all() def test_macd_calculation(self, sample_data): """Test calcul MACD.""" fe = FeatureEngineering() df = fe._create_technical_indicators(sample_data.copy()) assert 'macd' in df.columns assert 'macd_signal' in df.columns assert 'macd_hist' in df.columns def test_bollinger_bands(self, sample_data): """Test calcul Bollinger Bands.""" fe = FeatureEngineering() df = fe._create_technical_indicators(sample_data.copy()) for period in [20, 50]: assert f'bb_upper_{period}' in df.columns assert f'bb_middle_{period}' in df.columns assert f'bb_lower_{period}' in df.columns assert f'bb_width_{period}' in df.columns assert f'bb_position_{period}' in df.columns # Vérifier ordre: upper > middle > lower upper = df[f'bb_upper_{period}'].dropna() middle = df[f'bb_middle_{period}'].dropna() lower = df[f'bb_lower_{period}'].dropna() assert (upper >= middle).all() assert (middle >= lower).all() def test_atr_calculation(self, sample_data): """Test calcul ATR.""" fe = FeatureEngineering() df = fe._create_technical_indicators(sample_data.copy()) for period in [7, 14, 21]: assert f'atr_{period}' in df.columns # ATR devrait être positif atr = df[f'atr_{period}'].dropna() assert (atr > 0).all() class TestStatisticalFeatures: """Tests des features statistiques.""" @pytest.fixture def sample_data(self): """Génère des données de test.""" dates = pd.date_range(start='2024-01-01', periods=300, freq='1H') np.random.seed(42) returns = np.random.normal(0.0001, 0.01, 300) prices = 1.1000 * np.exp(np.cumsum(returns)) df = pd.DataFrame(index=dates) df['close'] = prices df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0]) df['high'] = df[['open', 'close']].max(axis=1) * 1.001 df['low'] = df[['open', 'close']].min(axis=1) * 0.999 df['volume'] = np.random.randint(1000, 10000, 300) return df def test_statistical_features_created(self, sample_data): """Test création features statistiques.""" fe = FeatureEngineering() df = fe._create_statistical_features(sample_data.copy()) for period in [10, 20, 50]: assert f'mean_{period}' in df.columns assert f'std_{period}' in df.columns assert f'skew_{period}' in df.columns assert f'kurt_{period}' in df.columns assert f'zscore_{period}' in df.columns def test_zscore_calculation(self, sample_data): """Test calcul z-score.""" fe = FeatureEngineering() df = fe._create_statistical_features(sample_data.copy()) # Z-score devrait avoir moyenne ~0 et std ~1 zscore = df['zscore_20'].dropna() assert abs(zscore.mean()) < 0.5 assert abs(zscore.std() - 1.0) < 0.5 class TestVolatilityFeatures: """Tests des features de volatilité.""" @pytest.fixture def sample_data(self): """Génère des données de test.""" dates = pd.date_range(start='2024-01-01', periods=300, freq='1H') np.random.seed(42) returns = np.random.normal(0.0001, 0.01, 300) prices = 1.1000 * np.exp(np.cumsum(returns)) df = pd.DataFrame(index=dates) df['close'] = prices df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0]) df['high'] = df[['open', 'close']].max(axis=1) * 1.001 df['low'] = df[['open', 'close']].min(axis=1) * 0.999 df['volume'] = np.random.randint(1000, 10000, 300) return df def test_volatility_features_created(self, sample_data): """Test création features volatilité.""" fe = FeatureEngineering() # Ajouter returns d'abord df = sample_data.copy() df['returns'] = df['close'].pct_change() df = fe._create_volatility_features(df) for period in [10, 20, 50]: assert f'volatility_{period}' in df.columns assert 'parkinson_vol' in df.columns assert 'gk_vol' in df.columns assert 'vol_ratio' in df.columns def test_volatility_positive(self, sample_data): """Test que la volatilité est positive.""" fe = FeatureEngineering() df = sample_data.copy() df['returns'] = df['close'].pct_change() df = fe._create_volatility_features(df) vol = df['volatility_20'].dropna() assert (vol > 0).all() class TestVolumeFeatures: """Tests des features de volume.""" @pytest.fixture def sample_data(self): """Génère des données de test.""" dates = pd.date_range(start='2024-01-01', periods=300, freq='1H') np.random.seed(42) returns = np.random.normal(0.0001, 0.01, 300) prices = 1.1000 * np.exp(np.cumsum(returns)) df = pd.DataFrame(index=dates) df['close'] = prices df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0]) df['high'] = df[['open', 'close']].max(axis=1) * 1.001 df['low'] = df[['open', 'close']].min(axis=1) * 0.999 df['volume'] = np.random.randint(1000, 10000, 300) return df def test_volume_features_created(self, sample_data): """Test création features volume.""" fe = FeatureEngineering() df = fe._create_volume_features(sample_data.copy()) for period in [5, 10, 20]: assert f'volume_ma_{period}' in df.columns assert 'volume_ratio' in df.columns assert 'volume_change' in df.columns assert 'obv' in df.columns assert 'vwap' in df.columns class TestTimeFeatures: """Tests des features temporelles.""" @pytest.fixture def sample_data(self): """Génère des données de test avec index datetime.""" dates = pd.date_range(start='2024-01-01', periods=300, freq='1H') np.random.seed(42) returns = np.random.normal(0.0001, 0.01, 300) prices = 1.1000 * np.exp(np.cumsum(returns)) df = pd.DataFrame(index=dates) df['close'] = prices df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0]) df['high'] = df[['open', 'close']].max(axis=1) * 1.001 df['low'] = df[['open', 'close']].min(axis=1) * 0.999 df['volume'] = np.random.randint(1000, 10000, 300) return df def test_time_features_created(self, sample_data): """Test création features temporelles.""" fe = FeatureEngineering() df = fe._create_time_features(sample_data.copy()) assert 'hour' in df.columns assert 'hour_sin' in df.columns assert 'hour_cos' in df.columns assert 'day_of_week' in df.columns assert 'dow_sin' in df.columns assert 'dow_cos' in df.columns assert 'month' in df.columns assert 'month_sin' in df.columns assert 'month_cos' in df.columns def test_cyclic_encoding_range(self, sample_data): """Test que l'encodage cyclique est dans [-1, 1].""" fe = FeatureEngineering() df = fe._create_time_features(sample_data.copy()) for col in ['hour_sin', 'hour_cos', 'dow_sin', 'dow_cos', 'month_sin', 'month_cos']: values = df[col].dropna() assert (values >= -1).all() assert (values <= 1).all() class TestFeatureImportance: """Tests de feature importance.""" @pytest.fixture def sample_features(self): """Génère des features de test.""" np.random.seed(42) n_samples = 1000 n_features = 20 features = pd.DataFrame( np.random.randn(n_samples, n_features), columns=[f'feature_{i}' for i in range(n_features)] ) return features @pytest.fixture def sample_target(self): """Génère une target de test.""" np.random.seed(42) return pd.Series(np.random.randn(1000)) def test_get_feature_importance(self, sample_features, sample_target): """Test calcul feature importance.""" fe = FeatureEngineering() importance = fe.get_feature_importance( sample_features, sample_target, method='mutual_info' ) assert isinstance(importance, pd.DataFrame) assert 'feature' in importance.columns assert 'importance' in importance.columns assert len(importance) == len(sample_features.columns) def test_select_top_features(self, sample_features, sample_target): """Test sélection top features.""" fe = FeatureEngineering() top_features = fe.select_top_features( sample_features, sample_target, n_features=10 ) assert isinstance(top_features, list) assert len(top_features) == 10 assert all(f in sample_features.columns for f in top_features) class TestFeatureEngineeringIntegration: """Tests d'intégration.""" @pytest.fixture def sample_data(self): """Génère des données de test.""" dates = pd.date_range(start='2024-01-01', periods=500, freq='1H') np.random.seed(42) returns = np.random.normal(0.0001, 0.01, 500) prices = 1.1000 * np.exp(np.cumsum(returns)) df = pd.DataFrame(index=dates) df['close'] = prices df['open'] = df['close'].shift(1).fillna(df['close'].iloc[0]) df['high'] = df[['open', 'close']].max(axis=1) * (1 + np.random.uniform(0, 0.001, 500)) df['low'] = df[['open', 'close']].min(axis=1) * (1 - np.random.uniform(0, 0.001, 500)) df['volume'] = np.random.randint(1000, 10000, 500) return df def test_full_workflow(self, sample_data): """Test workflow complet.""" fe = FeatureEngineering() # 1. Créer toutes les features features_df = fe.create_all_features(sample_data) assert len(features_df) > 0 assert len(fe.feature_names) >= 100 # 2. Vérifier pas de NaN assert features_df.isna().sum().sum() == 0 # 3. Créer target target = features_df['returns'].shift(-1).dropna() features_for_ml = features_df.iloc[:-1] # 4. Feature importance importance = fe.get_feature_importance( features_for_ml[fe.feature_names], target, method='correlation' ) assert len(importance) > 0 # 5. Sélectionner top features top_features = fe.select_top_features( features_for_ml[fe.feature_names], target, n_features=50 ) assert len(top_features) == 50