Architecture Docker (8 services), FastAPI, TimescaleDB, Redis, Streamlit. Stratégies : scalping, intraday, swing. MLEngine + RegimeDetector (HMM). BacktestEngine + WalkForwardAnalyzer + Optuna optimizer. Routes API complètes dont /optimize async. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
206 lines
6.5 KiB
Python
206 lines
6.5 KiB
Python
"""
|
|
Unit tests - DataValidator.

Tests for data validation and cleaning.
|
|
"""
|
|
|
|
import pytest
|
|
import pandas as pd
|
|
import numpy as np
|
|
|
|
from src.data.data_validator import DataValidator
|
|
|
|
|
|
class TestDataValidation:
    """Checks on ``DataValidator.validate``, which returns ``(is_valid, errors)``."""

    def test_validate_valid_data(self, sample_ohlcv_data):
        """The ``sample_ohlcv_data`` fixture validates with an empty error list."""
        ok, problems = DataValidator().validate(sample_ohlcv_data)

        assert ok is True
        assert len(problems) == 0

    def test_validate_empty_dataframe(self):
        """An empty DataFrame is rejected and the first error mentions 'empty'."""
        ok, problems = DataValidator().validate(pd.DataFrame())

        assert ok is False
        assert len(problems) > 0
        first_message = problems[0]
        assert 'empty' in first_message.lower()

    def test_validate_missing_columns(self):
        """A frame holding only open/close is rejected for missing columns."""
        # high, low and volume are deliberately absent.
        partial_columns = {
            'open': [1.1, 1.2],
            'close': [1.15, 1.25],
        }
        frame = pd.DataFrame(partial_columns)

        ok, problems = DataValidator().validate(frame)

        assert ok is False
        assert any('missing columns' in msg.lower() for msg in problems)

    def test_validate_price_inconsistency(self):
        """Rows whose low exceeds their high are reported as 'high < low'."""
        inconsistent = {
            'open': [1.1, 1.2, 1.3],
            'high': [1.15, 1.25, 1.35],
            'low': [1.2, 1.3, 1.4],  # every low sits above its high (invalid)
            'close': [1.12, 1.22, 1.32],
            'volume': [1000, 2000, 3000],
        }
        frame = pd.DataFrame(inconsistent)

        ok, problems = DataValidator().validate(frame)

        assert ok is False
        assert any('high < low' in msg.lower() for msg in problems)

    def test_validate_excessive_missing_values(self):
        """A frame with many NaN prices is rejected when ``max_missing_pct`` is 0.05."""
        # Each 5-value pattern is tiled 10 times to give 50 rows; 'open'
        # carries two NaNs per pattern, the other price columns one each.
        repeats = 10
        sparse = {
            'open': [1.1, np.nan, 1.3, np.nan, 1.5] * repeats,
            'high': [1.15, 1.25, np.nan, 1.45, 1.55] * repeats,
            'low': [1.05, 1.15, 1.25, np.nan, 1.45] * repeats,
            'close': [1.12, 1.22, 1.32, 1.42, np.nan] * repeats,
            'volume': [1000] * 50,
        }
        frame = pd.DataFrame(sparse)
        strict_validator = DataValidator(config={'max_missing_pct': 0.05})

        ok, problems = strict_validator.validate(frame)

        assert ok is False
        assert any('missing values' in msg.lower() for msg in problems)
|
|
|
|
|
|
class TestDataCleaning:
    """Tests for ``DataValidator.clean``."""

    @staticmethod
    def _hourly_ohlcv(periods=10):
        """Build a constant-price OHLCV frame indexed by hourly timestamps.

        Args:
            periods: Number of rows (hourly steps starting 2024-01-01).

        Returns:
            A DataFrame with open/high/low/close/volume columns and a
            chronologically ordered ``DatetimeIndex``.
        """
        # A Timedelta is used rather than the 'H' alias, which pandas 2.2
        # deprecated; this form raises no FutureWarning on any version.
        index = pd.date_range(
            '2024-01-01', periods=periods, freq=pd.Timedelta(hours=1)
        )
        return pd.DataFrame({
            'open': [1.1] * periods,
            'high': [1.15] * periods,
            'low': [1.05] * periods,
            'close': [1.12] * periods,
            'volume': [1000] * periods,
        }, index=index)

    def test_clean_removes_duplicates(self):
        """A row repeated under the same timestamp is dropped by ``clean``."""
        validator = DataValidator()

        df = self._hourly_ohlcv(10)

        # Append a copy of row 5 so one timestamp appears twice.
        df = pd.concat([df, df.iloc[[5]]])

        assert len(df) == 11

        df_clean = validator.clean(df)

        assert len(df_clean) == 10

    def test_clean_sorts_chronologically(self):
        """``clean`` returns rows with a monotonically increasing index."""
        validator = DataValidator()

        df = self._hourly_ohlcv(10)

        # Shuffle with a fixed seed so the test is reproducible, and make
        # sure the input really is out of order before cleaning it.
        df = df.sample(frac=1, random_state=42)
        assert not df.index.is_monotonic_increasing

        df_clean = validator.clean(df)

        assert df_clean.index.is_monotonic_increasing

    def test_clean_interpolates_missing_values(self):
        """NaNs in open/high/low are all filled after ``clean``."""
        validator = DataValidator()

        df = pd.DataFrame({
            'open': [1.1, np.nan, 1.3, 1.4, 1.5],
            'high': [1.15, 1.25, np.nan, 1.45, 1.55],
            'low': [1.05, 1.15, 1.25, np.nan, 1.45],
            'close': [1.12, 1.22, 1.32, 1.42, 1.52],
            'volume': [1000, 2000, 3000, 4000, 5000],
        })

        df_clean = validator.clean(df)

        # No NaN may remain in the columns that had gaps.
        assert df_clean['open'].isna().sum() == 0
        assert df_clean['high'].isna().sum() == 0
        assert df_clean['low'].isna().sum() == 0

    def test_clean_fixes_price_inconsistencies(self):
        """After ``clean``, high bounds and low underlies every price column."""
        validator = DataValidator()

        df = pd.DataFrame({
            'open': [1.1, 1.2, 1.3],
            'high': [1.05, 1.15, 1.25],  # high < open (invalid)
            'low': [1.15, 1.25, 1.35],   # low > open (invalid)
            'close': [1.12, 1.22, 1.32],
            'volume': [1000, 2000, 3000],
        })

        df_clean = validator.clean(df)

        # high must be the row maximum and low the row minimum.
        assert (df_clean['high'] >= df_clean['low']).all()
        assert (df_clean['high'] >= df_clean['open']).all()
        assert (df_clean['high'] >= df_clean['close']).all()
        assert (df_clean['low'] <= df_clean['open']).all()
        assert (df_clean['low'] <= df_clean['close']).all()
|
|
|
|
|
|
class TestDataQualityReport:
    """Checks on ``DataValidator.get_data_quality_report``."""

    def test_generate_quality_report(self, sample_ohlcv_data):
        """The report exposes the expected top-level keys and row count."""
        report = DataValidator().get_data_quality_report(sample_ohlcv_data)

        expected_keys = (
            'total_rows',
            'date_range',
            'missing_values',
            'is_valid',
            'price_stats',
        )
        for key in expected_keys:
            assert key in report

        assert report['total_rows'] == len(sample_ohlcv_data)
        assert report['is_valid'] is True

    def test_report_includes_statistics(self, sample_ohlcv_data):
        """``price_stats`` carries close-price summary figures that are positive."""
        report = DataValidator().get_data_quality_report(sample_ohlcv_data)
        stats = report['price_stats']

        for stat_name in ('mean_close', 'std_close', 'min_close', 'max_close'):
            assert stat_name in stats

        assert stats['mean_close'] > 0
        assert stats['std_close'] > 0
|