Files
trader-ml/tests/unit/test_data_validator.py
Tika da30ef19ed Initial commit — Trading AI Secure project complet
Architecture Docker (8 services), FastAPI, TimescaleDB, Redis, Streamlit.
Stratégies : scalping, intraday, swing. MLEngine + RegimeDetector (HMM).
BacktestEngine + WalkForwardAnalyzer + Optuna optimizer.
Routes API complètes dont /optimize async.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-08 17:38:09 +00:00

206 lines
6.5 KiB
Python

"""
Tests Unitaires - DataValidator.
Tests de validation et nettoyage des données.
"""
import pytest
import pandas as pd
import numpy as np
from src.data.data_validator import DataValidator
class TestDataValidation:
    """Validation-path tests: DataValidator.validate must flag bad input."""

    def test_validate_valid_data(self, sample_ohlcv_data):
        """A well-formed OHLCV frame validates with an empty error list."""
        ok, problems = DataValidator().validate(sample_ohlcv_data)
        assert ok is True
        assert len(problems) == 0

    def test_validate_empty_dataframe(self):
        """An empty DataFrame is rejected and the first error mentions it."""
        ok, problems = DataValidator().validate(pd.DataFrame())
        assert ok is False
        assert len(problems) > 0
        assert 'empty' in problems[0].lower()

    def test_validate_missing_columns(self):
        """A frame lacking high/low/volume is rejected with a 'missing columns' error."""
        partial = pd.DataFrame(
            {
                'open': [1.1, 1.2],
                'close': [1.15, 1.25],
                # high, low and volume deliberately absent
            }
        )
        ok, problems = DataValidator().validate(partial)
        assert ok is False
        assert any('missing columns' in msg.lower() for msg in problems)

    def test_validate_price_inconsistency(self):
        """Rows where low exceeds high must be reported as inconsistent."""
        inconsistent = pd.DataFrame(
            {
                'open': [1.1, 1.2, 1.3],
                'high': [1.15, 1.25, 1.35],
                'low': [1.2, 1.3, 1.4],  # every low is above its high — invalid
                'close': [1.12, 1.22, 1.32],
                'volume': [1000, 2000, 3000],
            }
        )
        ok, problems = DataValidator().validate(inconsistent)
        assert ok is False
        assert any('high < low' in msg.lower() for msg in problems)

    def test_validate_excessive_missing_values(self):
        """NaN density above the configured max_missing_pct fails validation."""
        checker = DataValidator(config={'max_missing_pct': 0.05})
        # 50 rows with 1-in-5 NaNs per price column: far above the 5% cap.
        sparse = pd.DataFrame(
            {
                'open': [1.1, np.nan, 1.3, np.nan, 1.5] * 10,
                'high': [1.15, 1.25, np.nan, 1.45, 1.55] * 10,
                'low': [1.05, 1.15, 1.25, np.nan, 1.45] * 10,
                'close': [1.12, 1.22, 1.32, 1.42, np.nan] * 10,
                'volume': [1000] * 50,
            }
        )
        ok, problems = checker.validate(sparse)
        assert ok is False
        assert any('missing values' in msg.lower() for msg in problems)
class TestDataCleaning:
    """Cleaning-path tests: DataValidator.clean must repair common data faults."""

    def _make_hourly_frame(self, periods=10):
        """Build a constant-price hourly OHLCV frame used by the index-based tests."""
        # 'h' is the modern lowercase hourly alias; '1H' is deprecated in pandas >= 2.2.
        dates = pd.date_range('2024-01-01', periods=periods, freq='h')
        return pd.DataFrame({
            'open': [1.1] * periods,
            'high': [1.15] * periods,
            'low': [1.05] * periods,
            'close': [1.12] * periods,
            'volume': [1000] * periods
        }, index=dates)

    def test_clean_removes_duplicates(self):
        """Duplicated timestamps are dropped, restoring the original row count."""
        validator = DataValidator()
        df = self._make_hourly_frame()
        # Append a duplicate of row 5 so the frame contains one repeated timestamp.
        df = pd.concat([df, df.iloc[[5]]])
        assert len(df) == 11
        df_clean = validator.clean(df)
        assert len(df_clean) == 10

    def test_clean_sorts_chronologically(self):
        """A shuffled frame comes back sorted by its datetime index."""
        validator = DataValidator()
        df = self._make_hourly_frame()
        # Seed the shuffle so the test is reproducible; an unseeded sample could
        # (rarely) return an already-sorted frame and make the assertion vacuous.
        df = df.sample(frac=1, random_state=42)
        df_clean = validator.clean(df)
        assert df_clean.index.is_monotonic_increasing

    def test_clean_interpolates_missing_values(self):
        """Interior NaNs in the price columns are filled by interpolation."""
        validator = DataValidator()
        df = pd.DataFrame({
            'open': [1.1, np.nan, 1.3, 1.4, 1.5],
            'high': [1.15, 1.25, np.nan, 1.45, 1.55],
            'low': [1.05, 1.15, 1.25, np.nan, 1.45],
            'close': [1.12, 1.22, 1.32, 1.42, 1.52],
            'volume': [1000, 2000, 3000, 4000, 5000]
        })
        df_clean = validator.clean(df)
        # Every previously-missing price cell must now hold a value.
        assert df_clean['open'].isna().sum() == 0
        assert df_clean['high'].isna().sum() == 0
        assert df_clean['low'].isna().sum() == 0

    def test_clean_fixes_price_inconsistencies(self):
        """Rows violating OHLC ordering are repaired into a consistent state."""
        validator = DataValidator()
        df = pd.DataFrame({
            'open': [1.1, 1.2, 1.3],
            'high': [1.05, 1.15, 1.25],  # high < open — invalid
            'low': [1.15, 1.25, 1.35],   # low > open — invalid
            'close': [1.12, 1.22, 1.32],
            'volume': [1000, 2000, 3000]
        })
        df_clean = validator.clean(df)
        # After cleaning, high must bound every price from above and low from below.
        assert (df_clean['high'] >= df_clean['low']).all()
        assert (df_clean['high'] >= df_clean['open']).all()
        assert (df_clean['high'] >= df_clean['close']).all()
        assert (df_clean['low'] <= df_clean['open']).all()
        assert (df_clean['low'] <= df_clean['close']).all()
class TestDataQualityReport:
    """Tests for DataValidator.get_data_quality_report output shape."""

    def test_generate_quality_report(self, sample_ohlcv_data):
        """The report exposes the expected top-level keys with sane values."""
        summary = DataValidator().get_data_quality_report(sample_ohlcv_data)
        for key in ('total_rows', 'date_range', 'missing_values',
                    'is_valid', 'price_stats'):
            assert key in summary
        assert summary['total_rows'] == len(sample_ohlcv_data)
        assert summary['is_valid'] is True

    def test_report_includes_statistics(self, sample_ohlcv_data):
        """price_stats carries close-price summary statistics with positive values."""
        summary = DataValidator().get_data_quality_report(sample_ohlcv_data)
        stats = summary['price_stats']
        for key in ('mean_close', 'std_close', 'min_close', 'max_close'):
            assert key in stats
        assert stats['mean_close'] > 0
        assert stats['std_close'] > 0