"""
yfinance provider — free OHLCV data.

Lookback limits:
- M1           : last 7 days
- M5/M15/M30   : last 60 days
- H1/H4        : last 730 days
- D            : unlimited
"""
import asyncio
import logging
from datetime import datetime, timedelta, timezone
from typing import Optional

import pandas as pd

from app.services.data_providers.constants import (
    GRANULARITY_TO_YF,
    INSTRUMENT_TO_YF,
    YF_MAX_DAYS,
)

logger = logging.getLogger(__name__)

# Column layout of the internal candle format (before ``time`` is added).
_OHLCV_COLUMNS = ["open", "high", "low", "close", "volume"]


def _utc_now_naive() -> datetime:
    """Return the current UTC time as a naive datetime.

    Same value as the deprecated ``datetime.utcnow()``.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


def _to_naive_utc(value: datetime) -> datetime:
    """Convert an aware datetime to naive UTC; return naive datetimes as-is.

    The rest of this module works with naive datetimes (cutoff, ``time``
    column), and comparing aware with naive values raises ``TypeError``.
    """
    if value.tzinfo is None:
        return value
    return value.astimezone(timezone.utc).replace(tzinfo=None)


def _normalize(df: pd.DataFrame) -> pd.DataFrame:
    """Normalise a yfinance DataFrame to the internal candle format.

    Args:
        df: Raw frame indexed by timestamp with yfinance column names
            (e.g. ``Open``, ``High``, ``Low``, ``Close``, ``Volume``).

    Returns:
        A new frame with columns ``time, open, high, low, close, volume``,
        where ``time`` is a naive datetime expressed in UTC and rows missing
        any of open/high/low/close are dropped.

    Raises:
        KeyError: If one of the required OHLCV columns is absent.
    """
    df = df.copy()

    # yfinance may return multi-index columns. Flatten them BEFORE lowercasing:
    # multi-index labels are tuples and have no ``.lower()``.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    df.columns = [str(col).lower() for col in df.columns]

    # Fall back to the adjusted close only when no plain close exists;
    # renaming unconditionally would yield two "close" columns.
    if "close" not in df.columns and "adj close" in df.columns:
        df = df.rename(columns={"adj close": "close"})

    # Convert to UTC, then drop the timezone so timestamps are naive UTC.
    df.index = pd.to_datetime(df.index, utc=True).tz_localize(None)

    df = df[_OHLCV_COLUMNS]
    df = df.dropna(subset=["open", "high", "low", "close"])
    df.index.name = "time"
    return df.reset_index()


def _fetch_sync(
    yf_symbol: str,
    yf_interval: str,
    start: datetime,
    end: datetime,
) -> pd.DataFrame:
    """Run the blocking yfinance download (meant to be called in a thread).

    Args:
        yf_symbol: Yahoo Finance ticker symbol.
        yf_interval: Yahoo Finance interval string.
        start: Start of the requested period (sent at day resolution).
        end: End of the requested period (sent at day resolution, plus one
            day).

    Returns:
        The raw frame returned by ``Ticker.history``.
    """
    # Imported lazily so the dependency is only loaded when actually used.
    import yfinance as yf

    ticker = yf.Ticker(yf_symbol)
    return ticker.history(
        interval=yf_interval,
        start=start.strftime("%Y-%m-%d"),
        # One extra day so the day containing ``end`` is covered —
        # presumably because yfinance treats ``end`` as exclusive.
        end=(end + timedelta(days=1)).strftime("%Y-%m-%d"),
        auto_adjust=True,
        prepost=False,
    )


class YFinanceProvider:
    """Fetch candles from Yahoo Finance."""

    def yf_cutoff(self, granularity: str) -> Optional[datetime]:
        """Return the oldest date yfinance can serve for ``granularity``.

        Returns:
            A naive UTC datetime, or ``None`` when ``YF_MAX_DAYS`` has no
            (or a ``None``) entry for this granularity.
        """
        max_days = YF_MAX_DAYS.get(granularity)
        if max_days is None:
            return None
        return _utc_now_naive() - timedelta(days=max_days - 1)

    def can_provide(self, granularity: str, start: datetime) -> bool:
        """Tell whether yfinance can serve data starting at ``start``.

        Returns ``False`` when no cutoff is known for the granularity.
        """
        # NOTE(review): ``fetch`` treats a missing cutoff as "no limit" while
        # this method treats it as "cannot provide" — confirm this is intended.
        cutoff = self.yf_cutoff(granularity)
        if cutoff is None:
            return False
        return _to_naive_utc(start) >= cutoff

    async def fetch(
        self,
        instrument: str,
        granularity: str,
        start: datetime,
        end: Optional[datetime] = None,
    ) -> pd.DataFrame:
        """Fetch candles for the period ``[start, end]``.

        ``start`` is clamped to the yfinance lookback limit when needed.
        Timezone-aware ``start``/``end`` are converted to naive UTC.

        Args:
            instrument: Internal instrument name (key of ``INSTRUMENT_TO_YF``).
            granularity: Internal granularity (key of ``GRANULARITY_TO_YF``).
            start: Beginning of the requested period.
            end: End of the requested period; defaults to the current UTC time.

        Returns:
            A frame with columns ``time, open, high, low, close, volume``, or
            an empty DataFrame when the instrument/granularity is unsupported,
            the period is empty, the download fails, or no data comes back.
        """
        yf_symbol = INSTRUMENT_TO_YF.get(instrument)
        yf_interval = GRANULARITY_TO_YF.get(granularity)
        if not yf_symbol or not yf_interval:
            logger.warning(
                "yfinance : instrument ou granularité non supporté — %s %s",
                instrument,
                granularity,
            )
            return pd.DataFrame()

        start = _to_naive_utc(start)
        end = _utc_now_naive() if end is None else _to_naive_utc(end)

        # Clamp start to the yfinance lookback limit.
        cutoff = self.yf_cutoff(granularity)
        if cutoff is not None and start < cutoff:
            logger.debug("yfinance : start tronqué de %s à %s", start, cutoff)
            start = cutoff

        if start >= end:
            return pd.DataFrame()

        logger.info(
            "yfinance fetch : %s (%s) %s → %s",
            instrument,
            granularity,
            start.strftime("%Y-%m-%d"),
            end.strftime("%Y-%m-%d"),
        )

        try:
            # yfinance is blocking: run it in the default executor so the
            # event loop stays responsive.
            loop = asyncio.get_running_loop()
            raw = await loop.run_in_executor(
                None, _fetch_sync, yf_symbol, yf_interval, start, end
            )
        except Exception as e:
            # Best-effort provider: any download failure yields an empty frame.
            logger.error("yfinance erreur : %s", e)
            return pd.DataFrame()

        if raw.empty:
            logger.warning(
                "yfinance : aucune donnée pour %s %s", instrument, granularity
            )
            return pd.DataFrame()

        df = _normalize(raw)
        # The download works at day resolution; trim to the exact period.
        df = df[(df["time"] >= start) & (df["time"] <= end)]
        logger.info(
            "yfinance : %d bougies récupérées pour %s %s",
            len(df),
            instrument,
            granularity,
        )
        return df