From 7af7248b4df11d7d89ae9eea7a3451d5f0e266db Mon Sep 17 00:00:00 2001 From: Tika Date: Tue, 10 Mar 2026 19:47:00 +0000 Subject: [PATCH] =?UTF-8?q?fix:=20MLStrategyModel=20=E2=80=94=20encodage?= =?UTF-8?q?=20labels=20[-1,0,1]=20=E2=86=92=20[0,1,2]=20pour=20XGBoost=20?= =?UTF-8?q?=E2=89=A5=202.x?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit XGBoost v2+ exige des classes dans [0, n_classes-1]. - train() : y_enc = y + 1 avant fit() - predict() : décodage pred_enc - 1, prob_map ajusté (0=SHORT, 1=NEUTRAL, 2=LONG) - _walk_forward_eval() : mask NEUTRAL corrigé (1 au lieu de 0 en espace encodé) Co-Authored-By: Claude Sonnet 4.6 --- src/ml/ml_strategy_model.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/ml/ml_strategy_model.py b/src/ml/ml_strategy_model.py index 238e411..15c689a 100644 --- a/src/ml/ml_strategy_model.py +++ b/src/ml/ml_strategy_model.py @@ -156,19 +156,23 @@ class MLStrategyModel: logger.info(f" {len(X)} échantillons, {len(self.feature_names)} features") logger.info(f" Distribution : LONG={( y==1).sum()}, SHORT={(y==-1).sum()}, NEUTRAL={(y==0).sum()}") + # Encodage labels [-1,0,1] → [0,1,2] (requis par XGBoost ≥ 2.x) + y_enc = y + 1 + # 3. Walk-forward cross-validation (3 folds temporels) - wf_metrics = self._walk_forward_eval(X, y, n_splits=3) + wf_metrics = self._walk_forward_eval(X, y_enc, n_splits=3) # 4. Entraînement sur la totalité des données self.scaler = StandardScaler() X_scaled = self.scaler.fit_transform(X) self.model = self._build_model() - self.model.fit(X_scaled, y) + self.model.fit(X_scaled, y_enc) self.is_trained = True # 5. Évaluation finale (in-sample — indicative) - y_pred = self.model.predict(X_scaled) + y_pred_enc = self.model.predict(X_scaled) + y_pred = y_pred_enc - 1 # décodage [0,1,2] → [-1,0,1] report = classification_report(y, y_pred, labels=[-1, 0, 1], target_names=['SHORT', 'NEUTRAL', 'LONG'], output_dict=True, zero_division=0) @@ -235,24 +239,25 @@ class MLStrategyModel: last = last[self.feature_names].fillna(0) X_scaled = self.scaler.transform(last) - pred = self.model.predict(X_scaled)[0] + pred_enc = self.model.predict(X_scaled)[0] + pred = int(pred_enc) - 1 # décodage [0,1,2] → [-1,0,1] # Probabilités si disponibles probas = {'long': 0.0, 'short': 0.0, 'neutral': 1.0} confidence = 0.0 if hasattr(self.model, 'predict_proba'): proba_arr = self.model.predict_proba(X_scaled)[0] - classes = list(self.model.classes_) + classes = list(self.model.classes_) # [0, 1, 2] encodés prob_map = {c: p for c, p in zip(classes, proba_arr)} probas = { - 'long': float(prob_map.get(1, 0.0)), - 'short': float(prob_map.get(-1, 0.0)), - 'neutral': float(prob_map.get(0, 1.0)), + 'long': float(prob_map.get(2, 0.0)), # encodé 2 = LONG (1) + 'short': float(prob_map.get(0, 0.0)), # encodé 0 = SHORT (-1) + 'neutral': float(prob_map.get(1, 1.0)), # encodé 1 = NEUTRAL (0) } confidence = float(max(probas['long'], probas['short'])) return { - 'signal': int(pred), + 'signal': pred, 'confidence': confidence, 'probas': probas, 'tradeable': confidence >= self.min_confidence and pred != 0, @@ -383,7 +388,8 @@ class MLStrategyModel: acc = (y_pred == y_te.values).mean() # Précision/Recall sur les signaux directionnels uniquement - mask = (y_te != 0) | (y_pred != 0) + # y encodé : 0=SHORT, 1=NEUTRAL, 2=LONG → NEUTRAL=1 + mask = (y_te != 1) | (y_pred != 1) prec, rec, _, _ = precision_recall_fscore_support( y_te[mask], y_pred[mask], average='macro', zero_division=0 ) if mask.sum() > 0 else (0, 0, 0, 0)