fix: MLStrategyModel — encodage labels [-1,0,1] → [0,1,2] pour XGBoost ≥ 2.x
XGBoost v2+ exige des classes dans [0, n_classes-1].

- train() : y_enc = y + 1 avant fit()
- predict() : décodage pred_enc - 1, prob_map ajusté (0=SHORT, 1=NEUTRAL, 2=LONG)
- _walk_forward_eval() : mask NEUTRAL corrigé (1 au lieu de 0 en espace encodé)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -156,19 +156,23 @@ class MLStrategyModel:
|
|||||||
logger.info(f" {len(X)} échantillons, {len(self.feature_names)} features")
|
logger.info(f" {len(X)} échantillons, {len(self.feature_names)} features")
|
||||||
logger.info(f" Distribution : LONG={( y==1).sum()}, SHORT={(y==-1).sum()}, NEUTRAL={(y==0).sum()}")
|
logger.info(f" Distribution : LONG={( y==1).sum()}, SHORT={(y==-1).sum()}, NEUTRAL={(y==0).sum()}")
|
||||||
|
|
||||||
|
# Encodage labels [-1,0,1] → [0,1,2] (requis par XGBoost ≥ 2.x)
|
||||||
|
y_enc = y + 1
|
||||||
|
|
||||||
# 3. Walk-forward cross-validation (3 folds temporels)
|
# 3. Walk-forward cross-validation (3 folds temporels)
|
||||||
wf_metrics = self._walk_forward_eval(X, y, n_splits=3)
|
wf_metrics = self._walk_forward_eval(X, y_enc, n_splits=3)
|
||||||
|
|
||||||
# 4. Entraînement sur la totalité des données
|
# 4. Entraînement sur la totalité des données
|
||||||
self.scaler = StandardScaler()
|
self.scaler = StandardScaler()
|
||||||
X_scaled = self.scaler.fit_transform(X)
|
X_scaled = self.scaler.fit_transform(X)
|
||||||
|
|
||||||
self.model = self._build_model()
|
self.model = self._build_model()
|
||||||
self.model.fit(X_scaled, y)
|
self.model.fit(X_scaled, y_enc)
|
||||||
self.is_trained = True
|
self.is_trained = True
|
||||||
|
|
||||||
# 5. Évaluation finale (in-sample — indicative)
|
# 5. Évaluation finale (in-sample — indicative)
|
||||||
y_pred = self.model.predict(X_scaled)
|
y_pred_enc = self.model.predict(X_scaled)
|
||||||
|
y_pred = y_pred_enc - 1 # décodage [0,1,2] → [-1,0,1]
|
||||||
report = classification_report(y, y_pred, labels=[-1, 0, 1],
|
report = classification_report(y, y_pred, labels=[-1, 0, 1],
|
||||||
target_names=['SHORT', 'NEUTRAL', 'LONG'],
|
target_names=['SHORT', 'NEUTRAL', 'LONG'],
|
||||||
output_dict=True, zero_division=0)
|
output_dict=True, zero_division=0)
|
||||||
@@ -235,24 +239,25 @@ class MLStrategyModel:
|
|||||||
last = last[self.feature_names].fillna(0)
|
last = last[self.feature_names].fillna(0)
|
||||||
|
|
||||||
X_scaled = self.scaler.transform(last)
|
X_scaled = self.scaler.transform(last)
|
||||||
pred = self.model.predict(X_scaled)[0]
|
pred_enc = self.model.predict(X_scaled)[0]
|
||||||
|
pred = int(pred_enc) - 1 # décodage [0,1,2] → [-1,0,1]
|
||||||
|
|
||||||
# Probabilités si disponibles
|
# Probabilités si disponibles
|
||||||
probas = {'long': 0.0, 'short': 0.0, 'neutral': 1.0}
|
probas = {'long': 0.0, 'short': 0.0, 'neutral': 1.0}
|
||||||
confidence = 0.0
|
confidence = 0.0
|
||||||
if hasattr(self.model, 'predict_proba'):
|
if hasattr(self.model, 'predict_proba'):
|
||||||
proba_arr = self.model.predict_proba(X_scaled)[0]
|
proba_arr = self.model.predict_proba(X_scaled)[0]
|
||||||
classes = list(self.model.classes_)
|
classes = list(self.model.classes_) # [0, 1, 2] encodés
|
||||||
prob_map = {c: p for c, p in zip(classes, proba_arr)}
|
prob_map = {c: p for c, p in zip(classes, proba_arr)}
|
||||||
probas = {
|
probas = {
|
||||||
'long': float(prob_map.get(1, 0.0)),
|
'long': float(prob_map.get(2, 0.0)), # encodé 2 = LONG (1)
|
||||||
'short': float(prob_map.get(-1, 0.0)),
|
'short': float(prob_map.get(0, 0.0)), # encodé 0 = SHORT (-1)
|
||||||
'neutral': float(prob_map.get(0, 1.0)),
|
'neutral': float(prob_map.get(1, 1.0)), # encodé 1 = NEUTRAL (0)
|
||||||
}
|
}
|
||||||
confidence = float(max(probas['long'], probas['short']))
|
confidence = float(max(probas['long'], probas['short']))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'signal': int(pred),
|
'signal': pred,
|
||||||
'confidence': confidence,
|
'confidence': confidence,
|
||||||
'probas': probas,
|
'probas': probas,
|
||||||
'tradeable': confidence >= self.min_confidence and pred != 0,
|
'tradeable': confidence >= self.min_confidence and pred != 0,
|
||||||
@@ -383,7 +388,8 @@ class MLStrategyModel:
|
|||||||
|
|
||||||
acc = (y_pred == y_te.values).mean()
|
acc = (y_pred == y_te.values).mean()
|
||||||
# Précision/Recall sur les signaux directionnels uniquement
|
# Précision/Recall sur les signaux directionnels uniquement
|
||||||
mask = (y_te != 0) | (y_pred != 0)
|
# y encodé : 0=SHORT, 1=NEUTRAL, 2=LONG → NEUTRAL=1
|
||||||
|
mask = (y_te != 1) | (y_pred != 1)
|
||||||
prec, rec, _, _ = precision_recall_fscore_support(
|
prec, rec, _, _ = precision_recall_fscore_support(
|
||||||
y_te[mask], y_pred[mask], average='macro', zero_division=0
|
y_te[mask], y_pred[mask], average='macro', zero_division=0
|
||||||
) if mask.sum() > 0 else (0, 0, 0, 0)
|
) if mask.sum() > 0 else (0, 0, 0, 0)
|
||||||
|
|||||||
Reference in New Issue
Block a user