Forex-Bot/ml/train_mlp.py

107 lines
3.2 KiB
Python
Raw Normal View History

2025-05-06 09:50:42 -04:00
# ml/train_mlp.py
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from ml.models.forex_mlp import ForexMLP
from ml.preprocessing import load_and_preprocess_data
import numpy as np
import joblib
import pandas as pd
# Load + preprocess: the project helper returns the feature table and the
# matching labels for the workbook at DATA_PATH (resolved against the cwd).
DATA_PATH = "data/DAT_XLSX_EURUSD_M1_2021.xlsx"
features_df, labels = load_and_preprocess_data(DATA_PATH)
# Encode targets into two-element vectors: [Buy, Sell]
def encode_targets(labels):
    """Encode class labels as two-element ``[Buy, Sell]`` indicator rows.

    A label equal to ``1`` becomes ``[1, 0]``, a label equal to ``-1`` becomes
    ``[0, 1]`` and every other value becomes ``[0, 0]``.

    Args:
        labels: One-dimensional sequence or array of labels.

    Returns:
        A ``torch.float32`` tensor of shape ``(len(labels), 2)``. Empty input
        yields an empty tensor that still has two columns, so downstream code
        can rely on the second dimension.
    """
    values = np.asarray(labels)
    # Two boolean columns built in one vectorized pass instead of a Python
    # loop: column 0 flags Buy (1), column 1 flags Sell (-1).
    indicators = np.stack([values == 1, values == -1], axis=1)
    return torch.from_numpy(indicators.astype(np.float32))
# Train-test split. shuffle=False keeps the rows in their original order, so
# the test set is the final 30% of the data rather than a random sample.
X_train, X_test, y_train, y_test = train_test_split(
    features_df, labels, test_size=0.3, shuffle=False
)

# Attach the labels to the training features so rows can be resampled per class.
train_df = X_train.copy()
train_df['target'] = y_train.values

# Oversample every class up to the size of the most frequent one. All classes
# present in the labels are kept, including -1 (Sell), which encode_targets
# maps to [0, 1]; filtering on only 0 and 1 would silently drop those rows and
# leave the model with no Sell examples to learn from.
class_counts = train_df['target'].value_counts()  # sorted, most frequent first
majority_label = class_counts.index[0]
majority_size = int(class_counts.iloc[0])

balanced_parts = [train_df[train_df['target'] == majority_label]]
for class_label in class_counts.index[1:]:
    class_rows = train_df[train_df['target'] == class_label]
    balanced_parts.append(
        class_rows.sample(majority_size, replace=True, random_state=42)
    )

# Combine + shuffle so the classes are interleaved.
balanced_df = pd.concat(balanced_parts).sample(frac=1, random_state=42)

# Re-split into features and labels.
X_balanced = balanced_df.drop(columns=['target'])
y_balanced = balanced_df['target']

# Scale. The scaler is fitted once, on the balanced training rows only, and
# then applied to the untouched test set.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_balanced)
X_test_scaled = scaler.transform(X_test)

# Save the fitted scaler to ml/scaler.pkl.
os.makedirs("ml", exist_ok=True)
joblib.dump(scaler, "ml/scaler.pkl")
# Build the training dataset: scaled features paired with the 2-D encoded
# targets, served in shuffled mini-batches of 64.
train_features = torch.tensor(X_scaled, dtype=torch.float32)
y_encoded = encode_targets(y_balanced.values)
train_dataset = TensorDataset(train_features, y_encoded)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Model, loss and optimizer. The network takes one input per feature column.
model = ForexMLP(input_size=X_train.shape[1])
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop: one optimizer step per mini-batch, then the mean batch loss
# is reported at the end of every epoch.
epochs = 100
for epoch_number in range(1, epochs + 1):
    running_loss = 0.0
    for batch_features, batch_targets in train_loader:
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_features), batch_targets)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()
    mean_loss = running_loss / len(train_loader)
    print(f"📉 Epoch {epoch_number} | Loss: {mean_loss:.4f}")

# Persist only the learned weights (state dict), not the whole module object.
torch.save(model.state_dict(), "ml/models/forex_mlp.pt")
print("✅ Trained model saved to ml/models/forex_mlp.pt")
# Evaluation on the held-out test set.
model.eval()
test_inputs = torch.tensor(X_test_scaled, dtype=torch.float32)
with torch.no_grad():  # inference only, no gradients needed
    preds = model(test_inputs)

# Threshold each of the two outputs independently at 0.5 and compare against
# the test labels encoded the same way as the training targets.
preds_bin = preds.gt(0.5).int()
y_test_encoded = encode_targets(y_test.values).int()

print("📊 Evaluation Report:")
report = classification_report(y_test_encoded, preds_bin)
print(report)