# ml/train_mlp.py
"""Train the ForexMLP Buy/Sell model and report its hold-out performance.

Pipeline: load features and labels, split them 70/30 without shuffling,
oversample the training classes to equal size, standardise the features,
train with binary cross-entropy, save the scaler and the weights, then print
a classification report for the untouched test split.

NOTE: the whole pipeline runs at import time (there is no ``__main__``
guard); this is kept as-is so that existing ways of invoking the script
behave the same.
"""
import os
import sys

# Make the repository root importable so that `ml.*` resolves when this file
# is executed directly (e.g. `python ml/train_mlp.py`).
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from ml.models.forex_mlp import ForexMLP
from ml.preprocessing import load_and_preprocess_data
import numpy as np
import joblib
import pandas as pd


def encode_targets(labels):
    """Encode scalar labels as two-element ``[Buy, Sell]`` float vectors.

    ``1`` becomes ``[1, 0]``, ``-1`` becomes ``[0, 1]`` and every other value
    becomes ``[0, 0]``.

    Args:
        labels: 1-D sequence (list, NumPy array or pandas Series) of labels.

    Returns:
        A ``torch.float32`` tensor of shape ``(len(labels), 2)``. Empty input
        yields shape ``(0, 2)`` so the column dimension is always present.
    """
    values = np.asarray(labels)
    encoded = np.stack([values == 1, values == -1], axis=1)
    return torch.tensor(encoded.astype(np.float32))


def _oversample_to_largest_class(frame, target_col="target", random_state=42):
    """Return a shuffled copy of *frame* in which every class has equal size.

    Each class smaller than the largest one is resampled with replacement up
    to the largest class size; the largest class is kept unchanged. All
    classes present in ``target_col`` are retained, so no label is silently
    dropped from the training data.

    Args:
        frame: DataFrame holding the features plus the ``target_col`` column.
        target_col: Name of the label column.
        random_state: Seed used for both the resampling and the final shuffle.

    Returns:
        A new, row-shuffled DataFrame with balanced classes.
    """
    if frame.empty:
        return frame.copy()

    class_sizes = frame[target_col].value_counts()
    largest = class_sizes.max()

    parts = []
    for label, size in class_sizes.items():
        rows = frame[frame[target_col] == label]
        if size < largest:
            rows = rows.sample(largest, replace=True, random_state=random_state)
        parts.append(rows)

    return pd.concat(parts).sample(frac=1, random_state=random_state)


# Load + preprocess
features_df, labels = load_and_preprocess_data("data/DAT_XLSX_EURUSD_M1_2021.xlsx")

# Train-test split. shuffle=False keeps the original row order, so the test
# set is the last 30% of the rows.
X_train, X_test, y_train, y_test = train_test_split(
    features_df, labels, test_size=0.3, shuffle=False
)

# Attach the labels to the training features so both are resampled together.
train_df = X_train.copy()
train_df['target'] = y_train.values

# Balance the training classes. Previously only rows labelled 1 and 0 were
# kept, which discarded every -1 (Sell) row before training.
balanced_df = _oversample_to_largest_class(train_df, target_col='target', random_state=42)

# Re-split into features and labels
X_balanced = balanced_df.drop(columns=['target'])
y_balanced = balanced_df['target']

# Scale: fit on the balanced training data only, then apply the same
# transform to the original (un-resampled) test set.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_balanced)
X_test_scaled = scaler.transform(X_test)

# Save scaler. Creating "ml/models" also creates "ml" and guarantees that the
# directory used for the model weights below exists.
os.makedirs("ml/models", exist_ok=True)
joblib.dump(scaler, "ml/scaler.pkl")

# Wrap in TensorDataset with encoded 2D targets
y_encoded = encode_targets(y_balanced.values)
train_dataset = TensorDataset(torch.tensor(X_scaled, dtype=torch.float32), y_encoded)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Init model
model = ForexMLP(input_size=X_train.shape[1])
# NOTE(review): BCELoss expects outputs in [0, 1]; presumably ForexMLP ends
# with a sigmoid - confirm against ml/models/forex_mlp.py.
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 100
model.train()
for epoch in range(epochs):
    total_loss = 0.0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # NOTE(review): the leading characters in the messages below look like a
    # mis-decoded emoji; they are kept verbatim to leave the output unchanged.
    print(f"๐Ÿ“‰ Epoch {epoch + 1} | Loss: {total_loss / len(train_loader):.4f}")

# Save model
torch.save(model.state_dict(), "ml/models/forex_mlp.pt")
print("โœ… Trained model saved to ml/models/forex_mlp.pt")

# Evaluation on the held-out test split
model.eval()
with torch.no_grad():
    preds = model(torch.tensor(X_test_scaled, dtype=torch.float32))
    preds_bin = (preds > 0.5).int()

y_test_encoded = encode_targets(y_test.values).int()
print("๐Ÿ“Š Evaluation Report:")
# Hand scikit-learn plain NumPy indicator matrices; columns are [Buy, Sell].
print(
    classification_report(
        y_test_encoded.numpy(),
        preds_bin.numpy(),
        target_names=["Buy", "Sell"],
    )
)