# ml/train_mlp.py

import os
import sys

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import joblib
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from ml.models.forex_mlp import ForexMLP
from ml.preprocessing import load_and_preprocess_data

# Load + preprocess
features_df, labels = load_and_preprocess_data("data/DAT_XLSX_EURUSD_M1_2021.xlsx")

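# load_and_preprocess_data (see ml/preprocessing.py) is assumed here to return a
# feature DataFrame plus a label Series whose values are 1 (Buy), -1 (Sell) or
# 0 (neither), which is the convention encode_targets below relies on.
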
# Encode targets into two-element vectors: [Buy, Sell]
def encode_targets(labels):
    encoded = []
    for val in labels:
        if val == 1:
            encoded.append([1, 0])
        elif val == -1:
            encoded.append([0, 1])
        else:
            encoded.append([0, 0])
    return torch.tensor(encoded, dtype=torch.float32)

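# Worked example of the encoding above:
#   encode_targets([1, -1, 0]) -> tensor([[1., 0.], [0., 1.], [0., 0.]])
# Column 0 is the Buy signal, column 1 the Sell signal; 0 maps to neither.
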
# Train-test split (shuffle=False keeps the rows in their original order)
X_train, X_test, y_train, y_test = train_test_split(features_df, labels, test_size=0.3, shuffle=False)

# 🟢 Combine training features and target into one DataFrame for easier manipulation
train_df = X_train.copy()
train_df['target'] = y_train.values

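# The class balancing below uses only the training split, so no test rows are
# duplicated into the training data. Note that it balances Buy (1) rows against
# no-trade (0) rows; Sell (-1) rows from the training split are not carried into
# the balanced set.
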
# 🧪 Split into minority and majority
minority = train_df[train_df['target'] == 1]
majority = train_df[train_df['target'] == 0]

# 🔁 Oversample minority class to match majority count
minority_oversampled = minority.sample(len(majority), replace=True, random_state=42)

# 🔄 Combine + shuffle
balanced_df = pd.concat([majority, minority_oversampled]).sample(frac=1, random_state=42)

# ✅ Re-split into features and labels
X_balanced = balanced_df.drop(columns=['target'])
y_balanced = balanced_df['target']

# 🔢 Scale
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_balanced)
X_test_scaled = scaler.transform(X_test)  # Use the original test set!

# 💾 Save scaler
os.makedirs("ml", exist_ok=True)
joblib.dump(scaler, "ml/scaler.pkl")

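# The scaler is fit on the (balanced) training features only and merely applied to
# the test set, so test statistics never leak into the preprocessing. Persisting it
# with joblib lets inference code reuse the exact same transform, e.g.:
#   scaler = joblib.load("ml/scaler.pkl")
#   X_live_scaled = scaler.transform(X_live)  # X_live: hypothetical new feature rows
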
# 📦 Wrap in TensorDataset with encoded 2D targets
y_encoded = encode_targets(y_balanced.values)
train_dataset = TensorDataset(torch.tensor(X_scaled, dtype=torch.float32), y_encoded)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

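# shuffle=True only randomises the order of training batches; the MLP treats each
# row as an independent sample, while the shuffle=False split above keeps the last
# 30% of rows (the most recent ones, assuming the source file is chronological)
# for testing.
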
# Init model
model = ForexMLP(input_size=X_train.shape[1])
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

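# nn.BCELoss expects probabilities in [0, 1], so ForexMLP is assumed to end in a
# sigmoid over its two outputs; Buy and Sell are then treated as independent binary
# targets rather than mutually exclusive classes. (If the model emitted raw logits,
# nn.BCEWithLogitsLoss would be the numerically safer choice.)
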
# Training loop
epochs = 100
for epoch in range(epochs):
    total_loss = 0.0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"📉 Epoch {epoch + 1} | Loss: {total_loss / len(train_loader):.4f}")

# Save model
torch.save(model.state_dict(), "ml/models/forex_mlp.pt")
print("✅ Trained model saved to ml/models/forex_mlp.pt")

# Evaluation
model.eval()
with torch.no_grad():
    preds = model(torch.tensor(X_test_scaled, dtype=torch.float32))
    preds_bin = (preds > 0.5).int()

y_test_encoded = encode_targets(y_test.values).int()
print("📊 Evaluation Report:")
print(classification_report(y_test_encoded, preds_bin))
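
# With two-column {0, 1} targets, classification_report treats this as a multilabel
# problem and reports precision/recall/F1 per output: row 0 is the Buy column and
# row 1 the Sell column. Passing target_names=["Buy", "Sell"] would label them
# explicitly in the printed report.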