Forex-Bot/ml/preprocessing.py
2025-05-06 10:00:57 -04:00

65 lines
2.2 KiB
Python

# ml/preprocessing.py
import pandas as pd
import numpy as np
# Column names assigned positionally to the raw sheet, which has no header row.
_RAW_COLUMNS = ('timestamp', 'open', 'high', 'low', 'close', 'volume')

# Columns returned as the feature matrix, in this order.
_FEATURE_COLUMNS = [
    'open', 'high', 'low', 'close', 'volume',
    'sma_10', 'sma_30', 'rsi_14', 'momentum',
    'price_delta', 'vol_rolling', 'bollinger_b', 'macd',
    'hour', 'weekday', 'balance', 'buy_in', 'pnl_per_trade',
]


def _relative_strength_index(close, period=14):
    """Return the Wilder-smoothed Relative Strength Index of *close* (0-100).

    The first ``period`` values are NaN (warm-up). A window with no price
    movement at all is reported as the neutral value 50.
    """
    delta = close.diff()
    gains = delta.clip(lower=0)
    losses = (-delta).clip(lower=0)
    # Wilder's smoothing is an exponential average with alpha = 1 / period.
    avg_gain = gains.ewm(alpha=1 / period, min_periods=period, adjust=False).mean()
    avg_loss = losses.ewm(alpha=1 / period, min_periods=period, adjust=False).mean()
    # Algebraically equal to 100 - 100 / (1 + avg_gain / avg_loss), but this
    # form never divides by a zero average loss.
    total = avg_gain + avg_loss
    rsi = 100 * (avg_gain / total)
    # total == 0 means neither gains nor losses; NaN warm-up rows stay NaN.
    return rsi.mask(total == 0, 50.0)


def load_and_preprocess_data(path, *, future_window=4, threshold=0.001):
    """Load raw price rows, build 15-minute bars, and derive features/target.

    Args:
        path: Location of an Excel sheet readable by ``pandas.read_excel``.
            The sheet is read without a header row and its columns are named
            timestamp, open, high, low, close, volume in that order. An
            already loaded DataFrame with those six columns (same order) is
            also accepted; it is copied, never modified.
        future_window: Number of 15-minute bars to look ahead when labelling.
        threshold: Fractional rise (0.001 == 0.1%) that the highest close of
            the next ``future_window`` bars must exceed for a positive label.

    Returns:
        A ``(features, target)`` pair. ``features`` is a DataFrame indexed by
        bar timestamp holding the columns of ``_FEATURE_COLUMNS``; ``target``
        is an aligned Series of 0/1 labels. Rows lacking a full indicator
        warm-up or a full look-ahead window are dropped.

    Raises:
        ValueError: If ``future_window`` is smaller than 1.
    """
    if future_window < 1:
        raise ValueError('future_window must be a positive number of bars')

    if isinstance(path, pd.DataFrame):
        raw = path.copy()
        raw.columns = list(_RAW_COLUMNS)
    else:
        raw = pd.read_excel(path, header=None, names=list(_RAW_COLUMNS))

    raw['timestamp'] = pd.to_datetime(raw['timestamp'])
    # 'first'/'last' below depend on row order, so make sure rows are in
    # chronological order (stable sort keeps ties in their original order).
    raw = raw.set_index('timestamp').sort_index(kind='mergesort')

    # Resample to 15-minute OHLCV bars; intervals without any rows are dropped.
    bars = raw.resample('15min').agg({
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
    }).dropna()
    close = bars['close']

    # Trend / momentum features.
    bars['sma_10'] = close.rolling(10).mean()
    bars['sma_30'] = close.rolling(30).mean()
    bars['rsi_14'] = _relative_strength_index(close, 14)
    bars['momentum'] = close - close.shift(4)
    bars['price_delta'] = close - bars['open']
    bars['vol_rolling'] = bars['volume'].rolling(10).mean()

    # Bollinger position, centred: 0 at the 20-bar mean, +/-1 at the
    # 2-standard-deviation bands (a rescaling of the classic 0..1 %B).
    rolling_mean = close.rolling(20).mean()
    rolling_std = close.rolling(20).std()
    bollinger = (close - rolling_mean) / (2 * rolling_std)
    # A perfectly flat window has zero deviation; the close sits on the mean.
    bars['bollinger_b'] = bollinger.mask(rolling_std == 0, 0.0)

    # MACD line: fast EMA minus slow EMA.
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    bars['macd'] = ema12 - ema26

    # Timestamp-based features.
    bars['hour'] = bars.index.hour
    bars['weekday'] = bars.index.weekday

    # Simulated portfolio columns (placeholders, not real account data).
    bars['balance'] = 10000.0
    bars['buy_in'] = close.shift(1)  # previous bar's close as the entry price
    bars['pnl_per_trade'] = close - bars['buy_in']

    # Target: does the highest close of the next `future_window` bars exceed
    # the current close by more than `threshold`?  Shifting back by N and then
    # taking a trailing N-bar max covers exactly bars t+1 .. t+N.
    bars['future_max'] = close.shift(-future_window).rolling(future_window).max()
    bars['target'] = np.where(bars['future_max'] > close * (1 + threshold), 1, 0)

    # Remove warm-up rows and the tail that has no complete look-ahead window.
    bars = bars.dropna()
    return bars[_FEATURE_COLUMNS], bars['target']