65 lines
2.2 KiB
Python
65 lines
2.2 KiB
Python
# ml/preprocessing.py
|
|
|
|
import pandas as pd
|
|
import numpy as np
|
|
|
|
def load_and_preprocess_data(path, *, future_window: int = 4, threshold: float = 0.001):
    """Load raw OHLCV rows, resample to 15-minute bars and build ML features.

    The input must have no header row and exactly six columns, in this order:
    timestamp, open, high, low, close, volume.

    Args:
        path: Location of the raw data. A path ending in ``.csv`` is read
            with ``pd.read_csv``; anything else is handed to
            ``pd.read_excel`` (the original behaviour).
        future_window: Number of 15-minute bars to look ahead when building
            the target. Must be >= 1.
        threshold: Fractional rise over the current close (0.001 == 0.1%)
            that the highest close of the look-ahead window must exceed for
            the row to be labelled 1.

    Returns:
        A ``(features, target)`` tuple: a DataFrame holding the feature
        columns listed at the bottom of this function and an integer Series
        of 0/1 labels, both indexed by bar timestamp. Rows with an
        incomplete indicator window or an incomplete look-ahead window are
        dropped.

    Raises:
        ValueError: If ``future_window`` is smaller than 1.
    """
    if future_window < 1:
        raise ValueError("future_window must be a positive integer")

    columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
    if str(path).lower().endswith('.csv'):
        df = pd.read_csv(path, header=None, names=columns)
    else:
        df = pd.read_excel(path, header=None, names=columns)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.set_index('timestamp', inplace=True)

    # Resample to 15-minute intervals; bins without any rows come out as NaN
    # prices and are removed by dropna().
    df = df.resample('15min').agg({
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
    }).dropna()

    close = df['close']

    # Add features
    df['sma_10'] = close.rolling(10).mean()
    df['sma_30'] = close.rolling(30).mean()

    # RSI over 14 bars using simple moving averages of gains and losses.
    # 100 * gain / (gain + loss) is algebraically 100 - 100 / (1 + gain/loss)
    # but stays finite when there are no losses in the window.
    delta = close.diff()
    avg_gain = delta.clip(lower=0).rolling(14).mean()
    avg_loss = (-delta).clip(lower=0).rolling(14).mean()
    total_move = avg_gain + avg_loss
    rsi = (100 * avg_gain / total_move).clip(lower=0, upper=100)
    # A completely flat window has no gains and no losses: report neutral 50
    # instead of the NaN that 0/0 would produce. Warm-up NaNs are preserved
    # because NaN != 0 evaluates to True.
    df['rsi_14'] = rsi.where(total_move != 0, 50.0)

    df['momentum'] = close - close.shift(4)
    df['price_delta'] = close - df['open']
    df['vol_rolling'] = df['volume'].rolling(10).mean()

    # Bollinger %B (centred variant): 0 at the 20-bar mean, +/-1 at the
    # +/-2 standard deviation bands.
    rolling_mean = close.rolling(20).mean()
    rolling_std = close.rolling(20).std()
    df['bollinger_b'] = (close - rolling_mean) / (2 * rolling_std)

    # MACD
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    df['macd'] = ema12 - ema26

    # Timestamp-based features
    df['hour'] = df.index.hour
    df['weekday'] = df.index.weekday

    # Simulated portfolio balance and buy-in value (placeholders for now)
    df['balance'] = 10000.0  # Placeholder: could be dynamic in real-time
    df['buy_in'] = close.shift(1)  # Simulated buy price
    df['pnl_per_trade'] = close - df['buy_in']  # Fake PnL calc

    # Target: will the close rise more than `threshold` within the next
    # `future_window` bars? Shifting by -future_window and then taking a
    # trailing window of the same length yields max(close[t+1 .. t+N]).
    df['future_max'] = close.shift(-future_window).rolling(future_window).max()
    df['target'] = np.where(df['future_max'] > close * (1 + threshold), 1, 0)

    # A zero-variance Bollinger window can divide by zero and yield +/-inf,
    # which dropna() alone would let through to the model.
    df = df.replace([np.inf, -np.inf], np.nan).dropna()

    # Define feature set
    features = [
        'open', 'high', 'low', 'close', 'volume',
        'sma_10', 'sma_30', 'rsi_14', 'momentum',
        'price_delta', 'vol_rolling', 'bollinger_b', 'macd',
        'hour', 'weekday', 'balance', 'buy_in', 'pnl_per_trade',
    ]

    return df[features], df['target']
|