# ml/preprocessing.py

import pandas as pd
import numpy as np

def _relative_strength_index(close, period=14):
    """Return the RSI of *close* using simple rolling means over *period* bars.

    RSI = 100 * avg_gain / (avg_gain + avg_loss), which is algebraically the
    same as the usual 100 - 100 / (1 + avg_gain / avg_loss) but avoids an
    explicit division by a zero average loss. Windows with no price movement
    at all (both averages zero) are reported as a neutral 50 instead of NaN.
    Warm-up rows (fewer than *period* price changes available) stay NaN.
    """
    delta = close.diff()
    avg_gain = delta.clip(lower=0).rolling(period).mean()
    avg_loss = (-delta.clip(upper=0)).rolling(period).mean()
    total_move = avg_gain + avg_loss
    rsi = 100 * avg_gain / total_move
    # NaN == 0 is False, so warm-up rows keep their NaN and are dropped later.
    return rsi.mask(total_move == 0, 50.0)


def load_and_preprocess_data(path, *, future_window=4, threshold=0.001):
    """Load OHLCV rows from an Excel sheet and build model features and labels.

    The sheet is read without a header row; its columns are interpreted, in
    order, as timestamp, open, high, low, close, volume. Rows are resampled to
    15-minute bars and bars with no data are dropped, so every rolling/shift
    window below counts *remaining* bars and may span a gap in wall-clock time.

    Args:
        path: Anything accepted by ``pandas.read_excel`` as its first argument.
        future_window: Number of subsequent bars inspected when labelling a
            bar. Must be >= 1. Defaults to 4.
        threshold: Fractional rise over the current close that the highest
            close of the next ``future_window`` bars must exceed for the bar
            to be labelled 1. Defaults to 0.001.

    Returns:
        A ``(features, target)`` pair sharing the same DatetimeIndex:
        ``features`` is a DataFrame with the columns listed in ``features``
        below, ``target`` is a 0/1 Series. Rows where any indicator is still
        warming up, or where the look-ahead window runs past the end of the
        data, are removed.

    Raises:
        ValueError: If ``future_window`` is smaller than 1.
    """
    if future_window < 1:
        raise ValueError('future_window must be a positive integer')

    df = pd.read_excel(path, header=None, names=[
        'timestamp', 'open', 'high', 'low', 'close', 'volume'
    ])
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.set_index('timestamp', inplace=True)

    # Resample to 15-minute intervals; empty intervals yield NaN prices and
    # are removed by dropna().
    df = df.resample('15min').agg({
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum'
    }).dropna()

    close = df['close']

    # Trend / momentum features
    df['sma_10'] = close.rolling(10).mean()
    df['sma_30'] = close.rolling(30).mean()
    df['rsi_14'] = _relative_strength_index(close, 14)
    df['momentum'] = close - close.shift(4)
    df['price_delta'] = close - df['open']
    df['vol_rolling'] = df['volume'].rolling(10).mean()

    # Bollinger band position: 0 on the 20-bar mean, +1 / -1 on the upper /
    # lower 2-sigma band (a rescaled %B, not the classic 0..1 form).
    rolling_mean = close.rolling(20).mean()
    rolling_std = close.rolling(20).std()
    band_position = (close - rolling_mean) / (2 * rolling_std)
    # A perfectly flat window has zero width; the price sits on the mean, so
    # report 0 rather than the NaN produced by 0 / 0.
    df['bollinger_b'] = band_position.mask(rolling_std == 0, 0.0)

    # MACD line (12-span EMA minus 26-span EMA)
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    df['macd'] = ema12 - ema26

    # Timestamp-based features
    df['hour'] = df.index.hour
    df['weekday'] = df.index.weekday

    # Simulated portfolio balance and buy-in value (placeholders for now)
    df['balance'] = 10000.0  # Placeholder: constant for every row
    df['buy_in'] = close.shift(1)  # Simulated buy price: previous bar's close
    df['pnl_per_trade'] = close - df['buy_in']  # Placeholder PnL

    # Target: does the close rise more than `threshold` within the next
    # `future_window` bars? Shifting back by the window and then taking a
    # trailing max of the same length gives, for bar t, max(close[t+1..t+N]).
    df['future_max'] = close.shift(-future_window).rolling(future_window).max()
    df['target'] = np.where(df['future_max'] > close * (1 + threshold), 1, 0)

    # Removes indicator warm-up rows and the trailing rows whose look-ahead
    # window is incomplete (future_max is NaN there).
    df.dropna(inplace=True)

    # Define feature set
    features = [
        'open', 'high', 'low', 'close', 'volume',
        'sma_10', 'sma_30', 'rsi_14', 'momentum',
        'price_delta', 'vol_rolling', 'bollinger_b', 'macd',
        'hour', 'weekday', 'balance', 'buy_in', 'pnl_per_trade'
    ]

    return df[features], df['target']