# ai.py
# This file handles all AI interactions, including loading/unloading models,
# generating responses, and injecting personas using the Ollama API.
import os
import re

import requests
from dotenv import load_dotenv

from logger import setup_logger
from personality import load_persona
from user_profiles import format_profile_for_block

# Set up logger specifically for AI operations
logger = setup_logger("ai")

# Load environment variables from .env file
load_dotenv()

# Base API setup from .env (e.g., http://localhost:11434/api)
# Default to an empty string so a missing OLLAMA_API hits the explicit check below
# instead of raising AttributeError on None.
BASE_API = os.getenv("OLLAMA_API", "").rstrip("/")  # Remove trailing slash just in case

# API endpoints for different Ollama operations
GEN_ENDPOINT = f"{BASE_API}/generate"
PULL_ENDPOINT = f"{BASE_API}/pull"
# UNLOAD_ENDPOINT is not used because unloading is done via `generate` with keep_alive=0
TAGS_ENDPOINT = f"{BASE_API}/tags"

# Startup model and debug toggle from .env
MODEL_NAME = os.getenv("MODEL_NAME", "llama3:latest")
SHOW_THINKING_BLOCKS = os.getenv("SHOW_THINKING_BLOCKS", "false").lower() == "true"
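
# A minimal sketch of the .env this module expects (values are illustrative, taken from
# the comments and defaults above, not hard requirements):
#   OLLAMA_API=http://localhost:11434/api
#   MODEL_NAME=llama3:latest
#   SHOW_THINKING_BLOCKS=false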

# Ensure API base is configured
if not BASE_API:
    logger.error("❌ OLLAMA_API not set.")
    raise ValueError("❌ OLLAMA_API not set.")


# Returns current model from env/config
def get_model_name():
    return MODEL_NAME


# Removes <think>...</think> blocks from the LLM response (used by some models)
def strip_thinking_block(text: str) -> str:
    return re.sub(r"<think>.*?</think>\s*", "", text, flags=re.DOTALL)
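
# For example, strip_thinking_block("<think>planning the reply</think>Hello!") returns
# "Hello!"; text from models that never emit <think> tags passes through unchanged.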


# Check if a model exists locally by calling /tags
def model_exists_locally(model_name: str) -> bool:
    try:
        resp = requests.get(TAGS_ENDPOINT)
        # Loose substring match against the raw /tags JSON; good enough for exact tag names
        return model_name in resp.text
    except Exception as e:
        logger.error(f"❌ Failed to check local models: {e}")
        return False
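
# Example (a sketch, assuming the Ollama server is reachable): a caller could use
# model_exists_locally(MODEL_NAME) to skip load_model() when the tag is already present.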


# Attempt to pull (load) a model via Ollama's /pull endpoint
def load_model(model_name: str) -> bool:
    try:
        logger.info(f"🧠 Preloading model: {model_name}")
        resp = requests.post(PULL_ENDPOINT, json={"name": model_name})
        logger.info(f"📨 Ollama pull response: {resp.status_code} - {resp.text}")
        return resp.status_code == 200
    except Exception as e:
        logger.error(f"❌ Exception during model load: {str(e)}")
        return False


# Send an empty prompt to unload a model from VRAM safely using keep_alive: 0
def unload_model(model_name: str) -> bool:
    try:
        logger.info(f"🧹 Sending safe unload request for `{model_name}`")
        payload = {
            "model": model_name,
            "prompt": "",     # ✅ Required to make the request valid
            "keep_alive": 0   # ✅ Unload from VRAM but keep on disk
        }
        resp = requests.post(GEN_ENDPOINT, json=payload)
        logger.info(f"🧽 Ollama unload response: {resp.status_code} - {resp.text}")
        return resp.status_code == 200
    except Exception as e:
        logger.error(f"❌ Exception during soft-unload: {str(e)}")
        return False
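
# Example (a sketch): unload_model(get_model_name()) frees VRAM while keeping the model
# cached on disk; the next /generate call simply reloads it.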


# Shortcut for getting the current model (can be expanded later for dynamic switching)
def get_current_model():
    return get_model_name()


# Main LLM interaction — injects personality and sends prompt to Ollama
def get_ai_response(user_prompt, context=None, user_profile=None):
    model_name = get_model_name()
    load_model(model_name)

    persona = load_persona()
    full_prompt = ""

    # Inject Delta's base persona
    if persona:
        # Normalize curly quotes so the injected text uses plain straight quotes
        safe_inject = persona["prompt_inject"].replace("“", "\"").replace("”", "\"").replace("’", "'")
        full_prompt += f"{safe_inject}\n"

    # Inject custom user profile prompt as override or influence
    if user_profile and user_profile.get("custom_prompt"):
        full_prompt += f"[User Instruction]\n{user_profile['custom_prompt']}\n"
        logger.info(f"🧠 Injected user custom prompt:\n{user_profile['custom_prompt']}")

    # Add recent chat context (this already includes the profile block!)
    if context:
        logger.info("🧠 Injected context block (pre-prompt):\n" + context)
        full_prompt += f"[Recent Conversation]\n{context}\n"

    # Add user's message and expected bot reply prefix
    if persona:
        full_prompt += f"\nUser: {user_prompt}\n{persona['name']}:"
    else:
        full_prompt += f"\nUser: {user_prompt}\nResponse:"
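
    # At this point full_prompt looks roughly like the following (illustrative only;
    # the exact text depends on the persona file, profile, and context):
    #   <persona prompt_inject>
    #   [User Instruction]
    #   <custom_prompt>
    #   [Recent Conversation]
    #   <context>
    #
    #   User: <user_prompt>
    #   <persona name>: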

    payload = {
        "model": model_name,
        "prompt": full_prompt,
        "stream": False  # Ask Ollama for a single JSON response rather than a stream
    }

    logger.info("🛰️ SENDING TO OLLAMA /generate")
    logger.info(f"Payload: {payload}")

    try:
        response = requests.post(GEN_ENDPOINT, json=payload)
        logger.info(f"📨 Raw response: {response.text}")
        if response.status_code == 200:
            result = response.json()
            reply = result.get("response", "[No message in response]")
            # Hide <think>...</think> reasoning unless the .env debug toggle enables it
            if not SHOW_THINKING_BLOCKS:
                reply = strip_thinking_block(reply)
            return reply
        else:
            return f"[Error {response.status_code}] {response.text}"
    except Exception as e:
        return f"[Exception] {str(e)}"
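

# A minimal manual smoke test (a sketch, not part of the bot runtime). It assumes a local
# Ollama server reachable at OLLAMA_API, a pulled MODEL_NAME, and a working persona/.env setup.
if __name__ == "__main__":
    print(get_ai_response("Say hello in one short sentence."))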