# ai.py
# This file handles all AI interactions, including loading/unloading models,
# generating responses, and injecting personas using the Ollama API.

import os
import re

import requests
from dotenv import load_dotenv

from personality import load_persona
from user_profiles import format_profile_for_block
from logger import setup_logger

# Set up logger specifically for AI operations
logger = setup_logger("ai")

# Load environment variables from .env file
load_dotenv()

# Base API setup from .env (e.g., http://localhost:11434/api)
# Fall back to an empty string so a missing variable is caught below instead of crashing on .rstrip()
BASE_API = (os.getenv("OLLAMA_API") or "").rstrip("/")  # Remove trailing slash just in case

# API endpoints for different Ollama operations
GEN_ENDPOINT = f"{BASE_API}/generate"
PULL_ENDPOINT = f"{BASE_API}/pull"
# UNLOAD_ENDPOINT is not used because unloading is done via `generate` with keep_alive=0
TAGS_ENDPOINT = f"{BASE_API}/tags"

# Startup model and debug toggle from .env
MODEL_NAME = os.getenv("MODEL_NAME", "llama3:latest")
SHOW_THINKING_BLOCKS = os.getenv("SHOW_THINKING_BLOCKS", "false").lower() == "true"

# Ensure the API base is configured before anything tries to call Ollama
if not BASE_API:
    logger.error("❌ OLLAMA_API not set.")
    raise ValueError("❌ OLLAMA_API not set.")
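
# Example .env consumed above (illustrative values only; adjust host, port, and
# model tag to your own setup):
#   OLLAMA_API=http://localhost:11434/api
#   MODEL_NAME=llama3:latest
#   SHOW_THINKING_BLOCKS=false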

# Returns the current model from env/config
def get_model_name():
    return MODEL_NAME


# Removes <think>...</think> blocks from the LLM response (emitted by some models)
def strip_thinking_block(text: str) -> str:
    return re.sub(r"<think>.*?</think>\s*", "", text, flags=re.DOTALL)


# Check if a model exists locally by calling /tags
def model_exists_locally(model_name: str) -> bool:
    try:
        resp = requests.get(TAGS_ENDPOINT)
        return model_name in resp.text
    except Exception as e:
        logger.error(f"❌ Failed to check local models: {e}")
        return False


# Attempt to pull (load) a model via Ollama's /pull endpoint
def load_model(model_name: str) -> bool:
    try:
        logger.info(f"🧠 Preloading model: {model_name}")
        resp = requests.post(PULL_ENDPOINT, json={"name": model_name})
        logger.info(f"📨 Ollama pull response: {resp.status_code} - {resp.text}")
        return resp.status_code == 200
    except Exception as e:
        logger.error(f"❌ Exception during model load: {str(e)}")
        return False


# Send an empty prompt to unload a model from VRAM safely using keep_alive: 0
def unload_model(model_name: str) -> bool:
    try:
        logger.info(f"🧹 Sending safe unload request for `{model_name}`")
        payload = {
            "model": model_name,
            "prompt": "",     # ✅ Required to make the request valid
            "keep_alive": 0,  # ✅ Unload from VRAM but keep on disk
        }
        resp = requests.post(GEN_ENDPOINT, json=payload)
        logger.info(f"🧽 Ollama unload response: {resp.status_code} - {resp.text}")
        return resp.status_code == 200
    except Exception as e:
        logger.error(f"❌ Exception during soft-unload: {str(e)}")
        return False


# Shortcut for getting the current model (can be expanded later for dynamic switching)
def get_current_model():
    return get_model_name()


# Main LLM interaction: injects personality and sends the prompt to Ollama
def get_ai_response(user_prompt, context=None, user_profile=None):
    model_name = get_model_name()
    load_model(model_name)

    persona = load_persona()
    full_prompt = ""

    # Inject Delta's base persona, normalising smart quotes to plain ASCII
    if persona:
        safe_inject = persona["prompt_inject"].replace("“", "\"").replace("”", "\"").replace("’", "'")
        full_prompt += f"{safe_inject}\n"

    # Inject custom user profile prompt as override or influence
    if user_profile and user_profile.get("custom_prompt"):
        full_prompt += f"[User Instruction]\n{user_profile['custom_prompt']}\n"
        # logger.info(f"🧠 Injected user custom prompt:\n{user_profile['custom_prompt']}")

    # Log known user metadata (guarded so a missing profile can't crash the call)
    if user_profile:
        logger.info("👤 [User Metadata]")
        logger.info(f" └─ Name: {user_profile.get('display_name')}")
        logger.info(f" └─ Interactions: {user_profile.get('interactions')}")
        if user_profile.get("pronouns"):
            logger.info(f" └─ Pronouns: {user_profile['pronouns']}")
        if user_profile.get("custom_prompt"):
            logger.info(f" └─ Custom Prompt: {user_profile['custom_prompt']}")

    # Add recent chat context (this already includes the profile block!)
    if context:
        logger.info("🧠 Injected context block (pre-prompt):\n" + context)
        full_prompt += f"[Recent Conversation]\n{context}\n"

    # Add the user's message and the expected bot reply prefix
    if persona:
        full_prompt += f"\nUser: {user_prompt}\n{persona['name']}:"
    else:
        full_prompt += f"\nUser: {user_prompt}\nResponse:"

    payload = {
        "model": model_name,
        "prompt": full_prompt,
        "stream": False,
    }

    logger.info("🛰️ SENDING TO OLLAMA /generate")
    logger.info(f"Payload: {payload}")

    try:
        response = requests.post(GEN_ENDPOINT, json=payload)
        logger.info(f"📨 Raw response: {response.text}")
        if response.status_code == 200:
            result = response.json()
            return result.get("response", "[No message in response]")
        else:
            return f"[Error {response.status_code}] {response.text}"
    except Exception as e:
        return f"[Exception] {str(e)}"
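

# ----------------------------------------------------------------------
# Minimal manual smoke test (illustrative sketch only, not used by the bot).
# It assumes a local Ollama server is reachable at OLLAMA_API and that the
# model named in MODEL_NAME is available or pullable. Run with: python ai.py
# ----------------------------------------------------------------------
if __name__ == "__main__":
    # Push a throwaway prompt through the normal persona/prompt pipeline
    reply = get_ai_response("Say hello in one short sentence.")
    print(f"Model reply: {reply}")

    # Free VRAM again once the test is done
    unload_model(get_current_model())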