# ai.py
import os
import re

import requests
from dotenv import load_dotenv

from personality import load_persona
from logger import setup_logger

logger = setup_logger("ai")
load_dotenv()

BASE_API = os.getenv("OLLAMA_API")
if not BASE_API:
    # Validate before touching the value, otherwise .rstrip() on None
    # raises an AttributeError instead of this clear error.
    logger.error("❌ OLLAMA_API not set.")
    raise ValueError("❌ OLLAMA_API not set.")
BASE_API = BASE_API.rstrip("/")  # ← ensures no trailing-slash issue

GEN_ENDPOINT = f"{BASE_API}/generate"
PULL_ENDPOINT = f"{BASE_API}/pull"
# UNLOAD_ENDPOINT = f"{BASE_API}/unload"
TAGS_ENDPOINT = f"{BASE_API}/tags"

MODEL_NAME = os.getenv("MODEL_NAME", "llama3:latest")
SHOW_THINKING_BLOCKS = os.getenv("SHOW_THINKING_BLOCKS", "false").lower() == "true"


def get_model_name():
    return MODEL_NAME


def strip_thinking_block(text: str) -> str:
    """Remove a <think>...</think> reasoning block (plus trailing whitespace)
    that some models emit before their actual answer."""
    return re.sub(r"<think>.*?</think>\s*", "", text, flags=re.DOTALL)


def model_exists_locally(model_name: str) -> bool:
    """Check Ollama's tags listing for the model name."""
    try:
        resp = requests.get(TAGS_ENDPOINT, timeout=10)
        return model_name in resp.text
    except Exception as e:
        logger.error(f"❌ Failed to check local models: {e}")
        return False


def load_model(model_name: str) -> bool:
    """Pull the model so it is available before generating."""
    try:
        logger.info(f"🧠 Preloading model: {model_name}")
        resp = requests.post(PULL_ENDPOINT, json={"name": model_name})
        logger.info(f"📨 Ollama pull response: {resp.status_code} - {resp.text}")
        return resp.status_code == 200
    except Exception as e:
        logger.error(f"❌ Exception during model load: {str(e)}")
        return False


def unload_model(model_name: str) -> bool:
    """Soft-unload: a generate call with keep_alive=0 and an empty prompt
    tells Ollama to evict the model from VRAM immediately."""
    try:
        logger.info(f"🧹 Soft-unloading model from VRAM: {model_name}")
        resp = requests.post(GEN_ENDPOINT, json={
            "model": model_name,
            "keep_alive": 0,
            "prompt": ""
        })
        logger.info(f"🧽 Ollama unload response: {resp.status_code} - {resp.text}")
        return resp.status_code == 200
    except Exception as e:
        logger.error(f"❌ Exception during soft-unload: {str(e)}")
        return False


def get_current_model():
    return get_model_name()


def get_ai_response(user_prompt):
    model_name = get_model_name()
    load_model(model_name)

    persona = load_persona()
    if persona:
        # Normalize curly quotes/apostrophes in the persona text to plain ASCII.
        safe_inject = (persona["prompt_inject"]
                       .replace("\u201c", "\"")
                       .replace("\u201d", "\"")
                       .replace("\u2019", "'"))
        full_prompt = f"{safe_inject}\nUser: {user_prompt}\n{persona['name']}:"
    else:
        full_prompt = user_prompt

    payload = {
        "model": model_name,
        "prompt": full_prompt,
        "stream": False
    }

    logger.info("🛰️ SENDING TO OLLAMA /generate")
    logger.info(f"Payload: {payload}")

    try:
        response = requests.post(GEN_ENDPOINT, json=payload)
        logger.info(f"📨 Raw response: {response.text}")
        if response.status_code == 200:
            result = response.json()
            response_text = result.get("response", "[No message in response]")
            return strip_thinking_block(response_text) if not SHOW_THINKING_BLOCKS else response_text
        else:
            return f"[Error {response.status_code}] {response.text}"
    except Exception as e:
        return f"[Exception] {str(e)}"
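

# --- Illustrative usage sketch (an addition, not part of the original module) ---
# A minimal manual smoke test, assuming a .env with OLLAMA_API pointing at a
# running Ollama server's API root (e.g. http://localhost:11434/api) and a
# MODEL_NAME that Ollama can serve; personality.load_persona and
# logger.setup_logger must be importable as in the rest of this project.
if __name__ == "__main__":
    print(f"Using model: {get_current_model()}")
    if not model_exists_locally(get_model_name()):
        print("Model not found locally; load_model() will pull it on first use.")
    print(get_ai_response("Introduce yourself in one sentence."))
    # Free VRAM after the test run.
    unload_model(get_model_name())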