# ai.py
# This file handles all AI interactions, including loading/unloading models,
# generating responses, and injecting personas using the Ollama API.

import os
import re
import time

import requests
import yaml
from dotenv import load_dotenv

from personality import load_persona
from user_profiles import format_profile_for_block
from logger import setup_logger, generate_req_id, log_llm_request, log_llm_response
from modelfile import load_modfile_if_exists, parse_mod_file

# Load environment variables from .env before any os.getenv() lookups below.
load_dotenv()

debug_mode = os.getenv("DEBUG_MODE", "false").lower() == "true"

# Set up a logger specifically for AI operations
logger = setup_logger("ai")

# Load settings.yml to fetch the ai.modfile config
try:
    settings_path = os.path.join(os.path.dirname(__file__), "settings.yml")
    with open(settings_path, "r", encoding="utf-8") as f:
        SETTINGS = yaml.safe_load(f) or {}
except Exception:
    SETTINGS = {}
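
# Illustrative settings.yml snippet for the keys read above (the values are
# examples; only the `ai.use_modfile` / `ai.modfile_path` keys come from this module):
#
#   ai:
#     use_modfile: true
#     modfile_path: personas/delta.mod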

# Modelfile config
AI_USE_MODFILE = SETTINGS.get("ai", {}).get("use_modfile", False)
AI_MODFILE_PATH = SETTINGS.get("ai", {}).get("modfile_path")
MODFILE = None
if AI_USE_MODFILE and AI_MODFILE_PATH:
    try:
        MODFILE = load_modfile_if_exists(AI_MODFILE_PATH)
        if MODFILE:
            # Resolve includes (best-effort): merge params and adopt system/template
            def _resolve_includes(mod):
                merged = dict(mod)
                src = merged.get('_source_path')
                includes = merged.get('includes', []) or []
                base_dir = os.path.dirname(src) if src else os.path.dirname(__file__)
                for inc in includes:
                    try:
                        # Resolve relative to base_dir
                        cand = inc if os.path.isabs(inc) else os.path.normpath(os.path.join(base_dir, inc))
                        if not os.path.exists(cand):
                            continue
                        inc_mod = parse_mod_file(cand)
                        # Merge params (included params do not override the main ones)
                        inc_params = inc_mod.get('params', {}) or {}
                        for k, v in inc_params.items():
                            if k not in merged.get('params', {}):
                                merged.setdefault('params', {})[k] = v
                        # Adopt the included system text if the main file doesn't have one
                        if not merged.get('system') and inc_mod.get('system'):
                            merged['system'] = inc_mod.get('system')
                        # If the main file has no template, adopt the included template
                        if not merged.get('template') and inc_mod.get('template'):
                            merged['template'] = inc_mod.get('template')
                    except Exception:
                        continue
                return merged

            MODFILE = _resolve_includes(MODFILE)
            logger.info(f"🔁 Modelfile loaded: {AI_MODFILE_PATH}")
        else:
            logger.warning(f"⚠️ Modelfile not found or failed to parse: {AI_MODFILE_PATH}")
    except Exception as e:
        logger.exception("⚠️ Exception while loading modelfile: %s", e)
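
# Illustration of the include-merge rules above (values are made up): params from
# the main modelfile win, and the included SYSTEM/TEMPLATE are only adopted when
# the main file does not define its own.
#
#   main:     {'params': {'temperature': 0.7}, 'system': 'You are Delta.'}
#   included: {'params': {'temperature': 0.2, 'top_p': 0.9}, 'template': '...'}
#   merged:   {'params': {'temperature': 0.7, 'top_p': 0.9},
#              'system': 'You are Delta.', 'template': '...'}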

# If no modelfile is explicitly configured, attempt to auto-load a `delta.mod` or
# `delta.json` from common example/persona locations so the bot has a default persona.
if not MODFILE:
    for candidate in [
        os.path.join(os.path.dirname(__file__), '..', 'examples', 'delta.mod'),
        os.path.join(os.path.dirname(__file__), '..', 'examples', 'delta.json'),
        os.path.join(os.path.dirname(__file__), '..', 'personas', 'delta.mod'),
    ]:
        try:
            mod = load_modfile_if_exists(candidate)
            if mod:
                MODFILE = mod
                logger.info(f"🔁 Auto-loaded default modelfile: {candidate}")
                break
        except Exception:
            continue


def list_modelfiles(search_dirs=None):
    """Return a list of candidate modelfile paths from common locations."""
    base_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), '..'))
    if search_dirs is None:
        search_dirs = [
            os.path.join(base_dir, 'examples'),
            os.path.join(base_dir, 'personas'),
            os.path.join(base_dir, 'src'),
            base_dir,
        ]
    results = []
    for d in search_dirs:
        try:
            if not os.path.isdir(d):
                continue
            for fname in os.listdir(d):
                if fname.endswith('.mod') or fname.endswith('.json'):
                    results.append(os.path.join(d, fname))
        except Exception:
            continue
    return sorted(results)
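
# Illustrative call (actual paths depend on the checkout layout):
#
#   list_modelfiles()
#   # -> ['/path/to/repo/examples/delta.json', '/path/to/repo/examples/delta.mod', ...]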

# Base API setup from .env (e.g., http://localhost:11434/api).
# Normalize so the configured base always includes the `/api` prefix; endpoints
# like `/generate` and `/tags` stay reachable even if the user sets
# `OLLAMA_API` without `/api`.
raw_api = (os.getenv("OLLAMA_API") or "").rstrip("/")
if raw_api == "":
    BASE_API = ""
else:
    BASE_API = raw_api if raw_api.endswith("/api") else f"{raw_api}/api"
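
# Examples of the normalization above (values illustrative):
#   OLLAMA_API=http://localhost:11434       -> BASE_API == "http://localhost:11434/api"
#   OLLAMA_API=http://localhost:11434/api/  -> BASE_API == "http://localhost:11434/api"
#   OLLAMA_API unset                        -> BASE_API == "" (raises below)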

# API endpoints for the Ollama operations used below
GEN_ENDPOINT = f"{BASE_API}/generate"
PULL_ENDPOINT = f"{BASE_API}/pull"
# No UNLOAD_ENDPOINT: unloading is done via `generate` with keep_alive=0
TAGS_ENDPOINT = f"{BASE_API}/tags"

# Startup model and response-handling toggles from .env
MODEL_NAME = os.getenv("MODEL_NAME", "llama3:latest")
SHOW_THINKING_BLOCKS = os.getenv("SHOW_THINKING_BLOCKS", "false").lower() == "true"
AI_INCLUDE_CONTEXT = os.getenv("AI_INCLUDE_CONTEXT", "true").lower() == "true"
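
# Illustrative .env for this module (the variable names come from the os.getenv
# calls in this file; the values are examples only):
#
#   OLLAMA_API=http://localhost:11434/api
#   MODEL_NAME=llama3:latest
#   SHOW_THINKING_BLOCKS=false
#   AI_INCLUDE_CONTEXT=true
#   DEBUG_MODE=false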

# Ensure the API base is configured
if not BASE_API:
    logger.error("❌ OLLAMA_API not set.")
    raise ValueError("❌ OLLAMA_API not set.")


# Returns the current model from env/config
def get_model_name():
    return MODEL_NAME


# Removes <think>...</think> blocks from the LLM response (emitted by some models)
def strip_thinking_block(text: str) -> str:
    return re.sub(r"<think>.*?</think>\s*", "", text, flags=re.DOTALL)
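
# Illustrative behaviour of the helper above:
#   strip_thinking_block("<think>chain of thought...</think>Hello!")  # -> "Hello!"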


# Check whether a model exists locally by calling /tags
def model_exists_locally(model_name: str) -> bool:
    try:
        resp = requests.get(TAGS_ENDPOINT)
        return model_name in resp.text
    except Exception as e:
        logger.error(f"❌ Failed to check local models: {e}")
        return False


# Attempt to pull (load) a model via Ollama's /pull endpoint
def load_model(model_name: str) -> bool:
    try:
        logger.info(f"🧠 Preloading model: {model_name}")
        resp = requests.post(PULL_ENDPOINT, json={"name": model_name})

        if debug_mode:
            logger.debug(f"📨 Ollama pull response: {resp.status_code} - {resp.text}")
        elif resp.status_code == 200:
            logger.info("📦 Model pull started successfully.")
        else:
            logger.warning(f"⚠️ Model pull returned {resp.status_code}: {resp.text[:100]}...")

        return resp.status_code == 200
    except Exception as e:
        logger.error(f"❌ Exception during model load: {str(e)}")
        return False


# Send an empty prompt to unload a model from VRAM safely using keep_alive: 0
def unload_model(model_name: str) -> bool:
    try:
        logger.info(f"🧹 Sending safe unload request for `{model_name}`")
        payload = {
            "model": model_name,
            "prompt": "",     # ✅ Required to make the request valid
            "keep_alive": 0,  # ✅ Unload from VRAM but keep on disk
        }
        resp = requests.post(GEN_ENDPOINT, json=payload)
        logger.info(f"🧽 Ollama unload response: {resp.status_code} - {resp.text}")
        return resp.status_code == 200
    except Exception as e:
        logger.error(f"❌ Exception during soft-unload: {str(e)}")
        return False
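
# Illustrative load/unload lifecycle (assumes a reachable Ollama server and that
# the model tag exists in its registry):
#
#   if not model_exists_locally(MODEL_NAME):
#       load_model(MODEL_NAME)          # POST /pull
#   ...
#   unload_model(MODEL_NAME)            # POST /generate with keep_alive=0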


# Shortcut for getting the current model (can be expanded later for dynamic switching)
def get_current_model():
    return get_model_name()


# Main LLM interaction: injects personality and sends the prompt to Ollama
def get_ai_response(user_prompt, context=None, user_profile=None):
    model_name = get_model_name()
    load_model(model_name)
    persona = load_persona()

    # Build prompt pieces.
    # If a modelfile is active and provides a SYSTEM, prefer it over the persona prompt_inject.
    system_inject = ""
    if MODFILE and MODFILE.get('system'):
        system_inject = MODFILE.get('system')
    elif persona:
        system_inject = persona["prompt_inject"].replace("“", '"').replace("”", '"').replace("’", "'")

    user_block = ""
    if user_profile and user_profile.get("custom_prompt"):
        user_block = f"[User Instruction]\n{user_profile['custom_prompt']}\n"

    context_block = f"[Recent Conversation]\n{context}\n" if (context and AI_INCLUDE_CONTEXT) else ""

    # If a modelfile is active and defines a template, render it (best-effort)
    full_prompt = None
    if MODFILE:
        tpl = MODFILE.get('template')
        if tpl:
            # Simple template handling: remove simple Go-style conditionals
            tpl_work = re.sub(r"\{\{\s*if\s+\.System\s*\}\}", "", tpl)
            tpl_work = re.sub(r"\{\{\s*end\s*\}\}", "", tpl_work)
            # Build the prompt body we want to inject as .Prompt
            prompt_body = f"{user_block}{context_block}User: {user_prompt}\n"
            # Replace common placeholders
            tpl_work = tpl_work.replace("{{ .System }}", system_inject)
            tpl_work = tpl_work.replace("{{ .Prompt }}", prompt_body)
            tpl_work = tpl_work.replace("{{ .User }}", user_block)
            full_prompt = tpl_work.strip()
        else:
            # No template: use system_inject and do not append the persona name
            full_prompt = f"{system_inject}\n{user_block}{context_block}User: {user_prompt}\nResponse:"
    else:
        # No modelfile active: fall back to persona behaviour (include the persona name)
        if persona:
            full_prompt = f"{system_inject}\n{user_block}{context_block}\nUser: {user_prompt}\n{persona['name']}:"
        else:
            full_prompt = f"{user_block}{context_block}\nUser: {user_prompt}\nResponse:"

    # Build the base payload and merge modelfile params if present
    payload = {"model": model_name, "prompt": full_prompt, "stream": False}
    if MODFILE and MODFILE.get('params'):
        for k, v in MODFILE.get('params', {}).items():
            payload[k] = v

    # Logging: concise info plus debug output for the full payload/response
    req_id = generate_req_id("llm-")
    user_label = user_profile.get("display_name") if user_profile else None
    log_llm_request(logger, req_id, model_name, user_label, len(context.splitlines()) if context else 0)
    logger.debug("%s Sending payload to Ollama: model=%s user=%s", req_id, model_name, user_label)
    logger.debug("%s Payload size=%d chars", req_id, len(full_prompt))

    start = time.perf_counter()
    try:
        response = requests.post(GEN_ENDPOINT, json=payload)
        duration = time.perf_counter() - start
        # Log the raw response only at DEBUG to avoid clutter
        logger.debug("%s Raw response status=%s", req_id, response.status_code)
        logger.debug("%s Raw response body=%s", req_id, getattr(response, "text", ""))

        if response.status_code == 200:
            result = response.json()
            short = (result.get("response") or "").replace("\n", " ")[:240]
            log_llm_response(logger, req_id, model_name, duration, short, raw=result)
            return result.get("response", "[No message in response]")
        else:
            # Include the status in the logs and return an error string
            log_llm_response(logger, req_id, model_name, duration, f"[Error {response.status_code}]", raw=response.text)
            return f"[Error {response.status_code}] {response.text}"
    except Exception as e:
        duration = time.perf_counter() - start
        logger.exception("%s Exception during LLM call", req_id)
        log_llm_response(logger, req_id, model_name, duration, f"[Exception] {e}")
        return f"[Exception] {str(e)}"


# Runtime modelfile management APIs -------------------------------------------------

def load_modelfile(path: str | None = None) -> bool:
    """Load (or reload) a modelfile at runtime.

    If `path` is provided, update the configured modelfile path and attempt
    to load from that location. Returns True on success.
    """
    global MODFILE, AI_MODFILE_PATH, AI_USE_MODFILE
    if path:
        AI_MODFILE_PATH = path

    try:
        # Enable modelfile usage if it was disabled
        AI_USE_MODFILE = True

        if not AI_MODFILE_PATH:
            logger.warning("⚠️ No modelfile path configured to load.")
            return False

        mod = load_modfile_if_exists(AI_MODFILE_PATH)
        MODFILE = mod
        if MODFILE:
            logger.info(f"🔁 Modelfile loaded: {AI_MODFILE_PATH}")
            return True
        else:
            logger.warning(f"⚠️ Modelfile not found or failed to parse: {AI_MODFILE_PATH}")
            return False
    except Exception as e:
        logger.exception("⚠️ Exception while loading modelfile: %s", e)
        return False


def unload_modelfile() -> bool:
    """Disable/unload the currently active modelfile so persona injection
    falls back to the standard `persona.json` mechanism."""
    global MODFILE, AI_USE_MODFILE
    MODFILE = None
    AI_USE_MODFILE = False
    logger.info("🔁 Modelfile unloaded/disabled at runtime.")
    return True


def get_modelfile_info() -> dict | None:
    """Return a small diagnostic dict about the currently loaded modelfile,
    or None if no modelfile is active."""
    if not MODFILE:
        return None
    return {
        "_source_path": MODFILE.get("_source_path"),
        "base_model": MODFILE.get("base_model"),
        "params": MODFILE.get("params"),
        "system_preview": (MODFILE.get("system") or "")[:300],
    }
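
# Illustrative runtime switch between modelfile-driven and persona.json-driven
# prompting (the path is an example):
#
#   load_modelfile("personas/delta.mod")
#   get_modelfile_info()   # {'_source_path': ..., 'base_model': ..., 'params': ..., ...}
#   unload_modelfile()     # fall back to persona.json injection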


def build_dryrun_payload(user_prompt, context=None, user_profile=None) -> dict:
    """Build and return the assembled prompt and payload that would be
    sent to the model, without performing any HTTP calls. Useful for
    inspecting template rendering and merged modelfile params.

    Returns: { 'prompt': str, 'payload': dict }
    """
    model_name = get_model_name()
    # Reuse the main prompt-building logic but avoid calling load_model()
    persona = load_persona()

    # Build prompt pieces (same logic as `get_ai_response`)
    system_inject = ""
    if MODFILE and MODFILE.get('system'):
        system_inject = MODFILE.get('system')
    elif persona:
        system_inject = persona["prompt_inject"].replace("“", '"').replace("”", '"').replace("’", "'")

    user_block = ""
    if user_profile and user_profile.get("custom_prompt"):
        user_block = f"[User Instruction]\n{user_profile['custom_prompt']}\n"

    context_block = f"[Recent Conversation]\n{context}\n" if (context and AI_INCLUDE_CONTEXT) else ""

    if MODFILE:
        tpl = MODFILE.get('template')
        if tpl:
            tpl_work = re.sub(r"\{\{\s*if\s+\.System\s*\}\}", "", tpl)
            tpl_work = re.sub(r"\{\{\s*end\s*\}\}", "", tpl_work)
            prompt_body = f"{user_block}{context_block}User: {user_prompt}\n"
            tpl_work = tpl_work.replace("{{ .System }}", system_inject)
            tpl_work = tpl_work.replace("{{ .Prompt }}", prompt_body)
            tpl_work = tpl_work.replace("{{ .User }}", user_block)
            full_prompt = tpl_work.strip()
        else:
            full_prompt = f"{system_inject}\n{user_block}{context_block}User: {user_prompt}\nResponse:"
    else:
        if persona:
            full_prompt = f"{system_inject}\n{user_block}{context_block}\nUser: {user_prompt}\n{persona['name']}:"
        else:
            full_prompt = f"{user_block}{context_block}\nUser: {user_prompt}\nResponse:"

    # Build the payload and merge modelfile params
    payload = {"model": model_name, "prompt": full_prompt, "stream": False}
    if MODFILE and MODFILE.get('params'):
        for k, v in MODFILE.get('params', {}).items():
            payload[k] = v

    return {"prompt": full_prompt, "payload": payload}
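
# Illustrative dry run, useful for checking how a template renders without
# touching the Ollama server:
#
#   dry = build_dryrun_payload("Hello there", context="User: hi\nDelta: hey")
#   print(dry["prompt"])
#   print(dry["payload"])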