📦 Add modelfile support and make modelfiles the primary persona source

This commit is contained in:
milo 2025-09-20 15:36:13 -04:00
parent 3abbdd96eb
commit 4e2dfdfba6
23 changed files with 1970 additions and 26 deletions

1
.env
View file

@@ -6,6 +6,7 @@ CHANNEL_ID=1380999713272238151
SHOW_THINKING_BLOCKS=false
DEBUG_MODE=true
AUTOREPLY_ENABLED=true
AI_INCLUDE_CONTEXT=false
# ---------------------------
# Logging configuration
# - LOG_LEVEL: global base level (INFO recommended)

1104
bot.log

File diff suppressed because one or more lines are too long

25
examples/delta.mod Normal file
View file

@@ -0,0 +1,25 @@
# Delta persona mod file
# NAME <string> - friendly name for this modelfile/persona
# FROM <model> - base model to run (e.g., gemma3:12b)
# PARAMETER <key> <value> - Ollama model parameters (temperature, max_tokens, top_p, etc.)
# INCLUDE <path> - include another mod or json to merge params/system/template
# SYSTEM """...""" - system prompt injected at the top of user prompt
# TEMPLATE """...""" - a template for rendering final prompt; supports placeholders
# {{ .System }} - inserted system prompt
# {{ .Prompt }} - the assembled user prompt
# {{ .User }} - user-specific instruction block
# Example Delta modfile:
NAME Delta
FROM gemma3:12b
PARAMETER temperature 0.6
PARAMETER max_tokens 1024
PARAMETER top_p 0.95
SYSTEM """
You are Delta, an irreverent, playful assistant with a sarcastic undertone but helpful
intent. Keep answers concise, use witty one-liners sparingly, and prefer clear steps when
providing instructions. Avoid unsafe content and follow policy guidelines.
"""
TEMPLATE """
{{ .System }}
{{ .Prompt }}
"""

11
examples/gojo.mod Normal file
View file

@@ -0,0 +1,11 @@
FROM gemma3:12b
PARAMETER temperature 0.85
PARAMETER num_ctx 4096
SYSTEM """
You are Satoru Gojo from Jujutsu Kaisen. You're cocky, powerful, and flirty, but deeply protective of your students. Speak in a modern tone. Never admit you're a bot.
"""
TEMPLATE """
{{ if .System }}<|system|>{{ .System }}<|end|>{{ end }}{{ .Prompt }}<|assistant|>
"""

26
examples/template.mod Normal file
View file

@@ -0,0 +1,26 @@
# Template modfile (template for creating new .mod files)
# Supported fields and notes:
# NAME <string> - human friendly name for the modfile
# FROM <model> - model identifier for Ollama (e.g., gemma3:12b)
# PARAMETER <key> <value> - Any top-level field to include in the Ollama payload.
# Common keys: temperature (float), max_tokens (int), top_p (float), freq_penalty (float), presence_penalty (float)
# INCLUDE <path> - path to another .mod or .json to merge in (relative to this file)
# SYSTEM """...""" - system prompt (the persona voice + instructions)
# TEMPLATE """...""" - custom rendering template. Placeholders:
# {{ .System }} - the system prompt text
# {{ .Prompt }} - the assembled user prompt body
# {{ .User }} - user-specific instruction block
# Example template:
NAME TemplatePersona
FROM gemma3:12b
PARAMETER temperature 0.7
PARAMETER max_tokens 800
INCLUDE common_defaults.mod
SYSTEM """
You are TemplatePersona: helpful, concise, and policy-compliant. Use bullet lists
for multi-step answers and end with a short summary.
"""
TEMPLATE """
{{ .System }}
{{ .Prompt }}
"""

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

243
src/ai.py
View file

@@ -5,10 +5,12 @@
import os
import requests
import re
import yaml
from dotenv import load_dotenv
from personality import load_persona
from user_profiles import format_profile_for_block
from logger import setup_logger, generate_req_id, log_llm_request, log_llm_response
from modelfile import load_modfile_if_exists, parse_mod_file

debug_mode = os.getenv("DEBUG_MODE", "false").lower() == "true"

@@ -19,6 +21,97 @@ logger = setup_logger("ai")

# Load environment variables from .env file
load_dotenv()
# Load settings.yml to fetch ai.modfile config
try:
    settings_path = os.path.join(os.path.dirname(__file__), "settings.yml")
    with open(settings_path, "r", encoding="utf-8") as f:
        SETTINGS = yaml.safe_load(f)
except Exception:
    SETTINGS = {}

# Modelfile config
AI_USE_MODFILE = SETTINGS.get("ai", {}).get("use_modfile", False)
AI_MODFILE_PATH = SETTINGS.get("ai", {}).get("modfile_path")
MODFILE = None
if AI_USE_MODFILE and AI_MODFILE_PATH:
    try:
        MODFILE = load_modfile_if_exists(AI_MODFILE_PATH)
        if MODFILE:
            # Resolve includes (best-effort): merge params and append system/template
            def _resolve_includes(mod):
                merged = dict(mod)
                src = merged.get('_source_path')
                includes = merged.get('includes', []) or []
                base_dir = os.path.dirname(src) if src else os.path.dirname(__file__)
                for inc in includes:
                    try:
                        # Resolve relative to base_dir
                        cand = inc if os.path.isabs(inc) else os.path.normpath(os.path.join(base_dir, inc))
                        if not os.path.exists(cand):
                            continue
                        inc_mod = parse_mod_file(cand)
                        # Merge params (included params do not override main ones)
                        inc_params = inc_mod.get('params', {}) or {}
                        for k, v in inc_params.items():
                            if k not in merged.get('params', {}):
                                merged.setdefault('params', {})[k] = v
                        # Append system text if main doesn't have one
                        if not merged.get('system') and inc_mod.get('system'):
                            merged['system'] = inc_mod.get('system')
                        # If main has no template, adopt included template
                        if not merged.get('template') and inc_mod.get('template'):
                            merged['template'] = inc_mod.get('template')
                    except Exception:
                        continue
                return merged
            MODFILE = _resolve_includes(MODFILE)
            logger.info(f"🔁 Modelfile loaded: {AI_MODFILE_PATH}")
        else:
            logger.warning(f"⚠️ Modelfile not found or failed to parse: {AI_MODFILE_PATH}")
    except Exception as e:
        logger.exception("⚠️ Exception while loading modelfile: %s", e)

# If no modelfile explicitly configured, attempt to auto-load a `delta.mod` or
# `delta.json` in common example/persona locations so the bot has a default persona.
if not MODFILE:
    for candidate in [
        os.path.join(os.path.dirname(__file__), '..', 'examples', 'delta.mod'),
        os.path.join(os.path.dirname(__file__), '..', 'examples', 'delta.json'),
        os.path.join(os.path.dirname(__file__), '..', 'personas', 'delta.mod'),
    ]:
        try:
            mod = load_modfile_if_exists(candidate)
            if mod:
                MODFILE = mod
                logger.info(f"🔁 Auto-loaded default modelfile: {candidate}")
                break
        except Exception:
            continue

def list_modelfiles(search_dirs=None):
    """Return a list of candidate modelfile paths from common locations."""
    base_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), '..'))
    if search_dirs is None:
        search_dirs = [
            os.path.join(base_dir, 'examples'),
            os.path.join(base_dir, 'personas'),
            os.path.join(base_dir, 'src'),
            base_dir,
        ]
    results = []
    for d in search_dirs:
        try:
            if not os.path.isdir(d):
                continue
            for fname in os.listdir(d):
                if fname.endswith('.mod') or fname.endswith('.json'):
                    results.append(os.path.join(d, fname))
        except Exception:
            continue
    return sorted(results)
# Base API setup from .env (e.g., http://localhost:11434/api)
# Normalize to ensure the configured base includes the `/api` prefix so
# endpoints like `/generate` and `/tags` are reachable even if the user

@@ -39,6 +132,7 @@ TAGS_ENDPOINT = f"{BASE_API}/tags"

# Startup model and debug toggle from .env
MODEL_NAME = os.getenv("MODEL_NAME", "llama3:latest")
SHOW_THINKING_BLOCKS = os.getenv("SHOW_THINKING_BLOCKS", "false").lower() == "true"
AI_INCLUDE_CONTEXT = os.getenv("AI_INCLUDE_CONTEXT", "true").lower() == "true"

# Ensure API base is configured
if not BASE_API:
@@ -107,24 +201,50 @@ def get_ai_response(user_prompt, context=None, user_profile=None):
    model_name = get_model_name()
    load_model(model_name)
    persona = load_persona()

    # Build prompt pieces
-    safe_inject = ""
-    if persona:
-        safe_inject = persona["prompt_inject"].replace("“", '"').replace("”", '"').replace("’", "'")
    # If a modelfile is active and provides a SYSTEM, prefer it over persona prompt_inject
    system_inject = ""
    if MODFILE and MODFILE.get('system'):
        system_inject = MODFILE.get('system')
    elif persona:
        system_inject = persona["prompt_inject"].replace("“", '"').replace("”", '"').replace("’", "'")

    user_block = ""
    if user_profile and user_profile.get("custom_prompt"):
        user_block = f"[User Instruction]\n{user_profile['custom_prompt']}\n"

-    context_block = f"[Recent Conversation]\n{context}\n" if context else ""
    context_block = f"[Recent Conversation]\n{context}\n" if (context and AI_INCLUDE_CONTEXT) else ""

-    if persona:
-        full_prompt = f"{safe_inject}\n{user_block}{context_block}\nUser: {user_prompt}\n{persona['name']}:"
-    else:
-        full_prompt = f"{user_block}{context_block}\nUser: {user_prompt}\nResponse:"
    # If a modelfile is active and defines a template, render it (best-effort)
    full_prompt = None
    if MODFILE:
        tpl = MODFILE.get('template')
        if tpl:
            # Simple template handling: remove simple Go-style conditionals
            tpl_work = re.sub(r"\{\{\s*if\s+\.System\s*\}\}", "", tpl)
            tpl_work = re.sub(r"\{\{\s*end\s*\}\}", "", tpl_work)
            # Build the prompt body we want to inject as .Prompt
            prompt_body = f"{user_block}{context_block}User: {user_prompt}\n"
            # Replace common placeholders
            tpl_work = tpl_work.replace("{{ .System }}", system_inject)
            tpl_work = tpl_work.replace("{{ .Prompt }}", prompt_body)
            tpl_work = tpl_work.replace("{{ .User }}", user_block)
            full_prompt = tpl_work.strip()
        else:
            # No template: use system_inject and do not append persona name
            full_prompt = f"{system_inject}\n{user_block}{context_block}User: {user_prompt}\nResponse:"
    else:
        # No modelfile active: fall back to persona behaviour (include persona name)
        if persona:
            full_prompt = f"{system_inject}\n{user_block}{context_block}\nUser: {user_prompt}\n{persona['name']}:"
        else:
            full_prompt = f"{user_block}{context_block}\nUser: {user_prompt}\nResponse:"

    # Build base payload and merge modelfile params if present
    payload = {"model": model_name, "prompt": full_prompt, "stream": False}
    if MODFILE and MODFILE.get('params'):
        for k, v in MODFILE.get('params', {}).items():
            payload[k] = v

    # Logging: concise info plus debug for full payload/response
    req_id = generate_req_id("llm-")
@@ -155,4 +275,109 @@ def get_ai_response(user_prompt, context=None, user_profile=None):
        duration = time.perf_counter() - start
        logger.exception("%s Exception during LLM call", req_id)
        log_llm_response(logger, req_id, model_name, duration, f"[Exception] {e}")
        return f"[Exception] {str(e)}"
# Runtime modelfile management APIs -------------------------------------------------
def load_modelfile(path: str = None) -> bool:
    """Load (or reload) a modelfile at runtime.

    If `path` is provided, update the configured modelfile path and attempt
    to load from that location. Returns True on success.
    """
    global MODFILE, AI_MODFILE_PATH, AI_USE_MODFILE
    if path:
        AI_MODFILE_PATH = path
    try:
        # Enable modelfile usage if it was disabled
        AI_USE_MODFILE = True
        if not AI_MODFILE_PATH:
            logger.warning("⚠️ No modelfile path configured to load.")
            return False
        mod = load_modfile_if_exists(AI_MODFILE_PATH)
        MODFILE = mod
        if MODFILE:
            logger.info(f"🔁 Modelfile loaded: {AI_MODFILE_PATH}")
            return True
        else:
            logger.warning(f"⚠️ Modelfile not found or failed to parse: {AI_MODFILE_PATH}")
            return False
    except Exception as e:
        logger.exception("⚠️ Exception while loading modelfile: %s", e)
        return False

def unload_modelfile() -> bool:
    """Disable/unload the currently active modelfile so persona injection
    falls back to the standard `persona.json` mechanism."""
    global MODFILE, AI_USE_MODFILE
    MODFILE = None
    AI_USE_MODFILE = False
    logger.info("🔁 Modelfile unloaded/disabled at runtime.")
    return True

def get_modelfile_info() -> dict | None:
    """Return a small diagnostic dict about the currently loaded modelfile,
    or None if no modelfile is active."""
    if not MODFILE:
        return None
    return {
        "_source_path": MODFILE.get("_source_path"),
        "base_model": MODFILE.get("base_model"),
        "params": MODFILE.get("params"),
        "system_preview": (MODFILE.get("system") or "")[:300]
    }

def build_dryrun_payload(user_prompt, context=None, user_profile=None) -> dict:
    """Build and return the assembled prompt and payload that would be
    sent to the model, without performing any HTTP calls. Useful for
    inspecting template rendering and merged modelfile params.

    Returns: { 'prompt': str, 'payload': dict }
    """
    model_name = get_model_name()
    # Reuse main prompt building logic but avoid calling load_model()
    persona = load_persona()

    # Build prompt pieces (same logic as `get_ai_response`)
    system_inject = ""
    if MODFILE and MODFILE.get('system'):
        system_inject = MODFILE.get('system')
    elif persona:
        system_inject = persona["prompt_inject"].replace("“", '"').replace("”", '"').replace("’", "'")

    user_block = ""
    if user_profile and user_profile.get("custom_prompt"):
        user_block = f"[User Instruction]\n{user_profile['custom_prompt']}\n"

    context_block = f"[Recent Conversation]\n{context}\n" if (context and AI_INCLUDE_CONTEXT) else ""

    if MODFILE:
        tpl = MODFILE.get('template')
        if tpl:
            tpl_work = re.sub(r"\{\{\s*if\s+\.System\s*\}\}", "", tpl)
            tpl_work = re.sub(r"\{\{\s*end\s*\}\}", "", tpl_work)
            prompt_body = f"{user_block}{context_block}User: {user_prompt}\n"
            tpl_work = tpl_work.replace("{{ .System }}", system_inject)
            tpl_work = tpl_work.replace("{{ .Prompt }}", prompt_body)
            tpl_work = tpl_work.replace("{{ .User }}", user_block)
            full_prompt = tpl_work.strip()
        else:
            full_prompt = f"{system_inject}\n{user_block}{context_block}User: {user_prompt}\nResponse:"
    else:
        if persona:
            full_prompt = f"{system_inject}\n{user_block}{context_block}\nUser: {user_prompt}\n{persona['name']}:"
        else:
            full_prompt = f"{user_block}{context_block}\nUser: {user_prompt}\nResponse:"

    # Build payload and merge modelfile params
    payload = {"model": model_name, "prompt": full_prompt, "stream": False}
    if MODFILE and MODFILE.get('params'):
        for k, v in MODFILE.get('params', {}).items():
            payload[k] = v

    return {"prompt": full_prompt, "payload": payload}

src/bot.py
View file

@@ -1,6 +1,7 @@
# bot.py
import time
import asyncio
import os
import discord
import yaml

@@ -11,9 +12,9 @@ from discord.ext import commands
from discord.ext.commands import (
    cooldown,
    BucketType,
-    CooldownMapping,
    CommandOnCooldown
)
from cooldown import CooldownManager
# Local imports
from scheduler import start_scheduler

@@ -36,6 +37,7 @@ from ai import (
    get_ai_response,
    TAGS_ENDPOINT
)
from ai import load_modelfile, unload_modelfile, get_modelfile_info
from time_logger import log_message_activity
from autochat import should_auto_reply, generate_auto_reply, update_reply_timer, maybe_react_to_message

@@ -47,6 +49,17 @@ logger = setup_logger("bot")
dotenv_path = os.path.join(os.path.dirname(__file__), '..', '.env')
load_dotenv(dotenv_path)
# No hardcoded owner IDs; use discord.py's owner check and guild admin perms.
# Message-level guard for cooldown updates (avoid double-updating during dispatch)
_cooldown_updated = set()
# Message-level guard to avoid sending the same cooldown error multiple times
_cooldown_error_sent = set()
_cooldown_recorded_for_msg = set()
# Message-level guard for generic one-shot sends (avoid duplicate command replies)
_message_sent_once = set()
# Load model settings
MODEL_NAME = os.getenv("MODEL_NAME", "llama3:latest")
logger.info(f"🔍 Loaded MODEL_NAME from .env: {MODEL_NAME}")
@@ -87,26 +100,149 @@ async def on_command_error(ctx, error):
        retry_secs = round(error.retry_after, 1)
        template = random.choice(COOLDOWN_MSG_TEMPLATE) if isinstance(COOLDOWN_MSG_TEMPLATE, list) else COOLDOWN_MSG_TEMPLATE
        msg = template.replace("{seconds}", str(retry_secs))
-        logger.info(f"Command {ctx.command} on cooldown. Retry after {retry_secs} seconds.")
-        await ctx.send(msg)
        # Prevent duplicate cooldown messages for the same triggering message
        msg_id = getattr(getattr(ctx, 'message', None), 'id', None)
        if msg_id is not None and msg_id in _cooldown_error_sent:
            logger.debug(f"on_command_error: cooldown message already sent for msg={msg_id}")
            return
        logger.info(f"Command {ctx.command} on cooldown for user={getattr(ctx.author, 'id', None)}. Retry after {retry_secs} seconds.")
        try:
            await ctx.send(msg)
        except Exception:
            # ignore send failures
            pass
        if msg_id is not None:
            _cooldown_error_sent.add(msg_id)

            async def _clear_cooldown_error(mid):
                try:
                    await __import__('asyncio').sleep(5)
                    _cooldown_error_sent.discard(mid)
                except Exception:
                    pass
            try:
                __import__('asyncio').create_task(_clear_cooldown_error(msg_id))
            except Exception:
                pass
    else:
        raise error
-# Global cooldown
-global_cooldown = CooldownMapping.from_cooldown(1, GLOBAL_COOLDOWN_SECONDS, BucketType.user)
# Global cooldown manager (per-user)
_cooldown_mgr = CooldownManager()

@bot.check
async def global_command_cooldown(ctx):
-    bucket = global_cooldown.get_bucket(ctx.message)
-    retry_after = bucket.update_rate_limit()
-    if retry_after:
-        raise CommandOnCooldown(bucket, retry_after, BucketType.user)
    # Allow the application owner to bypass cooldowns
    try:
        if await bot.is_owner(ctx.author):
            return True
    except Exception:
        pass
    # Allow guild administrators / users with Manage Guild to bypass cooldowns
    try:
        perms = getattr(ctx.author, 'guild_permissions', None)
        if perms and (perms.administrator or perms.manage_guild):
            return True
    except Exception:
        pass
    # Use a message-level guard so we only update the cooldown once per message
    user_id = getattr(ctx.author, 'id', None)
    msg_id = getattr(getattr(ctx, 'message', None), 'id', None)
    logger.debug(f"global_command_cooldown: check user={user_id} msg={msg_id} command={getattr(ctx, 'command', None)}")
    # If we've already updated cooldown for this message, allow immediately
    if msg_id is not None and msg_id in _cooldown_updated:
        logger.debug(f"global_command_cooldown: msg {msg_id} already updated, allow")
        return True
-    # Check and update atomically; this will prevent races where multiple
    # Use peek to inspect remaining time without updating state. The actual
    # recording of the timestamp happens once the command starts (see
    # `before_invoke` handler) so there's a single canonical writer.
    retry = await _cooldown_mgr.peek('global', user_id, GLOBAL_COOLDOWN_SECONDS)
    if retry > 0.0:
        logger.info(f"global_command_cooldown: user={user_id} blocked, retry={retry}")
        raise CommandOnCooldown(commands.Cooldown(1, GLOBAL_COOLDOWN_SECONDS, BucketType.user), retry)
    # Mark this message as updated so repeated checks during dispatch don't re-update
    if msg_id is not None:
        _cooldown_updated.add(msg_id)

        # schedule removal after a short grace window
        async def _remove_later(mid):
            try:
                await __import__('asyncio').sleep(5)
                _cooldown_updated.discard(mid)
            except Exception:
                pass
        try:
            __import__('asyncio').create_task(_remove_later(msg_id))
        except Exception:
            # ignore if event loop not running
            pass
    return True
# Record cooldown when a command is about to execute. This centralizes the
# write side of the cooldown and prevents multiple check-and-update races.
@bot.before_invoke
async def record_global_cooldown(ctx):
    try:
        # bypass for owners/admins
        if await bot.is_owner(ctx.author):
            return
    except Exception:
        pass
    try:
        perms = getattr(ctx.author, 'guild_permissions', None)
        if perms and (perms.administrator or perms.manage_guild):
            return
    except Exception:
        pass
    user_id = getattr(ctx.author, 'id', None)
    msg_id = getattr(getattr(ctx, 'message', None), 'id', None)
    # If we've already recorded cooldown for this message, skip (idempotent)
    if msg_id is not None and msg_id in _cooldown_recorded_for_msg:
        logger.debug(f"record_global_cooldown: already recorded for msg={msg_id}")
        return
    # Single writer: record the timestamp so future peeks will see the
    # updated value.
    try:
        await _cooldown_mgr.record('global', user_id)
        logger.debug(f"record_global_cooldown: recorded for user={user_id}")
        if msg_id is not None:
            _cooldown_recorded_for_msg.add(msg_id)

            async def _clear_record(mid):
                try:
                    await __import__('asyncio').sleep(5)
                    _cooldown_recorded_for_msg.discard(mid)
                except Exception:
                    pass
            try:
                __import__('asyncio').create_task(_clear_record(msg_id))
            except Exception:
                pass
    except Exception as e:
        logger.debug(f"record_global_cooldown: failed to record for user={user_id}: {e}")
# Handle direct bot mentions
@bot.event
async def on_message(message):
    # If we observe our own outgoing messages from the gateway, log them.
    if message.author == bot.user:
        try:
            logger.debug(f"on_message: observed own message id={getattr(message,'id',None)} channel={getattr(getattr(message,'channel',None),'id',None)}")
        except Exception:
            pass
        return

    from autochat import maybe_react_to_message, generate_auto_reply
@@ -186,6 +322,122 @@ async def chat(ctx, *, prompt):
    for chunk in wrap(reply, 2000):
        await ctx.send(chunk)
# Modelfile admin commands -------------------------------------------------
@bot.group(name="modfile")
@commands.is_owner()
async def modfile_group(ctx):
    """Manage modelfiles at runtime. Subcommands: reload, switch, disable, info"""
    if ctx.invoked_subcommand is None:
        await ctx.send("Available: `!modfile reload [path]`, `!modfile switch <path>`, `!modfile disable`, `!modfile info`")

@modfile_group.command(name="reload")
@commands.is_owner()
async def modfile_reload(ctx, *, path: str = None):
    """Reload the current modelfile or load from an optional new path."""
    await ctx.send("🔁 Reloading modelfile...")
    ok = load_modelfile(path) if path else load_modelfile()
    await ctx.send("✅ Reloaded." if ok else "❌ Failed to reload modelfile. Check logs.")

@modfile_group.command(name="switch")
@commands.is_owner()
async def modfile_switch(ctx, *, path: str):
    """Switch to a different modelfile path and load it."""
    await ctx.send(f"🔁 Switching modelfile to `{path}`...")
    ok = load_modelfile(path)
    await ctx.send("✅ Switched and loaded." if ok else "❌ Failed to switch modelfile. Check logs.")

@modfile_group.command(name="disable")
@commands.is_owner()
async def modfile_disable(ctx):
    """Disable the active modelfile and return to persona injection."""
    unload_modelfile()
    await ctx.send("✅ Modelfile disabled; falling back to persona injection.")

@modfile_group.command(name="info")
@commands.is_owner()
async def modfile_info(ctx):
    # Instrumentation: log invocation and message id to diagnose duplicate sends
    msg_id = getattr(getattr(ctx, 'message', None), 'id', None)
    logger.debug(f"modfile_info invoked: cmd={getattr(ctx, 'command', None)} user={getattr(ctx.author, 'id', None)} msg={msg_id}")
    info = get_modelfile_info()
    if not info:
        logger.debug(f"modfile_info: no modelfile, sending informational reply for msg={msg_id}")
        return await ctx.send("No modelfile currently loaded.")
    system_preview = info.get('system_preview') or ''
    lines = [
        f"Source: `{info.get('_source_path')}`",
        f"Base model: `{info.get('base_model')}`",
        f"Params: `{info.get('params')}`",
        "System preview:",
        "```" + system_preview + "```"]

    # Use per-message idempotent send to avoid duplicate replies
    msg_id = getattr(getattr(ctx, 'message', None), 'id', None)
    payload = "\n".join(lines)
    if msg_id is not None:
        key = ("modfile_info", msg_id)
        if key in _message_sent_once:
            logger.debug(f"modfile_info: already sent for msg={msg_id} - skipping send")
            return
        logger.debug(f"modfile_info: preparing to send reply for msg={msg_id}")
        _message_sent_once.add(key)

        async def _clear_sent(k):
            try:
                await __import__('asyncio').sleep(5)
                _message_sent_once.discard(k)
            except Exception:
                pass
        try:
            __import__('asyncio').create_task(_clear_sent(key))
        except Exception:
            pass
    try:
        sent = await ctx.send(payload)
        try:
            sent_id = getattr(sent, 'id', None)
            chan = getattr(getattr(sent, 'channel', None), 'id', None)
            logger.debug(f"modfile_info: sent payload for msg={msg_id} -> sent_id={sent_id} channel={chan}")
        except Exception:
            logger.debug(f"modfile_info: sent payload for msg={msg_id}")
    except Exception as e:
        logger.debug(f"modfile_info: failed to send payload for msg={msg_id}: {e}")

@modfile_group.command(name="list")
@commands.is_owner()
async def modfile_list(ctx):
    """List available modelfiles in common locations (examples/, personas/, src/)."""
    base = os.path.dirname(os.path.dirname(__file__))
    candidates = []
    search_dirs = [
        os.path.join(base, 'examples'),
        os.path.join(base, 'personas'),
        os.path.join(base, 'src'),
        base
    ]
    for d in search_dirs:
        if not os.path.isdir(d):
            continue
        for fname in os.listdir(d):
            if fname.endswith('.mod') or fname.endswith('.json'):
                candidates.append(os.path.join(d, fname))
    if not candidates:
        return await ctx.send("No modelfiles found in examples/, personas/, or src/.")
    lines = ["Available modelfiles:"]
    for p in sorted(candidates):
        lines.append(f"- `{p}`")
    await ctx.send("\n".join(lines))
@bot.command()
async def setpersona(ctx, *, description):
    set_persona(description)

@@ -251,6 +503,30 @@ async def list_models(ctx):
    except Exception as e:
        await ctx.send(f"❌ Failed to fetch models: {e}")
@bot.command(name="dryrun")
@commands.is_owner()
async def dryrun(ctx, *, prompt: str):
"""Build the prompt and payload without contacting the model.
Usage: `!dryrun Your test prompt here`"""
await ctx.send("🧪 Building dry-run payload...")
from ai import build_dryrun_payload
profile = load_user_profile(ctx.author)
info = build_dryrun_payload(prompt, context=None, user_profile=profile)
prompt_preview = info['prompt'][:1500]
payload_preview = {k: info['payload'][k] for k in info['payload'] if k != 'prompt'}
lines = [
"Prompt assembled:",
"```",
prompt_preview,
"```",
"Payload params:",
"```",
str(payload_preview),
"```"
]
await ctx.send("\n".join(lines))
@bot.command(name="setavatar") @bot.command(name="setavatar")
@commands.is_owner() @commands.is_owner()
async def set_avatar(ctx): async def set_avatar(ctx):

src/context.py
View file

@@ -8,10 +8,23 @@ base_dir = os.path.dirname(__file__)
with open(os.path.join(base_dir, "settings.yml"), "r", encoding="utf-8") as f:
    settings = yaml.safe_load(f)

-CONTEXT_LIMIT = settings["context"].get("max_messages", 15)
# Determine whether context should be included. Preference order:
# 1) `AI_INCLUDE_CONTEXT` environment variable if present
# 2) `settings.yml` -> context.enabled
env_val = os.getenv("AI_INCLUDE_CONTEXT", None)
if env_val is not None:
    AI_INCLUDE_CONTEXT = str(env_val).lower() == "true"
else:
    AI_INCLUDE_CONTEXT = settings.get("context", {}).get("enabled", True)

CONTEXT_LIMIT = settings.get("context", {}).get("max_messages", 15) if AI_INCLUDE_CONTEXT else 0

# Returns full discord.Message objects (for logic)
async def fetch_raw_context(channel, limit=CONTEXT_LIMIT):
    # If context injection is disabled or limit is <= 0, return early.
    if not AI_INCLUDE_CONTEXT or (not isinstance(limit, int)) or limit <= 0:
        return []
    messages = []
    async for message in channel.history(limit=100):
        # Skip other bots (but not Delta herself)
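
In effect, the committed values make the env flag decisive; a tiny sketch of the precedence, using the values from this commit's `.env` and `settings.yml`:

import os

# Mirror of the precedence logic above, with this commit's values.
os.environ["AI_INCLUDE_CONTEXT"] = "false"                      # from .env (takes priority)
settings = {"context": {"enabled": False, "max_messages": 0}}   # from settings.yml

env_val = os.getenv("AI_INCLUDE_CONTEXT", None)
include = (str(env_val).lower() == "true") if env_val is not None else settings.get("context", {}).get("enabled", True)
limit = settings.get("context", {}).get("max_messages", 15) if include else 0
print(include, limit)  # False 0 -> fetch_raw_context() returns [] immediately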

73
src/cooldown.py Normal file
View file

@@ -0,0 +1,73 @@
import time
import asyncio
import logging
from typing import Dict, Tuple

class CooldownManager:
    """A simple, race-safe cooldown manager.

    - Uses time.monotonic() to avoid system clock jumps.
    - Stores last-execution timestamps keyed by (key, user_id).
    - `check_and_update` atomically checks and updates the timestamp.
    """

    def __init__(self):
        self._last: Dict[Tuple[str, int], float] = {}
        self._lock = asyncio.Lock()

    async def check_and_update(self, key: str, user_id: int, cooldown_sec: float) -> Tuple[bool, float]:
        """Check cooldown for (key, user_id).

        Returns (allowed, retry_after). If allowed==True it records the timestamp.
        If not allowed, returns (False, seconds_remaining).
        """
        now = time.monotonic()
        map_key = (key, int(user_id))
        async with self._lock:
            last = self._last.get(map_key, 0.0)
            elapsed = now - last
            if elapsed < float(cooldown_sec):
                return False, float(cooldown_sec) - elapsed
            # allowed -> update timestamp and return
            self._last[map_key] = now
            return True, 0.0

    async def record(self, key: str, user_id: int):
        """Record the current time for (key, user_id) without checking.

        This allows a two-phase flow where callers `peek` for remaining time
        during checks and then `record` once they actually begin processing
        the command (single canonical writer).
        """
        async with self._lock:
            self._last[(key, int(user_id))] = time.monotonic()
        try:
            logging.getLogger('bot').debug(f"CooldownManager.record: key={key} user={user_id} recorded")
        except Exception:
            pass

    async def peek(self, key: str, user_id: int, cooldown_sec: float) -> float:
        """Return seconds remaining (0 if allowed) without updating state."""
        now = time.monotonic()
        last = self._last.get((key, int(user_id)), 0.0)
        rem = float(cooldown_sec) - (now - last)
        try:
            logging.getLogger('bot').debug(f"CooldownManager.peek: key={key} user={user_id} rem={max(0.0, rem):.3f}s")
        except Exception:
            pass
        return max(0.0, rem)

    async def clear(self, key: str = None, user_id: int = None):
        """Clear stored timestamps selectively or entirely."""
        async with self._lock:
            if key is None and user_id is None:
                self._last.clear()
                return
            to_delete = []
            for k in list(self._last.keys()):
                k_key, k_user = k
                if (key is None or k_key == key) and (user_id is None or k_user == int(user_id)):
                    to_delete.append(k)
            for k in to_delete:
                del self._last[k]
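
The two-phase flow that `peek`/`record` enable (check without writing, then a single canonical write) can be exercised standalone; a minimal sketch, assuming `src/` is on `sys.path`:

import asyncio
from cooldown import CooldownManager

async def main():
    mgr = CooldownManager()
    # Phase 1 (check): peek does not mutate state, so duplicate dispatch
    # checks for the same message cannot double-charge the user.
    if await mgr.peek('global', 123, cooldown_sec=10) == 0.0:
        # Phase 2 (single writer): record only once the command actually runs.
        await mgr.record('global', 123)
    print(await mgr.peek('global', 123, 10))  # ~10.0 seconds remaining

asyncio.run(main())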

132
src/modelfile.py Normal file
View file

@@ -0,0 +1,132 @@
import os
import re
import json
from typing import Dict, Optional, Any

def _read_file(path: str) -> str:
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()

def _coerce_value(val: str) -> Any:
    """Try to coerce a string to int/float/bool otherwise return stripped string."""
    v = val.strip()
    if not v:
        return v
    # booleans
    if v.lower() in ("true", "false"):
        return v.lower() == "true"
    # numbers
    try:
        if '.' in v:
            return float(v)
        return int(v)
    except Exception:
        # strip surrounding quotes
        if (v.startswith('"') and v.endswith('"')) or (v.startswith("'") and v.endswith("'")):
            return v[1:-1]
        return v

def parse_mod_file(path: str) -> Dict[str, Optional[object]]:
    """Parse a .mod DSL or JSON modelfile and return a dict with normalized keys.

    Returns keys: name, base_model, system, template, params, includes
    """
    text = _read_file(path)
    _, ext = os.path.splitext(path)
    ext = ext.lower()
    if ext == '.json':
        data = json.loads(text)
        return {
            'name': data.get('name') or os.path.basename(path),
            'base_model': data.get('from') or data.get('base_model'),
            'system': data.get('system') or data.get('SYSTEM'),
            'template': data.get('template'),
            'params': data.get('params', {}),
            'includes': data.get('includes', []),
        }

    # DSL (.mod) parsing
    base_model = None
    params: Dict[str, Any] = {}
    system = None
    template = None
    name = os.path.basename(path)
    includes = []

    # NAME <value>
    m = re.search(r'^NAME\s+(.+)$', text, flags=re.MULTILINE)
    if m:
        name = m.group(1).strip()

    # FROM <model>
    m = re.search(r'^FROM\s+(.+)$', text, flags=re.MULTILINE)
    if m:
        base_model = m.group(1).strip()

    # INCLUDE <path>
    for im in re.finditer(r'^INCLUDE\s+(.+)$', text, flags=re.MULTILINE):
        inc = im.group(1).strip().strip('"').strip("'")
        if inc:
            includes.append(inc)

    # PARAMETER key value (value may be quoted)
    for pm in re.finditer(r'^PARAMETER\s+(\w+)\s+(.+)$', text, flags=re.MULTILINE):
        key = pm.group(1)
        val = pm.group(2).strip()
        params[key] = _coerce_value(val)

    # SYSTEM triple-quoted
    sm = re.search(r'SYSTEM\s+"""([\s\S]*?)"""', text)
    if sm:
        system = sm.group(1).strip()

    # TEMPLATE triple-quoted
    tm = re.search(r'TEMPLATE\s+"""([\s\S]*?)"""', text)
    if tm:
        template = tm.group(1).strip()

    return {
        'name': name,
        'base_model': base_model,
        'system': system,
        'template': template,
        'params': params,
        'includes': includes,
    }

def load_modfile_if_exists(path: str) -> Optional[Dict[str, object]]:
    if not path:
        return None
    path_input = os.path.expanduser(path)
    candidates = []
    src_dir = os.path.dirname(__file__)
    repo_root = os.path.normpath(os.path.join(src_dir, '..'))
    if os.path.isabs(path_input):
        candidates.append(path_input)
    else:
        candidates.append(os.path.normpath(os.path.join(src_dir, path_input)))
        candidates.append(os.path.normpath(os.path.join(repo_root, path_input)))
        candidates.append(os.path.normpath(os.path.join(repo_root, 'examples', os.path.basename(path_input))))
        candidates.append(os.path.normpath(os.path.join(repo_root, 'personas', os.path.basename(path_input))))
        candidates.append(os.path.normpath(os.path.join(src_dir, os.path.basename(path_input))))
        candidates.append(path_input)
    for candidate in candidates:
        try:
            if os.path.exists(candidate):
                parsed = parse_mod_file(candidate)
                if isinstance(parsed, dict):
                    parsed['_source_path'] = candidate
                    return parsed
        except Exception:
            continue
    return None
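
For reference, a few inputs and what `_coerce_value` returns for them per the branches above (a sketch, assuming `src/` is on `sys.path`):

from modelfile import _coerce_value

print(_coerce_value("0.85"))      # 0.85 (float: contains '.')
print(_coerce_value("4096"))      # 4096 (int)
print(_coerce_value("true"))      # True (bool)
print(_coerce_value('"quoted"'))  # 'quoted' (surrounding quotes stripped)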

src/personality.py
View file

@@ -9,10 +9,26 @@ PERSONA_FILE = "persona.json"

def load_persona():
    base_dir = os.path.dirname(__file__)  # Path to /src/

    # If a modelfile is active and contains a system/template, expose it as
    # the persona structure used by the rest of the codebase. Import `ai`
    # lazily to avoid circular imports at module import time.
    try:
        import ai
        if getattr(ai, 'MODFILE', None):
            MODFILE = ai.MODFILE
            persona = {
                'name': MODFILE.get('name') or 'ModPersona',
                'prompt_inject': MODFILE.get('system') or '',
                'emoji': '🤖',
                'style_prefix': (MODFILE.get('name') or 'Delta') + ':'
            }
            return persona
    except Exception:
        pass

    persona_path = os.path.join(base_dir, "persona.json")
    if not os.path.exists(persona_path):
-        #print("⚠️ persona.json not found. Using raw LLM mode.")
        logger.info("⚠️ persona.json not found. Using raw LLM mode.")
        return None

@@ -20,12 +36,10 @@ def load_persona():
        with open(persona_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        if not data.get("name") or not data.get("prompt_inject"):
-            #print("⚠️ persona.json missing fields. Using raw LLM mode.")
            logger.info("⚠️ persona.json missing fields. Using raw LLM mode.")
            return None
        return data
    except Exception as e:
-        #print(f"⚠️ Failed to load persona.json: {e}")
        logger.info(f"⚠️ Failed to load persona.json: {e}")
        return None

src/settings.yml
View file

@@ -13,7 +13,7 @@ autochat:
context:
  enabled: false # not working must implement
-  max_messages: 10 # max messages to keep in context
  max_messages: 0 # max messages to keep in context

user_profiles:
  enable_custom_prompt: true # ← Set false to ignore user `custom_prompt` values in replies

@@ -46,3 +46,8 @@ scheduler:
  inactivity:
    threshold_minutes: 120

ai:
  use_modfile: true
  modfile_path: "../examples/gojo.mod"

View file

@@ -3,9 +3,9 @@
  "name": "themiloverse",
  "display_name": "Miguel",
  "first_seen": "2025-05-15T03:16:30.011640",
-  "last_seen": "2025-09-19T17:51:52.553254",
  "last_seen": "2025-09-20T19:04:27.735898",
-  "last_message": "2025-09-19T17:51:52.553254",
  "last_message": "2025-09-20T19:04:27.735898",
-  "interactions": 242,
  "interactions": 364,
  "pronouns": "he/him",
  "avatar_url": "https://cdn.discordapp.com/avatars/161149541171593216/fb0553a29d9f73175cb6aea24d0e19ec.png?size=1024",
  "custom_prompt": "delta is very nice to me since I am her master, and creator"

39
tests/test_modelfile.py Normal file
View file

@@ -0,0 +1,39 @@
import os
from src.modelfile import parse_mod_file, load_modfile_if_exists

def test_parse_simple_mod(tmp_path):
    content = '''
NAME Gojo
FROM gemma3:12b
PARAMETER temperature 0.7
SYSTEM """
You are Gojo, sarcastic and helpful.
"""
TEMPLATE """
{{ .System }}
{{ .Prompt }}
"""
'''
    p = tmp_path / "gojo.mod"
    p.write_text(content)
    parsed = parse_mod_file(str(p))
    assert parsed['name'] == 'Gojo'
    assert parsed['base_model'] == 'gemma3:12b'
    assert parsed['params']['temperature'] == 0.7
    assert 'Gojo' in parsed['system'] or 'Gojo' in parsed['template']

def test_parse_json_mod(tmp_path):
    data = {
        "name": "json-persona",
        "from": "gemma3:12b",
        "system": "You are JSON persona",
        "params": {"temperature": 0.5}
    }
    p = tmp_path / "j.mod.json"
    p.write_text(str(data).replace("'", '"'))
    parsed = parse_mod_file(str(p))
    assert parsed['name'] == 'json-persona'
    assert parsed['base_model'] == 'gemma3:12b'
    assert parsed['params']['temperature'] == 0.5
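
A test for INCLUDE parsing would slot in naturally here; a hypothetical sketch (not in this commit) against `parse_mod_file` as written above:

def test_parse_includes(tmp_path):
    # INCLUDE lines should surface in the 'includes' list, unquoted.
    p = tmp_path / "inc.mod"
    p.write_text('FROM gemma3:12b\nINCLUDE common_defaults.mod\n')
    parsed = parse_mod_file(str(p))
    assert parsed['base_model'] == 'gemma3:12b'
    assert parsed['includes'] == ['common_defaults.mod']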