# ai.py
# This file handles all AI interactions, including loading/unloading models,
# generating responses, and injecting personas using the Ollama API.
import os
import re

import requests
from dotenv import load_dotenv

from logger import setup_logger
from personality import load_persona
from user_profiles import format_profile_for_block

# Set up logger specifically for AI operations
logger = setup_logger("ai")

# Load environment variables from .env file
load_dotenv()

# Base API setup from .env (e.g., http://localhost:11434/api)
# Default to an empty string so a missing OLLAMA_API hits the explicit check below
# instead of raising AttributeError on None.
BASE_API = os.getenv("OLLAMA_API", "").rstrip("/")  # Remove trailing slash just in case

# API endpoints for different Ollama operations
GEN_ENDPOINT = f"{BASE_API}/generate"
PULL_ENDPOINT = f"{BASE_API}/pull"
# UNLOAD_ENDPOINT is not used because unloading is done via `generate` with keep_alive=0
TAGS_ENDPOINT = f"{BASE_API}/tags"

# Startup model and debug toggle from .env
MODEL_NAME = os.getenv("MODEL_NAME", "llama3:latest")
SHOW_THINKING_BLOCKS = os.getenv("SHOW_THINKING_BLOCKS", "false").lower() == "true"
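
# A minimal sketch of the .env this module expects (values are illustrative, taken from
# the comments and defaults above, not hard requirements):
#   OLLAMA_API=http://localhost:11434/api
#   MODEL_NAME=llama3:latest
#   SHOW_THINKING_BLOCKS=false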

# Ensure API base is configured
if not BASE_API:
    logger.error("❌ OLLAMA_API not set.")
    raise ValueError("❌ OLLAMA_API not set.")


# Returns current model from env/config
def get_model_name():
    return MODEL_NAME


# Removes <think>...</think> blocks from the LLM response (used by some models)
def strip_thinking_block(text: str) -> str:
    return re.sub(r"<think>.*?</think>\s*", "", text, flags=re.DOTALL)
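
# For example, strip_thinking_block("<think>planning the reply</think>Hello!") returns
# "Hello!"; text from models that never emit <think> tags passes through unchanged.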


# Check if a model exists locally by calling /tags
def model_exists_locally(model_name: str) -> bool:
    try:
        resp = requests.get(TAGS_ENDPOINT)
        # Loose substring match against the raw /tags JSON; good enough for exact tag names
        return model_name in resp.text
    except Exception as e:
        logger.error(f"❌ Failed to check local models: {e}")
        return False
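
# Example (a sketch, assuming the Ollama server is reachable): a caller could use
# model_exists_locally(MODEL_NAME) to skip load_model() when the tag is already present.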


# Attempt to pull (load) a model via Ollama's /pull endpoint
def load_model(model_name: str) -> bool:
    try:
        logger.info(f"🧠 Preloading model: {model_name}")
        resp = requests.post(PULL_ENDPOINT, json={"name": model_name})
        logger.info(f"📨 Ollama pull response: {resp.status_code} - {resp.text}")
        return resp.status_code == 200
    except Exception as e:
        logger.error(f"❌ Exception during model load: {str(e)}")
        return False


# Send an empty prompt to unload a model from VRAM safely using keep_alive: 0
def unload_model(model_name: str) -> bool:
    try:
        logger.info(f"🧹 Sending safe unload request for `{model_name}`")
        payload = {
            "model": model_name,
            "prompt": "",     # ✅ Required to make the request valid
            "keep_alive": 0   # ✅ Unload from VRAM but keep on disk
        }
        resp = requests.post(GEN_ENDPOINT, json=payload)
        logger.info(f"🧽 Ollama unload response: {resp.status_code} - {resp.text}")
        return resp.status_code == 200
    except Exception as e:
        logger.error(f"❌ Exception during soft-unload: {str(e)}")
        return False
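
# Example (a sketch): unload_model(get_model_name()) frees VRAM while keeping the model
# cached on disk; the next /generate call simply reloads it.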


# Shortcut for getting the current model (can be expanded later for dynamic switching)
def get_current_model():
    return get_model_name()


# Main LLM interaction — injects personality and sends prompt to Ollama
def get_ai_response(user_prompt, context=None, user_profile=None):
    model_name = get_model_name()
    load_model(model_name)

    persona = load_persona()
    full_prompt = ""

    # Inject Delta's base persona
    if persona:
        # Normalize curly quotes so the injected text uses plain straight quotes
        safe_inject = persona["prompt_inject"].replace("“", "\"").replace("”", "\"").replace("’", "'")
        full_prompt += f"{safe_inject}\n"

    # Inject custom user profile prompt as override or influence
    if user_profile and user_profile.get("custom_prompt"):
        full_prompt += f"[User Instruction]\n{user_profile['custom_prompt']}\n"
        logger.info(f"🧠 Injected user custom prompt:\n{user_profile['custom_prompt']}")

    # Add recent chat context (this already includes the profile block!)
    if context:
        logger.info("🧠 Injected context block (pre-prompt):\n" + context)
        full_prompt += f"[Recent Conversation]\n{context}\n"

    # Add user's message and expected bot reply prefix
    if persona:
        full_prompt += f"\nUser: {user_prompt}\n{persona['name']}:"
    else:
        full_prompt += f"\nUser: {user_prompt}\nResponse:"
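
    # At this point full_prompt looks roughly like the following (illustrative only;
    # the exact text depends on the persona file, profile, and context):
    #   <persona prompt_inject>
    #   [User Instruction]
    #   <custom_prompt>
    #   [Recent Conversation]
    #   <context>
    #
    #   User: <user_prompt>
    #   <persona name>: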

    payload = {
        "model": model_name,
        "prompt": full_prompt,
        "stream": False  # Ask Ollama for a single JSON response rather than a stream
    }

    logger.info("🛰️ SENDING TO OLLAMA /generate")
    logger.info(f"Payload: {payload}")

    try:
        response = requests.post(GEN_ENDPOINT, json=payload)
        logger.info(f"📨 Raw response: {response.text}")
        if response.status_code == 200:
            result = response.json()
            reply = result.get("response", "[No message in response]")
            # Hide <think>...</think> reasoning unless the .env debug toggle enables it
            if not SHOW_THINKING_BLOCKS:
                reply = strip_thinking_block(reply)
            return reply
        else:
            return f"[Error {response.status_code}] {response.text}"
    except Exception as e:
        return f"[Exception] {str(e)}"
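

# A minimal manual smoke test (a sketch, not part of the bot runtime). It assumes a local
# Ollama server reachable at OLLAMA_API, a pulled MODEL_NAME, and a working persona/.env setup.
if __name__ == "__main__":
    print(get_ai_response("Say hello in one short sentence."))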