Merge pull request 'Model-loading-test' (#28) from Model-loading-test into main
Reviewed-on: #28
Commit ea206a1d7f
6 changed files with 551 additions and 62 deletions
.env (5 changes)

@@ -1,4 +1,5 @@
 DISCORD_TOKEN=MTM2OTc3NDY4OTYzNDg4MTU4Ng.G9Nrgz.akHoOO9SrXCDwiOCI3BUXfdR4bpSNb9zrVx9UI
-OLLAMA_API=http://192.168.1.100:11434/api/generate
+OLLAMA_API=http://192.168.1.100:11434/api/
-MODEL_NAME=deepseek-r1:8b
+MODEL_NAME=gemma3:12b
 CHANNEL_ID=1370420592360161393
+SHOW_THINKING_BLOCKS=false
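The .env now points OLLAMA_API at the API base rather than a specific endpoint, and adds a SHOW_THINKING_BLOCKS toggle. A quick sanity check that the new base URL is reachable is to list the locally pulled models via Ollama's /api/tags; a minimal sketch, assuming requests and python-dotenv are installed and the server named in .env is running (this script is illustrative, not part of the PR):

import os
import requests
from dotenv import load_dotenv

load_dotenv()  # reads OLLAMA_API from .env
base = os.getenv("OLLAMA_API", "").rstrip("/")  # e.g. http://192.168.1.100:11434/api

resp = requests.get(f"{base}/tags", timeout=10)  # lists models Ollama has pulled locally
resp.raise_for_status()
print([m["name"] for m in resp.json().get("models", [])])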
bot.log (378 changes)
File diff suppressed because one or more lines are too long
Binary file not shown.
src/ai.py (98 changes)

@@ -1,51 +1,117 @@
 # ai.py
+# This file handles all AI interactions, including loading/unloading models,
+# generating responses, and injecting personas using the Ollama API.
+
-import requests
 import os
+import requests
+import re
 from dotenv import load_dotenv
 from personality import load_persona
 from logger import setup_logger
 
+# Set up logger specifically for AI operations
 logger = setup_logger("ai")
 
+# Load environment variables from .env file
 load_dotenv()
-AI_URL = os.getenv("OLLAMA_API") # match .env and Docker ENV (e.g., http://localhost:11434/api/generate)
+
+# Base API setup from .env (e.g., http://localhost:11434/api)
+BASE_API = os.getenv("OLLAMA_API").rstrip("/")  # Remove trailing slash just in case
+
+# API endpoints for different Ollama operations
+GEN_ENDPOINT = f"{BASE_API}/generate"
+PULL_ENDPOINT = f"{BASE_API}/pull"
+# UNLOAD_ENDPOINT is not used because unloading is done via `generate` with keep_alive=0
+TAGS_ENDPOINT = f"{BASE_API}/tags"
+
+# Startup model and debug toggle from .env
 MODEL_NAME = os.getenv("MODEL_NAME", "llama3:latest")
+SHOW_THINKING_BLOCKS = os.getenv("SHOW_THINKING_BLOCKS", "false").lower() == "true"
 
-if not AI_URL:
-    logger.error("❌ OLLAMA_API environment variable is not set.")
-    raise ValueError("❌ OLLAMA_API environment variable is not set.")
+# Ensure API base is configured
+if not BASE_API:
+    logger.error("❌ OLLAMA_API not set.")
+    raise ValueError("❌ OLLAMA_API not set.")
+
+# Returns current model from env/config
+def get_model_name():
+    return MODEL_NAME
+
+# Removes <think>...</think> blocks from the LLM response (used by some models)
+def strip_thinking_block(text: str) -> str:
+    return re.sub(r"<think>.*?</think>\s*", "", text, flags=re.DOTALL)
+
+# Check if a model exists locally by calling /tags
+def model_exists_locally(model_name: str) -> bool:
+    try:
+        resp = requests.get(TAGS_ENDPOINT)
+        return model_name in resp.text
+    except Exception as e:
+        logger.error(f"❌ Failed to check local models: {e}")
+        return False
+
+# Attempt to pull (load) a model via Ollama's /pull endpoint
+def load_model(model_name: str) -> bool:
+    try:
+        logger.info(f"🧠 Preloading model: {model_name}")
+        resp = requests.post(PULL_ENDPOINT, json={"name": model_name})
+        logger.info(f"📨 Ollama pull response: {resp.status_code} - {resp.text}")
+        return resp.status_code == 200
+    except Exception as e:
+        logger.error(f"❌ Exception during model load: {str(e)}")
+        return False
+
+# Send an empty prompt to unload a model from VRAM safely using keep_alive: 0
+def unload_model(model_name: str) -> bool:
+    try:
+        logger.info(f"🧹 Sending safe unload request for `{model_name}`")
+        payload = {
+            "model": model_name,
+            "keep_alive": 0  # Tells Ollama to remove the model from memory, not disk
+        }
+        resp = requests.post(GEN_ENDPOINT, json=payload)
+        logger.info(f"🧽 Ollama unload response: {resp.status_code} - {resp.text}")
+        return resp.status_code == 200
+    except Exception as e:
+        logger.error(f"❌ Exception during soft-unload: {str(e)}")
+        return False
+
+# Shortcut for getting the current model (can be expanded later for dynamic switching)
+def get_current_model():
+    return get_model_name()
+
+# Main LLM interaction — injects personality and sends prompt to Ollama
 def get_ai_response(user_prompt):
-    persona = load_persona()
+    model_name = get_model_name()
+    load_model(model_name)  # Ensures the model is pulled and ready
+
+    persona = load_persona()
     if persona:
-        # Sanitize prompt injection
+        # Clean fancy quotes and build final prompt with character injection
         safe_inject = persona["prompt_inject"].replace("“", "\"").replace("”", "\"").replace("’", "'")
         full_prompt = f"{safe_inject}\nUser: {user_prompt}\n{persona['name']}:"
     else:
-        full_prompt = user_prompt  # fallback mode: just send the user's prompt
+        full_prompt = user_prompt  # fallback to raw prompt if no persona loaded
 
     payload = {
-        "model": MODEL_NAME,
+        "model": model_name,  # 🔧 Suggested fix: previously hardcoded to MODEL_NAME
        "prompt": full_prompt,
        "stream": False
+        # optional: add "keep_alive": 300 to keep model warm
     }
 
-    #print("\n🛰️ SENDING TO OLLAMA /api/generate")
-    logger.info("🛰️ SENDING TO OLLAMA /api/generate")
-    #print("Payload:", payload)
+    logger.info("🛰️ SENDING TO OLLAMA /generate")
     logger.info(f"Payload: {payload}")
 
     try:
-        response = requests.post(AI_URL, json=payload)
-        #print("📨 Raw response:", response.text)
+        response = requests.post(GEN_ENDPOINT, json=payload)
         logger.info(f"📨 Raw response: {response.text}")
 
         if response.status_code == 200:
             result = response.json()
-            return result.get("response", "[No message in response]")
+            response_text = result.get("response", "[No message in response]")
+            return strip_thinking_block(response_text) if not SHOW_THINKING_BLOCKS else response_text
         else:
             return f"[Error {response.status_code}] {response.text}"
     except Exception as e:
         return f"[Exception] {str(e)}"
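Taken together, the new helpers give ai.py an explicit model lifecycle: /pull preloads a model, /generate answers prompts, and a /generate call with keep_alive: 0 soft-unloads the model from VRAM without deleting it from disk. A minimal smoke test of that cycle, assuming src/ is on the import path, the .env above is in place, and the Ollama server is reachable (this script is illustrative, not part of the PR):

from ai import get_current_model, model_exists_locally, load_model, get_ai_response, unload_model

model = get_current_model()                            # MODEL_NAME from .env, e.g. gemma3:12b
print("cached locally:", model_exists_locally(model))  # checks GET /api/tags

if load_model(model):                                  # POST /api/pull, True on HTTP 200
    # POST /api/generate; <think> blocks are stripped unless SHOW_THINKING_BLOCKS=true
    print(get_ai_response("Say hello in one short sentence."))

unload_model(model)                                    # /api/generate with keep_alive: 0 frees VRAM only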
src/bot.py (130 changes)

@@ -4,12 +4,45 @@ import os
 import discord
 import yaml
 from discord.ext import commands
+from textwrap import wrap
 from dotenv import load_dotenv
+import random
+import yaml
+from scheduler import start_scheduler
+from logger import setup_logger
+logger = setup_logger("bot")
+
+from ai import unload_model, load_model, get_current_model
+
 dotenv_path = os.path.join(os.path.dirname(__file__), '..', '.env')
 load_dotenv(dotenv_path)
+logger.info(f"🔍 Loaded MODEL_NAME from .env: {os.getenv('MODEL_NAME')}")
+
+MODEL_NAME = os.getenv("MODEL_NAME", "llama3:latest")
+logger.info(f"🔍 Loaded MODEL_NAME from .env: {MODEL_NAME}")
+
+# 🧽 Try to unload any currently loaded model
+logger.info(f"🧹 Attempting to clear VRAM before loading {MODEL_NAME}...")
+unload_model(MODEL_NAME)
+
+# 🚀 Load target model from .env
+if load_model(MODEL_NAME):
+    logger.info(f"🚀 Model `{MODEL_NAME}` preloaded on startup.")
+else:
+    logger.warning(f"⚠️ Failed to preload model `{MODEL_NAME}`.")
+
+logger.info(f"✅ Final model in use: {MODEL_NAME}")
+
+from ai import get_ai_response, load_model
+MODEL_NAME = os.getenv("MODEL_NAME", "llama3:latest")
+
+if load_model(MODEL_NAME):
+    logger.info(f"🚀 Model `{MODEL_NAME}` preloaded on startup.")
+else:
+    logger.warning(f"⚠️ Failed to preload model `{MODEL_NAME}`.")
+
+logger.info(f"✅ Final model in use: {MODEL_NAME}")
+
-from ai import get_ai_response
 from personality import apply_personality, set_persona
 from discord.ext.commands import (
     cooldown,

@@ -17,10 +50,6 @@ from discord.ext.commands import (
     CooldownMapping,
     CommandOnCooldown
 )
-import yaml
-from scheduler import start_scheduler
-from logger import setup_logger
-logger = setup_logger("bot")
 
 base_dir = os.path.dirname(__file__)
 settings_path = os.path.join(base_dir, "settings.yml")

@@ -32,8 +61,10 @@ ROAST_COOLDOWN_SECONDS = settings["cooldowns"]["roast"]
 GLOBAL_COOLDOWN_SECONDS = settings["cooldowns"]["global"]
 COOLDOWN_MSG_TEMPLATE = settings["messages"]["cooldown"]
 
-load_dotenv()
 TOKEN = os.getenv("DISCORD_TOKEN")
+if not TOKEN:
+    logger.error("❌ DISCORD_TOKEN not set in .env file.")
+    raise SystemExit("DISCORD_TOKEN not set.")
 
 intents = discord.Intents.default()
 intents.message_content = True

@@ -44,8 +75,9 @@ bot = commands.Bot(command_prefix="!", intents=intents)
 async def on_command_error(ctx, error):
     if isinstance(error, CommandOnCooldown):
         retry_secs = round(error.retry_after, 1)
-        msg = COOLDOWN_MSG_TEMPLATE.replace("{seconds}", str(retry_secs))
-        print("🕒 Chill, mortal. You must wait 11.6s before trying again. 😼")
+        template = random.choice(COOLDOWN_MSG_TEMPLATE) if isinstance(COOLDOWN_MSG_TEMPLATE, list) else COOLDOWN_MSG_TEMPLATE
+        msg = template.replace("{seconds}", str(retry_secs))
 
         logger.info(f"Command {ctx.command} on cooldown. Retry after {retry_secs} seconds.")
         await ctx.send(msg)
     else:

@@ -68,17 +100,26 @@ async def ping(ctx):
     await ctx.send("🏓 Pong!")
 
 @bot.command()
-async def chat(ctx, *, message):
+async def chat(ctx, *, prompt):
     await ctx.send("🤖 Thinking...")
-    reply = get_ai_response(message)
-    await ctx.send(reply)
+    reply = get_ai_response(prompt)
+    MAX_DISCORD_MESSAGE_LENGTH = 2000
+
+    # Split long replies into chunks that fit Discord limits
+    chunks = wrap(reply, MAX_DISCORD_MESSAGE_LENGTH)
+
+    # Log only if the response is being chunked
+    if len(chunks) > 1:
+        logger.warning(f"💬 Splitting response into {len(chunks)} chunks due to length.")
+
+    for chunk in chunks:
+        await ctx.send(chunk)
 
 @bot.command()
 async def setpersona(ctx, *, description):
     set_persona(description)
     await ctx.send("✅ Persona updated! New style will be used in replies.")
 
 
 @bot.command(name='roast')
 @cooldown(rate=1, per=ROAST_COOLDOWN_SECONDS, type=BucketType.user)
 async def roast(ctx):

@@ -94,6 +135,71 @@ async def roast(ctx):
     # Send the roast back to the channel
     await ctx.send(f"😼 {response}")
 
+@bot.command(name="clearmodel")
+async def clear_model(ctx):
+    from ai import unload_model, get_current_model
+    model = get_current_model()
+    success = unload_model(model)
+    msg = f"✅ Unloaded model: `{model}`" if success else f"❌ Failed to unload model: `{model}`"
+    await ctx.send(msg)
+
+@bot.command(name="model")
+async def current_model(ctx):
+    from ai import get_current_model
+    model = get_current_model()
+    await ctx.send(f"📦 Current model: `{model}`")
+
+@bot.command(name="setmodel")
+async def set_model(ctx, *, model_name):
+    from ai import get_current_model, load_model, unload_model
+
+    current_model = get_current_model()
+    if model_name == current_model:
+        return await ctx.send(f"⚠️ `{model_name}` is already active.")
+
+    await ctx.send(f"🔄 Switching from `{current_model}` to `{model_name}`…")
+
+    # 1) Soft-unload old model from VRAM only
+    if unload_model(current_model):
+        await ctx.send(f"🧽 Unloaded `{current_model}` from VRAM.")
+    else:
+        await ctx.send(f"⚠️ Couldn’t unload `{current_model}` (it may not have been loaded).")
+
+    # 2) Load the new one
+    if not load_model(model_name):
+        return await ctx.send(f"❌ Failed to pull `{model_name}`. Make sure it’s in `ollama list`.")
+
+    # 3) Update runtime AND .env on disk
+    os.environ["MODEL_NAME"] = model_name
+    env_path = os.path.join(os.path.dirname(__file__), '..', '.env')
+    # Read and rewrite .env
+    lines = []
+    with open(env_path, 'r', encoding='utf-8') as f:
+        for line in f:
+            if line.startswith("MODEL_NAME="):
+                lines.append(f"MODEL_NAME={model_name}\n")
+            else:
+                lines.append(line)
+    with open(env_path, 'w', encoding='utf-8') as f:
+        f.writelines(lines)
+
+    await ctx.send(f"✅ Model switched to `{model_name}` and `.env` updated.")
+
+@bot.command(name="models")
+async def list_models(ctx):
+    import requests
+    from ai import TAGS_ENDPOINT
+
+    try:
+        resp = requests.get(TAGS_ENDPOINT)
+        models = [m["name"] for m in resp.json().get("models", [])]
+        if models:
+            await ctx.send("🧠 Available models:\n" + "\n".join(f"- `{m}`" for m in models))
+        else:
+            await ctx.send("❌ No models found.")
+    except Exception as e:
+        await ctx.send(f"❌ Failed to fetch models: {e}")
+
 @bot.event
 async def on_ready():
     print(f"✅ Logged in as {bot.user.name}")
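The reworked !chat command chunks its output because Discord rejects messages longer than 2000 characters; textwrap.wrap splits the model's reply into pieces that each fit under the limit before sending them one by one. A minimal sketch of the same chunking outside the bot (the sample reply is made up for illustration):

from textwrap import wrap

MAX_DISCORD_MESSAGE_LENGTH = 2000
reply = "word " * 1200  # stand-in for a long get_ai_response() result

chunks = wrap(reply, MAX_DISCORD_MESSAGE_LENGTH)
print(len(chunks), max(len(c) for c in chunks))  # several chunks, each at most 2000 characters

One side effect worth knowing: wrap() normalizes internal whitespace by default, so newlines in a multi-paragraph reply arrive flattened to spaces unless replace_whitespace=False is passed.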
src/settings.yml

@@ -4,7 +4,7 @@ cooldowns:
 
 messages:
   cooldown:
-    - "🕒 Chill, mortal. You must wait {seconds}s before trying again. 😼"
+    - "🕒 Chill, wait {seconds}s before trying again."
 
 scheduler:
   enabled: false
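The cooldown message stays a YAML list with a {seconds} placeholder, which is what lets the updated on_command_error handler pick a random entry and substitute the actual wait time. A minimal sketch of that lookup, assuming PyYAML is installed (the inline YAML string mirrors the file purely for illustration):

import random
import yaml

settings = yaml.safe_load('''
messages:
  cooldown:
    - "🕒 Chill, wait {seconds}s before trying again."
''')

template = settings["messages"]["cooldown"]
# bot.py accepts either a single string or a list of variants
chosen = random.choice(template) if isinstance(template, list) else template
print(chosen.replace("{seconds}", "11.6"))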