Mostly working but still defective version of the main branch; used to test some model loading/unloading functions.
parent 0d0084fd9a
commit fe5e2d9b96

6 changed files with 379 additions and 64 deletions

.env (5 changes)

@@ -1,4 +1,5 @@
DISCORD_TOKEN=MTM2OTc3NDY4OTYzNDg4MTU4Ng.G9Nrgz.akHoOO9SrXCDwiOCI3BUXfdR4bpSNb9zrVx9UI
OLLAMA_API=http://192.168.1.100:11434/api/generate
MODEL_NAME=deepseek-r1:8b
OLLAMA_API=http://192.168.1.100:11434/api/
MODEL_NAME=dolphin-llama3:latest
CHANNEL_ID=1370420592360161393
SHOW_THINKING_BLOCKS=false

bot.log (221 changes)
File diff suppressed because one or more lines are too long

Binary file not shown.

src/ai.py (84 changes)

@@ -1,51 +1,97 @@
# ai.py

import requests
import os
import requests
import re
from dotenv import load_dotenv
from personality import load_persona
from logger import setup_logger

logger = setup_logger("ai")

load_dotenv()
AI_URL = os.getenv("OLLAMA_API")  # match .env and Docker ENV (e.g., http://localhost:11434/api/generate)
MODEL_NAME = os.getenv("MODEL_NAME", "llama3:latest")

if not AI_URL:
    logger.error("❌ OLLAMA_API environment variable is not set.")
    raise ValueError("❌ OLLAMA_API environment variable is not set.")
BASE_API = os.getenv("OLLAMA_API").rstrip("/")  # ← ensures no trailing slash issue
GEN_ENDPOINT = f"{BASE_API}/generate"
PULL_ENDPOINT = f"{BASE_API}/pull"
#UNLOAD_ENDPOINT = f"{BASE_API}/unload"
TAGS_ENDPOINT = f"{BASE_API}/tags"

MODEL_NAME = os.getenv("MODEL_NAME", "llama3:latest")
SHOW_THINKING_BLOCKS = os.getenv("SHOW_THINKING_BLOCKS", "false").lower() == "true"

if not BASE_API:
    logger.error("❌ OLLAMA_API not set.")
    raise ValueError("❌ OLLAMA_API not set.")

def get_model_name():
    return MODEL_NAME

def strip_thinking_block(text: str) -> str:
    return re.sub(r"<think>.*?</think>\s*", "", text, flags=re.DOTALL)

def model_exists_locally(model_name: str) -> bool:
    try:
        resp = requests.get(TAGS_ENDPOINT)
        return model_name in resp.text
    except Exception as e:
        logger.error(f"❌ Failed to check local models: {e}")
        return False

def load_model(model_name: str) -> bool:
    try:
        logger.info(f"🧠 Preloading model: {model_name}")
        resp = requests.post(PULL_ENDPOINT, json={"name": model_name})
        logger.info(f"📨 Ollama pull response: {resp.status_code} - {resp.text}")
        return resp.status_code == 200
    except Exception as e:
        logger.error(f"❌ Exception during model load: {str(e)}")
        return False

def unload_model(model_name: str) -> bool:
    try:
        logger.info(f"🧹 Soft-unloading model from VRAM: {model_name}")
        resp = requests.post(GEN_ENDPOINT, json={
            "model": model_name,
            "keep_alive": 0,
            "prompt": ""
        })
        logger.info(f"🧽 Ollama unload response: {resp.status_code} - {resp.text}")
        return resp.status_code == 200
    except Exception as e:
        logger.error(f"❌ Exception during soft-unload: {str(e)}")
        return False

def get_current_model():
    return get_model_name()

def get_ai_response(user_prompt):
    persona = load_persona()
    model_name = get_model_name()
    load_model(model_name)

    persona = load_persona()
    if persona:
        # Sanitize prompt injection
        safe_inject = persona["prompt_inject"].replace("“", "\"").replace("”", "\"").replace("’", "'")
        full_prompt = f"{safe_inject}\nUser: {user_prompt}\n{persona['name']}:"
    else:
        full_prompt = user_prompt  # fallback mode: just send the user's prompt
        full_prompt = user_prompt

    payload = {
        "model": MODEL_NAME,
        "model": model_name,
        "prompt": full_prompt,
        "stream": False
    }

    #print("\n🛰️ SENDING TO OLLAMA /api/generate")
    logger.info("🛰️ SENDING TO OLLAMA /api/generate")
    #print("Payload:", payload)
    logger.info("🛰️ SENDING TO OLLAMA /generate")
    logger.info(f"Payload: {payload}")

    try:
        response = requests.post(AI_URL, json=payload)
        #print("📨 Raw response:", response.text)
        response = requests.post(GEN_ENDPOINT, json=payload)
        logger.info(f"📨 Raw response: {response.text}")

        if response.status_code == 200:
            result = response.json()
            return result.get("response", "[No message in response]")
            response_text = result.get("response", "[No message in response]")
            return strip_thinking_block(response_text) if not SHOW_THINKING_BLOCKS else response_text
        else:
            return f"[Error {response.status_code}] {response.text}"
    except Exception as e:
        return f"[Exception] {str(e)}"
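
For reference, the load/unload helpers above map onto two documented Ollama endpoints: POST /api/pull fetches or refreshes a model on disk, and POST /api/generate with an empty prompt and "keep_alive": 0 asks the server to evict the model from VRAM without deleting it. A minimal standalone sketch of that round trip, not part of the commit; the helper names and the localhost default URL are illustrative:

# sketch: exercising the same Ollama endpoints ai.py builds (illustrative only)
import os
import requests

BASE_API = os.getenv("OLLAMA_API", "http://localhost:11434/api").rstrip("/")

def pull_model(model: str) -> bool:
    # /api/pull streams progress JSON by default; "stream": False waits for a single status object.
    resp = requests.post(f"{BASE_API}/pull", json={"name": model, "stream": False})
    return resp.status_code == 200

def soft_unload(model: str) -> bool:
    # An empty prompt with keep_alive=0 evicts the model from VRAM but keeps it on disk,
    # the same trick unload_model() uses above.
    resp = requests.post(f"{BASE_API}/generate", json={"model": model, "prompt": "", "keep_alive": 0})
    return resp.status_code == 200

def on_disk(model: str) -> bool:
    # /api/tags lists models present on disk (not necessarily resident in VRAM).
    resp = requests.get(f"{BASE_API}/tags")
    return any(m.get("name") == model for m in resp.json().get("models", []))

if __name__ == "__main__":
    name = "dolphin-llama3:latest"
    print("pulled:", pull_model(name), "| on disk:", on_disk(name), "| unloaded:", soft_unload(name))
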
src/bot.py (131 changes)

@@ -4,12 +4,46 @@ import os
import discord
import yaml
from discord.ext import commands
from textwrap import wrap
from dotenv import load_dotenv
import random
import yaml
from scheduler import start_scheduler
from logger import setup_logger
logger = setup_logger("bot")

from ai import unload_model, load_model, get_current_model


dotenv_path = os.path.join(os.path.dirname(__file__), '..', '.env')
load_dotenv(dotenv_path)
logger.info(f"🔍 Loaded MODEL_NAME from .env: {os.getenv('MODEL_NAME')}")

MODEL_NAME = os.getenv("MODEL_NAME", "llama3:latest")
logger.info(f"🔍 Loaded MODEL_NAME from .env: {MODEL_NAME}")

# 🧽 Try to unload any currently loaded model
logger.info(f"🧹 Attempting to clear VRAM before loading {MODEL_NAME}...")
unload_model(MODEL_NAME)

# 🚀 Load target model from .env
if load_model(MODEL_NAME):
    logger.info(f"🚀 Model `{MODEL_NAME}` preloaded on startup.")
else:
    logger.warning(f"⚠️ Failed to preload model `{MODEL_NAME}`.")

logger.info(f"✅ Final model in use: {MODEL_NAME}")

from ai import get_ai_response, load_model
MODEL_NAME = os.getenv("MODEL_NAME", "llama3:latest")

if load_model(MODEL_NAME):
    logger.info(f"🚀 Model `{MODEL_NAME}` preloaded on startup.")
else:
    logger.warning(f"⚠️ Failed to preload model `{MODEL_NAME}`.")

logger.info(f"✅ Final model in use: {MODEL_NAME}")

from ai import get_ai_response
from personality import apply_personality, set_persona
from discord.ext.commands import (
    cooldown,

@@ -17,10 +51,6 @@ from discord.ext.commands import (
    CooldownMapping,
    CommandOnCooldown
)
import yaml
from scheduler import start_scheduler
from logger import setup_logger
logger = setup_logger("bot")

base_dir = os.path.dirname(__file__)
settings_path = os.path.join(base_dir, "settings.yml")

@@ -32,8 +62,10 @@ ROAST_COOLDOWN_SECONDS = settings["cooldowns"]["roast"]
GLOBAL_COOLDOWN_SECONDS = settings["cooldowns"]["global"]
COOLDOWN_MSG_TEMPLATE = settings["messages"]["cooldown"]

load_dotenv()
TOKEN = os.getenv("DISCORD_TOKEN")
if not TOKEN:
    logger.error("❌ DISCORD_TOKEN not set in .env file.")
    raise SystemExit("DISCORD_TOKEN not set.")

intents = discord.Intents.default()
intents.message_content = True

@@ -44,8 +76,9 @@ bot = commands.Bot(command_prefix="!", intents=intents)
async def on_command_error(ctx, error):
    if isinstance(error, CommandOnCooldown):
        retry_secs = round(error.retry_after, 1)
        msg = COOLDOWN_MSG_TEMPLATE.replace("{seconds}", str(retry_secs))
        print("🕒 Chill, mortal. You must wait 11.6s before trying again. 😼")
        template = random.choice(COOLDOWN_MSG_TEMPLATE) if isinstance(COOLDOWN_MSG_TEMPLATE, list) else COOLDOWN_MSG_TEMPLATE
        msg = template.replace("{seconds}", str(retry_secs))

        logger.info(f"Command {ctx.command} on cooldown. Retry after {retry_secs} seconds.")
        await ctx.send(msg)
    else:

@@ -68,10 +101,21 @@ async def ping(ctx):
    await ctx.send("🏓 Pong!")

@bot.command()
async def chat(ctx, *, message):
async def chat(ctx, *, prompt):
    await ctx.send("🤖 Thinking...")
    reply = get_ai_response(message)
    await ctx.send(reply)
    reply = get_ai_response(prompt)
    MAX_DISCORD_MESSAGE_LENGTH = 2000

    # Split long replies into chunks that fit Discord limits
    chunks = wrap(reply, MAX_DISCORD_MESSAGE_LENGTH)

    # Log only if the response is being chunked
    if len(chunks) > 1:
        logger.warning(f"💬 Splitting response into {len(chunks)} chunks due to length.")

    for chunk in chunks:
        await ctx.send(chunk)


@bot.command()
async def setpersona(ctx, *, description):

@@ -94,6 +138,71 @@ async def roast(ctx):
    # Send the roast back to the channel
    await ctx.send(f"😼 {response}")

@bot.command(name="clearmodel")
async def clear_model(ctx):
    from ai import unload_model, get_current_model
    model = get_current_model()
    success = unload_model(model)
    msg = f"✅ Unloaded model: `{model}`" if success else f"❌ Failed to unload model: `{model}`"
    await ctx.send(msg)

@bot.command(name="model")
async def current_model(ctx):
    from ai import get_current_model
    model = get_current_model()
    await ctx.send(f"📦 Current model: `{model}`")

@bot.command(name="setmodel")
async def set_model(ctx, *, model_name):
    from ai import get_current_model, load_model, unload_model

    current_model = get_current_model()
    if model_name == current_model:
        return await ctx.send(f"⚠️ `{model_name}` is already active.")

    await ctx.send(f"🔄 Switching from `{current_model}` to `{model_name}`…")

    # 1) Soft-unload old model from VRAM only
    if unload_model(current_model):
        await ctx.send(f"🧽 Unloaded `{current_model}` from VRAM.")
    else:
        await ctx.send(f"⚠️ Couldn’t unload `{current_model}` (it may not have been loaded).")

    # 2) Load the new one
    if not load_model(model_name):
        return await ctx.send(f"❌ Failed to pull `{model_name}`. Make sure it’s in `ollama list`.")

    # 3) Update runtime AND .env on disk
    os.environ["MODEL_NAME"] = model_name
    env_path = os.path.join(os.path.dirname(__file__), '..', '.env')
    # Read and rewrite .env
    lines = []
    with open(env_path, 'r', encoding='utf-8') as f:
        for line in f:
            if line.startswith("MODEL_NAME="):
                lines.append(f"MODEL_NAME={model_name}\n")
            else:
                lines.append(line)
    with open(env_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)

    await ctx.send(f"✅ Model switched to `{model_name}` and `.env` updated.")

@bot.command(name="models")
async def list_models(ctx):
    import requests
    from ai import TAGS_ENDPOINT

    try:
        resp = requests.get(TAGS_ENDPOINT)
        models = [m["name"] for m in resp.json().get("models", [])]
        if models:
            await ctx.send("🧠 Available models:\n" + "\n".join(f"- `{m}`" for m in models))
        else:
            await ctx.send("❌ No models found.")
    except Exception as e:
        await ctx.send(f"❌ Failed to fetch models: {e}")

@bot.event
async def on_ready():
    print(f"✅ Logged in as {bot.user.name}")
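
The !setmodel command persists the switch by rewriting the MODEL_NAME line of .env and mirroring the value into os.environ. A minimal sketch of just that persistence step, pulled out into a helper; the function name is illustrative, and it assumes .env already contains a MODEL_NAME= line, since the loop replaces but never appends:

import os

def persist_model_name(model_name: str, env_path: str) -> None:
    # Rewrite only the MODEL_NAME line, leaving every other line of .env untouched,
    # mirroring the read-then-rewrite loop inside the !setmodel command above.
    with open(env_path, "r", encoding="utf-8") as f:
        lines = f.readlines()
    with open(env_path, "w", encoding="utf-8") as f:
        for line in lines:
            f.write(f"MODEL_NAME={model_name}\n" if line.startswith("MODEL_NAME=") else line)
    # Keep the running process in sync with what was written to disk.
    os.environ["MODEL_NAME"] = model_name

# e.g. persist_model_name("dolphin-llama3:latest", os.path.join(os.path.dirname(__file__), "..", ".env"))
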
@@ -4,7 +4,7 @@ cooldowns:

messages:
  cooldown:
    - "🕒 Chill, mortal. You must wait {seconds}s before trying again. 😼"
    - "🕒 Chill, wait {seconds}s before trying again."

scheduler:
  enabled: false
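
The cooldown hunk above pairs with the on_command_error change in src/bot.py: the template may now be either a single string or a YAML list, and one entry is chosen at random before {seconds} is substituted. A small sketch of that selection step; the two messages are copied from the hunk, and treating them both as list entries is an assumption:

import random

def cooldown_message(template, retry_secs: float) -> str:
    # Mirrors bot.py: accept either a plain string or a list of strings,
    # pick one at random, then fill in the wait time.
    chosen = random.choice(template) if isinstance(template, list) else template
    return chosen.replace("{seconds}", str(round(retry_secs, 1)))

templates = [
    "🕒 Chill, mortal. You must wait {seconds}s before trying again. 😼",
    "🕒 Chill, wait {seconds}s before trying again.",
]
print(cooldown_message(templates, 11.6))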