# Benchmark: send the same email-summarization prompt to a local Ollama server
# twice (a warm-up pass that absorbs model-load cost, then a timed pass) and
# print the model's summary plus the elapsed time for each request.
import time

import requests
# --- Configuration -----------------------------------------------------------

# Ollama endpoint on the local network, and the model to benchmark.
API_URL = "http://192.168.1.100:11434/api/generate"
MODEL = "tinyllama:1.1b"

# Instruction plus a sample email, sent to the model on every pass.
prompt_text = (
    "You are a professional AI assistant. Read the following email and briefly explain what it's about "
    "as if you were summarizing it for your busy boss.\n\n"
    "Be concise, clear, and include names, requests, deadlines, and project names if mentioned.\n\n"
    "Email:\n"
    "\"Hi there, just checking in to see if you received my last message about the invoice due next week. "
    "Please let me know when you get a chance.\""
)

# Request body for /api/generate; stream=False asks for one complete JSON reply
# instead of a token-by-token stream.
payload = {
    "model": MODEL,
    "prompt": prompt_text,
    "stream": False,
}
def run_summary_pass(pass_label, timeout=120):
    """Send the summarization prompt to the Ollama API once and report on it.

    Prints the pass label, the model's summary (or an error description), and
    the elapsed wall-clock time for the request.

    Args:
        pass_label: Human-readable label for this run (e.g. "Warm-up").
        timeout: Seconds to wait for the HTTP response before giving up;
            generous by default because the warm-up pass includes model load.

    Returns:
        The model's summary string on success, or an "❌ Error: ..." string
        describing the HTTP status or transport failure.
    """
    print(f"\n🔁 {pass_label} run for model: {MODEL}")
    # perf_counter is a monotonic clock, the right tool for measuring an
    # interval (time.time can jump if the system clock is adjusted).
    start_time = time.perf_counter()
    try:
        # Without a timeout, requests.post can block forever on a dead host.
        response = requests.post(API_URL, json=payload, timeout=timeout)
        if response.status_code == 200:
            result = response.json().get("response")
        else:
            result = f"❌ Error: {response.status_code} - {response.text}"
    except requests.exceptions.RequestException as exc:
        # Connection refused, DNS failure, timeout, etc. — report instead of
        # crashing so the second pass can still be attempted.
        result = f"❌ Error: {exc}"
    elapsed = time.perf_counter() - start_time

    print(f"🧠 Summary: {result}")
    print(f"⏱️ Time taken: {elapsed:.2f} seconds")
    return result
# Guard the entry point so importing this module does not fire network
# requests; running the script directly behaves exactly as before.
if __name__ == "__main__":
    # First pass pays the one-time model-load cost (cold cache).
    run_summary_pass("Warm-up")

    # Second pass measures steady-state inference performance.
    run_summary_pass("Performance")