"""Time two identical email-summary requests against a text-generation HTTP API.

The script POSTs the same prompt twice to ``API_URL``: a warm-up pass (which
the original author annotated as covering model loading) followed by a second
pass intended to show steady-state latency. Each pass prints the returned
summary, or an error description, and the elapsed wall time of the request.

NOTE(review): the port and ``/api/generate`` path look like an Ollama server;
confirm against the deployment before relying on that.
"""
import time

import requests

API_URL = "http://192.168.1.100:11434/api/generate"
MODEL = "tinyllama:1.1b"

# Upper bound (seconds) for one request. Generous on purpose: the warm-up pass
# may include model loading. Without a timeout, requests waits forever on an
# unreachable or stalled server.
REQUEST_TIMEOUT = 300

prompt_text = (
    "You are a professional AI assistant. Read the following email and briefly explain what it's about "
    "as if you were summarizing it for your busy boss.\n\n"
    "Be concise, clear, and include names, requests, deadlines, and project names if mentioned.\n\n"
    "Email:\n"
    "\"Hi there, just checking in to see if you received my last message about the invoice due next week. "
    "Please let me know when you get a chance.\""
)

# Request body shared by every pass; "stream": False asks for a single JSON
# document rather than a chunked stream.
payload = {
    "model": MODEL,
    "prompt": prompt_text,
    "stream": False,
}


def run_summary_pass(pass_label: str, timeout: float = REQUEST_TIMEOUT) -> None:
    """Send the summary prompt once and print the result and the time taken.

    Args:
        pass_label: Human-readable name of this pass (e.g. ``"Warm-up"``),
            used only in the printed header.
        timeout: Seconds to wait for the server before giving up.

    Network failures, timeouts, HTTP error statuses and undecodable response
    bodies are reported in the printed summary line instead of raising, so a
    failed pass does not prevent later passes from running.
    """
    # NOTE(review): the leading glyph of this message was mis-encoded in the
    # original source with its final byte lost; U+1F501 (repeat) is the best
    # reconstruction -- confirm the intended emoji.
    print(f"\n🔁 {pass_label} run for model: {MODEL}")

    # perf_counter is monotonic, so the measurement cannot be skewed by
    # system clock adjustments during the request.
    started = time.perf_counter()
    try:
        response = requests.post(API_URL, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        response = None
        result = f"❌ Error: {exc}"
    elapsed = time.perf_counter() - started

    if response is not None:
        if response.status_code == 200:
            try:
                result = response.json().get("response")
            except ValueError as exc:
                # A 200 whose body is not valid JSON.
                result = f"❌ Error: invalid JSON in response - {exc}"
        else:
            result = f"❌ Error: {response.status_code} - {response.text}"

    print(f"🧠 Summary: {result}")
    print(f"⏱️ Time taken: {elapsed:.2f} seconds")


def main() -> None:
    """Run the warm-up pass followed by the measured pass."""
    # Warm-up run (model loading)
    run_summary_pass("Warm-up")

    # Second run (real performance)
    run_summary_pass("Performance")


if __name__ == "__main__":
    main()