# Source listing metadata: 89 lines, 2.7 KiB, Python
import spacy
|
|
import mysql.connector
|
|
import os
|
|
import sys
|
|
from collections import Counter
|
|
from string import punctuation
|
|
|
|
# === Load spaCy model ===
# The pipeline is loaded once at start-up; summarize() reuses this module-level
# object for every email instead of reloading the model per call.
SPACY_MODEL_NAME = "en_core_web_sm"
nlp = spacy.load(SPACY_MODEL_NAME)
|
|
|
|
# === DB Credentials ===
# Connection settings are read from the environment. Only non-secret values
# keep an in-code fallback so the script still runs against a local database.
DB_HOST = os.getenv("DB_HOST", "localhost")
# os.getenv() returns strings, so the fallback is a string as well and the
# value is converted to int in exactly one place.
DB_PORT = int(os.getenv("DB_PORT", "3306"))
DB_USER = os.getenv("DB_USER", "emailuser")
# SECURITY: the password must be supplied through the DB_PASSWORD environment
# variable. A real password used to be hard-coded here as the fallback; it
# should be treated as exposed and rotated. With the variable unset the
# password is empty and the connection attempt is expected to be rejected by
# the server rather than silently using an embedded secret.
DB_PASSWORD = os.getenv("DB_PASSWORD", "")
DB_NAME = os.getenv("DB_NAME", "emailassistant")
|
|
|
|
# === Connect to DB ===
# Collect the connection settings in one mapping, then open the connection.
_db_settings = {
    "host": DB_HOST,
    "port": DB_PORT,
    "user": DB_USER,
    "password": DB_PASSWORD,
    "database": DB_NAME,
}
conn = mysql.connector.connect(**_db_settings)

# dictionary=True is required by the processing loop, which reads fetched rows
# by column name (email["id"], email["body"]).
cursor = conn.cursor(dictionary=True)
|
|
|
|
# === Logging Helper ===
|
|
def log_event(cursor, level, source, message):
    """Insert one row into the ``logs`` table.

    The statement is parameterized, so the values are passed to the driver
    separately from the SQL text. This function does not commit; the caller
    owns the transaction.

    Args:
        cursor: Object whose ``execute(sql, params)`` runs the insert.
        level: Value stored in the ``level`` column (this file passes
            "INFO", "WARNING" and "ERROR").
        source: Value stored in the ``source`` column.
        message: Value stored in the ``message`` column.
    """
    insert_sql = "INSERT INTO logs (level, source, message) VALUES (%s, %s, %s)"
    row_values = (level, source, message)
    cursor.execute(insert_sql, row_values)
|
|
|
|
# === Summarization Logic ===
|
|
def summarize(text, max_sentences=3):
    """Return an extractive summary made of the highest-scoring sentences.

    Every alphabetic, non-stop-word token is counted case-insensitively over
    the whole text. A sentence's score is the sum of those counts over its
    tokens, so sentences that reuse the text's frequent words rank highest.
    The top ``max_sentences`` sentences are selected and then emitted in the
    order they occur in the text, so the summary reads in its original
    sequence instead of being shuffled by score.

    Uses the module-level spaCy pipeline ``nlp``.

    Args:
        text: Text to summarize.
        max_sentences: Upper bound on the number of sentences kept.

    Returns:
        The selected sentences joined by single spaces, or "" when no
        sentence contains a counted word.
    """
    doc = nlp(text)

    # Frequency of each alphabetic, non-stop-word token (lower-cased).
    word_freq = Counter(
        token.text.lower()
        for token in doc
        if token.is_alpha and not token.is_stop
    )

    # (sentence, score) pairs, collected in document order. Sentences with no
    # counted word are left out entirely, as before.
    scored = []
    for sent in doc.sents:
        # A Counter returns 0 for unknown keys, so tokens that were not
        # counted above contribute nothing to the score.
        score = sum(word_freq[token.text.lower()] for token in sent)
        if score:
            scored.append((sent, score))

    # sorted() is stable, so equally scored sentences keep document order
    # when competing for the last slots.
    top = sorted(scored, key=lambda pair: pair[1], reverse=True)[:max_sentences]

    # Restore document order (Span.start is the sentence's first token index).
    top.sort(key=lambda pair: pair[0].start)
    return " ".join(str(sent) for sent, _ in top)
|
|
|
|
# === Fetch All Emails with Missing Summaries ===
# The whole run sits inside try/finally so the cursor and the connection are
# released even when a query, a summary or a log write fails part-way through.
try:
    cursor.execute("SELECT id, body FROM emails WHERE ai_summary IS NULL")
    emails = cursor.fetchall()

    # === Main Processing Loop ===
    # Each email is committed on its own, so a crash later in the run cannot
    # discard summaries that were already produced.
    for email in emails:
        email_id = email["id"]
        body = email["body"]

        if not body or not body.strip():
            log_event(cursor, "WARNING", "summarizer", f"Skipped empty body for email ID {email_id}")
            # Persist the warning now; a rollback for a later email must not
            # erase it.
            conn.commit()
            continue

        try:
            summary = summarize(body)
            if summary.strip():
                # Optional confidence: share of the body's words kept in the
                # summary, capped at 1.0 so it always stays a ratio.
                confidence = min(1.0, round(len(summary.split()) / max(1, len(body.split())), 2))
            else:
                # The placeholder is not derived from the body, so a word
                # ratio would be meaningless (and could exceed 1 for short
                # bodies); report zero confidence instead.
                summary = "No meaningful summary could be generated."
                confidence = 0.0

            # Update email
            cursor.execute("""
                UPDATE emails
                SET ai_summary = %s,
                    ai_confidence = %s
                WHERE id = %s
            """, (summary, confidence, email_id))

            log_event(cursor, "INFO", "summarizer", f"Summarized email ID {email_id}")
            conn.commit()
            print(f"✅ Summarized email {email_id} (confidence: {confidence})")

        except Exception as e:
            # Drop whatever this email wrote before failing, then record the
            # error in its own transaction. If the connection itself is
            # broken, these calls raise and the finally clause still runs.
            conn.rollback()
            log_event(cursor, "ERROR", "summarizer", f"Error summarizing email ID {email_id}: {str(e)}")
            conn.commit()
            print(f"❌ Error summarizing email {email_id}: {e}")

finally:
    # === Commit & Close ===
    # All work has been committed per email above; only cleanup remains.
    # The nested try ensures the connection is closed even if closing the
    # cursor raises.
    try:
        cursor.close()
    finally:
        conn.close()
|