Email-Agent/Obsolete/subject_summariser.py

96 lines
2.9 KiB
Python
Raw Permalink Normal View History

2025-05-06 11:13:15 -04:00
import spacy
import mysql.connector
import os
import sys
from collections import Counter
# === NLP model ===
# Small English spaCy pipeline, loaded once at start-up and reused for every
# subject that is summarised.
nlp = spacy.load("en_core_web_sm")

# === Database settings ===
# Each value is read from the environment and falls back to the literal
# default shown here when the variable is unset.
# NOTE(review): the DB_PASSWORD fallback looks like a real credential committed
# to source control -- consider rotating it and requiring the env var instead.
DB_HOST = os.environ.get("DB_HOST", "localhost")
DB_PORT = int(os.environ.get("DB_PORT", 3306))
DB_USER = os.environ.get("DB_USER", "emailuser")
DB_PASSWORD = os.environ.get("DB_PASSWORD", "miguel33020")
DB_NAME = os.environ.get("DB_NAME", "emailassistant")

# === Open the connection ===
_db_settings = {
    "host": DB_HOST,
    "port": DB_PORT,
    "user": DB_USER,
    "password": DB_PASSWORD,
    "database": DB_NAME,
}
conn = mysql.connector.connect(**_db_settings)
# dictionary=True makes fetched rows dicts keyed by column name.
cursor = conn.cursor(dictionary=True)
# === Logging Helper ===
def log_event(cursor, level, source, message):
    """Insert one row into the ``logs`` table through the given cursor.

    Args:
        cursor: Object exposing ``execute(sql, params)``.
        level: Value stored in the ``level`` column (e.g. "INFO").
        source: Value stored in the ``source`` column.
        message: Value stored in the ``message`` column.

    The statement is only executed here; nothing is committed by this
    function.
    """
    insert_sql = "INSERT INTO logs (level, source, message) VALUES (%s, %s, %s)"
    row = (level, source, message)
    # Values travel as bound parameters, never interpolated into the SQL text.
    cursor.execute(insert_sql, row)
# === Subject-Based Summarization ===
def summarize_subject(subject):
    """Condense an email subject line into a shorter keyword phrase.

    The subject is parsed with the module-level spaCy pipeline ``nlp``.
    Alphabetic, non-stop-word tokens count as keywords. Noun chunks that
    contain at least one keyword are preferred as summary material; the
    plain keyword list is used when no such chunk exists.

    Args:
        subject: Raw subject text.

    Returns:
        A ``(summary, confidence)`` tuple. ``confidence`` is the number of
        words in the summary divided by the number of words in the subject,
        rounded to two decimals. The unchanged subject with confidence
        ``1.0`` is returned instead when there are no keywords, when the
        summary has fewer than two words, or when confidence is below 0.3.
    """
    parsed = nlp(subject)
    keywords = [tok.text for tok in parsed if tok.is_alpha and not tok.is_stop]
    if not keywords:
        # Nothing meaningful to extract: hand back the raw subject.
        return subject, 1.0

    # Set used purely for membership tests; the list keeps keyword order.
    keyword_set = set(keywords)
    phrases = [
        chunk.text
        for chunk in parsed.noun_chunks
        if any(tok.text in keyword_set for tok in chunk)
    ]

    source_words = " ".join(phrases if phrases else keywords).split()
    total_words = len(subject.split())
    # Keep at most one word fewer than the subject, but never zero words.
    limit = max(1, total_words - 1)
    kept = source_words[:limit]
    summary = " ".join(kept)

    confidence = round(len(kept) / max(1, total_words), 2)

    if len(kept) >= 2 and confidence >= 0.3:
        return summary, confidence
    # Summary too short or too small a share of the subject: use the raw subject.
    return subject, 1.0
# === Fetch emails and summarise their subjects ===
# Reads every row of `emails`, writes a subject-derived summary and confidence
# back to each one, and records the outcome in the `logs` table.
# NOTE(review): the previous header said "emails with NULL ai_summary", but the
# query has no WHERE clause, so every row is selected and any existing
# ai_summary is overwritten. Behaviour is kept as-is; add
# "WHERE ai_summary IS NULL" if only unsummarised rows should be touched.
try:
    cursor.execute("SELECT id, subject FROM emails")
    emails = cursor.fetchall()

    # === Main Processing Loop ===
    for email in emails:
        email_id = email["id"]
        subject = email["subject"]

        # NULL, empty, or whitespace-only subjects cannot be summarised.
        if not subject or not subject.strip():
            log_event(cursor, "WARNING", "subject_summarizer", f"Skipped empty subject for email ID {email_id}")
            continue

        try:
            summary, confidence = summarize_subject(subject)
            cursor.execute("""
UPDATE emails
SET ai_summary = %s,
ai_confidence = %s
WHERE id = %s
""", (summary, confidence, email_id))
            log_event(cursor, "INFO", "subject_summarizer", f"Subject summarized for email ID {email_id}")
            print(f"✅ Subject summarized for email {email_id} (confidence: {confidence})")
        except Exception as e:
            # Per-row boundary: one bad email must not abort the whole batch,
            # so the failure is logged and the loop moves on.
            log_event(cursor, "ERROR", "subject_summarizer", f"Error on email ID {email_id}: {e}")
            print(f"❌ Error summarizing subject for email {email_id}: {e}")

    # === Commit ===
    # Single commit at the end covers all updates and log rows.
    conn.commit()
finally:
    # === Close ===
    # Runs even if the SELECT, a log insert, or the commit raised, so the
    # cursor and connection are never left open.
    cursor.close()
    conn.close()