88 lines
2.6 KiB
Python
88 lines
2.6 KiB
Python
|
|
from keybert import KeyBERT
|
||
|
|
from sentence_transformers import SentenceTransformer
|
||
|
|
import mysql.connector
|
||
|
|
import os
|
||
|
|
|
||
|
|
# === Load multilingual KeyBERT model ===
|
||
|
|
# Name of the sentence-embedding checkpoint that backs the keyword extractor.
EMBEDDING_MODEL_NAME = 'paraphrase-multilingual-MiniLM-L12-v2'

# Both objects are built once at import time; `kw_model` is the extractor that
# summarize_subject() calls for every subject line.
model = SentenceTransformer(EMBEDDING_MODEL_NAME)
kw_model = KeyBERT(model)
|
||
|
|
|
||
|
|
# === DB Credentials ===
|
||
|
|
# Each setting is read from the environment variable of the same name; the
# second argument is the fallback used when that variable is unset.
DB_HOST = os.environ.get("DB_HOST", "localhost")
# Environment values are strings, so the port is converted to int here.
DB_PORT = int(os.environ.get("DB_PORT", "3306"))
DB_USER = os.environ.get("DB_USER", "emailuser")
# NOTE(review): a real-looking password is committed as the fallback value.
# Prefer supplying DB_PASSWORD through the environment and rotating this secret.
DB_PASSWORD = os.environ.get("DB_PASSWORD", "miguel33020")
DB_NAME = os.environ.get("DB_NAME", "emailassistant")
|
||
|
|
|
||
|
|
# === Connect to DB ===
|
||
|
|
# Gather the connection settings in one mapping and hand them to the driver
# as keyword arguments in a single call.
_connection_settings = {
    "host": DB_HOST,
    "port": DB_PORT,
    "user": DB_USER,
    "password": DB_PASSWORD,
    "database": DB_NAME,
}
conn = mysql.connector.connect(**_connection_settings)

# dictionary=True: rows are read by column name further down
# (email["id"], email["subject"]).
cursor = conn.cursor(dictionary=True)
|
||
|
|
|
||
|
|
# === Logging Helper ===
|
||
|
|
def log_event(cursor, level, source, message):
    """Insert one row into the ``logs`` table without ever raising.

    Args:
        cursor: Object exposing ``execute(sql, params)``; this script passes
            its open database cursor.
        level: Value stored in the ``level`` column (e.g. ``"INFO"``).
        source: Value stored in the ``source`` column.
        message: Value stored in the ``message`` column.

    Returns:
        None. The insert is not committed here; committing is left to the
        caller.
    """
    statement = "INSERT INTO logs (level, source, message) VALUES (%s, %s, %s)"
    values = (level, source, message)
    try:
        cursor.execute(statement, values)
    except Exception as err:
        # Logging is best-effort: report the failure on stdout and carry on
        # so a broken log table cannot abort the caller's work.
        print(f"[LOG ERROR] Failed to log event: {err}")
|
||
|
|
|
||
|
|
# === Subject-Based Summarization Using KeyBERT ===
|
||
|
|
def summarize_subject(subject):
    """Condense an email subject into its single best key phrase.

    The subject is passed to the module-level ``kw_model`` extractor, asking
    for the top 1-2 word phrase. The confidence is a word-count ratio
    (summary words / subject words), not a model score.

    Args:
        subject: Subject line to summarize. ``None``, empty and
            whitespace-only values are accepted and treated as empty text.

    Returns:
        A ``(summary, confidence)`` tuple. ``summary`` is always
        whitespace-trimmed. When no phrase is found, or the phrase covers
        less than 20% of the subject's words, the trimmed subject itself is
        returned with confidence ``1.0``.
    """
    # Normalise once so every return path yields trimmed text and a None
    # subject cannot crash the word counting below.
    text = (subject or "").strip()
    if not text:
        # Nothing to extract from; skip the model call and use the same
        # "whole subject" fallback result as below.
        return text, 1.0

    keywords = kw_model.extract_keywords(
        text,
        keyphrase_ngram_range=(1, 2),
        stop_words='english',
        top_n=1,
    )

    # The first element of the top entry is the phrase; with no entries the
    # subject stands in for the summary.
    summary = keywords[0][0].strip() if keywords else text

    summary_word_count = len(summary.split())
    # `text` is non-empty after stripping, so this count is at least 1.
    subject_word_count = len(text.split())
    confidence = round(summary_word_count / subject_word_count, 2)

    # A phrase that is empty or covers too little of the subject is not a
    # useful summary; fall back to the full subject.
    if summary_word_count < 1 or confidence < 0.2:
        return text, 1.0

    return summary, confidence
|
||
|
|
|
||
|
|
# === Fetch emails ===
|
||
|
|
# All database work runs inside try/finally so the cursor and connection are
# released even when the fetch, the loop, or the commit raises.
try:
    cursor.execute("SELECT id, subject FROM emails")
    emails = cursor.fetchall()

    # === Main Processing Loop ===
    for email in emails:
        email_id = email["id"]
        subject = email["subject"]

        # Subjects that are NULL, empty or whitespace-only are logged and skipped.
        if not subject or not subject.strip():
            log_event(cursor, "WARNING", "subject_summarizer", f"Skipped empty subject for email ID {email_id}")
            continue

        # A failure on one email is logged and must not stop the batch.
        try:
            summary, confidence = summarize_subject(subject)

            cursor.execute("""
                UPDATE emails
                SET ai_summary = %s,
                    ai_confidence = %s
                WHERE id = %s
            """, (summary, confidence, email_id))

            log_event(cursor, "INFO", "subject_summarizer", f"Subject summarized for email ID {email_id}")
            print(f"✅ Subject summarized for email {email_id} (confidence: {confidence})")

        except Exception as e:
            log_event(cursor, "ERROR", "subject_summarizer", f"Error on email ID {email_id}: {e}")
            print(f"❌ Error summarizing subject for email {email_id}: {e}")

    # === Commit & Close ===
    # One commit covers every UPDATE and log row written above.
    conn.commit()
finally:
    # Close the connection even if closing the cursor itself fails.
    try:
        cursor.close()
    finally:
        conn.close()
|