"""Generate extractive summaries for stored emails that do not have one yet.

The script loads a spaCy English model, selects every row of ``emails`` whose
``ai_summary`` is NULL, writes a frequency-based extractive summary and a
length-ratio "confidence" back to the row, and records progress in ``logs``.
All work runs at import/execution time, exactly as a flat script.
"""

import os
import sys
from collections import Counter
from string import punctuation

import mysql.connector
import spacy

# === Load spaCy model ===
nlp = spacy.load("en_core_web_sm")

# === DB Credentials ===
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = int(os.getenv("DB_PORT", "3306"))
DB_USER = os.getenv("DB_USER", "emailuser")
# NOTE(review): SECURITY - a real-looking password is hard-coded as the
# fallback. It is kept so existing deployments that rely on it keep working,
# but it should be rotated and supplied only through DB_PASSWORD.
DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020")
DB_NAME = os.getenv("DB_NAME", "emailassistant")

# === Connect to DB ===
conn = mysql.connector.connect(
    host=DB_HOST,
    port=DB_PORT,
    user=DB_USER,
    password=DB_PASSWORD,
    database=DB_NAME,
)
cursor = conn.cursor(dictionary=True)


# === Logging Helper ===
def log_event(cursor, level, source, message):
    """Insert one row into the ``logs`` table.

    Args:
        cursor: Open DB cursor used to run the INSERT.
        level: Severity label stored in ``logs.level`` (this script passes
            "INFO", "WARNING" or "ERROR").
        source: Name of the component writing the entry.
        message: Free-text log message.

    The row is not committed here; it becomes durable when the caller
    commits the connection.
    """
    cursor.execute(
        "INSERT INTO logs (level, source, message) VALUES (%s, %s, %s)",
        (level, source, message),
    )


# === Summarization Logic ===
def summarize(text: str, max_sentences: int = 3) -> str:
    """Return an extractive summary made of the highest-scoring sentences.

    Each sentence is scored by summing the document-wide frequency of every
    alphabetic, non-stop-word token it contains. The ``max_sentences`` best
    sentences are joined with single spaces, highest score first (not in
    document order).

    Args:
        text: Text to summarize.
        max_sentences: Maximum number of sentences to keep. Values <= 0
            yield an empty summary.

    Returns:
        The selected sentences joined by spaces, or ``""`` when no sentence
        contains a scoring word.
    """
    # A negative value would otherwise slice from the end ("all but the last
    # N sentences"), which is never what a caller asking for a limit means.
    if max_sentences <= 0:
        return ""

    doc = nlp(text)
    word_freq = Counter(
        token.text.lower()
        for token in doc
        if token.is_alpha and not token.is_stop
    )

    sentence_scores = {}
    for sent in doc.sents:
        # Counter returns 0 for words it has not seen, so tokens that are
        # stop words or non-alphabetic contribute nothing to the score.
        score = sum(word_freq[token.text.lower()] for token in sent)
        # Sentences without any scoring word are left out entirely.
        if score > 0:
            sentence_scores[sent] = score

    # sorted() is stable, so equally scored sentences keep document order.
    top_sentences = sorted(
        sentence_scores, key=sentence_scores.get, reverse=True
    )[:max_sentences]
    return " ".join(str(sent) for sent in top_sentences)


try:
    # === Fetch All Emails with Missing Summaries ===
    cursor.execute("SELECT id, body FROM emails WHERE ai_summary IS NULL")
    emails = cursor.fetchall()

    # === Main Processing Loop ===
    for email in emails:
        email_id = email["id"]
        body = email["body"]

        if not body or not body.strip():
            log_event(
                cursor,
                "WARNING",
                "summarizer",
                f"Skipped empty body for email ID {email_id}",
            )
            continue

        try:
            summary = summarize(body)

            if summary.strip():
                # Optional confidence (ratio of summary length to original body)
                confidence = round(
                    len(summary.split()) / max(1, len(body.split())), 2
                )
            else:
                summary = "No meaningful summary could be generated."
                # The placeholder text says nothing about the body, so a
                # length ratio would be meaningless (and can exceed 1.0 for
                # very short bodies).
                confidence = 0.0

            # Update email
            cursor.execute(
                """
                UPDATE emails
                SET ai_summary = %s, ai_confidence = %s
                WHERE id = %s
                """,
                (summary, confidence, email_id),
            )

            log_event(
                cursor, "INFO", "summarizer", f"Summarized email ID {email_id}"
            )
            print(f"✅ Summarized email {email_id} (confidence: {confidence})")

        except Exception as e:
            # Per-email boundary: one bad email must not stop the batch.
            print(f"❌ Error summarizing email {email_id}: {e}")
            try:
                log_event(
                    cursor,
                    "ERROR",
                    "summarizer",
                    f"Error summarizing email ID {email_id}: {str(e)}",
                )
            except mysql.connector.Error as log_err:
                # If the database itself is the problem, writing the log row
                # fails too; report it instead of aborting the whole run.
                print(
                    f"Could not record error for email {email_id}: {log_err}",
                    file=sys.stderr,
                )

    # === Commit & Close ===
    # Single commit after the loop: summaries and log rows are persisted
    # together once every email has been processed.
    conn.commit()
finally:
    # Always release the cursor and connection, even when the SELECT or a
    # log write raised.
    try:
        cursor.close()
    finally:
        conn.close()