"""Summarize email subjects with spaCy and store the result in MySQL.

For every row in ``emails`` that has no ``ai_summary`` yet, a compressed
version of the subject line is computed and written back together with a
confidence value.  Progress and failures are recorded in the ``logs`` table.
"""

import os
import sys
from collections import Counter

import mysql.connector
import spacy

# === Load spaCy model ===
# Loaded once at module level because summarize_subject() uses it on every call.
nlp = spacy.load("en_core_web_sm")

# === DB Credentials ===
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = int(os.getenv("DB_PORT", "3306"))
DB_USER = os.getenv("DB_USER", "emailuser")
# NOTE(security): a real password is hard-coded as the fallback below.  It is
# kept only so existing deployments keep working; set DB_PASSWORD in the
# environment, rotate this credential, and then remove the default.
DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020")
DB_NAME = os.getenv("DB_NAME", "emailassistant")

# Value written to the ``source`` column of every log row this script emits.
LOG_SOURCE = "subject_summarizer"

# Only rows that have not been summarized yet are fetched, so re-running the
# script does not overwrite existing summaries.
SELECT_PENDING_SQL = "SELECT id, subject FROM emails WHERE ai_summary IS NULL"

UPDATE_SUMMARY_SQL = (
    "UPDATE emails SET ai_summary = %s, ai_confidence = %s WHERE id = %s"
)


# === Logging Helper ===
def log_event(cursor, level, source, message):
    """Insert one row into the ``logs`` table.

    Args:
        cursor: An open DB-API cursor; the statement is executed on it.
        level: Severity label stored in the ``level`` column.
        source: Component name stored in the ``source`` column.
        message: Free-text message stored in the ``message`` column.

    The insert is not committed here; the caller owns the transaction.
    """
    cursor.execute(
        "INSERT INTO logs (level, source, message) VALUES (%s, %s, %s)",
        (level, source, message),
    )


# === Subject-Based Summarization ===
def summarize_subject(subject):
    """Compress an email subject into a shorter keyword summary.

    Args:
        subject: The subject line to summarize (a non-empty string).

    Returns:
        A ``(summary, confidence)`` tuple.  ``confidence`` is the number of
        words in the summary divided by the number of words in the subject,
        rounded to two decimals.  When no usable summary can be built the
        original subject is returned with a confidence of ``1.0``.
    """
    doc = nlp(subject)
    keywords = [
        token.text for token in doc if token.is_alpha and not token.is_stop
    ]
    if not keywords:
        return subject, 1.0  # fallback to raw subject

    # Prefer noun chunks that contain at least one keyword.  A set keeps the
    # membership test O(1) per token instead of scanning the keyword list.
    keyword_set = set(keywords)
    chunks = [
        chunk.text
        for chunk in doc.noun_chunks
        if any(tok.text in keyword_set for tok in chunk)
    ]

    # Fall back to the bare keywords when no noun chunk qualified, and cap the
    # summary at one word fewer than the subject (but never below one word).
    subject_word_count = len(subject.split())
    compressed_words = " ".join(chunks or keywords).split()
    summary_words = compressed_words[:max(1, subject_word_count - 1)]
    summary = " ".join(summary_words)

    # Confidence is relative to the subject's word count.
    confidence = round(len(summary_words) / max(1, subject_word_count), 2)

    # A one-word or heavily truncated summary is not useful; keep the subject.
    if len(summary_words) < 2 or confidence < 0.3:
        return subject, 1.0

    return summary, confidence


# === Main Processing Loop ===
def main():
    """Summarize every pending email subject and persist the results.

    Opens a MySQL connection using the ``DB_*`` settings, processes each
    fetched email, commits once after the loop, and always closes the cursor
    and the connection, even when an unexpected error aborts the run.
    """
    conn = mysql.connector.connect(
        host=DB_HOST,
        port=DB_PORT,
        user=DB_USER,
        password=DB_PASSWORD,
        database=DB_NAME,
    )
    try:
        cursor = conn.cursor(dictionary=True)
        try:
            cursor.execute(SELECT_PENDING_SQL)
            emails = cursor.fetchall()

            for email in emails:
                email_id = email["id"]
                subject = email["subject"]

                if not subject or not subject.strip():
                    log_event(
                        cursor,
                        "WARNING",
                        LOG_SOURCE,
                        f"Skipped empty subject for email ID {email_id}",
                    )
                    continue

                # Broad catch on purpose: one bad email must not stop the
                # batch.  The failure is recorded and the loop moves on.
                try:
                    summary, confidence = summarize_subject(subject)
                    cursor.execute(
                        UPDATE_SUMMARY_SQL, (summary, confidence, email_id)
                    )
                    log_event(
                        cursor,
                        "INFO",
                        LOG_SOURCE,
                        f"Subject summarized for email ID {email_id}",
                    )
                    print(
                        f"✅ Subject summarized for email {email_id} "
                        f"(confidence: {confidence})"
                    )
                except Exception as e:
                    log_event(
                        cursor,
                        "ERROR",
                        LOG_SOURCE,
                        f"Error on email ID {email_id}: {str(e)}",
                    )
                    print(
                        f"❌ Error summarizing subject for email {email_id}: {e}"
                    )

            # === Commit ===
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()


if __name__ == "__main__":
    main()