commit 89052f53fd5ea5566b2c234ead66f52ad8b65dab
Author: milo
Date:   Tue May 6 11:13:15 2025 -0400

    First push from FORK client

diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..1a56bde
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,18 @@
+{
+    "sqltools.connections": [
+        {
+            "mysqlOptions": {
+                "authProtocol": "default",
+                "enableSsl": "Disabled"
+            },
+            "previewLimit": 50,
+            "server": "localhost",
+            "port": 3306,
+            "driver": "MariaDB",
+            "name": "emailassistant",
+            "database": "emailassistant",
+            "username": "emailuser",
+            "password": "miguel33020"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/Obsolete/bert_subject_summariser.py b/Obsolete/bert_subject_summariser.py
new file mode 100644
index 0000000..719b8dc
--- /dev/null
+++ b/Obsolete/bert_subject_summariser.py
@@ -0,0 +1,87 @@
+from keybert import KeyBERT
+from sentence_transformers import SentenceTransformer
+import mysql.connector
+import os
+
+# === Load multilingual KeyBERT model ===
+model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
+kw_model = KeyBERT(model)
+
+# === DB Credentials ===
+DB_HOST = os.getenv("DB_HOST", "localhost")
+DB_PORT = int(os.getenv("DB_PORT", 3306))
+DB_USER = os.getenv("DB_USER", "emailuser")
+DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020")
+DB_NAME = os.getenv("DB_NAME", "emailassistant")
+
+# === Connect to DB ===
+conn = mysql.connector.connect(
+    host=DB_HOST,
+    port=DB_PORT,
+    user=DB_USER,
+    password=DB_PASSWORD,
+    database=DB_NAME
+)
+cursor = conn.cursor(dictionary=True)
+
+# === Logging Helper ===
+def log_event(cursor, level, source, message):
+    try:
+        cursor.execute(
+            "INSERT INTO logs (level, source, message) VALUES (%s, %s, %s)",
+            (level, source, message)
+        )
+    except Exception as e:
+        print(f"[LOG ERROR] Failed to log event: {e}")
+
+# === Subject-Based Summarization Using KeyBERT ===
+def summarize_subject(subject):
+    keywords = kw_model.extract_keywords(
+        subject,
+        keyphrase_ngram_range=(1, 2),
+        stop_words='english',
+        top_n=1
+    )
+
+    summary = keywords[0][0] if keywords else subject
+    confidence = round(len(summary.split()) / max(1, len(subject.split())), 2)
+
+    if len(summary.split()) < 1 or confidence < 0.2:
+        return subject, 1.0
+
+    return summary.strip(), confidence
+
+# === Fetch emails ===
+cursor.execute("SELECT id, subject FROM emails")
+emails = cursor.fetchall()
+
+# === Main Processing Loop ===
+for email in emails:
+    email_id = email["id"]
+    subject = email["subject"]
+
+    if not subject or not subject.strip():
+        log_event(cursor, "WARNING", "subject_summarizer", f"Skipped empty subject for email ID {email_id}")
+        continue
+
+    try:
+        summary, confidence = summarize_subject(subject)
+
+        cursor.execute("""
+            UPDATE emails
+            SET ai_summary = %s,
+                ai_confidence = %s
+            WHERE id = %s
+        """, (summary, confidence, email_id))
+
+        log_event(cursor, "INFO", "subject_summarizer", f"Subject summarized for email ID {email_id}")
+        print(f"✅ Subject summarized for email {email_id} (confidence: {confidence})")
+
+    except Exception as e:
+        log_event(cursor, "ERROR", "subject_summarizer", f"Error on email ID {email_id}: {str(e)}")
+        print(f"❌ Error summarizing subject for email {email_id}: {e}")
+
+# === Commit & Close ===
+conn.commit()
+cursor.close()
+conn.close()
diff --git a/Obsolete/cleaner.py b/Obsolete/cleaner.py
new file mode 100644
index 0000000..53ca9d1
--- /dev/null
+++ b/Obsolete/cleaner.py
@@ -0,0 +1,97 @@
+import mysql.connector
+import json
+import re
+import spacy
+from bs4 import BeautifulSoup
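(Editor's aside: the KeyBERT pattern in bert_subject_summariser.py above can be exercised without a database. A minimal sketch, assuming keybert and sentence-transformers are installed; the sample subject is invented.)

```python
# Standalone sketch of the subject-summarization pattern used above.
from keybert import KeyBERT
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
kw_model = KeyBERT(model)

subject = "50% off spring clearance - this weekend only"  # invented sample
keywords = kw_model.extract_keywords(
    subject, keyphrase_ngram_range=(1, 2), stop_words="english", top_n=1
)
# Fall back to the raw subject when extraction yields nothing.
summary = keywords[0][0] if keywords else subject
confidence = round(len(summary.split()) / max(1, len(subject.split())), 2)
print(summary, confidence)
```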
+from datetime import datetime
+
+# === Load spaCy model ===
+nlp = spacy.load("en_core_web_sm")
+
+# === Logging helper ===
+def log_event(cursor, level, source, message):
+    cursor.execute(
+        "INSERT INTO logs (level, source, message) VALUES (%s, %s, %s)",
+        (level, source, message)
+    )
+
+# === Extract all links from body ===
+def extract_links(text):
+    return re.findall(r'https?://[^\s<>()"]+', text)
+
+# === Extract unsubscribe links ===
+def extract_unsubscribe_link(text):
+    # Match links that contain the word "unsubscribe"
+    matches = re.findall(r'(https?://[^\s()"]*unsubscribe[^\s()"]*)', text, re.IGNORECASE)
+    if matches:
+        return matches[0]  # Return the first match
+    return None
+
+# === Clean email body ===
+def clean_body(body):
+    soup = BeautifulSoup(body, "html.parser")
+    return soup.get_text(separator=' ', strip=True)
+
+# === Main cleaning logic ===
+def clean_emails():
+    conn = mysql.connector.connect(
+        host="localhost",
+        user="emailuser",
+        password="miguel33020",
+        database="emailassistant"
+    )
+    cursor = conn.cursor(dictionary=True)
+
+    cursor.execute("SELECT * FROM emails WHERE body IS NOT NULL")
+    emails = cursor.fetchall()
+
+    for email in emails:
+        email_id = email["id"]
+        body = email["body"]
+
+        cleaned_body = clean_body(body)
+        links = extract_links(cleaned_body)
+        unsubscribe_link = extract_unsubscribe_link(cleaned_body)
+
+        # Attempt to parse attachments
+        attachments_data = None
+        if email.get("attachments"):
+            try:
+                attachments_data = json.loads(email["attachments"])
+            except json.JSONDecodeError:
+                try:
+                    # Quick fix: replace single quotes with double quotes
+                    attachments_data = json.loads(email["attachments"].replace("'", '"'))
+                    log_event(cursor, "WARNING", "cleaner", f"Auto-corrected JSON in attachments (email ID {email_id})")
+                except Exception as e2:
+                    log_event(cursor, "ERROR", "cleaner", f"Attachment parse failed (ID {email_id}): {str(e2)}")
+                    attachments_data = None
+
+        # Update database
+        try:
+            cursor.execute("""
+                UPDATE emails
+                SET body = %s,
+                    links = %s,
+                    unsubscribe_data = %s,
+                    attachments = %s
+                WHERE id = %s
+            """, (
+                cleaned_body,
+                json.dumps(links),
+                unsubscribe_link,
+                json.dumps(attachments_data) if attachments_data else None,
+                email_id
+            ))
+            conn.commit()
+            print(f"✅ Cleaned email {email_id}")
+            log_event(cursor, "INFO", "cleaner", f"Successfully cleaned email ID {email_id}")
+        except Exception as e:
+            print(f"❌ Error updating email {email_id}: {e}")
+            log_event(cursor, "ERROR", "cleaner", f"DB update failed for email ID {email_id}: {str(e)}")
+
+    cursor.close()
+    conn.close()
+
+if __name__ == "__main__":
+    clean_emails()
diff --git a/Obsolete/compose.yml b/Obsolete/compose.yml
new file mode 100644
index 0000000..6e4fb25
--- /dev/null
+++ b/Obsolete/compose.yml
@@ -0,0 +1,19 @@
+version: '3.8'
+
+services:
+  mariadb:
+    image: lscr.io/linuxserver/mariadb:latest
+    container_name: mariadb
+    environment:
+      - PUID=1000
+      - PGID=1000
+      - TZ=Etc/UTC
+      - MYSQL_ROOT_PASSWORD=miguel33020
+      - MYSQL_DATABASE=emailassistant
+      - MYSQL_USER=emailuser
+      - MYSQL_PASSWORD=miguel33020
+    volumes:
+      - C:/Users/migue/mariadb_config:/config
+    ports:
+      - 3306:3306
+    restart: unless-stopped
diff --git a/Obsolete/credentials.json b/Obsolete/credentials.json
new file mode 100644
index 0000000..696d59b
--- /dev/null
+++ b/Obsolete/credentials.json
@@ -0,0 +1 @@
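(Editor's aside: the helpers in cleaner.py are easy to sanity-check on a made-up HTML body. Note that extract_links runs on the already-cleaned text, where get_text() has dropped href attributes, so URLs that appear only in attributes are lost; the sketch below scans the raw HTML instead. All inputs are invented.)

```python
# DB-free check of the cleaning/extraction logic above.
import re
from bs4 import BeautifulSoup

html = ('<p>Big sale! Visit https://shop.example/deals now. '
        '<a href="https://shop.example/unsubscribe?u=1">Unsubscribe</a></p>')
text = BeautifulSoup(html, "html.parser").get_text(separator=" ", strip=True)
# Scanning the raw HTML keeps URLs that live only inside href attributes.
links = re.findall(r'https?://[^\s<>()"]+', html)
unsub = next((l for l in links if "unsubscribe" in l.lower()), None)
print(text)
print(links)
print(unsub)
```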
+{"installed":{"client_id":"712638107230-am5njg9pf0aj9plh1kbtv2h085dveo1q.apps.googleusercontent.com","project_id":"ez-email-agent","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":"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","redirect_uris":["http://localhost"]}} \ No newline at end of file diff --git a/Obsolete/database.py b/Obsolete/database.py new file mode 100644 index 0000000..421a353 --- /dev/null +++ b/Obsolete/database.py @@ -0,0 +1,32 @@ +import mysql.connector +import os + +# Load database credentials from environment variables +DB_HOST = os.getenv("DB_HOST", "localhost") # Your server's IP +DB_PORT = int(os.getenv("DB_PORT", "3306")) # Convert port to integer +DB_USER = os.getenv("DB_USER", "emailuser") +DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020") +DB_NAME = os.getenv("DB_NAME", "emailassistant") + +def connect_db(): + try: + conn = mysql.connector.connect( + host=DB_HOST, + port=DB_PORT, # Now it's an integer + user=DB_USER, + password=DB_PASSWORD, + database=DB_NAME + ) + print("✅ Connected to MariaDB successfully!") + return conn + except mysql.connector.Error as err: + print(f"❌ Error: {err}") + return None + +# Test connection +if __name__ == "__main__": + conn = connect_db() + if conn: + conn.close() + + diff --git a/Obsolete/gmail_to_db_test.py b/Obsolete/gmail_to_db_test.py new file mode 100644 index 0000000..d10402a --- /dev/null +++ b/Obsolete/gmail_to_db_test.py @@ -0,0 +1,135 @@ +from google_auth_oauthlib.flow import InstalledAppFlow +from googleapiclient.discovery import build +import base64 +import mysql.connector +import os +import yaml +import datetime +from initialize_db import initialize_database + +SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"] + +# === Load DB credentials === +DB_HOST = os.getenv("DB_HOST", "localhost") +DB_PORT = int(os.getenv("DB_PORT", 3306)) +DB_USER = os.getenv("DB_USER", "emailuser") +DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020") +DB_NAME = os.getenv("DB_NAME", "emailassistant") + +def authenticate_gmail(): + flow = InstalledAppFlow.from_client_secrets_file("credentials.json", SCOPES) + creds = flow.run_local_server(port=0) + return build("gmail", "v1", credentials=creds) + +def get_header(headers, name): + for h in headers: + if h["name"].lower() == name.lower(): + return h["value"] + return None + +def decode_body(payload): + if "data" in payload.get("body", {}): + return base64.urlsafe_b64decode(payload["body"]["data"]).decode("utf-8", errors="ignore") + elif "parts" in payload: + for part in payload["parts"]: + if part.get("mimeType") == "text/plain" and "data" in part.get("body", {}): + return base64.urlsafe_b64decode(part["body"]["data"]).decode("utf-8", errors="ignore") + return "" + +def insert_into_db(email_data): + conn = mysql.connector.connect( + host=DB_HOST, + port=DB_PORT, + user=DB_USER, + password=DB_PASSWORD, + database=DB_NAME + ) + cursor = conn.cursor() + + query = """ + INSERT IGNORE INTO emails ( + user, account, message_id, thread_id, account_id, sender, cc, subject, body, links, + received_at, folder, attachments, is_read, labels, + ai_category, ai_confidence, ai_summary, + processing_status, sync_status, attachment_path, downloaded + ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, + %s, %s, %s, %s, %s, + %s, %s, %s, + %s, %s, %s, %s) + """ + + try: + cursor.execute(query, ( + email_data.get("user", "default_user"), + 
email_data.get("account", "main_account"), + email_data["message_id"], + email_data["thread_id"], + email_data.get("account_id", ""), + email_data["sender"], + email_data["cc"], + email_data["subject"], + email_data["body"], + email_data.get("links", ""), + email_data["received_at"], + email_data.get("folder", "inbox"), + email_data["attachments"], + False, + email_data["labels"], + None, None, None, + "unprocessed", + "synced", + None, + False + )) + print(f"✅ Stored: {email_data['subject'][:60]}...") + except Exception as e: + print("❌ Error inserting into DB:", e) + + conn.commit() + cursor.close() + conn.close() + +def fetch_and_store_emails(service): + results = service.users().messages().list(userId="me", maxResults=500).execute() + messages = results.get("messages", []) + + for msg in messages: + msg_data = service.users().messages().get(userId="me", id=msg["id"]).execute() + + payload = msg_data.get("payload", {}) + headers = payload.get("headers", []) + + sender = get_header(headers, "From") + cc = get_header(headers, "Cc") + subject = get_header(headers, "Subject") + date_str = get_header(headers, "Date") + body = decode_body(payload) + + try: + received_at = datetime.datetime.strptime(date_str, "%a, %d %b %Y %H:%M:%S %z") + except: + received_at = datetime.datetime.utcnow() + + email_data = { + "user": "default_user", + "account": "main_account", + "message_id": msg_data["id"], + "thread_id": msg_data.get("threadId"), + "account_id": "", + "sender": sender, + "cc": cc, + "subject": subject, + "body": body, + "links": "", # Placeholder, will be populated by AI + "received_at": received_at, + "folder": "inbox", + "attachments": str(payload.get("parts", [])), + "labels": str(msg_data.get("labelIds", [])) + } + + insert_into_db(email_data) + +if __name__ == "__main__": + initialize_database() + gmail_service = authenticate_gmail() + fetch_and_store_emails(gmail_service) diff --git a/Obsolete/initialize_db.py b/Obsolete/initialize_db.py new file mode 100644 index 0000000..6d6dc6b --- /dev/null +++ b/Obsolete/initialize_db.py @@ -0,0 +1,93 @@ +import mysql.connector +import yaml +import os + +def initialize_database(): + # === Load config file === + with open("config.yml", "r") as file: + config = yaml.safe_load(file) + + # === DB Connection === + conn = mysql.connector.connect( + host=os.getenv("DB_HOST", "localhost"), + port=os.getenv("DB_PORT", 3306), + user=os.getenv("DB_USER", "emailuser"), + password=os.getenv("DB_PASSWORD", "miguel33020"), + database=os.getenv("DB_NAME", "emailassistant") + ) + cursor = conn.cursor() + + # === Table: metadata (previously main_account) === + cursor.execute(""" + CREATE TABLE IF NOT EXISTS metadata ( + id INT AUTO_INCREMENT PRIMARY KEY, + user VARCHAR(255), + email VARCHAR(255) UNIQUE NOT NULL, + token TEXT + ); + """) + print("✅ Table ready: metadata") + +# === Table: emails === + cursor.execute(""" + CREATE TABLE IF NOT EXISTS emails ( + id INT AUTO_INCREMENT PRIMARY KEY, + user VARCHAR(255), + account VARCHAR(255), + message_id VARCHAR(255) UNIQUE, + thread_id VARCHAR(255), + account_id VARCHAR(255), + sender VARCHAR(255), + cc TEXT, + subject TEXT, + body LONGTEXT, + links LONGTEXT, + unsubscribe_data TEXT, + received_at DATETIME, + folder VARCHAR(50), + attachments LONGTEXT, + is_read BOOLEAN DEFAULT FALSE, + labels LONGTEXT, + + -- 🔍 AI-Generated Fields + ai_category VARCHAR(100), -- Top-level (e.g. 
'promo') + ai_confidence FLOAT, -- Confidence score + ai_summary TEXT, -- Summary of subject/body + ai_keywords TEXT, -- Comma-separated extracted keywords + ai_label_source VARCHAR(100), -- 'subject', 'body', 'combined', 'llm' + summary_source VARCHAR(100), -- Similar to above + ai_model_version VARCHAR(100), -- Versioning helps long-term debugging + is_ai_reviewed BOOLEAN DEFAULT FALSE, -- Was this fully processed by AI? + processing_notes TEXT, -- Optional notes about fallback, etc. + + -- 🔄 Sync and Processing Status + processing_status VARCHAR(50), + sync_status VARCHAR(50), + attachment_path TEXT, + downloaded BOOLEAN DEFAULT FALSE + ); + """) + print("✅ Table ready: emails") + + + # === Table: logs === + cursor.execute(""" + CREATE TABLE IF NOT EXISTS logs ( + id INT AUTO_INCREMENT PRIMARY KEY, + timestamp DATETIME DEFAULT CURRENT_TIMESTAMP, + level VARCHAR(20), + source VARCHAR(255), + message TEXT + ); + """) + print("✅ Table ready: logs") + + cursor.close() + conn.close() + +# if __name__ == "__main__": +# initialize_database() + + +#if __name__ == "__main__": +# initialize_database() diff --git a/Obsolete/insight,py b/Obsolete/insight,py new file mode 100644 index 0000000..9540fe7 --- /dev/null +++ b/Obsolete/insight,py @@ -0,0 +1,58 @@ +import os +import mysql.connector +from keybert import KeyBERT +from sentence_transformers import SentenceTransformer +from collections import Counter + +# === Load multilingual model for KeyBERT === +model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2") +kw_model = KeyBERT(model) + +# === DB Credentials === +DB_HOST = os.getenv("DB_HOST", "localhost") +DB_PORT = int(os.getenv("DB_PORT", 3306)) +DB_USER = os.getenv("DB_USER", "emailuser") +DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020") +DB_NAME = os.getenv("DB_NAME", "emailassistant") + +# === Connect to DB === +conn = mysql.connector.connect( + host=DB_HOST, + port=DB_PORT, + user=DB_USER, + password=DB_PASSWORD, + database=DB_NAME +) +cursor = conn.cursor(dictionary=True) + +# === Fetch only unlabeled emails === +cursor.execute("SELECT id, subject FROM emails WHERE ai_category = 'unlabeled'") +emails = cursor.fetchall() + +print(f"🔍 Analyzing {len(emails)} unlabeled emails...") + +keyword_counter = Counter() + +for email in emails: + subject = email["subject"] + if not subject: + continue + + try: + keywords = kw_model.extract_keywords( + subject, + keyphrase_ngram_range=(1, 2), + stop_words="english", + top_n=5 + ) + keyword_counter.update([kw[0].lower() for kw in keywords]) + except Exception as e: + print(f"❌ Error processing email ID {email['id']}: {e}") + +# === Output top missing keywords === +print("\n📊 Top keywords in unlabeled emails:") +for word, count in keyword_counter.most_common(30): + print(f"{word}: {count}") + +cursor.close() +conn.close() diff --git a/Obsolete/labeler.py b/Obsolete/labeler.py new file mode 100644 index 0000000..0540943 --- /dev/null +++ b/Obsolete/labeler.py @@ -0,0 +1,115 @@ +import os +import yaml +import mysql.connector +from keybert import KeyBERT +from sentence_transformers import SentenceTransformer + + +# === Load multilingual model for KeyBERT === +model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2") +kw_model = KeyBERT(model) + +# === Load label hierarchy from YAML === +LABEL_FILE = os.getenv("LABEL_CONFIG_PATH", "labels.yml") +with open(LABEL_FILE, "r", encoding="utf-8") as f: + label_config = yaml.safe_load(f) + +print(f"📂 Using label config: {LABEL_FILE}") +print(label_config) + + +# === DB Credentials === 
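(Editor's aside: the tallying step in insight,py reduces to a Counter over per-email keyword lists. A DB-free sketch with invented keywords.)

```python
# Toy version of the insight pass above: tally KeyBERT keywords across
# a few hypothetical unlabeled subjects, no database required.
from collections import Counter

fake_keywords_per_email = [
    ["steam wishlist", "bonus"],
    ["invoice", "payment due"],
    ["steam wishlist", "dlc"],
]
keyword_counter = Counter(kw for kws in fake_keywords_per_email for kw in kws)
for word, count in keyword_counter.most_common(3):
    print(f"{word}: {count}")
```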
+DB_HOST = os.getenv("DB_HOST", "localhost")
+DB_PORT = int(os.getenv("DB_PORT", 3306))
+DB_USER = os.getenv("DB_USER", "emailuser")
+DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020")
+DB_NAME = os.getenv("DB_NAME", "emailassistant")
+
+# === Connect to DB ===
+conn = mysql.connector.connect(
+    host=DB_HOST,
+    port=DB_PORT,
+    user=DB_USER,
+    password=DB_PASSWORD,
+    database=DB_NAME
+)
+cursor = conn.cursor(dictionary=True)
+
+# === Logging Helper ===
+def log_event(cursor, level, source, message):
+    cursor.execute(
+        "INSERT INTO logs (level, source, message) VALUES (%s, %s, %s)",
+        (level, source, message)
+    )
+
+# === Recursive label matcher ===
+def match_labels(keywords, label_tree, prefix=""):
+    for label, data in label_tree.items():
+        full_label = f"{prefix}/{label}".strip("/")
+        label_keywords = [kw.lower() for kw in data.get("keywords", [])]
+
+        # First check children
+        children = data.get("children", {})
+        child_match = match_labels(keywords, children, prefix=full_label)
+        if child_match:
+            return child_match
+
+        # Then check this level (so children take priority)
+        if any(kw in keywords for kw in label_keywords):
+            return full_label
+
+    return None
+
+
+
+# === Fetch emails that haven't been labeled ===
+cursor.execute("SELECT id, subject, ai_category FROM emails")
+emails = cursor.fetchall()
+
+# === Main Labeling Loop ===
+for email in emails:
+    email_id = email["id"]
+    subject = email["subject"]
+    current_label = email["ai_category"]
+
+#    if current_label not in [None, "None", ""]:
+#        print(f"ℹ️ Email {email_id} already has label '{current_label}'")
+#        continue
+
+    if not subject or not subject.strip():
+        log_event(cursor, "WARNING", "labeler", f"Skipped empty subject for email ID {email_id}")
+        continue
+
+    try:
+        keywords = kw_model.extract_keywords(
+            subject,
+            keyphrase_ngram_range=(1, 2),
+            stop_words="english",
+            top_n=5
+        )
+        keyword_set = set(k[0].lower() for k in keywords)
+        label = match_labels(keyword_set, label_config) or "unlabeled"
+
+        cursor.execute("""
+            UPDATE emails
+            SET ai_category = %s,
+                ai_keywords = %s,
+                ai_label_source = %s,
+                ai_confidence = %s,
+                is_ai_reviewed = FALSE
+            WHERE id = %s
+        """, (label, ", ".join(keyword_set), "labeler_v1.0", 1.0, email_id))
+
+
+        log_event(cursor, "INFO", "labeler", f"Labeled email {email_id} as '{label}'")
+        print(f"🏷️ Email {email_id} labeled as: {label}")
+
+
+    except Exception as e:
+        log_event(cursor, "ERROR", "labeler", f"Error labeling email ID {email_id}: {str(e)}")
+        print(f"❌ Error labeling email {email_id}: {e}")
+
+# === Commit & Close ===
+conn.commit()
+cursor.close()
+conn.close()
diff --git a/Obsolete/migrations.py b/Obsolete/migrations.py
new file mode 100644
index 0000000..450a6cc
--- /dev/null
+++ b/Obsolete/migrations.py
@@ -0,0 +1,54 @@
+import os
+import mysql.connector
+from datetime import datetime
+
+# === DB Credentials ===
+DB_HOST = os.getenv("DB_HOST", "localhost")
+DB_PORT = int(os.getenv("DB_PORT", 3306))
+DB_USER = os.getenv("DB_USER", "emailuser")
+DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020")
+DB_NAME = os.getenv("DB_NAME", "emailassistant")
+
+# === Connect to DB ===
+conn = mysql.connector.connect(
+    host=DB_HOST,
+    port=DB_PORT,
+    user=DB_USER,
+    password=DB_PASSWORD,
+    database=DB_NAME
+)
+cursor = conn.cursor()
+
+# === Logging Helper ===
+def log_event(cursor, level, source, message):
+    cursor.execute(
+        "INSERT INTO logs (level, source, message, timestamp) VALUES (%s, %s, %s, %s)",
+        (level, source, message, datetime.now())
+    )
+
+# === Migration Commands ===
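(Editor's aside: match_labels in labeler.py descends into children before testing the current level, so a child label wins when both match. A self-contained sketch with a toy tree, not the real labels.yml.)

```python
# Same matcher as labeler.py above, exercised on a two-level toy tree.
def match_labels(keywords, label_tree, prefix=""):
    for label, data in label_tree.items():
        full_label = f"{prefix}/{label}".strip("/")
        label_keywords = [kw.lower() for kw in data.get("keywords", [])]
        # Children are tried first, so the most specific label wins.
        child_match = match_labels(keywords, data.get("children", {}), prefix=full_label)
        if child_match:
            return child_match
        if any(kw in keywords for kw in label_keywords):
            return full_label
    return None

tree = {"promo": {"keywords": ["sale"],
                  "children": {"coupons": {"keywords": ["coupon"]}}}}
print(match_labels({"coupon", "sale"}, tree))  # -> promo/coupons
```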
+migration_commands = [
+    "ALTER TABLE emails ADD COLUMN IF NOT EXISTS ai_keywords TEXT;",
+    "ALTER TABLE emails ADD COLUMN IF NOT EXISTS ai_label_source VARCHAR(100);",
+    "ALTER TABLE emails ADD COLUMN IF NOT EXISTS summary_source VARCHAR(100);",
+    "ALTER TABLE emails ADD COLUMN IF NOT EXISTS ai_model_version VARCHAR(100);",
+    "ALTER TABLE emails ADD COLUMN IF NOT EXISTS is_ai_reviewed BOOLEAN DEFAULT FALSE;",
+    "ALTER TABLE emails ADD COLUMN IF NOT EXISTS processing_notes TEXT;",
+]
+
+# === Apply Migrations ===
+print("🚀 Starting migrations...")
+for cmd in migration_commands:
+    try:
+        cursor.execute(cmd)
+        log_event(cursor, "INFO", "migrations", f"Executed: {cmd}")
+        print(f"✅ Executed: {cmd}")
+    except mysql.connector.Error as err:
+        log_event(cursor, "WARNING", "migrations", f"Skipped or failed: {cmd} -> {err}")
+        print(f"⚠️ Skipped or failed: {cmd} -> {err}")
+
+# === Commit & Close ===
+conn.commit()
+cursor.close()
+conn.close()
+print("✅ Migration complete.")
diff --git a/Obsolete/nlp_summary.py b/Obsolete/nlp_summary.py
new file mode 100644
index 0000000..33e3f88
--- /dev/null
+++ b/Obsolete/nlp_summary.py
@@ -0,0 +1,89 @@
+import spacy
+import mysql.connector
+import os
+import sys
+from collections import Counter
+from string import punctuation
+
+# === Load spaCy model ===
+nlp = spacy.load("en_core_web_sm")
+
+# === DB Credentials ===
+DB_HOST = os.getenv("DB_HOST", "localhost")
+DB_PORT = int(os.getenv("DB_PORT", 3306))
+DB_USER = os.getenv("DB_USER", "emailuser")
+DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020")
+DB_NAME = os.getenv("DB_NAME", "emailassistant")
+
+# === Connect to DB ===
+conn = mysql.connector.connect(
+    host=DB_HOST,
+    port=DB_PORT,
+    user=DB_USER,
+    password=DB_PASSWORD,
+    database=DB_NAME
+)
+cursor = conn.cursor(dictionary=True)
+
+# === Logging Helper ===
+def log_event(cursor, level, source, message):
+    cursor.execute(
+        "INSERT INTO logs (level, source, message) VALUES (%s, %s, %s)",
+        (level, source, message)
+    )
+
+# === Summarization Logic ===
+def summarize(text, max_sentences=3):
+    doc = nlp(text)
+    words = [token.text.lower() for token in doc if token.is_alpha and not token.is_stop]
+    word_freq = Counter(words)
+
+    sentence_scores = {}
+    for sent in doc.sents:
+        for word in sent:
+            if word.text.lower() in word_freq:
+                sentence_scores[sent] = sentence_scores.get(sent, 0) + word_freq[word.text.lower()]
+
+    summarized = sorted(sentence_scores, key=sentence_scores.get, reverse=True)[:max_sentences]
+    return " ".join(str(s) for s in summarized)
+
+# === Fetch All Emails with Missing Summaries ===
+cursor.execute("SELECT id, body FROM emails WHERE ai_summary IS NULL")
+emails = cursor.fetchall()
+
+# === Main Processing Loop ===
+for email in emails:
+    email_id = email["id"]
+    body = email["body"]
+
+    if not body or not body.strip():
+        log_event(cursor, "WARNING", "summarizer", f"Skipped empty body for email ID {email_id}")
+        continue
+
+    try:
+        summary = summarize(body)
+        if not summary.strip():
+            summary = "No meaningful summary could be generated."
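(Editor's aside: the frequency-scoring idea behind summarize() in nlp_summary.py works standalone. A minimal sketch; it requires the en_core_web_sm model, and the sample text is invented.)

```python
# DB-free version of the extractive summarizer above.
# Setup: python -m spacy download en_core_web_sm
import spacy
from collections import Counter

nlp = spacy.load("en_core_web_sm")

def summarize(text, max_sentences=2):
    doc = nlp(text)
    # Score each sentence by the corpus-wide frequency of its content words.
    freq = Counter(t.text.lower() for t in doc if t.is_alpha and not t.is_stop)
    scores = {}
    for sent in doc.sents:
        for word in sent:
            if word.text.lower() in freq:
                scores[sent] = scores.get(sent, 0) + freq[word.text.lower()]
    top = sorted(scores, key=scores.get, reverse=True)[:max_sentences]
    return " ".join(str(s) for s in top)

print(summarize("Spacy scores sentences by word frequency. "
                "Frequent words push a sentence up. Rare filler sinks."))
```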
+
+        # Optional confidence (ratio of summary length to original body)
+        confidence = round(len(summary.split()) / max(1, len(body.split())), 2)
+
+        # Update email
+        cursor.execute("""
+            UPDATE emails
+            SET ai_summary = %s,
+                ai_confidence = %s
+            WHERE id = %s
+        """, (summary, confidence, email_id))
+
+        log_event(cursor, "INFO", "summarizer", f"Summarized email ID {email_id}")
+        print(f"✅ Summarized email {email_id} (confidence: {confidence})")
+
+    except Exception as e:
+        log_event(cursor, "ERROR", "summarizer", f"Error summarizing email ID {email_id}: {str(e)}")
+        print(f"❌ Error summarizing email {email_id}: {e}")
+
+# === Commit & Close ===
+conn.commit()
+cursor.close()
+conn.close()
diff --git a/Obsolete/requirements.txt b/Obsolete/requirements.txt
new file mode 100644
index 0000000..76320e1
--- /dev/null
+++ b/Obsolete/requirements.txt
@@ -0,0 +1,6 @@
+google-auth
+google-auth-oauthlib
+google-auth-httplib2
+google-api-python-client
+openai
+transformers
diff --git a/Obsolete/smart_labler.py b/Obsolete/smart_labler.py
new file mode 100644
index 0000000..6cd8601
--- /dev/null
+++ b/Obsolete/smart_labler.py
@@ -0,0 +1,135 @@
+import os
+import ast
+import yaml
+import mysql.connector
+from keybert import KeyBERT
+from sentence_transformers import SentenceTransformer
+from collections import Counter
+
+# === Load multilingual model for KeyBERT ===
+model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
+kw_model = KeyBERT(model)
+
+# === Load label hierarchy from YAML ===
+LABEL_FILE = os.getenv("LABEL_CONFIG_PATH", "labels.yml")
+with open(LABEL_FILE, "r", encoding="utf-8") as f:
+    label_config = yaml.safe_load(f)
+
+# === DB Credentials ===
+DB_HOST = os.getenv("DB_HOST", "localhost")
+DB_PORT = int(os.getenv("DB_PORT", 3306))
+DB_USER = os.getenv("DB_USER", "emailuser")
+DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020")
+DB_NAME = os.getenv("DB_NAME", "emailassistant")
+
+# === Connect to DB ===
+conn = mysql.connector.connect(
+    host=DB_HOST,
+    port=DB_PORT,
+    user=DB_USER,
+    password=DB_PASSWORD,
+    database=DB_NAME
+)
+cursor = conn.cursor(dictionary=True)
+
+# === Logging Helper ===
+def log_event(cursor, level, source, message):
+    try:
+        cursor.execute(
+            "INSERT INTO logs (level, source, message) VALUES (%s, %s, %s)",
+            (level, source, message)
+        )
+    except Exception:
+        print(f"[LOG ERROR] {level} from {source}: {message}")
+
+# === Recursive label matcher ===
+def match_labels(keywords, label_tree, prefix=""):
+    for label, data in label_tree.items():
+        full_label = f"{prefix}/{label}".strip("/")
+        label_keywords = [kw.lower() for kw in data.get("keywords", [])]
+        if any(kw in keywords for kw in label_keywords):
+            children = data.get("children", {})
+            child_match = match_labels(keywords, children, prefix=full_label)
+            return child_match if child_match else full_label
+    return None
+
+# === Smart Label Aggregator ===
+def smart_label(email):
+    votes = []
+
+    # 1. FROM address rules (guard against NULL sender from the DB)
+    from_addr = (email.get("sender") or "").lower()
+    if any(x in from_addr for x in ["paypal", "bankofamerica", "chase"]):
+        votes.append("bank")
+    if "indeed" in from_addr or "hiring" in from_addr:
+        votes.append("job")
+
+    # 2. Subject keyword analysis
+    subject = email.get("subject", "")
+    if subject:
+        keywords = kw_model.extract_keywords(
+            subject, keyphrase_ngram_range=(1, 2), stop_words="english", top_n=5
+        )
+        keyword_set = set(k[0].lower() for k in keywords)
+        label_from_subject = match_labels(keyword_set, label_config)
+        if label_from_subject:
+            votes.append(label_from_subject)
+
+    # 3. AI summary matching (ai_summary may be NULL)
+    summary = (email.get("ai_summary") or "").lower()
+    if "payment" in summary or "transaction" in summary:
+        votes.append("bank")
+    if "your order" in summary or "delivered" in summary:
+        votes.append("promo")
+
+    # 4. Gmail label logic (from "labels" column)
+    raw_label = email.get("labels", "")
+    try:
+        gmail_labels = ast.literal_eval(raw_label) if raw_label else []
+        gmail_labels = [label.upper() for label in gmail_labels]
+    except (ValueError, SyntaxError):
+        gmail_labels = []
+
+    if "CATEGORY_PROMOTIONS" in gmail_labels:
+        votes.append("promo")
+    elif "CATEGORY_SOCIAL" in gmail_labels:
+        votes.append("social")
+    elif "CATEGORY_UPDATES" in gmail_labels:
+        votes.append("work")
+    elif "IMPORTANT" in gmail_labels:
+        votes.append("work")
+
+    # 5. Count votes
+    label_counts = Counter(votes)
+    return label_counts.most_common(1)[0][0] if label_counts else "unlabeled"
+
+# === Fetch unlabeled emails ===
+cursor.execute("SELECT id, sender, subject, ai_summary, labels, ai_category FROM emails")
+
+emails = cursor.fetchall()
+print(f"📬 Found {len(emails)} total emails for re-labeling")
+
+# === Main Labeling Loop ===
+for email in emails:
+    email_id = email["id"]
+    try:
+        label = smart_label(email)
+        cursor.execute("""
+            UPDATE emails
+            SET ai_category = %s,
+                ai_label_source = %s,
+                is_ai_reviewed = FALSE
+            WHERE id = %s
+        """, (label, "smart_labeler", email_id))
+
+        log_event(cursor, "INFO", "smart_labeler", f"Labeled email {email_id} as '{label}'")
+        print(f"🏷️ Email {email_id} labeled as: {label}")
+
+    except Exception as e:
+        log_event(cursor, "ERROR", "smart_labeler", f"Error labeling email {email_id}: {str(e)}")
+        print(f"❌ Error labeling email {email_id}: {e}")
+
+# === Commit & Close ===
+conn.commit()
+cursor.close()
+conn.close()
diff --git a/Obsolete/subject_summariser.py b/Obsolete/subject_summariser.py
new file mode 100644
index 0000000..d3019d5
--- /dev/null
+++ b/Obsolete/subject_summariser.py
@@ -0,0 +1,94 @@
+import spacy
+import mysql.connector
+import os
+import sys
+from collections import Counter
+
+# === Load spaCy model ===
+nlp = spacy.load("en_core_web_sm")
+
+# === DB Credentials ===
+DB_HOST = os.getenv("DB_HOST", "localhost")
+DB_PORT = int(os.getenv("DB_PORT", 3306))
+DB_USER = os.getenv("DB_USER", "emailuser")
+DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020")
+DB_NAME = os.getenv("DB_NAME", "emailassistant")
+
+# === Connect to DB ===
+conn = mysql.connector.connect(
+    host=DB_HOST,
+    port=DB_PORT,
+    user=DB_USER,
+    password=DB_PASSWORD,
+    database=DB_NAME
+)
+cursor = conn.cursor(dictionary=True)
+
+# === Logging Helper ===
+def log_event(cursor, level, source, message):
+    cursor.execute(
+        "INSERT INTO logs (level, source, message) VALUES (%s, %s, %s)",
+        (level, source, message)
+    )
+
+# === Subject-Based Summarization ===
+def summarize_subject(subject):
+    doc = nlp(subject)
+    keywords = [token.text for token in doc if token.is_alpha and not token.is_stop]
+    if not keywords:
+        return subject, 1.0  # fallback to raw subject
+
+    # Prioritize noun chunks that include keywords
+    noun_chunks = list(doc.noun_chunks)
+    chunks = [chunk.text for chunk in noun_chunks if any(tok.text in keywords for tok in chunk)]
+
+    # Combine and limit summary length
+    compressed = " ".join(chunks or keywords)
+    compressed_words = compressed.split()
+    subject_word_count = len(subject.split())
+    summary = " ".join(compressed_words[:max(1, subject_word_count - 1)]).strip()
+
+    # Confidence is relative to subject word count
+    confidence = round(len(summary.split()) / max(1, subject_word_count), 2)
+
+    # Fallback if summary is too short or confidence too low
+    if len(summary.split()) < 2 or confidence < 0.3:
+        return subject, 1.0
+
+    return summary, confidence
+
+# === Fetch emails ===
+cursor.execute("SELECT id, subject FROM emails")
+emails = cursor.fetchall()
+
+# === Main Processing Loop ===
+for email in emails:
+    email_id = email["id"]
+    subject = email["subject"]
+
+    if not subject or not subject.strip():
+        log_event(cursor, "WARNING", "subject_summarizer", f"Skipped empty subject for email ID {email_id}")
+        continue
+
+    try:
+        summary, confidence = summarize_subject(subject)
+
+        cursor.execute("""
+            UPDATE emails
+            SET ai_summary = %s,
+                ai_confidence = %s
+            WHERE id = %s
+        """, (summary, confidence, email_id))
+
+        log_event(cursor, "INFO", "subject_summarizer", f"Subject summarized for email ID {email_id}")
+        print(f"✅ Subject summarized for email {email_id} (confidence: {confidence})")
+
+    except Exception as e:
+        log_event(cursor, "ERROR", "subject_summarizer", f"Error on email ID {email_id}: {str(e)}")
+        print(f"❌ Error summarizing subject for email {email_id}: {e}")
+
+
+# === Commit & Close ===
+conn.commit()
+cursor.close()
+conn.close()
diff --git a/Obsolete/test.py b/Obsolete/test.py
new file mode 100644
index 0000000..fbd7179
--- /dev/null
+++ b/Obsolete/test.py
@@ -0,0 +1,41 @@
+import requests
+import time
+
+API_URL = "http://192.168.1.100:11434/api/generate"
+MODEL = "tinyllama:1.1b"
+
+prompt_text = (
+    "You are a professional AI assistant. Read the following email and briefly explain what it's about "
+    "as if you were summarizing it for your busy boss.\n\n"
+    "Be concise, clear, and include names, requests, deadlines, and project names if mentioned.\n\n"
+    "Email:\n"
+    "\"Hi there, just checking in to see if you received my last message about the invoice due next week. "
+    "Please let me know when you get a chance.\""
+)
+
+payload = {
+    "model": MODEL,
+    "prompt": prompt_text,
+    "stream": False
+}
+
+def run_summary_pass(pass_label):
+    print(f"\n🔁 {pass_label} run for model: {MODEL}")
+    start_time = time.time()
+    response = requests.post(API_URL, json=payload)
+    end_time = time.time()
+
+    if response.status_code == 200:
+        result = response.json().get("response")
+    else:
+        result = f"❌ Error: {response.status_code} - {response.text}"
+
+    elapsed = end_time - start_time
+    print(f"🧠 Summary: {result}")
+    print(f"⏱️ Time taken: {elapsed:.2f} seconds")
+
+# Warm-up run (model loading)
+run_summary_pass("Warm-up")
+
+# Second run (real performance)
+run_summary_pass("Performance")
diff --git a/Obsolete/test_gmail.py b/Obsolete/test_gmail.py
new file mode 100644
index 0000000..74ba665
--- /dev/null
+++ b/Obsolete/test_gmail.py
@@ -0,0 +1,67 @@
+from google_auth_oauthlib.flow import InstalledAppFlow
+from googleapiclient.discovery import build
+import spacy
+
+SCOPES = ["https://www.googleapis.com/auth/gmail.modify"]
+nlp = spacy.load("en_core_web_sm")
+
+# Define keyword-based categories
+CATEGORIES = {
+    "Work": ["meeting", "deadline", "project", "report"],
+    "Finance": ["invoice", "bill", "receipt", "payment", "tax"],
+    "Security": ["verification", "sign in attempt", "password"],
+    "Promotions": ["sale", "deal", "offer", "discount", "promotion"],
+    "Events": ["webinar", "conference", "event", "invitation"]
+}
+
+def authenticate_gmail():
+    flow = InstalledAppFlow.from_client_secrets_file("credentials.json", SCOPES)
+    creds = flow.run_local_server(port=0)
+    return build("gmail", "v1", credentials=creds)
+
+def categorize_email(subject):
+    doc = nlp(subject.lower())
+    for category, keywords in CATEGORIES.items():
+        if any(word in doc.text for word in keywords):
+            return category
+    return "Uncategorized"
+
+def list_and_categorize_emails(service):
+    results = service.users().messages().list(userId="me", maxResults=10).execute()
+    messages = results.get("messages", [])
+
+    for msg in messages:
+        msg_data = service.users().messages().get(userId="me", id=msg["id"]).execute()
+        subject = msg_data.get("snippet", "No Subject")  # snippet stands in for the subject
+        category = categorize_email(subject)
+
+        print(f"📩 Subject: {subject}")
+        print(f"   🏷️ Category: {category}\n")
+
+        # Apply the category label in Gmail
+        label_email(service, msg["id"], category)
+
+def label_email(service, message_id, category):
+    label_id = get_or_create_label(service, category)
+    service.users().messages().modify(
+        userId="me",
+        id=message_id,
+        body={"addLabelIds": [label_id]}
+    ).execute()
+
+def get_or_create_label(service, label_name):
+    labels = service.users().labels().list(userId="me").execute().get("labels", [])
+    for label in labels:
+        if label["name"].lower() == label_name.lower():
+            return label["id"]
+
+    # Create a new label if not found
+    label = service.users().labels().create(
+        userId="me",
+        body={"name": label_name, "labelListVisibility": "labelShow"}
+    ).execute()
+    return label["id"]
+
+if __name__ == "__main__":
+    gmail_service = authenticate_gmail()
+    list_and_categorize_emails(gmail_service)
diff --git a/__pycache__/nlp_summary.cpython-310.pyc b/__pycache__/nlp_summary.cpython-310.pyc
new file mode 100644
index 0000000..9ebfc60
Binary files /dev/null and b/__pycache__/nlp_summary.cpython-310.pyc differ
diff --git a/config/accounts.yml b/config/accounts.yml
new file mode 100644
index 0000000..8cc8300
--- /dev/null
+++ b/config/accounts.yml
@@ -0,0 +1,4 @@
+accounts:
+  - name: main_account
+    email: miguelloy97@gmail.com
+
diff --git a/config/labels.yml b/config/labels.yml
new file mode 100644
index 0000000..476f8e2
--- /dev/null
+++ b/config/labels.yml
@@ -0,0 +1,63 @@
+promo:
+  keywords: ["sale", "deal", "discount", "offer", "clearance", "gift", "free", "promo", "savings", "save", "perk", "alert", "50", "10"]
+  children:
+    stores:
+      keywords: ["walmart", "target", "amazon", "bestbuy", "shein", "temu"]
+    newsletters:
+      keywords: ["weekly roundup", "newsletter", "digest", "perk alert", "alert 10", "week 03", "spring", "new"]
+    coupons:
+      keywords: ["coupon", "voucher", "redeem"]
+    electronics:
+      keywords: ["raspberry pi", "digi key", "digikey", "hardware", "component", "order"]
+    gaming:
+      keywords: ["steam wishlist", "game", "bonus", "classic", "dlc", "gaming", "wishlist"]
+    seasonal:
+      keywords: ["fishing sale", "flavor", "spring", "classic fishing"]
+
+job:
+  keywords: ["hiring", "interview", "career", "position", "job", "resume", "software engineer", "engineer", "developer"]
+  children:
+    offers:
+      keywords: ["job offer", "contract", "start date", "accept"]
+    applications:
+      keywords: ["application", "applied", "submitted", "review"]
+
+bank:
+  keywords: ["account", "transaction", "balance", "deposit", "withdrawal", "bank"]
+  children:
+    alerts:
+      keywords: ["alert", "fraud", "security", "unauthorized"]
+    credit_offers:
+      keywords: ["approved", "pre-approved", "credit cards", "selected pre", "approved credit", "payment", "changed"]
+
+school:
+  keywords: ["course", "assignment", "professor", "exam", "lecture", "university"]
+  children:
+    grades:
+      keywords: ["grade", "result", "score", "transcript"]
+    schedule:
+      keywords: ["calendar", "timetable", "class schedule"]
+
+social:
+  keywords: ["friend", "follow", "message", "mention", "notification"]
+  children:
+    networks:
+      keywords: ["twitter", "facebook", "instagram", "tiktok", "discord"]
+    invites:
+      keywords: ["invite", "joined", "group", "event"]
+
+travel:
+  keywords: ["flight", "booking", "hotel", "trip", "reservation", "itinerary"]
+  children:
+    airlines:
+      keywords: ["delta", "united", "american airlines", "southwest"]
+    deals:
+      keywords: ["travel deal", "fare", "cheap flights"]
+
+work:
+  keywords: ["meeting", "weekly meeting", "time card", "2025"]
+  children:
+    projects:
+      keywords: ["project", "task", "deadline", "milestone"]
+    team:
+      keywords: ["team", "colleague", "manager", "supervisor"]
diff --git a/config/settings.yml b/config/settings.yml
new file mode 100644
index 0000000..e69de29
diff --git a/src/db/__pycache__/database_manager.cpython-310.pyc b/src/db/__pycache__/database_manager.cpython-310.pyc
new file mode 100644
index 0000000..8f293e8
Binary files /dev/null and b/src/db/__pycache__/database_manager.cpython-310.pyc differ
diff --git a/src/db/database_manager.py b/src/db/database_manager.py
new file mode 100644
index 0000000..50ddb99
--- /dev/null
+++ b/src/db/database_manager.py
@@ -0,0 +1,155 @@
+# src/db/database_manager.py
+
+import os
+import mysql.connector
+from utils.logger import Logger
+import yaml
+
+class DatabaseManager:
+    def __init__(self, config=None, source="db"):
+        self.logger = Logger(source)
+        self.config = config or self._load_env_config()
+        self.connection = self._connect()
+
+    def _load_env_config(self):
+        return {
+            "host": os.getenv("DB_HOST", "localhost"),
+            "port": int(os.getenv("DB_PORT", "3306")),
+            "user": os.getenv("DB_USER", "emailuser"),
+            "password": os.getenv("DB_PASSWORD", "miguel33020"),
+            "database": os.getenv("DB_NAME", "emailassistant")
+        }
+
+    def _connect(self):
+        try:
+            conn = mysql.connector.connect(
+                host=self.config["host"],
+                port=self.config["port"],
+                user=self.config["user"],
+                password=self.config["password"],
+                database=self.config["database"]
+            )
+            self.logger.log(f"✅ Connected to MariaDB at {self.config['host']}:{self.config['port']}")
+            return conn
+        except mysql.connector.Error as err:
+            self.logger.log(f"❌ DB connection failed: {err}", level="ERROR")
+            return None
+
+    def initialize_schema(self):
+        if not self.connection:
+            self.logger.log("❌ No DB connection — cannot initialize schema.", level="ERROR")
+            return
+
+        cursor = self.connection.cursor()
+
+        try:
+            # metadata
+            cursor.execute("""
+                CREATE TABLE IF NOT EXISTS metadata (
+                    id INT AUTO_INCREMENT PRIMARY KEY,
+                    user VARCHAR(255),
+                    email VARCHAR(255) UNIQUE NOT NULL,
+                    token TEXT
+                );
+            """)
+            self.logger.log("✅ Table ready: metadata")
+
+            # emails
+            cursor.execute("""
+                CREATE TABLE IF NOT EXISTS emails (
+                    id INT AUTO_INCREMENT PRIMARY KEY,
+                    user VARCHAR(255),
+                    account VARCHAR(255),
+                    message_id VARCHAR(255) UNIQUE,
+                    thread_id VARCHAR(255),
+                    account_id VARCHAR(255),
+                    sender VARCHAR(255),
+                    cc TEXT,
+                    subject TEXT,
+                    body LONGTEXT,
+                    links LONGTEXT,
+                    unsubscribe_data TEXT,
+                    received_at DATETIME,
+                    folder VARCHAR(50),
+                    attachments LONGTEXT,
+                    is_read BOOLEAN DEFAULT FALSE,
+                    labels LONGTEXT,
+
+                    ai_category VARCHAR(100),
+                    ai_confidence FLOAT,
+                    ai_summary TEXT,
+                    ai_keywords TEXT,
+                    ai_label_source VARCHAR(100),
+                    summary_source VARCHAR(100),
+                    ai_model_version VARCHAR(100),
+                    is_ai_reviewed BOOLEAN DEFAULT FALSE,
+                    processing_notes TEXT,
+
+                    processing_status VARCHAR(50),
+                    sync_status VARCHAR(50),
+                    attachment_path TEXT,
+                    downloaded BOOLEAN DEFAULT FALSE
+                );
+            """)
+            self.logger.log("✅ Table ready: emails")
+
+            # logs
+            cursor.execute("""
+                CREATE TABLE IF NOT EXISTS logs (
+                    id INT AUTO_INCREMENT PRIMARY KEY,
+                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
+                    level VARCHAR(20),
+                    source VARCHAR(255),
+                    message TEXT
+                );
+            """)
+            self.logger.log("✅ Table ready: logs")
+
+            self.connection.commit()
+            self.logger.log("✅ Database schema initialized successfully!")
+
+        except Exception as e:
+            self.logger.log(f"❌ Failed to initialize schema: {e}", level="ERROR")
+
+        finally:
+            cursor.close()
+
+    def check_health(self):
+        status = {"status": "unknown", "details": []}
+
+        if not self.connection:
+            self.logger.log("❌ Health check failed: No DB connection.", level="ERROR")
+            status["status"] = "unhealthy"
+            status["details"].append("No database connection.")
+            return status
+
+        try:
+            # Ping the DB
+            cursor = self.connection.cursor()
+            cursor.execute("SELECT 1")
+            _ = cursor.fetchall()
+
+            # Check core tables
+            required_tables = ["emails", "logs", "metadata"]
+            cursor.execute("SHOW TABLES;")
+            existing_tables = set(row[0] for row in cursor.fetchall())
+
+            missing_tables = [table for table in required_tables if table not in existing_tables]
+            if missing_tables:
+                status["status"] = "degraded"
+                for table in missing_tables:
+                    self.logger.log(f"⚠️ Missing table: {table}", level="WARNING")
+                    status["details"].append(f"Missing table: {table}")
+            else:
+                status["status"] = "healthy"
+                status["details"] = [f"{table} ✅" for table in required_tables]
+
+            self.logger.log(f"✅ Health check passed: {status['status']}")
+            return status
+
+        except Exception as e:
+            self.logger.log(f"❌ Health check failed: {e}", level="ERROR")
+            status["status"] = "unhealthy"
+            status["details"].append(str(e))
+            return status
+
diff --git a/src/gmail/__pycache__/gmail_client.cpython-310.pyc b/src/gmail/__pycache__/gmail_client.cpython-310.pyc
new file mode 100644
index 0000000..fdd3f58
Binary files /dev/null and b/src/gmail/__pycache__/gmail_client.cpython-310.pyc differ
diff --git a/src/gmail/gmail_client.py b/src/gmail/gmail_client.py
new file mode 100644
index 0000000..bb433d7
--- /dev/null
+++ b/src/gmail/gmail_client.py
@@ -0,0 +1,20 @@
+# src/gmail/gmail_client.py
+
+class GmailClient:
+    """
+    Handles authentication and email fetching via Gmail API.
+    """
+
+    def __init__(self, gmail_config):
+        self.gmail_config = gmail_config
+        self.service = self._authenticate()
+
+    def _authenticate(self):
+        # TODO: Implement OAuth2 flow using token + credentials
+        # Return authenticated Gmail API service
+        pass
+
+    def fetch_emails(self, account_config):
+        # TODO: Fetch emails for a specific account
+        # Return a list of raw emails (with subject, body, etc.)
+        return []
diff --git a/src/gmail/gmail_parser.py b/src/gmail/gmail_parser.py
new file mode 100644
index 0000000..263f6fa
--- /dev/null
+++ b/src/gmail/gmail_parser.py
@@ -0,0 +1,22 @@
+# src/gmail/gmail_parser.py
+
+class GmailParser:
+    """
+    Parses raw Gmail messages into structured format.
+    """
+
+    def __init__(self):
+        pass
+
+    def parse_message(self, raw_message):
+        # TODO: Extract subject, sender, body, date, attachments, etc.
+        parsed = {
+            "subject": "",
+            "from": "",
+            "to": "",
+            "body": "",
+            "attachments": [],
+            "date": "",
+            "message_id": ""
+        }
+        return parsed
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..5227d49
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,23 @@
+# src/main.py
+import sys, os
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".")))
+import yaml
+from orchestrator.assistant import EmailAssistant
+from db.database_manager import DatabaseManager
+
+def main():
+    print("📦 EZ Email Assistant: Backend Bootstrap\n")
+
+    # 1. Initialize DB Schema
+    db = DatabaseManager()
+    db.initialize_schema()
+
+    # 2. Run Health Check
+    print("\n🔍 Checking DB Health...")
+    health = db.check_health()
+    print("Health Status:", health["status"])
+    for detail in health["details"]:
+        print(" -", detail)
+
+if __name__ == "__main__":
+    main()
diff --git a/src/models/llm_engine.py b/src/models/llm_engine.py
new file mode 100644
index 0000000..f1c9eeb
--- /dev/null
+++ b/src/models/llm_engine.py
@@ -0,0 +1,19 @@
+# src/models/llm_engine.py
+
+class LLMEngine:
+    """
+    Handles summarization or classification via LLMs (local or API).
+ """ + + def __init__(self, mode="api", config=None): + self.mode = mode + self.config = config or {} + # TODO: Initialize model or API client + + def summarize(self, text): + # TODO: Send to local LLM or API + return "LLM summary" + + def classify(self, text): + # (optional) Use LLM for category prediction + return ["promo", "job"] diff --git a/src/orchestrator/__pycache__/assistant.cpython-310.pyc b/src/orchestrator/__pycache__/assistant.cpython-310.pyc new file mode 100644 index 0000000..61783f8 Binary files /dev/null and b/src/orchestrator/__pycache__/assistant.cpython-310.pyc differ diff --git a/src/orchestrator/assistant.py b/src/orchestrator/assistant.py new file mode 100644 index 0000000..204e38b --- /dev/null +++ b/src/orchestrator/assistant.py @@ -0,0 +1,54 @@ +# src/orchestrator/assistant.py + +from gmail.gmail_client import GmailClient +from processor.cleaner import Cleaner +from processor.summarizer import Summarizer +from processor.labeler import Labeler +from db.database_manager import DatabaseManager +from utils.logger import Logger + +class EmailAssistant: + """ + Orchestrates the entire flow: + - Fetches emails + - Cleans and summarizes content + - Categorizes + - Stores in database + """ + + def __init__(self, config): + self.config = config + self.logger = Logger() + self.db = DatabaseManager(config["db"]) + self.gmail = GmailClient(config["gmail"]) + self.cleaner = Cleaner() + self.summarizer = Summarizer() + self.labeler = Labeler() + + def run(self): + self.logger.log("🔄 Starting email assistant run...") + + for account in self.config["accounts"]: + self.logger.log(f"📩 Fetching emails for account: {account['email']}") + emails = self.gmail.fetch_emails(account) + + for email in emails: + # TODO: Add ID check to avoid duplicate inserts + cleaned_body = self.cleaner.clean_body(email["body"]) + links, unsub_data = self.cleaner.extract_links(email["body"]) + summary = self.summarizer.summarize(cleaned_body) + labels = self.labeler.label(email["subject"], summary) + + # TODO: Insert or update email in DB + self.db.insert_email({ + **email, + "cleaned_body": cleaned_body, + "summary": summary, + "labels": labels, + "links": links, + "unsubscribe_data": unsub_data + }) + + self.logger.log(f"✅ Processed: {email['subject']}") + + self.logger.log("✅ Email assistant run completed.") diff --git a/src/processor/__pycache__/cleaner.cpython-310.pyc b/src/processor/__pycache__/cleaner.cpython-310.pyc new file mode 100644 index 0000000..16d9e75 Binary files /dev/null and b/src/processor/__pycache__/cleaner.cpython-310.pyc differ diff --git a/src/processor/__pycache__/labeler.cpython-310.pyc b/src/processor/__pycache__/labeler.cpython-310.pyc new file mode 100644 index 0000000..c567051 Binary files /dev/null and b/src/processor/__pycache__/labeler.cpython-310.pyc differ diff --git a/src/processor/__pycache__/summarizer.cpython-310.pyc b/src/processor/__pycache__/summarizer.cpython-310.pyc new file mode 100644 index 0000000..91556e9 Binary files /dev/null and b/src/processor/__pycache__/summarizer.cpython-310.pyc differ diff --git a/src/processor/cleaner.py b/src/processor/cleaner.py new file mode 100644 index 0000000..0ec82bf --- /dev/null +++ b/src/processor/cleaner.py @@ -0,0 +1,17 @@ +# src/processor/cleaner.py + +class Cleaner: + """ + Cleans raw email body and extracts useful data like links and unsubscribe URLs. 
+ """ + + def __init__(self): + pass + + def clean_body(self, html_body): + # TODO: Strip HTML, remove tracking pixels, normalize text + return "cleaned email body" + + def extract_links(self, html_body): + # TODO: Find all URLs and unsubscribe links in the body + return ["http://example.com"], "http://unsubscribe.example.com" diff --git a/src/processor/labeler.py b/src/processor/labeler.py new file mode 100644 index 0000000..c75d09a --- /dev/null +++ b/src/processor/labeler.py @@ -0,0 +1,14 @@ +# src/processor/labeler.py + +class Labeler: + """ + Assigns labels to emails using subject, sender, or summary-based rules. + """ + + def __init__(self): + # TODO: Load rules or ML model + pass + + def label(self, subject, summary): + # TODO: Return a list of categories or labels + return ["promo", "gaming"] diff --git a/src/processor/summarizer.py b/src/processor/summarizer.py new file mode 100644 index 0000000..ab0840a --- /dev/null +++ b/src/processor/summarizer.py @@ -0,0 +1,14 @@ +# src/processor/summarizer.py + +class Summarizer: + """ + Summarizes cleaned email text using spaCy, KeyBERT, or LLM (configurable). + """ + + def __init__(self, method="spacy"): + self.method = method + # TODO: Load model(s) depending on config + + def summarize(self, text): + # TODO: Return a short summary or key phrases + return "summary of email" diff --git a/src/utils/__pycache__/logger.cpython-310.pyc b/src/utils/__pycache__/logger.cpython-310.pyc new file mode 100644 index 0000000..d655e46 Binary files /dev/null and b/src/utils/__pycache__/logger.cpython-310.pyc differ diff --git a/src/utils/logger.py b/src/utils/logger.py new file mode 100644 index 0000000..aa5c868 --- /dev/null +++ b/src/utils/logger.py @@ -0,0 +1,44 @@ +# src/utils/logger.py + +from datetime import datetime +import mysql.connector +import os + +class Logger: + def __init__(self, source="system"): + self.source = source + self.verbose = True + self._connect_db() + + def _connect_db(self): + try: + self.conn = mysql.connector.connect( + host=os.getenv("DB_HOST", "localhost"), + port=int(os.getenv("DB_PORT", "3306")), + user=os.getenv("DB_USER", "emailuser"), + password=os.getenv("DB_PASSWORD", "miguel33020"), + database=os.getenv("DB_NAME", "emailassistant") + ) + self.cursor = self.conn.cursor() + except Exception as e: + self.conn = None + print(f"[Logger] ❌ Could not connect to logs DB: {e}") + + def log(self, message, level="INFO"): + timestamp = datetime.now().strftime("[%Y-%m-%d %H:%M:%S]") + formatted = f"{timestamp} [{level}] {self.source.upper()}: {message}" + + # Console log + if self.verbose: + print(formatted) + + # DB log + if self.conn: + try: + self.cursor.execute(""" + INSERT INTO logs (level, source, message) + VALUES (%s, %s, %s) + """, (level, self.source, message)) + self.conn.commit() + except Exception as e: + print(f"[Logger] ⚠️ Failed to log to DB: {e}") diff --git a/src/utils/scheduler.py b/src/utils/scheduler.py new file mode 100644 index 0000000..438cf95 --- /dev/null +++ b/src/utils/scheduler.py @@ -0,0 +1,14 @@ +# src/utils/scheduler.py + +class Scheduler: + """ + Placeholder for scheduled tasks or cron-like runs. + Will manage background sync in v2. + """ + + def __init__(self): + pass + + def run_scheduled_tasks(self): + # TODO: Run assistant on schedule (via cron, Celery, etc.) 
+        pass
diff --git a/ui/streamlit_app/Home.py b/ui/streamlit_app/Home.py
new file mode 100644
index 0000000..fef5034
--- /dev/null
+++ b/ui/streamlit_app/Home.py
@@ -0,0 +1,19 @@
+# ui/streamlit_app/Home.py
+import streamlit as st
+
+from utils.logger import Logger
+
+logger = Logger(source="test_ui")
+logger.log("This is a test log from the UI!", level="INFO")
+
+
+def run():
+    st.set_page_config(page_title="Email Assistant", layout="wide")
+    st.title("📬 Email Assistant Dashboard")
+    st.markdown("Welcome to your AI-powered inbox control center!")
+
+    # TODO: Show summary stats, recent labels, account statuses
+    st.info("Stats coming soon! Build in progress.")
+
+if __name__ == "__main__":
+    run()
diff --git a/ui/streamlit_app/__init__.py b/ui/streamlit_app/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/ui/streamlit_app/__pycache__/utils.cpython-310.pyc b/ui/streamlit_app/__pycache__/utils.cpython-310.pyc
new file mode 100644
index 0000000..cd367b1
Binary files /dev/null and b/ui/streamlit_app/__pycache__/utils.cpython-310.pyc differ
diff --git a/ui/streamlit_app/pages/EmailViewer.py b/ui/streamlit_app/pages/EmailViewer.py
new file mode 100644
index 0000000..6967d0b
--- /dev/null
+++ b/ui/streamlit_app/pages/EmailViewer.py
@@ -0,0 +1,45 @@
+# ui/streamlit_app/pages/EmailViewer.py
+import streamlit as st
+
+def fetch_emails():
+    # TODO: Replace with real DB call
+    return [
+        {
+            "Subject": "Welcome to our platform!",
+            "From": "noreply@example.com",
+            "Summary": "Intro to the platform and features.",
+            "Labels": ["promo"],
+            "Date": "2025-03-27",
+        },
+        {
+            "Subject": "Your invoice is ready",
+            "From": "billing@example.com",
+            "Summary": "Invoice for your recent purchase.",
+            "Labels": ["finance"],
+            "Date": "2025-03-25",
+        },
+    ]
+
+def run():
+    st.title("📧 Email Viewer")
+    st.markdown("A simple, readable inbox-style layout.")
+
+    st.markdown("## 📬 Emails")
+
+    emails = fetch_emails()
+
+    for email in emails:
+        with st.container():
+            st.markdown(f"### {email['Subject']}")
+            col1, col2 = st.columns([4, 1])
+            with col1:
+                st.markdown(f"**From:** {email['From']}")
+                st.markdown(f"*{email['Summary']}*")
+                st.markdown("Labels: " + " ".join([f"`{label}`" for label in email["Labels"]]))
+            with col2:
+                st.markdown(f"📅 {email['Date']}")
+
+        st.markdown("---")
+
+if __name__ == "__main__":
+    run()
\ No newline at end of file
diff --git a/ui/streamlit_app/pages/LabelManager.py b/ui/streamlit_app/pages/LabelManager.py
new file mode 100644
index 0000000..a359132
--- /dev/null
+++ b/ui/streamlit_app/pages/LabelManager.py
@@ -0,0 +1,18 @@
+# ui/streamlit_app/LabelManager.py
+import streamlit as st
+
+def run():
+    st.title("🏷️ Label Manager")
+    st.markdown("Manage your smart labels and categories.")
+
+    # TODO: Load labels from DB
+    st.info("Labels not loaded yet. This feature is under construction!")
+
+    st.subheader("Create New Label")
+    new_label = st.text_input("Label Name")
+    if st.button("Add Label"):
+        # TODO: Insert new label into database
+        st.success(f"✅ Added label: {new_label}")
+
+if __name__ == "__main__":
+    run()
\ No newline at end of file
diff --git a/ui/streamlit_app/pages/Settings.py b/ui/streamlit_app/pages/Settings.py
new file mode 100644
index 0000000..6725305
--- /dev/null
+++ b/ui/streamlit_app/pages/Settings.py
@@ -0,0 +1,21 @@
+# ui/streamlit_app/Settings.py
+import streamlit as st
+
+def run():
+    st.title("⚙️ Settings")
+    st.markdown("Customize your assistant’s behavior.")
+
+    st.subheader("Summary Mode")
+    summary_mode = st.radio("Choose summarization engine:", ["spaCy", "KeyBERT", "LLM"])
+    st.success(f"🔍 Using: {summary_mode}")
+
+    st.subheader("Sync Options")
+    auto_sync = st.checkbox("Auto-sync every hour", value=False)
+    download_attachments = st.checkbox("Auto-download invoices/receipts", value=True)
+
+    if st.button("Save Settings"):
+        # TODO: Persist to config/settings.yml
+        st.success("✅ Settings saved!")
+
+if __name__ == "__main__":
+    run()
\ No newline at end of file
diff --git a/ui/streamlit_app/utils.py b/ui/streamlit_app/utils.py
new file mode 100644
index 0000000..d2c4542
--- /dev/null
+++ b/ui/streamlit_app/utils.py
@@ -0,0 +1,24 @@
+# ui/streamlit_app/utils.py
+import sys
+import os
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../")))
+from src.db.database_manager import DatabaseManager
+import yaml
+
+def load_settings(path="config/settings.yml"):
+    with open(path, "r") as f:
+        return yaml.safe_load(f)
+
+def save_settings(settings, path="config/settings.yml"):
+    with open(path, "w") as f:
+        yaml.dump(settings, f)
+
+def get_db():
+    config = load_settings() or {}
+    db_config = config.get("db", {
+        "host": "localhost",
+        "user": "root",
+        "password": "",
+        "database": "data"
+    })
+    return DatabaseManager(db_config)
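(Editor's aside: load_settings returns None while config/settings.yml is empty, which is why get_db above guards with `or {}`. A round-trip sketch using the same logic; the db values are placeholders, run from the repo root.)

```python
# Hypothetical settings round-trip against config/settings.yml.
import yaml

path = "config/settings.yml"
with open(path) as f:
    settings = yaml.safe_load(f) or {}  # empty YAML file -> None, hence "or {}"
settings.setdefault("db", {"host": "localhost", "user": "emailuser",
                           "password": "change-me", "database": "emailassistant"})
with open(path, "w") as f:
    yaml.dump(settings, f)
```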