First push from FORK client

parent 3813a0798b
commit 5bdd911cf4
46 changed files with 1732 additions and 0 deletions
18  .vscode/settings.json  (vendored)  Normal file
@@ -0,0 +1,18 @@
{
    "sqltools.connections": [
        {
            "mysqlOptions": {
                "authProtocol": "default",
                "enableSsl": "Disabled"
            },
            "previewLimit": 50,
            "server": "localhost",
            "port": 3306,
            "driver": "MariaDB",
            "name": "emailassistant",
            "database": "emailassistant",
            "username": "emailuser",
            "password": "miguel33020"
        }
    ]
}
87  Obsolete/bert_subject_summariser.py  Normal file
@@ -0,0 +1,87 @@
from keybert import KeyBERT
from sentence_transformers import SentenceTransformer
import mysql.connector
import os

# === Load multilingual KeyBERT model ===
model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
kw_model = KeyBERT(model)

# === DB Credentials ===
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = int(os.getenv("DB_PORT", 3306))
DB_USER = os.getenv("DB_USER", "emailuser")
DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020")
DB_NAME = os.getenv("DB_NAME", "emailassistant")

# === Connect to DB ===
conn = mysql.connector.connect(
    host=DB_HOST,
    port=DB_PORT,
    user=DB_USER,
    password=DB_PASSWORD,
    database=DB_NAME
)
cursor = conn.cursor(dictionary=True)

# === Logging Helper ===
def log_event(cursor, level, source, message):
    try:
        cursor.execute(
            "INSERT INTO logs (level, source, message) VALUES (%s, %s, %s)",
            (level, source, message)
        )
    except Exception as e:
        print(f"[LOG ERROR] Failed to log event: {e}")

# === Subject-Based Summarization Using KeyBERT ===
def summarize_subject(subject):
    keywords = kw_model.extract_keywords(
        subject,
        keyphrase_ngram_range=(1, 2),
        stop_words='english',
        top_n=1
    )

    summary = keywords[0][0] if keywords else subject
    confidence = round(len(summary.split()) / max(1, len(subject.split())), 2)

    if len(summary.split()) < 1 or confidence < 0.2:
        return subject, 1.0

    return summary.strip(), confidence

# === Fetch emails ===
cursor.execute("SELECT id, subject FROM emails")
emails = cursor.fetchall()

# === Main Processing Loop ===
for email in emails:
    email_id = email["id"]
    subject = email["subject"]

    if not subject or not subject.strip():
        log_event(cursor, "WARNING", "subject_summarizer", f"Skipped empty subject for email ID {email_id}")
        continue

    try:
        summary, confidence = summarize_subject(subject)

        cursor.execute("""
            UPDATE emails
            SET ai_summary = %s,
                ai_confidence = %s
            WHERE id = %s
        """, (summary, confidence, email_id))

        log_event(cursor, "INFO", "subject_summarizer", f"Subject summarized for email ID {email_id}")
        print(f"✅ Subject summarized for email {email_id} (confidence: {confidence})")

    except Exception as e:
        log_event(cursor, "ERROR", "subject_summarizer", f"Error on email ID {email_id}: {str(e)}")
        print(f"❌ Error summarizing subject for email {email_id}: {e}")

# === Commit & Close ===
conn.commit()
cursor.close()
conn.close()
97  Obsolete/cleaner.py  Normal file
@@ -0,0 +1,97 @@
import mysql.connector
import json
import re
import spacy
from bs4 import BeautifulSoup
from datetime import datetime

# === Load spaCy model ===
nlp = spacy.load("en_core_web_sm")

# === Logging helper ===
def log_event(cursor, level, source, message):
    cursor.execute(
        "INSERT INTO logs (level, source, message) VALUES (%s, %s, %s)",
        (level, source, message)
    )

# === Extract all links from body ===
def extract_links(text):
    return re.findall(r'https?://[^\s<>()"]+', text)

# === Extract unsubscribe links ===
def extract_unsubscribe_link(text):
    # Match links that contain the word "unsubscribe"
    matches = re.findall(r'(https?://[^\s()"]*unsubscribe[^\s()"]*)', text, re.IGNORECASE)
    if matches:
        return matches[0]  # Return the first match
    return None

# === Clean email body ===
def clean_body(body):
    soup = BeautifulSoup(body, "html.parser")
    return soup.get_text(separator=' ', strip=True)

# === Main cleaning logic ===
def clean_emails():
    conn = mysql.connector.connect(
        host="localhost",
        user="emailuser",
        password="miguel33020",
        database="emailassistant"
    )
    cursor = conn.cursor(dictionary=True)

    cursor.execute("SELECT * FROM emails WHERE body IS NOT NULL")
    emails = cursor.fetchall()

    for email in emails:
        email_id = email["id"]
        body = email["body"]

        cleaned_body = clean_body(body)
        links = extract_links(cleaned_body)
        unsubscribe_link = extract_unsubscribe_link(cleaned_body)

        # Attempt to parse attachments
        attachments_data = None
        if email.get("attachments"):
            try:
                attachments_data = json.loads(email["attachments"])
            except json.JSONDecodeError:
                try:
                    # Quick fix: replace single quotes with double quotes
                    attachments_data = json.loads(email["attachments"].replace("'", '"'))
                    log_event(cursor, "WARNING", "cleaner", f"Auto-corrected JSON in attachments (email ID {email_id})")
                except Exception as e2:
                    log_event(cursor, "ERROR", "cleaner", f"Attachment parse failed (ID {email_id}): {str(e2)}")
                    attachments_data = None

        # Update database
        try:
            cursor.execute("""
                UPDATE emails
                SET body = %s,
                    links = %s,
                    unsubscribe_data = %s,
                    attachments = %s
                WHERE id = %s
            """, (
                cleaned_body,
                json.dumps(links),
                unsubscribe_link,
                json.dumps(attachments_data) if attachments_data else None,
                email_id
            ))
            conn.commit()
            print(f"✅ Cleaned email {email_id}")
            log_event(cursor, "INFO", "cleaner", f"Successfully cleaned email ID {email_id}")
        except Exception as e:
            print(f"❌ Error updating email {email_id}: {e}")
            log_event(cursor, "ERROR", "cleaner", f"DB update failed for email ID {email_id}: {str(e)}")

    cursor.close()
    conn.close()

if __name__ == "__main__":
    clean_emails()
19  Obsolete/compose.yml  Normal file
@@ -0,0 +1,19 @@
version: '3.8'

services:
  mariadb:
    image: lscr.io/linuxserver/mariadb:latest
    container_name: mariadb
    environment:
      - PUID=1000
      - PGID=1000
      - TZ=Etc/UTC
      - MYSQL_ROOT_PASSWORD=miguel33020
      - MYSQL_DATABASE=emailassistant
      - MYSQL_USER=emailuser
      - MYSQL_PASSWORD=miguel33020
    volumes:
      - C:/Users/migue/mariadb_config:/config
    ports:
      - 3306:3306
    restart: unless-stopped
1  Obsolete/credentials.json  Normal file
@@ -0,0 +1 @@
{"installed":{"client_id":"712638107230-am5njg9pf0aj9plh1kbtv2h085dveo1q.apps.googleusercontent.com","project_id":"ez-email-agent","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":"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","redirect_uris":["http://localhost"]}}
32  Obsolete/database.py  Normal file
@@ -0,0 +1,32 @@
import mysql.connector
import os

# Load database credentials from environment variables
DB_HOST = os.getenv("DB_HOST", "localhost")  # Your server's IP
DB_PORT = int(os.getenv("DB_PORT", "3306"))  # Convert port to integer
DB_USER = os.getenv("DB_USER", "emailuser")
DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020")
DB_NAME = os.getenv("DB_NAME", "emailassistant")

def connect_db():
    try:
        conn = mysql.connector.connect(
            host=DB_HOST,
            port=DB_PORT,  # Now it's an integer
            user=DB_USER,
            password=DB_PASSWORD,
            database=DB_NAME
        )
        print("✅ Connected to MariaDB successfully!")
        return conn
    except mysql.connector.Error as err:
        print(f"❌ Error: {err}")
        return None

# Test connection
if __name__ == "__main__":
    conn = connect_db()
    if conn:
        conn.close()
135  Obsolete/gmail_to_db_test.py  Normal file
@@ -0,0 +1,135 @@
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
import base64
import mysql.connector
import os
import yaml
import datetime
from initialize_db import initialize_database

SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]

# === Load DB credentials ===
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = int(os.getenv("DB_PORT", 3306))
DB_USER = os.getenv("DB_USER", "emailuser")
DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020")
DB_NAME = os.getenv("DB_NAME", "emailassistant")

def authenticate_gmail():
    flow = InstalledAppFlow.from_client_secrets_file("credentials.json", SCOPES)
    creds = flow.run_local_server(port=0)
    return build("gmail", "v1", credentials=creds)

def get_header(headers, name):
    for h in headers:
        if h["name"].lower() == name.lower():
            return h["value"]
    return None

def decode_body(payload):
    if "data" in payload.get("body", {}):
        return base64.urlsafe_b64decode(payload["body"]["data"]).decode("utf-8", errors="ignore")
    elif "parts" in payload:
        for part in payload["parts"]:
            if part.get("mimeType") == "text/plain" and "data" in part.get("body", {}):
                return base64.urlsafe_b64decode(part["body"]["data"]).decode("utf-8", errors="ignore")
    return ""

def insert_into_db(email_data):
    conn = mysql.connector.connect(
        host=DB_HOST,
        port=DB_PORT,
        user=DB_USER,
        password=DB_PASSWORD,
        database=DB_NAME
    )
    cursor = conn.cursor()

    query = """
        INSERT IGNORE INTO emails (
            user, account, message_id, thread_id, account_id, sender, cc, subject, body, links,
            received_at, folder, attachments, is_read, labels,
            ai_category, ai_confidence, ai_summary,
            processing_status, sync_status, attachment_path, downloaded
        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
                  %s, %s, %s, %s, %s,
                  %s, %s, %s,
                  %s, %s, %s, %s)
    """

    try:
        cursor.execute(query, (
            email_data.get("user", "default_user"),
            email_data.get("account", "main_account"),
            email_data["message_id"],
            email_data["thread_id"],
            email_data.get("account_id", ""),
            email_data["sender"],
            email_data["cc"],
            email_data["subject"],
            email_data["body"],
            email_data.get("links", ""),
            email_data["received_at"],
            email_data.get("folder", "inbox"),
            email_data["attachments"],
            False,
            email_data["labels"],
            None, None, None,
            "unprocessed",
            "synced",
            None,
            False
        ))
        print(f"✅ Stored: {email_data['subject'][:60]}...")
    except Exception as e:
        print("❌ Error inserting into DB:", e)

    conn.commit()
    cursor.close()
    conn.close()

def fetch_and_store_emails(service):
    results = service.users().messages().list(userId="me", maxResults=500).execute()
    messages = results.get("messages", [])

    for msg in messages:
        msg_data = service.users().messages().get(userId="me", id=msg["id"]).execute()

        payload = msg_data.get("payload", {})
        headers = payload.get("headers", [])

        sender = get_header(headers, "From")
        cc = get_header(headers, "Cc")
        subject = get_header(headers, "Subject")
        date_str = get_header(headers, "Date")
        body = decode_body(payload)

        try:
            received_at = datetime.datetime.strptime(date_str, "%a, %d %b %Y %H:%M:%S %z")
        except:
            received_at = datetime.datetime.utcnow()

        email_data = {
            "user": "default_user",
            "account": "main_account",
            "message_id": msg_data["id"],
            "thread_id": msg_data.get("threadId"),
            "account_id": "",
            "sender": sender,
            "cc": cc,
            "subject": subject,
            "body": body,
            "links": "",  # Placeholder, will be populated by AI
            "received_at": received_at,
            "folder": "inbox",
            "attachments": str(payload.get("parts", [])),
            "labels": str(msg_data.get("labelIds", []))
        }

        insert_into_db(email_data)

if __name__ == "__main__":
    initialize_database()
    gmail_service = authenticate_gmail()
    fetch_and_store_emails(gmail_service)
93  Obsolete/initialize_db.py  Normal file
@@ -0,0 +1,93 @@
import mysql.connector
import yaml
import os

def initialize_database():
    # === Load config file ===
    with open("config.yml", "r") as file:
        config = yaml.safe_load(file)

    # === DB Connection ===
    conn = mysql.connector.connect(
        host=os.getenv("DB_HOST", "localhost"),
        port=os.getenv("DB_PORT", 3306),
        user=os.getenv("DB_USER", "emailuser"),
        password=os.getenv("DB_PASSWORD", "miguel33020"),
        database=os.getenv("DB_NAME", "emailassistant")
    )
    cursor = conn.cursor()

    # === Table: metadata (previously main_account) ===
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS metadata (
            id INT AUTO_INCREMENT PRIMARY KEY,
            user VARCHAR(255),
            email VARCHAR(255) UNIQUE NOT NULL,
            token TEXT
        );
    """)
    print("✅ Table ready: metadata")

    # === Table: emails ===
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS emails (
            id INT AUTO_INCREMENT PRIMARY KEY,
            user VARCHAR(255),
            account VARCHAR(255),
            message_id VARCHAR(255) UNIQUE,
            thread_id VARCHAR(255),
            account_id VARCHAR(255),
            sender VARCHAR(255),
            cc TEXT,
            subject TEXT,
            body LONGTEXT,
            links LONGTEXT,
            unsubscribe_data TEXT,
            received_at DATETIME,
            folder VARCHAR(50),
            attachments LONGTEXT,
            is_read BOOLEAN DEFAULT FALSE,
            labels LONGTEXT,

            -- 🔍 AI-Generated Fields
            ai_category VARCHAR(100),              -- Top-level (e.g. 'promo')
            ai_confidence FLOAT,                   -- Confidence score
            ai_summary TEXT,                       -- Summary of subject/body
            ai_keywords TEXT,                      -- Comma-separated extracted keywords
            ai_label_source VARCHAR(100),          -- 'subject', 'body', 'combined', 'llm'
            summary_source VARCHAR(100),           -- Similar to above
            ai_model_version VARCHAR(100),         -- Versioning helps long-term debugging
            is_ai_reviewed BOOLEAN DEFAULT FALSE,  -- Was this fully processed by AI?
            processing_notes TEXT,                 -- Optional notes about fallback, etc.

            -- 🔄 Sync and Processing Status
            processing_status VARCHAR(50),
            sync_status VARCHAR(50),
            attachment_path TEXT,
            downloaded BOOLEAN DEFAULT FALSE
        );
    """)
    print("✅ Table ready: emails")

    # === Table: logs ===
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS logs (
            id INT AUTO_INCREMENT PRIMARY KEY,
            timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
            level VARCHAR(20),
            source VARCHAR(255),
            message TEXT
        );
    """)
    print("✅ Table ready: logs")

    cursor.close()
    conn.close()

# if __name__ == "__main__":
#     initialize_database()

#if __name__ == "__main__":
#    initialize_database()
58  Obsolete/insight,py  Normal file
@@ -0,0 +1,58 @@
import os
import mysql.connector
from keybert import KeyBERT
from sentence_transformers import SentenceTransformer
from collections import Counter

# === Load multilingual model for KeyBERT ===
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
kw_model = KeyBERT(model)

# === DB Credentials ===
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = int(os.getenv("DB_PORT", 3306))
DB_USER = os.getenv("DB_USER", "emailuser")
DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020")
DB_NAME = os.getenv("DB_NAME", "emailassistant")

# === Connect to DB ===
conn = mysql.connector.connect(
    host=DB_HOST,
    port=DB_PORT,
    user=DB_USER,
    password=DB_PASSWORD,
    database=DB_NAME
)
cursor = conn.cursor(dictionary=True)

# === Fetch only unlabeled emails ===
cursor.execute("SELECT id, subject FROM emails WHERE ai_category = 'unlabeled'")
emails = cursor.fetchall()

print(f"🔍 Analyzing {len(emails)} unlabeled emails...")

keyword_counter = Counter()

for email in emails:
    subject = email["subject"]
    if not subject:
        continue

    try:
        keywords = kw_model.extract_keywords(
            subject,
            keyphrase_ngram_range=(1, 2),
            stop_words="english",
            top_n=5
        )
        keyword_counter.update([kw[0].lower() for kw in keywords])
    except Exception as e:
        print(f"❌ Error processing email ID {email['id']}: {e}")

# === Output top missing keywords ===
print("\n📊 Top keywords in unlabeled emails:")
for word, count in keyword_counter.most_common(30):
    print(f"{word}: {count}")

cursor.close()
conn.close()
115  Obsolete/labeler.py  Normal file
@@ -0,0 +1,115 @@
import os
import yaml
import mysql.connector
from keybert import KeyBERT
from sentence_transformers import SentenceTransformer

# === Load multilingual model for KeyBERT ===
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
kw_model = KeyBERT(model)

# === Load label hierarchy from YAML ===
LABEL_FILE = os.getenv("LABEL_CONFIG_PATH", "labels.yml")
with open(LABEL_FILE, "r", encoding="utf-8") as f:
    label_config = yaml.safe_load(f)

print(f"📂 Using label config: {LABEL_FILE}")
print(label_config)

# === DB Credentials ===
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = int(os.getenv("DB_PORT", 3306))
DB_USER = os.getenv("DB_USER", "emailuser")
DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020")
DB_NAME = os.getenv("DB_NAME", "emailassistant")

# === Connect to DB ===
conn = mysql.connector.connect(
    host=DB_HOST,
    port=DB_PORT,
    user=DB_USER,
    password=DB_PASSWORD,
    database=DB_NAME
)
cursor = conn.cursor(dictionary=True)

# === Logging Helper ===
def log_event(cursor, level, source, message):
    cursor.execute(
        "INSERT INTO logs (level, source, message) VALUES (%s, %s, %s)",
        (level, source, message)
    )

# === Recursive label matcher ===
def match_labels(keywords, label_tree, prefix=""):
    for label, data in label_tree.items():
        full_label = f"{prefix}/{label}".strip("/")
        label_keywords = [kw.lower() for kw in data.get("keywords", [])]

        # First check children
        children = data.get("children", {})
        child_match = match_labels(keywords, children, prefix=full_label)
        if child_match:
            return child_match

        # Then check this level (so children take priority)
        if any(kw in keywords for kw in label_keywords):
            return full_label

    return None

# === Fetch emails that haven't been labeled ===
cursor.execute("SELECT id, subject, ai_category FROM emails")
emails = cursor.fetchall()

# === Main Labeling Loop ===
for email in emails:
    email_id = email["id"]
    subject = email["subject"]
    current_label = email["ai_category"]

    # if current_label not in [None, "None", ""]:
    #     print(f"ℹ️ Email {email_id} already has label '{current_label}'")
    #     continue

    if not subject or not subject.strip():
        log_event(cursor, "WARNING", "labeler", f"Skipped empty subject for email ID {email_id}")
        continue

    try:
        keywords = kw_model.extract_keywords(
            subject,
            keyphrase_ngram_range=(1, 2),
            stop_words="english",
            top_n=5
        )
        keyword_set = set(k[0].lower() for k in keywords)
        label = match_labels(keyword_set, label_config) or "unlabeled"

        cursor.execute("""
            UPDATE emails
            SET ai_category = %s,
                ai_keywords = %s,
                ai_label_source = %s,
                ai_confidence = %s,
                is_ai_reviewed = FALSE
            WHERE id = %s
        """, (label, ", ".join(keyword_set), "labeler_v1.0", 1.0, email_id))

        log_event(cursor, "INFO", "labeler", f"Labeled email {email_id} as '{label}'")
        print(f"🏷️ Email {email_id} labeled as: {label}")

    except Exception as e:
        log_event(cursor, "ERROR", "labeler", f"Error labeling email ID {email_id}: {str(e)}")
        print(f"❌ Error labeling email {email_id}: {e}")

# === Commit & Close ===
conn.commit()
cursor.close()
conn.close()
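Note: match_labels checks a node's children before the node's own keywords, so the most specific label wins. A minimal standalone sketch of that behavior, run against a hand-typed slice of config/labels.yml; the slice and the example keyword sets below are illustrative, not part of the commit:

# Standalone sketch of the recursive matcher above, using a slice of labels.yml.
label_config = {
    "promo": {
        "keywords": ["sale", "deal", "discount"],
        "children": {
            "stores": {"keywords": ["walmart", "target", "amazon"]},
        },
    },
}

def match_labels(keywords, label_tree, prefix=""):
    for label, data in label_tree.items():
        full_label = f"{prefix}/{label}".strip("/")
        label_keywords = [kw.lower() for kw in data.get("keywords", [])]
        # Children are checked first, so the most specific label wins.
        child_match = match_labels(keywords, data.get("children", {}), prefix=full_label)
        if child_match:
            return child_match
        if any(kw in keywords for kw in label_keywords):
            return full_label
    return None

print(match_labels({"amazon", "sale"}, label_config))  # -> "promo/stores"
print(match_labels({"discount"}, label_config))        # -> "promo"
print(match_labels({"meeting"}, label_config))         # -> None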
54  Obsolete/migrations.py  Normal file
@@ -0,0 +1,54 @@
import os
import mysql.connector
from datetime import datetime

# === DB Credentials ===
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = int(os.getenv("DB_PORT", 3306))
DB_USER = os.getenv("DB_USER", "emailuser")
DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020")
DB_NAME = os.getenv("DB_NAME", "emailassistant")

# === Connect to DB ===
conn = mysql.connector.connect(
    host=DB_HOST,
    port=DB_PORT,
    user=DB_USER,
    password=DB_PASSWORD,
    database=DB_NAME
)
cursor = conn.cursor()

# === Logging Helper ===
def log_event(cursor, level, source, message):
    cursor.execute(
        "INSERT INTO logs (level, source, message, timestamp) VALUES (%s, %s, %s, %s)",
        (level, source, message, datetime.now())
    )

# === Migration Commands ===
migration_commands = [
    "ALTER TABLE emails ADD COLUMN IF NOT EXISTS ai_keywords TEXT;",
    "ALTER TABLE emails ADD COLUMN IF NOT EXISTS ai_label_source VARCHAR(100);",
    "ALTER TABLE emails ADD COLUMN IF NOT EXISTS summary_source VARCHAR(100);",
    "ALTER TABLE emails ADD COLUMN IF NOT EXISTS ai_model_version VARCHAR(100);",
    "ALTER TABLE emails ADD COLUMN IF NOT EXISTS is_ai_reviewed BOOLEAN DEFAULT FALSE;",
    "ALTER TABLE emails ADD COLUMN IF NOT EXISTS processing_notes TEXT;",
]

# === Apply Migrations ===
print("🚀 Starting migrations...")
for cmd in migration_commands:
    try:
        cursor.execute(cmd)
        log_event(cursor, "INFO", "migrations", f"Executed: {cmd}")
        print(f"✅ Executed: {cmd}")
    except mysql.connector.Error as err:
        log_event(cursor, "WARNING", "migrations", f"Skipped or failed: {cmd} -> {err}")
        print(f"⚠️ Skipped or failed: {cmd} -> {err}")

# === Commit & Close ===
conn.commit()
cursor.close()
conn.close()
print("✅ Migration complete.")
89  Obsolete/nlp_summary.py  Normal file
@@ -0,0 +1,89 @@
import spacy
import mysql.connector
import os
import sys
from collections import Counter
from string import punctuation

# === Load spaCy model ===
nlp = spacy.load("en_core_web_sm")

# === DB Credentials ===
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = int(os.getenv("DB_PORT", 3306))
DB_USER = os.getenv("DB_USER", "emailuser")
DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020")
DB_NAME = os.getenv("DB_NAME", "emailassistant")

# === Connect to DB ===
conn = mysql.connector.connect(
    host=DB_HOST,
    port=DB_PORT,
    user=DB_USER,
    password=DB_PASSWORD,
    database=DB_NAME
)
cursor = conn.cursor(dictionary=True)

# === Logging Helper ===
def log_event(cursor, level, source, message):
    cursor.execute(
        "INSERT INTO logs (level, source, message) VALUES (%s, %s, %s)",
        (level, source, message)
    )

# === Summarization Logic ===
def summarize(text, max_sentences=3):
    doc = nlp(text)
    words = [token.text.lower() for token in doc if token.is_alpha and not token.is_stop]
    word_freq = Counter(words)

    sentence_scores = {}
    for sent in doc.sents:
        for word in sent:
            if word.text.lower() in word_freq:
                sentence_scores[sent] = sentence_scores.get(sent, 0) + word_freq[word.text.lower()]

    summarized = sorted(sentence_scores, key=sentence_scores.get, reverse=True)[:max_sentences]
    return " ".join(str(s) for s in summarized)

# === Fetch All Emails with Missing Summaries ===
cursor.execute("SELECT id, body FROM emails WHERE ai_summary IS NULL")
emails = cursor.fetchall()

# === Main Processing Loop ===
for email in emails:
    email_id = email["id"]
    body = email["body"]

    if not body or not body.strip():
        log_event(cursor, "WARNING", "summarizer", f"Skipped empty body for email ID {email_id}")
        continue

    try:
        summary = summarize(body)
        if not summary.strip():
            summary = "No meaningful summary could be generated."

        # Optional confidence (ratio of summary length to original body)
        confidence = round(len(summary.split()) / max(1, len(body.split())), 2)

        # Update email
        cursor.execute("""
            UPDATE emails
            SET ai_summary = %s,
                ai_confidence = %s
            WHERE id = %s
        """, (summary, confidence, email_id))

        log_event(cursor, "INFO", "summarizer", f"Summarized email ID {email_id}")
        print(f"✅ Summarized email {email_id} (confidence: {confidence})")

    except Exception as e:
        log_event(cursor, "ERROR", "summarizer", f"Error summarizing email ID {email_id}: {str(e)}")
        print(f"❌ Error summarizing email {email_id}: {e}")

# === Commit & Close ===
conn.commit()
cursor.close()
conn.close()
6  Obsolete/requirements.txt  Normal file
@@ -0,0 +1,6 @@
google-auth
google-auth-oauthlib
google-auth-httplib2
google-api-python-client
openai
transformers
135  Obsolete/smart_labler.py  Normal file
@@ -0,0 +1,135 @@
import os
import ast
import yaml
import mysql.connector
from keybert import KeyBERT
from sentence_transformers import SentenceTransformer
from collections import Counter

# === Load multilingual model for KeyBERT ===
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
kw_model = KeyBERT(model)

# === Load label hierarchy from YAML ===
LABEL_FILE = os.getenv("LABEL_CONFIG_PATH", "labels.yml")
with open(LABEL_FILE, "r", encoding="utf-8") as f:
    label_config = yaml.safe_load(f)

# === DB Credentials ===
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = int(os.getenv("DB_PORT", 3306))
DB_USER = os.getenv("DB_USER", "emailuser")
DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020")
DB_NAME = os.getenv("DB_NAME", "emailassistant")

# === Connect to DB ===
conn = mysql.connector.connect(
    host=DB_HOST,
    port=DB_PORT,
    user=DB_USER,
    password=DB_PASSWORD,
    database=DB_NAME
)
cursor = conn.cursor(dictionary=True)

# === Logging Helper ===
def log_event(cursor, level, source, message):
    try:
        cursor.execute(
            "INSERT INTO logs (level, source, message) VALUES (%s, %s, %s)",
            (level, source, message)
        )
    except:
        print(f"[LOG ERROR] {level} from {source}: {message}")

# === Recursive label matcher ===
def match_labels(keywords, label_tree, prefix=""):
    for label, data in label_tree.items():
        full_label = f"{prefix}/{label}".strip("/")
        label_keywords = [kw.lower() for kw in data.get("keywords", [])]
        if any(kw in keywords for kw in label_keywords):
            children = data.get("children", {})
            child_match = match_labels(keywords, children, prefix=full_label)
            return child_match if child_match else full_label
    return None

# === Smart Label Aggregator ===
def smart_label(email):
    votes = []

    # 1. FROM address rules (sender may be NULL in the DB)
    from_addr = (email.get("sender") or "").lower()
    if any(x in from_addr for x in ["paypal", "bankofamerica", "chase"]):
        votes.append("bank")
    if "indeed" in from_addr or "hiring" in from_addr:
        votes.append("job")

    # 2. Subject keyword analysis
    subject = email.get("subject", "")
    if subject:
        keywords = kw_model.extract_keywords(
            subject, keyphrase_ngram_range=(1, 2), stop_words="english", top_n=5
        )
        keyword_set = set(k[0].lower() for k in keywords)
        label_from_subject = match_labels(keyword_set, label_config)
        if label_from_subject:
            votes.append(label_from_subject)

    # 3. AI summary matching (ai_summary may be NULL in the DB)
    summary = (email.get("ai_summary") or "").lower()
    if "payment" in summary or "transaction" in summary:
        votes.append("bank")
    if "your order" in summary or "delivered" in summary:
        votes.append("promo")

    # 4. Gmail label logic (from "labels" column)
    raw_label = email.get("labels", "")
    try:
        gmail_labels = ast.literal_eval(raw_label) if raw_label else []
        gmail_labels = [label.upper() for label in gmail_labels]
    except (ValueError, SyntaxError):
        gmail_labels = []

    if "CATEGORY_PROMOTIONS" in gmail_labels:
        votes.append("promo")
    elif "CATEGORY_SOCIAL" in gmail_labels:
        votes.append("social")
    elif "CATEGORY_UPDATES" in gmail_labels:
        votes.append("work")
    elif "IMPORTANT" in gmail_labels:
        votes.append("work")

    # 5. Count votes
    label_counts = Counter(votes)
    return label_counts.most_common(1)[0][0] if label_counts else "unlabeled"

# === Fetch unlabeled emails ===
cursor.execute("SELECT id, sender, subject, ai_summary, labels, ai_category FROM emails")

emails = cursor.fetchall()
print(f"📬 Found {len(emails)} total emails for re-labeling")

# === Main Labeling Loop ===
for email in emails:
    email_id = email["id"]
    try:
        label = smart_label(email)
        cursor.execute("""
            UPDATE emails
            SET ai_category = %s,
                ai_label_source = %s,
                is_ai_reviewed = FALSE
            WHERE id = %s
        """, (label, "smart_labeler", email_id))

        log_event(cursor, "INFO", "smart_labeler", f"Labeled email {email_id} as '{label}'")
        print(f"🏷️ Email {email_id} labeled as: {label}")

    except Exception as e:
        log_event(cursor, "ERROR", "smart_labeler", f"Error labeling email {email_id}: {str(e)}")
        print(f"❌ Error labeling email {email_id}: {e}")

# === Commit & Close ===
conn.commit()
cursor.close()
conn.close()
95  Obsolete/subject_summariser.py  Normal file
@@ -0,0 +1,95 @@
import spacy
import mysql.connector
import os
import sys
from collections import Counter

# === Load spaCy model ===
nlp = spacy.load("en_core_web_sm")

# === DB Credentials ===
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = int(os.getenv("DB_PORT", 3306))
DB_USER = os.getenv("DB_USER", "emailuser")
DB_PASSWORD = os.getenv("DB_PASSWORD", "miguel33020")
DB_NAME = os.getenv("DB_NAME", "emailassistant")

# === Connect to DB ===
conn = mysql.connector.connect(
    host=DB_HOST,
    port=DB_PORT,
    user=DB_USER,
    password=DB_PASSWORD,
    database=DB_NAME
)
cursor = conn.cursor(dictionary=True)

# === Logging Helper ===
def log_event(cursor, level, source, message):
    cursor.execute(
        "INSERT INTO logs (level, source, message) VALUES (%s, %s, %s)",
        (level, source, message)
    )

# === Subject-Based Summarization ===
def summarize_subject(subject):
    doc = nlp(subject)
    keywords = [token.text for token in doc if token.is_alpha and not token.is_stop]
    if not keywords:
        return subject, 1.0  # fallback to raw subject

    # Prioritize noun chunks that include keywords
    noun_chunks = list(doc.noun_chunks)
    chunks = [chunk.text for chunk in noun_chunks if any(tok.text in keywords for tok in chunk)]

    # Combine and limit summary length
    compressed = " ".join(chunks or keywords)
    compressed_words = compressed.split()
    subject_word_count = len(subject.split())
    summary = " ".join(compressed_words[:max(1, subject_word_count - 1)]).strip()

    # Confidence is relative to subject word count
    confidence = round(len(summary.split()) / max(1, subject_word_count), 2)

    # Fallback if summary is too short or confidence too low
    if len(summary.split()) < 2 or confidence < 0.3:
        return subject, 1.0

    return summary, confidence

# === Fetch emails with NULL ai_summary ===
cursor.execute("SELECT id, subject FROM emails")
emails = cursor.fetchall()

# === Main Processing Loop ===
for email in emails:
    email_id = email["id"]
    subject = email["subject"]

    if not subject or not subject.strip():
        log_event(cursor, "WARNING", "subject_summarizer", f"Skipped empty subject for email ID {email_id}")
        continue

    try:
        summary, confidence = summarize_subject(subject)

        cursor.execute("""
            UPDATE emails
            SET ai_summary = %s,
                ai_confidence = %s
            WHERE id = %s
        """, (summary, confidence, email_id))

        log_event(cursor, "INFO", "subject_summarizer", f"Subject summarized for email ID {email_id}")
        print(f"✅ Subject summarized for email {email_id} (confidence: {confidence})")

    except Exception as e:
        log_event(cursor, "ERROR", "subject_summarizer", f"Error on email ID {email_id}: {str(e)}")
        print(f"❌ Error summarizing subject for email {email_id}: {e}")

# === Commit & Close ===
conn.commit()
cursor.close()
conn.close()
41  Obsolete/test.py  Normal file
@@ -0,0 +1,41 @@
import requests
import time

API_URL = "http://192.168.1.100:11434/api/generate"
MODEL = "tinyllama:1.1b"

prompt_text = (
    "You are a professional AI assistant. Read the following email and briefly explain what it's about "
    "as if you were summarizing it for your busy boss.\n\n"
    "Be concise, clear, and include names, requests, deadlines, and project names if mentioned.\n\n"
    "Email:\n"
    "\"Hi there, just checking in to see if you received my last message about the invoice due next week. "
    "Please let me know when you get a chance.\""
)

payload = {
    "model": MODEL,
    "prompt": prompt_text,
    "stream": False
}

def run_summary_pass(pass_label):
    print(f"\n🔁 {pass_label} run for model: {MODEL}")
    start_time = time.time()
    response = requests.post(API_URL, json=payload)
    end_time = time.time()

    if response.status_code == 200:
        result = response.json().get("response")
    else:
        result = f"❌ Error: {response.status_code} - {response.text}"

    elapsed = end_time - start_time
    print(f"🧠 Summary: {result}")
    print(f"⏱️ Time taken: {elapsed:.2f} seconds")

# Warm-up run (model loading)
run_summary_pass("Warm-up")

# Second run (real performance)
run_summary_pass("Performance")
67  Obsolete/test_gmail.py  Normal file
@@ -0,0 +1,67 @@
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
import nlp_summary

SCOPES = ["https://www.googleapis.com/auth/gmail.modify"]
nlp = nlp_summary.nlp  # reuse the spaCy model already loaded by nlp_summary

# Define keyword-based categories
CATEGORIES = {
    "Work": ["meeting", "deadline", "project", "report"],
    "Finance": ["invoice", "bill", "receipt", "payment", "tax"],
    "Security": ["verification", "sign in attempt", "password"],
    "Promotions": ["sale", "deal", "offer", "discount", "promotion"],
    "Events": ["webinar", "conference", "event", "invitation"]
}

def authenticate_gmail():
    flow = InstalledAppFlow.from_client_secrets_file("credentials.json", SCOPES)
    creds = flow.run_local_server(port=0)
    return build("gmail", "v1", credentials=creds)

def categorize_email(subject):
    doc = nlp(subject.lower())
    for category, keywords in CATEGORIES.items():
        if any(word in doc.text for word in keywords):
            return category
    return "Uncategorized"

def list_and_categorize_emails(service):
    results = service.users().messages().list(userId="me", maxResults=10).execute()
    messages = results.get("messages", [])

    for msg in messages:
        msg_data = service.users().messages().get(userId="me", id=msg["id"]).execute()
        subject = msg_data.get("snippet", "No Subject")
        category = categorize_email(subject)

        print(f"📩 Subject: {subject}")
        print(f"   🏷️ Category: {category}\n")

        # Apply the category label in Gmail
        label_email(service, msg["id"], category)

def label_email(service, message_id, category):
    label_id = get_or_create_label(service, category)
    service.users().messages().modify(
        userId="me",
        id=message_id,
        body={"addLabelIds": [label_id]}
    ).execute()

def get_or_create_label(service, label_name):
    labels = service.users().labels().list(userId="me").execute().get("labels", [])
    for label in labels:
        if label["name"].lower() == label_name.lower():
            return label["id"]

    # Create a new label if not found
    label = service.users().labels().create(
        userId="me",
        body={"name": label_name, "labelListVisibility": "labelShow"}
    ).execute()
    return label["id"]

if __name__ == "__main__":
    gmail_service = authenticate_gmail()
    list_and_categorize_emails(gmail_service)
BIN  __pycache__/nlp_summary.cpython-310.pyc  Normal file
Binary file not shown.
4  config/accounts.yml  Normal file
@@ -0,0 +1,4 @@
accounts:
  - name: main_account
    email: miguelloy97@gmail.com
63  config/labels.yml  Normal file
@@ -0,0 +1,63 @@
promo:
  keywords: ["sale", "deal", "discount", "offer", "clearance", "gift", "free", "promo", "savings", "save", "perk", "alert", "50", "10"]
  children:
    stores:
      keywords: ["walmart", "target", "amazon", "bestbuy", "shein", "temu"]
    newsletters:
      keywords: ["weekly roundup", "newsletter", "digest", "perk alert", "alert 10", "week 03", "spring", "new"]
    coupons:
      keywords: ["coupon", "voucher", "redeem"]
    electronics:
      keywords: ["raspberry pi", "digi key", "digikey", "hardware", "component", "order"]
    gaming:
      keywords: ["steam wishlist", "game", "bonus", "classic", "dlc", "gaming", "wishlist"]
    seasonal:
      keywords: ["fishing sale", "flavor", "spring", "classic fishing"]

job:
  keywords: ["hiring", "interview", "career", "position", "job", "resume", "software engineer", "engineer", "developer"]
  children:
    offers:
      keywords: ["job offer", "contract", "start date", "accept"]
    applications:
      keywords: ["application", "applied", "submitted", "review"]

bank:
  keywords: ["account", "transaction", "balance", "deposit", "withdrawal", "bank"]
  children:
    alerts:
      keywords: ["alert", "fraud", "security", "unauthorized"]
    credit_offers:
      keywords: ["approved", "pre-approved", "credit cards", "selected pre", "approved credit", "payment", "changed"]

school:
  keywords: ["course", "assignment", "professor", "exam", "lecture", "university"]
  children:
    grades:
      keywords: ["grade", "result", "score", "transcript"]
    schedule:
      keywords: ["calendar", "timetable", "class schedule"]

social:
  keywords: ["friend", "follow", "message", "mention", "notification"]
  children:
    networks:
      keywords: ["twitter", "facebook", "instagram", "tiktok", "discord"]
    invites:
      keywords: ["invite", "joined", "group", "event"]

travel:
  keywords: ["flight", "booking", "hotel", "trip", "reservation", "itinerary"]
  children:
    airlines:
      keywords: ["delta", "united", "american airlines", "southwest"]
    deals:
      keywords: ["travel deal", "fare", "cheap flights"]

work:
  keywords: ["meeting", "weekly meeting", "time card", "2025"]
  children:
    projects:
      keywords: ["project", "task", "deadline", "milestone"]
    team:
      keywords: ["team", "colleague", "manager", "supervisor"]
0  config/settings.yml  Normal file
BIN  src/db/__pycache__/database_manager.cpython-310.pyc  Normal file
Binary file not shown.
155  src/db/database_manager.py  Normal file
@@ -0,0 +1,155 @@
# src/db/database_manager.py

import os
import mysql.connector
from utils.logger import Logger
import yaml

class DatabaseManager:
    def __init__(self, config=None, source="db"):
        self.logger = Logger(source)
        self.config = config or self._load_env_config()
        self.connection = self._connect()

    def _load_env_config(self):
        return {
            "host": os.getenv("DB_HOST", "localhost"),
            "port": int(os.getenv("DB_PORT", "3306")),
            "user": os.getenv("DB_USER", "emailuser"),
            "password": os.getenv("DB_PASSWORD", "miguel33020"),
            "database": os.getenv("DB_NAME", "emailassistant")
        }

    def _connect(self):
        try:
            conn = mysql.connector.connect(
                host=self.config["host"],
                port=self.config["port"],
                user=self.config["user"],
                password=self.config["password"],
                database=self.config["database"]
            )
            self.logger.log(f"✅ Connected to MariaDB at {self.config['host']}:{self.config['port']}")
            return conn
        except mysql.connector.Error as err:
            self.logger.log(f"❌ DB connection failed: {err}", level="ERROR")
            return None

    def initialize_schema(self):
        if not self.connection:
            self.logger.log("❌ No DB connection — cannot initialize schema.", level="ERROR")
            return

        cursor = self.connection.cursor()

        try:
            # metadata
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS metadata (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    user VARCHAR(255),
                    email VARCHAR(255) UNIQUE NOT NULL,
                    token TEXT
                );
            """)
            self.logger.log("✅ Table ready: metadata")

            # emails
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS emails (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    user VARCHAR(255),
                    account VARCHAR(255),
                    message_id VARCHAR(255) UNIQUE,
                    thread_id VARCHAR(255),
                    account_id VARCHAR(255),
                    sender VARCHAR(255),
                    cc TEXT,
                    subject TEXT,
                    body LONGTEXT,
                    links LONGTEXT,
                    unsubscribe_data TEXT,
                    received_at DATETIME,
                    folder VARCHAR(50),
                    attachments LONGTEXT,
                    is_read BOOLEAN DEFAULT FALSE,
                    labels LONGTEXT,

                    ai_category VARCHAR(100),
                    ai_confidence FLOAT,
                    ai_summary TEXT,
                    ai_keywords TEXT,
                    ai_label_source VARCHAR(100),
                    summary_source VARCHAR(100),
                    ai_model_version VARCHAR(100),
                    is_ai_reviewed BOOLEAN DEFAULT FALSE,
                    processing_notes TEXT,

                    processing_status VARCHAR(50),
                    sync_status VARCHAR(50),
                    attachment_path TEXT,
                    downloaded BOOLEAN DEFAULT FALSE
                );
            """)
            self.logger.log("✅ Table ready: emails")

            # logs
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS logs (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                    level VARCHAR(20),
                    source VARCHAR(255),
                    message TEXT
                );
            """)
            self.logger.log("✅ Table ready: logs")

            self.connection.commit()
            self.logger.log("✅ Database schema initialized successfully!")

        except Exception as e:
            self.logger.log(f"❌ Failed to initialize schema: {e}", level="ERROR")

        finally:
            cursor.close()

    def check_health(self):
        status = {"status": "unknown", "details": []}

        if not self.connection:
            self.logger.log("❌ Health check failed: No DB connection.", level="ERROR")
            status["status"] = "unhealthy"
            status["details"].append("No database connection.")
            return status

        try:
            # Ping the DB
            cursor = self.connection.cursor()
            cursor.execute("SELECT 1")
            _ = cursor.fetchall()

            # Check core tables
            required_tables = ["emails", "logs", "metadata"]
            cursor.execute("SHOW TABLES;")
            existing_tables = set(row[0] for row in cursor.fetchall())

            missing_tables = [table for table in required_tables if table not in existing_tables]
            if missing_tables:
                status["status"] = "degraded"
                for table in missing_tables:
                    self.logger.log(f"⚠️ Missing table: {table}", level="WARNING")
                    status["details"].append(f"Missing table: {table}")
            else:
                status["status"] = "healthy"
                status["details"] = [f"{table} ✅" for table in required_tables]

            self.logger.log(f"✅ Health check passed: {status['status']}")
            return status

        except Exception as e:
            self.logger.log(f"❌ Health check failed: {e}", level="ERROR")
            status["status"] = "unhealthy"
            status["details"].append(str(e))
            return status
BIN  src/gmail/__pycache__/gmail_client.cpython-310.pyc  Normal file
Binary file not shown.
20  src/gmail/gmail_client.py  Normal file
@@ -0,0 +1,20 @@
# src/gmail/gmail_client.py

class GmailClient:
    """
    Handles authentication and email fetching via Gmail API.
    """

    def __init__(self, gmail_config):
        self.gmail_config = gmail_config
        self.service = self._authenticate()

    def _authenticate(self):
        # TODO: Implement OAuth2 flow using token + credentials
        # Return authenticated Gmail API service
        pass

    def fetch_emails(self, account_config):
        # TODO: Fetch emails for a specific account
        # Return a list of raw emails (with subject, body, etc.)
        return []
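The _authenticate TODO could be filled with the same InstalledAppFlow pattern already used in Obsolete/gmail_to_db_test.py. A minimal sketch with token caching added; the token_path key on gmail_config and the readonly scope are assumptions, not part of this commit:

# Hypothetical sketch, not the committed implementation.
import os
from google.oauth2.credentials import Credentials
from google.auth.transport.requests import Request
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]

def _authenticate(self):
    creds = None
    token_path = self.gmail_config.get("token_path", "token.json")  # assumed config key
    if os.path.exists(token_path):
        # Reuse a previously saved token instead of re-prompting the user.
        creds = Credentials.from_authorized_user_file(token_path, SCOPES)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file("credentials.json", SCOPES)
            creds = flow.run_local_server(port=0)
        with open(token_path, "w") as f:
            f.write(creds.to_json())
    return build("gmail", "v1", credentials=creds)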
22  src/gmail/gmail_parser.py  Normal file
@@ -0,0 +1,22 @@
# src/gmail/gmail_parser.py

class GmailParser:
    """
    Parses raw Gmail messages into structured format.
    """

    def __init__(self):
        pass

    def parse_message(self, raw_message):
        # TODO: Extract subject, sender, body, date, attachments, etc.
        parsed = {
            "subject": "",
            "from": "",
            "to": "",
            "body": "",
            "attachments": [],
            "date": "",
            "message_id": ""
        }
        return parsed
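One possible shape for parse_message, adapted from the header and body handling in Obsolete/gmail_to_db_test.py; treat the details as assumptions rather than the planned implementation:

# Hypothetical sketch based on Obsolete/gmail_to_db_test.py.
import base64

def parse_message(self, raw_message):
    payload = raw_message.get("payload", {})
    headers = payload.get("headers", [])

    def header(name):
        # Gmail returns headers as a list of {"name": ..., "value": ...} dicts.
        return next((h["value"] for h in headers if h["name"].lower() == name.lower()), "")

    body = ""
    if "data" in payload.get("body", {}):
        body = base64.urlsafe_b64decode(payload["body"]["data"]).decode("utf-8", errors="ignore")
    elif "parts" in payload:
        # Fall back to the first text/plain part, as decode_body does in the obsolete script.
        for part in payload["parts"]:
            if part.get("mimeType") == "text/plain" and "data" in part.get("body", {}):
                body = base64.urlsafe_b64decode(part["body"]["data"]).decode("utf-8", errors="ignore")
                break

    return {
        "subject": header("Subject"),
        "from": header("From"),
        "to": header("To"),
        "body": body,
        "attachments": [],  # attachment extraction left out of this sketch
        "date": header("Date"),
        "message_id": raw_message.get("id", ""),
    }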
23
src/main.py
Normal file
@@ -0,0 +1,23 @@
# src/main.py
import sys, os

# Ensure sibling packages (db, orchestrator, ...) are importable when run directly
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".")))

import yaml  # reserved for config loading once the full pipeline is wired up
from orchestrator.assistant import EmailAssistant  # not yet invoked below
from db.database_manager import DatabaseManager


def main():
    print("📦 EZ Email Assistant: Backend Bootstrap\n")

    # 1. Initialize DB Schema
    db = DatabaseManager()
    db.initialize_schema()

    # 2. Run Health Check
    print("\n🔍 Checking DB Health...")
    health = db.check_health()
    print("Health Status:", health["status"])
    for detail in health["details"]:
        print(" -", detail)


if __name__ == "__main__":
    main()
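Once GmailClient is implemented, main() could load config/settings.yml and hand it to EmailAssistant. The sketch below assumes the config exposes the db, gmail, and accounts keys that EmailAssistant.__init__ reads; the key layout is inferred, not documented in this commit:

import yaml
from orchestrator.assistant import EmailAssistant

def run_pipeline(config_path="config/settings.yml"):
    # Assumed config layout: top-level db, gmail, and accounts keys
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)
    EmailAssistant(config).run()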
19
src/models/llm_engine.py
Normal file
@@ -0,0 +1,19 @@
# src/models/llm_engine.py

class LLMEngine:
    """
    Handles summarization or classification via LLMs (local or API).
    """

    def __init__(self, mode="api", config=None):
        self.mode = mode
        self.config = config or {}
        # TODO: Initialize model or API client

    def summarize(self, text):
        # TODO: Send to local LLM or API
        return "LLM summary"

    def classify(self, text):
        # (optional) Use LLM for category prediction
        return ["promo", "job"]
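One possible "local" backend for summarize(), using the Hugging Face transformers pipeline (an assumed dependency; the model name is illustrative, not this project's choice):

from transformers import pipeline

# Loaded once at module import; any seq2seq summarization model would do
_summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

def summarize(text):
    # Truncate long bodies to stay within the model's input limit
    result = _summarizer(text[:3000], max_length=60, min_length=10, do_sample=False)
    return result[0]["summary_text"]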
BIN
src/orchestrator/__pycache__/assistant.cpython-310.pyc
Normal file
Binary file not shown.
54
src/orchestrator/assistant.py
Normal file
@@ -0,0 +1,54 @@
# src/orchestrator/assistant.py

from gmail.gmail_client import GmailClient
from processor.cleaner import Cleaner
from processor.summarizer import Summarizer
from processor.labeler import Labeler
from db.database_manager import DatabaseManager
from utils.logger import Logger


class EmailAssistant:
    """
    Orchestrates the entire flow:
    - Fetches emails
    - Cleans and summarizes content
    - Categorizes
    - Stores in database
    """

    def __init__(self, config):
        self.config = config
        self.logger = Logger()
        self.db = DatabaseManager(config["db"])
        self.gmail = GmailClient(config["gmail"])
        self.cleaner = Cleaner()
        self.summarizer = Summarizer()
        self.labeler = Labeler()

    def run(self):
        self.logger.log("🔄 Starting email assistant run...")

        for account in self.config["accounts"]:
            self.logger.log(f"📩 Fetching emails for account: {account['email']}")
            emails = self.gmail.fetch_emails(account)

            for email in emails:
                # TODO: Add ID check to avoid duplicate inserts
                cleaned_body = self.cleaner.clean_body(email["body"])
                links, unsub_data = self.cleaner.extract_links(email["body"])
                summary = self.summarizer.summarize(cleaned_body)
                labels = self.labeler.label(email["subject"], summary)

                # TODO: Insert or update email in DB
                self.db.insert_email({
                    **email,
                    "cleaned_body": cleaned_body,
                    "summary": summary,
                    "labels": labels,
                    "links": links,
                    "unsubscribe_data": unsub_data
                })

                self.logger.log(f"✅ Processed: {email['subject']}")

        self.logger.log("✅ Email assistant run completed.")
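One way to close the duplicate-insert TODO in run() is a guard keyed on the Gmail message ID; email_exists() here is a hypothetical DatabaseManager helper, not something this commit implements:

            for email in emails:
                # Skip anything already stored (email_exists is an assumed helper)
                if self.db.email_exists(email.get("message_id")):
                    self.logger.log(f"⏭️ Skipping duplicate: {email['subject']}")
                    continue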
BIN
src/processor/__pycache__/cleaner.cpython-310.pyc
Normal file
Binary file not shown.
BIN
src/processor/__pycache__/labeler.cpython-310.pyc
Normal file
Binary file not shown.
BIN
src/processor/__pycache__/summarizer.cpython-310.pyc
Normal file
Binary file not shown.
17
src/processor/cleaner.py
Normal file
@@ -0,0 +1,17 @@
# src/processor/cleaner.py

class Cleaner:
    """
    Cleans raw email body and extracts useful data like links and unsubscribe URLs.
    """

    def __init__(self):
        pass

    def clean_body(self, html_body):
        # TODO: Strip HTML, remove tracking pixels, normalize text
        return "cleaned email body"

    def extract_links(self, html_body):
        # TODO: Find all URLs and unsubscribe links in the body
        return ["http://example.com"], "http://unsubscribe.example.com"
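A minimal sketch of clean_body() and extract_links() using BeautifulSoup (an assumed dependency; any HTML parser would serve):

from bs4 import BeautifulSoup

def clean_body(html_body):
    soup = BeautifulSoup(html_body, "html.parser")
    for tag in soup(["script", "style"]):
        tag.decompose()  # drop scripts/styles before extracting text
    # Collapse whitespace into single spaces
    return " ".join(soup.get_text(separator=" ").split())

def extract_links(html_body):
    soup = BeautifulSoup(html_body, "html.parser")
    links = [a["href"] for a in soup.find_all("a", href=True)]
    # Naive heuristic: first link mentioning "unsubscribe"
    unsub = next((l for l in links if "unsubscribe" in l.lower()), None)
    return links, unsub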
14
src/processor/labeler.py
Normal file
@@ -0,0 +1,14 @@
# src/processor/labeler.py

class Labeler:
    """
    Assigns labels to emails using subject, sender, or summary-based rules.
    """

    def __init__(self):
        # TODO: Load rules or ML model
        pass

    def label(self, subject, summary):
        # TODO: Return a list of categories or labels
        return ["promo", "gaming"]
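One possible rule-based implementation of label(), assuming simple keyword rules; the categories shown are illustrative, not the project's final taxonomy:

# Keyword rules; in practice these would be loaded from config or the DB
RULES = {
    "promo": ["sale", "discount", "offer"],
    "finance": ["invoice", "receipt", "payment"],
    "job": ["interview", "application", "recruiter"],
}

def label(subject, summary):
    text = f"{subject} {summary}".lower()
    matched = [cat for cat, words in RULES.items() if any(w in text for w in words)]
    return matched or ["uncategorized"]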
14
src/processor/summarizer.py
Normal file
@@ -0,0 +1,14 @@
# src/processor/summarizer.py

class Summarizer:
    """
    Summarizes cleaned email text using spaCy, KeyBERT, or LLM (configurable).
    """

    def __init__(self, method="spacy"):
        self.method = method
        # TODO: Load model(s) depending on config

    def summarize(self, text):
        # TODO: Return a short summary or key phrases
        return "summary of email"
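A rough sketch of the "spacy" method: score sentences by content-word frequency and return the top one. Assumes the en_core_web_sm model is installed; this is one extractive approach, not the project's settled algorithm:

import spacy
from collections import Counter

nlp = spacy.load("en_core_web_sm")

def summarize(text, top_n=1):
    doc = nlp(text)
    # Frequency of content words (skip stop words and punctuation)
    freq = Counter(t.lemma_.lower() for t in doc if t.is_alpha and not t.is_stop)

    def score(sent):
        return sum(freq.get(t.lemma_.lower(), 0) for t in sent)

    best = sorted(doc.sents, key=score, reverse=True)[:top_n]
    return " ".join(s.text.strip() for s in best)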
BIN
src/utils/__pycache__/logger.cpython-310.pyc
Normal file
Binary file not shown.
44
src/utils/logger.py
Normal file
@@ -0,0 +1,44 @@
# src/utils/logger.py

from datetime import datetime
import mysql.connector
import os


class Logger:
    def __init__(self, source="system"):
        self.source = source
        self.verbose = True
        self._connect_db()

    def _connect_db(self):
        try:
            self.conn = mysql.connector.connect(
                host=os.getenv("DB_HOST", "localhost"),
                port=int(os.getenv("DB_PORT", "3306")),
                user=os.getenv("DB_USER", "emailuser"),
                password=os.getenv("DB_PASSWORD", "miguel33020"),
                database=os.getenv("DB_NAME", "emailassistant")
            )
            self.cursor = self.conn.cursor()
        except Exception as e:
            self.conn = None
            print(f"[Logger] ❌ Could not connect to logs DB: {e}")

    def log(self, message, level="INFO"):
        timestamp = datetime.now().strftime("[%Y-%m-%d %H:%M:%S]")
        formatted = f"{timestamp} [{level}] {self.source.upper()}: {message}"

        # Console log
        if self.verbose:
            print(formatted)

        # DB log
        if self.conn:
            try:
                self.cursor.execute("""
                    INSERT INTO logs (level, source, message)
                    VALUES (%s, %s, %s)
                """, (level, self.source, message))
                self.conn.commit()
            except Exception as e:
                print(f"[Logger] ⚠️ Failed to log to DB: {e}")
14
src/utils/scheduler.py
Normal file
@@ -0,0 +1,14 @@
# src/utils/scheduler.py

class Scheduler:
    """
    Placeholder for scheduled tasks or cron-like runs.
    Will manage background sync in v2.
    """

    def __init__(self):
        pass

    def run_scheduled_tasks(self):
        # TODO: Run assistant on schedule (via cron, Celery, etc.)
        pass
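A sketch of run_scheduled_tasks() using the `schedule` package (an assumed dependency); cron or Celery beat would be equivalent v2 options:

import time
import schedule

def run_scheduled_tasks(assistant):
    schedule.every(1).hours.do(assistant.run)  # hourly background sync
    while True:
        schedule.run_pending()
        time.sleep(60)  # coarse polling is fine at this cadence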
19
ui/streamlit_app/Home.py
Normal file
@@ -0,0 +1,19 @@
# ui/streamlit_app/Home.py
import streamlit as st

# NOTE: resolves to src/utils/logger.py, so src/ must be on PYTHONPATH
from utils.logger import Logger

logger = Logger(source="test_ui")
logger.log("This is a test log from the UI!", level="INFO")


def run():
    st.set_page_config(page_title="Email Assistant", layout="wide")
    st.title("📬 Email Assistant Dashboard")
    st.markdown("Welcome to your AI-powered inbox control center!")

    # TODO: Show summary stats, recent labels, account statuses
    st.info("Stats coming soon! Build in progress.")


if __name__ == "__main__":
    run()
0
ui/streamlit_app/__init__.py
Normal file
BIN
ui/streamlit_app/__pycache__/utils.cpython-310.pyc
Normal file
Binary file not shown.
45
ui/streamlit_app/pages/EmailViewer.py
Normal file
@@ -0,0 +1,45 @@
# ui/streamlit_app/pages/EmailViewer.py
import streamlit as st


def fetch_emails():
    # TODO: Replace with real DB call
    return [
        {
            "Subject": "Welcome to our platform!",
            "From": "noreply@example.com",
            "Summary": "Intro to the platform and features.",
            "Labels": ["promo"],
            "Date": "2025-03-27",
        },
        {
            "Subject": "Your invoice is ready",
            "From": "billing@example.com",
            "Summary": "Invoice for your recent purchase.",
            "Labels": ["finance"],
            "Date": "2025-03-25",
        },
    ]


def run():
    st.title("📧 Email Viewer")
    st.markdown("A simple, readable inbox-style layout.")

    st.markdown("## 📬 Emails")

    emails = fetch_emails()

    for email in emails:
        with st.container():
            st.markdown(f"### {email['Subject']}")
            col1, col2 = st.columns([4, 1])
            with col1:
                st.markdown(f"**From:** {email['From']}")
                st.markdown(f"*{email['Summary']}*")
                st.markdown("Labels: " + " ".join([f"`{label}`" for label in email["Labels"]]))
            with col2:
                st.markdown(f"📅 {email['Date']}")

        st.markdown("---")


if __name__ == "__main__":
    run()
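Replacing the mock data with a real query could reuse get_db() from ui/streamlit_app/utils.py; the column names below are assumptions about the emails schema, and `.connection` mirrors the attribute the health check uses on DatabaseManager:

from utils import get_db

def fetch_emails_from_db(limit=50):
    db = get_db()
    cursor = db.connection.cursor(dictionary=True)  # assumes DatabaseManager exposes .connection
    # Column names here are assumed; adjust to the actual emails schema
    cursor.execute(
        "SELECT subject, sender, ai_summary, received_date FROM emails "
        "ORDER BY received_date DESC LIMIT %s",
        (limit,),
    )
    return cursor.fetchall()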
18
ui/streamlit_app/pages/LabelManager.py
Normal file
@@ -0,0 +1,18 @@
# ui/streamlit_app/pages/LabelManager.py
import streamlit as st


def run():
    st.title("🏷️ Label Manager")
    st.markdown("Manage your smart labels and categories.")

    # TODO: Load labels from DB
    st.info("Labels not loaded yet. This feature is under construction!")

    st.subheader("Create New Label")
    new_label = st.text_input("Label Name")
    if st.button("Add Label"):
        # TODO: Insert new label into database
        st.success(f"✅ Added label: {new_label}")


if __name__ == "__main__":
    run()
21
ui/streamlit_app/pages/Settings.py
Normal file
@@ -0,0 +1,21 @@
# ui/streamlit_app/pages/Settings.py
import streamlit as st


def run():
    st.title("⚙️ Settings")
    st.markdown("Customize your assistant’s behavior.")

    st.subheader("Summary Mode")
    summary_mode = st.radio("Choose summarization engine:", ["spaCy", "KeyBERT", "LLM"])
    st.success(f"🔍 Using: {summary_mode}")

    st.subheader("Sync Options")
    auto_sync = st.checkbox("Auto-sync every hour", value=False)
    download_attachments = st.checkbox("Auto-download invoices/receipts", value=True)

    if st.button("Save Settings"):
        # TODO: Persist to config/settings.yml
        st.success("✅ Settings saved!")


if __name__ == "__main__":
    run()
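The persistence TODO inside run() could reuse load_settings()/save_settings() from ui/streamlit_app/utils.py; the settings key names below are placeholders, not an established schema:

    from utils import load_settings, save_settings

    if st.button("Save Settings"):
        settings = load_settings()
        settings["summary_mode"] = summary_mode  # placeholder keys
        settings["auto_sync"] = auto_sync
        settings["download_attachments"] = download_attachments
        save_settings(settings)
        st.success("✅ Settings saved!")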
24
ui/streamlit_app/utils.py
Normal file
@@ -0,0 +1,24 @@
# ui/streamlit_app/utils.py
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../")))
from src.db.database_manager import DatabaseManager
import yaml


def load_settings(path="config/settings.yml"):
    with open(path, "r") as f:
        return yaml.safe_load(f)


def save_settings(settings, path="config/settings.yml"):
    with open(path, "w") as f:
        yaml.dump(settings, f)


def get_db():
    config = load_settings()
    db_config = config.get("db", {
        "host": "localhost",
        "user": "root",
        "password": "",
        "database": "data"
    })
    return DatabaseManager(db_config)