Crypto-Scraper/src/orchestrator.py

121 lines
4.8 KiB
Python
Raw Normal View History

2025-05-05 22:58:57 -04:00
import os
import sqlite3
import yaml
from databases.logs import Logsdb
from databases.data import Datadb
class Orchestrator:
    """Ensure databases and configs are initialized before the scraper runs.

    Every finding is reported through ``self.logs_db.log``. The checking
    methods additionally return a boolean so that callers (and
    ``run_checks`` itself) can tell whether any problem was detected.
    """

    # Tables that check_database_tables() expects to find in data.db.
    REQUIRED_TABLES = ("tokens", "nfts", "logs")

    def __init__(self):
        self.data_db_path = "data/data.db"
        self.logs_db_path = "data/logs.db"
        self.config_path = "config/settings.yml"
        self.sites_path = "config/sites.yml"

        # Logs database handle; all messages below go through its log().
        self.logs_db = Logsdb()

    def check_and_create_databases(self):
        """Ensure that both data.db and logs.db exist.

        A missing file is created by calling ``initialize()`` on a fresh
        ``Datadb`` / ``Logsdb`` instance, with a log entry before and after.
        """
        # Datadb()/Logsdb() take no path argument here, so the directory name
        # stays fixed rather than being derived from the *_db_path attributes.
        os.makedirs("data", exist_ok=True)

        if not os.path.exists(self.data_db_path):
            self.logs_db.log("⚠️ [WARNING] data.db not found. Creating new database...")
            Datadb().initialize()
            self.logs_db.log("✅ [INFO] data.db initialized successfully.")

        # NOTE(review): __init__ already constructs a Logsdb; if that
        # constructor creates logs.db, this branch never fires -- confirm.
        if not os.path.exists(self.logs_db_path):
            self.logs_db.log("⚠️ [WARNING] logs.db not found. Creating new database...")
            Logsdb().initialize()
            self.logs_db.log("✅ [INFO] logs.db initialized successfully.")

    def check_database_tables(self):
        """Check that every table in ``REQUIRED_TABLES`` exists in data.db.

        Returns:
            True when all required tables are present; False when the
            database file is absent or at least one table is missing (each
            problem is logged).

        Raises:
            sqlite3.Error: if the file cannot be opened or queried as an
                SQLite database.
        """
        # sqlite3.connect() would silently create an empty file; an empty
        # data.db would then make check_and_create_databases() skip the real
        # initialization, so refuse to connect to a non-existent database.
        if not os.path.exists(self.data_db_path):
            self.logs_db.log(f"❌ [ERROR] Database file not found: {self.data_db_path}")
            return False

        conn = sqlite3.connect(self.data_db_path)
        try:
            cursor = conn.cursor()
            missing = []
            for table in self.REQUIRED_TABLES:
                # Bound parameter instead of string interpolation.
                cursor.execute(
                    "SELECT name FROM sqlite_master WHERE type='table' AND name=?;",
                    (table,),
                )
                if cursor.fetchone() is None:
                    missing.append(table)
        finally:
            # Release the connection even when a query raises.
            conn.close()

        for table in missing:
            self.logs_db.log(f"❌ [ERROR] Missing required table: {table} in data.db")
        return not missing

    @staticmethod
    def _default_settings():
        """Return the content written to a freshly created settings.yml."""
        return {
            "scraper": {
                "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                "request_timeout": 10,
                "retry_attempts": 3,
            },
            "database": {
                "data_path": "data/data.db",
                "logs_path": "data/logs.db",
            },
            "api_keys": {
                "twitter": "YOUR_TWITTER_API_KEY",
                "discord": "YOUR_DISCORD_BOT_TOKEN",
            },
            "logging": {
                "level": "INFO",
                "file": "data/scraper.log",
            },
            "debug": {
                "save_html_js": True,  # Set to True for debugging, False for production
            },
        }

    @staticmethod
    def _default_sites():
        """Return the content written to a freshly created sites.yml."""
        return {
            "airdrops": [
                {"name": "Airdrops.io", "url": "https://airdrops.io/latest/"},
                {"name": "CoinMarketCap Recently Added", "url": "https://coinmarketcap.com/new/"},
                {"name": "CoinMarketCap Upcoming Tokens", "url": "https://coinmarketcap.com/upcoming/"},
                {"name": "CoinMarketCap Upcoming NFTs", "url": "https://coinmarketcap.com/nft/upcoming/"},
            ]
        }

    def _write_sample_yaml(self, path, content, created_message):
        """Dump *content* as YAML to *path* unless that file already exists."""
        if os.path.exists(path):
            return
        with open(path, "w", encoding="utf-8") as f:
            yaml.dump(content, f, default_flow_style=False)
        self.logs_db.log(created_message)

    def create_sample_config_files(self):
        """Create sample settings.yml and sites.yml if they are missing.

        Existing files are never overwritten. The parent directory of each
        configured path is created first so the write cannot fail on a
        missing folder.
        """
        for path in (self.config_path, self.sites_path):
            parent = os.path.dirname(path)
            if parent:  # os.makedirs("") would raise for a bare file name
                os.makedirs(parent, exist_ok=True)

        self._write_sample_yaml(
            self.config_path,
            self._default_settings(),
            "📝 [INFO] Created sample settings.yml",
        )
        self._write_sample_yaml(
            self.sites_path,
            self._default_sites(),
            "📝 [INFO] Created sample sites.yml",
        )

    def validate_config_files(self):
        """Check that settings.yml and sites.yml exist and parse as YAML.

        Only YAML syntax is verified; the parsed content is discarded.

        Returns:
            True when both files exist and parse; False otherwise (each
            problem is logged).
        """
        all_valid = True
        for path in (self.config_path, self.sites_path):
            if not os.path.exists(path):
                self.logs_db.log(f"❌ [ERROR] Missing required config file: {path}")
                all_valid = False
                continue
            try:
                # Binary mode lets PyYAML detect the encoding itself, so
                # undecodable bytes surface as YAMLError instead of a
                # locale-dependent UnicodeDecodeError.
                with open(path, "rb") as f:
                    yaml.safe_load(f)
            except yaml.YAMLError as e:
                self.logs_db.log(f"❌ [ERROR] Invalid YAML format in {path}: {e}")
                all_valid = False
            else:
                self.logs_db.log(f"✅ [INFO] Successfully loaded {path}")
        return all_valid

    def run_checks(self):
        """Run all integrity checks before the scraper starts.

        Returns:
            True when the table check and the config validation both passed,
            False otherwise.
        """
        self.logs_db.log("🚀 [INFO] Running startup integrity checks...")
        self.check_and_create_databases()
        tables_ok = self.check_database_tables()
        self.create_sample_config_files()
        configs_ok = self.validate_config_files()

        passed = bool(tables_ok and configs_ok)
        if passed:
            self.logs_db.log("✅ [INFO] All checks completed successfully.")
        else:
            # Do not claim success when an earlier check logged an error.
            self.logs_db.log("⚠️ [WARNING] Startup checks completed with errors. See previous log entries.")
        return passed
# Script entry point: perform the startup integrity checks when this module
# is executed directly rather than imported.
if __name__ == "__main__":
    Orchestrator().run_checks()