"""Startup integrity checks: make sure databases and YAML configs exist."""

import os
import sqlite3

import yaml

from databases.logs import Logsdb
from databases.data import Datadb


class Orchestrator:
    """Ensures databases and configs are initialized before the scraper runs."""

    # Tables that must be present in data.db.
    # NOTE(review): "logs" is looked up in data.db, not logs.db -- confirm
    # that this is intended.
    REQUIRED_TABLES = ("tokens", "nfts", "logs")

    def __init__(self):
        self.data_db_path = "data/data.db"
        self.logs_db_path = "data/logs.db"
        self.config_path = "config/settings.yml"
        self.sites_path = "config/sites.yml"

        # Initialize logs database for logging
        self.logs_db = Logsdb()

    @staticmethod
    def _ensure_parent_dir(path: str) -> None:
        """Create the directory that contains *path* if it does not exist."""
        parent = os.path.dirname(path)
        if parent:  # a bare filename has no parent directory to create
            os.makedirs(parent, exist_ok=True)

    def _write_sample_yaml(self, path: str, content: dict, label: str) -> None:
        """Dump *content* as YAML to *path* unless the file already exists.

        Args:
            path: Destination file.
            content: Data structure to serialize.
            label: File name used in the log message.
        """
        if os.path.exists(path):
            return
        with open(path, "w", encoding="utf-8") as f:
            yaml.dump(content, f, default_flow_style=False)
        self.logs_db.log(f"📝 [INFO] Created sample {label}")

    def check_and_create_databases(self) -> None:
        """Ensures that both data.db and logs.db exist.

        The containing directories are derived from ``data_db_path`` and
        ``logs_db_path`` so that overriding those attributes keeps working.
        """
        self._ensure_parent_dir(self.data_db_path)
        self._ensure_parent_dir(self.logs_db_path)

        # NOTE(review): every message below goes through self.logs_db, even
        # the ones reporting that logs.db is missing -- confirm that Logsdb
        # can log before initialize() has run.
        if not os.path.exists(self.data_db_path):
            self.logs_db.log("⚠️ [WARNING] data.db not found. Creating new database...")
            Datadb().initialize()
            self.logs_db.log("✅ [INFO] data.db initialized successfully.")

        if not os.path.exists(self.logs_db_path):
            self.logs_db.log("⚠️ [WARNING] logs.db not found. Creating new database...")
            Logsdb().initialize()
            self.logs_db.log("✅ [INFO] logs.db initialized successfully.")

    def check_database_tables(self) -> bool:
        """Ensures required tables exist in the database.

        Logs one error per missing table.

        Returns:
            True when every table in ``REQUIRED_TABLES`` is present in
            data.db, False otherwise.
        """
        query = "SELECT name FROM sqlite_master WHERE type='table' AND name=?;"
        all_present = True

        conn = sqlite3.connect(self.data_db_path)
        try:
            for table in self.REQUIRED_TABLES:
                # Bound parameter instead of string interpolation.
                if conn.execute(query, (table,)).fetchone() is None:
                    all_present = False
                    self.logs_db.log(
                        f"❌ [ERROR] Missing required table: {table} in data.db"
                    )
        finally:
            # Close even if a query raises, so the handle is never leaked.
            conn.close()

        return all_present

    def create_sample_config_files(self) -> None:
        """Creates sample settings.yml and sites.yml if they are missing.

        Existing files are never overwritten.
        """
        self._ensure_parent_dir(self.config_path)
        self._ensure_parent_dir(self.sites_path)

        # Sample settings.yml
        default_settings = {
            "scraper": {
                "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                "request_timeout": 10,
                "retry_attempts": 3,
            },
            "database": {
                "data_path": "data/data.db",
                "logs_path": "data/logs.db",
            },
            "api_keys": {
                "twitter": "YOUR_TWITTER_API_KEY",
                "discord": "YOUR_DISCORD_BOT_TOKEN",
            },
            "logging": {
                "level": "INFO",
                "file": "data/scraper.log",
            },
            "debug": {
                "save_html_js": True,  # Set to True for debugging, False for production
            },
        }
        self._write_sample_yaml(self.config_path, default_settings, "settings.yml")

        # Sample sites.yml
        default_sites = {
            "airdrops": [
                {"name": "Airdrops.io", "url": "https://airdrops.io/latest/"},
                {"name": "CoinMarketCap Recently Added", "url": "https://coinmarketcap.com/new/"},
                {"name": "CoinMarketCap Upcoming Tokens", "url": "https://coinmarketcap.com/upcoming/"},
                {"name": "CoinMarketCap Upcoming NFTs", "url": "https://coinmarketcap.com/nft/upcoming/"},
            ]
        }
        self._write_sample_yaml(self.sites_path, default_sites, "sites.yml")

    def validate_config_files(self) -> bool:
        """Ensures that settings.yml and sites.yml are properly formatted.

        Only checks that each file exists and parses as YAML; the parsed
        content is discarded.

        Returns:
            True when both files exist and parse, False otherwise.
        """
        all_valid = True

        for path in (self.config_path, self.sites_path):
            if not os.path.exists(path):
                self.logs_db.log(f"❌ [ERROR] Missing required config file: {path}")
                all_valid = False
                continue

            try:
                with open(path, "r", encoding="utf-8") as f:
                    yaml.safe_load(f)  # Attempt to parse YAML file
            except yaml.YAMLError as e:
                self.logs_db.log(f"❌ [ERROR] Invalid YAML format in {path}: {e}")
                all_valid = False
            except (OSError, UnicodeDecodeError) as e:
                # An unreadable file is a validation failure, not a crash.
                self.logs_db.log(f"❌ [ERROR] Could not read {path}: {e}")
                all_valid = False
            else:
                self.logs_db.log(f"✅ [INFO] Successfully loaded {path}")

        return all_valid

    def run_checks(self) -> bool:
        """Runs all integrity checks before the scraper starts.

        Returns:
            True when the table check and the config validation both passed,
            False when either of them reported a problem.
        """
        self.logs_db.log("🚀 [INFO] Running startup integrity checks...")

        self.check_and_create_databases()
        # ``is not False`` keeps an override that still returns None counting
        # as a pass, as it did before these methods returned a status.
        tables_ok = self.check_database_tables() is not False
        self.create_sample_config_files()
        configs_ok = self.validate_config_files() is not False

        passed = tables_ok and configs_ok
        if passed:
            self.logs_db.log("✅ [INFO] All checks completed successfully.")
        else:
            # Do not claim success when errors were logged above.
            self.logs_db.log("⚠️ [WARNING] Startup checks completed with errors.")
        return passed


# Run the orchestrator when the script is executed directly.
if __name__ == "__main__":
    orchestrator = Orchestrator()
    orchestrator.run_checks()