120 lines
4.8 KiB
Python
120 lines
4.8 KiB
Python
import os
|
|
import sqlite3
|
|
import yaml
|
|
from databases.logs import Logsdb
|
|
from databases.data import Datadb
|
|
|
|
class Orchestrator:
    """Ensures databases and configs are initialized before the scraper runs.

    The checks are meant to be run through :meth:`run_checks`, which executes
    them in order and reports whether any of them logged an error. All
    messages are written through ``self.logs_db.log``.
    """

    # Tables that must be present in data.db.
    REQUIRED_TABLES = ("tokens", "nfts", "logs")

    def __init__(self):
        """Set the default file locations and create the logging handle."""
        self.data_db_path = "data/data.db"
        self.logs_db_path = "data/logs.db"
        self.config_path = "config/settings.yml"
        self.sites_path = "config/sites.yml"

        # Initialize logs database for logging
        self.logs_db = Logsdb()

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the directory that will contain *path*, if it names one."""
        parent = os.path.dirname(path)
        if parent:  # os.makedirs("") raises, so skip bare file names
            os.makedirs(parent, exist_ok=True)

    def check_and_create_databases(self):
        """Ensures that both data.db and logs.db exist.

        A missing database is created by calling ``initialize()`` on a fresh
        ``Datadb`` / ``Logsdb`` instance.
        """
        # Datadb() and Logsdb() are constructed without a path, so the
        # folder is kept fixed here rather than derived from the attributes.
        os.makedirs("data", exist_ok=True)  # Ensure data folder exists

        if not os.path.exists(self.data_db_path):
            self.logs_db.log("⚠️ [WARNING] data.db not found. Creating new database...")
            Datadb().initialize()
            self.logs_db.log("✅ [INFO] data.db initialized successfully.")

        # NOTE(review): the warning below goes through self.logs_db before
        # logs.db is known to exist; this presumably relies on Logsdb
        # tolerating that — confirm against databases.logs.
        if not os.path.exists(self.logs_db_path):
            self.logs_db.log("⚠️ [WARNING] logs.db not found. Creating new database...")
            Logsdb().initialize()
            self.logs_db.log("✅ [INFO] logs.db initialized successfully.")

    def check_database_tables(self):
        """Ensures required tables exist in the database.

        Logs one error per table from ``REQUIRED_TABLES`` that is absent
        from data.db.

        Returns:
            list: names of the missing tables, in ``REQUIRED_TABLES`` order;
            empty when every required table exists.

        Raises:
            sqlite3.Error: if data.db cannot be opened or queried.
        """
        conn = sqlite3.connect(self.data_db_path)
        try:
            # One constant query instead of a string-built query per table.
            rows = conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table';"
            ).fetchall()
        finally:
            # Release the connection even when the query raises.
            conn.close()

        existing = {name for (name,) in rows}
        missing = [table for table in self.REQUIRED_TABLES if table not in existing]
        for table in missing:
            self.logs_db.log(f"❌ [ERROR] Missing required table: {table} in data.db")
        return missing

    def _write_sample_yaml(self, path, content, label):
        """Dump *content* as YAML to *path* unless that file already exists.

        Returns True when the file was created, False when it was already
        present. *label* is the file name used in the log message.
        """
        self._ensure_parent_dir(path)
        try:
            # Mode "x" fails instead of truncating, so an existing config is
            # never overwritten, even if it appears between check and write.
            with open(path, "x", encoding="utf-8") as f:
                yaml.dump(content, f, default_flow_style=False)
        except FileExistsError:
            return False
        self.logs_db.log(f"📝 [INFO] Created sample {label}")
        return True

    def create_sample_config_files(self):
        """Creates sample settings.yml and sites.yml if they are missing.

        Existing files are left untouched. The containing folder of each
        configured path is created when needed.
        """
        # Sample settings.yml
        default_settings = {
            "scraper": {
                "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                "request_timeout": 10,
                "retry_attempts": 3,
            },
            "database": {
                "data_path": "data/data.db",
                "logs_path": "data/logs.db",
            },
            "api_keys": {
                "twitter": "YOUR_TWITTER_API_KEY",
                "discord": "YOUR_DISCORD_BOT_TOKEN",
            },
            "logging": {
                "level": "INFO",
                "file": "data/scraper.log",
            },
            "debug": {
                "save_html_js": True,  # Set to True for debugging, False for production
            },
        }
        self._write_sample_yaml(self.config_path, default_settings, "settings.yml")

        # Sample sites.yml
        default_sites = {
            "airdrops": [
                {"name": "Airdrops.io", "url": "https://airdrops.io/latest/"},
                {"name": "CoinMarketCap Recently Added", "url": "https://coinmarketcap.com/new/"},
                {"name": "CoinMarketCap Upcoming Tokens", "url": "https://coinmarketcap.com/upcoming/"},
                {"name": "CoinMarketCap Upcoming NFTs", "url": "https://coinmarketcap.com/nft/upcoming/"},
            ]
        }
        self._write_sample_yaml(self.sites_path, default_sites, "sites.yml")

    def validate_config_files(self):
        """Ensures that settings.yml and sites.yml are properly formatted.

        Each file is parsed with ``yaml.safe_load``; a missing, unreadable or
        unparseable file is logged as an error.

        Returns:
            bool: True when both files exist and parse as YAML.
        """
        all_valid = True
        for path in (self.config_path, self.sites_path):
            try:
                # Binary mode lets PyYAML detect the encoding itself instead
                # of depending on the platform's default text encoding.
                with open(path, "rb") as f:
                    yaml.safe_load(f)  # Attempt to parse YAML file
            except FileNotFoundError:
                self.logs_db.log(f"❌ [ERROR] Missing required config file: {path}")
                all_valid = False
            except yaml.YAMLError as e:
                self.logs_db.log(f"❌ [ERROR] Invalid YAML format in {path}: {e}")
                all_valid = False
            except OSError as e:
                # e.g. the path is a directory or is not readable
                self.logs_db.log(f"❌ [ERROR] Could not read config file {path}: {e}")
                all_valid = False
            else:
                self.logs_db.log(f"✅ [INFO] Successfully loaded {path}")
        return all_valid

    def run_checks(self):
        """Runs all integrity checks before the scraper starts.

        Returns:
            bool: True when no required table is missing and both config
            files validated; False otherwise.
        """
        self.logs_db.log("🚀 [INFO] Running startup integrity checks...")
        self.check_and_create_databases()
        missing_tables = self.check_database_tables()
        self.create_sample_config_files()
        configs_ok = self.validate_config_files()

        # Only claim success when none of the checks reported a problem.
        passed = not missing_tables and bool(configs_ok)
        if passed:
            self.logs_db.log("✅ [INFO] All checks completed successfully.")
        else:
            self.logs_db.log("❌ [ERROR] Startup integrity checks finished with problems. See the log entries above.")
        return passed
|
|
|
|
# ✅ Script entry point: build the orchestrator and run every startup check
if __name__ == "__main__":
    Orchestrator().run_checks()
|