diff --git a/scraper.py b/scraper.py
index 9a98cc8..756c2d9 100644
--- a/scraper.py
+++ b/scraper.py
@@ -23,12 +23,13 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
 logger = logging.getLogger(__name__)
 
 # Environment variables
-RABBITMQ_HOST = os.getenv("RABBITMQ_HOST", "rabbitq.thejobhub.xyz")
-RABBITMQ_PORT = int(os.getenv("RABBITMQ_PORT", "5672"))
-RABBITMQ_USER = os.getenv("RABBITMQ_USER", "guest")
-RABBITMQ_PASS = os.getenv("RABBITMQ_PASS", "guest")
-REDIS_HOST = os.getenv("REDIS_HOST", "redis-scrape.thejobhub.xyz")
-REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
+RABBITMQ_HOST = os.environ["RABBITMQ_HOST"]
+RABBITMQ_PORT = int(os.environ["RABBITMQ_PORT"])
+RABBITMQ_USER = os.environ["RABBITMQ_USER"]
+RABBITMQ_PASS = os.environ["RABBITMQ_PASS"]
+REDIS_HOST = os.environ["REDIS_HOST"]
+REDIS_PORT = int(os.environ["REDIS_PORT"])
+
 
 class AshbyJobScraper:
     def __init__(
diff --git a/sender.py b/sender.py
index c836ba1..02b51ab 100644
--- a/sender.py
+++ b/sender.py
@@ -17,13 +17,19 @@ class Sender:
         self.config.read(config_file)
 
-        # RabbitMQ from env vars with fallbacks
-        self.rabbitmq_host = os.getenv("RABBITMQ_HOST", self.config.get('rabbitmq', 'url', fallback='rabbitq.thejobhub.xyz'))
-        self.rabbitmq_port = int(os.getenv("RABBITMQ_PORT", self.config.get('rabbitmq', 'port', fallback='5672')))
-        self.username = os.getenv("RABBITMQ_USER", self.config.get('rabbitmq', 'username', fallback='guest'))
-        self.password = os.getenv("RABBITMQ_PASS", self.config.get('rabbitmq', 'password', fallback='guest'))
+        # RabbitMQ connection settings: required environment variables (fail fast if unset)
+        self.rabbitmq_host = os.environ["RABBITMQ_HOST"]
+        self.rabbitmq_port = int(os.environ["RABBITMQ_PORT"])
+        self.username = os.environ["RABBITMQ_USER"]
+        self.password = os.environ["RABBITMQ_PASS"]
         self.queue_name = self.config.get('rabbitmq', 'queue_name', fallback='job_queue')
-        self.directory = self.config.get('files', 'directory', fallback='/var/jobs/csv')
-        self.log_file = self.config.get('logging', 'log_file', fallback='/var/logs/sender.log')
+        self.directory = self.config.get('files', 'directory', fallback=os.path.join(os.path.expanduser("~"), "jobs", "csv"))
+
+        # Cross-platform log path: use user's home directory
+        default_log_dir = os.path.join(os.path.expanduser("~"), ".web_scraping_project", "logs")
+        os.makedirs(default_log_dir, exist_ok=True)
+        default_log_file = os.path.join(default_log_dir, "sender.log")
+        self.log_file = self.config.get('logging', 'log_file', fallback=default_log_file)
+
-        self.virtual_host = self.config.get('rabbitmq', 'virtual_hash', fallback='/')
+        self.virtual_host = self.config.get('rabbitmq', 'virtual_host', fallback='/')
         self.batch_size = 500
         self.retry_attempts = 5  # Increased for robustness
@@ -31,10 +37,13 @@ class Sender:
         self.check_interval = 30  # More frequent polling
 
         # Redis for deduplication
-        redis_host = os.getenv("REDIS_HOST", "redis-scrape.thejobhub.xyz")
-        redis_port = int(os.getenv("REDIS_PORT", "6379"))
+        redis_host = os.environ["REDIS_HOST"]
+        redis_port = int(os.environ["REDIS_PORT"])
         self.redis_client = redis.Redis(host=redis_host, port=redis_port, db=1, decode_responses=True)
 
+        # Ensure log directory exists before configuring logging
+        log_dir = os.path.dirname(self.log_file)
+        os.makedirs(log_dir or ".", exist_ok=True)
         logging.basicConfig(filename=self.log_file, level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
         self.logger = logging.getLogger(__name__)