Compare commits

..

No commits in common. "0c447d0f77357e2e00e4103abefe294c28516f80" and "c0c7925be3804e81f7b75b6728c166a3751ec8ea" have entirely different histories.

2 changed files with 15 additions and 25 deletions

View File

@@ -23,13 +23,12 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Environment variables # Environment variables
RABBITMQ_HOST = os.getenv("RABBITMQ_HOST") RABBITMQ_HOST = os.getenv("RABBITMQ_HOST", "rabbitq.thejobhub.xyz")
RABBITMQ_PORT = os.getenv("RABBITMQ_PORT") RABBITMQ_PORT = int(os.getenv("RABBITMQ_PORT", "5672"))
RABBITMQ_USER = os.getenv("RABBITMQ_USER") RABBITMQ_USER = os.getenv("RABBITMQ_USER", "guest")
RABBITMQ_PASS = os.getenv("RABBITMQ_PASS") RABBITMQ_PASS = os.getenv("RABBITMQ_PASS", "guest")
REDIS_HOST = os.getenv("REDIS_HOST") REDIS_HOST = os.getenv("REDIS_HOST", "redis-scrape.thejobhub.xyz")
REDIS_PORT = os.getenv("REDIS_PORT") REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
class AshbyJobScraper: class AshbyJobScraper:
def __init__( def __init__(

View File

@@ -1,4 +1,3 @@
import csv import csv
import json import json
import logging import logging
@@ -10,6 +9,7 @@ import uuid
from configparser import ConfigParser from configparser import ConfigParser
import pika import pika
import redis import redis
import os
class Sender: class Sender:
def __init__(self, config_file='config.ini'): def __init__(self, config_file='config.ini'):
@@ -17,19 +17,13 @@ class Sender:
self.config.read(config_file) self.config.read(config_file)
# RabbitMQ from env vars with fallbacks # RabbitMQ from env vars with fallbacks
self.rabbitmq_host = os.getenv("RABBITMQ_HOST") self.rabbitmq_host = os.getenv("RABBITMQ_HOST", self.config.get('rabbitmq', 'url', fallback='rabbitq.thejobhub.xyz'))
self.rabbitmq_port = os.getenv("RABBITMQ_PORT") self.rabbitmq_port = int(os.getenv("RABBITMQ_PORT", self.config.get('rabbitmq', 'port', fallback='5672')))
self.username = os.getenv("RABBITMQ_USER") self.username = os.getenv("RABBITMQ_USER", self.config.get('rabbitmq', 'username', fallback='guest'))
self.password = os.getenv("RABBITMQ_PASS") self.password = os.getenv("RABBITMQ_PASS", self.config.get('rabbitmq', 'password', fallback='guest'))
self.queue_name = self.config.get('rabbitmq', 'queue_name', fallback='job_queue') self.queue_name = self.config.get('rabbitmq', 'queue_name', fallback='job_queue')
self.directory = self.config.get('files', 'directory', fallback=os.path.join(os.path.expanduser("~"), "jobs", "csv")) self.directory = self.config.get('files', 'directory', fallback='/var/jobs/csv')
self.log_file = self.config.get('logging', 'log_file', fallback='/var/logs/sender.log')
# Cross-platform log path: use user's home directory
default_log_dir = os.path.join(os.path.expanduser("~"), ".web_scraping_project", "logs")
os.makedirs(default_log_dir, exist_ok=True)
default_log_file = os.path.join(default_log_dir, "sender.log")
self.log_file = self.config.get('logging', 'log_file', fallback=default_log_file)
self.virtual_host = self.config.get('rabbitmq', 'virtual_hash', fallback='/') self.virtual_host = self.config.get('rabbitmq', 'virtual_hash', fallback='/')
self.batch_size = 500 self.batch_size = 500
self.retry_attempts = 5 # Increased for robustness self.retry_attempts = 5 # Increased for robustness
@@ -37,13 +31,10 @@ class Sender:
self.check_interval = 30 # More frequent polling self.check_interval = 30 # More frequent polling
# Redis for deduplication # Redis for deduplication
redis_host = os.getenv("REDIS_HOST") redis_host = os.getenv("REDIS_HOST", "redis-scrape.thejobhub.xyz")
redis_port = os.getenv("REDIS_PORT") redis_port = int(os.getenv("REDIS_PORT", "6379"))
self.redis_client = redis.Redis(host=redis_host, port=redis_port, db=1, decode_responses=True) self.redis_client = redis.Redis(host=redis_host, port=redis_port, db=1, decode_responses=True)
# Ensure log directory exists before configuring logging
log_dir = os.path.dirname(self.log_file)
os.makedirs(log_dir, exist_ok=True)
logging.basicConfig(filename=self.log_file, level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logging.basicConfig(filename=self.log_file, level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
self.logger = logging.getLogger(__name__) self.logger = logging.getLogger(__name__)