Compare commits

..

2 Commits

2 changed files with 25 additions and 15 deletions

View File

@ -23,12 +23,13 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Environment variables # Environment variables
RABBITMQ_HOST = os.getenv("RABBITMQ_HOST", "rabbitq.thejobhub.xyz") RABBITMQ_HOST = os.getenv("RABBITMQ_HOST")
RABBITMQ_PORT = int(os.getenv("RABBITMQ_PORT", "5672")) RABBITMQ_PORT = os.getenv("RABBITMQ_PORT")
RABBITMQ_USER = os.getenv("RABBITMQ_USER", "guest") RABBITMQ_USER = os.getenv("RABBITMQ_USER")
RABBITMQ_PASS = os.getenv("RABBITMQ_PASS", "guest") RABBITMQ_PASS = os.getenv("RABBITMQ_PASS")
REDIS_HOST = os.getenv("REDIS_HOST", "redis-scrape.thejobhub.xyz") REDIS_HOST = os.getenv("REDIS_HOST")
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379")) REDIS_PORT = os.getenv("REDIS_PORT")
class AshbyJobScraper: class AshbyJobScraper:
def __init__( def __init__(

View File

@ -1,3 +1,4 @@
import csv import csv
import json import json
import logging import logging
@ -9,7 +10,6 @@ import uuid
from configparser import ConfigParser from configparser import ConfigParser
import pika import pika
import redis import redis
import os
class Sender: class Sender:
def __init__(self, config_file='config.ini'): def __init__(self, config_file='config.ini'):
@ -17,13 +17,19 @@ class Sender:
self.config.read(config_file) self.config.read(config_file)
# RabbitMQ from env vars with fallbacks # RabbitMQ from env vars with fallbacks
self.rabbitmq_host = os.getenv("RABBITMQ_HOST", self.config.get('rabbitmq', 'url', fallback='rabbitq.thejobhub.xyz')) self.rabbitmq_host = os.getenv("RABBITMQ_HOST")
self.rabbitmq_port = int(os.getenv("RABBITMQ_PORT", self.config.get('rabbitmq', 'port', fallback='5672'))) self.rabbitmq_port = os.getenv("RABBITMQ_PORT")
self.username = os.getenv("RABBITMQ_USER", self.config.get('rabbitmq', 'username', fallback='guest')) self.username = os.getenv("RABBITMQ_USER")
self.password = os.getenv("RABBITMQ_PASS", self.config.get('rabbitmq', 'password', fallback='guest')) self.password = os.getenv("RABBITMQ_PASS")
self.queue_name = self.config.get('rabbitmq', 'queue_name', fallback='job_queue') self.queue_name = self.config.get('rabbitmq', 'queue_name', fallback='job_queue')
self.directory = self.config.get('files', 'directory', fallback='/var/jobs/csv') self.directory = self.config.get('files', 'directory', fallback=os.path.join(os.path.expanduser("~"), "jobs", "csv"))
self.log_file = self.config.get('logging', 'log_file', fallback='/var/logs/sender.log')
# Cross-platform log path: use user's home directory
default_log_dir = os.path.join(os.path.expanduser("~"), ".web_scraping_project", "logs")
os.makedirs(default_log_dir, exist_ok=True)
default_log_file = os.path.join(default_log_dir, "sender.log")
self.log_file = self.config.get('logging', 'log_file', fallback=default_log_file)
self.virtual_host = self.config.get('rabbitmq', 'virtual_hash', fallback='/') self.virtual_host = self.config.get('rabbitmq', 'virtual_hash', fallback='/')
self.batch_size = 500 self.batch_size = 500
self.retry_attempts = 5 # Increased for robustness self.retry_attempts = 5 # Increased for robustness
@ -31,10 +37,13 @@ class Sender:
self.check_interval = 30 # More frequent polling self.check_interval = 30 # More frequent polling
# Redis for deduplication # Redis for deduplication
redis_host = os.getenv("REDIS_HOST", "redis-scrape.thejobhub.xyz") redis_host = os.getenv("REDIS_HOST")
redis_port = int(os.getenv("REDIS_PORT", "6379")) redis_port = os.getenv("REDIS_PORT")
self.redis_client = redis.Redis(host=redis_host, port=redis_port, db=1, decode_responses=True) self.redis_client = redis.Redis(host=redis_host, port=redis_port, db=1, decode_responses=True)
# Ensure log directory exists before configuring logging
log_dir = os.path.dirname(self.log_file)
os.makedirs(log_dir, exist_ok=True)
logging.basicConfig(filename=self.log_file, level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logging.basicConfig(filename=self.log_file, level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
self.logger = logging.getLogger(__name__) self.logger = logging.getLogger(__name__)