Compare commits

..

2 Commits

2 changed files with 25 additions and 15 deletions

View File

@@ -23,12 +23,13 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
logger = logging.getLogger(__name__)
# Environment variables
RABBITMQ_HOST = os.getenv("RABBITMQ_HOST", "rabbitq.thejobhub.xyz")
RABBITMQ_PORT = int(os.getenv("RABBITMQ_PORT", "5672"))
RABBITMQ_USER = os.getenv("RABBITMQ_USER", "guest")
RABBITMQ_PASS = os.getenv("RABBITMQ_PASS", "guest")
REDIS_HOST = os.getenv("REDIS_HOST", "redis-scrape.thejobhub.xyz")
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
RABBITMQ_HOST = os.getenv("RABBITMQ_HOST")
RABBITMQ_PORT = os.getenv("RABBITMQ_PORT")
RABBITMQ_USER = os.getenv("RABBITMQ_USER")
RABBITMQ_PASS = os.getenv("RABBITMQ_PASS")
REDIS_HOST = os.getenv("REDIS_HOST")
REDIS_PORT = os.getenv("REDIS_PORT")
class AshbyJobScraper:
def __init__(

View File

@@ -1,3 +1,4 @@
import csv
import json
import logging
@@ -9,7 +10,6 @@ import uuid
from configparser import ConfigParser
import pika
import redis
import os
class Sender:
def __init__(self, config_file='config.ini'):
@@ -17,13 +17,19 @@ class Sender:
self.config.read(config_file)
# RabbitMQ from env vars with fallbacks
self.rabbitmq_host = os.getenv("RABBITMQ_HOST", self.config.get('rabbitmq', 'url', fallback='rabbitq.thejobhub.xyz'))
self.rabbitmq_port = int(os.getenv("RABBITMQ_PORT", self.config.get('rabbitmq', 'port', fallback='5672')))
self.username = os.getenv("RABBITMQ_USER", self.config.get('rabbitmq', 'username', fallback='guest'))
self.password = os.getenv("RABBITMQ_PASS", self.config.get('rabbitmq', 'password', fallback='guest'))
self.rabbitmq_host = os.getenv("RABBITMQ_HOST")
self.rabbitmq_port = os.getenv("RABBITMQ_PORT")
self.username = os.getenv("RABBITMQ_USER")
self.password = os.getenv("RABBITMQ_PASS")
self.queue_name = self.config.get('rabbitmq', 'queue_name', fallback='job_queue')
self.directory = self.config.get('files', 'directory', fallback='/var/jobs/csv')
self.log_file = self.config.get('logging', 'log_file', fallback='/var/logs/sender.log')
self.directory = self.config.get('files', 'directory', fallback=os.path.join(os.path.expanduser("~"), "jobs", "csv"))
# Cross-platform log path: use user's home directory
default_log_dir = os.path.join(os.path.expanduser("~"), ".web_scraping_project", "logs")
os.makedirs(default_log_dir, exist_ok=True)
default_log_file = os.path.join(default_log_dir, "sender.log")
self.log_file = self.config.get('logging', 'log_file', fallback=default_log_file)
self.virtual_host = self.config.get('rabbitmq', 'virtual_hash', fallback='/')
self.batch_size = 500
self.retry_attempts = 5 # Increased for robustness
@@ -31,10 +37,13 @@ class Sender:
self.check_interval = 30 # More frequent polling
# Redis for deduplication
redis_host = os.getenv("REDIS_HOST", "redis-scrape.thejobhub.xyz")
redis_port = int(os.getenv("REDIS_PORT", "6379"))
redis_host = os.getenv("REDIS_HOST")
redis_port = os.getenv("REDIS_PORT")
self.redis_client = redis.Redis(host=redis_host, port=redis_port, db=1, decode_responses=True)
# Ensure log directory exists before configuring logging
log_dir = os.path.dirname(self.log_file)
os.makedirs(log_dir, exist_ok=True)
logging.basicConfig(filename=self.log_file, level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
self.logger = logging.getLogger(__name__)