Compare commits
2 Commits
c0c7925be3
...
0c447d0f77
| Author | SHA1 | Date | |
|---|---|---|---|
| 0c447d0f77 | |||
| 94d87943de |
13
scraper.py
13
scraper.py
@ -23,12 +23,13 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Environment variables
|
||||
RABBITMQ_HOST = os.getenv("RABBITMQ_HOST", "rabbitq.thejobhub.xyz")
|
||||
RABBITMQ_PORT = int(os.getenv("RABBITMQ_PORT", "5672"))
|
||||
RABBITMQ_USER = os.getenv("RABBITMQ_USER", "guest")
|
||||
RABBITMQ_PASS = os.getenv("RABBITMQ_PASS", "guest")
|
||||
REDIS_HOST = os.getenv("REDIS_HOST", "redis-scrape.thejobhub.xyz")
|
||||
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
|
||||
RABBITMQ_HOST = os.getenv("RABBITMQ_HOST")
|
||||
RABBITMQ_PORT = os.getenv("RABBITMQ_PORT")
|
||||
RABBITMQ_USER = os.getenv("RABBITMQ_USER")
|
||||
RABBITMQ_PASS = os.getenv("RABBITMQ_PASS")
|
||||
REDIS_HOST = os.getenv("REDIS_HOST")
|
||||
REDIS_PORT = os.getenv("REDIS_PORT")
|
||||
|
||||
|
||||
class AshbyJobScraper:
|
||||
def __init__(
|
||||
|
||||
27
sender.py
27
sender.py
@ -1,3 +1,4 @@
|
||||
|
||||
import csv
|
||||
import json
|
||||
import logging
|
||||
@ -9,7 +10,6 @@ import uuid
|
||||
from configparser import ConfigParser
|
||||
import pika
|
||||
import redis
|
||||
import os
|
||||
|
||||
class Sender:
|
||||
def __init__(self, config_file='config.ini'):
|
||||
@ -17,13 +17,19 @@ class Sender:
|
||||
self.config.read(config_file)
|
||||
|
||||
# RabbitMQ from env vars with fallbacks
|
||||
self.rabbitmq_host = os.getenv("RABBITMQ_HOST", self.config.get('rabbitmq', 'url', fallback='rabbitq.thejobhub.xyz'))
|
||||
self.rabbitmq_port = int(os.getenv("RABBITMQ_PORT", self.config.get('rabbitmq', 'port', fallback='5672')))
|
||||
self.username = os.getenv("RABBITMQ_USER", self.config.get('rabbitmq', 'username', fallback='guest'))
|
||||
self.password = os.getenv("RABBITMQ_PASS", self.config.get('rabbitmq', 'password', fallback='guest'))
|
||||
self.rabbitmq_host = os.getenv("RABBITMQ_HOST")
|
||||
self.rabbitmq_port = os.getenv("RABBITMQ_PORT")
|
||||
self.username = os.getenv("RABBITMQ_USER")
|
||||
self.password = os.getenv("RABBITMQ_PASS")
|
||||
self.queue_name = self.config.get('rabbitmq', 'queue_name', fallback='job_queue')
|
||||
self.directory = self.config.get('files', 'directory', fallback='/var/jobs/csv')
|
||||
self.log_file = self.config.get('logging', 'log_file', fallback='/var/logs/sender.log')
|
||||
self.directory = self.config.get('files', 'directory', fallback=os.path.join(os.path.expanduser("~"), "jobs", "csv"))
|
||||
|
||||
# Cross-platform log path: use user's home directory
|
||||
default_log_dir = os.path.join(os.path.expanduser("~"), ".web_scraping_project", "logs")
|
||||
os.makedirs(default_log_dir, exist_ok=True)
|
||||
default_log_file = os.path.join(default_log_dir, "sender.log")
|
||||
self.log_file = self.config.get('logging', 'log_file', fallback=default_log_file)
|
||||
|
||||
self.virtual_host = self.config.get('rabbitmq', 'virtual_hash', fallback='/')
|
||||
self.batch_size = 500
|
||||
self.retry_attempts = 5 # Increased for robustness
|
||||
@ -31,10 +37,13 @@ class Sender:
|
||||
self.check_interval = 30 # More frequent polling
|
||||
|
||||
# Redis for deduplication
|
||||
redis_host = os.getenv("REDIS_HOST", "redis-scrape.thejobhub.xyz")
|
||||
redis_port = int(os.getenv("REDIS_PORT", "6379"))
|
||||
redis_host = os.getenv("REDIS_HOST")
|
||||
redis_port = os.getenv("REDIS_PORT")
|
||||
self.redis_client = redis.Redis(host=redis_host, port=redis_port, db=1, decode_responses=True)
|
||||
|
||||
# Ensure log directory exists before configuring logging
|
||||
log_dir = os.path.dirname(self.log_file)
|
||||
os.makedirs(log_dir, exist_ok=True)
|
||||
logging.basicConfig(filename=self.log_file, level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user