Refactor scraper.py to improve code readability by removing unnecessary blank lines and ensuring consistent formatting.

Ofure Ikheloa 2025-12-15 14:18:52 +01:00
parent 5939f2bd04
commit 5901e9c1a1


@@ -1,4 +1,3 @@
import asyncio
import random
import os
@@ -19,6 +18,7 @@ from ssl_connection import create_ssl_connection_parameters # Import from ssl.p
import redis
load_dotenv()
# Configure logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
@@ -80,7 +80,6 @@ class RedisManager:
    def get_cached_llm_result(self, job_url: str) -> Optional[Dict]:
        if not self.redis_client:
            return None
        try:
            cached_data = self.redis_client.get(f"llm_cache:{job_url}")
            if cached_data:
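For context on what this hunk touches: `get_cached_llm_result` reads a JSON blob keyed by `llm_cache:<job_url>`. A minimal sketch of the read/write pair follows, assuming a `redis-py` client; the writer's name (`cache_llm_result`) and the 24-hour TTL are illustrative assumptions, not taken from scraper.py.

```python
import json
import logging
from typing import Dict, Optional

import redis

logger = logging.getLogger(__name__)


class RedisManager:
    def __init__(self, url: str = "redis://localhost:6379/0"):
        # decode_responses=True makes GET return str instead of bytes
        self.redis_client = redis.Redis.from_url(url, decode_responses=True)

    def get_cached_llm_result(self, job_url: str) -> Optional[Dict]:
        """Return a previously cached LLM result for this job URL, if any."""
        if not self.redis_client:
            return None
        try:
            cached_data = self.redis_client.get(f"llm_cache:{job_url}")
            if cached_data:
                return json.loads(cached_data)
        except Exception as e:
            logger.error(f"Redis error reading LLM cache: {e}")
        return None

    def cache_llm_result(self, job_url: str, result: Dict, ttl: int = 86400):
        # Hypothetical writer: SETEX stores the value with an expiry
        # so stale results age out automatically
        if not self.redis_client:
            return
        try:
            self.redis_client.setex(f"llm_cache:{job_url}", ttl, json.dumps(result))
        except Exception as e:
            logger.error(f"Redis error writing LLM cache: {e}")
```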
@@ -132,7 +131,6 @@ class RedisManager:
        except Exception as e:
            logger.error(f"Redis error removing from error cache: {e}")
    def add_job_to_sent_cache(self, job_id: str):
        """Mark job as sent for processing."""
        if not self.redis_client:
@@ -208,7 +206,6 @@ class MultiPlatformJobScraper:
    async def create_fresh_context(self):
        if self.browser is None:
            await self.init_browser()
        try:
            await self.browser.new_page()
        except Exception:
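The `create_fresh_context` hunk shows a lazy-init-then-probe shape: launch the browser on first use, then try to open a page and relaunch if the browser has died. A sketch under the assumption that the scraper drives Playwright's async API; the diff itself never names the browser library.

```python
from playwright.async_api import async_playwright


class MultiPlatformJobScraper:
    def __init__(self):
        self.playwright = None
        self.browser = None

    async def init_browser(self):
        # Launch one headless Chromium instance and reuse it across jobs
        self.playwright = await async_playwright().start()
        self.browser = await self.playwright.chromium.launch(headless=True)

    async def create_fresh_context(self):
        # Lazily initialize, then probe with a throwaway page; if the
        # browser has crashed, relaunch before continuing
        if self.browser is None:
            await self.init_browser()
        try:
            page = await self.browser.new_page()
            await page.close()
        except Exception:
            await self.init_browser()

    async def close_browser(self):
        if self.browser:
            await self.browser.close()
        if self.playwright:
            await self.playwright.stop()
```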
@@ -526,6 +523,8 @@ METRICS = {
    "skipped": 0,
    "start_time": time.time()
}
async def process_message_async(scraper: MultiPlatformJobScraper, ch, method, properties, body):
    try:
        job_data = json.loads(body)
@@ -547,6 +546,8 @@ async def process_message_async(scraper: MultiPlatformJobScraper, ch, method, pr
        METRICS["failed"] += 1
    finally:
        ch.basic_ack(delivery_tag=method.delivery_tag)
def callback_wrapper(scraper: MultiPlatformJobScraper):
    def callback(ch, method, properties, body):
        asyncio.run(process_message_async(scraper, ch, method, properties, body))
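`callback_wrapper` bridges pika's synchronous delivery callbacks and the async scraper: every message runs its handler to completion in a fresh event loop via `asyncio.run`. A self-contained demonstration of that bridge with a dummy handler; the names `handle` and `sync_callback` are illustrative only.

```python
import asyncio


async def handle(body: bytes) -> None:
    # Stand-in for process_message_async's real scraping work
    await asyncio.sleep(0)
    print(f"processed {body!r}")


def sync_callback(body: bytes) -> None:
    # Each call spins up and tears down an event loop. That is simple and
    # correct, but pays loop start-up cost per message; a long-lived loop
    # would amortize it at higher message rates.
    asyncio.run(handle(body))


if __name__ == "__main__":
    for msg in (b"job-1", b"job-2"):
        sync_callback(msg)
```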
@@ -565,7 +566,6 @@ def start_consumer():
    for attempt in range(5):
        try:
            parameters = create_ssl_connection_parameters()
            if RABBITMQ_SSL_ENABLED:
                logger.info(f"Connecting to RabbitMQ over SSL at {RABBITMQ_HOST}:{RABBITMQ_PORT}")
            else:
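This hunk calls `create_ssl_connection_parameters()` from the project's `ssl_connection` module, which the commit does not show. A plausible sketch of such a helper using pika's `SSLOptions`; every environment-variable name and default below is an assumption.

```python
import os
import ssl

import pika


def create_ssl_connection_parameters() -> pika.ConnectionParameters:
    # Hypothetical reconstruction; the real helper lives in ssl_connection.py
    credentials = pika.PlainCredentials(
        os.getenv("RABBITMQ_USER", "guest"),
        os.getenv("RABBITMQ_PASS", "guest"),
    )
    ssl_options = None
    if os.getenv("RABBITMQ_SSL_ENABLED", "false").lower() == "true":
        # Default context verifies the broker's certificate chain and hostname
        ssl_options = pika.SSLOptions(ssl.create_default_context())
    return pika.ConnectionParameters(
        host=os.getenv("RABBITMQ_HOST", "localhost"),
        port=int(os.getenv("RABBITMQ_PORT", "5672")),
        credentials=credentials,
        ssl_options=ssl_options,
    )
```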
@@ -590,5 +590,7 @@ def start_consumer():
        channel.stop_consuming()
        connection.close()
        asyncio.run(scraper.close_browser())
if __name__ == "__main__":
    start_consumer()
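`start_consumer` wraps the connection attempt in a `for attempt in range(5)` retry loop and, on interrupt, shuts down in order: stop consuming, close the AMQP connection, then dispose of the browser via `asyncio.run(scraper.close_browser())`. A generic sketch of that retry shape; the exponential backoff delay is an assumption, since the diff does not show the sleep between attempts.

```python
import logging
import time

import pika
from pika.exceptions import AMQPConnectionError

logger = logging.getLogger(__name__)


def connect_with_retry(parameters: pika.ConnectionParameters,
                       attempts: int = 5) -> pika.BlockingConnection:
    # Retry with exponential backoff (1s, 2s, 4s, ...) between failures;
    # the delay schedule is an assumption, not from scraper.py
    for attempt in range(attempts):
        try:
            return pika.BlockingConnection(parameters)
        except AMQPConnectionError as e:
            logger.warning(f"Connect attempt {attempt + 1}/{attempts} failed: {e}")
            if attempt < attempts - 1:
                time.sleep(2 ** attempt)
    raise RuntimeError(f"Could not reach RabbitMQ after {attempts} attempts")


if __name__ == "__main__":
    connection = connect_with_retry(pika.ConnectionParameters("localhost"))
    print("connected:", connection.is_open)
    connection.close()
```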