From 5901e9c1a1769bd529c862abe357fea2a9526d3b Mon Sep 17 00:00:00 2001 From: Ofure Ikheloa Date: Mon, 15 Dec 2025 14:18:52 +0100 Subject: [PATCH] Refactor scraper.py to improve code readability by removing unnecessary blank lines and ensuring consistent formatting. --- scraper.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/scraper.py b/scraper.py index 9a001c6..1ce5948 100644 --- a/scraper.py +++ b/scraper.py @@ -1,4 +1,3 @@ - import asyncio import random import os @@ -19,6 +18,7 @@ from ssl_connection import create_ssl_connection_parameters # Import from ssl.p import redis load_dotenv() # Configure logging + logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) @@ -79,8 +79,7 @@ class RedisManager: def get_cached_llm_result(self, job_url: str) -> Optional[Dict]: if not self.redis_client: - return None - + return None try: cached_data = self.redis_client.get(f"llm_cache:{job_url}") if cached_data: @@ -131,8 +130,7 @@ class RedisManager: logger.info(f"✅ Removed job {job_id} from error cache after successful processing") except Exception as e: logger.error(f"Redis error removing from error cache: {e}") - - + def add_job_to_sent_cache(self, job_id: str): """Mark job as sent for processing.""" if not self.redis_client: @@ -207,8 +205,7 @@ class MultiPlatformJobScraper: async def create_fresh_context(self): if self.browser is None: - await self.init_browser() - + await self.init_browser() try: await self.browser.new_page() except Exception: @@ -526,6 +523,8 @@ METRICS = { "skipped": 0, "start_time": time.time() } + + async def process_message_async(scraper: MultiPlatformJobScraper, ch, method, properties, body): try: job_data = json.loads(body) @@ -547,6 +546,8 @@ async def process_message_async(scraper: MultiPlatformJobScraper, ch, method, pr METRICS["failed"] += 1 finally: ch.basic_ack(delivery_tag=method.delivery_tag) + + def callback_wrapper(scraper: MultiPlatformJobScraper): def callback(ch, method, properties, body): asyncio.run(process_message_async(scraper, ch, method, properties, body)) @@ -564,8 +565,7 @@ def start_consumer(): connection = None for attempt in range(5): try: - parameters = create_ssl_connection_parameters() - + parameters = create_ssl_connection_parameters() if RABBITMQ_SSL_ENABLED: logger.info(f"Connecting to RabbitMQ over SSL at {RABBITMQ_HOST}:{RABBITMQ_PORT}") else: @@ -590,5 +590,7 @@ def start_consumer(): channel.stop_consuming() connection.close() asyncio.run(scraper.close_browser()) + + if __name__ == "__main__": start_consumer() \ No newline at end of file