Refactor scraper.py to improve code readability by removing unnecessary blank lines and ensuring consistent formatting.

Ofure Ikheloa 2025-12-15 14:18:52 +01:00
parent 5939f2bd04
commit 5901e9c1a1


@@ -1,4 +1,3 @@
import asyncio
import random
import os
@@ -19,6 +18,7 @@ from ssl_connection import create_ssl_connection_parameters # Import from ssl.p
import redis
load_dotenv()
# Configure logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
@@ -80,7 +80,6 @@ class RedisManager:
    def get_cached_llm_result(self, job_url: str) -> Optional[Dict]:
        if not self.redis_client:
            return None
        try:
            cached_data = self.redis_client.get(f"llm_cache:{job_url}")
            if cached_data:
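
For context on the hunk above: get_cached_llm_result follows the standard get-then-deserialize read path. A minimal, self-contained sketch of that pattern, assuming the cache stores JSON strings under the llm_cache:{job_url} key seen in the diff (connection details and error handling here are illustrative, not taken from the file):

import json
import logging
from typing import Dict, Optional

import redis

logger = logging.getLogger(__name__)

class LLMCache:
    """Illustrative stand-in for the RedisManager read path."""

    def __init__(self, url: str = "redis://localhost:6379/0"):
        try:
            self.redis_client = redis.Redis.from_url(url, decode_responses=True)
            self.redis_client.ping()
        except redis.RedisError:
            self.redis_client = None  # degrade to a cache miss on every read

    def get_cached_llm_result(self, job_url: str) -> Optional[Dict]:
        if not self.redis_client:
            return None
        try:
            cached_data = self.redis_client.get(f"llm_cache:{job_url}")
            if cached_data:
                return json.loads(cached_data)  # cache hit: deserialize
        except redis.RedisError as e:
            logger.error(f"Redis error reading LLM cache: {e}")
        return None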
@@ -132,7 +131,6 @@ class RedisManager:
        except Exception as e:
            logger.error(f"Redis error removing from error cache: {e}")
    def add_job_to_sent_cache(self, job_id: str):
        """Mark job as sent for processing."""
        if not self.redis_client:
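
The sent-cache marker in this hunk likely pairs the write with an expiry so processed job IDs age out of Redis. Continuing the sketch above under that assumption (the sent_cache: prefix and one-week TTL are hypothetical, not taken from the diff):

SENT_CACHE_TTL = 7 * 24 * 3600  # hypothetical: markers expire after a week

    def add_job_to_sent_cache(self, job_id: str):
        """Mark job as sent for processing."""
        if not self.redis_client:
            return
        try:
            # setex writes the value and its TTL in one atomic call
            self.redis_client.setex(f"sent_cache:{job_id}", SENT_CACHE_TTL, "1")
        except redis.RedisError as e:
            logger.error(f"Redis error adding to sent cache: {e}")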
@@ -208,7 +206,6 @@ class MultiPlatformJobScraper:
    async def create_fresh_context(self):
        if self.browser is None:
            await self.init_browser()
        try:
            await self.browser.new_page()
        except Exception:
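
The try/except around browser.new_page() reads like a liveness probe: if the browser process has died, opening a page raises and the scraper can re-initialize. A sketch of that pattern using Playwright's async API; the diff never names the browser library, so Playwright here is an assumption:

import asyncio
from playwright.async_api import async_playwright

class BrowserManager:
    """Illustrative sketch of the fresh-context pattern in the hunk above."""

    def __init__(self):
        self.playwright = None
        self.browser = None

    async def init_browser(self):
        self.playwright = await async_playwright().start()
        self.browser = await self.playwright.chromium.launch(headless=True)

    async def create_fresh_context(self):
        if self.browser is None:
            await self.init_browser()
        try:
            # probe: raises if the underlying browser process is gone
            page = await self.browser.new_page()
            await page.close()
        except Exception:
            await self.init_browser()  # relaunch and carry on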
@@ -526,6 +523,8 @@ METRICS = {
    "skipped": 0,
    "start_time": time.time()
}
async def process_message_async(scraper: MultiPlatformJobScraper, ch, method, properties, body):
    try:
        job_data = json.loads(body)
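
METRICS carries start_time alongside its counters, which suggests it feeds a periodic throughput report. A hypothetical helper showing how such a dict is commonly summarized; no function like this appears in the diff, and the "processed" key is assumed by analogy with the METRICS["failed"] update below:

import time

METRICS = {"processed": 0, "failed": 0, "skipped": 0, "start_time": time.time()}

def summarize_metrics(metrics: dict) -> str:
    """Report the overall rate in jobs per minute; purely illustrative."""
    elapsed = max(time.time() - metrics["start_time"], 1e-9)
    total = metrics["processed"] + metrics["failed"] + metrics["skipped"]
    return (f"{total} jobs in {elapsed:.0f}s "
            f"({total / (elapsed / 60):.1f}/min, {metrics['failed']} failed)")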
@@ -547,6 +546,8 @@ async def process_message_async(scraper: MultiPlatformJobScraper, ch, method, pr
        METRICS["failed"] += 1
    finally:
        ch.basic_ack(delivery_tag=method.delivery_tag)
def callback_wrapper(scraper: MultiPlatformJobScraper):
    def callback(ch, method, properties, body):
        asyncio.run(process_message_async(scraper, ch, method, properties, body))
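
Two patterns are visible in this hunk: the ack lives in a finally block, so a message leaves the queue whether processing succeeds or fails (failures are counted, not requeued), and callback_wrapper bridges pika's synchronous callback into the async scraper with asyncio.run. A condensed sketch of the bridge; the scrape_job method name is hypothetical, while the (ch, method, properties, body) signature and basic_ack call are standard pika:

import asyncio
import json

async def process_message_async(scraper, ch, method, properties, body):
    try:
        job_data = json.loads(body)
        await scraper.scrape_job(job_data)  # hypothetical method name
        METRICS["processed"] += 1
    except Exception:
        METRICS["failed"] += 1              # count the failure, don't requeue
    finally:
        # ack in finally: the message is removed from the queue either way
        ch.basic_ack(delivery_tag=method.delivery_tag)

def callback_wrapper(scraper):
    def callback(ch, method, properties, body):
        # asyncio.run starts a fresh event loop per message and blocks
        # pika's thread until the job completes: one message in flight
        asyncio.run(process_message_async(scraper, ch, method, properties, body))
    return callback

A side effect of this design is strictly serial consumption: the consumer thread is blocked for the duration of each scrape.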
@@ -565,7 +566,6 @@ def start_consumer():
    for attempt in range(5):
        try:
            parameters = create_ssl_connection_parameters()
            if RABBITMQ_SSL_ENABLED:
                logger.info(f"Connecting to RabbitMQ over SSL at {RABBITMQ_HOST}:{RABBITMQ_PORT}")
            else:
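
start_consumer wraps connection setup in a bounded retry loop (for attempt in range(5)). A sketch of that shape with pika's blocking client; the exponential backoff is an assumption, while create_ssl_connection_parameters presumably returns the pika.ConnectionParameters consumed here:

import logging
import time

import pika

logger = logging.getLogger(__name__)

def connect_with_retry(parameters: pika.ConnectionParameters,
                       attempts: int = 5) -> pika.BlockingConnection:
    """Bounded retries with an illustrative exponential backoff."""
    for attempt in range(attempts):
        try:
            return pika.BlockingConnection(parameters)
        except pika.exceptions.AMQPConnectionError as e:
            wait = 2 ** attempt  # hypothetical: 1s, 2s, 4s, ...
            logger.warning(f"Attempt {attempt + 1}/{attempts} failed ({e}); "
                           f"retrying in {wait}s")
            time.sleep(wait)
    raise ConnectionError("RabbitMQ unreachable after retries")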
@@ -590,5 +590,7 @@ def start_consumer():
        channel.stop_consuming()
        connection.close()
        asyncio.run(scraper.close_browser())
if __name__ == "__main__":
    start_consumer()
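
The cleanup lines in the final hunk (stop consuming, close the AMQP connection, tear down the browser last) are the usual Ctrl-C path for a blocking pika consumer. A sketch of how those calls typically hang together; the KeyboardInterrupt handler, the finally block, and the queue name are assumptions, since the diff shows only the three cleanup calls:

import asyncio
import pika

def start_consumer(scraper, parameters: pika.ConnectionParameters,
                   queue: str = "jobs"):  # queue name is hypothetical
    connection = pika.BlockingConnection(parameters)
    channel = connection.channel()
    channel.basic_consume(queue=queue,
                          on_message_callback=callback_wrapper(scraper))
    try:
        channel.start_consuming()             # blocks the main thread
    except KeyboardInterrupt:
        channel.stop_consuming()              # let the in-flight message finish
    finally:
        connection.close()                    # release the AMQP connection
        asyncio.run(scraper.close_browser())  # browser teardown last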