Refactor scraper.py to improve code readability by removing unnecessary blank lines and ensuring consistent formatting.
This commit is contained in:
parent
5939f2bd04
commit
5901e9c1a1
12
scraper.py
12
scraper.py
@ -1,4 +1,3 @@
|
|||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import random
|
import random
|
||||||
import os
|
import os
|
||||||
@ -19,6 +18,7 @@ from ssl_connection import create_ssl_connection_parameters # Import from ssl.p
|
|||||||
import redis
|
import redis
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
# Configure logging
|
# Configure logging
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO,
|
logging.basicConfig(level=logging.INFO,
|
||||||
format='%(asctime)s - %(levelname)s - %(message)s')
|
format='%(asctime)s - %(levelname)s - %(message)s')
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@ -80,7 +80,6 @@ class RedisManager:
|
|||||||
def get_cached_llm_result(self, job_url: str) -> Optional[Dict]:
|
def get_cached_llm_result(self, job_url: str) -> Optional[Dict]:
|
||||||
if not self.redis_client:
|
if not self.redis_client:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
cached_data = self.redis_client.get(f"llm_cache:{job_url}")
|
cached_data = self.redis_client.get(f"llm_cache:{job_url}")
|
||||||
if cached_data:
|
if cached_data:
|
||||||
@ -132,7 +131,6 @@ class RedisManager:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Redis error removing from error cache: {e}")
|
logger.error(f"Redis error removing from error cache: {e}")
|
||||||
|
|
||||||
|
|
||||||
def add_job_to_sent_cache(self, job_id: str):
|
def add_job_to_sent_cache(self, job_id: str):
|
||||||
"""Mark job as sent for processing."""
|
"""Mark job as sent for processing."""
|
||||||
if not self.redis_client:
|
if not self.redis_client:
|
||||||
@ -208,7 +206,6 @@ class MultiPlatformJobScraper:
|
|||||||
async def create_fresh_context(self):
|
async def create_fresh_context(self):
|
||||||
if self.browser is None:
|
if self.browser is None:
|
||||||
await self.init_browser()
|
await self.init_browser()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await self.browser.new_page()
|
await self.browser.new_page()
|
||||||
except Exception:
|
except Exception:
|
||||||
@ -526,6 +523,8 @@ METRICS = {
|
|||||||
"skipped": 0,
|
"skipped": 0,
|
||||||
"start_time": time.time()
|
"start_time": time.time()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
async def process_message_async(scraper: MultiPlatformJobScraper, ch, method, properties, body):
|
async def process_message_async(scraper: MultiPlatformJobScraper, ch, method, properties, body):
|
||||||
try:
|
try:
|
||||||
job_data = json.loads(body)
|
job_data = json.loads(body)
|
||||||
@ -547,6 +546,8 @@ async def process_message_async(scraper: MultiPlatformJobScraper, ch, method, pr
|
|||||||
METRICS["failed"] += 1
|
METRICS["failed"] += 1
|
||||||
finally:
|
finally:
|
||||||
ch.basic_ack(delivery_tag=method.delivery_tag)
|
ch.basic_ack(delivery_tag=method.delivery_tag)
|
||||||
|
|
||||||
|
|
||||||
def callback_wrapper(scraper: MultiPlatformJobScraper):
|
def callback_wrapper(scraper: MultiPlatformJobScraper):
|
||||||
def callback(ch, method, properties, body):
|
def callback(ch, method, properties, body):
|
||||||
asyncio.run(process_message_async(scraper, ch, method, properties, body))
|
asyncio.run(process_message_async(scraper, ch, method, properties, body))
|
||||||
@ -565,7 +566,6 @@ def start_consumer():
|
|||||||
for attempt in range(5):
|
for attempt in range(5):
|
||||||
try:
|
try:
|
||||||
parameters = create_ssl_connection_parameters()
|
parameters = create_ssl_connection_parameters()
|
||||||
|
|
||||||
if RABBITMQ_SSL_ENABLED:
|
if RABBITMQ_SSL_ENABLED:
|
||||||
logger.info(f"Connecting to RabbitMQ over SSL at {RABBITMQ_HOST}:{RABBITMQ_PORT}")
|
logger.info(f"Connecting to RabbitMQ over SSL at {RABBITMQ_HOST}:{RABBITMQ_PORT}")
|
||||||
else:
|
else:
|
||||||
@ -590,5 +590,7 @@ def start_consumer():
|
|||||||
channel.stop_consuming()
|
channel.stop_consuming()
|
||||||
connection.close()
|
connection.close()
|
||||||
asyncio.run(scraper.close_browser())
|
asyncio.run(scraper.close_browser())
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
start_consumer()
|
start_consumer()
|
||||||
Loading…
x
Reference in New Issue
Block a user