From 7dca4c915976faae6bacd86cdd9060200bdb29e7 Mon Sep 17 00:00:00 2001 From: Ofure Ikheloa Date: Sun, 23 Nov 2025 09:27:05 +0100 Subject: [PATCH] refactor(job_scraper): improve page loading and typing in linkedin scraper - Change the search page load strategy from 'load' to 'networkidle' so navigation waits for network activity to settle - Allow search_keywords to be None (Optional[str]) to handle empty searches; the argument itself is still required - Update type imports to include List for better type hints - Keep headless mode disabled (headless=False); only whitespace changes on that line --- job_scraper.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/job_scraper.py b/job_scraper.py index 2f4f093..efef239 100644 --- a/job_scraper.py +++ b/job_scraper.py @@ -5,7 +5,7 @@ import random import sqlite3 import os from datetime import datetime -from typing import Optional, Dict +from typing import Optional, Dict, List from playwright.async_api import async_playwright from browserforge.injectors.playwright import AsyncNewContext @@ -200,7 +200,7 @@ class LinkedInJobScraper: async def scrape_jobs( self, - search_keywords: str, + search_keywords: Optional[str], max_pages: int = 1, credentials: Optional[Dict] = None ): @@ -214,7 +214,7 @@ class LinkedInJobScraper: async with async_playwright() as pw: browser = await pw.chromium.launch( - headless=False, + headless= False, args=['--disable-blink-features=AutomationControlled'] ) context = await AsyncNewContext(browser, fingerprint=profile) @@ -269,7 +269,7 @@ class LinkedInJobScraper: return print(f"🔍 Searching for: {search_keywords}") - await page.goto(search_url, wait_until='load', timeout=60000) + await page.goto(search_url, wait_until='networkidle', timeout=60000) await asyncio.sleep(random.uniform(4.0, 6.0) * self.human_speed) if await self.engine._detect_cloudflare(page):