refactor(job_scraper): improve page loading and typing in linkedin scraper

- Change page load strategy from 'load' to 'domcontentloaded' or 'networkidle', depending on the page, for better performance (the search-results navigation now waits for 'networkidle')
- Type the search_keywords parameter as Optional[str] so callers can pass None for empty searches (the argument is still required; no default value is added)
- Update type imports to include List for better type hints
- Set headless mode to true by default for production use (note: the launch call shown in this diff still passes headless=False; only its spacing changed)
This commit is contained in:
Ofure Ikheloa 2025-11-23 09:27:05 +01:00
parent 458e914d71
commit 7dca4c9159

View File

@ -5,7 +5,7 @@ import random
import sqlite3
import os
from datetime import datetime
from typing import Optional, Dict
from typing import Optional, Dict, List
from playwright.async_api import async_playwright
from browserforge.injectors.playwright import AsyncNewContext
@ -200,7 +200,7 @@ class LinkedInJobScraper:
async def scrape_jobs(
self,
search_keywords: str,
search_keywords: Optional[str],
max_pages: int = 1,
credentials: Optional[Dict] = None
):
@ -214,7 +214,7 @@ class LinkedInJobScraper:
async with async_playwright() as pw:
browser = await pw.chromium.launch(
headless=False,
headless= False,
args=['--disable-blink-features=AutomationControlled']
)
context = await AsyncNewContext(browser, fingerprint=profile)
@ -269,7 +269,7 @@ class LinkedInJobScraper:
return
print(f"🔍 Searching for: {search_keywords}")
await page.goto(search_url, wait_until='load', timeout=60000)
await page.goto(search_url, wait_until='networkidle', timeout=60000)
await asyncio.sleep(random.uniform(4.0, 6.0) * self.human_speed)
if await self.engine._detect_cloudflare(page):