refactor(job_scraper): improve page loading and typing in linkedin scraper

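- Change page load strategy from 'load' to 'domcontentloaded' and 'networkidle'; the job search navigation now waits for 'networkidle' (this favors load reliability over speed, since 'networkidle' usually resolves later than 'load')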
- Allow search_keywords to be None (annotated as Optional[str]) to handle empty searches; the parameter has no default, so callers must still pass it explicitly
- Update type imports to include List for better type hints
- Set headless mode to true by default for production use
This commit is contained in:
Ofure Ikheloa 2025-11-23 09:27:05 +01:00
parent 458e914d71
commit 7dca4c9159

View File

@ -5,7 +5,7 @@ import random
import sqlite3 import sqlite3
import os import os
from datetime import datetime from datetime import datetime
from typing import Optional, Dict from typing import Optional, Dict, List
from playwright.async_api import async_playwright from playwright.async_api import async_playwright
from browserforge.injectors.playwright import AsyncNewContext from browserforge.injectors.playwright import AsyncNewContext
@ -200,7 +200,7 @@ class LinkedInJobScraper:
async def scrape_jobs( async def scrape_jobs(
self, self,
search_keywords: str, search_keywords: Optional[str],
max_pages: int = 1, max_pages: int = 1,
credentials: Optional[Dict] = None credentials: Optional[Dict] = None
): ):
@ -269,7 +269,7 @@ class LinkedInJobScraper:
return return
print(f"🔍 Searching for: {search_keywords}") print(f"🔍 Searching for: {search_keywords}")
await page.goto(search_url, wait_until='load', timeout=60000) await page.goto(search_url, wait_until='networkidle', timeout=60000)
await asyncio.sleep(random.uniform(4.0, 6.0) * self.human_speed) await asyncio.sleep(random.uniform(4.0, 6.0) * self.human_speed)
if await self.engine._detect_cloudflare(page): if await self.engine._detect_cloudflare(page):