diff --git a/job_scraper2.py b/job_scraper2.py index d1c7a6f..600f2cb 100644 --- a/job_scraper2.py +++ b/job_scraper2.py @@ -202,7 +202,7 @@ class LinkedInJobScraper: await asyncio.sleep(random.uniform(4.0, 6.0) * self.human_speed) # Wait for URL to change or new content try: - await page.wait_for_function("() => window.location.href.includes('start=')", timeout=30000) + await page.wait_for_function("() => window.location.href.includes('start=')", timeout=60000) except: pass current_page += 1 @@ -360,7 +360,7 @@ class LinkedInJobScraper: else: # If no pagination and no new jobs from scroll, check by refreshing print("🔄 Refreshing page to check for new results...") - await page.reload(wait_until='networkidle') + await page.reload(wait_until='load') await asyncio.sleep(random.uniform(3.0, 5.0) * self.human_speed) # Check for new jobs after refresh @@ -439,7 +439,7 @@ class LinkedInJobScraper: try: external_page = await asyncio.wait_for(page_waiter, timeout=5.0) print(" 🌐 External job site opened in new tab.") - await external_page.wait_for_load_state("load", timeout=30000) + await external_page.wait_for_load_state("load", timeout=60000) await asyncio.sleep(2 * self.human_speed) await self.engine._human_like_scroll(external_page) await asyncio.sleep(2 * self.human_speed) diff --git a/llm_agent.py b/llm_agent.py index e409762..f91ef39 100644 --- a/llm_agent.py +++ b/llm_agent.py @@ -9,7 +9,7 @@ from config import GEMINI_API_KEY class LLMJobRefiner: def __init__(self): genai.configure(api_key=GEMINI_API_KEY) - self.model = genai.GenerativeModel('gemini-pro') + self.model = genai.GenerativeModel('gemini-flash-latest') async def refine_job_data(self, raw_data: Dict[str, Any], target_field: str) -> Dict[str, Any]: """ @@ -29,7 +29,7 @@ class LLMJobRefiner: Target Field: {target_field} Raw Page Content: - {raw_data.get('page_content', '')[:3000]} # Limit content size + {raw_data.get('page_content', '')[:6000]} # Limit content size Instructions: 1. 
Extract only the information relevant to the target field: {target_field}