- Add config module for spoof data management
- Implement session persistence to reuse authenticated sessions
- Add feedback system to track success rates and adjust fingerprinting
- Improve job link collection with pagination and scroll detection
- Separate verified/unverified job listings into different folders
- Enhance error handling for CAPTCHA and Cloudflare challenges
29 lines · 744 B · Python
from scraping_engine import FingerprintScrapingEngine
|
|
from job_scraper import LinkedInJobScraper
|
|
import os
|
|
import asyncio
|
|
|
|
|
|
async def main():
    """Configure the fingerprint scraping engine and run a LinkedIn job search.

    Reads login credentials from the SCRAPING_USERNAME and SCRAPING_PASSWORD
    environment variables; `os.getenv` returns None for either if unset, so
    the credentials dict may carry None values for the scraper to handle.
    """
    # Single source of truth for the search terms. The original duplicated
    # the string in two call sites and misspelled it as "Data Anaylst";
    # both the duplication and the typo are fixed here.
    search_keywords = "Data Analyst"

    engine = FingerprintScrapingEngine(
        seed="job_scraping_engine",
        target_os="windows",
        db_path="job_listings.db",
        markdown_path="job_listings.md",
        search_keywords=search_keywords,
    )

    # human_speed > 1.0 slows interactions to a more human-like pace
    # (exact semantics defined by LinkedInJobScraper — confirm in its docs).
    scraper = LinkedInJobScraper(engine, human_speed=1.6)

    await scraper.scrape_jobs(
        search_keywords=search_keywords,
        max_pages=3,
        credentials={
            "email": os.getenv("SCRAPING_USERNAME"),
            "password": os.getenv("SCRAPING_PASSWORD"),
        },
    )
|
|
|
|
if __name__ == "__main__":
    # Script entry point: start the event loop and run the async workflow.
    asyncio.run(main())
|