from scraping_engine import FingerprintScrapingEngine
from job_scraper2 import LinkedInJobScraper
import os
from dotenv import load_dotenv
import asyncio
import random

# Load environment variables
load_dotenv()


async def main():
    engine = FingerprintScrapingEngine(
        seed="job_scraping_12",
        target_os="windows",
        db_path="job_listings.db",
        markdown_path="job_listings.md"
    )

    # Initialize the scraper with the fields to extract from each listing
    scraper = LinkedInJobScraper(
        engine,
        human_speed=1.6,
        user_request=(
            "Extract title, company, location, description, requirements, "
            "qualifications, nature of job (remote, onsite, hybrid) and salary"
        )
    )

    # Job titles to cycle through
    job_titles = [
        "Software Engineer",
        "Data Scientist",
        "Product Manager",
        "UX Designer",
        "DevOps Engineer",
        "Machine Learning Engineer",
        "Frontend Developer",
        "Backend Developer",
        "Full Stack Developer",
        "Data Analyst"
    ]

    fixed_location = "New York"

    # Keep cycling through all job titles indefinitely
    while True:
        # Shuffle job titles to randomize the order each cycle
        random.shuffle(job_titles)

        for job_title in job_titles:
            search_keywords = f"{job_title} location:{fixed_location}"

            print(f"\n{'=' * 60}")
            print(f"Starting scrape for: {search_keywords}")
            print(f"{'=' * 60}")

            await scraper.scrape_jobs(
                search_keywords=search_keywords,
                credentials={
                    "email": os.getenv("SCRAPING_USERNAME"),
                    "password": os.getenv("SCRAPING_PASSWORD")
                }
            )

            print(f"\nāœ… Completed scraping for: {job_title}")
            print("ā³ Waiting 2 minutes before next job title...")

            # Wait 2 minutes before the next job title; use asyncio.sleep
            # instead of time.sleep so the pause doesn't block the event loop
            await asyncio.sleep(120)

        print("\nāœ… Completed full cycle of all job titles")
        print("šŸ”„ Starting new cycle...")


if __name__ == "__main__":
    asyncio.run(main())