from scraping_engine import FingerprintScrapingEngine
from amazon_job_scraper import AmazonJobScraper  # Updated class name
from dotenv import load_dotenv
import asyncio
import random

load_dotenv()

async def main():
    engine = FingerprintScrapingEngine(
        seed="amazon_job_scraping_12",
        target_os="windows",
        db_path="amazon_jobs.db",
        markdown_path="amazon_jobs.md"
    )
    scraper = AmazonJobScraper(
        engine,
        human_speed=1.4,
        user_request=(
            "Extract title, company, location, description, "
            "basic qualifications, preferred qualifications, "
            "job ID, and job type (full-time, part-time, etc.)"
        )
    )

    job_titles = [
        "Software Development Engineer",
        "Data Scientist",
        "Product Manager",
        "UX Designer",
        "Solutions Architect",
        "Machine Learning Engineer",
        "Frontend Engineer",
        "Backend Engineer",
        "Full Stack Engineer",
        "Data Engineer"
    ]
    fixed_location = "United States"  # Amazon uses country/region, not city

    while True:
        random.shuffle(job_titles)
        for job_title in job_titles:
            search_keywords = f"{job_title} location:{fixed_location}"
            print(f"\n{'='*60}")
            print(f"Starting Amazon scrape for: {search_keywords}")
            print(f"{'='*60}")

            await scraper.scrape_jobs(
                search_keywords=search_keywords,
                max_pages=3  # Amazon loads 10 per page; 3 pages = ~30 jobs
            )

            print(f"\n✅ Completed scraping for: {job_title}")
            print("⏳ Waiting 90 seconds before next job title...")
            # Non-blocking sleep: time.sleep() would stall the event loop inside
            # an async function, so use asyncio.sleep() instead.
            await asyncio.sleep(90)

        print("\n✅ Completed full cycle. Restarting...")

if __name__ == "__main__":
    asyncio.run(main())
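
# One possible refinement (an assumption on my part, not something the original
# script does): jitter the pause between job titles so the request cadence is
# not a fixed, predictable 90 seconds. A minimal sketch of a hypothetical
# helper, using only the standard library, that could stand in for the
# `await asyncio.sleep(90)` call inside main():
#
#     async def polite_pause(base_seconds: float = 90.0, jitter: float = 0.25) -> None:
#         # Sleep for base_seconds scaled by a random factor in
#         # [1 - jitter, 1 + jitter], e.g. roughly 67-112s for the defaults.
#         delay = base_seconds * random.uniform(1 - jitter, 1 + jitter)
#         print(f"⏳ Waiting {delay:.0f} seconds before next job title...")
#         await asyncio.sleep(delay)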