From c0c7925be3804e81f7b75b6728c166a3751ec8ea Mon Sep 17 00:00:00 2001
From: Ofure
Date: Wed, 10 Dec 2025 11:07:39 +0000
Subject: [PATCH] Delete amazon_main.py

---
 amazon_main.py | 60 --------------------------------------------------
 1 file changed, 60 deletions(-)
 delete mode 100644 amazon_main.py

diff --git a/amazon_main.py b/amazon_main.py
deleted file mode 100644
index b8c1504..0000000
--- a/amazon_main.py
+++ /dev/null
@@ -1,60 +0,0 @@
-
-from scraping_engine import FingerprintScrapingEngine
-from amazon_job_scraper import AmazonJobScraper  # Updated class name
-from dotenv import load_dotenv
-import asyncio
-import random
-import time
-
-load_dotenv()
-
-async def main():
-    engine = FingerprintScrapingEngine(
-        seed="amazon_job_scraping_12",
-        target_os="windows",
-        db_path="amazon_jobs.db",
-        markdown_path="amazon_jobs.md"
-    )
-
-    scraper = AmazonJobScraper(
-        engine,
-        human_speed=1.4,
-        user_request="Extract title, company, location, description, basic qualifications, preferred qualifications, job ID, and job type (full-time, part-time, etc.)"
-    )
-
-    job_titles = [
-        "Software Development Engineer",
-        "Data Scientist",
-        "Product Manager",
-        "UX Designer",
-        "Solutions Architect",
-        "Machine Learning Engineer",
-        "Frontend Engineer",
-        "Backend Engineer",
-        "Full Stack Engineer",
-        "Data Engineer"
-    ]
-
-    fixed_location = "United States"  # Amazon search uses country/region, not city
-
-    while True:
-        random.shuffle(job_titles)
-        for job_title in job_titles:
-            search_keywords = f"{job_title} location:{fixed_location}"
-            print(f"\n{'='*60}")
-            print(f"Starting Amazon scrape for: {search_keywords}")
-            print(f"{'='*60}")
-
-            await scraper.scrape_jobs(
-                search_keywords=search_keywords,
-                max_pages=400  # Amazon lists ~10 jobs per page
-            )
-
-            print(f"\n✅ Completed scraping for: {job_title}")
-            print(f"⏳ Waiting 120 seconds before next job title...")
-            await asyncio.sleep(120)  # non-blocking pause between job titles
-
-        print(f"\n✅ Completed full cycle. Restarting...")
-
-if __name__ == "__main__":
-    asyncio.run(main())
\ No newline at end of file
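
For reference, the core pattern of the deleted runner is an endless async scrape cycle with a fixed pause between job titles. Below is a minimal self-contained sketch of that pattern; scrape_jobs here is a placeholder coroutine standing in for AmazonJobScraper.scrape_jobs, not the real API from amazon_job_scraper:

    import asyncio
    import random

    # Placeholder for AmazonJobScraper.scrape_jobs; the real implementation
    # lives in the repo's amazon_job_scraper module and is not reproduced here.
    async def scrape_jobs(search_keywords: str, max_pages: int) -> None:
        print(f"Scraping up to {max_pages} pages for: {search_keywords}")
        await asyncio.sleep(0)  # stand-in for the actual page fetches

    async def main() -> None:
        job_titles = ["Software Development Engineer", "Data Scientist"]
        while True:
            random.shuffle(job_titles)  # vary the order between cycles
            for job_title in job_titles:
                await scrape_jobs(f"{job_title} location:United States", max_pages=400)
                # Pause with asyncio.sleep rather than time.sleep so the wait
                # does not block the event loop.
                await asyncio.sleep(120)

    if __name__ == "__main__":
        asyncio.run(main())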