Delete amazon_main.py
This commit is contained in:
parent
20408dd5a6
commit
c0c7925be3
@ -1,60 +0,0 @@
|
||||
|
||||
from scraping_engine import FingerprintScrapingEngine
|
||||
from amazon_job_scraper import AmazonJobScraper # Updated class name
|
||||
from dotenv import load_dotenv
|
||||
import asyncio
|
||||
import random
|
||||
import time
|
||||
|
||||
# Runs python-dotenv's load_dotenv() at import time, before main() is started.
# NOTE(review): presumably this loads settings for the scraper/engine from a
# local .env file — confirm which environment variables are actually required.
load_dotenv()
|
||||
|
||||
async def main():
    """Scrape Amazon job listings for a fixed set of job titles, forever.

    Builds a ``FingerprintScrapingEngine`` and an ``AmazonJobScraper``, then
    loops endlessly: the job-title list is shuffled in place, and for each
    title ``scraper.scrape_jobs`` is awaited once, followed by a pause before
    the next title. The function never returns on its own; stop the process
    externally (e.g. Ctrl+C) to end it.
    """
    # Single source of truth for the pause, so the printed message and the
    # real delay can never disagree (previously the message said 90 while the
    # code slept for 120).
    pause_seconds = 120

    # Upper bound on result pages per search. The original note states Amazon
    # loads 10 jobs per page, so 400 pages is up to ~4000 jobs per title.
    max_pages = 400

    engine = FingerprintScrapingEngine(
        seed="amazon_job_scraping_12",
        target_os="windows",
        db_path="amazon_jobs.db",
        markdown_path="amazon_jobs.md",
    )

    scraper = AmazonJobScraper(
        engine,
        human_speed=1.4,
        user_request="Extract title, company, location, description, basic qualifications, preferred qualifications, job ID, and job type (full-time, part-time, etc.)",
    )

    job_titles = [
        "Software Development Engineer",
        "Data Scientist",
        "Product Manager",
        "UX Designer",
        "Solutions Architect",
        "Machine Learning Engineer",
        "Frontend Engineer",
        "Backend Engineer",
        "Full Stack Engineer",
        "Data Engineer",
    ]

    fixed_location = "United States"  # Amazon uses country/region, not city

    while True:
        # Re-shuffle every cycle so titles are not always visited in the same order.
        random.shuffle(job_titles)
        for job_title in job_titles:
            search_keywords = f"{job_title} location:{fixed_location}"
            print(f"\n{'='*60}")
            print(f"Starting Amazon scrape for: {search_keywords}")
            print(f"{'='*60}")

            await scraper.scrape_jobs(
                search_keywords=search_keywords,
                max_pages=max_pages,
            )

            print(f"\n✅ Completed scraping for: {job_title}")
            print(f"⏳ Waiting {pause_seconds} seconds before next job title...")
            # asyncio.sleep yields to the event loop; time.sleep here would
            # block the loop (and any background tasks) for the whole pause.
            await asyncio.sleep(pause_seconds)

        print("\n✅ Completed full cycle. Restarting...")
|
||||
|
||||
if __name__ == "__main__":
    # Start the scraper only when this file is executed as a script, not when
    # it is imported; asyncio.run drives the main() coroutine.
    asyncio.run(main())
|
||||
Loading…
x
Reference in New Issue
Block a user