Web_scraping_project/linkedin_main.py
Ofure Ikheloa fd4e8c9c05 feat(scraper): add LLM-powered job data refinement and new scraping logic
- Implement LLMJobRefiner class for processing job data with Gemini API
- Add new job_scraper2.py with enhanced scraping capabilities
- Remove search_keywords parameter from scraping engine
- Add environment variable loading in config.py
- Update main script to use new scraper and target field
2025-11-24 12:25:50 +01:00

31 lines
822 B
Python

from scraping_engine import FingerprintScrapingEngine
from job_scraper2 import LinkedInJobScraper
import os
from dotenv import load_dotenv
import asyncio
# Load environment variables
load_dotenv()
async def main():
    """Run a single LinkedIn job-scraping session.

    Creates a ``FingerprintScrapingEngine``, wraps it in a
    ``LinkedInJobScraper`` and awaits one ``scrape_jobs`` call.

    Login details are read from the ``SCRAPING_USERNAME`` and
    ``SCRAPING_PASSWORD`` environment variables. A value is ``None`` when
    its variable is not set; no validation is done here before the
    credentials are handed to the scraper.
    """
    # Engine settings are passed through verbatim; how the engine uses the
    # seed and output paths is defined in scraping_engine, not here.
    engine_options = {
        "seed": "job_scraping_engine",
        "target_os": "windows",
        "db_path": "job_listings.db",
        "markdown_path": "job_listings.md",
    }
    fingerprint_engine = FingerprintScrapingEngine(**engine_options)

    # Initialize the scraper with the target field.
    job_scraper = LinkedInJobScraper(
        fingerprint_engine,
        human_speed=1.6,
        target_field="Web designer",
    )

    # Read the login details from the environment just before the call.
    login_details = {
        "email": os.environ.get("SCRAPING_USERNAME"),
        "password": os.environ.get("SCRAPING_PASSWORD"),
    }
    query = "Web Designer location:New York"

    await job_scraper.scrape_jobs(
        search_keywords=query,
        credentials=login_details,
    )
# Script entry point: run the async main() to completion on a fresh event
# loop, but only when this file is executed directly (not when imported).
if __name__ == "__main__":
    asyncio.run(main())