- Replace XAI/Grok integration with DeepSeek's OpenAI-compatible API - Remove schema generation and caching logic - Simplify prompt structure and response parsing - Standardize database schema and markdown output format - Update config to use DEEPSEEK_API_KEY instead of XAI_API_KEY - Change default search keyword in linkedin_main.py
32 lines
929 B
Python
32 lines
929 B
Python
|
|
from scraping_engine import FingerprintScrapingEngine
|
|
from job_scraper2 import LinkedInJobScraper
|
|
import os
|
|
from dotenv import load_dotenv
|
|
import asyncio
|
|
|
|
# Pull settings from a local .env file into the process environment up front,
# so the os.getenv() lookups in main() (SCRAPING_USERNAME / SCRAPING_PASSWORD)
# can see them.
load_dotenv()
|
|
|
|
|
|
async def main():
    """Build the scraping engine and LinkedIn scraper, then run one job search.

    The engine is created with a fixed seed, a Windows target OS and the
    ``job_listings.db`` / ``job_listings.md`` paths. The scraper wraps that
    engine and is given a free-text description of the fields to extract.
    Login credentials are read from the ``SCRAPING_USERNAME`` and
    ``SCRAPING_PASSWORD`` environment variables; ``os.getenv`` yields ``None``
    for either one that is unset, and that value is passed through unchanged.

    Returns:
        None. The result of ``scrape_jobs`` is awaited and discarded.
    """
    engine_options = {
        "seed": "job_scraping_123",
        "target_os": "windows",
        "db_path": "job_listings.db",
        "markdown_path": "job_listings.md",
    }
    engine = FingerprintScrapingEngine(**engine_options)

    # Natural-language description of the fields the scraper should extract.
    extraction_request = (
        "Extract title, company, location, description, requirements, "
        "qualifications, nature of job(remote, onsite, hybrid) and salary"
    )
    scraper = LinkedInJobScraper(
        engine,
        human_speed=1.6,
        user_request=extraction_request,
    )

    # Credentials come from the environment rather than being hard-coded.
    login = {
        "email": os.getenv("SCRAPING_USERNAME"),
        "password": os.getenv("SCRAPING_PASSWORD"),
    }

    await scraper.scrape_jobs(
        search_keywords="Lecturer location:New York",
        credentials=login,
    )
|
|
|
|
if __name__ == "__main__":
    # Only start the scrape when this file is executed directly, not when it
    # is imported; asyncio.run() drives the main() coroutine to completion.
    asyncio.run(main())