Refactor code structure for improved readability and maintainability
This commit is contained in:
parent
06f8e8b086
commit
b0e90972b1
10
ashby.csv
Normal file
10
ashby.csv
Normal file
@ -0,0 +1,10 @@
|
||||
url,timestamp
|
||||
https://jobs.ashbyhq.com/stellar/a8377cf4-280b-4eb3-ac44-a4c9020c2eaf?utm_source=cryptocurrencyjobs.co,2025-12-31T08:32:17.821505
|
||||
https://jobs.ashbyhq.com/artemisanalytics/5f61b6c6-147c-4707-9003-a9632455b984?utm_source=cryptocurrencyjobs.co,2025-12-31T08:51:57.190172
|
||||
https://jobs.ashbyhq.com/lightning/2d77b496-ab0d-4e54-bcf8-33260d1bab6b?utm_source=cryptocurrencyjobs.co,2025-12-31T09:07:09.491831
|
||||
https://jobs.ashbyhq.com/Braiins/cee9cf74-6049-4dab-aae7-96bef0082689?utm_source=cryptocurrencyjobs.co,2025-12-31T09:35:28.137181
|
||||
https://jobs.ashbyhq.com/blockstream/80ebab98-0039-48bf-86d9-9a2a7962b005?utm_source=cryptocurrencyjobs.co,2025-12-31T10:21:19.253356
|
||||
https://jobs.ashbyhq.com/dynamic/fde8a9ff-9701-485f-a8d1-e717c170f215?utm_source=cryptocurrencyjobs.co,2025-12-31T10:25:55.141543
|
||||
https://jobs.ashbyhq.com/ether.fi/6eb1e350-71ce-47f7-a363-3fa3c521dacb?utm_source=cryptocurrencyjobs.co,2025-12-31T10:44:35.913725
|
||||
https://chainlinklabs.com/open-roles?ashby_jid=112a76d3-4dfd-4eea-828c-41465760b3ef&utm_source=ccj,2025-12-31T10:49:07.453900
|
||||
https://jobs.ashbyhq.com/stellar/cdad9af1-9e64-4fd4-8e2c-f87389f1dd16?utm_source=cryptocurrencyjobs.co,2025-12-31T11:13:58.119967
|
||||
|
1591
ashbycompanies.csv
Normal file
1591
ashbycompanies.csv
Normal file
File diff suppressed because it is too large
Load Diff
166
comparator.py
Normal file
166
comparator.py
Normal file
@ -0,0 +1,166 @@
|
||||
|
||||
import csv
|
||||
import os
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# One (input_file, companies_file, platform_name) tuple per supported
# platform; both file names are derived from the platform name.
platforms = [
    (f"{name}.csv", f"{name}companies.csv", name)
    for name in (
        "ashby",
        "gem",
        "greenhouse",
        "lever",
        "rippling",
        "workable",
        "workday",
    )
]
|
||||
|
||||
|
||||
def _host_matches(host, domain):
    """Return True when *host* is *domain* itself or one of its subdomains."""
    return host == domain or host.endswith("." + domain)


def _path_segments(path):
    """Return the non-empty '/'-separated segments of a URL path."""
    return [segment for segment in path.split('/') if segment]


def _is_workday_host(host):
    """Return True when *host* looks like a Workday job site.

    Accepts any host under ``myworkdayjobs.com`` and any host that has a
    label of the form ``wd<digit>...`` (for example ``wd5``).
    """
    if _host_matches(host, "myworkdayjobs.com"):
        return True
    return any(
        len(label) > 2 and label.startswith("wd") and label[2].isdigit()
        for label in host.split('.')
    )


def normalize_url(platform, url):
    """Normalize URL to a company identifier based on platform.

    The URL is lower-cased and stripped before parsing. Host checks compare
    whole domain labels (the domain itself or a subdomain), so look-alike
    hosts such as ``clever.com`` are not mistaken for ``lever.co``.

    Args:
        platform: One of "ashby", "greenhouse", "lever", "workable",
            "workday", "gem" or "rippling".
        url: The job or company URL to normalize.

    Returns:
        None when *url* is falsy, or when the host is recognized but the
        path yields no identifier. Otherwise the company identifier for a
        recognized platform/host pair. For an unrecognized platform or
        host, or when the URL cannot be parsed, *url* is returned unchanged.
    """
    if not url:
        return None

    try:
        parsed = urlparse(url.lower().strip())
        netloc = parsed.netloc
        # hostname excludes any port or userinfo that netloc may carry.
        host = parsed.hostname or ""
        parts = _path_segments(parsed.path)

        if platform == "ashby":
            # https://jobs.ashbyhq.com/company_slug/...
            if _host_matches(host, "ashbyhq.com"):
                return parts[0] if parts else None

        elif platform == "greenhouse":
            # https://boards.greenhouse.io/company_slug/...
            if _host_matches(host, "greenhouse.io"):
                if len(parts) >= 2 and parts[0] == "boards":
                    return parts[1]
                return parts[0] if parts else None

        elif platform == "lever":
            # https://jobs.lever.co/company_slug/...
            if _host_matches(host, "lever.co"):
                return parts[0] if parts else None

        elif platform == "workable":
            # https://apply.workable.com/company_slug/...
            if _host_matches(host, "workable.com"):
                # Usually /company_slug/j/jobid/: take the first segment
                # that is not 'j' and is longer than two characters.
                for part in parts:
                    if part != 'j' and len(part) > 2:
                        return part
                return parts[0] if parts else None

        elif platform == "workday":
            # Company is normally the first host label.
            if _is_workday_host(host):
                subdomain = host.split('.')[0]
                if subdomain not in ('www', 'jobs', 'apply', ''):
                    return subdomain
                # Fallback: look for the company in the path (rare).
                return parts[0] if parts else None

        elif platform == "gem":
            # https://gem.com/company/... or https://www.gem.com/careers/company/...
            if _host_matches(host, "gem.com"):
                if host == "jobs.gem.com":
                    # On jobs.gem.com the first segment is the board slug,
                    # even when the slug itself is a keyword like 'careers'.
                    return parts[0] if parts else None
                # Often /company-slug or /careers/company-slug.
                for i, part in enumerate(parts):
                    if part in ('company', 'careers', 'jobs') and i + 1 < len(parts):
                        return parts[i + 1]
                return parts[0] if parts else None

        elif platform == "rippling":
            # Rippling uses a generic domain; best effort is the full
            # network location plus the first path segment.
            if _host_matches(host, "rippling.com"):
                if parts:
                    return f"{netloc}/{parts[0]}"
                return netloc

        # Fallback: return the URL as given when unrecognized.
        return url

    except (AttributeError, TypeError, ValueError):
        # Non-string input or an unparsable URL: keep the best-effort
        # behaviour of handing the input back untouched.
        return url
|
||||
|
||||
|
||||
def read_company_signatures(filepath, platform):
    """Read and normalize company identifiers from companies CSV.

    Args:
        filepath: Path to a CSV file with a ``url`` column.
        platform: Platform name passed through to ``normalize_url``.

    Returns:
        A set of the truthy signatures produced by ``normalize_url`` for
        every row that has a non-blank ``url``. An empty set is returned
        when *filepath* does not exist.
    """
    if not os.path.exists(filepath):
        return set()

    signatures = set()
    # newline='' lets the csv module handle line endings itself.
    with open(filepath, 'r', newline='', encoding='utf-8') as f:
        for row in csv.DictReader(f):
            # DictReader fills fields missing from a short row with None,
            # so guard before calling strip().
            url = (row.get('url') or '').strip()
            if not url:
                continue
            sig = normalize_url(platform, url)
            if sig:
                signatures.add(sig)
    return signatures
|
||||
|
||||
|
||||
def filter_csv_by_signatures(input_file, excluded_signatures, platform):
    """Keep only rows whose normalized URL is NOT in excluded_signatures.

    Args:
        input_file: Path to a CSV file with a ``url`` column.
        excluded_signatures: Container of signatures to drop.
        platform: Platform name passed through to ``normalize_url``.

    Returns:
        A ``(kept_rows, fieldnames)`` tuple. ``kept_rows`` is a list of row
        dicts and ``fieldnames`` is the header read from the file (None for
        a file with no header). ``([], None)`` is returned when
        *input_file* does not exist.
    """
    if not os.path.exists(input_file):
        return [], None

    kept_rows = []
    # newline='' lets the csv module handle line endings itself.
    with open(input_file, 'r', newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        fieldnames = reader.fieldnames
        for row in reader:
            # DictReader fills fields missing from a short row with None,
            # so guard before calling strip().
            url = (row.get('url') or '').strip()
            if not url:
                kept_rows.append(row)  # keep if no URL (shouldn't happen)
                continue
            if normalize_url(platform, url) not in excluded_signatures:
                kept_rows.append(row)
    return kept_rows, fieldnames
|
||||
|
||||
|
||||
def write_csv(filepath, rows, fieldnames):
    """Write a header line followed by one CSV record per row dict.

    Args:
        filepath: Destination path; the file is opened in write mode.
        rows: Iterable of dicts keyed by the names in *fieldnames*.
        fieldnames: Column names, in output order.
    """
    with open(filepath, 'w', newline='', encoding='utf-8') as out:
        dict_writer = csv.DictWriter(out, fieldnames=fieldnames)
        dict_writer.writeheader()
        for record in rows:
            dict_writer.writerow(record)
|
||||
|
||||
|
||||
def main():
    """Filter each platform's job CSV against its known-companies CSV.

    For every entry in ``platforms``: load the known company signatures,
    keep only the input rows whose signature is not among them, and write
    the kept rows back over the input file. When the input yields no
    header, the input file is removed if it exists.
    """
    for input_file, companies_file, platform in platforms:
        print(f"Processing {input_file} against {companies_file} using '{platform}' normalizer...")

        # Step 1: signatures of companies that are already known.
        known_signatures = read_company_signatures(companies_file, platform)
        print(f" → Loaded {len(known_signatures)} known company signatures from {companies_file}")

        # Step 2: drop input rows that belong to a known company.
        kept_rows, header = filter_csv_by_signatures(input_file, known_signatures, platform)

        # Step 3: persist the result, or clean up an unusable input file.
        if header:
            write_csv(input_file, kept_rows, header)
            print(f" → Kept {len(kept_rows)} new job URLs in {input_file}")
            continue

        if not os.path.exists(input_file):
            continue

        os.remove(input_file)
        print(f" → {input_file} was empty or invalid — removed.")

    print("\n✅ All platforms processed successfully.")


# Run the filter only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
508
gemcompanies.csv
Normal file
508
gemcompanies.csv
Normal file
@ -0,0 +1,508 @@
|
||||
name,url
|
||||
10Xconstruction Ai,https://jobs.gem.com/10xconstruction-ai
|
||||
11X Ai,https://jobs.gem.com/11x-ai
|
||||
43North,https://jobs.gem.com/43north
|
||||
8020 Consulting,https://jobs.gem.com/8020-consulting
|
||||
A16Z Speedrun,https://jobs.gem.com/a16z-speedrun
|
||||
Aarden Ai,https://jobs.gem.com/aarden-ai
|
||||
Accel,https://jobs.gem.com/accel
|
||||
Accelos,https://jobs.gem.com/accelos
|
||||
Acre,https://jobs.gem.com/acre
|
||||
Advancelevelllc Com,https://jobs.gem.com/advancelevelllc-com
|
||||
Agenta Ai,https://jobs.gem.com/agenta-ai
|
||||
Agentnoon,https://jobs.gem.com/agentnoon
|
||||
Agora,https://jobs.gem.com/agora
|
||||
Aionex Xyz,https://jobs.gem.com/aionex-xyz
|
||||
Aiphrodite Ai,https://jobs.gem.com/aiphrodite-ai
|
||||
Airframe,https://jobs.gem.com/airframe
|
||||
Airvet Com,https://jobs.gem.com/airvet-com
|
||||
Alex And Ani,https://jobs.gem.com/alex-and-ani
|
||||
Alinia Ai,https://jobs.gem.com/alinia-ai
|
||||
Alitheon,https://jobs.gem.com/alitheon
|
||||
Alpharun,https://jobs.gem.com/alpharun
|
||||
Altzero Xyz,https://jobs.gem.com/altzero-xyz
|
||||
Amya Agency,https://jobs.gem.com/amya-agency
|
||||
Andrenam,https://jobs.gem.com/andrenam
|
||||
Anysphere,https://jobs.gem.com/anysphere
|
||||
Aoniclife,https://jobs.gem.com/aoniclife
|
||||
Apartment List,https://jobs.gem.com/apartment-list
|
||||
Apella,https://jobs.gem.com/apella
|
||||
Apticore Io,https://jobs.gem.com/apticore-io
|
||||
Arlo,https://jobs.gem.com/arlo
|
||||
Ascenda Loyalty,https://jobs.gem.com/ascenda-loyalty
|
||||
Ascendarc,https://jobs.gem.com/ascendarc
|
||||
Astroforge Io,https://jobs.gem.com/astroforge-io
|
||||
Atla Ai Com,https://jobs.gem.com/atla-ai-com
|
||||
Atomica,https://jobs.gem.com/atomica
|
||||
Audicus,https://jobs.gem.com/audicus
|
||||
Aurelian Io,https://jobs.gem.com/aurelian-io
|
||||
Aureliussystems Us,https://jobs.gem.com/aureliussystems-us
|
||||
Autopilotbrand Com,https://jobs.gem.com/autopilotbrand-com
|
||||
Avoca,https://jobs.gem.com/avoca
|
||||
Avol,https://jobs.gem.com/avol
|
||||
Axonify,https://jobs.gem.com/axonify
|
||||
Backops Ai,https://jobs.gem.com/backops-ai
|
||||
Basalt Health,https://jobs.gem.com/basalt-health
|
||||
Baxter Aerospace,https://jobs.gem.com/baxter-aerospace
|
||||
Bead Ai,https://jobs.gem.com/bead-ai
|
||||
Benbase,https://jobs.gem.com/benbase
|
||||
Better Auth,https://jobs.gem.com/better-auth
|
||||
Betterbasket Ai,https://jobs.gem.com/betterbasket-ai
|
||||
Bigeye,https://jobs.gem.com/bigeye
|
||||
Bigpanda,https://jobs.gem.com/bigpanda
|
||||
Bikky,https://jobs.gem.com/bikky
|
||||
Bilt,https://jobs.gem.com/bilt
|
||||
Binarly,https://jobs.gem.com/binarly
|
||||
Biofire,https://jobs.gem.com/biofire
|
||||
Biorender,https://jobs.gem.com/biorender
|
||||
Biorender Inc Ats,https://jobs.gem.com/biorender-inc--ats
|
||||
Birdwood Therapeutics,https://jobs.gem.com/birdwood-therapeutics
|
||||
Black Ore,https://jobs.gem.com/black-ore
|
||||
Blaze Ai,https://jobs.gem.com/blaze-ai
|
||||
Blazetalent,https://jobs.gem.com/blazetalent
|
||||
Blend Inc,https://jobs.gem.com/blend-inc
|
||||
Blue J,https://jobs.gem.com/blue-j
|
||||
Bluejeanfinancial Com,https://jobs.gem.com/bluejeanfinancial-com
|
||||
Blueonion Ai,https://jobs.gem.com/blueonion-ai
|
||||
Blueprint,https://jobs.gem.com/blueprint
|
||||
Bluesky,https://jobs.gem.com/bluesky
|
||||
Blume Technologies,https://jobs.gem.com/blume-technologies
|
||||
Bohler ,https://jobs.gem.com/bohler-
|
||||
Bohler Engineering Gemats,https://jobs.gem.com/bohler-engineering-gemats
|
||||
Bolna,https://jobs.gem.com/bolna
|
||||
Bond Partners,https://jobs.gem.com/bond-partners
|
||||
Boost Robotics,https://jobs.gem.com/boost-robotics
|
||||
Boredm,https://jobs.gem.com/boredm
|
||||
Breadcrumb Ai,https://jobs.gem.com/breadcrumb-ai
|
||||
Breakline Ats,https://jobs.gem.com/breakline-ats
|
||||
Breakline Education,https://jobs.gem.com/breakline-education
|
||||
Brewbird,https://jobs.gem.com/brewbird
|
||||
Buildtrayd Com,https://jobs.gem.com/buildtrayd-com
|
||||
Bull Moose Xyz,https://jobs.gem.com/bull-moose-xyz
|
||||
Cadstrom Io,https://jobs.gem.com/cadstrom-io
|
||||
Caffelabs Com,https://jobs.gem.com/caffelabs-com
|
||||
Calaveras,https://jobs.gem.com/calaveras
|
||||
Canals,https://jobs.gem.com/canals
|
||||
Caplight Com,https://jobs.gem.com/caplight-com
|
||||
Carbon,https://jobs.gem.com/carbon
|
||||
Cardnexus,https://jobs.gem.com/cardnexus
|
||||
Careers,https://jobs.gem.com/careers
|
||||
Carry,https://jobs.gem.com/carry
|
||||
Caseflood Ai,https://jobs.gem.com/caseflood-ai
|
||||
Cellbyte,https://jobs.gem.com/cellbyte
|
||||
Chartahealth,https://jobs.gem.com/chartahealth
|
||||
Civrobotics Com,https://jobs.gem.com/civrobotics-com
|
||||
Clarity,https://jobs.gem.com/clarity
|
||||
Clearchecks Com Ats,https://jobs.gem.com/clearchecks-com-ats
|
||||
Clearesthealth Com,https://jobs.gem.com/clearesthealth-com
|
||||
Cloudanix Com,https://jobs.gem.com/cloudanix-com
|
||||
Cloudraft,https://jobs.gem.com/cloudraft
|
||||
Codegen,https://jobs.gem.com/codegen
|
||||
Codesignal,https://jobs.gem.com/codesignal
|
||||
Cognna,https://jobs.gem.com/cognna
|
||||
Cogram,https://jobs.gem.com/cogram
|
||||
Comfy,https://jobs.gem.com/comfy
|
||||
Conductorai,https://jobs.gem.com/conductorai
|
||||
Confida Ai,https://jobs.gem.com/confida-ai
|
||||
Context Wtf,https://jobs.gem.com/context-wtf
|
||||
Contour App,https://jobs.gem.com/contour-app
|
||||
Converge,https://jobs.gem.com/converge
|
||||
Coupa Software Inc Ats 1,https://jobs.gem.com/coupa-software-inc-ats-1
|
||||
Cox Exponential,https://jobs.gem.com/cox-exponential
|
||||
Crabi Robotics Com,https://jobs.gem.com/crabi-robotics-com
|
||||
Crackenagi,https://jobs.gem.com/crackenagi
|
||||
Create Talent Group,https://jobs.gem.com/create-talent-group
|
||||
Createdbyhumans Ai,https://jobs.gem.com/createdbyhumans-ai
|
||||
Credit Key,https://jobs.gem.com/credit-key
|
||||
Crosby,https://jobs.gem.com/crosby
|
||||
Curex Org,https://jobs.gem.com/curex-org
|
||||
Curiouscardinals Com,https://jobs.gem.com/curiouscardinals-com
|
||||
Cyvl,https://jobs.gem.com/cyvl
|
||||
D4M International,https://jobs.gem.com/d4m-international
|
||||
Dalus,https://jobs.gem.com/dalus
|
||||
Dash Fi,https://jobs.gem.com/dash-fi
|
||||
Data Masters,https://jobs.gem.com/data-masters
|
||||
Datacurve Ai,https://jobs.gem.com/datacurve-ai
|
||||
Dataday Technology Solutions,https://jobs.gem.com/dataday-technology-solutions
|
||||
Datagrid,https://jobs.gem.com/datagrid
|
||||
Dawn Media,https://jobs.gem.com/dawn-media
|
||||
Daxko,https://jobs.gem.com/daxko
|
||||
Deep Infra,https://jobs.gem.com/deep-infra
|
||||
Deliver,https://jobs.gem.com/deliver
|
||||
Detections Ai,https://jobs.gem.com/detections-ai
|
||||
Dianahr Ai,https://jobs.gem.com/dianahr-ai
|
||||
Distributed Spectrum,https://jobs.gem.com/distributed-spectrum
|
||||
Dlvrlog,https://jobs.gem.com/dlvrlog
|
||||
Doowii,https://jobs.gem.com/doowii
|
||||
Dragme,https://jobs.gem.com/dragme
|
||||
Dragonfly Careers,https://jobs.gem.com/dragonfly-careers
|
||||
Dropback,https://jobs.gem.com/dropback
|
||||
Durin,https://jobs.gem.com/durin
|
||||
Dydx,https://jobs.gem.com/dydx
|
||||
Eats2Seats,https://jobs.gem.com/eats2seats
|
||||
Echelon,https://jobs.gem.com/echelon
|
||||
Ecocart Io,https://jobs.gem.com/ecocart-io
|
||||
Edgetrace Ai,https://jobs.gem.com/edgetrace-ai
|
||||
Efference Ai,https://jobs.gem.com/efference-ai
|
||||
Elite Talent Consulting,https://jobs.gem.com/elite-talent-consulting
|
||||
Eliza,https://jobs.gem.com/eliza
|
||||
Elloe Ai,https://jobs.gem.com/elloe-ai
|
||||
Elo Ai,https://jobs.gem.com/elo-ai
|
||||
Emerge Career,https://jobs.gem.com/emerge-career
|
||||
Engineering Codified,https://jobs.gem.com/engineering--codified
|
||||
Entrusted Contracting,https://jobs.gem.com/entrusted-contracting
|
||||
Escargot Com,https://jobs.gem.com/escargot-com
|
||||
Everfit Io,https://jobs.gem.com/everfit-io
|
||||
Excelity Careers,https://jobs.gem.com/excelity-careers
|
||||
Exponent,https://jobs.gem.com/exponent
|
||||
Ezraailabs Tech,https://jobs.gem.com/ezraailabs-tech
|
||||
Fabric,https://jobs.gem.com/fabric
|
||||
Fabrichealth,https://jobs.gem.com/fabrichealth
|
||||
Fancypeople,https://jobs.gem.com/fancypeople
|
||||
Fanpierlabs Com,https://jobs.gem.com/fanpierlabs-com
|
||||
Faraday,https://jobs.gem.com/faraday
|
||||
Fathom Org,https://jobs.gem.com/fathom-org
|
||||
Felix,https://jobs.gem.com/felix
|
||||
Ferry Health,https://jobs.gem.com/ferry-health
|
||||
Fetch Ats,https://jobs.gem.com/fetch-ats
|
||||
Fifthdoor Com,https://jobs.gem.com/fifthdoor-com
|
||||
Fireflies,https://jobs.gem.com/fireflies
|
||||
Firestorm,https://jobs.gem.com/firestorm
|
||||
Flatfee Corp,https://jobs.gem.com/flatfee-corp
|
||||
Flint,https://jobs.gem.com/flint
|
||||
Floot,https://jobs.gem.com/floot
|
||||
Forgent Ai,https://jobs.gem.com/forgent-ai
|
||||
Fountainplatform Com,https://jobs.gem.com/fountainplatform-com
|
||||
Foxbox Digital,https://jobs.gem.com/foxbox-digital
|
||||
Freestone Grove Partners,https://jobs.gem.com/freestone-grove-partners
|
||||
Freshbooks,https://jobs.gem.com/freshbooks
|
||||
Fridayharbor Ai,https://jobs.gem.com/fridayharbor-ai
|
||||
Fuelfinance,https://jobs.gem.com/fuelfinance
|
||||
Fulcrumcareers,https://jobs.gem.com/fulcrumcareers
|
||||
Function Health,https://jobs.gem.com/function-health
|
||||
Galadyne,https://jobs.gem.com/galadyne
|
||||
Galaxyventures,https://jobs.gem.com/galaxyventures
|
||||
Gc Ai,https://jobs.gem.com/gc-ai
|
||||
Gem,https://jobs.gem.com/gem
|
||||
Gem Mckesson,https://jobs.gem.com/gem-mckesson
|
||||
Gem Test Board,https://jobs.gem.com/gem-test-board
|
||||
Generation Alpha Transistor,https://jobs.gem.com/generation-alpha-transistor
|
||||
Genspark,https://jobs.gem.com/genspark
|
||||
Gerra,https://jobs.gem.com/gerra
|
||||
Getaero Io,https://jobs.gem.com/getaero-io
|
||||
Getbirdeye Com Au,https://jobs.gem.com/getbirdeye-com-au
|
||||
Getro,https://jobs.gem.com/getro
|
||||
Gigaml,https://jobs.gem.com/gigaml
|
||||
Go Cadre,https://jobs.gem.com/go-cadre
|
||||
Goatrecruit Com,https://jobs.gem.com/goatrecruit-com
|
||||
Good Life Companies,https://jobs.gem.com/good-life-companies
|
||||
Goodbill,https://jobs.gem.com/goodbill
|
||||
Grailpay Com,https://jobs.gem.com/grailpay-com
|
||||
Granger Construction,https://jobs.gem.com/granger-construction
|
||||
Gratia Health,https://jobs.gem.com/gratia-health
|
||||
Greenlite Ai,https://jobs.gem.com/greenlite-ai
|
||||
Greenvalleyjobs,https://jobs.gem.com/greenvalleyjobs
|
||||
Grit,https://jobs.gem.com/grit
|
||||
Groq,https://jobs.gem.com/groq
|
||||
Growthbook,https://jobs.gem.com/growthbook
|
||||
Guardrail Ai,https://jobs.gem.com/guardrail-ai
|
||||
Guidesage Ai,https://jobs.gem.com/guidesage-ai
|
||||
Hallow,https://jobs.gem.com/hallow
|
||||
Happydance Partnership Integration,https://jobs.gem.com/happydance-partnership-integration
|
||||
Harmonic,https://jobs.gem.com/harmonic
|
||||
Hash,https://jobs.gem.com/hash
|
||||
Hayla,https://jobs.gem.com/hayla
|
||||
Heavy Construction Systems Specialists Llc,https://jobs.gem.com/heavy-construction-systems-specialists-llc
|
||||
Helix,https://jobs.gem.com/helix
|
||||
Hellotrade,https://jobs.gem.com/hellotrade
|
||||
Helm Health,https://jobs.gem.com/helm-health
|
||||
Hilabs Ie,https://jobs.gem.com/hilabs-ie
|
||||
Hipeople,https://jobs.gem.com/hipeople
|
||||
Holacasa Yc W23,https://jobs.gem.com/holacasa-yc-w23
|
||||
Homeboost,https://jobs.gem.com/homeboost
|
||||
Hospitable,https://jobs.gem.com/hospitable
|
||||
Howrecruit Io,https://jobs.gem.com/howrecruit-io
|
||||
Hubspot,https://jobs.gem.com/hubspot
|
||||
Hypernatural Ai,https://jobs.gem.com/hypernatural-ai
|
||||
Inception,https://jobs.gem.com/inception
|
||||
Index Exchange,https://jobs.gem.com/index-exchange
|
||||
Infrastructure Modernization Solutions,https://jobs.gem.com/infrastructure-modernization-solutions
|
||||
Inspiration Commerce Group,https://jobs.gem.com/inspiration-commerce-group
|
||||
Inspiresemi Com,https://jobs.gem.com/inspiresemi-com
|
||||
Instrumental Inc ,https://jobs.gem.com/instrumental-inc-
|
||||
Integral Xyz,https://jobs.gem.com/integral-xyz
|
||||
Integrationscaptain,https://jobs.gem.com/integrationscaptain
|
||||
Intelligentresourcing Co,https://jobs.gem.com/intelligentresourcing-co
|
||||
Interfere Old,https://jobs.gem.com/interfere-old
|
||||
Invoicebutler Ai,https://jobs.gem.com/invoicebutler-ai
|
||||
Iris,https://jobs.gem.com/iris
|
||||
Ironsite Ai,https://jobs.gem.com/ironsite-ai
|
||||
Itsvaleria Co,https://jobs.gem.com/itsvaleria-co
|
||||
Jaguaracareers,https://jobs.gem.com/jaguaracareers
|
||||
Janie,https://jobs.gem.com/janie
|
||||
Jayla Careers,https://jobs.gem.com/jayla-careers
|
||||
Jobma,https://jobs.gem.com/jobma
|
||||
Joinanvil Com,https://jobs.gem.com/joinanvil-com
|
||||
Joinformal,https://jobs.gem.com/joinformal
|
||||
Joyful Health,https://jobs.gem.com/joyful-health
|
||||
Kaikaku,https://jobs.gem.com/kaikaku
|
||||
Kaironhealth,https://jobs.gem.com/kaironhealth
|
||||
Kaironhealth Com,https://jobs.gem.com/kaironhealth-com
|
||||
Kanu Ai,https://jobs.gem.com/kanu-ai
|
||||
Kcs Hiring,https://jobs.gem.com/kcs-hiring
|
||||
Keru Ai,https://jobs.gem.com/keru-ai
|
||||
Key To Web3,https://jobs.gem.com/key-to-web3
|
||||
Knight Electric Inc ,https://jobs.gem.com/knight-electric-inc-
|
||||
Kollectiv Ai,https://jobs.gem.com/kollectiv-ai
|
||||
Kumo Ai,https://jobs.gem.com/kumo-ai
|
||||
Lantern,https://jobs.gem.com/lantern
|
||||
Lavapayments Com,https://jobs.gem.com/lavapayments-com
|
||||
Leap Tools,https://jobs.gem.com/leap-tools
|
||||
Letsdata,https://jobs.gem.com/letsdata
|
||||
Letter Ai,https://jobs.gem.com/letter-ai
|
||||
Level,https://jobs.gem.com/level
|
||||
Linktree,https://jobs.gem.com/linktree
|
||||
Little Otter,https://jobs.gem.com/little-otter
|
||||
Lower Llc,https://jobs.gem.com/lower-llc
|
||||
Lumalabs Ai,https://jobs.gem.com/lumalabs-ai
|
||||
Lunajoy,https://jobs.gem.com/lunajoy
|
||||
Lunch,https://jobs.gem.com/lunch
|
||||
Lunos Ai,https://jobs.gem.com/lunos-ai
|
||||
Magnetic,https://jobs.gem.com/magnetic
|
||||
Manifest,https://jobs.gem.com/manifest
|
||||
Manifested Com,https://jobs.gem.com/manifested-com
|
||||
Marble Health,https://jobs.gem.com/marble-health
|
||||
Mavi,https://jobs.gem.com/mavi
|
||||
Meetdex Ai,https://jobs.gem.com/meetdex-ai
|
||||
Megapot,https://jobs.gem.com/megapot
|
||||
Meineautosdirekt,https://jobs.gem.com/meineautosdirekt
|
||||
Menten Ai,https://jobs.gem.com/menten-ai
|
||||
Merge Sandbox,https://jobs.gem.com/merge-sandbox
|
||||
Metal Ai,https://jobs.gem.com/metal-ai
|
||||
Microsoft Demo Gem Com,https://jobs.gem.com/microsoft-demo-gem-com
|
||||
Mimicrobotics Com,https://jobs.gem.com/mimicrobotics-com
|
||||
Mission,https://jobs.gem.com/mission
|
||||
Moosehead Talent,https://jobs.gem.com/moosehead-talent
|
||||
Motion,https://jobs.gem.com/motion
|
||||
Moxa,https://jobs.gem.com/moxa
|
||||
Multiplierhq,https://jobs.gem.com/multiplierhq
|
||||
Multiscale Ai,https://jobs.gem.com/multiscale-ai
|
||||
Myprize,https://jobs.gem.com/myprize
|
||||
Myriad Technology,https://jobs.gem.com/myriad-technology
|
||||
Myrrsgroup,https://jobs.gem.com/myrrsgroup
|
||||
Nabla Bio,https://jobs.gem.com/nabla-bio
|
||||
Nacelle,https://jobs.gem.com/nacelle
|
||||
Nativemsg,https://jobs.gem.com/nativemsg
|
||||
Nclusion,https://jobs.gem.com/nclusion
|
||||
Nerve,https://jobs.gem.com/nerve
|
||||
Newcrew,https://jobs.gem.com/newcrew
|
||||
Ngram,https://jobs.gem.com/ngram
|
||||
Nimble,https://jobs.gem.com/nimble
|
||||
Niva,https://jobs.gem.com/niva
|
||||
Nominal,https://jobs.gem.com/nominal
|
||||
Northone,https://jobs.gem.com/northone
|
||||
Ntop,https://jobs.gem.com/ntop
|
||||
Nue Ai,https://jobs.gem.com/nue-ai
|
||||
Nutrislice,https://jobs.gem.com/nutrislice
|
||||
Nuvo,https://jobs.gem.com/nuvo
|
||||
Obin Ai,https://jobs.gem.com/obin-ai
|
||||
Obsidian Systems,https://jobs.gem.com/obsidian-systems
|
||||
Odo Do,https://jobs.gem.com/odo-do
|
||||
Omegahhagency Com,https://jobs.gem.com/omegahhagency-com
|
||||
Ondo Finance,https://jobs.gem.com/ondo-finance
|
||||
Onesignal,https://jobs.gem.com/onesignal
|
||||
Onesignal Ats,https://jobs.gem.com/onesignal-ats
|
||||
Onezyme,https://jobs.gem.com/onezyme
|
||||
Onfrontiers,https://jobs.gem.com/onfrontiers
|
||||
Openphone,https://jobs.gem.com/openphone
|
||||
Openreqstaffing,https://jobs.gem.com/openreqstaffing
|
||||
Opine,https://jobs.gem.com/opine
|
||||
Ora So,https://jobs.gem.com/ora-so
|
||||
Overlay,https://jobs.gem.com/overlay
|
||||
Overwatch,https://jobs.gem.com/overwatch
|
||||
Paces,https://jobs.gem.com/paces
|
||||
Pae,https://jobs.gem.com/pae
|
||||
Pagebound,https://jobs.gem.com/pagebound
|
||||
Pally,https://jobs.gem.com/pally
|
||||
Paramark,https://jobs.gem.com/paramark
|
||||
Partao,https://jobs.gem.com/partao
|
||||
Partnerhq,https://jobs.gem.com/partnerhq
|
||||
Patlytics,https://jobs.gem.com/patlytics
|
||||
Pave,https://jobs.gem.com/pave
|
||||
Perceptyx,https://jobs.gem.com/perceptyx
|
||||
Photalabs Com,https://jobs.gem.com/photalabs-com
|
||||
Photon,https://jobs.gem.com/photon
|
||||
Pinnacleconnect Llc,https://jobs.gem.com/pinnacleconnect-llc
|
||||
Piqenergy Com,https://jobs.gem.com/piqenergy-com
|
||||
Planet Fans,https://jobs.gem.com/planet-fans
|
||||
Planned,https://jobs.gem.com/planned
|
||||
Plixai,https://jobs.gem.com/plixai
|
||||
Pogo Recruiting,https://jobs.gem.com/pogo-recruiting
|
||||
Polar,https://jobs.gem.com/polar
|
||||
Polywork,https://jobs.gem.com/polywork
|
||||
Pomerium,https://jobs.gem.com/pomerium
|
||||
Portal Ai,https://jobs.gem.com/portal-ai
|
||||
Poseidonaero,https://jobs.gem.com/poseidonaero
|
||||
Prahsys Com,https://jobs.gem.com/prahsys-com
|
||||
Praxisiq Ai,https://jobs.gem.com/praxisiq-ai
|
||||
Precision Ai,https://jobs.gem.com/precision-ai
|
||||
Prodia,https://jobs.gem.com/prodia
|
||||
Productboard,https://jobs.gem.com/productboard
|
||||
Productboard Ats,https://jobs.gem.com/productboard-ats
|
||||
Prohost Ai,https://jobs.gem.com/prohost-ai
|
||||
Project Method,https://jobs.gem.com/project-method
|
||||
Promptql,https://jobs.gem.com/promptql
|
||||
Propel,https://jobs.gem.com/propel
|
||||
Prospermedical Com,https://jobs.gem.com/prospermedical-com
|
||||
Protegeai,https://jobs.gem.com/protegeai
|
||||
Questdb Com,https://jobs.gem.com/questdb-com
|
||||
Quitwithjones,https://jobs.gem.com/quitwithjones
|
||||
Quo,https://jobs.gem.com/quo
|
||||
Rain Aero,https://jobs.gem.com/rain-aero
|
||||
Raincode Bahrain W L L,https://jobs.gem.com/raincode-bahrain-w-l-l
|
||||
Raylu Ai,https://jobs.gem.com/raylu-ai
|
||||
Rctsglobal Com,https://jobs.gem.com/rctsglobal-com
|
||||
Rditrials,https://jobs.gem.com/rditrials
|
||||
Rebuild Work,https://jobs.gem.com/rebuild-work
|
||||
Redcar,https://jobs.gem.com/redcar
|
||||
Redenvelope Co,https://jobs.gem.com/redenvelope-co
|
||||
Redo,https://jobs.gem.com/redo
|
||||
Rektech,https://jobs.gem.com/rektech
|
||||
Renew,https://jobs.gem.com/renew
|
||||
Resprop,https://jobs.gem.com/resprop
|
||||
Retool,https://jobs.gem.com/retool
|
||||
Revolutionparts,https://jobs.gem.com/revolutionparts
|
||||
Rex,https://jobs.gem.com/rex
|
||||
Rf Renovo Management Company Llc,https://jobs.gem.com/rf-renovo-management-company-llc
|
||||
Riley,https://jobs.gem.com/riley
|
||||
Rinsed,https://jobs.gem.com/rinsed
|
||||
Risely Ai,https://jobs.gem.com/risely-ai
|
||||
Rivia,https://jobs.gem.com/rivia
|
||||
Roadio Ai,https://jobs.gem.com/roadio-ai
|
||||
Roamless,https://jobs.gem.com/roamless
|
||||
Roe Ai,https://jobs.gem.com/roe-ai
|
||||
Rossibuilders Com,https://jobs.gem.com/rossibuilders-com
|
||||
Roundhouse Media,https://jobs.gem.com/roundhouse-media
|
||||
Rove,https://jobs.gem.com/rove
|
||||
Runsybil,https://jobs.gem.com/runsybil
|
||||
Sadnaconsulting Com,https://jobs.gem.com/sadnaconsulting-com
|
||||
Sailorhealth Com,https://jobs.gem.com/sailorhealth-com
|
||||
Sales Marker,https://jobs.gem.com/sales-marker
|
||||
Salesqueze Com,https://jobs.gem.com/salesqueze-com
|
||||
Sandbar Inc,https://jobs.gem.com/sandbar-inc
|
||||
Sandboxschonfeld Com,https://jobs.gem.com/sandboxschonfeld-com
|
||||
Sauron Systems,https://jobs.gem.com/sauron-systems
|
||||
Scope Labs,https://jobs.gem.com/scope-labs
|
||||
Scowtt Com,https://jobs.gem.com/scowtt-com
|
||||
Seated,https://jobs.gem.com/seated
|
||||
Seed2Series Com,https://jobs.gem.com/seed2series-com
|
||||
Seniorverse,https://jobs.gem.com/seniorverse
|
||||
Sennder Gmbh,https://jobs.gem.com/sennder-gmbh
|
||||
Senndertechnologies Gmbh,https://jobs.gem.com/senndertechnologies-gmbh
|
||||
Sensorum Health,https://jobs.gem.com/sensorum-health
|
||||
Serv Ai,https://jobs.gem.com/serv-ai
|
||||
Seven Starling,https://jobs.gem.com/seven-starling
|
||||
Shef Com,https://jobs.gem.com/shef-com
|
||||
Shorebird Dev,https://jobs.gem.com/shorebird-dev
|
||||
Showtime,https://jobs.gem.com/showtime
|
||||
Signoz,https://jobs.gem.com/signoz
|
||||
Silkline,https://jobs.gem.com/silkline
|
||||
Skypilot Co,https://jobs.gem.com/skypilot-co
|
||||
Slash,https://jobs.gem.com/slash
|
||||
Sleep Center,https://jobs.gem.com/sleep-center
|
||||
Smacktechnologies Com,https://jobs.gem.com/smacktechnologies-com
|
||||
Snout,https://jobs.gem.com/snout
|
||||
Softup Technologies,https://jobs.gem.com/softup-technologies
|
||||
Sohar Health,https://jobs.gem.com/sohar-health
|
||||
Soundhound,https://jobs.gem.com/soundhound
|
||||
Spawn,https://jobs.gem.com/spawn
|
||||
Spellbrush,https://jobs.gem.com/spellbrush
|
||||
Sphere Semi,https://jobs.gem.com/sphere-semi
|
||||
Ssg,https://jobs.gem.com/ssg
|
||||
Stack Auth Com,https://jobs.gem.com/stack-auth-com
|
||||
Startup People Solutions,https://jobs.gem.com/startup-people-solutions
|
||||
Stealth Startup,https://jobs.gem.com/stealth-startup
|
||||
Stockapp Com,https://jobs.gem.com/stockapp-com
|
||||
Stryke,https://jobs.gem.com/stryke
|
||||
Sunsethq Com,https://jobs.gem.com/sunsethq-com
|
||||
Super Hi Fi,https://jobs.gem.com/super-hi-fi
|
||||
Superblocks,https://jobs.gem.com/superblocks
|
||||
Supersonik Ai,https://jobs.gem.com/supersonik-ai
|
||||
Supio,https://jobs.gem.com/supio
|
||||
Suppliercanada Com,https://jobs.gem.com/suppliercanada-com
|
||||
Switchgrowth Com,https://jobs.gem.com/switchgrowth-com
|
||||
Symbolica,https://jobs.gem.com/symbolica
|
||||
Syndesus,https://jobs.gem.com/syndesus
|
||||
System Two Security,https://jobs.gem.com/system-two-security
|
||||
Taxgpt Inc ,https://jobs.gem.com/taxgpt-inc-
|
||||
Taxo Ai,https://jobs.gem.com/taxo-ai
|
||||
Tektome Com,https://jobs.gem.com/tektome-com
|
||||
Telora,https://jobs.gem.com/telora
|
||||
Tensorstax Com,https://jobs.gem.com/tensorstax-com
|
||||
Tenx Recruiting,https://jobs.gem.com/tenx-recruiting
|
||||
Terraai Earth,https://jobs.gem.com/terraai-earth
|
||||
Test Board,https://jobs.gem.com/test-board
|
||||
The Boring Company,https://jobs.gem.com/the-boring-company
|
||||
The Brewer Garrett Company,https://jobs.gem.com/the-brewer-garrett-company
|
||||
The Talent Project Com,https://jobs.gem.com/the-talent-project-com
|
||||
Theburntapp Com,https://jobs.gem.com/theburntapp-com
|
||||
Theinterface,https://jobs.gem.com/theinterface
|
||||
Thejobbridge,https://jobs.gem.com/thejobbridge
|
||||
Thelma,https://jobs.gem.com/thelma
|
||||
Theluckyfoundation,https://jobs.gem.com/theluckyfoundation
|
||||
Thenewclub Fyi,https://jobs.gem.com/thenewclub-fyi
|
||||
Theseus Us,https://jobs.gem.com/theseus-us
|
||||
Thinkific,https://jobs.gem.com/thinkific
|
||||
Third Dimension,https://jobs.gem.com/third-dimension
|
||||
Thrivory,https://jobs.gem.com/thrivory
|
||||
Thunder,https://jobs.gem.com/thunder
|
||||
Thunder Compute,https://jobs.gem.com/thunder-compute
|
||||
Timetoperform,https://jobs.gem.com/timetoperform
|
||||
Token Transit,https://jobs.gem.com/token-transit
|
||||
Toolhouse Ai,https://jobs.gem.com/toolhouse-ai
|
||||
Torchsystems Com,https://jobs.gem.com/torchsystems-com
|
||||
Transluce,https://jobs.gem.com/transluce
|
||||
Trashlab,https://jobs.gem.com/trashlab
|
||||
Tricentis,https://jobs.gem.com/tricentis
|
||||
Trilliumhiring Com,https://jobs.gem.com/trilliumhiring-com
|
||||
Tripworks Com,https://jobs.gem.com/tripworks-com
|
||||
Tristero,https://jobs.gem.com/tristero
|
||||
Trojan Trading,https://jobs.gem.com/trojan-trading
|
||||
Tropic,https://jobs.gem.com/tropic
|
||||
Trybree Com,https://jobs.gem.com/trybree-com
|
||||
Tryhelium Com,https://jobs.gem.com/tryhelium-com
|
||||
Tungsten Dev,https://jobs.gem.com/tungsten-dev
|
||||
Turbohome,https://jobs.gem.com/turbohome
|
||||
Twentyfour7 Dev,https://jobs.gem.com/twentyfour7-dev
|
||||
Unify Ai,https://jobs.gem.com/unify-ai
|
||||
Untolabs Com,https://jobs.gem.com/untolabs-com
|
||||
Up Labs,https://jobs.gem.com/up-labs
|
||||
Useful,https://jobs.gem.com/useful
|
||||
Usemalleable Com,https://jobs.gem.com/usemalleable-com
|
||||
Vamo Xyz,https://jobs.gem.com/vamo-xyz
|
||||
Vanguard Cleaning Systems,https://jobs.gem.com/vanguard-cleaning-systems
|
||||
Vantaca,https://jobs.gem.com/vantaca
|
||||
Vantager,https://jobs.gem.com/vantager
|
||||
Vantara Ai,https://jobs.gem.com/vantara-ai
|
||||
Vectorworks,https://jobs.gem.com/vectorworks
|
||||
Vectrasim,https://jobs.gem.com/vectrasim
|
||||
Veho Technologies,https://jobs.gem.com/veho-technologies
|
||||
Ventionteams Com,https://jobs.gem.com/ventionteams-com
|
||||
Venture Guides,https://jobs.gem.com/venture-guides
|
||||
Vercel Ats Sandbox,https://jobs.gem.com/vercel-ats-sandbox
|
||||
Vesseltalent Com,https://jobs.gem.com/vesseltalent-com
|
||||
Voker Ai,https://jobs.gem.com/voker-ai
|
||||
Voltai Com,https://jobs.gem.com/voltai-com
|
||||
Wayback Labs,https://jobs.gem.com/wayback-labs
|
||||
Webflow Ats Sandbox,https://jobs.gem.com/webflow-ats-sandbox
|
||||
Western Governors University,https://jobs.gem.com/western-governors-university
|
||||
Whatconverts,https://jobs.gem.com/whatconverts
|
||||
Wiseroad Recruiting Inc,https://jobs.gem.com/wiseroad-recruiting-inc
|
||||
Wizecamel,https://jobs.gem.com/wizecamel
|
||||
Wolfjaw Careers,https://jobs.gem.com/wolfjaw-careers
|
||||
Wonolo,https://jobs.gem.com/wonolo
|
||||
Woodsideai,https://jobs.gem.com/woodsideai
|
||||
Youtrip,https://jobs.gem.com/youtrip
|
||||
Zefi Ai,https://jobs.gem.com/zefi-ai
|
||||
Zep,https://jobs.gem.com/zep
|
||||
Zorrorx,https://jobs.gem.com/zorrorx
|
||||
|
6
greenhouse.csv
Normal file
6
greenhouse.csv
Normal file
@ -0,0 +1,6 @@
|
||||
url,timestamp
|
||||
https://job-boards.eu.greenhouse.io/bcbgroup/jobs/4681083101?gh_src=cryptocurrencyjobs.co,2025-12-31T08:35:23.424931
|
||||
https://job-boards.greenhouse.io/securitize/jobs/4074121009?gh_src=cryptocurrencyjobs.co,2025-12-31T09:19:17.349713
|
||||
https://job-boards.eu.greenhouse.io/bcbgroup/jobs/4681102101?gh_src=cryptocurrencyjobs.co,2025-12-31T09:58:36.919216
|
||||
https://job-boards.greenhouse.io/kiosk/jobs/4427184005?gh_src=cryptocurrencyjobs.co,2025-12-31T10:10:51.176114
|
||||
https://job-boards.eu.greenhouse.io/bcbgroup/jobs/4681083101?gh_src=cryptocurrencyjobs.co,2025-12-31T11:02:31.869728
|
||||
|
2544
greenhousecompanies.csv
Normal file
2544
greenhousecompanies.csv
Normal file
File diff suppressed because it is too large
Load Diff
7
lever.csv
Normal file
7
lever.csv
Normal file
@ -0,0 +1,7 @@
|
||||
url,timestamp
|
||||
https://jobs.eu.lever.co/kaiko/3f7f3db9-4a6a-4047-8760-bc52c3d03e05?lever-origin=applied&lever-source%5B%5D=cryptocurrencyjobs.co,2025-12-31T09:20:28.542417
|
||||
https://jobs.lever.co/waterfall/880fb1b4-2515-4534-9970-53c497c82f12?lever-origin=applied&lever-source%5B%5D=cryptocurrencyjobs.co,2025-12-31T10:08:17.316072
|
||||
https://jobs.lever.co/obol-tech/fcccd493-54e4-425a-b9bd-82fa6f7e6aff?lever-origin=applied&lever-source%5B%5D=cryptocurrencyjobs.co,2025-12-31T10:35:29.164452
|
||||
https://jobs.eu.lever.co/coinspaid/7605e154-4b1d-45ee-b1d4-35edea13d80b?lever-origin=applied&lever-source%5B%5D=cryptocurrencyjobs.co,2025-12-31T10:51:38.852693
|
||||
https://jobs.lever.co/vedatechlabs/9c59c96c-2bb0-47b0-88fe-5d5a9fd85997?lever-origin=applied&lever-source%5B%5D=cryptocurrencyjobs.co,2025-12-31T11:02:16.120852
|
||||
https://jobs.eu.lever.co/kaiko/3f7f3db9-4a6a-4047-8760-bc52c3d03e05?lever-origin=applied&lever-source%5B%5D=cryptocurrencyjobs.co,2025-12-31T11:16:43.218273
|
||||
|
1792
levercompanies.csv
Normal file
1792
levercompanies.csv
Normal file
File diff suppressed because it is too large
Load Diff
8
linkedin.csv
Normal file
8
linkedin.csv
Normal file
@ -0,0 +1,8 @@
|
||||
url,timestamp
|
||||
https://www.linkedin.com/jobs/view/operations-analyst-at-amber-group-4325538653/?ref=cryptocurrencyjobs.co,2025-12-31T09:20:11.544002
|
||||
https://www.linkedin.com/jobs/view/hr-operations-intern-sg-at-matrixport-official-4338171692/?ref=cryptocurrencyjobs.co,2025-12-31T09:25:10.499933
|
||||
https://www.linkedin.com/jobs/view/operations-analyst-at-matrixport-official-4235087267/?ref=cryptocurrencyjobs.co,2025-12-31T09:33:53.104120
|
||||
https://www.linkedin.com/jobs/view/business-operations-analyst-at-matrixport-official-4215538150/?ref=cryptocurrencyjobs.co,2025-12-31T09:34:24.186519
|
||||
https://www.linkedin.com/jobs/view/graduate-hiring-business-operations-analyst-wealth-management-at-matrixport-official-4131687672/?ref=cryptocurrencyjobs.co,2025-12-31T09:36:47.038648
|
||||
https://www.linkedin.com/jobs/view/customer-support-specialist-at-matrixport-official-4323103235/?ref=cryptocurrencyjobs.co,2025-12-31T10:39:57.272414
|
||||
https://www.linkedin.com/jobs/view/finance-intern-at-amber-group-4248725225/?ref=cryptocurrencyjobs.co,2025-12-31T11:31:03.349275
|
||||
|
1
rippling.csv
Normal file
1
rippling.csv
Normal file
@ -0,0 +1 @@
|
||||
url,timestamp
|
||||
|
1324
ripplingcompanies.csv
Normal file
1324
ripplingcompanies.csv
Normal file
File diff suppressed because it is too large
Load Diff
67
scraper.py
67
scraper.py
@ -11,7 +11,8 @@ import json
|
||||
import redis
|
||||
from urllib.parse import urlparse
|
||||
import hashlib
|
||||
|
||||
import csv
|
||||
import os
|
||||
|
||||
class CryptoJobScraper:
|
||||
def __init__(
|
||||
@ -26,7 +27,7 @@ class CryptoJobScraper:
|
||||
self.human_speed = human_speed
|
||||
self.user_request = user_request
|
||||
self.llm_agent = LLMJobRefiner()
|
||||
self.redis_client = redis.Redis(host='=localhost', port=6379, db=0, decode_responses=True)
|
||||
self.redis_client = redis.Redis(host='localhost', port=6379, db=0, decode_responses=True)
|
||||
|
||||
self.FORBIDDEN_ATS_DOMAINS = [
|
||||
'ashby', 'ashbyhq',
|
||||
@ -37,6 +38,7 @@ class CryptoJobScraper:
|
||||
'smartrecruiters',
|
||||
'workable',
|
||||
'lever', 'jobs.lever.co',
|
||||
'linkedin.com' # ✅ Added LinkedIn
|
||||
]
|
||||
|
||||
self.INVALID_CONTENT_PHRASES = [
|
||||
@ -46,7 +48,7 @@ class CryptoJobScraper:
|
||||
"not a valid job",
|
||||
"job not found",
|
||||
"page not found",
|
||||
"The requested job post could not be found. It may have been removed."
|
||||
"The requested job post could not be found. It may have been removed.",
|
||||
"this page does not contain a job description"
|
||||
]
|
||||
|
||||
@ -220,15 +222,57 @@ class CryptoJobScraper:
|
||||
url_lower = url.lower()
|
||||
return any(domain in url_lower for domain in self.FORBIDDEN_ATS_DOMAINS)
|
||||
|
||||
def _get_ats_platform_name(self, url: str) -> str:
|
||||
"""Return canonical ATS name based on URL (e.g., 'ashby', 'greenhouse')"""
|
||||
url_lower = url.lower()
|
||||
|
||||
# Order matters: more specific first
|
||||
if 'boards.greenhouse.io' in url_lower:
|
||||
return 'greenhouse'
|
||||
elif 'jobs.lever.co' in url_lower:
|
||||
return 'lever'
|
||||
elif 'myworkdayjobs' in url_lower or 'myworkday' in url_lower:
|
||||
return 'workday'
|
||||
elif 'linkedin.com' in url_lower:
|
||||
return 'linkedin'
|
||||
elif 'ashbyhq.com' in url_lower or 'ashby' in url_lower:
|
||||
return 'ashby'
|
||||
elif 'gem.com' in url_lower or 'gem' in url_lower:
|
||||
return 'gem'
|
||||
elif 'rippling' in url_lower:
|
||||
return 'rippling'
|
||||
elif 'smartrecruiters' in url_lower:
|
||||
return 'smartrecruiters'
|
||||
elif 'workable' in url_lower:
|
||||
return 'workable'
|
||||
else:
|
||||
# Fallback: extract domain part
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
domain = parsed.netloc.lower()
|
||||
for forbidden in self.FORBIDDEN_ATS_DOMAINS:
|
||||
if forbidden in domain:
|
||||
return forbidden.split('.')[0] if '.' in forbidden else forbidden
|
||||
except:
|
||||
pass
|
||||
return 'forbidden_ats'
|
||||
|
||||
def _log_forbidden_ats_url(self, url: str, platform: str):
|
||||
"""Append forbidden URL to {platform}.csv"""
|
||||
filename = f"{platform}.csv"
|
||||
file_exists = os.path.isfile(filename)
|
||||
with open(filename, 'a', newline='', encoding='utf-8') as f:
|
||||
writer = csv.writer(f)
|
||||
if not file_exists:
|
||||
writer.writerow(['url', 'timestamp'])
|
||||
writer.writerow([url, datetime.now().isoformat()])
|
||||
print(f" 📥 Logged forbidden ATS URL to {filename}: {url}")
|
||||
|
||||
async def _is_invalid_job_page(self, page_content: str) -> bool:
|
||||
content_lower = page_content.lower()
|
||||
return any(phrase in content_lower for phrase in self.INVALID_CONTENT_PHRASES)
|
||||
|
||||
def _extract_job_id_from_url(self, url: str) -> Optional[str]:
|
||||
"""
|
||||
Extract job ID from URL. Returns ID if it contains at least one digit.
|
||||
Otherwise, returns None (but does NOT mean skip!).
|
||||
"""
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
path_parts = [p for p in parsed.path.split('/') if p]
|
||||
@ -242,7 +286,6 @@ class CryptoJobScraper:
|
||||
if not candidate or not any(c.isdigit() for c in candidate):
|
||||
return None
|
||||
|
||||
# Avoid title-like strings (with spaces or long words + no structure)
|
||||
if re.search(r'[A-Za-z]{6,}\s', candidate):
|
||||
return None
|
||||
|
||||
@ -323,14 +366,12 @@ class CryptoJobScraper:
|
||||
await job_detail_page.goto(href, wait_until='networkidle', timeout=60000)
|
||||
await asyncio.sleep(2 * self.human_speed)
|
||||
|
||||
# Check for invalid content
|
||||
page_content = await job_detail_page.content()
|
||||
if await self._is_invalid_job_page(page_content):
|
||||
print(" 🚫 Page contains invalid content → skipping.")
|
||||
await job_detail_page.close()
|
||||
continue
|
||||
|
||||
# Try to click apply
|
||||
apply_clicked = False
|
||||
apply_selectors = [
|
||||
'a[href*="apply"], a:text("Apply"), a:text("Apply Now"), a:text("Apply here")',
|
||||
@ -361,7 +402,8 @@ class CryptoJobScraper:
|
||||
print(f" New tab opened: {new_url}")
|
||||
|
||||
if new_url and await self._is_forbidden_ats_url(new_url):
|
||||
print(" 🚫 New URL is a forbidden ATS → skipping job.")
|
||||
platform = self._get_ats_platform_name(new_url)
|
||||
self._log_forbidden_ats_url(new_url, platform)
|
||||
if candidate_page != job_detail_page:
|
||||
await candidate_page.close()
|
||||
await job_detail_page.close()
|
||||
@ -376,7 +418,6 @@ class CryptoJobScraper:
|
||||
|
||||
final_scrape_url = apply_page.url
|
||||
|
||||
# Re-check invalid content on final page
|
||||
page_content = await self._extract_page_content_for_llm(apply_page)
|
||||
if await self._is_invalid_job_page(page_content):
|
||||
print(" 🚫 Final page contains invalid content → skipping.")
|
||||
@ -385,10 +426,8 @@ class CryptoJobScraper:
|
||||
await job_detail_page.close()
|
||||
continue
|
||||
|
||||
# Extract job ID — but do NOT fail if missing
|
||||
job_id = self._extract_job_id_from_url(final_scrape_url)
|
||||
if not job_id:
|
||||
# Fallback: hash the URL to create a stable, unique ID
|
||||
job_id = "job_" + hashlib.md5(final_scrape_url.encode()).hexdigest()[:12]
|
||||
|
||||
raw_data = {
|
||||
|
||||
5
workable.csv
Normal file
5
workable.csv
Normal file
@ -0,0 +1,5 @@
|
||||
url,timestamp
|
||||
https://apply.workable.com/thetie/j/C54DFC9985/?ref=cryptocurrencyjobs.co,2025-12-31T08:24:45.755671
|
||||
https://apply.workable.com/thetie/j/C54DFC9985/?ref=cryptocurrencyjobs.co,2025-12-31T09:51:08.343642
|
||||
https://apply.workable.com/thetie/j/2745433865/?ref=cryptocurrencyjobs.co,2025-12-31T09:51:28.331543
|
||||
https://apply.workable.com/thetie/j/1A6C8F2913/?ref=cryptocurrencyjobs.co,2025-12-31T11:22:54.623723
|
||||
|
3548
workablecompanies.csv
Normal file
3548
workablecompanies.csv
Normal file
File diff suppressed because it is too large
Load Diff
1
workday.csv
Normal file
1
workday.csv
Normal file
@ -0,0 +1 @@
|
||||
url,timestamp
|
||||
|
1045
workdaycompanies.csv
Normal file
1045
workdaycompanies.csv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user