|
import json |
|
import traceback |
|
import numpy as np |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
from gemini_api import GeminiAPI |
|
import os |
|
import dotenv |
|
from dotenv import load_dotenv |
|
|
|
|
|
# Pull variables from a local .env file (if any) into the process environment
# before the API key is read below.
load_dotenv()

# Gemini credential taken from the environment; this is None when the
# variable is not set.
apikey = os.environ.get("GEMINI_API_KEY")

# Module-wide Gemini client; CVMatcher.final_score uses it to parse the
# offer and CV files.
gemini = GeminiAPI(api_key=apikey)
|
|
|
|
|
class CVMatcher:
    """Score how well a parsed CV matches a parsed job offer.

    Offers and CVs are plain dictionaries (as produced by the Gemini parser
    used in ``final_score``). Text similarity is computed with a
    SentenceTransformer model that is loaded lazily, the first time an
    embedding is actually needed.
    """

    def __init__(self, model_name="TaylorAI/bge-micro-v2"):
        """Store configuration only; the embedding model is loaded on demand.

        Args:
            model_name: Name of the SentenceTransformer model to load.
        """
        self.model = None
        self.model_name = model_name
        self.cv_data = None
        self.offer_data = None
        # Minimum role similarity for a past position to count as relevant
        # experience in role_experience_similarity.
        self.similarity_threshold = 0.6

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _load_model(self):
        """Load the SentenceTransformer model once; later calls are no-ops."""
        if self.model is not None:
            return

        # Imported only when a model really has to be built, so callers that
        # never need embeddings do not pay for (or require) the dependency.
        from sentence_transformers import SentenceTransformer

        print("Loading SentenceTransformer model for the first time...")
        self.model = SentenceTransformer(self.model_name)
        print("Model loaded successfully.")

    @staticmethod
    def _to_float(value, default=0.0):
        """Return ``value`` as a float, or ``default`` if it is None/unparsable."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return float(default)

    @staticmethod
    def _join_terms(text):
        """Lower-case ``text`` and join its comma-separated parts with ' and '."""
        parts = (part.strip() for part in str(text).lower().split(","))
        return " and ".join(part for part in parts if part)

    @staticmethod
    def _sector_label(sector):
        """Return a display form of a sector: list items joined by ' and '."""
        if isinstance(sector, list):
            return " and ".join(str(item) for item in sector)
        return sector

    @staticmethod
    def _percent(score):
        """Convert a 0..1 score to an integer percentage, rounded to nearest."""
        return int(round(100 * float(score)))

    @staticmethod
    def _summarize_skills(skill_scores, title):
        """Summarise per-skill scores as skill names ordered by similarity.

        With six or more skills only the three best and three worst matches
        are reported; otherwise the full ranking is returned under ``title``.
        """
        if not skill_scores:
            return {}

        ranked = [
            skill
            for skill, _ in sorted(
                skill_scores.items(), key=lambda item: item[1], reverse=True
            )
        ]
        if len(ranked) >= 6:
            return {"top_matches": ranked[:3], "bottom_matches": ranked[-3:]}
        return {"title": title, "skills": ranked}

    @staticmethod
    def _experience_requirement_text(min_exp, max_exp):
        """Describe the offer's experience range in one sentence."""
        if min_exp == 0:
            return "There's not any experience required for this role."
        if max_exp >= 9999.0:
            return f"The offer is looking for someone with more than {int(min_exp)} years of experience."
        return f"The offer is looking for between {int(min_exp)} and {int(max_exp)} years of experience."

    # ------------------------------------------------------------------
    # Sector
    # ------------------------------------------------------------------
    def preprocess_sector(self, sector):
        """Normalise a sector (string or list of strings) into an embedding prompt."""
        if isinstance(sector, (list, tuple)):
            output = " and ".join(str(item).lower().strip() for item in sector)
        else:
            output = self._join_terms(sector)
        return f"principal job sector: {output}"

    def sector_similarity(self, offer_dict, cv_dict):
        """Return the 0..1 similarity between the offer sector and the CV sector.

        Returns 0.0 when either sector is missing and 1.0 when both normalise
        to the same text. Otherwise the cosine similarity of the embeddings
        plus a 0.1 bonus (capped at 1.0) is returned; if that computation
        fails, a neutral 0.5 is returned.
        """
        offer_sector = offer_dict.get("sector", "")
        cv_sector = cv_dict.get("primary_sector", "")
        if not offer_sector or not cv_sector:
            return 0.0

        offer_text = self.preprocess_sector(offer_sector)
        cv_text = self.preprocess_sector(cv_sector)
        if offer_text == cv_text:
            return 1.0

        # Loaded outside the try block so a missing/broken model is reported
        # instead of being silently turned into the 0.5 fallback.
        self._load_model()
        try:
            cv_emb, offer_emb = self.model.encode([cv_text, offer_text])
            raw_score = cosine_similarity([offer_emb], [cv_emb])[0][0]
            return float(min(1.0, raw_score + 0.1))
        except Exception as e:
            print(f"Error calculating sector similarity: {e}")
            return 0.5

    # ------------------------------------------------------------------
    # Education
    # ------------------------------------------------------------------
    def preprocess_field(self, field):
        """Normalise a field of study into an embedding prompt."""
        return f"field of study: {self._join_terms(field)}"

    def education_similarity(self, offer_dict, cv_education):
        """Return the 0..1 similarity between the offer's field and one CV degree.

        If the offer names no field there is nothing to mismatch, so 1.0 is
        returned; if the CV entry names no field, 0.0 is returned. Otherwise
        the cosine similarity plus a 0.05 bonus, capped at 1.0.
        """
        offer_field = (offer_dict.get("education") or {}).get("field")
        if offer_field is None or not str(offer_field).strip():
            return 1.0

        cv_field = cv_education.get("field")
        if cv_field is None or not str(cv_field).strip():
            return 0.0

        self._load_model()
        offer_emb, cv_emb = self.model.encode(
            [self.preprocess_field(offer_field), self.preprocess_field(cv_field)]
        )
        sim_score = cosine_similarity([offer_emb], [cv_emb])[0][0] + 0.05
        return float(min(1, sim_score))

    def education_final_score(self, offer_dict, cv_dict):
        """Return the 0..1 education score of the CV against the offer.

        The score is 0.0 when the CV lists no education or its highest level
        is below the offer's minimum. Otherwise it is the best field
        similarity among qualifying degrees, plus a bonus of
        ``0.1 * level_difference * similarity`` for every degree above the
        minimum level, capped at 1.0.
        """
        min_education = self._to_float(
            (offer_dict.get("education") or {}).get("number")
        )

        cv_education = cv_dict.get("education") or []
        if not cv_education:
            return 0.0

        levels = [self._to_float(edu.get("number")) for edu in cv_education]
        if max(levels) < min_education:
            return 0.0

        best_similarity = 0.0
        addon = 0.0
        for edu, level in zip(cv_education, levels):
            if level < min_education:
                continue
            # One similarity computation per degree serves both the best
            # match and the higher-level bonus.
            similarity = self.education_similarity(offer_dict, edu)
            best_similarity = max(best_similarity, similarity)
            if level > min_education:
                addon += 0.1 * (level - min_education) * similarity

        return min(1.0, best_similarity + addon)

    # ------------------------------------------------------------------
    # Skills
    # ------------------------------------------------------------------
    def skills_similarity(self, offer_dict, cv_dict, type="technical"):
        """Score each offer skill against the CV's skills.

        Args:
            offer_dict: Parsed offer.
            cv_dict: Parsed CV.
            type: ``"technical"`` (key ``technical_abilities``) or ``"soft"``
                (key ``soft_skills``).

        Returns:
            ``(per_skill, average)`` where ``per_skill`` maps each lower-cased
            offer skill to a 0..1 score (1.0 for an exact match, otherwise the
            best cosine similarity plus 0.1, capped at 1.0). ``({}, 0)`` is
            returned for an unknown ``type`` or when either side has no skills.
        """
        if type == "technical":
            key = "technical_abilities"
        elif type == "soft":
            key = "soft_skills"
        else:
            return {}, 0

        cv_skills = [str(s).lower().strip() for s in cv_dict.get(key) or []]
        offer_skills = [str(s).lower().strip() for s in offer_dict.get(key) or []]
        if not offer_skills or not cv_skills:
            return {}, 0

        known = set(cv_skills)
        # Only skills without an exact match need embeddings.
        unmatched = [s for s in dict.fromkeys(offer_skills) if s not in known]

        best_by_skill = {}
        if unmatched:
            self._load_model()
            cv_embeddings = self.model.encode(cv_skills)
            offer_embeddings = self.model.encode(unmatched)
            # Rows: unmatched offer skills; columns: CV skills.
            sim_matrix = cosine_similarity(offer_embeddings, cv_embeddings)
            best_by_skill = dict(zip(unmatched, np.max(sim_matrix, axis=1)))

        skill_similarities = {}
        for skill in offer_skills:
            if skill in known:
                skill_similarities[skill] = 1.0
            else:
                # Plain floats keep the result free of NumPy scalar types.
                skill_similarities[skill] = float(min(1, best_by_skill[skill] + 0.1))

        avg_similarity = float(np.mean(list(skill_similarities.values())))
        return skill_similarities, avg_similarity

    # ------------------------------------------------------------------
    # Role / experience
    # ------------------------------------------------------------------
    def role_similarity(self, offer_role, cv_roles):
        """Return the cosine similarity of ``offer_role`` to each CV role."""
        if len(cv_roles) == 0:
            return np.array([])

        self._load_model()
        cv_embeddings = self.model.encode(cv_roles)
        offer_embedding = self.model.encode(offer_role)
        return cosine_similarity([offer_embedding], cv_embeddings)[0]

    def role_experience_similarity(self, offer_dict, cv_dict):
        """Measure how much relevant experience the CV has for the offer's role.

        Years in a past position count, weighted by role similarity, only when
        that similarity reaches ``self.similarity_threshold``.

        Returns:
            ``(min_exp, max_exp, weighted_experience, experience_perc)``;
            ``(0, 0, 0, 0)`` when the CV has no usable roles or the offer
            names no role.
        """
        cv_experience = []
        for experience in cv_dict.get("experience") or []:
            for role in experience.get("roles") or []:
                position = role.get("position", "")
                # None / unparsable years are treated as 0 and skipped.
                years = self._to_float(role.get("years"))
                if position and years > 0:
                    cv_experience.append({
                        'position': position,
                        'years': years,
                        'company': experience.get('company', ''),
                        'duration': experience.get('duration', ''),
                    })

        if not cv_experience:
            return 0, 0, 0, 0

        offer_role = offer_dict.get("role", "")
        if not offer_role:
            return 0, 0, 0, 0

        role_similarities = self.role_similarity(
            offer_role, [exp['position'] for exp in cv_experience]
        )

        weighted_experience = 0.0
        for exp, similarity in zip(cv_experience, role_similarities):
            similarity = float(similarity)
            if similarity >= self.similarity_threshold:
                weighted_experience += similarity * exp['years']

        requirement = offer_dict.get("experience") or {}
        min_exp = self._to_float(requirement.get("min"), 0.0)
        max_exp = self._to_float(requirement.get("max"), 9999.0)

        if min_exp > 0:
            experience_perc = min(1.0, weighted_experience / min_exp)
        else:
            experience_perc = 1.0 if weighted_experience > 0 else 0

        return min_exp, max_exp, weighted_experience, experience_perc

    # ------------------------------------------------------------------
    # Entry points
    # ------------------------------------------------------------------
    def final_score(self, offer_path, cv_path):
        """Calculate final matching scores between an offer and a CV.

        Args:
            offer_path (str): Path to the job offer file
            cv_path (str): Path to the CV file

        Returns:
            dict: Dictionary containing all matching scores and details
        """
        # Load the model before spending Gemini calls on parsing.
        self._load_model()

        offer_dict = gemini.parse_offer(offer_path)
        cv_dict = gemini.parse_cv(cv_path)
        return self.create_dict(offer_dict, cv_dict)

    def create_dict(self, offer_dict, cv_dict):
        """Build the full matching report for already-parsed offer and CV dicts.

        Returns:
            dict with integer percentage scores (``technical_skills_score``,
            ``soft_skills_score``, ``role_experience_score``,
            ``education_score``, ``sector_score``) plus explanatory sections
            ``technical_skills``, ``soft_skills``, ``role_experience``,
            ``education`` and ``sector``.
        """
        self._load_model()

        # --- skills -----------------------------------------------------
        tech_skills_dict, tech_score = self.skills_similarity(offer_dict, cv_dict, "technical")
        soft_skills_dict, soft_score = self.skills_similarity(offer_dict, cv_dict, "soft")
        tech_skills = self._summarize_skills(
            tech_skills_dict, 'Technical skills similarity order'
        )
        soft_skills = self._summarize_skills(
            soft_skills_dict, 'Soft skills similarity order'
        )

        # --- role experience --------------------------------------------
        _, _, total_exp, exp_score = self.role_experience_similarity(offer_dict, cv_dict)
        role = offer_dict.get("role", "")

        # Re-read the requirement here: role_experience_similarity returns
        # zeros on its early exits, but the text must reflect the offer.
        requirement = offer_dict.get("experience") or {}
        min_exp = self._to_float(requirement.get("min"), 0.0)
        max_exp = self._to_float(requirement.get("max"), 9999.0)

        experience_requirement_text = self._experience_requirement_text(min_exp, max_exp)
        full_explanation = f"You have approximately {round(total_exp, 1)} years of experience in roles similar to '{role}'. {experience_requirement_text}"

        # --- sector -----------------------------------------------------
        sector_similarity = float(self.sector_similarity(offer_dict, cv_dict))
        offer_sector = offer_dict.get("sector", "")
        cv_sector = cv_dict.get("primary_sector", "")
        offer_sector_label = self._sector_label(offer_sector)
        cv_sector_label = self._sector_label(cv_sector)
        sector_percent = round(sector_similarity * 100, 1)

        # --- education --------------------------------------------------
        education_score = self.education_final_score(offer_dict, cv_dict)
        offer_education = offer_dict.get("education") or {}
        min_education = self._to_float(offer_education.get("number"))
        min_education_level = offer_education.get("min", "No especificado")
        min_education_field = offer_education.get("field", "No especificado")

        cv_education_list = cv_dict.get("education") or []
        # First entry with the highest level (same pick as a stable
        # descending sort), or None when the CV lists no education.
        highest_cv_degree = max(
            cv_education_list,
            key=lambda edu: self._to_float(edu.get('number')),
            default=None,
        )

        if min_education == 0:
            education_explanation = "The offer does not specify a minimum education level. The candidate's highest degree is shown for reference."
            education_details = {
                "minimum_required_level": "Not specified",
                "minimum_required_field": min_education_field if min_education_field != "No especificado" else "Not specified",
                "equivalent_level_cv": highest_cv_degree.get('degree', 'Not available') if highest_cv_degree else 'Not available',
                "equivalent_field_cv": highest_cv_degree.get('field', 'Not available') if highest_cv_degree else 'Not available',
                "higher_education_degrees": [],
                "meets_requirement": True,
            }
        else:
            same_level_education = [
                edu for edu in cv_education_list
                if self._to_float(edu.get('number')) == min_education
            ]
            higher_education = [
                edu for edu in cv_education_list
                if self._to_float(edu.get('number')) > min_education
            ]

            if same_level_education or higher_education:
                match_text = "The candidate meets the minimum requirement."
            else:
                match_text = "The candidate does not meet the minimum requirement."
            education_explanation = f"The offer requires at least {min_education_level}. {match_text}"

            # Prefer a degree at exactly the required level; otherwise fall
            # back to the highest degree when it exceeds the requirement.
            if same_level_education:
                equivalent_education = same_level_education[0]
            elif higher_education:
                equivalent_education = highest_cv_degree
            else:
                equivalent_education = {}

            education_details = {
                "minimum_required_level": min_education_level,
                "minimum_required_field": min_education_field,
                "equivalent_level_cv": equivalent_education.get('degree', 'Not available'),
                "equivalent_field_cv": equivalent_education.get('field', 'Not available'),
                "higher_education_degrees": [edu.get('degree', '') for edu in higher_education],
                # Based on the blended education score, not on the level
                # check alone.
                "meets_requirement": education_score >= 0.5,
            }

        return {
            "technical_skills_score": self._percent(tech_score),
            "soft_skills_score": self._percent(soft_score),
            "role_experience_score": self._percent(exp_score),
            "education_score": self._percent(education_score),
            "sector_score": self._percent(sector_similarity),

            "technical_skills": tech_skills,
            "soft_skills": soft_skills,

            "role_experience": {
                "explanation": full_explanation,
                "details": {
                    "role": role,
                    "min_years": min_exp,
                    "max_years": max_exp,
                    "total_experience": round(total_exp, 1),
                },
            },

            "education": {
                "explanation": education_explanation,
                "details": education_details,
            },

            "sector": {
                "explanation": f"The offer's sector is '{offer_sector_label}' and your main sector is '{cv_sector_label}'. "
                               f"The similarity between both sectors is {sector_percent}%.",
                "details": {
                    "offer_sector": offer_sector,
                    "cv_sector": cv_sector_label,
                    "similarity": sector_percent,
                },
            },
        }
|
|
|
|
|
|
|
# Shared module-level instance for importers; constructing it only stores
# configuration, it does not load the embedding model.
matcher: CVMatcher = CVMatcher()
|
|
|
|
|
|
|
|