"""Score how well a parsed CV matches a parsed job offer.

The module wires together a Gemini-based parser (``gemini``) that turns
offer/CV files into dictionaries, and a sentence-embedding model used to
compare sectors, fields of study, skills and job titles.
"""

import json
import os
import traceback

import dotenv
import numpy as np
from dotenv import load_dotenv
from sklearn.metrics.pairwise import cosine_similarity

from gemini_api import GeminiAPI

# Load environment variables from .env
load_dotenv()
apikey = os.getenv("GEMINI_API_KEY")

# Create a global instance (alternatively it could be passed to the class)
gemini = GeminiAPI(api_key=apikey)


class CVMatcher:
    """Compute similarity scores between a job offer dict and a CV dict.

    The embedding model is loaded lazily on first use (see ``_load_model``).
    """

    # Candidate models:
    #   all-MiniLM-L6-v2
    #   paraphrase-MiniLM-L3-v2
    #   TaylorAI/bge-micro-v2
    def __init__(self, model_name="TaylorAI/bge-micro-v2"):
        """Store the configuration; the model itself is not loaded here.

        Args:
            model_name: Name passed to ``SentenceTransformer`` on first use.
        """
        self.model = None
        self.model_name = model_name
        self.cv_data = None
        self.offer_data = None
        # Minimum role similarity for a CV role to count as relevant
        # experience in ``role_experience_similarity``.
        self.similarity_threshold = 0.6

    def _load_model(self):
        """Load the SentenceTransformer model once; later calls are no-ops."""
        if self.model is not None:
            return
        # Imported lazily so that importing this module does not pull in
        # sentence_transformers until a model is actually needed.
        from sentence_transformers import SentenceTransformer

        print("Loading SentenceTransformer model for the first time...")
        self.model = SentenceTransformer(self.model_name)
        print("Model loaded successfully.")

    @staticmethod
    def _to_float(value, default=0.0):
        """Return ``float(value)``, or ``default`` when ``value`` is None."""
        return default if value is None else float(value)

    # ----------- 1. Sector -----------
    def preprocess_sector(self, sector):
        """Turn a sector value into the sentence that gets embedded.

        Args:
            sector: A list of sectors or any single value (str, number, None).

        Returns:
            ``"principal job sector: ..."`` with the sectors lower-cased and
            joined by " and ".
        """
        if isinstance(sector, list):
            # Make sure every element is a string before joining.
            output = " and ".join(str(s).lower().strip() for s in sector)
        else:
            # Anything else (str, int, float, None) is converted to str first.
            output = str(sector).lower().strip().replace(",", " and")
        return f"principal job sector: {output}"

    def sector_similarity(self, offer_dict, cv_dict):
        """Return the similarity between the offer sector and the CV sector.

        Args:
            offer_dict: Offer data; reads the ``"sector"`` key.
            cv_dict: CV data; reads the ``"primary_sector"`` key.

        Returns:
            A float: 0.0 when either sector is missing/empty, 1.0 when both
            are identical after preprocessing, otherwise the cosine
            similarity plus a 0.1 boost (capped at 1.0). If the model cannot
            be loaded or the embedding fails, 0.5 is returned.
        """
        offer_sector = offer_dict.get("sector", "")
        cv_sector = cv_dict.get("primary_sector", "")
        if not offer_sector or not cv_sector:
            return 0.0

        offer_text = self.preprocess_sector(offer_sector)
        cv_text = self.preprocess_sector(cv_sector)
        if offer_text == cv_text:
            return 1.0

        try:
            # Without this call a standalone invocation would hit
            # ``None.encode`` and silently fall back to 0.5.
            self._load_model()
            cv_emb, offer_emb = self.model.encode([cv_text, offer_text])
            sim_score = cosine_similarity([offer_emb], [cv_emb])[0][0]
        except Exception as e:
            # Deliberate best effort: a neutral score instead of a crash.
            print(f"Error calculating sector similarity: {e}")
            return 0.5

        # Small boost, then a plain float so the value is JSON-serialisable.
        return float(min(1.0, sim_score + 0.1))

    # ----------- 2. Education -----------
    def preprocess_field(self, field):
        """Turn a field of study into the sentence that gets embedded."""
        # Force the conversion to str before doing anything else.
        return f"field of study: {str(field).lower().strip().replace(',', ' and')}"

    def education_similarity(self, offer_dict, cv_education):
        """Return the similarity between the offer field and one CV degree.

        Args:
            offer_dict: Offer data; ``offer_dict['education']['field']`` is read.
            cv_education: One CV education entry; ``cv_education['field']`` is read.

        Returns:
            Cosine similarity plus 0.05, capped at 1, as a float.

        Raises:
            KeyError: If either ``field`` (or the offer's ``education``) is missing.
        """
        self._load_model()
        offer_field = self.preprocess_field(offer_dict['education']['field'])
        cv_field = self.preprocess_field(cv_education['field'])
        offer_emb, cv_emb = self.model.encode([offer_field, cv_field])
        sim_score = cosine_similarity([offer_emb], [cv_emb])[0][0] + 0.05
        return float(min(1, sim_score))

    def education_final_score(self, offer_dict, cv_dict):
        """Return the overall education score of the CV for the offer.

        Only CV entries whose ``number`` is at least the offer's
        ``education.number`` are considered. The score is the best field
        similarity among them plus ``0.1 * level_difference * similarity``
        for every entry above the required level, capped at 1.0.

        Returns:
            0.0 when the CV has no education or its highest level is below
            the required one; otherwise a float no greater than 1.0.
        """
        offer_education = offer_dict.get('education') or {}
        min_education = self._to_float(offer_education.get('number', 0))

        cv_education = cv_dict.get('education') or []
        if not cv_education:
            return 0.0

        levels = [self._to_float(edu.get('number', 0)) for edu in cv_education]
        if max(levels) < min_education:
            return 0.0

        best_similarity = 0.0
        addon = 0.0
        # Single pass: each qualifying degree is embedded exactly once.
        for edu, level in zip(cv_education, levels):
            if level < min_education:
                continue
            similarity = self.education_similarity(offer_dict, edu)
            best_similarity = max(best_similarity, similarity)
            if level > min_education:
                addon += 0.1 * (level - min_education) * similarity

        return min(1.0, best_similarity + addon)

    # ----------- 3. Skills -----------
    def skills_similarity(self, offer_dict, cv_dict, type="technical"):
        """Score every offer skill against the CV skills.

        Args:
            offer_dict: Offer data.
            cv_dict: CV data.
            type: ``"technical"`` (reads ``technical_abilities``) or ``"soft"``
                (reads ``soft_skills``). The name shadows the builtin but is
                kept because callers may pass it by keyword.

        Returns:
            ``(per_skill, average)``: a dict mapping each lower-cased offer
            skill to its score (1.0 for an exact match, otherwise the best
            cosine similarity against the CV skills plus 0.1, capped at 1) and
            the mean of those scores. ``({}, 0)`` for an unknown ``type`` or
            when either skill list is empty.
        """
        if type == "technical":
            key = "technical_abilities"
        elif type == "soft":
            key = "soft_skills"
        else:
            return {}, 0

        cv_skills = [s.lower() for s in cv_dict.get(key) or []]
        offer_skills = [s.lower() for s in offer_dict.get(key) or []]
        if not offer_skills or not cv_skills:
            return {}, 0

        self._load_model()
        cv_embeddings = self.model.encode(cv_skills)
        offer_embeddings = self.model.encode(offer_skills)

        known_skills = set(cv_skills)  # O(1) exact-match lookups
        skill_similarities = {}
        for offer_skill, offer_emb in zip(offer_skills, offer_embeddings):
            if offer_skill in known_skills:
                skill_similarities[offer_skill] = 1.0
            else:
                sim_scores = cosine_similarity([offer_emb], cv_embeddings)[0]
                skill_similarities[offer_skill] = min(1, np.max(sim_scores) + 0.1)

        # offer_skills is non-empty here, so there is always something to average.
        avg_similarity = np.mean(list(skill_similarities.values()))
        return skill_similarities, avg_similarity

    # ----------- 4. Role experience -----------
    def role_similarity(self, offer_role, cv_roles):
        """Return the cosine similarity of ``offer_role`` to each CV role.

        Args:
            offer_role: The offer's role title.
            cv_roles: List of role titles from the CV.

        Returns:
            A 1-D array with one similarity per entry of ``cv_roles``.
        """
        self._load_model()
        cv_embeddings = self.model.encode(cv_roles)
        offer_embedding = self.model.encode(offer_role)
        return cosine_similarity([offer_embedding], cv_embeddings)[0]

    def role_experience_similarity(self, offer_dict, cv_dict):
        """Estimate the years of experience relevant to the offered role.

        Each CV role whose similarity to the offer role reaches
        ``self.similarity_threshold`` contributes ``similarity * years``.

        Returns:
            ``(min_exp, max_exp, weighted_experience, experience_perc)``.
            ``experience_perc`` is ``weighted_experience / min_exp`` capped at
            1.0, or 1.0/0 when no minimum is required. ``(0, 0, 0, 0)`` when
            the CV has no usable roles or the offer has no role.
        """
        # Collect (position, years) for every CV role that has both.
        cv_experience = []
        for experience in cv_dict.get('experience') or []:
            for role in experience.get('roles') or []:
                position = role.get('position', '')
                years = self._to_float(role.get('years', 0))
                if position and years > 0:
                    cv_experience.append((position, years))

        if not cv_experience:
            return 0, 0, 0, 0

        offer_role = offer_dict.get("role", "")
        if not offer_role:
            return 0, 0, 0, 0

        role_similarities = self.role_similarity(
            offer_role, [position for position, _ in cv_experience]
        )

        weighted_experience = 0
        for (_, years), similarity in zip(cv_experience, role_similarities):
            # Plain float keeps NumPy scalars out of the returned values.
            similarity = float(similarity)
            if similarity >= self.similarity_threshold:
                weighted_experience += similarity * years

        # None is treated like a missing bound.
        offer_experience = offer_dict.get('experience') or {}
        min_exp = self._to_float(offer_experience.get('min', 0.0), 0.0)
        max_exp = self._to_float(offer_experience.get('max', 9999.0), 9999.0)

        if min_exp > 0:
            experience_perc = min(1.0, weighted_experience / min_exp)
        else:
            experience_perc = 1.0 if weighted_experience > 0 else 0

        return min_exp, max_exp, weighted_experience, experience_perc

    # ----------- 5. Result dictionary -----------
    def final_score(self, offer_path, cv_path):
        """Calculate final matching scores between an offer and a CV.

        Args:
            offer_path (str): Path to the job offer file
            cv_path (str): Path to the CV file

        Returns:
            dict: Dictionary containing all matching scores and details
        """
        self._load_model()

        # Parse the offer and CV
        offer_dict = gemini.parse_offer(offer_path)
        cv_dict = gemini.parse_cv(cv_path)

        # Return the complete matching results
        return self.create_dict(offer_dict, cv_dict)

    @staticmethod
    def _summarize_skills(skill_scores, title):
        """Rank skills by score: top/bottom three when there are six or more."""
        if not skill_scores:
            return {}
        ranked = [
            skill
            for skill, _ in sorted(
                skill_scores.items(), key=lambda item: item[1], reverse=True
            )
        ]
        if len(ranked) >= 6:
            return {'top_matches': ranked[:3], 'bottom_matches': ranked[-3:]}
        return {'title': title, 'skills': ranked}

    def create_dict(self, offer_dict, cv_dict):
        """Build the full result dictionary for an offer/CV pair.

        Args:
            offer_dict: Parsed offer data.
            cv_dict: Parsed CV data.

        Returns:
            dict with integer percentage scores (``*_score`` keys) plus
            ``technical_skills``, ``soft_skills``, ``role_experience``,
            ``education`` and ``sector`` sections holding explanations and
            details.
        """
        self._load_model()

        # Skills
        tech_skills_dict, tech_score = self.skills_similarity(offer_dict, cv_dict, "technical")
        soft_skills_dict, soft_score = self.skills_similarity(offer_dict, cv_dict, "soft")
        tech_skills = self._summarize_skills(
            tech_skills_dict, 'Technical skills similarity order'
        )
        soft_skills = self._summarize_skills(
            soft_skills_dict, 'Soft skills similarity order'
        )

        # Role experience; the bounds are re-read from the offer just below.
        _, _, total_exp, exp_score = self.role_experience_similarity(offer_dict, cv_dict)
        role = offer_dict.get("role", "")

        offer_experience = offer_dict.get('experience') or {}
        min_exp = self._to_float(offer_experience.get('min', 0), 0.0)
        max_exp = self._to_float(offer_experience.get('max', 9999.0), 9999.0)

        if min_exp == 0:
            # Case 1: no minimum experience given, or it is 0.
            experience_requirement_text = "There's not any experience required for this role."
        elif max_exp >= 9999.0:
            # Case 2: a minimum but no (or a very high) maximum.
            experience_requirement_text = f"The offer is looking for someone with more than {int(min_exp)} years of experience."
        else:
            # Case 3: both minimum and maximum are given.
            experience_requirement_text = f"The offer is looking for between {int(min_exp)} and {int(max_exp)} years of experience."

        full_explanation = f"You have approximately {round(total_exp, 1)} years of experience in roles similar to '{role}'. {experience_requirement_text}"

        # Sector
        sector_similarity = self.sector_similarity(offer_dict, cv_dict)
        offer_sector = offer_dict.get("sector", "")
        cv_sector = cv_dict.get("primary_sector", "")
        cv_sector_text = ' and '.join(cv_sector) if isinstance(cv_sector, list) else cv_sector
        sector_pct = round(sector_similarity * 100, 1)

        # Education
        education_score = self.education_final_score(offer_dict, cv_dict)
        offer_education = offer_dict.get("education") or {}
        min_education = self._to_float(offer_education.get("number", 0))
        min_education_level = offer_education.get("min", "No especificado")
        min_education_field = offer_education.get("field", "No especificado")

        # Highest CV degree (first one wins on ties).
        cv_education_list = cv_dict.get("education") or []
        highest_cv_degree = None
        if cv_education_list:
            highest_cv_degree = max(
                cv_education_list,
                key=lambda edu: self._to_float(edu.get('number', 0)),
            )

        if min_education == 0:
            # SCENARIO 1: the offer does NOT specify a minimum education level.
            education_explanation = "The offer does not specify a minimum education level. The candidate's highest degree is shown for reference."
            education_details = {
                "minimum_required_level": "Not specified",
                "minimum_required_field": min_education_field if min_education_field != "No especificado" else "Not specified",
                # The original keys are kept so the HTML front end keeps working.
                "equivalent_level_cv": highest_cv_degree.get('degree', 'Not available') if highest_cv_degree else 'Not available',
                "equivalent_field_cv": highest_cv_degree.get('field', 'Not available') if highest_cv_degree else 'Not available',
                # Empty list; the JS side renders it as 'None'.
                "higher_education_degrees": [],
                "meets_requirement": True
            }
        else:
            # SCENARIO 2: the offer DOES specify a minimum education level.
            same_level_education = []
            higher_education = []
            for edu in cv_education_list:
                level = self._to_float(edu.get('number', 0))
                if level == min_education:
                    same_level_education.append(edu)
                elif level > min_education:
                    higher_education.append(edu)

            if same_level_education or higher_education:
                match_text = "The candidate meets the minimum requirement."
            else:
                match_text = "The candidate does not meet the minimum requirement."
            education_explanation = f"The offer requires at least {min_education_level}. {match_text}"

            # Most relevant degree to show as "equivalent".
            if same_level_education:
                equivalent_education = same_level_education[0]
            elif higher_education:
                equivalent_education = highest_cv_degree
            else:
                equivalent_education = {}

            education_details = {
                "minimum_required_level": min_education_level,
                "minimum_required_field": min_education_field,
                "equivalent_level_cv": equivalent_education.get('degree', 'Not available'),
                "equivalent_field_cv": equivalent_education.get('field', 'Not available'),
                "higher_education_degrees": [edu.get('degree', '') for edu in higher_education],
                "meets_requirement": education_score >= 0.5
            }

        # Final dictionary with all the processed information.
        result = {
            "technical_skills_score": int(np.round(100 * tech_score, 2)),
            "soft_skills_score": int(np.round(100 * soft_score, 2)),
            "role_experience_score": int(np.round(100 * exp_score, 2)),
            "education_score": int(np.round(100 * education_score, 2)),
            "sector_score": int(np.round(100 * sector_similarity, 2)),
            "technical_skills": tech_skills,
            "soft_skills": soft_skills,
            "role_experience": {
                "explanation": full_explanation,
                "details": {
                    "role": role,
                    "min_years": min_exp,
                    "max_years": max_exp,
                    "total_experience": round(total_exp, 1)
                }
            },
            "education": {
                "explanation": education_explanation,
                "details": education_details
            },
            "sector": {
                "explanation": f"The offer's sector is '{offer_sector}' and your main sector is '{cv_sector_text}'. "
                               f"The similarity between both sectors is {sector_pct}%.",
                "details": {
                    "offer_sector": offer_sector,
                    "cv_sector": cv_sector_text,
                    "similarity": sector_pct
                }
            }
        }

        return result


# Instantiate the matcher
matcher = CVMatcher()