# NOTE(review): the lines previously here were Hugging Face Spaces page chrome
# captured by the scrape (app status, file size, commit hashes, line-number
# gutter) -- not part of the program. Converted to this comment so the file
# is valid Python.
import gradio as gr
import pdfplumber
import spacy
from spacy.cli import download
from sentence_transformers import SentenceTransformer, util
# Function to load the spaCy model, downloading it if necessary
def load_spacy_model():
    """Return the 'en_core_web_md' spaCy pipeline, fetching it on first use.

    If the model package is not installed locally, download it once and
    load again.
    """
    model_name = 'en_core_web_md'
    try:
        return spacy.load(model_name)
    except OSError:
        # Model package is missing on this machine: fetch it, then retry.
        download(model_name)
        return spacy.load(model_name)
# Load the spaCy pipeline and the Sentence Transformer model once at import
# time so every Gradio request reuses the same in-memory models.
nlp = load_spacy_model()
model = SentenceTransformer('all-MiniLM-L6-v2')
def extract_text_from_pdf(pdf_path):
    """Extract all text from a PDF file.

    Parameters:
        pdf_path: Path to a PDF file on disk.

    Returns:
        The concatenated text of every page, with a newline after each page.
    """
    text = ''
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            # extract_text() returns None for pages with no extractable text
            # (e.g. scanned images); fall back to '' to avoid a TypeError.
            text += (page.extract_text() or '') + "\n"
    return text
def extract_text_from_txt(txt_path):
    """Read and return the full contents of a UTF-8 text file.

    Parameters:
        txt_path: Path to a text file on disk.

    Returns:
        The file contents as a single string.
    """
    # Explicit encoding avoids the platform-dependent default (e.g. cp1252
    # on Windows), which would raise on non-ASCII job descriptions.
    with open(txt_path, 'r', encoding='utf-8') as file:
        return file.read()
def analyze_resume(resume_file, job_description_file):
    """Compare an uploaded resume (PDF) against a job description (TXT).

    Returns:
        A tuple of (list of (entity_text, entity_label) pairs found in the
        resume, cosine similarity formatted as a percentage string, and the
        raw job-description text).
    """
    resume_text = extract_text_from_pdf(resume_file.name)
    job_description = extract_text_from_txt(job_description_file.name)

    # Named-entity extraction over the resume via the shared spaCy pipeline.
    entities = [(entity.text, entity.label_) for entity in nlp(resume_text).ents]

    # Embed both documents and score them with cosine similarity, reported
    # as a percentage rounded to two decimal places.
    resume_vec = model.encode(resume_text)
    job_vec = model.encode(job_description)
    score = round(util.pytorch_cos_sim(resume_vec, job_vec).item() * 100, 2)

    return entities, f"{score}%", job_description
# Create a Gradio interface: two file uploads in, three panels out
# (entities as JSON, similarity score, echoed job-description text).
iface = gr.Interface(
    fn=analyze_resume,
    inputs=[
        gr.File(label="Upload Resume (PDF)"),
        gr.File(label="Upload Job Description (TXT)")
    ],
    outputs=[
        gr.JSON(label="Extracted Entities"),
        gr.Textbox(label="Resume and Job Description Similarity"),
        gr.Textbox(label="Job Description Text", interactive=False)
    ],
    title="Resume and Job Description Analyzer",
    description="Upload your PDF resume and a TXT job description to extract entities and calculate similarity."
)
# Launch the web UI (blocking call; serves the app until interrupted).
iface.launch()