File size: 2,409 Bytes
c475914
 
 
c542ad2
c475914
 
c542ad2
 
 
 
 
 
 
 
 
c475914
c542ad2
c475914
 
 
 
 
 
c542ad2
c475914
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03acb3a
 
 
c475914
03acb3a
c475914
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import gradio as gr
import pdfplumber
import spacy
from spacy.cli import download
from sentence_transformers import SentenceTransformer, util

# Function to load the spaCy model, downloading it if necessary
def load_spacy_model():
    """Return the 'en_core_web_md' spaCy pipeline, downloading it on first use.

    Returns:
        The loaded spaCy Language object.
    """
    model_name = 'en_core_web_md'
    try:
        return spacy.load(model_name)
    except OSError:
        # Model not installed locally yet: fetch it, then load again.
        download(model_name)
        return spacy.load(model_name)

# Load spaCy model and Sentence Transformer model
# Module-level singletons: loaded once at import time and shared by analyze_resume.
nlp = load_spacy_model()  # spaCy pipeline used for named-entity extraction
model = SentenceTransformer('all-MiniLM-L6-v2')  # sentence-embedding model used for similarity scoring

def extract_text_from_pdf(pdf_path):
    """Extract all text from a PDF file.

    Args:
        pdf_path: Path to the PDF file on disk.

    Returns:
        The concatenated text of every page, with a newline appended
        after each page for readability.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() returns None for pages with no extractable text
        # (e.g. scanned images); fall back to '' to avoid a TypeError
        # when concatenating.
        return "".join((page.extract_text() or '') + "\n" for page in pdf.pages)

def extract_text_from_txt(txt_path):
    """Read the whole contents of a plain-text file.

    Args:
        txt_path: Path to the text file on disk.

    Returns:
        The file contents as a string.
    """
    # Explicit UTF-8 avoids locale-dependent decoding failures
    # (the platform default encoding is e.g. cp1252 on Windows, which
    # would raise UnicodeDecodeError on typical UTF-8 job descriptions).
    with open(txt_path, 'r', encoding='utf-8') as file:
        return file.read()

def analyze_resume(resume_file, job_description_file):
    """Compare an uploaded PDF resume against a TXT job description.

    Args:
        resume_file: Uploaded file object for the PDF resume (has .name).
        job_description_file: Uploaded file object for the TXT job description.

    Returns:
        A tuple of (named entities from the resume, similarity as a
        percentage string, the raw job-description text).
    """
    resume_text = extract_text_from_pdf(resume_file.name)
    job_description = extract_text_from_txt(job_description_file.name)

    # Named-entity extraction via the shared spaCy pipeline.
    entities = [(ent.text, ent.label_) for ent in nlp(resume_text).ents]

    # Cosine similarity of the two sentence embeddings, as a percentage
    # rounded to two decimal places.
    resume_vec = model.encode(resume_text)
    jd_vec = model.encode(job_description)
    similarity = round(util.pytorch_cos_sim(resume_vec, jd_vec).item() * 100, 2)

    return entities, f"{similarity}%", job_description

# Create a Gradio interface: two file uploads in, entities + similarity +
# echoed job description out.
iface = gr.Interface(
    fn=analyze_resume,
    inputs=[
        gr.File(label="Upload Resume (PDF)"),
        gr.File(label="Upload Job Description (TXT)")
    ],
    outputs=[
        gr.JSON(label="Extracted Entities"),
        gr.Textbox(label="Resume and Job Description Similarity"),
        gr.Textbox(label="Job Description Text", interactive=False)
    ],
    title="Resume and Job Description Analyzer",
    description="Upload your PDF resume and a TXT job description to extract entities and calculate similarity."
)

# Launch only when executed as a script, so importing this module
# (e.g. for testing) does not start the web server.
if __name__ == "__main__":
    iface.launch()