Manojajj committed on
Commit
7627f9b
·
verified ·
1 Parent(s): 410ca7f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import pipeline
4
+ import pdfplumber
5
+ import re
6
+
7
# Token-classification (NER) pipeline used by parse_resume to pull entities
# out of the resume text. Built once at import time and shared by all requests.
NER_MODEL_NAME = "dbmdz/bert-large-cased-finetuned-conll03-english"
nlp = pipeline(task="ner", model=NER_MODEL_NAME, framework="pt")
9
+
10
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of the uploaded PDF resume.

    Args:
        pdf_file: The uploaded resume, passed straight to ``pdfplumber.open``.

    Returns:
        The text of all pages joined with newlines. A page that yields no
        text contributes an empty string, so the result is always a ``str``.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # extract_text() can yield None for a page without a text layer
        # (e.g. a scanned image); `or ""` keeps the join from failing.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next page.
    return "\n".join(page_texts)
17
+
18
# Compiled once at import time; the patterns themselves are unchanged.
_PHONE_RE = re.compile(r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}')
_EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')


def _merge_word_pieces(entities):
    """Glue word-piece continuations ("##xyz") back onto the token they extend.

    Returns a list of ``[label, word]`` pairs in input order.
    """
    merged = []
    last_index = None
    for entity in entities:
        word = entity['word']
        index = entity.get('index')
        is_continuation = word.startswith('##')
        # Only glue pieces that directly follow the previous reported token;
        # when no token index is available, assume adjacency.
        adjacent = last_index is None or index is None or index == last_index + 1
        if is_continuation and merged and adjacent:
            merged[-1][1] += word[2:]
        else:
            merged.append([entity['entity'], word[2:] if is_continuation else word])
        last_index = index
    return merged


def parse_resume(resume_text):
    """Parse resume text into phone, email and NER-derived categories.

    Phone and email are found with regular expressions (first match wins).
    The remaining fields come from the module-level ``nlp`` NER pipeline.

    Args:
        resume_text: Plain text of the resume. ``None``, empty or
            whitespace-only text is accepted and yields an "empty" result
            without invoking the model.

    Returns:
        A dict with the keys "Phone", "Email", "Skills", "Experience",
        "Education" and "Certifications". Phone/Email are "Not found" when no
        match exists; the other values are comma-separated strings.
    """
    resume_text = resume_text or ""

    phone = _PHONE_RE.findall(resume_text)
    email = _EMAIL_RE.findall(resume_text)

    # Skip the (expensive) model call when there is nothing to analyse.
    # NOTE(review): very long resumes may exceed the model's maximum sequence
    # length - confirm whether the text needs to be chunked before this call.
    entities = nlp(resume_text) if resume_text.strip() else []

    skills = []
    experience = []
    education = []
    certifications = []

    # Without aggregation the pipeline reports individual word-pieces, so
    # merge them first; otherwise one name shows up as several fragments.
    for label, word in _merge_word_pieces(entities):
        if 'ORG' in label:
            experience.append(word)
        elif 'MISC' in label:
            skills.append(word)
        elif 'LOC' in label:
            education.append(word)
        else:
            # NOTE(review): every other label (presumably person names for
            # this CoNLL-03 model) lands in "Certifications" - kept for
            # backward compatibility, but the mapping should be revisited.
            certifications.append(word)

    return {
        "Phone": phone[0] if phone else "Not found",
        "Email": email[0] if email else "Not found",
        "Skills": ", ".join(skills),
        "Experience": ", ".join(experience),
        "Education": ", ".join(education),
        "Certifications": ", ".join(certifications),
    }
59
+
60
+ # Define Gradio interface
61
def resume_parser(pdf_file):
    """Gradio callback: read the uploaded PDF and return its parsed fields."""
    return parse_resume(extract_text_from_pdf(pdf_file))
66
+
67
+ # Create the Gradio interface
68
# The gr.inputs / gr.outputs namespaces were deprecated in Gradio 3 and
# removed in Gradio 4. Prefer the top-level components and fall back to the
# legacy ones only on installations that do not provide them.
if hasattr(gr, "File") and hasattr(gr, "JSON"):
    resume_input = gr.File(label="Upload Resume (PDF)")
    parsed_output = gr.JSON(label="Parsed Information")
else:
    resume_input = gr.inputs.File(label="Upload Resume (PDF)")
    parsed_output = gr.outputs.JSON(label="Parsed Information")

# The description lists only the fields that resume_parser actually returns.
demo = gr.Interface(
    fn=resume_parser,
    inputs=resume_input,
    outputs=parsed_output,
    title="AI Resume Parser",
    description="Upload a resume (PDF) to extract details like Email, Phone, Skills, Experience, Education, and Certifications.",
)
demo.launch()