Manojajj committed
Commit 8a8733e · verified · 1 Parent(s): afb43ce

Update app.py

Files changed (1)
  1. app.py +25 -12
app.py CHANGED
@@ -3,6 +3,7 @@ import torch
 from transformers import pipeline
 import pdfplumber
 import re
+import pandas as pd
 
 # Load pre-trained model for Named Entity Recognition (NER) to extract details
 nlp = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", framework="pt")
@@ -45,7 +46,7 @@ def parse_resume(resume_text):
         else:
             certifications.append(entity['word'])
 
-    # Create a JSON-like output
+    # Create a dictionary of parsed data
     parsed_data = {
         "Phone": phone[0] if phone else "Not found",
         "Email": email[0] if email else "Not found",
@@ -57,18 +58,30 @@ def parse_resume(resume_text):
 
     return parsed_data
 
-# Define Gradio interface
-def resume_parser(pdf_file):
-    """Main function for resume parsing"""
-    resume_text = extract_text_from_pdf(pdf_file)
-    parsed_info = parse_resume(resume_text)
-    return parsed_info
+def process_resumes(pdf_files):
+    """Process multiple resumes and output a single Excel file."""
+    all_parsed_data = []
+
+    # Loop through each uploaded PDF file and parse the data
+    for pdf_file in pdf_files:
+        resume_text = extract_text_from_pdf(pdf_file)
+        parsed_info = parse_resume(resume_text)
+        all_parsed_data.append(parsed_info)
+
+    # Convert the parsed data into a pandas DataFrame
+    df = pd.DataFrame(all_parsed_data)
 
-# Create the Gradio interface
+    # Save the DataFrame to an Excel file
+    output_file = "parsed_resumes.xlsx"
+    df.to_excel(output_file, index=False)
+
+    return output_file
+
+# Define Gradio interface
 gr.Interface(
-    fn=resume_parser,
-    inputs=gr.File(label="Upload Resume (PDF)"),
-    outputs=gr.JSON(label="Parsed Information"),
+    fn=process_resumes,
+    inputs=gr.File(file_count="multiple", label="Upload Resumes (PDFs)"),
+    outputs=gr.File(label="Download Parsed Data (Excel)"),
     title="AI Resume Parser",
-    description="Upload a resume (PDF) to extract details like Name, Email, Phone, Skills, Experience, Education, and Certifications."
+    description="Upload multiple resumes (PDFs) to extract details like Name, Email, Phone, Skills, Experience, Education, and Certifications. The results will be saved in an Excel file."
 ).launch()
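
Note: the diff calls extract_text_from_pdf(), which is defined earlier in app.py and not shown in these hunks. A minimal pdfplumber-based sketch of such a helper (the body below is an assumption, not the file's actual implementation) might look like:

import pdfplumber

def extract_text_from_pdf(pdf_file):
    """Extract plain text from every page of a PDF.
    Sketch only: assumes pdf_file is a path or file-like object as passed by gr.File."""
    pages = []
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            # extract_text() returns None for pages without a text layer
            pages.append(page.extract_text() or "")
    return "\n".join(pages)

Separately, df.to_excel() writes .xlsx files through an engine such as openpyxl, so the Space's requirements.txt would likely need pandas and openpyxl added alongside this change.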