Spaces:

kushagrasharma-13
/

Resume_Parser_HuggingFace

Sleeping

App Files Files Community

Resume_Parser_HuggingFace / app.py

kushagrasharma-13

First

d5cc663 8 months ago

raw

history blame contribute delete

4.43 kB

	import re
	from utils import extract_text_from_pdf
	import streamlit as st

	def clean_text(text):
	    """Replace typographic punctuation in ``text`` with plain equivalents.

	    En and em dashes become ``-``, curly double quotes become ``"``, the
	    right single quotation mark becomes ``'`` and bullet characters are
	    removed. The converted string is returned.
	    """
	    # One translation table applied in a single pass; a value of '' deletes
	    # the character.
	    plain_equivalents = str.maketrans({
	        '\u2013': '-',
	        '\u2014': '-',
	        '\u201c': '"',
	        '\u201d': '"',
	        '\u2022': '',
	        '\u2019': "'",
	    })
	    return text.translate(plain_equivalents)

	# Separators placed between contact details on a single line: a dash that is
	# surrounded by whitespace (so hyphenated phone numbers and profile slugs stay
	# intact), or a pipe, comma, backslash or bullet with optional whitespace.
	_CONTACT_SEPARATOR = re.compile(r'\s+[\u2014\u2013-]\s+|\s*[|,\\\u2022\u00b7]\s*')
	_PHONE_PATTERN = re.compile(r'\+?\d[\d\s\-]+\d')
	_EMAIL_PATTERN = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
	_LINKEDIN_PATTERN = re.compile(r'linkedin\.com/in/[A-Za-z0-9_-]+', re.IGNORECASE)
	_GITHUB_PATTERN = re.compile(r'github\.com/[A-Za-z0-9_-]+', re.IGNORECASE)

	# Tried in this order; the phone check runs afterwards so that digits inside
	# an address or profile URL are never mistaken for a phone number.
	_CONTACT_FIELD_PATTERNS = (
	    ("Email", _EMAIL_PATTERN),
	    ("LinkedIn", _LINKEDIN_PATTERN),
	    ("GitHub", _GITHUB_PATTERN),
	)

	# Shorter digit runs (years, house numbers, ...) are not accepted as phones.
	_MIN_PHONE_DIGITS = 7


	def _find_phone(text):
	    """Return the first phone-like run in ``text`` with enough digits, else None."""
	    for match in _PHONE_PATTERN.finditer(text):
	        candidate = match.group()
	        if sum(char.isdigit() for char in candidate) >= _MIN_PHONE_DIGITS:
	            return candidate
	    return None


	def _classify_contact_item(item, contact):
	    """Return ``(field, value)`` for the first still-missing field found in ``item``.

	    Returns None when ``item`` holds nothing that is still missing from
	    ``contact``.
	    """
	    for field, pattern in _CONTACT_FIELD_PATTERNS:
	        if field in contact:
	            continue
	        match = pattern.search(item)
	        if match:
	            return field, match.group()
	    if "Phone" not in contact:
	        phone = _find_phone(item)
	        if phone is not None:
	            return "Phone", phone
	    return None


	def extract_contact_info(line, resume_json):
	    """Record contact details found in ``line`` into ``resume_json``.

	    ``line`` is split on the usual separators (`` - ``, ``|``, ``,``, ``\\``,
	    bullets) and each piece contributes at most one of the fields "Email",
	    "LinkedIn", "GitHub" or "Phone" to ``resume_json["Contact Information"]``.
	    A field that is already present is never overwritten. The
	    "Contact Information" entry is created when it is missing.

	    Args:
	        line: One line of resume text.
	        resume_json: The resume dictionary; it is modified in place.

	    Returns:
	        None.
	    """
	    contact = resume_json.setdefault("Contact Information", {})
	    for item in _CONTACT_SEPARATOR.split(line):
	        item = item.strip()
	        if not item:
	            continue
	        found = _classify_contact_item(item, contact)
	        if found is not None:
	            field, value = found
	            contact[field] = value

	# Heading phrases recognised for each output section.
	_SECTION_KEYWORDS = {
	    "Contact Information": ["Contact Information", "Contact Info", "Contact"],
	    "Professional Experience": ["Professional Experience", "Work Experience", "Experience"],
	    "Projects": ["Projects", "Project"],
	    "Skills": ["Skills", "Technical Skills"],
	    "Education": ["Education", "Academic Background"],
	    "Achievements": ["Achievements", "Awards", "Honors"],
	    "Extra-Curricular Activities": ["Extra-Curricular Activities", "Extracurricular Activities", "Activities", "Volunteer Experience"],
	}

	# Longest phrase first, so "Volunteer Experience" wins over the bare
	# "Experience". Phrases must appear as whole words (an optional plural "s"
	# is tolerated) rather than as arbitrary substrings.
	_SECTION_HEADING_PATTERNS = sorted(
	    (
	        (
	            len(keyword),
	            section_name,
	            re.compile(r'(?<!\w)' + re.escape(keyword) + r's?(?!\w)', re.IGNORECASE),
	        )
	        for section_name, keywords in _SECTION_KEYWORDS.items()
	        for keyword in keywords
	    ),
	    key=lambda entry: entry[0],
	    reverse=True,
	)

	# A heading is expected to be a short line; a longer line that merely
	# mentions a keyword ("Led three projects across ...") is ordinary content.
	_MAX_HEADING_WORDS = 5

	# Substrings (matched case-insensitively) that mark a line as contact details.
	_CONTACT_MARKERS = ("@", "linkedin", "github", "phone", "+91")
	_CONTACT_FIELDS = ("Phone", "Email", "LinkedIn", "GitHub")


	def _detect_section(line):
	    """Return the section that ``line`` is a heading for, or None."""
	    if len(line.split()) > _MAX_HEADING_WORDS:
	        return None
	    for _length, section_name, pattern in _SECTION_HEADING_PATTERNS:
	        if pattern.search(line):
	            return section_name
	    return None


	def parse_resume(resume_text):
	    """Split plain resume text into a dictionary of sections.

	    Lines are cleaned with ``clean_text`` and assigned to the section whose
	    heading most recently preceded them. Heading lines themselves are not
	    stored. Lines in the contact area are passed to ``extract_contact_info``
	    instead of being stored; lines before any heading that carry no contact
	    marker are ignored.

	    A line containing a contact marker inside another section is still
	    offered to ``extract_contact_info``, but it stays in that section and
	    does not change the current section, so the remaining lines of the
	    section are not lost.

	    Args:
	        resume_text: The resume as text; ``None`` is treated as empty text.

	    Returns:
	        A dict with the keys "Contact Information" (a dict in which missing
	        "Phone", "Email", "LinkedIn" and "GitHub" entries are set to
	        "Not Provided") and "Professional Experience", "Projects", "Skills",
	        "Education", "Achievements", "Extra-Curricular Activities" (each a
	        list of lines).
	    """
	    resume_json = {
	        "Contact Information": {},
	        "Professional Experience": [],
	        "Projects": [],
	        "Skills": [],
	        "Education": [],
	        "Achievements": [],
	        "Extra-Curricular Activities": [],
	    }
	    contact_section = "Contact Information"
	    section = None

	    for raw_line in (resume_text or "").splitlines():
	        line = raw_line.strip()
	        if not line:
	            continue
	        # Cleaning can leave stray edge whitespace (e.g. after a removed
	        # bullet) or nothing at all, so strip and re-check afterwards.
	        line = clean_text(line).strip()
	        if not line:
	            continue

	        heading = _detect_section(line)
	        if heading is not None:
	            section = heading
	            continue

	        lowered = line.lower()
	        has_contact_marker = any(marker in lowered for marker in _CONTACT_MARKERS)

	        if section is None:
	            if not has_contact_marker:
	                # Text before the first heading that is not contact data.
	                continue
	            # Contact details at the top of the resume, before any heading.
	            section = contact_section

	        if section == contact_section:
	            extract_contact_info(line, resume_json)
	            continue

	        if has_contact_marker:
	            extract_contact_info(line, resume_json)
	        resume_json[section].append(line)

	    contact = resume_json[contact_section]
	    for field in _CONTACT_FIELDS:
	        contact.setdefault(field, "Not Provided")

	    return resume_json

	# Streamlit entry point: parse the uploaded PDF, or the bundled sample resume
	# when nothing has been uploaded, and show the result as JSON.
	st.title("Resume Parser to JSON")

	uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
	default_file_path = "Kushagra_Sharma_Resume.pdf"

	resume_text = None
	file_name = None

	if uploaded_file is not None:
	    resume_text = extract_text_from_pdf(uploaded_file)
	    file_name = uploaded_file.name
	else:
	    try:
	        default_file = open(default_file_path, "rb")
	    except FileNotFoundError:
	        # The sample resume is optional; without it the page simply waits
	        # for an upload instead of failing with a traceback.
	        st.warning(
	            f"Default resume '{default_file_path}' was not found. "
	            "Please upload a PDF file."
	        )
	    else:
	        with default_file:
	            resume_text = extract_text_from_pdf(default_file)
	        file_name = default_file_path

	# file_name is only set once a source document has actually been read.
	if file_name is not None:
	    parsed_resume = parse_resume(resume_text)
	    st.text(f"Currently using file: {file_name}")
	    st.json(parsed_resume)