# import gradio as gr
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity
# import fitz
# from docx import Document
#
# def read_resume_file(file):
# if file.name.endswith('.txt'):
# content = file.read().decode('utf-8')
# elif file.name.endswith('.pdf'):
# content = ''
# with fitz.open(stream=file.read(), filetype='pdf') as doc:
# for page in doc:
# content+= page.get_text()
# elif file.name.endswith('.docx'):
# content =''
# document = Document(file)
# for para in document.paragraphs:
# content+=para.text+ '\n'
# else:
# return "Unsupported file format. Please upload a .txt, .pdf, or .docx file."
# return content
#
#
# def calculate_similarity(job_desc, resume):
# vectorizer = TfidfVectorizer(stop_words = 'english')
# tfidf_matrix = vectorizer.fit_transform([job_desc, resume])
# print(tfidf_matrix)
#
# similarityScore = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
# return f"Similarity Score: {similarityScore * 100:.2f}%"
#
# def find_missing_keywords(job_desc, resume):
# vectorizer = TfidfVectorizer(stop_words='english')
# vectorizer.fit_transform([job_desc, resume])
#
# job_desc_words = set(job_desc.lower().split())
# resume_words = set(resume.lower().split())
#
# missing_words = job_desc_words - resume_words
#
# return list(missing_words)
#
# def ats_evalution(job_desc, resume_file):
# resume_text = read_resume_file(resume_file)
# if isinstance(resume_text, str) and resume_text.startswith("Unsupported"):
# return resume_text, ""
# similarity = calculate_similarity(job_desc, resume_text)
# missing_keywords = find_missing_keywords(job_desc, resume_text)
#
# if missing_keywords:
# missing_keywords_str = ", ".join(missing_keywords)
# missing_info = f"Missing Keywords: {missing_keywords_str}"
# else:
# missing_info = "No missing keywords. Your resume covers all keywords in the job description."
# return similarity, missing_info
#
# app = gr.Interface(
# fn=ats_evalution,
# inputs = [
# gr.Textbox(lines = 10, placeholder = 'Paste job description here....'),
# gr.File(label='Upload your resume (.txt & .pdf & .docx)')
# ],
#
# outputs = [
# gr.Text(label="Similarity Score"),
# gr.Text(label="Missing Keywords")
# ],
#
# title = "ATS Resume Score Generator",
# description="Upload your resume and paste the job description to get a similarity score and identify missing keywords."
#
# )
#
# if __name__ == "__main__":
# app.launch()
#
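
# Assumed dependencies for this Space, inferred from the imports below:
#   pip install gradio PyPDF2 python-docx scikit-learn nltk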
import gradio as gr
import PyPDF2
import docx
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
import nltk

# Download the NLTK data needed for tokenization and stopword removal
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

# Function to extract text from an uploaded .pdf, .docx, or .txt file
def extract_text_from_file(file):
    # Gradio's File component may pass either a plain path string or a
    # temp-file object exposing a .name path; normalise to a filesystem path.
    path = file if isinstance(file, str) else file.name
    if path.endswith('.pdf'):
        reader = PyPDF2.PdfReader(path)
        text = ''
        for page in reader.pages:
            page_text = page.extract_text()
            if page_text:  # extracted text may be empty for image-only pages
                text += page_text
        return text
    elif path.endswith('.docx'):
        doc = docx.Document(path)
        return '\n'.join(para.text for para in doc.paragraphs)
    elif path.endswith('.txt'):
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    else:
        return "Unsupported file format. Please upload a .txt, .pdf, or .docx file."

# Function to preprocess the text
def preprocess_text(text):
text = text.lower()
text = re.sub(r'\d+', '', text) # Remove numbers
text = text.translate(str.maketrans('', '', string.punctuation)) # Remove punctuation
tokens = word_tokenize(text)
stop_words = set(stopwords.words('english'))
filtered_tokens = [word for word in tokens if word not in stop_words] # Remove stopwords
return ' '.join(filtered_tokens)
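
# Example (hypothetical input): preprocess_text("5+ years of Python experience!")
# lowercases the text, strips the digit and punctuation, drops the stopword
# "of", and returns "years python experience".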
# Function to extract keywords using TF-IDF
def extract_keywords(text, top_n=10):
vectorizer = TfidfVectorizer(max_features=top_n)
tfidf_matrix = vectorizer.fit_transform([text])
feature_names = vectorizer.get_feature_names_out()
return set(feature_names)
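
# Note: with a single document, the IDF term is constant, so TfidfVectorizer's
# max_features selection reduces to picking up to top_n of the most frequent
# terms in the (already stopword-filtered) text.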
# Combined function to evaluate ATS score and find missing keywords
def ats_evaluation(job_desc, resume_file):
resume_text = extract_text_from_file(resume_file)
if isinstance(resume_text, str) and "Unsupported" in resume_text:
return resume_text, ""
job_desc_processed = preprocess_text(job_desc)
resume_processed = preprocess_text(resume_text)
job_keywords = extract_keywords(job_desc_processed)
resume_keywords = extract_keywords(resume_processed)
missing_keywords = job_keywords - resume_keywords
# Calculate similarity score
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform([job_desc_processed, resume_processed])
similarity_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
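    # TF-IDF vectors are non-negative, so the cosine similarity lies in [0, 1];
    # 1.0 means both texts share an identical term-weight profile.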
# Format output
similarity_output = f"Similarity Score: {similarity_score * 100:.2f}%"
if missing_keywords:
missing_keywords_output = f"Missing Keywords: {', '.join(missing_keywords)}"
else:
missing_keywords_output = "No missing keywords. Your resume covers all key terms."
return similarity_output, missing_keywords_output
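
# ats_evaluation returns two display strings: the formatted similarity score and
# either a comma-separated list of missing keywords or a confirmation message.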
# Create the Gradio interface
app = gr.Interface(
fn=ats_evaluation,
inputs=[
gr.Textbox(lines=10, placeholder='Paste job description here...', label="Job Description"),
gr.File(label='Upload your resume (.txt, .pdf, .docx)')
],
outputs=[
gr.Textbox(label="Similarity Score"),
gr.Textbox(label="Missing Keywords")
],
title="ATS Resume Score Generator",
description="Upload your resume and paste the job description to get a similarity score and identify missing keywords."
)
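
# The two values returned by ats_evaluation map positionally onto the two
# output Textboxes declared above: similarity score first, missing keywords second.
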
# Run the app
if __name__ == "__main__":
app.launch()