Vaishvik1618 committed on
Commit
2053c29
·
verified ·
1 Parent(s): d657ff8

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +174 -0
app.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import gradio as gr
2
+ # from sklearn.feature_extraction.text import TfidfVectorizer
3
+ # from sklearn.metrics.pairwise import cosine_similarity
4
+ # import fitz
5
+ # from docx import Document
6
+ #
7
+ # def read_resume_file(file):
8
+ # if file.name.endswith('.txt'):
9
+ # content = file.read().decode('utf-8')
10
+ # elif file.name.endswith('.pdf'):
11
+ # content = ''
12
+ # with fitz.open(stream=file.read(), filetype='pdf') as doc:
13
+ # for page in doc:
14
+ # content+= page.get_text()
15
+ # elif file.name.endswith('.docx'):
16
+ # content =''
17
+ # document = Document(file)
18
+ # for para in document.paragraphs:
19
+ # content+=para.text+ '\n'
20
+ # else:
21
+ # return "Unsupported file format. Please upload a .txt, .pdf, or .docx file."
22
+ # return content
23
+ #
24
+ #
25
+ # def calculate_similarity(job_desc, resume):
26
+ # vectorizer = TfidfVectorizer(stop_words = 'english')
27
+ # tfidf_matrix = vectorizer.fit_transform([job_desc, resume])
28
+ # print(tfidf_matrix)
29
+ #
30
+ # similarityScore = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
31
+ # return f"Similarity Score: {similarityScore * 100:.2f}%"
32
+ #
33
+ # def find_missing_keywords(job_desc, resume):
34
+ # vectorizer = TfidfVectorizer(stop_words='english')
35
+ # vectorizer.fit_transform([job_desc, resume])
36
+ #
37
+ # job_desc_words = set(job_desc.lower().split())
38
+ # resume_words = set(resume.lower().split())
39
+ #
40
+ # missing_words = job_desc_words - resume_words
41
+ #
42
+ # return list(missing_words)
43
+ #
44
+ # def ats_evalution(job_desc, resume_file):
45
+ # resume_text = read_resume_file(resume_file)
46
+ # if isinstance(resume_text, str) and resume_text.startswith("Unsupported"):
47
+ # return resume_text, ""
48
+ # similarity = calculate_similarity(job_desc, resume_text)
49
+ # missing_keywords = find_missing_keywords(job_desc, resume_text)
50
+ #
51
+ # if missing_keywords:
52
+ # missing_keywords_str = ", ".join(missing_keywords)
53
+ # missing_info = f"Missing Keywords: {missing_keywords_str}"
54
+ # else:
55
+ # missing_info = "No missing keywords. Your resume covers all keywords in the job description."
56
+ # return similarity, missing_info
57
+ #
58
+ # app = gr.Interface(
59
+ # fn=ats_evalution,
60
+ # inputs = [
61
+ # gr.Textbox(lines = 10, placeholder = 'Paste job description here....'),
62
+ # gr.File(label='Upload your resume (.txt & .pdf & .docx)')
63
+ # ],
64
+ #
65
+ # outputs = [
66
+ # gr.Text(label="Similarity Score"),
67
+ # gr.Text(label="Missing Keywords")
68
+ # ],
69
+ #
70
+ # title = "ATS Resume Score Generator",
71
+ # description="Upload your resume and paste the job description to get a similarity score and identify missing keywords."
72
+ #
73
+ # )
74
+ #
75
+ # if __name__ == "__main__":
76
+ # app.launch()
77
+ #
78
+
79
+ import gradio as gr
80
+ import PyPDF2
81
+ import docx
82
+ import re
83
+ from sklearn.feature_extraction.text import TfidfVectorizer
84
+ from sklearn.metrics.pairwise import cosine_similarity
85
+ from nltk.corpus import stopwords
86
+ from nltk.tokenize import word_tokenize
87
+ import string
88
+ import nltk
89
# Download necessary NLTK data (tokenizer models and the stopword list),
# in the same order as before.
for _nltk_resource in ('punkt_tab', 'punkt', 'stopwords'):
    nltk.download(_nltk_resource)
93
+
94
+ # Function to extract text from uploaded files
95
def extract_text_from_file(file):
    """Return the plain text of an uploaded resume.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``) or any
            object whose ``name`` attribute holds the path of the upload.

    Returns:
        The extracted text for ``.pdf``, ``.docx`` and ``.txt`` files
        (extension matched case-insensitively), or the fixed
        "Unsupported file format..." message for anything else.
    """
    import os  # local import: only this function needs it

    # NOTE(review): Gradio versions differ in what gr.File hands to the
    # callback (open temp-file wrapper vs. path string) -- confirm against
    # the deployed version. Resolving a path up front works for both.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name
    lowered = path.lower()

    if lowered.endswith('.pdf'):
        reader = PyPDF2.PdfReader(path)
        page_texts = (page.extract_text() for page in reader.pages)
        # Separate pages with a newline so the last word of one page does
        # not fuse with the first word of the next.
        return '\n'.join(text for text in page_texts if text)

    if lowered.endswith('.docx'):
        document = docx.Document(path)
        return '\n'.join(para.text for para in document.paragraphs)

    if lowered.endswith('.txt'):
        with open(path, 'rb') as handle:
            raw = handle.read()
        # 'utf-8-sig' drops a leading BOM; errors='replace' keeps one stray
        # non-UTF-8 byte from aborting the whole evaluation.
        return raw.decode('utf-8-sig', errors='replace')

    return "Unsupported file format. Please upload a .txt, .pdf, or .docx file."
111
+
112
+ # Function to preprocess the text
113
def preprocess_text(text):
    """Normalise *text* for TF-IDF comparison.

    The text is lower-cased, digit runs and ASCII punctuation are deleted,
    the remainder is tokenised with NLTK, English stopwords are dropped,
    and the surviving tokens are joined with single spaces.
    """
    punctuation_table = str.maketrans('', '', string.punctuation)
    without_digits = re.sub(r'\d+', '', text.lower())
    cleaned = without_digits.translate(punctuation_table)

    tokens = word_tokenize(cleaned)
    english_stopwords = set(stopwords.words('english'))

    kept = []
    for token in tokens:
        if token in english_stopwords:
            continue
        kept.append(token)
    return ' '.join(kept)
121
+
122
+ # Function to extract keywords using TF-IDF
123
def extract_keywords(text, top_n=10):
    """Return the terms a TF-IDF vectorizer keeps for *text*.

    Args:
        text: Text to analyse (callers pass already-preprocessed text).
        top_n: Upper bound on the number of terms, forwarded as the
            vectorizer's ``max_features``; ``None`` keeps every term.

    Returns:
        A set of term strings. The set is empty when *text* holds no token
        of two or more word characters.
    """
    # The vectorizer's default token pattern only accepts runs of two or
    # more word characters and fitting raises ValueError ("empty
    # vocabulary") when nothing matches, so answer that case directly.
    if not re.search(r'(?u)\b\w\w+\b', text or ''):
        return set()

    vectorizer = TfidfVectorizer(max_features=top_n)
    # Only the learned vocabulary is needed; the TF-IDF matrix is not.
    vectorizer.fit([text])
    return set(vectorizer.get_feature_names_out())
128
+
129
+ # Combined function to evaluate ATS score and find missing keywords
130
def ats_evaluation(job_desc, resume_file):
    """Score a resume against a job description and list missing keywords.

    Args:
        job_desc: Job description text pasted by the user.
        resume_file: Uploaded resume as passed on to
            ``extract_text_from_file``; ``None`` when nothing was uploaded.

    Returns:
        A ``(similarity_message, missing_keywords_message)`` tuple of
        strings. When the inputs cannot be evaluated, the first element
        carries an explanatory message and the second is empty.
    """
    unsupported_message = (
        "Unsupported file format. Please upload a .txt, .pdf, or .docx file."
    )

    if resume_file is None:
        return "Please upload a resume file (.txt, .pdf, or .docx).", ""
    if not (job_desc or "").strip():
        return "Please paste a job description.", ""

    resume_text = extract_text_from_file(resume_file)
    # Match the exact error message: a substring test would misfire on any
    # resume that merely contains the word "Unsupported".
    if resume_text == unsupported_message:
        return resume_text, ""

    job_desc_processed = preprocess_text(job_desc)
    resume_processed = preprocess_text(resume_text)
    if not job_desc_processed:
        return "The job description contains no usable keywords.", ""
    if not resume_processed:
        return "No readable text could be extracted from the resume.", ""

    try:
        job_keywords = extract_keywords(job_desc_processed)
        # Compare against every term in the resume, not only its own top
        # terms; otherwise words the resume does contain are reported as
        # missing whenever they fall outside its top 10.
        resume_terms = extract_keywords(resume_processed, top_n=None)
        tfidf_matrix = TfidfVectorizer().fit_transform(
            [job_desc_processed, resume_processed]
        )
    except ValueError:
        # Raised by the vectorizer when a text yields an empty vocabulary.
        return "Not enough meaningful text to compare the resume with the job description.", ""

    missing_keywords = job_keywords - resume_terms

    # Calculate similarity score
    similarity_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

    # Format output (sorted so repeated runs give the same text)
    similarity_output = f"Similarity Score: {similarity_score * 100:.2f}%"
    if missing_keywords:
        missing_keywords_output = f"Missing Keywords: {', '.join(sorted(missing_keywords))}"
    else:
        missing_keywords_output = "No missing keywords. Your resume covers all key terms."

    return similarity_output, missing_keywords_output
156
+
157
+ # Create the Gradio interface
158
# Gradio UI: a job-description textbox and a resume upload go in; the
# similarity score and the missing-keyword report come out as two textboxes.
app = gr.Interface(
    fn=ats_evaluation,
    inputs=[
        gr.Textbox(
            lines=10,
            placeholder='Paste job description here...',
            label="Job Description",
        ),
        gr.File(label='Upload your resume (.txt, .pdf, .docx)'),
    ],
    outputs=[
        gr.Textbox(label="Similarity Score"),
        gr.Textbox(label="Missing Keywords"),
    ],
    title="ATS Resume Score Generator",
    description="Upload your resume and paste the job description to get a similarity score and identify missing keywords.",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()