# import gradio as gr
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity
# import fitz
# from docx import Document
#
# def read_resume_file(file):
#     if file.name.endswith('.txt'):
#         content = file.read().decode('utf-8')
#     elif file.name.endswith('.pdf'):
#         content = ''
#         with fitz.open(stream=file.read(), filetype='pdf') as doc:
#             for page in doc:
#                 content += page.get_text()
#     elif file.name.endswith('.docx'):
#         content = ''
#         document = Document(file)
#         for para in document.paragraphs:
#             content += para.text + '\n'
#     else:
#         return "Unsupported file format. Please upload a .txt, .pdf, or .docx file."
#     return content
#
#
# def calculate_similarity(job_desc, resume):
#     vectorizer = TfidfVectorizer(stop_words='english')
#     tfidf_matrix = vectorizer.fit_transform([job_desc, resume])
#     print(tfidf_matrix)
#
#     similarity_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
#     return f"Similarity Score: {similarity_score * 100:.2f}%"
#
# def find_missing_keywords(job_desc, resume):
#     vectorizer = TfidfVectorizer(stop_words='english')
#     vectorizer.fit_transform([job_desc, resume])
#
#     job_desc_words = set(job_desc.lower().split())
#     resume_words = set(resume.lower().split())
#
#     missing_words = job_desc_words - resume_words
#
#     return list(missing_words)
#
# def ats_evaluation(job_desc, resume_file):
#     resume_text = read_resume_file(resume_file)
#     if isinstance(resume_text, str) and resume_text.startswith("Unsupported"):
#         return resume_text, ""
#     similarity = calculate_similarity(job_desc, resume_text)
#     missing_keywords = find_missing_keywords(job_desc, resume_text)
#
#     if missing_keywords:
#         missing_keywords_str = ", ".join(missing_keywords)
#         missing_info = f"Missing Keywords: {missing_keywords_str}"
#     else:
#         missing_info = "No missing keywords. Your resume covers all keywords in the job description."
#     return similarity, missing_info
#
# app = gr.Interface(
#     fn=ats_evaluation,
#     inputs=[
#         gr.Textbox(lines=10, placeholder='Paste job description here...'),
#         gr.File(label='Upload your resume (.txt & .pdf & .docx)')
#     ],
#
#     outputs=[
#         gr.Text(label="Similarity Score"),
#         gr.Text(label="Missing Keywords")
#     ],
#
#     title="ATS Resume Score Generator",
#     description="Upload your resume and paste the job description to get a similarity score and identify missing keywords."
#
# )
#
# if __name__ == "__main__":
#     app.launch()
#
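
"""
ATS Resume Score Generator.

A small Gradio app that compares an uploaded resume (.txt, .pdf, or .docx) against a
pasted job description: it computes a TF-IDF cosine-similarity score and lists
job-description keywords that do not appear among the resume's top keywords.
"""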

import gradio as gr
import PyPDF2
import docx
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
import nltk
# Download the NLTK data needed for tokenization and stopword removal
# ('punkt_tab' is the tokenizer resource used by newer NLTK releases, 'punkt' by older ones)
nltk.download('punkt_tab')
nltk.download('punkt')
nltk.download('stopwords')

# Function to extract text from uploaded files
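# The uploaded file object is expected to expose .name (used for the extension check) and,
# for .txt uploads, a bytes-returning .read(); PDFs are parsed with PyPDF2 and .docx files
# with python-docx.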
def extract_text_from_file(file):
    if file.name.endswith('.pdf'):
        reader = PyPDF2.PdfReader(file)
        text = ''
        for page in reader.pages:
            page_text = page.extract_text()
            if page_text:
                text += page_text
        return text
    elif file.name.endswith('.docx'):
        doc = docx.Document(file)
        return '\n'.join([para.text for para in doc.paragraphs])
    elif file.name.endswith('.txt'):
        return file.read().decode('utf-8')
    else:
        return "Unsupported file format. Please upload a .txt, .pdf, or .docx file."

# Function to preprocess the text
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'\d+', '', text)  # Remove numbers
    text = text.translate(str.maketrans('', '', string.punctuation))  # Remove punctuation
    tokens = word_tokenize(text)
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [word for word in tokens if word not in stop_words]  # Remove stopwords
    return ' '.join(filtered_tokens)
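# Example: preprocess_text("Built 3 REST APIs in Python!") -> "built rest apis python"
# (lowercased, digits and punctuation stripped, NLTK English stopwords such as "in" removed)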

# Function to extract keywords using TF-IDF
def extract_keywords(text, top_n=10):
    vectorizer = TfidfVectorizer(max_features=top_n)
    tfidf_matrix = vectorizer.fit_transform([text])
    feature_names = vectorizer.get_feature_names_out()
    return set(feature_names)
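# Note: max_features keeps the top_n terms ranked by frequency within the given (already
# preprocessed) text, so the "keywords" are simply its most frequent remaining terms.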

# Combined function to evaluate ATS score and find missing keywords
def ats_evaluation(job_desc, resume_file):
    resume_text = extract_text_from_file(resume_file)
    if isinstance(resume_text, str) and "Unsupported" in resume_text:
        return resume_text, ""

    job_desc_processed = preprocess_text(job_desc)
    resume_processed = preprocess_text(resume_text)

    job_keywords = extract_keywords(job_desc_processed)
    resume_keywords = extract_keywords(resume_processed)

    missing_keywords = job_keywords - resume_keywords
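    # Note: this compares the two top-N keyword sets, so a job keyword that appears in the
    # resume but outside the resume's own top keywords is still reported as missing.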

    # Calculate similarity score
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([job_desc_processed, resume_processed])
    similarity_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
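    # With non-negative TF-IDF vectors, cosine similarity falls in [0, 1]; it is shown as a percentage below.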

    # Format output
    similarity_output = f"Similarity Score: {similarity_score * 100:.2f}%"
    if missing_keywords:
        missing_keywords_output = f"Missing Keywords: {', '.join(sorted(missing_keywords))}"
    else:
        missing_keywords_output = "No missing keywords. Your resume covers all key terms."

    return similarity_output, missing_keywords_output

# Create the Gradio interface
app = gr.Interface(
    fn=ats_evaluation,
    inputs=[
        gr.Textbox(lines=10, placeholder='Paste job description here...', label="Job Description"),
        gr.File(label='Upload your resume (.txt, .pdf, .docx)')
    ],
    outputs=[
        gr.Textbox(label="Similarity Score"),
        gr.Textbox(label="Missing Keywords")
    ],
    title="ATS Resume Score Generator",
    description="Upload your resume and paste the job description to get a similarity score and identify missing keywords."
)

# Run the app
if __name__ == "__main__":
    app.launch()
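    # Optional: app.launch(share=True) would also create a temporary public link (a built-in Gradio option).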