Update app.py
Browse files
app.py
CHANGED
@@ -1,220 +1,220 @@
|
|
1 |
-
import os
|
2 |
-
from datetime import datetime
|
3 |
-
import streamlit as st
|
4 |
-
import google.generativeai as genai
|
5 |
-
import PyPDF2 as pdf
|
6 |
-
from fpdf import FPDF
|
7 |
-
from dotenv import load_dotenv
|
8 |
-
from langchain_huggingface import HuggingFaceEmbeddings
|
9 |
-
from langchain_chroma import Chroma
|
10 |
-
|
11 |
-
# Load environment variables (including any values defined in a local .env file)
load_dotenv()

# Configure Generative AI API
# SECURITY: the API key must never be committed to source control. It is read
# from the GOOGLE_API_KEY environment variable instead; any key that was ever
# committed in plain text should be treated as leaked and revoked.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
16 |
-
|
17 |
-
# Initialize vectorstore
|
18 |
-
@st.cache_resource
def setup_vectorstore():
    """Build the Chroma vector store persisted in the ``cv_vectordb`` directory.

    The store uses a default-configured ``HuggingFaceEmbeddings`` as its
    embedding function. The result is wrapped in ``st.cache_resource``.

    Returns:
        The ``Chroma`` instance.
    """
    return Chroma(
        persist_directory="cv_vectordb",
        embedding_function=HuggingFaceEmbeddings(),
    )
|
23 |
-
|
24 |
-
# Convert PDF to text
|
25 |
-
def input_pdf_text(uploaded_file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        uploaded_file: Whatever ``PdfReader`` accepts; the callers in this
            file pass Streamlit uploads.

    Returns:
        The page texts concatenated in page order, with no separator.
        Pages that yield no text contribute an empty string.
    """
    reader = pdf.PdfReader(uploaded_file)
    # The original wrapped each result in str(), which would embed the literal
    # text "None" for a page without extractable text; fall back to "" instead.
    return "".join(page.extract_text() or "" for page in reader.pages)
|
32 |
-
|
33 |
-
# Retrieve relevant content from vectorstore
|
34 |
-
def retrieve_from_vectorstore(vectorstore, query):
    """Return the text of the documents the store's retriever finds for a query.

    Args:
        vectorstore: Object exposing ``as_retriever()``.
        query: Query passed to the retriever's ``invoke``.

    Returns:
        The ``page_content`` of each retrieved document, joined with newlines
        in the order the retriever returned them.
    """
    matches = vectorstore.as_retriever().invoke(query)
    return "\n".join(doc.page_content for doc in matches)
|
38 |
-
|
39 |
-
# Get response from Generative AI
|
40 |
-
def get_gemini_response(prompt):
    """Send a prompt to the ``gemini-pro`` model and return the generated text.

    Args:
        prompt: The full prompt text.

    Returns:
        The text of the first part of the first candidate, or ``None`` when
        the response is falsy or carries no candidate/part to read.
    """
    model = genai.GenerativeModel('gemini-pro')
    response = model.generate_content(prompt)
    if not response:
        return None
    try:
        return response.candidates[0].content.parts[0].text
    except (IndexError, AttributeError):
        # A response without candidates or parts would otherwise raise here
        # and abort the whole batch; callers already treat None as "no report".
        return None
|
44 |
-
|
45 |
-
def generate_pdf_report(candidate_name, report_content):
    """Render a generated fitment report to a PDF file and return its file name.

    Each line of ``report_content`` is stripped, has every ``*`` removed, and
    is then classified:

    * a line equal to one of the known section titles becomes a numbered header;
    * a ``- `` line that follows a recognised header becomes an indented bullet;
    * a four-column ``|``-delimited row becomes a bordered table row
      (markdown separator rows such as ``|---|---|`` are skipped);
    * anything else, including ``|`` lines that are not four-column rows,
      is written as wrapped plain text.

    Args:
        candidate_name: Used in the title line and as the file-name prefix.
        report_content: The report text, one item per line.

    Returns:
        The name of the PDF that was written:
        ``"<candidate_name>_report_<YYYY-mm-dd_HH-MM-SS>.pdf"``.
    """
    # Typographic characters that often appear in generated text, mapped to
    # Latin-1-safe equivalents before the lossy fallback below.
    ascii_equivalents = str.maketrans({
        "\u2022": "-",
        "\u2013": "-",
        "\u2014": "-",
        "\u2018": "'",
        "\u2019": "'",
        "\u201c": '"',
        "\u201d": '"',
        "\u2026": "...",
    })

    def to_latin1(text):
        # The built-in "Arial" font only covers Latin-1; FPDF raises an
        # encoding error for anything else, so unsupported characters are
        # replaced with "?" rather than crashing the whole report.
        return text.translate(ascii_equivalents).encode("latin-1", "replace").decode("latin-1")

    report = FPDF()
    report.add_page()
    report.set_font("Arial", size=12)
    report.cell(0, 8, txt=to_latin1(f"Candidate Report: {candidate_name}"), ln=True, align="L")
    report.ln(5)  # Add slight spacing after the title

    # Define numbered sections
    numbered_sections = {
        1: "Candidate Name and Email",
        2: '"Can Do" list:',
        3: '"Should Do" list',
        4: "Skill Comparison Table:",
        5: "Overall Matching Score:",
        6: "Analysis of Strengths and Weaknesses",
        7: "Recommendations for Improvement",
        8: "Conclusion on Fitment",
    }
    # Reverse lookup so a header line resolves to its number in one step.
    section_numbers = {title: number for number, title in numbered_sections.items()}

    current_section = None
    # Middle dot: a Latin-1 stand-in for "\u2022", which the core font cannot encode.
    bullet_point = "\xb7 "

    for line in report_content.splitlines():
        stripped_line = to_latin1(line.strip().replace("*", ""))  # Remove all asterisks

        # NOTE(review): header matching is exact, so a line such as
        # "1. Candidate Name and Email" falls through to plain text - confirm
        # whether numbered headers should also be recognised.
        if stripped_line in section_numbers:
            current_section = section_numbers[stripped_line]
            report.set_font("Arial", style="", size=11)
            report.cell(0, 6, txt=f"{current_section}. {stripped_line}", ln=True, align="L")
            report.ln(3)  # Reduced spacing after each section header
        # Sub-content that starts with "-"
        elif current_section and stripped_line.startswith("- "):
            report.set_font("Arial", size=10)
            report.cell(5)  # Add slight indentation
            report.cell(0, 5, txt=f"{bullet_point}{stripped_line[2:]}", ln=True)
        # Table rows
        elif "|" in stripped_line:
            cells = [cell.strip() for cell in stripped_line.split("|")[1:-1]]
            if cells and all(cell and not cell.strip("-:") for cell in cells):
                # Markdown header/body separator row: layout only, no content.
                continue
            if len(cells) == 4:
                report.set_font("Arial", size=9)
                report.cell(50, 6, cells[0], border=1)
                report.cell(35, 6, cells[1], border=1, align="C")
                report.cell(35, 6, cells[2], border=1, align="C")
                report.cell(35, 6, cells[3], border=1, align="C")
                report.ln()
            else:
                # Not a four-column row: keep the text instead of dropping it.
                report.set_font("Arial", size=10)
                report.multi_cell(0, 5, stripped_line)
        # Regular content as plain text
        else:
            report.set_font("Arial", size=10)
            report.multi_cell(0, 5, stripped_line)

    # Save the report as a PDF file
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    file_name = f"{candidate_name}_report_{timestamp}.pdf"
    report.output(file_name)
    return file_name
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
# Streamlit UI
|
110 |
-
# Streamlit UI: upload resumes and a job description, generate one fitment
# report per resume, then list the candidates ranked by matching score.
st.title("AI-Powered Candidate Shortlisting")
st.markdown("Analyze resumes, job descriptions, and match with company culture using RAG.")

# Setup vectorstore
vectorstore = setup_vectorstore()

# File upload
uploaded_resumes = st.file_uploader("Upload Resumes (PDFs)", type="pdf", accept_multiple_files=True)
uploaded_job_description = st.file_uploader("Upload Job Description (PDF)", type="pdf")

if st.button("Generate Fitment Reports"):
    if not uploaded_resumes or not uploaded_job_description:
        st.error("Please upload resumes and a job description.")
    else:
        with st.spinner("Processing..."):
            try:
                # Convert job description to text
                job_description_text = input_pdf_text(uploaded_job_description)

                # Retrieve relevant content from vectorstore
                company_culture_content = retrieve_from_vectorstore(vectorstore, "company culture match")

                # Process each resume
                fitment_results = []
                for resume_file in uploaded_resumes:
                    # The candidate name is the uploaded file name without its extension
                    candidate_name = os.path.splitext(resume_file.name)[0]

                    # Convert resume to text
                    resume_text = input_pdf_text(resume_file)

                    # Construct the prompt
                    input_prompt = f"""
### Task: Generate a candidate shortlisting report.

### Instructions:
You are a highly intelligent and unbiased system designed to shortlist candidates for a job based on:
1. The candidate's resume.
2. A provided job description.
3. Relevant company culture data retrieved from the vector database.

### Key Objectives:
- Analyze skills, qualifications, and experiences in the resume.
- Evaluate alignment with the job description.
- Assess cultural fit using company culture data.
- Provide detailed scoring, strengths, weaknesses, and recommendations.

### Required Sections in the Report:
- Candidate Name and Email
- Parse the job description and create a 'Should Do' list, categorizing required skills into levels: Beginner, Competent, Intermediate, Expert.
- Parse the candidate's resume and create a 'Can Do' list, categorizing listed skills into the same levels: Beginner, Competent, Intermediate, Expert.
- Matching score: A detailed table showing alignment of skills.
- Analysis of strengths and weaknesses.
- Recommendations for improvement.
- Overall conclusion.

### Input Data:
- **Resume**: {resume_text}
- **Job Description**: {job_description_text}
- **Company Culture Data**: {company_culture_content}

### Output Format:
1. Candidate Name and Email
2."Can Do" list:
3. "Should Do" list
4. Skill Comparison Table:
| Skill | "Can Do" Level | "Should Do" Level | Matching Score |
|--------------------------|----------------|--------------------|----------------|
5. Overall Matching Score: [Percentage]
6. Analysis of Strengths and Weaknesses
7. Recommendations for Improvement
8. Conclusion on Fitment
"""

                    # Generate the report
                    report_content = get_gemini_response(input_prompt)
                    if not report_content:
                        # Surface the gap instead of silently dropping the candidate.
                        st.warning(f"No report was generated for {candidate_name}; skipping.")
                        continue

                    # Extract the matching score safely. Asterisks are removed
                    # first so a markdown-bold header such as
                    # "**Overall Matching Score:** 85%" still parses.
                    try:
                        score_text = report_content.replace("*", "").split("Overall Matching Score:")[1]
                        matching_score = float(score_text.split("%")[0].strip())
                    except (IndexError, ValueError):
                        matching_score = 0.0
                        report_content += "\n\n[ERROR: Matching Score could not be parsed]"

                    # Generate PDF report
                    report_file = generate_pdf_report(candidate_name, report_content)

                    # Save results
                    fitment_results.append((candidate_name, matching_score, report_file))

                # Sort results by matching score in descending order
                fitment_results.sort(key=lambda result: result[1], reverse=True)

                # Display results in tabular form
                st.write("### Fitment Results")
                st.write("Below are the shortlisted candidates ranked by their fitment scores.")
                for rank, (candidate_name, matching_score, report_file) in enumerate(fitment_results, start=1):
                    col1, col2, col3, col4 = st.columns([3, 2, 2, 2])
                    col1.write(candidate_name)
                    col2.write(f"{matching_score:.2f}%")
                    col3.write(f"Rank {rank}")
                    with open(report_file, "rb") as f:
                        col4.download_button(
                            label="Download Report",
                            data=f,
                            file_name=os.path.basename(report_file),
                            mime="application/pdf",
                            # Explicit key: every row shares the same label, so
                            # the rank keeps the widgets distinct.
                            key=f"download_report_{rank}",
                        )
            except Exception as e:
                # Top-level boundary: show the failure in the UI instead of a traceback.
                st.error(f"Error generating fitment reports: {e}")
|
|
|
1 |
+
import os
|
2 |
+
from datetime import datetime
|
3 |
+
import streamlit as st
|
4 |
+
import google.generativeai as genai
|
5 |
+
import PyPDF2 as pdf
|
6 |
+
from fpdf import FPDF
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
9 |
+
from langchain_chroma import Chroma
|
10 |
+
|
11 |
+
# Load environment variables (including any values defined in a local .env file)
load_dotenv()

# Configure Generative AI API
# SECURITY: the API key must never be committed to source control. It is read
# from the GOOGLE_API_KEY environment variable instead; any key that was ever
# committed in plain text should be treated as leaked and revoked.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
16 |
+
|
17 |
+
# Initialize vectorstore
|
18 |
+
@st.cache_resource
def setup_vectorstore():
    """Build the Chroma vector store persisted in the ``cv_vectordb`` directory.

    The store uses a default-configured ``HuggingFaceEmbeddings`` as its
    embedding function. The result is wrapped in ``st.cache_resource``.

    Returns:
        The ``Chroma`` instance.
    """
    return Chroma(
        persist_directory="cv_vectordb",
        embedding_function=HuggingFaceEmbeddings(),
    )
|
23 |
+
|
24 |
+
# Convert PDF to text
|
25 |
+
def input_pdf_text(uploaded_file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        uploaded_file: Whatever ``PdfReader`` accepts; the callers in this
            file pass Streamlit uploads.

    Returns:
        The page texts concatenated in page order, with no separator.
        Pages that yield no text contribute an empty string.
    """
    reader = pdf.PdfReader(uploaded_file)
    # The original wrapped each result in str(), which would embed the literal
    # text "None" for a page without extractable text; fall back to "" instead.
    return "".join(page.extract_text() or "" for page in reader.pages)
|
32 |
+
|
33 |
+
# Retrieve relevant content from vectorstore
|
34 |
+
def retrieve_from_vectorstore(vectorstore, query):
    """Return the text of the documents the store's retriever finds for a query.

    Args:
        vectorstore: Object exposing ``as_retriever()``.
        query: Query passed to the retriever's ``invoke``.

    Returns:
        The ``page_content`` of each retrieved document, joined with newlines
        in the order the retriever returned them.
    """
    matches = vectorstore.as_retriever().invoke(query)
    return "\n".join(doc.page_content for doc in matches)
|
38 |
+
|
39 |
+
# Get response from Generative AI
|
40 |
+
def get_gemini_response(prompt):
    """Send a prompt to the ``gemini-pro`` model and return the generated text.

    Args:
        prompt: The full prompt text.

    Returns:
        The text of the first part of the first candidate, or ``None`` when
        the response is falsy or carries no candidate/part to read.
    """
    model = genai.GenerativeModel('gemini-pro')
    response = model.generate_content(prompt)
    if not response:
        return None
    try:
        return response.candidates[0].content.parts[0].text
    except (IndexError, AttributeError):
        # A response without candidates or parts would otherwise raise here
        # and abort the whole batch; callers already treat None as "no report".
        return None
|
44 |
+
|
45 |
+
def generate_pdf_report(candidate_name, report_content):
    """Render a generated fitment report to a PDF file and return its file name.

    Each line of ``report_content`` is stripped, has every ``*`` removed, and
    is then classified:

    * a line equal to one of the known section titles becomes a numbered header;
    * a ``- `` line that follows a recognised header becomes an indented bullet;
    * a four-column ``|``-delimited row becomes a bordered table row
      (markdown separator rows such as ``|---|---|`` are skipped);
    * anything else, including ``|`` lines that are not four-column rows,
      is written as wrapped plain text.

    Args:
        candidate_name: Used in the title line and as the file-name prefix.
        report_content: The report text, one item per line.

    Returns:
        The name of the PDF that was written:
        ``"<candidate_name>_report_<YYYY-mm-dd_HH-MM-SS>.pdf"``.
    """
    # Typographic characters that often appear in generated text, mapped to
    # Latin-1-safe equivalents before the lossy fallback below.
    ascii_equivalents = str.maketrans({
        "\u2022": "-",
        "\u2013": "-",
        "\u2014": "-",
        "\u2018": "'",
        "\u2019": "'",
        "\u201c": '"',
        "\u201d": '"',
        "\u2026": "...",
    })

    def to_latin1(text):
        # The built-in "Arial" font only covers Latin-1; FPDF raises an
        # encoding error for anything else, so unsupported characters are
        # replaced with "?" rather than crashing the whole report.
        return text.translate(ascii_equivalents).encode("latin-1", "replace").decode("latin-1")

    report = FPDF()
    report.add_page()
    report.set_font("Arial", size=12)
    report.cell(0, 8, txt=to_latin1(f"Candidate Report: {candidate_name}"), ln=True, align="L")
    report.ln(5)  # Add slight spacing after the title

    # Define numbered sections
    numbered_sections = {
        1: "Candidate Name and Email",
        2: '"Can Do" list:',
        3: '"Should Do" list',
        4: "Skill Comparison Table:",
        5: "Overall Matching Score:",
        6: "Analysis of Strengths and Weaknesses",
        7: "Recommendations for Improvement",
        8: "Conclusion on Fitment",
    }
    # Reverse lookup so a header line resolves to its number in one step.
    section_numbers = {title: number for number, title in numbered_sections.items()}

    current_section = None
    # Middle dot: a Latin-1 stand-in for "\u2022", which the core font cannot encode.
    bullet_point = "\xb7 "

    for line in report_content.splitlines():
        stripped_line = to_latin1(line.strip().replace("*", ""))  # Remove all asterisks

        # NOTE(review): header matching is exact, so a line such as
        # "1. Candidate Name and Email" falls through to plain text - confirm
        # whether numbered headers should also be recognised.
        if stripped_line in section_numbers:
            current_section = section_numbers[stripped_line]
            report.set_font("Arial", style="", size=11)
            report.cell(0, 6, txt=f"{current_section}. {stripped_line}", ln=True, align="L")
            report.ln(3)  # Reduced spacing after each section header
        # Sub-content that starts with "-"
        elif current_section and stripped_line.startswith("- "):
            report.set_font("Arial", size=10)
            report.cell(5)  # Add slight indentation
            report.cell(0, 5, txt=f"{bullet_point}{stripped_line[2:]}", ln=True)
        # Table rows
        elif "|" in stripped_line:
            cells = [cell.strip() for cell in stripped_line.split("|")[1:-1]]
            if cells and all(cell and not cell.strip("-:") for cell in cells):
                # Markdown header/body separator row: layout only, no content.
                continue
            if len(cells) == 4:
                report.set_font("Arial", size=9)
                report.cell(50, 6, cells[0], border=1)
                report.cell(35, 6, cells[1], border=1, align="C")
                report.cell(35, 6, cells[2], border=1, align="C")
                report.cell(35, 6, cells[3], border=1, align="C")
                report.ln()
            else:
                # Not a four-column row: keep the text instead of dropping it.
                report.set_font("Arial", size=10)
                report.multi_cell(0, 5, stripped_line)
        # Regular content as plain text
        else:
            report.set_font("Arial", size=10)
            report.multi_cell(0, 5, stripped_line)

    # Save the report as a PDF file
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    file_name = f"{candidate_name}_report_{timestamp}.pdf"
    report.output(file_name)
    return file_name
|
106 |
+
|
107 |
+
|
108 |
+
|
109 |
+
# Streamlit UI
|
110 |
+
# Streamlit UI: upload resumes and a job description, generate one fitment
# report per resume, then list the candidates ranked by matching score.
st.title("AI-Powered Candidate Shortlisting")
st.markdown("Analyze resumes, job descriptions, and match with company culture using RAG.")

# Setup vectorstore
vectorstore = setup_vectorstore()

# File upload
uploaded_resumes = st.file_uploader("Upload Resumes (PDFs)", type="pdf", accept_multiple_files=True)
uploaded_job_description = st.file_uploader("Upload Job Description (PDF)", type="pdf")

if st.button("Generate Fitment Reports"):
    if not uploaded_resumes or not uploaded_job_description:
        st.error("Please upload resumes and a job description.")
    else:
        with st.spinner("Processing..."):
            try:
                # Convert job description to text
                job_description_text = input_pdf_text(uploaded_job_description)

                # Retrieve relevant content from vectorstore
                company_culture_content = retrieve_from_vectorstore(vectorstore, "company culture match")

                # Process each resume
                fitment_results = []
                for resume_file in uploaded_resumes:
                    # The candidate name is the uploaded file name without its extension
                    candidate_name = os.path.splitext(resume_file.name)[0]

                    # Convert resume to text
                    resume_text = input_pdf_text(resume_file)

                    # Construct the prompt
                    input_prompt = f"""
### Task: Generate a candidate shortlisting report.

### Instructions:
You are a highly intelligent and unbiased system designed to shortlist candidates for a job based on:
1. The candidate's resume.
2. A provided job description.
3. Relevant company culture data retrieved from the vector database.

### Key Objectives:
- Analyze skills, qualifications, and experiences in the resume.
- Evaluate alignment with the job description.
- Assess cultural fit using company culture data.
- Provide detailed scoring, strengths, weaknesses, and recommendations.

### Required Sections in the Report:
- Candidate Name and Email
- Parse the job description and create a 'Should Do' list, categorizing required skills into levels: Beginner, Competent, Intermediate, Expert.
- Parse the candidate's resume and create a 'Can Do' list, categorizing listed skills into the same levels: Beginner, Competent, Intermediate, Expert.
- Matching score: A detailed table showing alignment of skills.
- Analysis of strengths and weaknesses.
- Recommendations for improvement.
- Overall conclusion.

### Input Data:
- **Resume**: {resume_text}
- **Job Description**: {job_description_text}
- **Company Culture Data**: {company_culture_content}

### Output Format:
1. Candidate Name and Email
2."Can Do" list:
3. "Should Do" list
4. Skill Comparison Table:
| Skill | "Can Do" Level | "Should Do" Level | Matching Score |
|--------------------------|----------------|--------------------|----------------|
5. Overall Matching Score: [Percentage]
6. Analysis of Strengths and Weaknesses
7. Recommendations for Improvement
8. Conclusion on Fitment
"""

                    # Generate the report
                    report_content = get_gemini_response(input_prompt)
                    if not report_content:
                        # Surface the gap instead of silently dropping the candidate.
                        st.warning(f"No report was generated for {candidate_name}; skipping.")
                        continue

                    # Extract the matching score safely. Asterisks are removed
                    # first so a markdown-bold header such as
                    # "**Overall Matching Score:** 85%" still parses.
                    try:
                        score_text = report_content.replace("*", "").split("Overall Matching Score:")[1]
                        matching_score = float(score_text.split("%")[0].strip())
                    except (IndexError, ValueError):
                        matching_score = 0.0
                        report_content += "\n\n[ERROR: Matching Score could not be parsed]"

                    # Generate PDF report
                    report_file = generate_pdf_report(candidate_name, report_content)

                    # Save results
                    fitment_results.append((candidate_name, matching_score, report_file))

                # Sort results by matching score in descending order
                fitment_results.sort(key=lambda result: result[1], reverse=True)

                # Display results in tabular form
                st.write("### Fitment Results")
                st.write("Below are the shortlisted candidates ranked by their fitment scores.")
                for rank, (candidate_name, matching_score, report_file) in enumerate(fitment_results, start=1):
                    col1, col2, col3, col4 = st.columns([3, 2, 2, 2])
                    col1.write(candidate_name)
                    col2.write(f"{matching_score:.2f}%")
                    col3.write(f"Rank {rank}")
                    with open(report_file, "rb") as f:
                        col4.download_button(
                            label="Download Report",
                            data=f,
                            file_name=os.path.basename(report_file),
                            mime="application/pdf",
                            # Explicit key: every row shares the same label, so
                            # the rank keeps the widgets distinct.
                            key=f"download_report_{rank}",
                        )
            except Exception as e:
                # Top-level boundary: show the failure in the UI instead of a traceback.
                st.error(f"Error generating fitment reports: {e}")
|