|
import os |
|
from datetime import datetime |
|
import streamlit as st |
|
import google.generativeai as genai |
|
import PyPDF2 as pdf |
|
from fpdf import FPDF |
|
from dotenv import load_dotenv |
|
from langchain_huggingface import HuggingFaceEmbeddings |
|
from langchain_chroma import Chroma |
|
|
|
|
|
# Load variables from a local .env file (if one exists) into the process
# environment so the API key can be supplied without touching the source.
load_dotenv()

# SECURITY: credentials must never be committed to source control. The key is
# read from the environment instead; any key that was previously hard-coded
# in this file should be treated as leaked and revoked.
_GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not _GOOGLE_API_KEY:
    # Fail early with an actionable message rather than on the first model call.
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Define it in the environment or in a .env file."
    )

genai.configure(api_key=_GOOGLE_API_KEY)
|
|
|
|
|
@st.cache_resource
def setup_vectorstore():
    """Create the Chroma vector store used for company-culture lookups.

    The store is opened on the ``cv_vectordb`` directory and embeds text with
    a default-configured ``HuggingFaceEmbeddings`` instance. The function is
    wrapped in ``st.cache_resource``, so Streamlit is expected to reuse the
    returned object instead of rebuilding it on every script rerun.

    Returns:
        The configured ``Chroma`` instance.
    """
    return Chroma(
        persist_directory="cv_vectordb",
        embedding_function=HuggingFaceEmbeddings(),
    )
|
|
|
|
|
def input_pdf_text(uploaded_file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        uploaded_file: The PDF source handed to ``PdfReader`` (the app passes
            Streamlit upload objects).

    Returns:
        The text of all pages concatenated in page order, with no separator
        inserted between pages. A page that yields no text contributes an
        empty string.
    """
    reader = pdf.PdfReader(uploaded_file)
    # ``or ""`` keeps a page without extractable text from injecting the
    # literal string "None" into the prompt, which ``str(None)`` would do.
    return "".join(page.extract_text() or "" for page in reader.pages)
|
|
|
|
|
def retrieve_from_vectorstore(vectorstore, query):
    """Look up ``query`` in the vector store and return the matched text.

    Args:
        vectorstore: Object exposing ``as_retriever()``; the retriever it
            returns must provide ``invoke(query)``.
        query: The search text passed to the retriever.

    Returns:
        The ``page_content`` of each returned document, joined with newlines
        in the order the retriever produced them (empty string if none).
    """
    documents = vectorstore.as_retriever().invoke(query)
    return "\n".join(document.page_content for document in documents)
|
|
|
|
|
def get_gemini_response(prompt, model_name='gemini-2.0-flash'):
    """Send ``prompt`` to a Gemini model and return the generated text.

    Args:
        prompt: The full prompt text to submit.
        model_name: Name of the generative model to use. Defaults to the
            model this app has always used.

    Returns:
        The text of the first part of the first candidate, or ``None`` when
        the response is empty, has no candidates, or the first candidate has
        no parts.
    """
    model = genai.GenerativeModel(model_name)
    response = model.generate_content(prompt)

    # A response without candidates or parts would make the indexing below
    # raise IndexError; report "no content" the same way as an empty response
    # so the caller's existing ``if report_content`` check handles it.
    if not response or not response.candidates:
        return None
    parts = response.candidates[0].content.parts
    if not parts:
        return None
    return parts[0].text
|
|
|
# Common typographic characters that model output tends to contain, mapped to
# Latin-1 equivalents so they survive the built-in PDF font's encoding.
_PDF_CHAR_REPLACEMENTS = str.maketrans({
    "\u2022": "\u00b7",  # bullet -> middle dot
    "\u2013": "-",       # en dash
    "\u2014": "-",       # em dash
    "\u2018": "'",
    "\u2019": "'",
    "\u201c": '"',
    "\u201d": '"',
    "\u2026": "...",
})


def _latin1_safe(text):
    """Return ``text`` with every character representable in Latin-1."""
    # Anything still outside Latin-1 after the mapping becomes "?" instead of
    # raising an encoding error while the PDF is being written.
    return text.translate(_PDF_CHAR_REPLACEMENTS).encode("latin-1", "replace").decode("latin-1")


def _heading_key(text):
    """Normalise a heading for comparison: no 'N.' prefix, no trailing colon, case-folded."""
    number, dot, rest = text.partition(".")
    if dot and number.strip().isdigit():
        text = rest
    return text.strip().rstrip(":").strip().casefold()


def generate_pdf_report(candidate_name, report_content):
    """Render a generated fitment report to a PDF file and return its name.

    The report text is processed line by line (asterisks removed):
    known section headings are printed numbered, ``- `` lines inside a
    section become bullets, 4-column ``|``-delimited rows become table rows,
    and everything else is written as wrapped text.

    Args:
        candidate_name: Used in the report title and in the output file name.
        report_content: The report text to lay out.

    Returns:
        The name of the PDF written to the current working directory, in the
        form ``<candidate_name>_report_<timestamp>.pdf``.
    """
    # Named ``document`` so it does not shadow the module-level ``pdf`` alias.
    document = FPDF()
    document.add_page()
    document.set_font("Arial", size=12)
    document.cell(0, 8, txt=_latin1_safe(f"Candidate Report: {candidate_name}"), ln=True, align="L")
    document.ln(5)

    numbered_sections = {
        1: "Candidate Name and Email",
        2: '"Can Do" list:',
        3: '"Should Do" list',
        4: "Skill Comparison Table:",
        5: "Overall Matching Score:",
        6: "Analysis of Strengths and Weaknesses",
        7: "Recommendations for Improvement",
        8: "Conclusion on Fitment",
    }
    # Reverse lookup on normalised titles, so "3. Should Do list", a trailing
    # colon, or different casing still match the heading.
    section_numbers = {
        _heading_key(title): number for number, title in numbered_sections.items()
    }

    current_section = None
    # Middle dot: a bullet glyph that exists in Latin-1 (U+2022 does not).
    bullet_point = "\u00b7 "

    for line in report_content.splitlines():
        stripped_line = line.replace("*", "").strip()
        section_number = section_numbers.get(_heading_key(stripped_line))

        if section_number is not None:
            current_section = section_number
            heading = f"{section_number}. {numbered_sections[section_number]}"
            document.set_font("Arial", style="", size=11)
            document.cell(0, 6, txt=_latin1_safe(heading), ln=True, align="L")
            document.ln(3)

        elif current_section and stripped_line.startswith("- "):
            document.set_font("Arial", size=10)
            document.cell(5)  # small left indent before the bullet
            document.cell(0, 5, txt=_latin1_safe(f"{bullet_point}{stripped_line[2:]}"), ln=True)

        elif "|" in stripped_line:
            cells = [cell.strip() for cell in stripped_line.split("|")[1:-1]]
            if len(cells) == 4:
                # A Markdown separator row (only dashes/colons) carries no
                # data, so it is not drawn.
                if any(cell.strip("-: ") for cell in cells):
                    document.set_font("Arial", size=9)
                    document.cell(50, 6, _latin1_safe(cells[0]), border=1)
                    document.cell(35, 6, _latin1_safe(cells[1]), border=1, align="C")
                    document.cell(35, 6, _latin1_safe(cells[2]), border=1, align="C")
                    document.cell(35, 6, _latin1_safe(cells[3]), border=1, align="C")
                    document.ln()
            else:
                # Not a 4-column table row: keep the text instead of
                # silently dropping it from the report.
                document.set_font("Arial", size=10)
                document.set_x(document.l_margin)
                document.multi_cell(0, 5, _latin1_safe(stripped_line))

        else:
            document.set_font("Arial", size=10)
            # NOTE(review): some fpdf releases appear to leave the cursor at
            # the right margin after multi_cell; resetting x keeps
            # consecutive paragraphs full-width. Confirm against the
            # installed version.
            document.set_x(document.l_margin)
            document.multi_cell(0, 5, _latin1_safe(stripped_line))

    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    file_name = f"{candidate_name}_report_{timestamp}.pdf"
    document.output(file_name)
    return file_name
|
|
|
|
|
|
|
|
|
# --- Page header -----------------------------------------------------------
st.title("AI-Powered Candidate Shortlisting")
st.markdown(
    "Analyze resumes, job descriptions, and match with company culture using RAG."
)

# --- Shared resources ------------------------------------------------------
# Vector store queried later for company-culture context.
vectorstore = setup_vectorstore()

# --- Inputs ----------------------------------------------------------------
# Several resumes may be uploaded at once; exactly one job description.
uploaded_resumes = st.file_uploader(
    "Upload Resumes (PDFs)",
    accept_multiple_files=True,
    type="pdf",
)
uploaded_job_description = st.file_uploader(
    "Upload Job Description (PDF)",
    type="pdf",
)
|
|
|
# Prompt submitted to the model once per resume; the three placeholders are
# filled with the extracted resume text, job description text, and the
# company-culture text retrieved from the vector store.
# NOTE(review): the wording is reproduced unchanged, including its spelling
# slips and the incomplete phrase "make the of all" (presumably "average").
# Editing the prompt changes model behaviour, so confirm intent before fixing.
FITMENT_PROMPT_TEMPLATE = """
### Task: Generate a candidate shortlisting report.

### Instructions:
You are a highly intelligent and unbiased system designed to shortlist candidates for a job based on:
1. The candidate's resume.
2. A provided job description.
3. Relevant company culture data retrieved from the vector database.

### Key Objectives:
- Accurate Matching the Skills from job description and resumes.
- Analyze skills, qualifications, and experiences in the resume.
- Evaluate alignment with the job description.
- Assess cultural fit using company culture data.
- Provide detailed scoring, strengths, weaknesses, and recommendations.

### Required Sections in the Report:
- Candidate Name and Email
- Parse properly All the job description and create a 'Should Do' list, categorizing required skills into levels: Beginner, Competent, Intermediate, Expert by Studying and analysing job title, there requirements and all.
- Parse properly All the candidate's resume and create a 'Can Do' list, categorizing listed skills into the same levels: Beginner, Competent, Intermediate, Expert. To categorize the skill see whether there are certificates, projects, internship experinece, any other experinece.
- Matching score: Match the created 'can do' and 'should do' list. To generate the matchinging Score use strategy as if skill level from both list is same then give it 100 and decrease 25 for each difference in skill levels from should do and can do list.And if can do skill level is greater than should do skill level then give 100.To calculate final Mathching score make the of all can do skill score.
- Analysis of strengths and weaknesses.
- Recommendations for improvement.
- Overall conclusion.

### Input Data:
- **Resume**: {resume_text}
- **Job Description**: {job_description_text}
- **Company Culture Data**: {company_culture_content}

### Output Format:
1. Candidate Name and Email
2."Can Do" list:
3. "Should Do" list
4. Skill Comparison Table:
| Skill | "Can Do" Level | "Should Do" Level | Matching Score |
|--------------------------|----------------|--------------------|----------------|
5. Overall Matching Score: [Percentage]
6. Analysis of Strengths and Weaknesses
7. Recommendations for Improvement
8. Conclusion on Fitment
Generate Accurate Report of the candedate.
Note:Remove or do not generate the words 'Ok','Okay'and the sentence like 'Okay, I will generate a candidate shortlisting report for ' from the generated pdf of the fitment report
"""


def _parse_matching_score(report_content):
    """Return the number following 'Overall Matching Score', or None if absent.

    Tolerates decoration around the value such as Markdown bold markers,
    brackets, or a missing colon (e.g. ``**Overall Matching Score:** 85%``).
    """
    import re  # local import: only this helper needs it

    _, heading, tail = report_content.partition("Overall Matching Score")
    if not heading:
        return None

    # Only look near the heading (its own line and the next two) so a
    # percentage mentioned in a later section is not mistaken for the score.
    window = "\n".join(tail.splitlines()[:3])
    number = r"(\d+(?:\.\d+)?)"
    # Prefer a value written as a percentage; otherwise accept a bare number
    # on the heading's own line.
    match = re.search(number + r"\s*%", window) or re.search(
        number, window.split("\n", 1)[0]
    )
    return float(match.group(1)) if match else None


if st.button("Generate Fitment Reports"):
    if not uploaded_resumes or not uploaded_job_description:
        st.error("Please upload resumes and a job description.")
    else:
        with st.spinner("Processing..."):
            # Top-level boundary of the app: any failure is shown to the user
            # rather than crashing the page.
            try:
                # Inputs shared by every candidate are computed once.
                job_description_text = input_pdf_text(uploaded_job_description)
                company_culture_content = retrieve_from_vectorstore(
                    vectorstore, "company culture match"
                )

                # One (candidate_name, matching_score, report_file) per resume.
                fitment_results = []
                for resume_file in uploaded_resumes:
                    # The candidate is identified by the upload's file name
                    # without its extension.
                    candidate_name = os.path.splitext(resume_file.name)[0]
                    resume_text = input_pdf_text(resume_file)

                    input_prompt = FITMENT_PROMPT_TEMPLATE.format(
                        resume_text=resume_text,
                        job_description_text=job_description_text,
                        company_culture_content=company_culture_content,
                    )
                    report_content = get_gemini_response(input_prompt)

                    if not report_content:
                        # Tell the user instead of silently dropping the candidate.
                        st.warning(
                            f"No report was returned for {candidate_name}; skipping."
                        )
                        continue

                    matching_score = _parse_matching_score(report_content)
                    if matching_score is None:
                        # Keep the candidate in the ranking, scored 0, and
                        # record the problem in the report itself.
                        matching_score = 0.0
                        report_content += "\n\n[ERROR: Matching Score could not be parsed]"

                    report_file = generate_pdf_report(candidate_name, report_content)
                    fitment_results.append((candidate_name, matching_score, report_file))

                # Best match first.
                fitment_results.sort(key=lambda x: x[1], reverse=True)

                st.write("### Fitment Results")
                st.write("Below are the shortlisted candidates ranked by their fitment scores.")
                for rank, (candidate_name, matching_score, report_file) in enumerate(
                    fitment_results, start=1
                ):
                    col1, col2, col3, col4 = st.columns([3, 2, 2, 2])
                    col1.write(candidate_name)
                    col2.write(f"{matching_score:.2f}%")
                    col3.write(f"Rank {rank}")
                    with open(report_file, "rb") as f:
                        col4.download_button(
                            label="Download Report",
                            data=f,
                            file_name=os.path.basename(report_file),
                            mime="application/pdf",
                            # Explicit key so identically labelled buttons
                            # never collide, even for identical file names.
                            key=f"download_report_{rank}",
                        )
            except Exception as e:
                st.error(f"Error generating fitment reports: {e}")
|
|