File size: 9,731 Bytes
2d23c1c
 
 
 
 
 
 
 
 
 
 
 
 
 
3033d33
2d23c1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84746f8
2d23c1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ffe1af
2d23c1c
 
 
 
 
 
 
47e7dcb
 
 
2d23c1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ffe1af
a7bcf43
2d23c1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
import os
import re
from datetime import datetime

import google.generativeai as genai
import PyPDF2 as pdf
import streamlit as st
from dotenv import load_dotenv
from fpdf import FPDF
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

# Load environment variables (python-dotenv also reads a local .env file)
load_dotenv()

# Configure Generative AI API.
# The key is taken from the GOOGLE_API_KEY environment variable instead of
# being committed to source control.
# NOTE(review): a key literal was previously embedded on this line; treat it
# as leaked and revoke it.
_GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not _GOOGLE_API_KEY:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; define it in the environment or in a .env file."
    )
genai.configure(api_key=_GOOGLE_API_KEY)

# Initialize vectorstore
@st.cache_resource
def setup_vectorstore():
    """Build the Chroma vector store persisted under ``cv_vectordb``.

    The store is created with a default-constructed ``HuggingFaceEmbeddings``
    as its embedding function. The function is wrapped in
    ``st.cache_resource``.

    Returns:
        The ``Chroma`` instance.
    """
    embedding_fn = HuggingFaceEmbeddings()
    return Chroma(
        persist_directory="cv_vectordb",
        embedding_function=embedding_fn,
    )

# Convert PDF to text
def input_pdf_text(uploaded_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        uploaded_file: Anything ``PyPDF2.PdfReader`` accepts (the callers in
            this file pass Streamlit uploaded-file objects).

    Returns:
        The text of all pages joined in page order, with no separator added
        between pages.
    """
    reader = pdf.PdfReader(uploaded_file)
    # `or ""` keeps a page without extractable text from contributing the
    # literal string "None" to the result.
    return "".join(page.extract_text() or "" for page in reader.pages)

# Retrieve relevant content from vectorstore
def retrieve_from_vectorstore(vectorstore, query):
    """Look up *query* in *vectorstore* and return the matching text.

    Args:
        vectorstore: Object exposing ``as_retriever()``; the retriever it
            returns must expose ``invoke(query)``.
        query: Query passed to the retriever.

    Returns:
        The ``page_content`` of each returned document, one per line.
    """
    matches = vectorstore.as_retriever().invoke(query)
    contents = []
    for document in matches:
        contents.append(document.page_content)
    return "\n".join(contents)

# Get response from Generative AI
def get_gemini_response(prompt, model_name='gemini-2.0-flash'):
    """Send *prompt* to a Gemini model and return the generated text.

    Args:
        prompt: Prompt passed to ``generate_content``.
        model_name: Name of the generative model to use.

    Returns:
        The text of the first candidate, or ``None`` when the response holds
        no candidate, no content parts, or only empty text.
    """
    model = genai.GenerativeModel(model_name)
    response = model.generate_content(prompt)
    # A response can come back without candidates or parts; indexing [0]
    # unconditionally would raise IndexError instead of reporting "no result".
    if not response or not response.candidates:
        return None
    parts = response.candidates[0].content.parts
    if not parts:
        return None
    # Join every part so a reply split across several parts is not truncated.
    # NOTE(review): assumes non-text parts expose an empty ``text`` - confirm.
    text = "".join(part.text for part in parts)
    return text or None

# Titles of the report sections, keyed by the number printed in front of them.
_REPORT_SECTIONS = {
    1: "Candidate Name and Email",
    2: '"Can Do" list:',
    3: '"Should Do" list',
    4: "Skill Comparison Table:",
    5: "Overall Matching Score:",
    6: "Analysis of Strengths and Weaknesses",
    7: "Recommendations for Improvement",
    8: "Conclusion on Fitment",
}

# Markdown heading marks and "N." / "N)" numbering that may precede a title.
_SECTION_PREFIX = re.compile(r"^[#\s]*(?:\d+\s*[.)]\s*)?")

# Bullet glyph expressed as a Latin-1 encodable code point.
# NOTE(review): relies on the core PDF fonts using WinAnsi (cp1252), where
# byte 149 is the bullet - confirm against the installed fpdf package.
_PDF_BULLET = chr(149)

# Common typographic characters mapped to Latin-1 friendly equivalents.
_PDF_CHAR_MAP = str.maketrans({
    "\u2022": _PDF_BULLET,
    "\u2018": "'",
    "\u2019": "'",
    "\u201c": '"',
    "\u201d": '"',
    "\u2013": "-",
    "\u2014": "-",
    "\u2026": "...",
})


def _pdf_safe(text):
    """Return *text* restricted to Latin-1 so the core Arial font can encode it."""
    return text.translate(_PDF_CHAR_MAP).encode("latin-1", "replace").decode("latin-1")


def _match_section(text):
    """Return ``(number, trailing_text)`` if *text* is a section header, else ``None``."""
    candidate = _SECTION_PREFIX.sub("", text).strip()
    lowered = candidate.lower()
    for number, title in _REPORT_SECTIONS.items():
        key = title.rstrip(":").lower()
        if lowered.rstrip(": ") == key:
            return number, ""
        if lowered.startswith(key + ":"):
            return number, candidate[len(key) + 1:].strip()
    return None


def _table_cells(text):
    """Return the four cells of a ``| a | b | c | d |`` row, or ``None``."""
    if "|" not in text:
        return None
    cells = [cell.strip() for cell in text.split("|")[1:-1]]
    return cells if len(cells) == 4 else None


def generate_pdf_report(candidate_name, report_content):
    """Render a candidate report as a PDF file in the working directory.

    Each line of *report_content* is classified as a section header, a bullet
    item, a four-column table row or plain text, and drawn accordingly.
    Asterisks are removed from every line.

    Args:
        candidate_name: Name used in the title line and in the file name.
        report_content: Report text, one entry per line.

    Returns:
        The name of the written file,
        ``"<candidate_name>_report_<YYYY-mm-dd_HH-MM-SS>.pdf"``.
    """
    doc = FPDF()
    doc.add_page()
    doc.set_font("Arial", size=12)
    doc.cell(0, 8, txt=_pdf_safe(f"Candidate Report: {candidate_name}"), ln=True, align="L")
    doc.ln(5)  # Add slight spacing after the title

    current_section = None

    for raw_line in report_content.splitlines():
        stripped = raw_line.strip()
        # Detect list markers before asterisks are removed, otherwise a
        # markdown "* item" bullet loses its marker and is never recognised.
        is_bullet = current_section is not None and stripped.startswith(("- ", "* "))
        if is_bullet:
            stripped = stripped[2:]
        text = _pdf_safe(stripped.replace("*", "").strip())

        # Sub-content of a section, drawn indented behind a bullet glyph
        if is_bullet:
            doc.set_font("Arial", size=10)
            doc.cell(5)  # Add slight indentation
            doc.cell(0, 5, txt=f"{_PDF_BULLET} {text}", ln=True)
            continue

        # Section header; numbering and trailing text on the line are tolerated
        header = _match_section(text)
        if header is not None:
            current_section, trailing = header
            label = f"{current_section}. {_REPORT_SECTIONS[current_section]}"
            if trailing:
                label = f"{label} {trailing}"
            doc.set_font("Arial", style="", size=11)
            doc.cell(0, 6, txt=label, ln=True, align="L")
            doc.ln(3)  # Reduced spacing after each section header
            continue

        # Four-column table row; other lines containing "|" fall through to
        # plain text instead of being dropped.
        cells = _table_cells(text)
        if cells is not None:
            # Skip the markdown rule row ("|----|----|") under the header.
            if not all(cell and set(cell) <= set("-:") for cell in cells):
                doc.set_font("Arial", size=9)
                doc.cell(50, 6, cells[0], border=1)
                doc.cell(35, 6, cells[1], border=1, align="C")
                doc.cell(35, 6, cells[2], border=1, align="C")
                doc.cell(35, 6, cells[3], border=1, align="C")
                doc.ln()
            continue

        # Regular content as plain text
        doc.set_font("Arial", size=10)
        # NOTE(review): fpdf2 leaves the cursor at the right edge after
        # multi_cell by default; resetting x keeps the next call from running
        # out of horizontal space. Harmless if the classic PyFPDF is installed.
        doc.set_x(doc.l_margin)
        doc.multi_cell(0, 5, text)

    # Save the report as a PDF file
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    file_name = f"{candidate_name}_report_{timestamp}.pdf"
    doc.output(file_name)
    return file_name



# Streamlit UI
# Page title and a one-line description of what the app does.
st.title("AI-Powered Candidate Shortlisting")
st.markdown("Analyze resumes, job descriptions, and match with company culture using RAG.")

# Setup vectorstore
# setup_vectorstore is decorated with st.cache_resource.
vectorstore = setup_vectorstore()

# File upload
# Several resume PDFs may be selected at once; the job description is a
# single PDF.
uploaded_resumes = st.file_uploader("Upload Resumes (PDFs)", type="pdf", accept_multiple_files=True)
uploaded_job_description = st.file_uploader("Upload Job Description (PDF)", type="pdf")

# Generate, rank and offer for download one fitment report per resume.
# NOTE(review): clicking a download button makes Streamlit rerun the script,
# and st.button is expected to return False on that rerun, so the results
# table would disappear - consider keeping results in st.session_state.
if st.button("Generate Fitment Reports"):
    if not uploaded_resumes or not uploaded_job_description:
        st.error("Please upload resumes and a job description.")
    else:
        with st.spinner("Processing..."):
            try:
                # Convert job description to text
                job_description_text = input_pdf_text(uploaded_job_description)

                # Retrieve relevant content from vectorstore
                company_culture_content = retrieve_from_vectorstore(vectorstore, "company culture match")

                # First percentage on an "Overall Matching Score" line.
                # Markdown emphasis, brackets or spaces between the label and
                # the number are tolerated.
                score_pattern = re.compile(
                    r"Overall Matching Score[^\d\n]*(\d+(?:\.\d+)?)\s*%",
                    re.IGNORECASE,
                )

                # Process each resume
                fitment_results = []
                for resume_file in uploaded_resumes:
                    # Candidate name is the uploaded file name without extension
                    candidate_name = os.path.splitext(resume_file.name)[0]

                    # Convert resume to text
                    resume_text = input_pdf_text(resume_file)

                    # Construct the prompt
                    input_prompt = f"""
### Task: Generate a candidate shortlisting report.

### Instructions:
You are a highly intelligent and unbiased system designed to shortlist candidates for a job based on:
1. The candidate's resume.
2. A provided job description.
3. Relevant company culture data retrieved from the vector database.

### Key Objectives:
- Accurate Matching the Skills from job description and resumes.
- Analyze skills, qualifications, and experiences in the resume.
- Evaluate alignment with the job description.
- Assess cultural fit using company culture data.
- Provide detailed scoring, strengths, weaknesses, and recommendations.

### Required Sections in the Report:
- Candidate Name and Email
- Parse properly All the job description and create a 'Should Do' list, categorizing required skills into levels: Beginner, Competent, Intermediate, Expert by Studying and analysing job title, there requirements and all.
- Parse properly All the candidate's resume and create a 'Can Do' list, categorizing listed skills into the same levels: Beginner, Competent, Intermediate, Expert. To categorize the skill see whether there are certificates, projects, internship experinece, any other experinece.
- Matching score: Match the created 'can do' and 'should do' list. To generate the matchinging Score use strategy as if skill level from both list is same then give it 100 and decrease 25 for each difference in skill levels from should do and can do list.And if can do skill level is greater than should do skill level then give 100.To calculate final Mathching score make the of all can do skill score.
- Analysis of strengths and weaknesses.
- Recommendations for improvement.
- Overall conclusion.

### Input Data:
- **Resume**: {resume_text}
- **Job Description**: {job_description_text}
- **Company Culture Data**: {company_culture_content}

### Output Format:
1. Candidate Name and Email
2."Can Do" list:
3. "Should Do" list
4. Skill Comparison Table:
   | Skill                   | "Can Do" Level  | "Should Do" Level  | Matching Score |
   |--------------------------|----------------|--------------------|----------------|
5. Overall Matching Score: [Percentage]
6. Analysis of Strengths and Weaknesses
7. Recommendations for Improvement
8. Conclusion on Fitment
Generate Accurate Report of the candedate.
Note:Remove or do not generate the words 'Ok','Okay'and the sentence like 'Okay, I will generate a candidate shortlisting report for ' from the generated pdf of the  fitment report
                    """

                    # Generate the report
                    report_content = get_gemini_response(input_prompt)

                    if not report_content:
                        # Tell the user instead of silently dropping the candidate.
                        st.warning(f"No report was generated for {candidate_name}.")
                        continue

                    # Extract the matching score; fall back to 0 and flag the
                    # report when no percentage can be found.
                    score_match = score_pattern.search(report_content)
                    if score_match:
                        matching_score = float(score_match.group(1))
                    else:
                        matching_score = 0.0
                        report_content += "\n\n[ERROR: Matching Score could not be parsed]"

                    # Generate PDF report
                    report_file = generate_pdf_report(candidate_name, report_content)

                    # Save results
                    fitment_results.append((candidate_name, matching_score, report_file))

                # Sort results by matching score in descending order
                fitment_results.sort(key=lambda result: result[1], reverse=True)

                # Display results in tabular form
                st.write("### Fitment Results")
                st.write("Below are the shortlisted candidates ranked by their fitment scores.")
                for rank, (candidate_name, matching_score, report_file) in enumerate(fitment_results, start=1):
                    col1, col2, col3, col4 = st.columns([3, 2, 2, 2])
                    col1.write(candidate_name)
                    col2.write(f"{matching_score:.2f}%")
                    col3.write(f"Rank {rank}")
                    # Read the bytes up front so the file handle is closed
                    # before the widget is created.
                    with open(report_file, "rb") as f:
                        report_bytes = f.read()
                    col4.download_button(
                        label="Download Report",
                        data=report_bytes,
                        file_name=os.path.basename(report_file),
                        mime="application/pdf",
                        # Explicit key: every button in the loop shares one label.
                        key=f"download_report_{rank}",
                    )
            except Exception as e:
                # Top-level UI boundary: surface any failure to the user.
                st.error(f"Error generating fitment reports: {e}")