File size: 6,732 Bytes
6beb322
 
 
 
 
e89f03d
6beb322
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e14a9fb
 
3be6673
 
 
 
6beb322
 
 
 
 
 
 
 
 
 
 
604c992
6beb322
 
 
 
48bdf6b
6beb322
e14a9fb
6beb322
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7bc96ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6beb322
509fe92
e14a9fb
6beb322
e14a9fb
 
 
 
48bdf6b
e14a9fb
 
 
 
 
 
 
 
 
 
 
6beb322
 
 
 
 
 
 
 
 
 
 
 
 
 
e14a9fb
6beb322
 
7bc96ac
 
 
 
 
6beb322
7bc96ac
 
 
 
 
 
 
 
eef212b
7bc96ac
 
 
 
 
 
 
 
 
 
6beb322
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
from gliner import GLiNER
import re
import fitz 
import gradio as gr

# Load the community GLiNER NER model once at import time so every request
# reuses the same weights; load_tokenizer=True also loads its tokenizer.
model = GLiNER.from_pretrained("gliner-community/gliner_large-v2.5", load_tokenizer=True)




def clean_text(text):
    """Normalize raw extracted text into a single clean ASCII line.

    Escape/control whitespace characters become spaces, characters
    outside the printable ASCII range are dropped, runs of whitespace
    collapse to one space, and the result is stripped at both ends.
    """
    # Turn explicit escape characters into spaces first, so words that
    # were separated only by newlines/tabs remain separated.
    normalized = re.sub(r'[\n\r\t\f\v]', ' ', text)
    # Drop anything outside the printable ASCII range (0x20-0x7E).
    normalized = re.sub(r'[^\x20-\x7E]', '', normalized)
    # Collapse whitespace runs to single spaces and trim the ends.
    return re.sub(r'\s+', ' ', normalized).strip()


def pdf2text(file_path):
    """Extract and normalize all text from a PDF file.

    Args:
        file_path: Path to the PDF file to read.

    Returns:
        The concatenated text of every page, cleaned by clean_text().
    """
    # Join the page texts in one pass instead of quadratic += string
    # concatenation inside the loop.
    with fitz.open(file_path) as doc:
        text = "".join(page.get_text() for page in doc)
    return clean_text(text)


def ner(text, labels, threshold):
    """Run GLiNER named-entity recognition over *text*.

    Args:
        text: The cleaned resume text to tag.
        labels: Comma-separated entity labels, e.g. "name, email".
        threshold: Minimum confidence for an entity to be reported.

    Returns:
        A dict in the shape gr.HighlightedText expects: the original
        text plus a list of entity spans (label, word, offsets, score).
    """
    # Turn the comma-separated label string into a clean list.
    labels = [label.strip() for label in labels.split(",")]

    return {
        "text": text,
        "entities": [
            {
                "entity": entity["label"],
                "word": entity["text"],
                "start": entity["start"],
                "end": entity["end"],
                # Report the model's actual confidence instead of a
                # hard-coded 0, falling back to 0 if the key is absent.
                "score": entity.get("score", 0),
            }
            for entity in model.predict_entities(
                text, labels, flat_ner=True, threshold=threshold
            )
        ],
    }

def parser(file_path, labels, threshold):
    """Full pipeline: PDF file -> cleaned text -> NER result dict."""
    return ner(pdf2text(file_path), labels, threshold)


# Custom CSS injected into the Gradio Blocks app: light-blue page
# background, centered heading, card-style container, and a centered
# file-upload widget (targeted via elem_id="file_upload").
custom_css = """
body {
    background-color: #f0f8ff;
    font-family: 'Arial', sans-serif;
}
.container {
    margin: auto;
    padding: 20px;
    border-radius: 10px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
h1 {
    color: #3d1ad9;
    text-align: center;
}
#file_upload {
    display: flex;
    justify-content: center;
    margin-bottom: 20px;
}
"""


with gr.Blocks(css=custom_css) as demo:
  #home page
  with gr.Tab("Home"):
    # Title and Description
    gr.Markdown("""
    # πŸ“ **Resume Parser with GLiNER Model** πŸ“
    
    **Welcome!** This project is a **resume parser** designed to make the process of extracting important information from resumes easy and efficient. It uses a **generalized approach** to recognize and parse key details like **name**, **email**, **profession**, and more from different resume formats.
    
    By leveraging the **GLiNER model** for **Named Entity Recognition (NER)** and the power of **Hugging Face**, this tool ensures accurate and fast resume data extraction. It's perfect for **recruiters**, **HR professionals**, and **data analysts** who want to streamline their workflows.
    """)
    # Use Cases Section
    gr.Markdown("""
    ## 🎯 **Use Cases**:
    1. **Recruiters**: Automatically extract and store candidate information from resumes.
    2. **HR Professionals**: Parse bulk resumes to organize important information quickly.
    3. **Data Analysts**: Analyze trends in candidate attributes for better decision-making.
    4. **Job Boards**: Categorize and structure resume submissions automatically.
    """, elem_classes=["use-cases"])

    # Technologies Used Section
    gr.Markdown("""
    ## πŸ’» **Technologies Used**:
    - **Python** 🐍: Backend logic and automation.
    - **Hugging Face** πŸ€—: Model hosting and integration.
    - **GLiNER** 🧠: Named Entity Recognition model used to extract structured data.
    """, elem_classes=["technologies"])

    # Another Image Placeholder for Technologies Used
    #gr.Image("technologies_image_here.png", label="Technologies Used")

    # Footer or Additional Information
    gr.Markdown("""
    ### 🌟 **Additional Information**:
    This project is scalable and can be integrated into various systems like **ATS (Applicant Tracking Systems)**, job boards, and recruitment platforms. It's designed to handle **diverse resume formats** and ensures that all critical information is captured with **high accuracy**.
    
    Start **automating your workflow** and let this parser do the heavy lifting! πŸš€
    """, elem_classes=["additional-info"])

  # app page
  with gr.Tab("Resume Parser"):
    gr.HTML("<h1>AI-Powered Resume Parser</h1>")
    gr.HTML("<p style='text-align: center;'>This application extracts important data from your resume using innovative NLP methods. This tool's key advantage is that, in contrast to conventional resume parsers, it is generalized(Thanks to GLiNER team), meaning it functions in accordance with your needs. Simply enter the labels (NER) that you wish to extract, then adjust the threshold and submit the resume. Magic will happen in a few seconds.</p>")

    
    with gr.Row() as row:
        labels = gr.Textbox(
            label="Labels",
            placeholder="Enter your labels here (comma separated)",
            scale=2,
        )
        threshold = gr.Slider(
            0,
            1,
            value=0.3,
            step=0.01,
            label="Threshold",
            info="Lower the threshold to increase how many entities get predicted.",
            scale=0,
        )

    with gr.Row():
        file_input = gr.File(label="Upload Resume",
                file_types=['.pdf'],
                 elem_id="file_upload"
                )
        
    with gr.Row():
        parse_button = gr.Button("Parse Resume")

    with gr.Row():
        output = gr.HighlightedText(label="Parsed Resume",
                           combine_adjacent=True
                           )

    parse_button.click(fn=parser, inputs=[file_input,labels, threshold], outputs=output)

    gr.HTML("<p style='text-align: center;'>Our resume parser can identify and extract important details such as personal information, education, work experience, skills, and more. Simply upload your resume and let our AI do the work!</p>")

  # contact us
  with gr.Tab("Contact"):
    gr.Markdown("""
    # πŸ“§ **Contact Krish Goyani** πŸ“§
    
    I am happy to accept your feedback and suggestions! Feel free to reach out using the details below.
    """)
    
    # Contact Information
    with gr.Row(): # Changed from gr.Box to gr.Row
        gr.Markdown("""
        ## πŸ§‘ **Krish Goyani**
        - **Email**: [email protected]
        - **Portfolio**: [Krish Goyani](https://www.datascienceportfol.io/Krish_Goyani)
        - **LinkedIn**: [Krish Goyani](https://www.linkedin.com/in/krish-goyani/)
        - **GitHub**: [github.com/krish-goyani](https://github.com/Krish-Goyani)
        """)
    
    # Message
    gr.Markdown("""
    Thank you for visiting my page. I'm always open to hearing from you. Feel free to share any suggestions or feedback, and I'll get back to you as soon as possible! ✨
    """)
    

# Launch the interface
demo.queue()
demo.launch(share=True, debug=True)