Spaces:
Sleeping
Sleeping
File size: 3,807 Bytes
67d85c4 d8a8115 67d85c4 d8a8115 67d85c4 efa6192 d8a8115 efa6192 67d85c4 d8a8115 67d85c4 d8a8115 67d85c4 d8a8115 807012c 67d85c4 d8a8115 67d85c4 d8a8115 67d85c4 d8a8115 67d85c4 bbbf59d d8a8115 2b16bf8 d8a8115 d728717 807012c 67d85c4 d8a8115 67d85c4 4fc594e 67d85c4 d8a8115 67d85c4 d8a8115 4119b5d 67d85c4 bbbf59d 67d85c4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
import streamlit as st
import pandas as pd
import numpy as np
import re
import pickle
import pdfminer
from pdfminer.high_level import extract_text
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, LSTM, Dense, GlobalMaxPooling1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
def cleanResume(resumeText):
    """Normalize raw resume text for tokenization.

    Strips URLs, 'RT'/'cc' markers, hashtags, @-mentions, ASCII
    punctuation, and non-ASCII characters, then collapses runs of
    whitespace into single spaces.

    Args:
        resumeText: Raw text extracted from a resume.

    Returns:
        The cleaned text string (may carry a single trailing space when
        the original ended in removable content).
    """
    # Raw strings throughout: the original used plain strings, which make
    # '\S'/'\s' invalid escape sequences (SyntaxWarning on modern Python).
    resumeText = re.sub(r'http\S+\s*', ' ', resumeText)   # remove URLs
    resumeText = re.sub(r'RT|cc', ' ', resumeText)        # remove RT and cc
    resumeText = re.sub(r'#\S+', '', resumeText)          # remove hashtags
    resumeText = re.sub(r'@\S+', ' ', resumeText)         # remove mentions
    # Remove all ASCII punctuation in one character-class pass.
    resumeText = re.sub('[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' ', resumeText)
    resumeText = re.sub(r'[^\x00-\x7f]', r' ', resumeText)  # drop non-ASCII
    resumeText = re.sub(r'\s+', ' ', resumeText)            # collapse whitespace
    return resumeText
def pdf_to_text(file):
    """Extract all text from an uploaded PDF file-like object."""
    return extract_text(file)
def predict_category(resumes_data, selected_category, max_sequence_length):
    """Score each uploaded resume against every category and rank by one.

    Args:
        resumes_data: list of dicts with 'ResumeText' and 'FileName' keys.
        selected_category: category (column) name to sort by, descending.
        max_sequence_length: padding length; must equal the Embedding
            input_length the saved weights were trained with.

    Returns:
        List of {'Rank': int, 'FileName': str} dicts, best match first.

    NOTE(review): relies on the module-level globals `tokenizer` and
    `label` fitted in the __main__ block.
    """
    model = load_deeprank_model(max_sequence_length)
    resumes_df = pd.DataFrame(resumes_data)
    sequences = tokenizer.texts_to_sequences(resumes_df['ResumeText'].values)
    # Bug fix: the original clobbered the max_sequence_length parameter with
    # a hard-coded 500 here, so any caller-supplied length would have
    # mismatched the model built above. Use the parameter as passed.
    padded_text = pad_sequences(sequences, maxlen=max_sequence_length)
    predicted_probs = model.predict(padded_text)
    # One probability column per category, in label-encoder class order.
    for i, category in enumerate(label.classes_):
        resumes_df[category] = predicted_probs[:, i]
    resumes_df_sorted = resumes_df.sort_values(by=selected_category, ascending=False)
    # enumerate(start=1) replaces the original manual rank counter.
    return [
        {'Rank': rank, 'FileName': row['FileName']}
        for rank, (_, row) in enumerate(resumes_df_sorted.iterrows(), start=1)
    ]
def load_deeprank_model(max_sequence_length):
    """Rebuild the CNN+LSTM classifier and load its pre-trained weights.

    The architecture must match the one the weights in
    'deeprank_model_v2.h5' were saved from; `vocab_size` and
    `num_classes` come from the module-level __main__ setup.

    Args:
        max_sequence_length: input length of the Embedding layer.

    Returns:
        A compiled Keras Sequential model with weights loaded.
    """
    architecture = [
        Embedding(input_dim=vocab_size, output_dim=128, input_length=max_sequence_length),
        Conv1D(filters=128, kernel_size=5, activation='relu'),
        MaxPooling1D(pool_size=2),
        LSTM(64),
        Dense(num_classes, activation='softmax'),
    ]
    model = Sequential(architecture)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.load_weights('deeprank_model_v2.h5')
    return model
def main():
    """Streamlit entry point: upload PDF resumes, pick a category, rank."""
    st.title("Resume Ranking App")
    st.text("Upload resumes and select a category to rank them.")
    resumes_data = []
    selected_category = ""
    files = st.file_uploader("Upload resumes", type=["pdf"], accept_multiple_files=True)
    if files:
        # Extract and clean the text of every uploaded PDF up front.
        for uploaded in files:
            cleaned = cleanResume(pdf_to_text(uploaded))
            resumes_data.append({'ResumeText': cleaned, 'FileName': uploaded.name})
        selected_category = st.selectbox("Select a category to rank by", label.classes_)
    if st.button("Rank Resumes"):
        # Success path first; warn only when either prerequisite is missing.
        if resumes_data and selected_category:
            ranks = predict_category(resumes_data, selected_category, max_sequence_length)
            st.write(pd.DataFrame(ranks))
        else:
            st.warning("Please upload resumes and select a category to continue.")
if __name__ == '__main__':
    # Fit the tokenizer and label encoder on the training corpus so the
    # app's vocabulary and category set match the pre-trained weights.
    df = pd.read_csv('UpdatedResumeDataSet.csv')
    df['cleaned'] = df['Resume'].apply(cleanResume)  # no lambda wrapper needed
    label = LabelEncoder()
    df['Category'] = label.fit_transform(df['Category'])
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(df['cleaned'].values)
    vocab_size = len(tokenizer.word_index) + 1  # +1 reserves index 0 for padding
    num_classes = len(label.classes_)
    max_sequence_length = 500  # must match the saved model's input length
    main()
|