Spaces:
Runtime error
Runtime error
improving embeddings
Browse files- README.md +2 -2
- main.py +14 -14
- match_utils.py +2 -2
README.md
CHANGED
@@ -13,7 +13,7 @@ pinned: true
|
|
13 |

|
14 |
|
15 |
## Purpose:
|
16 |
-
#### This is a FastAPI web application designed to allow job-seekers to learn more about various occupations and explore their future career path. See below for details and page descriptions. If you like the app, please star and/or fork and check back for future releases.
|
17 |
|
18 |
## To Access the App:
|
19 |
https://huggingface.co/spaces/celise88/Pathfinder
|
@@ -67,4 +67,4 @@ And navigate to http://localhost:8000/ in your browser
|
|
67 |
* The classification model underlying the skills extractor is a custom distilbert-base-uncased binary classification model that was finetuned using a balanced dataset comprised of the emsi (now Lightcast) open skills database and a random sample of the dbpedia database. The model achieved an f1 score of 0.967 on the validation sample (accuracy of 0.967, loss of 0.096). It can be accessed via Hugging Face: https://huggingface.co/celise88/distilbert-base-uncased-finetuned-binary-classifier.
|
68 |
* Cohere's LLM is used to get the neural text embeddings. (This is why a cohere API key is needed for the new functionality to work in this release; I plan to incorporate an open-source embedding model in a future release.)
|
69 |
|
70 |
-
* Version 1.1.2 (current version) - 1/29/2024 - Migrated from finetuned Distilbert LLM to Mistral (see https://huggingface.co/mistralai/Mistral-7B-v0.1 for more information).
|
|
|
13 |

|
14 |
|
15 |
## Purpose:
|
16 |
+
#### This is a FastAPI web application designed to allow job-seekers to learn more about various occupations and explore their future career path. See below for details and page descriptions. If you like the app, please star and/or fork and check back for future releases. Please note that this app is a work in progress and may return unexpected job-match results: we are moving away from Cohere.ai's embedding model and have not yet found a replacement that performs acceptably.
|
17 |
|
18 |
## To Access the App:
|
19 |
https://huggingface.co/spaces/celise88/Pathfinder
|
|
|
67 |
* The classification model underlying the skills extractor is a custom distilbert-base-uncased binary classification model that was finetuned using a balanced dataset comprised of the emsi (now Lightcast) open skills database and a random sample of the dbpedia database. The model achieved an f1 score of 0.967 on the validation sample (accuracy of 0.967, loss of 0.096). It can be accessed via Hugging Face: https://huggingface.co/celise88/distilbert-base-uncased-finetuned-binary-classifier.
|
68 |
* Cohere's LLM is used to get the neural text embeddings. (This is why a cohere API key is needed for the new functionality to work in this release; I plan to incorporate an open-source embedding model in a future release.)
|
69 |
|
70 |
+
* Version 1.1.2 (current version) - 1/29/2024 - Migrated from a fine-tuned DistilBERT LLM and Cohere.ai's embedding model to Mistral (see https://huggingface.co/mistralai/Mistral-7B-v0.1 for more information).
|
main.py
CHANGED
@@ -170,24 +170,24 @@ async def post_matches(request: Request, bt: BackgroundTasks, resume: UploadFile
|
|
170 |
|
171 |
username = localStorage.getItem('username')
|
172 |
|
173 |
-
def add_data_to_db(
|
174 |
db = pd.read_csv('static/res_embeddings.csv')
|
175 |
-
embeds = format(skillEmbed(
|
176 |
db.iloc[db['username']== username,5:] = embeds
|
177 |
db.to_csv('static/res_embeddings.csv', index=False)
|
178 |
|
179 |
-
def get_jobs_from_db(
|
180 |
-
job_matches = sim_result_loop_jobFinder(
|
181 |
print(job_matches)
|
182 |
|
183 |
resume = get_resume(resume)
|
184 |
skills = skill_extractor(resume)
|
185 |
-
simResults = await sim_result_loop(
|
186 |
links = get_links(simResults[0])
|
187 |
|
188 |
if username is not None:
|
189 |
-
bt.add_task(add_data_to_db,
|
190 |
-
bt.add_task(get_jobs_from_db,
|
191 |
|
192 |
return templates.TemplateResponse('find_my_match.html', context={'request': request, 'resume': resume, 'skills': skills, 'simResults': simResults[0], 'links': links, 'statelist': statelist})
|
193 |
|
@@ -211,24 +211,24 @@ async def post_matches(request: Request, bt: BackgroundTasks, jobdesc: UploadFil
|
|
211 |
|
212 |
username = localStorage.getItem('username')
|
213 |
|
214 |
-
def add_data_to_db(
|
215 |
db = pd.read_csv('static/jd_embeddings.csv')
|
216 |
-
embeds = format(skillEmbed(
|
217 |
db.iloc[db['username']== username,5:] = embeds
|
218 |
db.to_csv('static/jd_embeddings.csv', index=False)
|
219 |
|
220 |
-
def get_cand_from_db(
|
221 |
-
cand_matches = sim_result_loop_candFinder(
|
222 |
print(cand_matches)
|
223 |
|
224 |
jobdesc = get_resume(jobdesc)
|
225 |
skills = skill_extractor(jobdesc)
|
226 |
-
simResults = await sim_result_loop(
|
227 |
links = get_links(simResults[0])
|
228 |
|
229 |
if username is not None:
|
230 |
-
bt.add_task(add_data_to_db,
|
231 |
-
bt.add_task(get_cand_from_db,
|
232 |
|
233 |
return templates.TemplateResponse('candidate_matcher.html', context={'request': request, 'jobdesc': jobdesc, 'skills': skills, 'simResults': simResults[0], 'links': links})
|
234 |
|
|
|
170 |
|
171 |
username = localStorage.getItem('username')
|
172 |
|
173 |
+
def add_data_to_db(skills):
|
174 |
db = pd.read_csv('static/res_embeddings.csv')
|
175 |
+
embeds = format(skillEmbed(skills)).replace('[[','').replace(']]','').replace('[','').replace(']','').split(',')
|
176 |
db.iloc[db['username']== username,5:] = embeds
|
177 |
db.to_csv('static/res_embeddings.csv', index=False)
|
178 |
|
179 |
+
def get_jobs_from_db(skills):
|
180 |
+
job_matches = sim_result_loop_jobFinder(skills)
|
181 |
print(job_matches)
|
182 |
|
183 |
resume = get_resume(resume)
|
184 |
skills = skill_extractor(resume)
|
185 |
+
simResults = await sim_result_loop(skills)
|
186 |
links = get_links(simResults[0])
|
187 |
|
188 |
if username is not None:
|
189 |
+
bt.add_task(add_data_to_db, skills)
|
190 |
+
bt.add_task(get_jobs_from_db, skills)
|
191 |
|
192 |
return templates.TemplateResponse('find_my_match.html', context={'request': request, 'resume': resume, 'skills': skills, 'simResults': simResults[0], 'links': links, 'statelist': statelist})
|
193 |
|
|
|
211 |
|
212 |
username = localStorage.getItem('username')
|
213 |
|
214 |
+
def add_data_to_db(skills):
|
215 |
db = pd.read_csv('static/jd_embeddings.csv')
|
216 |
+
embeds = format(skillEmbed(skills)).replace('[[','').replace(']]','').split(',')
|
217 |
db.iloc[db['username']== username,5:] = embeds
|
218 |
db.to_csv('static/jd_embeddings.csv', index=False)
|
219 |
|
220 |
+
def get_cand_from_db(skills):
|
221 |
+
cand_matches = sim_result_loop_candFinder(skills)
|
222 |
print(cand_matches)
|
223 |
|
224 |
jobdesc = get_resume(jobdesc)
|
225 |
skills = skill_extractor(jobdesc)
|
226 |
+
simResults = await sim_result_loop(skills)
|
227 |
links = get_links(simResults[0])
|
228 |
|
229 |
if username is not None:
|
230 |
+
bt.add_task(add_data_to_db, skills)
|
231 |
+
bt.add_task(get_cand_from_db, skills)
|
232 |
|
233 |
return templates.TemplateResponse('candidate_matcher.html', context={'request': request, 'jobdesc': jobdesc, 'skills': skills, 'simResults': simResults[0], 'links': links})
|
234 |
|
match_utils.py
CHANGED
@@ -19,8 +19,8 @@ except AttributeError:
|
|
19 |
else:
|
20 |
ssl._create_default_https_context = _create_unverified_https_context
|
21 |
|
22 |
-
# LOAD
|
23 |
-
simdat = pd.read_csv('static/embeddings/
|
24 |
coheredat = pd.read_csv('static/cohere_tSNE_dat.csv')
|
25 |
|
26 |
# LOAD LLM MODELS:
|
|
|
19 |
else:
|
20 |
ssl._create_default_https_context = _create_unverified_https_context
|
21 |
|
22 |
+
# LOAD EMBEDDINGS:
|
23 |
+
simdat = pd.read_csv('static/embeddings/onet_embeddings.csv')
|
24 |
coheredat = pd.read_csv('static/cohere_tSNE_dat.csv')
|
25 |
|
26 |
# LOAD LLM MODELS:
|