edited parse_cv for more robustness
Browse files
- .gitignore +2 -1
- project/gemini_api.py +19 -4
.gitignore
CHANGED
@@ -4,4 +4,5 @@ project/__pycache__
|
|
4 |
*.pyc
|
5 |
.env
|
6 |
project/ninja_cv.ipynb
|
7 |
-
project/tmp
|
|
|
|
4 |
*.pyc
|
5 |
.env
|
6 |
project/ninja_cv.ipynb
|
7 |
+
project/tmp
|
8 |
+
*.pdf
|
project/gemini_api.py
CHANGED
@@ -63,10 +63,12 @@ def read_cv(file_path: str) -> str:
|
|
63 |
blocks_sorted = sorted(text, key=lambda b: (b[1], b[0]))
|
64 |
for b in blocks_sorted:
|
65 |
cv += b[4]
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
|
|
70 |
return cv
|
71 |
|
72 |
|
@@ -217,6 +219,19 @@ class GeminiAPI:
|
|
217 |
def parse_cv(self, cv_path: str) -> dict:
|
218 |
"""Devuelve CV como diccionario limpio"""
|
219 |
cv_text = read_cv(cv_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
prompt = self.load_prompt("./prompts/prompt_cv.txt")
|
221 |
now = datetime.now()
|
222 |
actual_date = now.strftime("%B, %Y")
|
|
|
63 |
blocks_sorted = sorted(text, key=lambda b: (b[1], b[0]))
|
64 |
for b in blocks_sorted:
|
65 |
cv += b[4]
|
66 |
+
|
67 |
+
# if len(cv) > 10000:
|
68 |
+
# return -1
|
69 |
+
# elif len(cv) < 10:
|
70 |
+
# return -2
|
71 |
+
|
72 |
return cv
|
73 |
|
74 |
|
|
|
219 |
def parse_cv(self, cv_path: str) -> dict:
|
220 |
"""Devuelve CV como diccionario limpio"""
|
221 |
cv_text = read_cv(cv_path)
|
222 |
+
|
223 |
+
# --- Check whether read_cv returned an integer error code ---
|
224 |
+
if isinstance(cv_text, int):
|
225 |
+
if cv_text == -1:
|
226 |
+
# This error dictionary will be returned and shown to the user
|
227 |
+
return {"error": "The CV text is too long (more than 10,000 characters)."}
|
228 |
+
elif cv_text == -2:
|
229 |
+
# This one is also returned and shown to the user
|
230 |
+
return {"error": "The CV text is too short or could not be read properly from the PDF."}
|
231 |
+
else:
|
232 |
+
return {"error": "An unknown error occurred while reading the CV file."}
|
233 |
+
|
234 |
+
# If execution reaches this point, cv_text is text and we can continue
|
235 |
prompt = self.load_prompt("./prompts/prompt_cv.txt")
|
236 |
now = datetime.now()
|
237 |
actual_date = now.strftime("%B, %Y")
|