srijaydeshpande committed on
Commit
0f709ec
·
verified ·
1 Parent(s): c4cf05f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -6
app.py CHANGED
@@ -1,5 +1,5 @@
1
- from pdfminer.high_level import extract_pages
2
- from pdfminer.layout import LTTextContainer
3
  from tqdm import tqdm
4
  import re
5
  import gradio as gr
@@ -9,6 +9,7 @@ import spaces
9
  import subprocess
10
  from huggingface_hub import hf_hub_download
11
  from llama_cpp import Llama
 
12
 
13
  from huggingface_hub import login
14
 
@@ -88,10 +89,13 @@ def craft_cv(llm, cv_text, job_description, maxtokens, temperature, top_probabil
88
 
89
  @spaces.GPU(duration=150)
90
  def pdf_to_text(cv_file, job_description, maxtokens=2048, temperature=0, top_probability=0.95):
91
- page2content = process_document(cv_file)
92
- cv_text = ""
93
- for page_id in page2content:
94
- cv_text += page2content[page_id] + ' '
 
 
 
95
  llm = Llama(
96
  model_path="models/" + model_id,
97
  flash_attn=True,
 
1
+ # from pdfminer.high_level import extract_pages
2
+ # from pdfminer.layout import LTTextContainer
3
  from tqdm import tqdm
4
  import re
5
  import gradio as gr
 
9
  import subprocess
10
  from huggingface_hub import hf_hub_download
11
  from llama_cpp import Llama
12
+ from docling.document_converter import DocumentConverter
13
 
14
  from huggingface_hub import login
15
 
 
89
 
90
  @spaces.GPU(duration=150)
91
  def pdf_to_text(cv_file, job_description, maxtokens=2048, temperature=0, top_probability=0.95):
92
+ # page2content = process_document(cv_file)
93
+ # cv_text = ""
94
+ # for page_id in page2content:
95
+ # cv_text += page2content[page_id] + ' '
96
+ converter = DocumentConverter()
97
+ result = converter.convert(cv_file)
98
+ cv_text = result.document.export_to_markdown()
99
  llm = Llama(
100
  model_path="models/" + model_id,
101
  flash_attn=True,