crystalchen committed on
Commit
735fe06
·
verified ·
1 Parent(s): 9ab56a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -15
app.py CHANGED
@@ -1,11 +1,6 @@
1
  ## Set Environment
2
  import os
3
- #os.system('python -m venv .env')
4
- #os.system('source .env/bin/activate')
5
- ## Install poppler in os
6
- #os.system('apt-get update')
7
- #os.system('apt-get install poppler-utils')
8
- ##
9
 
10
  from pdf2image import convert_from_path
11
  import cv2
@@ -17,13 +12,6 @@ import json
17
  from anthropic import Anthropic, Client
18
  import gradio as gr
19
 
20
- def get_base64_encorded_image(image_path):
21
- with open(image_path, "rb") as image_file:
22
- binary_data = image_file.read()
23
- base64_encorded_data = base64.b64encode(binary_data)
24
- base64_string = base64_encorded_data.decode('utf-8')
25
- return base64_string
26
-
27
  ## Set Environment
28
  os.system('python -m venv env')
29
  os.system('source env/bin/activate')
@@ -33,6 +21,12 @@ os.system('apt-get update')
33
  os.system('sudo apt-get install poppler-utils')
34
 
35
  ## The rest of your app.py code goes here
 
 
 
 
 
 
36
 
37
  ## Process pdf
38
 
@@ -120,7 +114,6 @@ def extract_table_info(image_path):
120
  "type": "text",
121
  "text": """
122
  Please extract the table information of the image, keep the context in Traditional Chinese without translation.
123
- if you can not recognize the value precisely, please infer it and try to make a best guess.
124
  If you can not make the best guess, please return “UNK”.
125
  Create a structured set of data in json format providing key information about a table.
126
  Keep the section titles in the table as a parts of json.
@@ -129,7 +122,7 @@ def extract_table_info(image_path):
129
  Do not do any sort operation with all the rows.
130
  Extract the text information of each cell precisely. Do not make inference between "代碼" and "項目" if you can not extract it precisely.
131
  Make sure the length of each cell you predict is the same as you extract.
132
- Please do not mix "代碼" and "項目" with other case.
133
  JSON fields must be labelled as:
134
  Example json structure is:
135
  <json>
 
1
  ## Set Environment
2
  import os
3
+
 
 
 
 
 
4
 
5
  from pdf2image import convert_from_path
6
  import cv2
 
12
  from anthropic import Anthropic, Client
13
  import gradio as gr
14
 
 
 
 
 
 
 
 
15
  ## Set Environment
16
  os.system('python -m venv env')
17
  os.system('source env/bin/activate')
 
21
  os.system('sudo apt-get install poppler-utils')
22
 
23
  ## The rest of your app.py code goes here
24
+ def get_base64_encorded_image(image_path):
25
+ with open(image_path, "rb") as image_file:
26
+ binary_data = image_file.read()
27
+ base64_encorded_data = base64.b64encode(binary_data)
28
+ base64_string = base64_encorded_data.decode('utf-8')
29
+ return base64_string
30
 
31
  ## Process pdf
32
 
 
114
  "type": "text",
115
  "text": """
116
  Please extract the table information of the image, keep the context in Traditional Chinese without translation.
 
117
  If you can not make the best guess, please return “UNK”.
118
  Create a structured set of data in json format providing key information about a table.
119
  Keep the section titles in the table as a parts of json.
 
122
  Do not do any sort operation with all the rows.
123
  Extract the text information of each cell precisely. Do not make inference between "代碼" and "項目" if you can not extract it precisely.
124
  Make sure the length of each cell you predict is the same as you extract.
125
+ Please do not make any guess with "項目" based on the value of "代碼".
126
  JSON fields must be labelled as:
127
  Example json structure is:
128
  <json>