Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,11 +1,6 @@
|
|
1 |
## Set Environment
|
2 |
import os
|
3 |
-
|
4 |
-
#os.system('source .env/bin/activate')
|
5 |
-
## Install poppler in os
|
6 |
-
#os.system('apt-get update')
|
7 |
-
#os.system('apt-get install poppler-utils')
|
8 |
-
##
|
9 |
|
10 |
from pdf2image import convert_from_path
|
11 |
import cv2
|
@@ -17,13 +12,6 @@ import json
|
|
17 |
from anthropic import Anthropic, Client
|
18 |
import gradio as gr
|
19 |
|
20 |
-
def get_base64_encorded_image(image_path):
|
21 |
-
with open(image_path, "rb") as image_file:
|
22 |
-
binary_data = image_file.read()
|
23 |
-
base64_encorded_data = base64.b64encode(binary_data)
|
24 |
-
base64_string = base64_encorded_data.decode('utf-8')
|
25 |
-
return base64_string
|
26 |
-
|
27 |
## Set Environment
|
28 |
os.system('python -m venv env')
|
29 |
os.system('source env/bin/activate')
|
@@ -33,6 +21,12 @@ os.system('apt-get update')
|
|
33 |
os.system('sudo apt-get install poppler-utils')
|
34 |
|
35 |
## The rest of your app.py code goes here
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
## Process pdf
|
38 |
|
@@ -120,7 +114,6 @@ def extract_table_info(image_path):
|
|
120 |
"type": "text",
|
121 |
"text": """
|
122 |
Please extract the table information of the image, keep the context in Traditional Chinese without translation.
|
123 |
-
if you can not recognize the value precisely, please infer it and try to make a best guess.
|
124 |
If you can not make the best guess, please return “UNK”.
|
125 |
Create a structured set of data in json format providing key information about a table.
|
126 |
Keep the section titles in the table as a parts of json.
|
@@ -129,7 +122,7 @@ def extract_table_info(image_path):
|
|
129 |
Do not do any sort operation with all the rows.
|
130 |
Extract the text information of each cell precisely. Do not make inference between "代碼" and "項目" if you can not extract it precisely.
|
131 |
Make sure the length of each cell you predict is the same as you extract.
|
132 |
-
Please do not
|
133 |
JSON fields must be labelled as:
|
134 |
Example json structure is:
|
135 |
<json>
|
|
|
1 |
## Set Environment
|
2 |
import os
|
3 |
+
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
from pdf2image import convert_from_path
|
6 |
import cv2
|
|
|
12 |
from anthropic import Anthropic, Client
|
13 |
import gradio as gr
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
## Set Environment
|
16 |
os.system('python -m venv env')
|
17 |
os.system('source env/bin/activate')
|
|
|
21 |
os.system('sudo apt-get install poppler-utils')
|
22 |
|
23 |
## The rest of your app.py code goes here
|
24 |
+
def get_base64_encorded_image(image_path):
|
25 |
+
with open(image_path, "rb") as image_file:
|
26 |
+
binary_data = image_file.read()
|
27 |
+
base64_encorded_data = base64.b64encode(binary_data)
|
28 |
+
base64_string = base64_encorded_data.decode('utf-8')
|
29 |
+
return base64_string
|
30 |
|
31 |
## Process pdf
|
32 |
|
|
|
114 |
"type": "text",
|
115 |
"text": """
|
116 |
Please extract the table information of the image, keep the context in Traditional Chinese without translation.
|
|
|
117 |
If you can not make the best guess, please return “UNK”.
|
118 |
Create a structured set of data in json format providing key information about a table.
|
119 |
Keep the section titles in the table as a parts of json.
|
|
|
122 |
Do not do any sort operation with all the rows.
|
123 |
Extract the text information of each cell precisely. Do not make inference between "代碼" and "項目" if you can not extract it precisely.
|
124 |
Make sure the length of each cell you predict is the same as you extract.
|
125 |
+
Please do not make any guess with "項目" based on the value of "代碼".
|
126 |
JSON fields must be labelled as:
|
127 |
Example json structure is:
|
128 |
<json>
|