|
import gradio as gr |
|
import requests |
|
import mimetypes |
|
import json, os |
|
import asyncio |
|
import aiohttp |
|
|
|
import subprocess |
|
|
|
def upgrade_pip():
    """Best-effort upgrade of pip for the interpreter running this script.

    Runs ``<python> -m pip install --upgrade pip`` and prints a success or
    failure message. Failures are reported on stdout and never raised, so a
    failed upgrade does not stop the rest of the module from loading.
    """
    # Imported locally because the file has no top-level ``import sys``;
    # ``os.sys`` only works because ``os`` happens to import ``sys`` itself
    # and is not a documented attribute.
    import sys

    try:
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "--upgrade", "pip"]
        )
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: pip exited with a non-zero status.
        # OSError: the interpreter process could not be started at all.
        print("pip 升級失敗")
    else:
        print("pip 升級成功")
|
|
|
|
|
# Refresh pip once, at import time, before anything else runs.
upgrade_pip()

# Backend credentials and base URL are read from the environment;
# either value is None when the variable is not set.
LLM_API = os.getenv("LLM_API")
LLM_URL = os.getenv("LLM_URL")

# Identifier sent as the "user" field of every backend request.
USER_ID = "HuggingFace Space"
|
|
|
def _parse_sse_data(line):
    """Return the JSON object carried by one ``data: `` stream line, or None.

    None is returned when the line has no ``data: `` marker, is not valid
    UTF-8, is not valid JSON, or decodes to something other than a dict.
    """
    # partition() splits on the FIRST marker only, so a payload whose text
    # happens to contain "data: " is kept whole instead of being truncated.
    _, marker, raw = line.partition(b"data: ")
    if not marker:
        return None
    try:
        data = json.loads(raw.decode("utf-8"))
    except ValueError:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return None
    return data if isinstance(data, dict) else None


async def send_chat_message(LLM_URL, LLM_API, category, file_id):
    """Ask the LLM backend to analyse a previously uploaded image.

    POSTs a streaming request to ``{LLM_URL}/chat-messages`` with
    ``category`` as the query and ``file_id`` attached as a local image
    file, then reads the response line by line.

    Args:
        LLM_URL: Base URL of the backend API.
        LLM_API: API key sent as a Bearer token.
        category: Text sent as the ``query`` field.
        file_id: Value sent as ``upload_file_id`` of the attached image.

    Returns:
        The stripped ``thought`` of the last ``agent_thought`` event seen in
        the stream, or an ``"Error: ..."`` string when the endpoint answers
        404 or no non-empty thought was received.
    """
    payload = {
        "inputs": {},
        "query": category,
        "response_mode": "streaming",
        "conversation_id": "",
        "user": USER_ID,
        "files": [
            {
                "type": "image",
                "transfer_method": "local_file",
                "upload_file_id": file_id,
            }
        ],
    }
    print("Sending chat message payload:", payload)

    async with aiohttp.ClientSession() as session:
        async with session.post(
            f"{LLM_URL}/chat-messages",
            headers={"Authorization": f"Bearer {LLM_API}"},
            json=payload,
        ) as response:
            print("Request URL:", f"{LLM_URL}/chat-messages")
            print("Response status code:", response.status)
            if response.status == 404:
                return "Error: Endpoint not found (404)"

            # Keep only the most recent thought; later events overwrite
            # earlier ones, and unparseable lines are skipped.
            last_thought = None
            async for line in response.content:
                data = _parse_sse_data(line)
                if data is not None and data.get("event") == "agent_thought":
                    last_thought = data.get("thought")

            if last_thought:
                return last_thought.strip()
            return "Error: No thought found in the response"
|
|
|
async def upload_file(LLM_URL, LLM_API, file_path, user_id):
    """Upload a local file to the backend's ``/files/upload`` endpoint.

    Args:
        LLM_URL: Base URL of the backend API.
        LLM_API: API key sent as a Bearer token.
        file_path: Path of the file to upload.
        user_id: Value sent as the ``user`` form field.

    Returns:
        The decoded JSON response on success, or an ``"Error: ..."`` string
        when the file does not exist, the endpoint answers 404, or the
        response body is not valid JSON.
    """
    if not os.path.exists(file_path):
        return f"Error: File {file_path} not found"

    mime_type, _ = mimetypes.guess_type(file_path)
    with open(file_path, "rb") as f:
        async with aiohttp.ClientSession() as session:
            form_data = aiohttp.FormData()
            # Send only the base name: the multipart filename should not
            # expose the local directory layout, and path separators in a
            # filename may be rejected by the receiving server.
            form_data.add_field(
                "file",
                f,
                filename=os.path.basename(file_path),
                content_type=mime_type,
            )
            form_data.add_field("user", user_id)

            async with session.post(
                f"{LLM_URL}/files/upload",
                headers={"Authorization": f"Bearer {LLM_API}"},
                data=form_data,
            ) as response:
                print("Upload response status code:", response.status)
                if response.status == 404:
                    return "Error: Endpoint not found (404)"

                response_text = await response.text()
                print("Raw upload response text:", response_text)
                try:
                    return json.loads(response_text)
                except json.JSONDecodeError:
                    return "Error: Invalid JSON response"
|
|
|
async def handle_input(file_path, category):
    """Upload the selected image, then ask the backend to analyse it.

    Args:
        file_path: Path of the image to upload; may be None or empty when
            no image was provided.
        category: Text forwarded to ``send_chat_message`` as the query.

    Returns:
        The text returned by ``send_chat_message`` on success; otherwise an
        ``"Error: ..."`` string, or the upload response itself when it is a
        dict containing an ``"error"`` key.
    """
    # Without this guard a missing image reaches os.path.exists(None) inside
    # upload_file and raises TypeError.
    if not file_path:
        return "Error: No file provided"

    upload_response = await upload_file(LLM_URL, LLM_API, file_path, USER_ID)
    print("Upload response:", upload_response)

    # upload_file reports failures as plain "Error: ..." strings. The old
    # '"error" in upload_response' test was a case-sensitive substring check
    # that never matched them, so the code went on to call .get() on a str.
    if isinstance(upload_response, str):
        return upload_response
    if not isinstance(upload_response, dict):
        return "Error: Unexpected upload response"
    if "error" in upload_response:
        return upload_response

    file_id = upload_response.get("id")
    if not file_id:
        return "Error: No file ID returned from upload"

    chat_response = await send_chat_message(LLM_URL, LLM_API, category, file_id)
    print("Chat response:", chat_response)
    return chat_response
|
|
|
|
|
# Input widgets handed to the gr.Interface built further down.
file_input = gr.Image(type="filepath", label="圖片上傳")
category = gr.Radio(
    label="Message Category",
    choices=[
        "機票",
        "計程車乘車證明",
        "通行明細 (etag)",
        "QRCODE發票",
        "超商高鐵車票",
        "高鐵車票",
        "超商台鐵車票",
        "台鐵車票",
    ],
)

# One [image path, category] pair per example row; every sample image lives
# under the DEMO/ directory.
examples = [
    [f"DEMO/{image_name}", label]
    for image_name, label in (
        ("boarding-pass.png", "機票"),
        ("taxi.jpg", "計程車乘車證明"),
        ("etag.jpg", "通行明細 (etag)"),
        ("qrcode.jpg", "QRCODE發票"),
        ("mthsr.JPG", "超商高鐵車票"),
        ("thsr.jpg", "高鐵車票"),
        ("mtra.jpg", "超商台鐵車票"),
        ("tra.JPG", "台鐵車票"),
    )
]
|
|
|
# Static HTML fragments rendered at the top of the page via gr.HTML.
# TITLE is the page heading.
TITLE = """<h1>Multimodal Playground 💬 輸入各種單據並選擇種類,解析得到各種關鍵資訊 </h1>"""
# SUBTITLE is the author credit, linking to the author's site.
SUBTITLE = """<h2><a href='https://www.twman.org' target='_blank'>TonTon Huang Ph.D. @ 2024/04 </a><br></h2>"""
# LINKS is a block of related external links, two per row separated by " | ".
LINKS = """
<a href='https://github.com/Deep-Learning-101' target='_blank'>Deep Learning 101 Github</a> | <a href='http://deeplearning101.twman.org' target='_blank'>Deep Learning 101</a> | <a href='https://www.facebook.com/groups/525579498272187/' target='_blank'>台灣人工智慧社團 FB</a> | <a href='https://www.youtube.com/c/DeepLearning101' target='_blank'>YouTube</a><br>
<a href='https://reurl.cc/g6GlZX' target='_blank'>手把手帶你一起踩AI坑</a> | <a href='https://blog.twman.org/2024/11/diffusion.html' target='_blank'>ComfyUI + Stable Diffuision</a><br>
<a href='https://blog.twman.org/2024/08/LLM.html' target='_blank'>白話文手把手帶你科普 GenAI</a> | <a href='https://blog.twman.org/2024/09/LLM.html' target='_blank'>大型語言模型直接就打完收工?</a><br>
<a href='https://blog.twman.org/2023/04/GPT.html' target='_blank'>什麼是大語言模型,它是什麼?想要嗎?</a> | <a href='https://blog.twman.org/2024/07/RAG.html' target='_blank'>那些檢索增強生成要踩的坑 </a><br>
<a href='https://blog.twman.org/2021/04/ASR.html' target='_blank'>那些語音處理 (Speech Processing) 踩的坑</a> | <a href='https://blog.twman.org/2021/04/NLP.html' target='_blank'>那些自然語言處理 (Natural Language Processing, NLP) 踩的坑</a><br>
<a href='https://blog.twman.org/2024/02/asr-tts.html' target='_blank'>那些ASR和TTS可能會踩的坑</a> | <a href='https://blog.twman.org/2024/02/LLM.html' target='_blank'>那些大模型開發會踩的坑</a><br>
<a href='https://blog.twman.org/2023/07/wsl.html' target='_blank'>用PPOCRLabel來幫PaddleOCR做OCR的微調和標註</a> | <a href='https://blog.twman.org/2023/07/HugIE.html' target='_blank'>基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析</a><br>
"""
|
# Page layout: the three HTML banners in order, followed by the
# upload-and-classify form wired to handle_input.
with gr.Blocks() as iface:
    for _banner_html in (TITLE, SUBTITLE, LINKS):
        gr.HTML(_banner_html)

    gr.Interface(
        fn=handle_input,
        inputs=[file_input, category],
        outputs="text",
        examples=examples,
        allow_flagging="never",
    )

# Start the web app as soon as the module is executed.
iface.launch()
|
|