Spaces:

DeepLearning101
/

Multimodal-Playground

Running

App Files Files Community

Multimodal-Playground / app.py

DeepLearning101

Update app.py

8cc52e7 verified 4 months ago

raw

history blame contribute delete

6.86 kB

	import gradio as gr
	import requests
	import mimetypes
	import json, os
	import asyncio
	import aiohttp

	import subprocess

	def upgrade_pip():
	    """Upgrade pip for the interpreter running this app and report the result.

	    This is a best-effort step: a failed upgrade is printed, never raised,
	    so the caller can carry on regardless of the outcome.

	    Returns:
	        bool: True when the pip command exited with status 0, False when it
	        exited with a non-zero status or could not be started at all.
	    """
	    # Function-scope import keeps this block self-contained; ``sys`` is the
	    # documented home of ``executable`` (``os.sys`` is an implementation detail).
	    import sys

	    command = [sys.executable, "-m", "pip", "install", "--upgrade", "pip"]
	    try:
	        subprocess.check_call(command)
	    except (subprocess.CalledProcessError, OSError):
	        # CalledProcessError: pip ran but returned a non-zero exit status.
	        # OSError: the interpreter path could not be executed.
	        print("pip 升級失敗")
	        return False
	    print("pip 升級成功")
	    return True

	# Call the upgrade function once, when this module is loaded
	upgrade_pip()

	# Backend settings come from the environment; either value is None when unset.
	# LLM_API is sent as the Bearer token and LLM_URL is the base URL of the endpoints.
	LLM_API = os.getenv("LLM_API")
	LLM_URL = os.getenv("LLM_URL")

	# Placeholder user ID attached to every upload and chat request
	USER_ID = "HuggingFace Space"

	async def send_chat_message(LLM_URL, LLM_API, category, file_id):
	    """Send a streaming chat request about an uploaded image and return the last agent thought.

	    The request is POSTed to ``{LLM_URL}/chat-messages`` and the response is
	    read line by line. Every line carrying a ``data: `` marker is parsed as
	    JSON; the ``thought`` of the most recent ``agent_thought`` event wins.

	    Args:
	        LLM_URL: Base URL of the chat service (no trailing ``/chat-messages``).
	        LLM_API: API key sent as a Bearer token.
	        category: Text sent as the chat ``query``.
	        file_id: Identifier of a previously uploaded file, attached as an image.

	    Returns:
	        str: The stripped text of the last ``agent_thought`` event, or a
	        message starting with ``"Error:"`` when the request fails or the
	        stream contains no usable thought.
	    """
	    payload = {
	        "inputs": {},
	        "query": category,
	        "response_mode": "streaming",
	        "conversation_id": "",
	        "user": USER_ID,
	        "files": [
	            {
	                "type": "image",
	                "transfer_method": "local_file",
	                "upload_file_id": file_id,
	            }
	        ],
	    }
	    print("Sending chat message payload:", payload)  # Debug information

	    endpoint = f"{LLM_URL}/chat-messages"
	    marker = b"data: "
	    async with aiohttp.ClientSession() as session:
	        async with session.post(
	            endpoint,
	            headers={"Authorization": f"Bearer {LLM_API}"},
	            json=payload,
	        ) as response:
	            print("Request URL:", endpoint)
	            print("Response status code:", response.status)
	            if response.status == 404:
	                return "Error: Endpoint not found (404)"
	            if response.status >= 400:
	                # Any other HTTP failure carries no event stream worth parsing;
	                # report the status instead of a misleading "no thought" error.
	                return f"Error: Request failed with status {response.status}"

	            last_thought = None
	            async for line in response.content:
	                # Keep everything after the FIRST marker only: the JSON body may
	                # itself contain the text "data: ", so a plain split() would cut
	                # the payload short and the event would be lost.
	                _, found, raw_event = line.partition(marker)
	                if not found:
	                    continue
	                try:
	                    data = json.loads(raw_event.decode("utf-8"))
	                except ValueError:
	                    # Covers both json.JSONDecodeError and UnicodeDecodeError.
	                    continue
	                if isinstance(data, dict) and data.get("event") == "agent_thought":
	                    last_thought = data.get("thought")

	    if last_thought:
	        return last_thought.strip()
	    return "Error: No thought found in the response"

	async def upload_file(LLM_URL, LLM_API, file_path, user_id):
	    """Upload a local file to ``{LLM_URL}/files/upload`` as multipart form data.

	    Args:
	        LLM_URL: Base URL of the service.
	        LLM_API: API key sent as a Bearer token.
	        file_path: Path of the file to upload. ``None``, an empty string, a
	            missing path or a directory all yield the "not found" error.
	        user_id: Value sent in the ``user`` form field.

	    Returns:
	        The decoded JSON body of the response on success, otherwise a string
	        starting with ``"Error:"`` (file missing, 404 endpoint, or a body
	        that is not valid JSON).
	    """
	    # Guard before touching the filesystem APIs: os.path functions raise
	    # TypeError on None, and a directory would pass exists() but fail in open().
	    if not file_path or not os.path.isfile(file_path):
	        return f"Error: File {file_path} not found"

	    mime_type, _ = mimetypes.guess_type(file_path)
	    with open(file_path, 'rb') as f:
	        async with aiohttp.ClientSession() as session:
	            form_data = aiohttp.FormData()
	            # Send only the base name; the full local path is not a filename.
	            form_data.add_field(
	                'file',
	                f,
	                filename=os.path.basename(file_path),
	                content_type=mime_type,
	            )
	            form_data.add_field('user', user_id)

	            async with session.post(
	                f"{LLM_URL}/files/upload",
	                headers={"Authorization": f"Bearer {LLM_API}"},
	                data=form_data,
	            ) as response:
	                print("Upload response status code:", response.status)  # Debug information
	                if response.status == 404:
	                    return "Error: Endpoint not found (404)"
	                response_text = await response.text()
	                print("Raw upload response text:", response_text)  # Debug information
	                try:
	                    return json.loads(response_text)
	                except json.JSONDecodeError:
	                    return "Error: Invalid JSON response"

	async def handle_input(file_path, category):
	    """Upload the given image, then ask the chat endpoint about it.

	    Args:
	        file_path: Path of the image to upload.
	        category: Text forwarded as the chat query.

	    Returns:
	        str: The chat answer, or a message describing which step failed.
	    """
	    upload_response = await upload_file(LLM_URL, LLM_API, file_path, USER_ID)
	    print("Upload response:", upload_response)  # Debug information

	    # upload_file reports its own failures as plain "Error: ..." strings.
	    # Pass them through unchanged: a substring test for "error" would miss
	    # the capitalised prefix, and calling .get() on a string would crash.
	    if isinstance(upload_response, str):
	        return upload_response

	    # Anything that is not a JSON object cannot carry a file ID.
	    if not isinstance(upload_response, dict):
	        return "Error: No file ID returned from upload"

	    if "error" in upload_response:
	        # Stringified so this function always hands text back to its caller.
	        return str(upload_response)

	    file_id = upload_response.get("id")  # Extract file ID from the response
	    if not file_id:
	        return "Error: No file ID returned from upload"

	    chat_response = await send_chat_message(LLM_URL, LLM_API, category, file_id)
	    print("Chat response:", chat_response)  # Debug information
	    return chat_response

	# Gradio input components: the image to analyse and the document category
	file_input = gr.Image(type='filepath', label='圖片上傳')
	category = gr.Radio(
	    choices=[
	        "機票",
	        "計程車乘車證明",
	        "通行明細 (etag)",
	        "QRCODE發票",
	        "超商高鐵車票",
	        "高鐵車票",
	        "超商台鐵車票",
	        "台鐵車票",
	    ],
	    label="Message Category",
	)

	# Example rows for the UI: each one pairs a demo image path with its category
	examples = [
	    list(pair)
	    for pair in (
	        ("DEMO/boarding-pass.png", "機票"),
	        ("DEMO/taxi.jpg", "計程車乘車證明"),
	        ("DEMO/etag.jpg", "通行明細 (etag)"),
	        ("DEMO/qrcode.jpg", "QRCODE發票"),
	        ("DEMO/mthsr.JPG", "超商高鐵車票"),
	        ("DEMO/thsr.jpg", "高鐵車票"),
	        ("DEMO/mtra.jpg", "超商台鐵車票"),
	        ("DEMO/tra.JPG", "台鐵車票"),
	    )
	]

	# Static HTML fragments rendered above the form: page title, author line, and
	# a block of reference links. Link separators are plain "|" characters; a
	# backslash before them would be an invalid escape sequence in a normal
	# string literal and would also show up literally in the rendered page.
	TITLE = """<h1>Multimodal Playground 💬 輸入各種單據並選擇種類，解析得到各種關鍵資訊 </h1>"""
	SUBTITLE = """<h2><a href='https://www.twman.org' target='_blank'>TonTon Huang Ph.D. @ 2024/04 </a><br></h2>"""
	LINKS = """
	<a href='https://github.com/Deep-Learning-101' target='_blank'>Deep Learning 101 Github</a> | <a href='http://deeplearning101.twman.org' target='_blank'>Deep Learning 101</a> | <a href='https://www.facebook.com/groups/525579498272187/' target='_blank'>台灣人工智慧社團 FB</a> | <a href='https://www.youtube.com/c/DeepLearning101' target='_blank'>YouTube</a><br>
	<a href='https://reurl.cc/g6GlZX' target='_blank'>手把手帶你一起踩AI坑</a> | <a href='https://blog.twman.org/2024/11/diffusion.html' target='_blank'>ComfyUI + Stable Diffuision</a><br>
	<a href='https://blog.twman.org/2024/08/LLM.html' target='_blank'>白話文手把手帶你科普 GenAI</a> | <a href='https://blog.twman.org/2024/09/LLM.html' target='_blank'>大型語言模型直接就打完收工？</a><br>
	<a href='https://blog.twman.org/2023/04/GPT.html' target='_blank'>什麼是大語言模型，它是什麼？想要嗎？</a> | <a href='https://blog.twman.org/2024/07/RAG.html' target='_blank'>那些檢索增強生成要踩的坑 </a><br>
	<a href='https://blog.twman.org/2021/04/ASR.html' target='_blank'>那些語音處理 (Speech Processing) 踩的坑</a> | <a href='https://blog.twman.org/2021/04/NLP.html' target='_blank'>那些自然語言處理 (Natural Language Processing, NLP) 踩的坑</a><br>
	<a href='https://blog.twman.org/2024/02/asr-tts.html' target='_blank'>那些ASR和TTS可能會踩的坑</a> | <a href='https://blog.twman.org/2024/02/LLM.html' target='_blank'>那些大模型開發會踩的坑</a><br>
	<a href='https://blog.twman.org/2023/07/wsl.html' target='_blank'>用PPOCRLabel來幫PaddleOCR做OCR的微調和標註</a> | <a href='https://blog.twman.org/2023/07/HugIE.html' target='_blank'>基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析</a><br>
	"""
	# Page layout: the three HTML fragments in order, then the upload form wired
	# to handle_input with a plain-text output and flagging switched off.
	with gr.Blocks() as iface:
	    for _fragment in (TITLE, SUBTITLE, LINKS):
	        gr.HTML(_fragment)

	    gr.Interface(
	        fn=handle_input,
	        inputs=[file_input, category],
	        outputs="text",
	        examples=examples,
	        allow_flagging="never",
	    )

	# Start serving the app
	iface.launch()