"""Gradio front end that uploads an image to an LLM service and shows its answer.

The service base URL and API key are read from the ``LLM_URL`` and ``LLM_API``
environment variables.
"""

import asyncio
import json
import mimetypes
import os

import aiohttp
import gradio as gr
import requests

LLM_API = os.environ.get("LLM_API")
LLM_URL = os.environ.get("LLM_URL")
USER_ID = "HuggingFace Space"  # Placeholder user ID

# Prefix that precedes the JSON payload on each streamed event line.
_STREAM_DATA_PREFIX = "data:"


def _extract_answer(raw_line):
    """Return the answer carried by one streamed response line, or ``None``.

    Args:
        raw_line: One line of the response body, as bytes.

    Returns:
        The non-empty string found at ``data.outputs.answer`` in the line's
        JSON payload, or ``None`` when the line is blank, is not valid JSON,
        or does not carry such an answer.
    """
    try:
        text = raw_line.decode("utf-8").strip()
    except UnicodeDecodeError as e:
        print("Error parsing line:", e)  # Debug: report the decoding error
        return None

    if not text:
        # Blank separator lines between events carry no payload.
        return None

    # Strip only the leading prefix; a blanket str.replace() would also
    # delete the same characters wherever they occur inside the payload.
    if text.startswith(_STREAM_DATA_PREFIX):
        text = text[len(_STREAM_DATA_PREFIX):].lstrip()

    try:
        line_data = json.loads(text)
    except json.JSONDecodeError as e:
        print("Error parsing line:", e)  # Debug: report the parsing error
        return None
    print("Line data:", line_data)  # Debug: show the content of each line

    # Walk data -> outputs -> answer defensively: any level may be missing,
    # null, or of an unexpected type.
    node = line_data
    for key in ("data", "outputs"):
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    if not isinstance(node, dict):
        return None

    answer = node.get("answer")
    if isinstance(answer, str) and answer:
        return answer
    return None


async def send_chat_message(LLM_URL, LLM_API, user_input, file_id):
    """Send a chat query that references an uploaded image and return the answer.

    Args:
        LLM_URL: Base URL of the service; ``/chat-messages`` is appended.
        LLM_API: API key sent as a bearer token.
        user_input: The user's question.
        file_id: Identifier of a previously uploaded file.

    Returns:
        The first answer found in the streamed response, stripped of
        surrounding whitespace, or a string starting with ``"Error:"``.
    """
    payload = {
        "inputs": {},
        "query": user_input,
        "response_mode": "streaming",
        "conversation_id": "",
        "user": USER_ID,
        "files": [
            {
                "type": "image",
                "transfer_method": "local_file",
                "upload_file_id": file_id,
            }
        ],
    }
    print("Sending chat message payload:", payload)  # Debug information

    url = f"{LLM_URL}/chat-messages"
    last_thought = None
    async with aiohttp.ClientSession() as session:
        async with session.post(
            url,
            headers={"Authorization": f"Bearer {LLM_API}"},
            json=payload,
        ) as response:
            print("Request URL:", url)
            print("Response status code:", response.status)
            if response.status == 404:
                return "Error: Endpoint not found (404)"
            if response.status >= 400:
                # Any other client/server error: report it instead of
                # scanning an error body for an answer that cannot be there.
                return f"Error: Request failed with status {response.status}"

            async for line in response.content:
                answer = _extract_answer(line)
                if answer is not None:
                    last_thought = answer
                    break  # Stop reading once an answer has been found

    if last_thought:
        return last_thought.strip()
    return "Error: No thought or answer found in the response"


async def upload_file(LLM_URL, LLM_API, file_path, user_id):
    """Upload a local file to the service.

    Args:
        LLM_URL: Base URL of the service; ``/files/upload`` is appended.
        LLM_API: API key sent as a bearer token.
        file_path: Path of the file to upload. May be ``None`` or empty,
            e.g. when no image was supplied.
        user_id: User identifier sent in the ``user`` form field.

    Returns:
        The decoded JSON response (a dict containing ``"id"``) on success,
        otherwise a string starting with ``"Error:"``.
    """
    if not file_path:
        # os.path.exists(None) would raise TypeError.
        return "Error: No file provided"
    if not os.path.exists(file_path):
        return f"Error: File {file_path} not found"

    mime_type, _ = mimetypes.guess_type(file_path)
    with open(file_path, "rb") as f:
        async with aiohttp.ClientSession() as session:
            form_data = aiohttp.FormData()
            form_data.add_field(
                "file",
                f,
                # Send only the file name, not the full local path.
                filename=os.path.basename(file_path),
                content_type=mime_type,
            )
            form_data.add_field("user", user_id)
            async with session.post(
                f"{LLM_URL}/files/upload",
                headers={"Authorization": f"Bearer {LLM_API}"},
                data=form_data,
            ) as response:
                print("Upload response status code:", response.status)  # Debug information
                if response.status == 404:
                    return "Error: Endpoint not found (404)"
                response_text = await response.text()
                print("Raw upload response text:", response_text)  # Debug information
                if response.status >= 400:
                    return f"Error: Upload failed with status {response.status}"
                try:
                    response_json = json.loads(response_text)
                except json.JSONDecodeError:
                    return "Error: Invalid JSON response"

    # Valid JSON that is not an object (list, string, ...) has no .get().
    if isinstance(response_json, dict) and response_json.get("id"):
        return response_json
    return "Error: No file ID returned in upload response"


async def handle_input(file_path, user_input):
    """Upload the image, ask the question about it, and return the reply.

    Args:
        file_path: Path of the image to analyse, or ``None``.
        user_input: The user's question.

    Returns:
        The answer text, or a string starting with ``"Error:"``.
    """
    if not LLM_URL:
        # Without a base URL the request target would be "None/...".
        return "Error: LLM_URL environment variable is not set"

    upload_response = await upload_file(LLM_URL, LLM_API, file_path, USER_ID)
    print("Upload response:", upload_response)  # Debug information
    if isinstance(upload_response, str):
        # upload_file reports every failure as a string.
        return upload_response

    file_id = upload_response.get("id")  # Extract file ID from the response
    if not file_id:
        return "Error: No file ID returned from upload"

    chat_response = await send_chat_message(LLM_URL, LLM_API, user_input, file_id)
    print("Chat response:", chat_response)  # Debug information
    return chat_response


# Interface title and description
TITLE = """

Multimodal RAG Playground 💬 輸入工地照片,生成工地場景及相關法規和缺失描述

"""
SUBTITLE = """

TonTon Huang Ph.D. @ 2024/11

"""
LINKS = """ Deep Learning 101 Github | Deep Learning 101 | 台灣人工智慧社團 FB | YouTube
手把手帶你一起踩AI坑 | ComfyUI + Stable Diffuision
白話文手把手帶你科普 GenAI | 大型語言模型直接就打完收工?
什麼是大語言模型,它是什麼?想要嗎? | 那些檢索增強生成要踩的坑
那些語音處理 (Speech Processing) 踩的坑 | 那些自然語言處理 (Natural Language Processing, NLP) 踩的坑
那些ASR和TTS可能會踩的坑 | 那些大模型開發會踩的坑
用PPOCRLabel來幫PaddleOCR做OCR的微調和標註 | 基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析
"""

# Define Gradio interface
file_input = gr.Image(label='圖片上傳', type='filepath')
user_input = gr.Textbox(
    label='輸入問題描述',
    value="分析一下這張工地場景照片",
    placeholder="請輸入您的問題描述...",
)
# NOTE(review): defined but not passed to gr.Interface below, which uses
# outputs="text" instead -- confirm which one is intended.
output_text = gr.Textbox(label="結果輸出", lines=4)

# Example data
examples = [
    ['DEMO/DEMO_0004.jpg', '0004-51'],
    ['DEMO/DEMO_0005.jpg', '0005-92'],
    ['DEMO/DEMO_0006.jpg', '0006-281'],
    ['DEMO/DEMO_0008.jpg', '0008-281'],
    ['DEMO/DEMO_0011.jpg', '0011-108'],
]

with gr.Blocks() as iface:
    gr.HTML(TITLE)
    gr.HTML(SUBTITLE)
    gr.HTML(LINKS)
    gr.Interface(
        fn=handle_input,
        inputs=[file_input, user_input],
        outputs="text",
        examples=examples,
        flagging_mode="never",  # updated here
    )

iface.launch()