Spaces:

DeepLearning101
/

Multimodal-RAG-Agent

Sleeping

App Files Files Community

DeepLearning101 committed on Feb 7

Commit

62a9d23

verified ·

1 Parent(s): f160dc8

Create app.py

Browse files

Files changed (1) hide show

app.py +149 -0

app.py ADDED Viewed

	@@ -0,0 +1,149 @@

+import gradio as gr
+import requests
+import mimetypes
+import json, os
+import asyncio
+import aiohttp
+LLM_API = os.environ.get("LLM_API")  # API key read from the environment (None if unset); sent as the Bearer token
+LLM_URL = os.environ.get("LLM_URL")  # Base URL read from the environment (None if unset); endpoint paths are appended to it
+USER_ID = "HuggingFace Space"  # Placeholder user ID
+async def send_chat_message(LLM_URL, LLM_API, user_input, file_id):
+    payload = {
+        "inputs": {},
+        "query": user_input,
+        "response_mode": "streaming",
+        "conversation_id": "",
+        "user": USER_ID,
+        "files": [
+            {
+                "type": "image",
+                "transfer_method": "local_file",
+                "upload_file_id": file_id
+            }
+        ]
+    }
+    print("Sending chat message payload:", payload)  # Debug information
+    async with aiohttp.ClientSession() as session:
+        async with session.post(
+            f"{LLM_URL}/chat-messages",
+            headers={"Authorization": f"Bearer {LLM_API}"},
+            json=payload
+        ) as response:
+            print("Request URL:", f"{LLM_URL}/chat-messages")
+            print("Response status code:", response.status)
+            if response.status == 404:
+                return "Error: Endpoint not found (404)"
+            last_thought = None
+            async for line in response.content:
+                if line:
+                    try:
+                        # 去掉前面的 "data: " 字串並解析 JSON
+                        line_data = json.loads(line.decode("utf-8").replace("data: ", ""))
+                        print("Line data:", line_data)  # Debug: 輸出每行的資料內容
+                        # 提取含有 `thought` 或 `answer` 的資料
+                        if line_data.get("data", {}).get("outputs", {}).get("answer"):
+                            last_thought = line_data["data"]["outputs"]["answer"]
+                            break  # 找到答案後退出迴圈
+                    except (IndexError, json.JSONDecodeError) as e:
+                        print("Error parsing line:", e)  # Debug: 輸出解析錯誤訊息
+                        continue
+            if last_thought:
+                return last_thought.strip()
+            else:
+                return "Error: No thought or answer found in the response"
+async def upload_file(LLM_URL, LLM_API, file_path, user_id):
+    if not os.path.exists(file_path):
+        return f"Error: File {file_path} not found"
+    mime_type, _ = mimetypes.guess_type(file_path)
+    with open(file_path, 'rb') as f:
+        async with aiohttp.ClientSession() as session:
+            form_data = aiohttp.FormData()
+            form_data.add_field('file', f, filename=file_path, content_type=mime_type)
+            form_data.add_field('user', user_id)
+            async with session.post(
+                f"{LLM_URL}/files/upload",
+                headers={"Authorization": f"Bearer {LLM_API}"},
+                data=form_data
+            ) as response:
+                print("Upload response status code:", response.status)  # Debug information
+                if response.status == 404:
+                    return "Error: Endpoint not found (404)"
+                response_text = await response.text()
+                print("Raw upload response text:", response_text)  # Debug information
+                try:
+                    response_json = json.loads(response_text)
+                    file_id = response_json.get("id")
+                    if file_id:
+                        return response_json
+                    else:
+                        return "Error: No file ID returned in upload response"
+                except json.JSONDecodeError:
+                    return "Error: Invalid JSON response"
+async def handle_input(file_path, user_input):
+    upload_response = await upload_file(LLM_URL, LLM_API, file_path, USER_ID)
+    print("Upload response:", upload_response)  # Debug information
+    if isinstance(upload_response, str) and "Error" in upload_response:
+        return upload_response
+    file_id = upload_response.get("id")  # Extract file ID from the response
+    if not file_id:
+        return "Error: No file ID returned from upload"
+    chat_response = await send_chat_message(LLM_URL, LLM_API, user_input, file_id)
+    print("Chat response:", chat_response)  # Debug information
+    return chat_response
+# Define the interface title and description (HTML snippets rendered with gr.HTML)
+TITLE = """<h1>Multimodal RAG Playground 💬 輸入工地照片，生成工地場景及相關法規和缺失描述</h1>"""  # page heading
+SUBTITLE = """<h2><a href='https://www.twman.org' target='_blank'>TonTon Huang Ph.D. @ 2024/11 </a><br></h2>"""  # author line with link
+LINKS = """
+<a href='https://github.com/Deep-Learning-101' target='_blank'>Deep Learning 101 Github</a> | <a href='http://deeplearning101.twman.org' target='_blank'>Deep Learning 101</a> | <a href='https://www.facebook.com/groups/525579498272187/' target='_blank'>台灣人工智慧社團 FB</a> | <a href='https://www.youtube.com/c/DeepLearning101' target='_blank'>YouTube</a><br>
+<a href='https://reurl.cc/g6GlZX' target='_blank'>手把手帶你一起踩AI坑</a> | <a href='https://blog.twman.org/2024/11/diffusion.html' target='_blank'>ComfyUI + Stable Diffuision</a><br>
+<a href='https://blog.twman.org/2024/08/LLM.html' target='_blank'>白話文手把手帶你科普 GenAI</a> | <a href='https://blog.twman.org/2024/09/LLM.html' target='_blank'>大型語言模型直接就打完收工？</a><br>
+<a href='https://blog.twman.org/2023/04/GPT.html' target='_blank'>什麼是大語言模型，它是什麼？想要嗎？</a> | <a href='https://blog.twman.org/2024/07/RAG.html' target='_blank'>那些檢索增強生成要踩的坑 </a><br>
+<a href='https://blog.twman.org/2021/04/ASR.html' target='_blank'>那些語音處理 (Speech Processing) 踩的坑</a> | <a href='https://blog.twman.org/2021/04/NLP.html' target='_blank'>那些自然語言處理 (Natural Language Processing, NLP) 踩的坑</a><br>
+<a href='https://blog.twman.org/2024/02/asr-tts.html' target='_blank'>那些ASR和TTS可能會踩的坑</a> | <a href='https://blog.twman.org/2024/02/LLM.html' target='_blank'>那些大模型開發會踩的坑</a><br>
+<a href='https://blog.twman.org/2023/07/wsl.html' target='_blank'>用PPOCRLabel來幫PaddleOCR做OCR的微調和標註</a> | <a href='https://blog.twman.org/2023/07/HugIE.html' target='_blank'>基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析</a><br>
+"""
+# Define Gradio interface
+file_input = gr.Image(label='圖片上傳', type='filepath')
+user_input = gr.Textbox(label='輸入問題描述', placeholder="請輸入您的問題描述...")
+output_text = gr.Textbox(label="結果輸出", lines=4)
+# 範例資料
+examples = [
+    ['DEMO/0004.jpg', '0004-51'],
+    ['DEMO/0005.jpg', '0005-92'],
+    ['DEMO/0006.jpg', '0006-281'],
+    ['DEMO/0008.jpg', '0008-281'],
+    ['DEMO/0011.jpg', '0011-108'],
+    ['DEMO/0013.jpg', '0013-108'],
+    ['DEMO/0014.jpg', '0014-108'],
+    ['DEMO/0015.jpg', '0015-108'],
+]
+with gr.Blocks() as iface:
+    gr.HTML(TITLE)
+    gr.HTML(SUBTITLE)
+    gr.HTML(LINKS)
+    gr.Interface(
+        fn=handle_input,
+        inputs=[file_input, user_input],
+        outputs="text",
+        examples=examples,
+        flagging_mode="never"  # 更新此處
+    )
+iface.launch()