Spaces:

julin90
/

20250724001

Sleeping

App Files Files Community

julin90 committed on Jul 24

Commit

7682901

1 Parent(s): 8c2ae28

更新語音助理功能

Browse files

Files changed (3) hide show

.gitignore.txt +17 -0
app.py +66 -0
requirements.txt +5 -0

.gitignore.txt ADDED Viewed

	@@ -0,0 +1,17 @@

+# 忽略虛擬環境和 cache
+__pycache__/
+*.py[cod]
+*.tmp
+*.log
+*.mp3
+*.wav
+# 忽略 huggingface token
+*.env
+# VSCode 和 Jupyter
+.vscode/
+.ipynb_checkpoints/
+# 不可上傳的大模型（如vosk-model）
+vosk-model-small-cn-0.22/

app.py ADDED Viewed

	@@ -0,0 +1,66 @@

+# app.py
+import gradio as gr
+import os
+from gtts import gTTS
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+import json
+import tempfile
+import base64
+# 使用小模型，因Hugging Face Space限制
+MODEL_NAME = "Qwen/Qwen1.5-0.5B-Chat"
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, trust_remote_code=True).eval()
+# 語音辨識 - 用 placeholder，不在huggingface運行vosk（因大小限制）
+def fake_transcribe(audio):
+    return "你好，請問有什麼可以幫忙的？"
+# 回答問題
+def answer_question(text):
+    messages = [
+        {"role": "user", "content": text}
+    ]
+    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model.generate(input_ids, max_new_tokens=200)
+    response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
+    return response
+# TTS 文字轉語音
+def text_to_speech(text):
+    tts = gTTS(text, lang='zh')
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
+        tts.save(fp.name)
+        with open(fp.name, "rb") as f:
+            audio_b64 = base64.b64encode(f.read()).decode("utf-8")
+    return f"data:audio/mp3;base64,{audio_b64}"
+# 整合流程
+def chat_pipeline(audio_input=None, text_input=None):
+    if audio_input:
+        text = fake_transcribe(audio_input)
+    elif text_input:
+        text = text_input
+    else:
+        return "請輸入問題或語音", None
+    response = answer_question(text)
+    speech_url = text_to_speech(response)
+    return response, speech_url
+# Gradio介面
+with gr.Blocks() as demo:
+    gr.Markdown("## 🎙️ 語音助理（Hugging Face Space 測試版）")
+    with gr.Row():
+        mic = gr.Audio(source="microphone", type="filepath", label="輸入語音")
+        text_input = gr.Textbox(label="或輸入文字")
+    with gr.Row():
+        submit = gr.Button("送出")
+    output_text = gr.Textbox(label="回答")
+    output_audio = gr.Audio(label="語音回答", type="filepath")
+    submit.click(fn=chat_pipeline, inputs=[mic, text_input], outputs=[output_text, output_audio])
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio==4.28.3
+gtts==2.5.1
+transformers==4.41.1
+torch==2.3.0
+requests==2.31.0