File size: 2,341 Bytes
4f6ec29 743b29b 90f994a 743b29b 181ba1b 743b29b 181ba1b 743b29b 181ba1b 4f6ec29 743b29b 181ba1b 743b29b 4f6ec29 743b29b 181ba1b 743b29b 181ba1b 743b29b 4f6ec29 743b29b 90f994a 181ba1b 90f994a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import os
import time

import gradio as gr
import requests
# Ollama API地址 - 使用内部地址
OLLAMA_API_URL = "http://127.0.0.1:11434/api/generate"
def generate_text(prompt):
    """Send *prompt* to the local Ollama generate endpoint and return the reply text.

    Retries up to 3 times on connection errors, sleeping 2 seconds between
    attempts. Never raises: every failure path returns a user-facing
    (Chinese) error string so the Gradio UI always has something to display.

    Bug fix: the retry back-off calls ``time.sleep`` — the module was never
    imported, so the resulting NameError was swallowed by the outer
    ``except Exception`` and retries silently aborted. ``import time`` is
    now at the top of the file.
    """
    data = {
        "model": "llama3-zh",
        "prompt": prompt,
        "stream": False,  # request the full answer in one response body
    }
    try:
        max_retries = 3
        for attempt in range(max_retries):
            try:
                response = requests.post(
                    OLLAMA_API_URL,
                    json=data,
                    timeout=120,
                    proxies={'http': None, 'https': None}  # bypass any system proxy for the loopback call
                )
                print(f"Attempt {attempt + 1}: Status {response.status_code}")
                if response.status_code == 200:
                    return response.json()["response"]
                elif response.status_code == 404:
                    # Model not pulled/loaded yet — no point retrying.
                    return "错误:模型未找到,请等待模型加载完成后重试"
                else:
                    # Other HTTP errors: retry silently; report only on the last attempt.
                    if attempt == max_retries - 1:
                        return f"错误:{response.status_code} - {response.text}"
            except requests.exceptions.ConnectionError:
                if attempt == max_retries - 1:
                    return "错误:无法连接到 Ollama 服务"
                print(f"连接失败,尝试重试 {attempt + 1}/{max_retries}")
                time.sleep(2)  # brief back-off before the next attempt
    except Exception as e:
        # Catch-all boundary: timeouts, JSON/key errors, etc. become a display string.
        return f"错误:{str(e)}"
# Gradio界面
def chat_interface(prompt):
    """Gradio callback: validate the user's prompt, then delegate to the model.

    Whitespace-only input is rejected with a fixed message so the Ollama
    API is never called with an empty request.
    """
    cleaned = prompt.strip()
    if cleaned:
        return generate_text(prompt)
    return "请输入有效的问题"
# 创建Gradio应用
# Build the Gradio UI: a single text-in / text-out interface wrapping chat_interface.
iface = gr.Interface(
    fn=chat_interface,
    # Multi-line input box for the user's question.
    inputs=gr.Textbox(
        lines=3,
        placeholder="请输入您的问题...",
        label="输入"
    ),
    # Read-only output box for the model's answer.
    outputs=gr.Textbox(
        lines=5,
        label="回答"
    ),
    title="Llama3.1-8B-Chinese-Chat (CPU)",
    description="与 Llama3.1-8B-Chinese-Chat 模型对话(CPU 模式)",
    # Clickable sample prompts shown below the interface.
    examples=[
        ["你好,请做个自我介绍"],
        ["解释一下量子计算的基本原理"],
        ["写一首关于春天的诗"]
    ]
)
# 启动应用
# Script entry point: start the web server.
if __name__ == "__main__":
    iface.launch(
        server_name="0.0.0.0",  # bind all interfaces so the UI is reachable from outside the host/container
        server_port=7860,
        max_threads=1  # CPU inference: serve one request at a time
    )