File size: 2,341 Bytes
4f6ec29
743b29b
90f994a
743b29b
181ba1b
 
743b29b
 
 
 
 
 
 
 
181ba1b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
743b29b
181ba1b
4f6ec29
743b29b
 
181ba1b
 
743b29b
4f6ec29
743b29b
 
 
181ba1b
 
 
 
 
 
 
 
 
743b29b
181ba1b
 
 
 
 
 
743b29b
4f6ec29
743b29b
90f994a
 
 
 
181ba1b
90f994a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import time

import gradio as gr
import requests

# Ollama API endpoint — internal loopback address (not exposed externally)
OLLAMA_API_URL = "http://127.0.0.1:11434/api/generate"

def generate_text(prompt):
    """Send *prompt* to the local Ollama /api/generate endpoint and return the reply.

    Retries up to 3 times on connection failures, sleeping 2 seconds between
    attempts. Every failure mode is converted into a human-readable error
    string (never raised), so the Gradio callback cannot crash the UI.
    """
    data = {
        "model": "llama3-zh",
        "prompt": prompt,
        "stream": False  # ask for a single complete response, not a stream
    }
    try:
        # Retry loop: the Ollama server may still be starting up.
        max_retries = 3
        for attempt in range(max_retries):
            try:
                response = requests.post(
                    OLLAMA_API_URL,
                    json=data,
                    timeout=120,
                    proxies={'http': None, 'https': None}  # bypass any configured proxy
                )
                print(f"Attempt {attempt + 1}: Status {response.status_code}")

                if response.status_code == 200:
                    return response.json()["response"]
                elif response.status_code == 404:
                    # Model not found — likely still being pulled/loaded.
                    return "错误:模型未找到,请等待模型加载完成后重试"
                else:
                    # Other HTTP errors: only give up on the final attempt.
                    if attempt == max_retries - 1:
                        return f"错误:{response.status_code} - {response.text}"
            except requests.exceptions.ConnectionError:
                if attempt == max_retries - 1:
                    return "错误:无法连接到 Ollama 服务"
                print(f"连接失败,尝试重试 {attempt + 1}/{max_retries}")
                # BUGFIX: `time` was used here without being imported,
                # raising NameError on the retry path (import added at top of file).
                time.sleep(2)
        # Defensive: the loop returns on its last attempt in every branch,
        # but make the failure explicit rather than falling through to None.
        return "错误:无法连接到 Ollama 服务"
    except Exception as e:
        # Catch-all boundary for the UI callback (timeouts, malformed JSON, ...).
        return f"错误:{str(e)}"

# Gradio callback: validates input, then delegates to the Ollama client.
def chat_interface(prompt):
    """Return the model's answer for *prompt*, or a validation message if blank."""
    text = prompt.strip()
    if text:
        return generate_text(prompt)
    return "请输入有效的问题"

# Build the Gradio application: a single text-in / text-out interface
# backed by chat_interface.
iface = gr.Interface(
    fn=chat_interface,
    inputs=gr.Textbox(
        lines=3, 
        placeholder="请输入您的问题...",
        label="输入"
    ),
    outputs=gr.Textbox(
        lines=5,
        label="回答"
    ),
    title="Llama3.1-8B-Chinese-Chat (CPU)",
    description="与 Llama3.1-8B-Chinese-Chat 模型对话(CPU 模式)",
    # Clickable example prompts shown below the input box.
    examples=[
        ["你好,请做个自我介绍"],
        ["解释一下量子计算的基本原理"],
        ["写一首关于春天的诗"]
    ]
)

# Launch the app when run as a script.
if __name__ == "__main__":
    iface.launch(
        server_name="0.0.0.0",  # listen on all interfaces so the container/host can reach it
        server_port=7860,
        max_threads=1  # single worker — the CPU model handles one request at a time
    )