import gradio as gr
import requests
import os
import time  # FIX: was missing — time.sleep() in the retry path raised NameError

# Ollama API endpoint - use the internal/local address
OLLAMA_API_URL = "http://127.0.0.1:11434/api/generate"


def generate_text(prompt):
    """Send *prompt* to the local Ollama server and return the model's reply.

    Retries up to ``max_retries`` times on connection failures, sleeping 2 s
    between attempts. Never raises: every failure mode is converted into a
    (Chinese) error string so the Gradio UI always receives displayable text.

    :param prompt: user prompt forwarded verbatim to the model.
    :return: the model's ``response`` field on success, otherwise an error
        message string.
    """
    data = {
        "model": "llama3-zh",
        "prompt": prompt,
        "stream": False
    }
    try:
        # Simple retry loop: transient connection errors are common while
        # the Ollama service / model is still starting up.
        max_retries = 3
        for attempt in range(max_retries):
            try:
                response = requests.post(
                    OLLAMA_API_URL,
                    json=data,
                    timeout=120,
                    proxies={'http': None, 'https': None}  # bypass any env proxies for localhost
                )
                print(f"Attempt {attempt + 1}: Status {response.status_code}")
                if response.status_code == 200:
                    return response.json()["response"]
                elif response.status_code == 404:
                    # Model not loaded yet — no point retrying immediately.
                    return "错误:模型未找到,请等待模型加载完成后重试"
                else:
                    # Other HTTP errors: retry silently, report on last attempt.
                    if attempt == max_retries - 1:
                        return f"错误:{response.status_code} - {response.text}"
            except requests.exceptions.ConnectionError:
                if attempt == max_retries - 1:
                    return "错误:无法连接到 Ollama 服务"
                print(f"连接失败,尝试重试 {attempt + 1}/{max_retries}")
                time.sleep(2)  # back off 2 s before retrying
        # Defensive fallback: loop exhausted without an explicit return.
        return "错误:无法连接到 Ollama 服务"
    except Exception as e:
        return f"错误:{str(e)}"


def chat_interface(prompt):
    """Gradio callback: validate the input, then delegate to generate_text()."""
    if not prompt.strip():
        return "请输入有效的问题"
    return generate_text(prompt)


# Build the Gradio UI: one text input, one text output, a few canned examples.
iface = gr.Interface(
    fn=chat_interface,
    inputs=gr.Textbox(
        lines=3,
        placeholder="请输入您的问题...",
        label="输入"
    ),
    outputs=gr.Textbox(
        lines=5,
        label="回答"
    ),
    title="Llama3.1-8B-Chinese-Chat (CPU)",
    description="与 Llama3.1-8B-Chinese-Chat 模型对话(CPU 模式)",
    examples=[
        ["你好,请做个自我介绍"],
        ["解释一下量子计算的基本原理"],
        ["写一首关于春天的诗"]
    ]
)

# Launch the app (bind all interfaces; single worker thread for CPU inference).
if __name__ == "__main__":
    iface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        max_threads=1
    )