Hugging Face Spaces status: Runtime error (app source reproduced below).
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the quantized GGUF weights into the Space's HF cache.
# NOTE(review): "openlm-research/openLLaMA-3B" hosts PyTorch checkpoints, not
# GGUF files, so this filename very likely does not exist in that repo and
# hf_hub_download would raise EntryNotFoundError — the probable cause of the
# Space's "Runtime error". TODO: point repo_id/filename at a repo that
# actually publishes a q4_0 GGUF conversion of OpenLLaMA-3B and verify the
# exact filename on the Hub before deploying.
model_path = hf_hub_download(
    repo_id="openlm-research/openLLaMA-3B",
    filename="openLLaMA-3B-GGUF.q4_0.gguf",
)

# CPU-only inference; llama.cpp defaults apply (n_ctx, n_threads, etc.).
llm = Llama(model_path=model_path)
def generate(prompt: str, temperature: float, max_tokens: int) -> str:
    """Run one completion through the llama.cpp model and return its text.

    Args:
        prompt: Text for the model to complete.
        temperature: Sampling temperature (0.0 is effectively greedy).
        max_tokens: Maximum number of tokens to generate.

    Returns:
        The generated completion string.
    """
    # Gradio sliders may deliver floats even for integer ranges; llama.cpp
    # expects an int, so coerce defensively.
    resp = llm(prompt, temperature=temperature, max_tokens=int(max_tokens))
    return resp["choices"][0]["text"]
# Gradio UI wiring.
# BUG FIX: gr.Slider's third positional argument is `value` (the default),
# not `step` — `step` is keyword-only. The original
# gr.Slider(0.0, 1.0, 0.1, ..., value=0.7) therefore passed `value` twice
# and raised "TypeError: got multiple values for argument 'value'".
# All slider arguments are now passed by keyword to avoid the ambiguity.
iface = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(lines=4, label="Prompt"),
        gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=0.7,
                  label="Temperature"),
        gr.Slider(minimum=16, maximum=512, step=16, value=128,
                  label="Max Tokens"),
    ],
    outputs="text",
    title="openLLaMA-3B (Q4_0 on CPU)",
)
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 — the host/port HF Spaces expects.
    iface.launch(server_name="0.0.0.0", server_port=7860)