Spaces:

akashD22
/

llama2-chat-prompteval

Runtime error

akashD22 committed on Jul 26

Commit

b9fcc4a

verified ·

1 Parent(s): 6beb042

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import gradio as gr
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+# Download the GGUF weights into the Space's Hugging Face cache.
+# The repo publishes one file per quantization level, named
+# "llama-2-7b-chat.<QUANT>.gguf"; there is no "llama2-7b-chat.gguf" file, so
+# the exact quantized filename must be requested. Q4_0 matches the UI title.
+model_path = hf_hub_download(
+    repo_id="TheBloke/Llama-2-7B-Chat-GGUF",
+    filename="llama-2-7b-chat.Q4_0.gguf",
+)
+# Load the model once at module level so every request reuses this instance.
+llm = Llama(model_path=model_path)
+def generate(prompt: str, temperature: float, max_tokens: int):
+    """Return the text of the model's first completion for ``prompt``.
+
+    The prompt is forwarded to the module-level ``llm`` unchanged, together
+    with the requested sampling temperature and token limit.
+    """
+    completion = llm(prompt, max_tokens=max_tokens, temperature=temperature)
+    first_choice = completion["choices"][0]
+    return first_choice["text"]
+# Web UI: a prompt box plus two sliders whose values are passed, in order,
+# as the arguments of generate(); the returned text is shown as plain text.
+iface = gr.Interface(
+    fn=generate,
+    inputs=[
+        gr.Textbox(lines=4, label="Prompt"),
+        # gr.Slider's third positional parameter is `value`, not `step`.
+        # Passing the step positionally while also giving value= raises
+        # "TypeError: got multiple values for argument 'value'", so every
+        # slider argument is passed by keyword.
+        gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=0.7, label="Temperature"),
+        gr.Slider(minimum=16, maximum=512, step=16, value=128, label="Max Tokens"),
+    ],
+    outputs="text",
+    title="Llama-2-7B-Chat (Q4_0 on CPU)",
+)
+if __name__ == "__main__":
+    # Listen on all interfaces at port 7860 when run as a script.
+    iface.launch(server_port=7860, server_name="0.0.0.0")