import os   # to check if file exists
import sys  # to flush stdout

import gradio as gr
#import transformers
#from transformers import pipeline
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

model_repo = "TheBloke/Nous-Hermes-13B-GGML"
model_filename = "nous-hermes-13b.ggmlv3.q4_K_S.bin"
#model="TheBloke/Nous-Hermes-13B-GGML"
#model="https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_K_S.bin"

def download_model():
    # See https://github.com/OpenAccess-AI-Collective/ggml-webui/blob/main/tabbed.py
    file_path = "/home/user/.cache/huggingface/hub/models--TheBloke--Nous-Hermes-13B-GGML/snapshots/f1a48f90a07550e1ba30e347b2be69d4fa5e393b/nous-hermes-13b.ggmlv3.q4_K_S.bin"
    if os.path.exists(file_path):
        return file_path
    else:
        print("Downloading model...")
        sys.stdout.flush()
        file = hf_hub_download(repo_id=model_repo, filename=model_filename)
        print("Downloaded " + file)
        return file
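
# Simpler alternative (a sketch): hf_hub_download already returns the locally
# cached file when it is present, so the hard-coded snapshot path above only
# skips the library's own cache lookup:
#   mfile = hf_hub_download(repo_id=model_repo, filename=model_filename)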

def question_answer(context, question, max_tokens):
    mfile = download_model()
    # Structure the prompt to make it easier for the AI:
    # fence the question in triple quotes and ask for a markdown-formatted answer.
    question1 = "\"\"\"\n" + question + "\n\"\"\"\n"
    text = context + "\n\nQuestion: " + question1 + "\nPlease use markdown formatting for answer. \nAnswer:\n"
    llm = Llama(model_path=mfile)
    # Stop on "### Response" (the Alpaca-style delimiter Nous-Hermes is trained on).
    # echo=True means the returned text includes the prompt.
    output = llm(text, max_tokens=max_tokens, stop=["### Response"], echo=True)
    print(output)
    # Return plain strings; the second value is rendered by the Markdown output
    # component declared in the Interface below.
    return question, output['choices'][0]['text']

'''
Output is of the form:
{
  "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
  "object": "text_completion",
  "created": 1679561337,
  "model": "./models/7B/ggml-model.bin",
  "choices": [
    {
      "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
      "index": 0,
      "logprobs": None,
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 14,
    "completion_tokens": 28,
    "total_tokens": 42
  }
}
'''
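
# Because the call above passes echo=True, choices[0]['text'] contains the
# prompt followed by the completion. A sketch for recovering just the
# completion (assumes the echoed prompt is a verbatim prefix of the output):
#   full = output['choices'][0]['text']
#   answer_only = full[len(text):] if full.startswith(text) else full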

#generator = pipeline(model=model, device_map="auto")
#return generator(text)

app = gr.Interface(
    fn=question_answer,
    inputs=["text", "text", gr.Slider(33, 2333, label="Max tokens")],
    # "markdown" renders the answer; returning a gr.Markdown component
    # into a plain "text" output was the earlier runtime error.
    outputs=["textbox", "markdown"],
)
app.launch()
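
# Run locally with `python app.py`; Gradio serves the UI at http://127.0.0.1:7860
# by default. On a Hugging Face Space, launch() needs no extra arguments.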