# Source: Hugging Face Space "llama.cpp", file app.py (1.77 kB).
# Last commit: e721849 "Update app.py" by kat33.
import gradio as gr
#import transformers
#from transformers import pipeline
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Hugging Face Hub coordinates of the quantised GGML weights used by this app.
model_repo = "TheBloke/Nous-Hermes-13B-GGML"
model_filename = "nous-hermes-13b.ggmlv3.q4_K_S.bin"
# Direct URL of the same file, kept for reference:
# https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_K_S.bin
def download_model():
    """Download the model file from the Hugging Face Hub.

    The repository and filename come from the module-level ``model_repo``
    and ``model_filename`` constants.

    Returns:
        Whatever ``hf_hub_download`` returns for the requested file (the
        result is later passed to ``Llama(model_path=...)``).
    """
    # Adapted from
    # https://github.com/OpenAccess-AI-Collective/ggml-webui/blob/main/tabbed.py
    # The original popped "repo_id"/"filename" from an undefined ``hub_config``
    # and never used the values; the module constants are the single source.
    return hf_hub_download(repo_id=model_repo, filename=model_filename)
def question_answer(context, question):
    """Answer ``question`` about ``context`` with the llama.cpp model.

    Args:
        context: Background text placed at the start of the prompt.
        question: The question appended after the context.

    Returns:
        The ``"text"`` field of the first entry in the completion's
        ``"choices"`` list.

    The completion object is a plain dict of the form::

        {
            "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
            "object": "text_completion",
            "created": 1679561337,
            "model": "./models/7B/ggml-model.bin",
            "choices": [
                {
                    "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
                    "index": 0,
                    "logprobs": None,
                    "finish_reason": "stop"
                }
            ],
            "usage": {
                "prompt_tokens": 14,
                "completion_tokens": 28,
                "total_tokens": 42
            }
        }
    """
    model_path = download_model()
    prompt = (
        f"{context}\n\nQuestion: \"\"\"\n{question}"
        "\nPlease use markdown formatting for answer. \nAnswer:\n"
    )
    # NOTE(review): the model is re-loaded on every request; consider loading
    # it once at start-up if request latency matters.
    llm = Llama(model_path=model_path)
    # NOTE(review): echo=True presumably makes the returned text include the
    # prompt as well as the completion -- confirm this is intended.
    output = llm(prompt, max_tokens=33, stop=["### Response", "\n"], echo=True)
    print(output)
    # The completion is a dict (see docstring), so it must be indexed by key;
    # attribute access (``output.choices``) raises AttributeError.
    return output["choices"][0]["text"]
# Two text inputs (context, question) and a single text output.
# question_answer returns one string, so exactly one output component is
# declared; declaring two makes Gradio fail when it receives a single value.
app = gr.Interface(fn=question_answer, inputs=["text", "text"], outputs="text")
app.launch()