Spaces:
Runtime error
Runtime error
File size: 2,809 Bytes
2e1a289 f20f7fd 2e1a289 c1e6490 cd5e755 e721849 cf3dc70 e721849 2fcbfe7 e721849 36d9e29 2e1a289 3fae970 ab4a091 e721849 9fe5d5a ab4a091 e721849 ab4a091 cd5e755 f20f7fd ab4a091 f20f7fd cd5e755 f20f7fd cd5e755 f20f7fd cd5e755 3fae970 c1e6490 fa52c5f 85c036e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import os # to check if file exists
import sys # to flush stdout
import threading # to serialize access to the shared model

import gradio as gr
import markdown # to render answer
#import transformers
#from transformers import pipeline
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# Hugging Face Hub repository and file name of the GGML model used by the app.
model_repo = "TheBloke/Nous-Hermes-13B-GGML"
model_filename = "nous-hermes-13b.ggmlv3.q4_K_S.bin"
# Direct download URL of the same file, kept for reference:
# https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_K_S.bin
def download_model():
    """Return the local path of the model file, downloading it if needed.

    A hard-coded Hugging Face cache location is checked first; when the file
    is already there its path is returned immediately. Otherwise the file
    named by ``model_filename`` is fetched from ``model_repo`` with
    ``hf_hub_download`` and the path reported by that call is returned.
    """
    # See https://github.com/OpenAccess-AI-Collective/ggml-webui/blob/main/tabbed.py
    cached_path = "/home/user/.cache/huggingface/hub/models--TheBloke--Nous-Hermes-13B-GGML/snapshots/f1a48f90a07550e1ba30e347b2be69d4fa5e393b/nous-hermes-13b.ggmlv3.q4_K_S.bin"
    if os.path.exists(cached_path):
        return cached_path

    # Flush right away so the message shows up in the logs before the
    # (potentially long) download starts.
    print("Downloading model...", flush=True)
    downloaded_path = hf_hub_download(repo_id=model_repo, filename=model_filename)
    print("Downloaded " + downloaded_path)
    return downloaded_path
# Shared Llama instance, created on first use, and the lock that serializes
# access to it (Gradio may run several requests in parallel threads).
_llm = None
_llm_lock = threading.Lock()


def _get_llm():
    """Return the shared Llama instance, loading the model on first use."""
    global _llm
    if _llm is None:
        _llm = Llama(model_path=download_model())
    return _llm


def question_answer(context, question, max_tokens):
    """Answer ``question`` about ``context`` with the local Llama model.

    Args:
        context: Background text placed at the start of the prompt.
        question: The question; it is wrapped in triple quotes in the prompt.
        max_tokens: Upper bound on generated tokens. Converted to ``int``
            because the UI slider may deliver a float.

    Returns:
        A ``(question, html_answer)`` tuple, where ``html_answer`` is the
        model's answer rendered from markdown to HTML.
    """
    # structure the prompt to make it easier for the ai
    quoted_question = "\"\"\"\n" + question + "\n\"\"\"\n"
    text = (
        context
        + "\n\nQuestion: "
        + quoted_question
        + "\nPlease use markdown formatting for answer. \nAnswer:\n"
    )

    # Loading the model is very expensive, so it is done once and reused
    # instead of constructing a new Llama object for every request. The lock
    # keeps concurrent requests from using the shared instance at once.
    with _llm_lock:
        llm = _get_llm()
        output = llm(
            text,
            max_tokens=int(max_tokens),
            stop=["### Response"],
            echo=True,
        )
    print(output)

    # echo=True returns prompt + completion; remove the prompt and leave
    # only the answer
    answer = output['choices'][0]['text']
    answer = answer.replace(text, "", 1)

    # render the markdown and return the html and question
    html_answer = markdown.markdown(answer)
    return question, html_answer
'''
The dict returned by the llm(...) call in question_answer is of the form:
{
"id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
"object": "text_completion",
"created": 1679561337,
"model": "./models/7B/ggml-model.bin",
"choices": [
{
"text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
"index": 0,
"logprobs": None,
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 14,
"completion_tokens": 28,
"total_tokens": 42
}
}
'''
# old transformers code
#generator = pipeline(model=model, device_map="auto")
#return generator(text)
# Build the web UI around question_answer: two text inputs and a slider are
# passed positionally as (context, question, max_tokens); the returned
# question is shown in a textbox and the rendered answer as HTML.
ui_inputs = ["text", "text", gr.Slider(33, 2333)]
ui_outputs = ["textbox", "html"]
app = gr.Interface(fn=question_answer, inputs=ui_inputs, outputs=ui_outputs)
app.launch()
|