Spaces:

kat33
/

llama.cpp

Runtime error

File size: 2,283 Bytes

2e1a289
 
 
c1e6490
cd5e755
 
 
e721849
cf3dc70
e721849
 
 
 
 
 
2fcbfe7
e721849
36d9e29
2e1a289
 
 
 
 
 
 
 
 
 
3fae970
ab4a091
e721849
9fe5d5a
ab4a091
 
e721849
ab4a091
cd5e755
ab4a091
 
cd5e755
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3fae970
c1e6490
ab4a091
85c036e

import os   # to check if file exists
import sys  # to flush stdout
import threading

import gradio as gr
#import transformers
#from transformers import pipeline
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

model_repo="TheBloke/Nous-Hermes-13B-GGML"
model_filename="nous-hermes-13b.ggmlv3.q4_K_S.bin"

#model="TheBloke/Nous-Hermes-13B-GGML"
#model="https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_K_S.bin"

def download_model():
    """Return a local filesystem path to the model weights.

    If the weights already exist at the expected Hugging Face cache
    location, that path is returned directly. Otherwise the file named by
    ``model_filename`` is fetched from ``model_repo`` with
    ``hf_hub_download`` and the path it reports is returned.
    """
    # See https://github.com/OpenAccess-AI-Collective/ggml-webui/blob/main/tabbed.py
    cached_path = "/home/user/.cache/huggingface/hub/models--TheBloke--Nous-Hermes-13B-GGML/snapshots/f1a48f90a07550e1ba30e347b2be69d4fa5e393b/nous-hermes-13b.ggmlv3.q4_K_S.bin"

    if not os.path.exists(cached_path):
        # Flush right away so the progress message is visible before the
        # download call starts.
        print("Downloading model...")
        sys.stdout.flush()
        fetched_path = hf_hub_download(
            filename=model_filename,
            repo_id=model_repo,
        )
        print("Downloaded " + fetched_path)
        return fetched_path

    return cached_path

# Loaded Llama instances keyed by model path, so the weights are read from
# disk once per process instead of once per request.
_LLM_CACHE = {}
# Serializes model loading and inference: the cached Llama instance is shared
# by every request handled in this process.
_LLM_LOCK = threading.Lock()


def _get_llm(model_path):
    """Return the cached Llama for *model_path*, loading it on first use.

    The caller must hold ``_LLM_LOCK``.
    """
    llm = _LLM_CACHE.get(model_path)
    if llm is None:
        llm = Llama(model_path=model_path)
        _LLM_CACHE[model_path] = llm
    return llm


def question_answer(context, question, max_tokens):
    """Answer *question* about *context* using the local llama.cpp model.

    Args:
        context: Background text placed at the start of the prompt.
        question: The user's question; it is wrapped in triple quotes in
            the prompt to set it apart from the context.
        max_tokens: Upper bound on generated tokens. Coerced to ``int``
            because UI sliders may deliver a float.

    Returns:
        A ``(question, text)`` tuple, where ``text`` is the string found at
        ``output['choices'][0]['text']`` of the completion. The model is
        called with ``echo=True``, so llama-cpp-python includes the prompt
        in that text.
    """
    model_path = download_model()

    # structure the prompt to make it easier for the ai
    quoted_question = "\"\"\"\n" + question + "\n\"\"\"\n"
    prompt = (
        context
        + "\n\nQuestion: "
        + quoted_question
        + "\nPlease use markdown formatting for answer. \nAnswer:\n"
    )

    with _LLM_LOCK:
        llm = _get_llm(model_path)
        output = llm(
            prompt,
            max_tokens=int(max_tokens),
            stop=["### Response"],
            echo=True,
        )
    print(output)

    # Output is of the form:
    # {
    #   "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
    #   "object": "text_completion",
    #   "created": 1679561337,
    #   "model": "./models/7B/ggml-model.bin",
    #   "choices": [
    #     {
    #       "text": "Q: Name the planets in the solar system? A: Mercury, ...",
    #       "index": 0,
    #       "logprobs": None,
    #       "finish_reason": "stop"
    #     }
    #   ],
    #   "usage": {
    #     "prompt_tokens": 14,
    #     "completion_tokens": 28,
    #     "total_tokens": 42
    #   }
    # }
    #
    # Return the plain string: the interface output is a text component, so
    # it takes a value rather than a gr.Markdown component instance.
    return question, output["choices"][0]["text"]


# Web UI: two free-text inputs (context, question) plus a slider for the
# max_tokens argument, wired to question_answer; two text outputs receive
# the values it returns.
app = gr.Interface(
    fn=question_answer,
    inputs=[
        "text",
        "text",
        gr.Slider(33, 2333),
    ],
    outputs=[
        "textbox",
        "text",
    ],
)
# Start serving the interface.
app.launch()