import gradio as gr
# import transformers
# from transformers import pipeline
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

model_repo = "TheBloke/Nous-Hermes-13B-GGML"
model_filename = "nous-hermes-13b.ggmlv3.q4_K_S.bin"
# model = "TheBloke/Nous-Hermes-13B-GGML"
# model = "https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_K_S.bin"


def download_model():
    # See https://github.com/OpenAccess-AI-Collective/ggml-webui/blob/main/tabbed.py
    # hf_hub_download caches the file locally, so repeat calls are cheap.
    file = hf_hub_download(repo_id=model_repo, filename=model_filename)
    return file


def question_answer(context, question):
    mfile = download_model()
    text = context + "\n\nQuestion:\n" + question + "\nPlease use markdown formatting for the answer.\nAnswer:\n"
    # Note: the model is reloaded on every request; max_tokens=33 caps the
    # answer length, and the "\n" stop token ends generation at the first newline.
    llm = Llama(model_path=mfile)
    output = llm(text, max_tokens=33, stop=["### Response", "\n"], echo=True)
    print(output)
    # llama-cpp-python returns a plain dict (see the sample below),
    # so index into it rather than using attribute access.
    return output["choices"][0]["text"]


'''
Output is of the form:
{
  "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
  "object": "text_completion",
  "created": 1679561337,
  "model": "./models/7B/ggml-model.bin",
  "choices": [
    {
      "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
      "index": 0,
      "logprobs": None,
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 14,
    "completion_tokens": 28,
    "total_tokens": 42
  }
}
'''

# generator = pipeline(model=model, device_map="auto")
# return generator(text)

# question_answer returns a single string, so declare a single output component.
app = gr.Interface(fn=question_answer, inputs=["text", "text"], outputs="text")
app.launch()
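
# A minimal sketch of exercising the handler directly, without the Gradio UI.
# The context/question strings below are illustrative placeholders, not from
# the source:
#
#   answer = question_answer(
#       "The Eiffel Tower is located in Paris, France.",
#       "Where is the Eiffel Tower?",
#   )
#   print(answer)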