Spaces:

kat33
/

llama.cpp

Runtime error

App Files Files Community

llama.cpp / app.py

kat33

Update app.py

e721849 over 1 year ago

raw

history blame

1.77 kB

	import threading

	import gradio as gr
	#import transformers
	#from transformers import pipeline
	from llama_cpp import Llama
	from huggingface_hub import hf_hub_download

	# Hugging Face Hub repository that is passed as ``repo_id`` when the model
	# file is fetched with ``hf_hub_download``.
	model_repo = "TheBloke/Nous-Hermes-13B-GGML"
	# File inside that repository, passed as ``filename`` to the same call.
	model_filename = "nous-hermes-13b.ggmlv3.q4_K_S.bin"

	#model="TheBloke/Nous-Hermes-13B-GGML"
	#model="https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_K_S.bin"

	def download_model():
	    """Fetch the configured model file from the Hugging Face Hub.

	    Downloads ``model_filename`` from the ``model_repo`` repository with
	    ``hf_hub_download`` and returns that call's result, which the caller
	    passes to ``Llama(model_path=...)``.

	    Adapted from
	    https://github.com/OpenAccess-AI-Collective/ggml-webui/blob/main/tabbed.py
	    """
	    # The module-level constants are used directly. The previous version
	    # popped ``repo_id``/``filename`` from a ``hub_config`` mapping that is
	    # not defined anywhere in this file and then ignored the popped values.
	    return hf_hub_download(repo_id=model_repo, filename=model_filename)

	# Shared Llama instance, created lazily so the model file is downloaded and
	# loaded once instead of on every request.
	_llm = None
	# Serializes model loading and inference on the shared instance, since the
	# handler may be invoked from more than one thread.
	_llm_lock = threading.Lock()


	def _get_llm():
	    """Return the shared ``Llama`` instance, loading it on the first call.

	    Callers must hold ``_llm_lock``.
	    """
	    global _llm
	    if _llm is None:
	        _llm = Llama(model_path=download_model())
	    return _llm


	def question_answer(context, question):
	    """Answer ``question`` about ``context`` using the local llama.cpp model.

	    Args:
	        context: Background text placed at the start of the prompt.
	        question: Question text appended after the context.

	    Returns:
	        The ``text`` field of the first completion choice. The completion is
	        requested with ``echo=True``; in the sample response below the text
	        contains the prompt followed by the answer.
	    """
	    text = (
	        context
	        + "\n\nQuestion: \"\"\"\n"
	        + question
	        + "\nPlease use markdown formatting for answer. \nAnswer:\n"
	    )
	    with _llm_lock:
	        llm = _get_llm()
	        output = llm(text, max_tokens=33, stop=["### Response", "\n"], echo=True)
	    print(output)
	    # Output is a plain dict of the form:
	    # {
	    #   "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
	    #   "object": "text_completion",
	    #   "created": 1679561337,
	    #   "model": "./models/7B/ggml-model.bin",
	    #   "choices": [
	    #     {
	    #       "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
	    #       "index": 0,
	    #       "logprobs": None,
	    #       "finish_reason": "stop"
	    #     }
	    #   ],
	    #   "usage": {
	    #     "prompt_tokens": 14,
	    #     "completion_tokens": 28,
	    #     "total_tokens": 42
	    #   }
	    # }
	    # so the text must be read with item access; ``output.choices`` would
	    # raise AttributeError on a dict.
	    return output["choices"][0]["text"]

	#generator = pipeline(model=model, device_map="auto")

	#return generator(text)


	# question_answer returns a single string, so exactly one output component
	# is declared; the previous two-component list did not match the single
	# return value.
	app = gr.Interface(fn=question_answer, inputs=["text", "text"], outputs="text")
	app.launch()