import os   # to check if file exists
import sys  # to flush stdout

import gradio as gr
#import transformers
#from transformers import pipeline
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

model_repo = "TheBloke/Nous-Hermes-13B-GGML"
model_filename = "nous-hermes-13b.ggmlv3.q4_K_S.bin"
#model="TheBloke/Nous-Hermes-13B-GGML"
#model="https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_K_S.bin"

def download_model():
    # See https://github.com/OpenAccess-AI-Collective/ggml-webui/blob/main/tabbed.py
    file_path = "/home/user/.cache/huggingface/hub/models--TheBloke--Nous-Hermes-13B-GGML/snapshots/f1a48f90a07550e1ba30e347b2be69d4fa5e393b/nous-hermes-13b.ggmlv3.q4_K_S.bin"
    if os.path.exists(file_path):
        return file_path
    else:
        print("Downloading model...")
        sys.stdout.flush()
        file = hf_hub_download(repo_id=model_repo, filename=model_filename)
        print("Downloaded " + file)
        return file
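
# Simpler alternative (a sketch): hf_hub_download already returns the locally
# cached file when it is present, so the hard-coded snapshot path above only
# skips the library's own cache lookup:
#   mfile = hf_hub_download(repo_id=model_repo, filename=model_filename)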

def question_answer(context, question, max_tokens):
    mfile = download_model()
    # Structure the prompt to make it easier for the AI:
    # fence the question in triple quotes and ask for a markdown-formatted answer.
    question1 = "\"\"\"\n" + question + "\n\"\"\"\n"
    text = context + "\n\nQuestion: " + question1 + "\nPlease use markdown formatting for answer. \nAnswer:\n"
    llm = Llama(model_path=mfile)
    # Stop on "### Response" (the Alpaca-style delimiter Nous-Hermes is trained on).
    # echo=True means the returned text includes the prompt.
    output = llm(text, max_tokens=max_tokens, stop=["### Response"], echo=True)
    print(output)
    # Return plain strings; the second value is rendered by the Markdown output
    # component declared in the Interface below.
    return question, output['choices'][0]['text']

'''
Output is of the form:
{
  "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
  "object": "text_completion",
  "created": 1679561337,
  "model": "./models/7B/ggml-model.bin",
  "choices": [
    {
      "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
      "index": 0,
      "logprobs": None,
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 14,
    "completion_tokens": 28,
    "total_tokens": 42
  }
}
'''
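
# Because the call above passes echo=True, choices[0]['text'] contains the
# prompt followed by the completion. A sketch for recovering just the
# completion (assumes the echoed prompt is a verbatim prefix of the output):
#   full = output['choices'][0]['text']
#   answer_only = full[len(text):] if full.startswith(text) else full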

#generator = pipeline(model=model, device_map="auto")
#return generator(text)

app = gr.Interface(
    fn=question_answer,
    inputs=["text", "text", gr.Slider(33, 2333, label="Max tokens")],
    # "markdown" renders the answer; returning a gr.Markdown component
    # into a plain "text" output was the earlier runtime error.
    outputs=["textbox", "markdown"],
)
app.launch()
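
# Run locally with `python app.py`; Gradio serves the UI at http://127.0.0.1:7860
# by default. On a Hugging Face Space, launch() needs no extra arguments.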