# GGUF_Model / app.py
import logging
import time

import gradio as gr
import spaces
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# GGUF model weights to fetch from the Hugging Face Hub.
repo_id = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF"
filename = "Meta-Llama-3-8B-Instruct.Q8_0.gguf"
try:
    start_time = time.time()
    logger.info("Downloading model....")
    # Download into ./models so the path matches Llama(model_path="models/...") below.
    hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        local_dir="./models",
    )
    end_time = time.time()
    logger.info(f"Download complete. Time taken: {end_time - start_time:.1f} seconds.")
except Exception as e:
    logger.error(f"Unable to download model: {e}")
    raise
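
# hf_hub_download returns the local file path (<local_dir>/<filename>), so the
# weights land at ./models/Meta-Llama-3-8B-Instruct.Q8_0.gguf; an already-present
# file is typically reused rather than re-downloaded on later restarts.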
llm = None
llm_model = None
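
# respond() below loads the model lazily into these globals on the first
# request, so the expensive Llama() construction happens once per process.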
@spaces.GPU(duration=120)
def respond(
    message,
    history,
    temperature,
    max_tokens,
    system_message="You are a helpful assistant.",  # assumed fallback: no UI input supplies a system prompt
):
    """Stream a chat completion for the Gradio ChatInterface.

    ChatInterface calls fn(message, history, *additional_inputs); the two
    sliders below pass temperature and max_tokens, in that order.
    """
    chat_template = MessagesFormatterType.LLAMA_3

    global llm
    global llm_model

    # Load the GGUF weights on the first request (or if the target file changes).
    if llm is None or llm_model != filename:
        llm = Llama(
            model_path=f"models/{filename}",
            flash_attn=True,
            n_gpu_layers=-1,  # offload all layers to the GPU
            n_batch=1024,
            n_ctx=8192,
        )
        llm_model = filename

    provider = LlamaCppPythonProvider(llm)
    agent = LlamaCppAgent(
        provider,
        system_prompt=system_message,
        predefined_messages_formatter_type=chat_template,
        debug_output=True,
    )

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.max_tokens = max_tokens
    settings.stream = True

    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        returns_streaming_generator=True,
        print_output=False,
    )

    # Accumulate tokens and yield the growing string so Gradio renders a live stream.
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs
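
# Hypothetical local smoke test (not part of the app; bypasses the Gradio UI):
#   for partial in respond("Hello", history=[], temperature=0.7, max_tokens=64):
#       print(partial)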
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">ContenteaseAI custom-trained model</h1>
</div>
'''
LICENSE = """
<p/>
---
For more information, visit our [website](https://contentease.ai).
"""
PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
<h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">ContenteaseAI custom-trained model</h1>
<p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Enter the text extracted from the PDF:</p>
</div>
"""
css = """
h1 {
text-align: center;
display: block;
}
"""
# Gradio UI: a ChatInterface wired to respond(); the sliders sit in a
# collapsible "Parameters" accordion and are passed as additional inputs.
chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')

with gr.Blocks(fill_height=True, css=css) as demo:
    gr.Markdown(DESCRIPTION)
    gr.ChatInterface(
        fn=respond,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
        additional_inputs=[
            gr.Slider(minimum=0, maximum=1, step=0.1, value=0.95, label="Temperature", render=False),
            gr.Slider(minimum=128, maximum=2000, step=1, value=700, label="Max new tokens", render=False),
        ],
    )
    gr.Markdown(LICENSE)
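
# Note: respond() is a generator, so streaming depends on Gradio's request queue.
# Recent Gradio versions enable queuing by default; older ones may need demo.queue().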
if __name__ == "__main__":
    try:
        demo.launch(show_error=True, debug=True)
    except Exception as e:
        logger.error(f"Error launching Gradio demo: {e}")