import spaces
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
import gradio as gr
from huggingface_hub import hf_hub_download
import logging
import time

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
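
# Download the quantized GGUF weights from the Hugging Face Hub into ./model.
# Q8_0 is an 8-bit quantization of Llama 3 8B Instruct, so the file is several
# gigabytes and the first start-up can take a while.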
repo_id = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF" | |
filename = "Meta-Llama-3-8B-Instruct.Q8_0.gguf" | |
try: | |
start_time = time.time() | |
logger.info("Downloading Model....") | |
hf_hub_download( | |
repo_id = repo_id , | |
filename = filename, | |
local_dir="./model" | |
) | |
end_time = time.time() | |
logger.info(f"Download complete. Time taken : {start_time - end_time} seconds.") | |
except Exception as e: | |
logger.error(f"Unable to download Model : {e}") | |
raise | |
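
# The llama.cpp instance is cached at module level so that successive chat
# turns reuse the loaded weights instead of re-reading them from disk.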
llm = None
llm_model = None
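
# `import spaces` suggests this app targets a ZeroGPU Space, where the request
# handler must be decorated with @spaces.GPU to be allocated a GPU per call.
# That is an assumption here (the original never uses the import); outside
# Spaces the decorator is effectively a no-op.
@spaces.GPU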
def respond(message, history, system_message, temperature, max_tokens):
    # gr.ChatInterface calls fn(message, history, *additional_inputs), so the
    # parameter order here must match the additional_inputs list below.
    global llm
    global llm_model

    chat_template = MessagesFormatterType.LLAMA_3

    # Load the model lazily on the first request. Only one GGUF file is
    # downloaded above (into ./model), so the path is fixed to `filename`.
    if llm is None or llm_model != filename:
        llm = Llama(
            model_path=f"model/{filename}",
            flash_attn=True,
            n_gpu_layers=-1,
            n_batch=1024,
            n_ctx=8192,
        )
        llm_model = filename

    provider = LlamaCppPythonProvider(llm)
    agent = LlamaCppAgent(
        provider,
        system_prompt=system_message,
        predefined_messages_formatter_type=chat_template,
        debug_output=True
    )

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.max_tokens = max_tokens
    settings.stream = True

    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        returns_streaming_generator=True,
        print_output=False
    )

    # Accumulate tokens and yield the running text so Gradio streams the reply.
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">ContenteaseAI custom-trained model</h1>
</div>
'''

LICENSE = """
<p/>

---

For more information, visit our [website](https://contentease.ai).
"""

PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
    <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">ContenteaseAI custom-trained model</h1>
    <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Enter the text extracted from the PDF:</p>
</div>
"""

css = """
h1 {
    text-align: center;
    display: block;
}
"""
# Gradio UI: a chat interface whose generation controls sit in a collapsible
# "Parameters" accordion.
chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')

with gr.Blocks(fill_height=True, css=css) as demo:
    gr.Markdown(DESCRIPTION)
    gr.ChatInterface(
        fn=respond,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
        additional_inputs=[
            # Passed to respond() in order after (message, history). The default
            # system prompt is a placeholder; adjust it to the intended use case.
            gr.Textbox(value="You are a helpful assistant.", label="System message", render=False),
            gr.Slider(minimum=0, maximum=1, step=0.1, value=0.95, label="Temperature", render=False),
            gr.Slider(minimum=128, maximum=2000, step=1, value=700, label="Max new tokens", render=False),
        ]
    )
    gr.Markdown(LICENSE)

if __name__ == "__main__":
    try:
        demo.launch(show_error=True, debug=True)
    except Exception as e:
        logger.error(f"Error launching Gradio demo: {e}")