import spaces
import json
import subprocess
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
import gradio as gr
from huggingface_hub import hf_hub_download
import logging
import time

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

repo_id = "MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF"
filename = "Meta-Llama-3.1-70B-Instruct.IQ1_M.gguf"
def chunk_text(text, chunk_size=5000):
    """
    Splits the input text into chunks of the specified size.

    Args:
        text (str): The input text to be chunked.
        chunk_size (int): The size of each chunk, measured in whitespace-separated words.

    Returns:
        list: A list of text chunks.
    """
    words = text.split()
    chunks = [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
    return chunks
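# Quick illustration of the chunking behavior (word-based, not token-based, so
# 5000 words only approximates the model's token budget):
#   chunk_text("a b c d e", chunk_size=2) -> ["a b", "c d", "e"]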
try:
    start_time = time.time()
    logger.info("Downloading model...")
    hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        local_dir="./model"
    )
    end_time = time.time()
    logger.info(f"Download complete. Time taken: {end_time - start_time} seconds.")
except Exception as e:
    logger.error(f"Unable to download model: {e}")
    raise

llm = None
def respond(message, history, temperature, max_tokens):
    """
    Generate a streaming response from the Llama 3.1 70B Instruct model, chunking long inputs.

    Args:
        message (str): The input message.
        history (list): The conversation history used by ChatInterface (not used).
        temperature (float): The sampling temperature for generating the response.
        max_tokens (int): The maximum number of new tokens to generate.

    Yields:
        str: The generated response, accumulated as it streams.
    """
    chat_template = MessagesFormatterType.LLAMA_3
    global llm

    start_time = time.time()
    logging.info("Loading model...")
    if llm is None:
        model = Llama(
            model_path=f"model/{filename}",
            flash_attn=True,
            n_gpu_layers=-1,       # offload all layers to the GPU
            n_batch=1,
            n_ctx=8192,            # context window in tokens
            last_n_tokens_size=0
        )
        llm = model
    end_time = time.time()
    logger.info(f"Model loaded. Time taken: {end_time - start_time} seconds.")

    start_time = time.time()
    logger.info("Loading provider and agent for the Llama model...")
    provider = LlamaCppPythonProvider(llm)
SYS_PROMPT =""" | |
Extract the following information from the given text: | |
Identify the specific areas where the work needs to be done and Add the furniture that has to be changed. | |
Do not specify the work that has to be done. | |
Format the extracted information in the following JSON structure: | |
{ | |
"Area Type1": { | |
"Furnture1", | |
"Furnture2", | |
... | |
} | |
"Area Type2": { | |
"Furnture1", | |
"Furnture2", | |
... | |
} | |
} | |
Requirements: | |
1. Each area type (e.g., lobby, bar, etc.) should have its own node. | |
3. List the furniture on which the work needs to be performed without specifying the work or units of items. | |
4. Ignore any personal information or irrelevant details. | |
5. Follow the JSON pattern strictly and ensure clarity and accuracy in the extracted information. | |
Example: | |
Given the paragraph: "In the lobby, replace 5 light fixtures and remove 2 old carpets. In the bar, | |
install 3 new tables and remove 4 broken chairs." | |
The JSON output should be: | |
{ | |
"Lobby": { | |
"Light fixtures" | |
"Old carpets" | |
}, | |
"Bar": { | |
"New tables" | |
"Broken chairs" | |
} | |
} | |
} | |
Please ensure that the output JSON is well-structured and includes only relevant details about the work to be done. | |
""" | |
    agent = LlamaCppAgent(
        provider,
        system_prompt=SYS_PROMPT,
        predefined_messages_formatter_type=chat_template,
        debug_output=False
    )

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.max_tokens = max_tokens
    settings.stream = True
    end_time = time.time()
    logger.info(f"Provider settings updated. Prompt loaded. Time taken: {end_time - start_time} seconds.")

    chunks = chunk_text(message)
    responses = []

    start_time = time.time()
    logger.info("Generating responses...")
    for chunk in chunks:
        response = agent.get_chat_response(
            chunk,
            llm_sampling_settings=settings,
            returns_streaming_generator=True,  # return a streaming generator
            print_output=False
        )
        responses.append(response)
    logger.info(f"Responses generated. Time taken: {time.time() - start_time} seconds.")

    output = ""
    for response in responses:
        for text in response:
            output += text
            yield output
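# Illustrative use outside of Gradio (hypothetical input text). Each yielded value
# is the accumulated output so far, so only the final value needs to be kept:
#   final = ""
#   for partial in respond("In the lobby, replace 5 light fixtures.", [], 0.9, 1500):
#       final = partial
#   print(final)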
DESCRIPTION = '''
<div>
    <h1 style="text-align: center;">ContenteaseAI custom trained model</h1>
</div>
'''

LICENSE = """
<p/>
---
For more information, visit our [website](https://contentease.ai).
"""

PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
    <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">ContenteaseAI custom trained model</h1>
    <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Enter the text extracted from the PDF:</p>
</div>
"""

css = """
h1 {
    text-align: center;
    display: block;
}
"""
# Gradio block
chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')

with gr.Blocks(fill_height=True, css=css) as demo:
    gr.Markdown(DESCRIPTION)
    gr.ChatInterface(
        fn=respond,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
        additional_inputs=[
            gr.Slider(minimum=0, maximum=1, step=0.1, value=0.90, label="Temperature", render=False),
            gr.Slider(minimum=128, maximum=2000, step=1, value=1500, label="Max new tokens", render=False),
        ]
    )
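    # The two sliders above are passed to respond() as extra positional arguments
    # after `message` and `history`, i.e. as `temperature` and `max_tokens`.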
    gr.Markdown(LICENSE)

if __name__ == "__main__":
    try:
        demo.launch(show_error=True, debug=True)
    except Exception as e:
        logger.error(f"Error launching Gradio demo: {e}")