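"""Gradio Space: Llama 3 70B Instruct GGUF chat demo.

Serves the MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF quantized model through
the llama.cpp backend (llama-cpp-python, CUDA tensor-core build) with a
streaming chat interface and a selectable set of system prompts. This docstring
is a descriptive summary added for clarity; all identifiers it mentions come
from the constants defined below.
"""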
import spaces
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp_cuda_tensorcores import Llama  # CUDA tensor-core build of llama-cpp-python

REPO_ID = "MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF"
MODEL_NAME = "Meta-Llama-3-70B-Instruct.Q3_K_L.gguf"
MAX_CONTEXT_LENGTH = 8192
CUDA = True
SYSTEM_PROMPT = "You are a helpful, smart, kind, and efficient AI assistant. You always fulfill the user's requests to the best of your ability."
TOKEN_STOP = ["<|eot_id|>"]

# Llama 3 chat-template fragments; the literal "SYSTEM_PROMPT" and "USER_PROMPT"
# placeholders are substituted at runtime in ChatLLM.apply_chat_template().
SYS_MSG = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nSYSTEM_PROMPT<|eot_id|>\n"
USER_PROMPT = "<|start_header_id|>user<|end_header_id|>\n\nUSER_PROMPT<|eot_id|>\n"
ASSIS_PROMPT = "<|start_header_id|>assistant<|end_header_id|>\n\n"
END_ASSIS_PREVIOUS_RESPONSE = "<|eot_id|>\n"
TASK_PROMPT = {
    "Assistant": SYSTEM_PROMPT,
    "Translate": "You are an expert translator. Translate the following text into English.",
    "Summarization": "Summarizing information is my specialty. Let me know what you'd like summarized.",
    "Grammar correction": "Grammar is my forte! Feel free to share the text you'd like me to proofread and correct.",
    "Stable diffusion prompt generator": "You are a stable diffusion prompt generator. Break down the user's text and create a more elaborate prompt.",
    "Play Trivia": "Engage the user in a trivia game on various topics.",
    "Share Fun Facts": "Share interesting and fun facts on various topics.",
    "Explain code": "You are an expert programmer guiding someone through a piece of code step by step, explaining each line and its function in detail.",
    "Paraphrase Master": "You have the knack for transforming complex or verbose text into simpler, clearer language while retaining the original meaning and essence.",
    "Recommend Movies": "Recommend movies based on the user's preferences.",
    "Offer Motivational Quotes": "Offer motivational quotes to inspire the user.",
    "Recommend Books": "Recommend books based on the user's favorite genres or interests.",
    "Philosophical discussion": "Engage the user in a philosophical discussion.",
    "Music recommendation": "Tune time! What kind of music are you in the mood for? I'll find the perfect song for you.",
    "Generate a Joke": "Generate a witty joke suitable for a stand-up comedy routine.",
    "Roleplay as a Detective": "Roleplay as a detective interrogating a suspect in a murder case.",
    "Act as a News Reporter": "Act as a news reporter covering breaking news about an alien invasion.",
    "Play as a Space Explorer": "Play as a space explorer encountering a new alien civilization.",
    "Be a Medieval Knight": "Imagine yourself as a medieval knight embarking on a quest to rescue a princess.",
    "Act as a Superhero": "Act as a superhero saving a city from a supervillain's evil plot.",
    "Play as a Pirate Captain": "Play as a pirate captain searching for buried treasure on a remote island.",
    "Be a Famous Celebrity": "Imagine yourself as a famous celebrity attending a glamorous red-carpet event.",
    "Design a New Invention": "Imagine you're an inventor tasked with designing a revolutionary new invention that will change the world.",
    "Act as a Time Traveler": "You've just discovered time travel! Describe your adventures as you journey through different eras.",
    "Play as a Magical Girl": "You are a magical girl with extraordinary powers, battling dark forces to protect your city and friends.",
    "Act as a Shonen Protagonist": "You are a determined and spirited shonen protagonist on a quest for strength, friendship, and victory.",
    "Roleplay as a Tsundere Character": "You are a tsundere character, initially cold and aloof but gradually warming up to others through unexpected acts of kindness.",
}

# Background image for the Gradio container, served from ./assets.
css = ".gradio-container {background-image: url('file=./assets/background.png'); background-size: cover; background-position: center; background-repeat: no-repeat;}"
class ChatLLM:
    def __init__(self, config_model):
        self.llm = None
        self.config_model = config_model
        # The GGUF model is loaded lazily on the first request; see response().
        # self.load_cpp_model()

    def load_cpp_model(self):
        self.llm = Llama(**self.config_model)

    def apply_chat_template(self, history, system_message):
        """Build a Llama 3 prompt string from the chat history and system message."""
        history = history or []
        messages = SYS_MSG.replace("SYSTEM_PROMPT", system_message.strip())
        for msg in history:
            messages += (
                USER_PROMPT.replace("USER_PROMPT", msg[0]) + ASSIS_PROMPT + msg[1]
            )
            # Close the assistant turn only if it already has content; the last
            # (pending) turn stays open so the model continues from it.
            messages += END_ASSIS_PREVIOUS_RESPONSE if msg[1] else ""
        print(messages)
        # messages = messages[:-1]
        return messages

    def response(
        self,
        history,
        system_message,
        max_tokens,
        temperature,
        top_p,
        top_k,
        repeat_penalty,
    ):
        messages = self.apply_chat_template(history, system_message)
        history[-1][1] = ""
        if not self.llm:
            print("Loading model")
            self.load_cpp_model()
        for output in self.llm(
            messages,
            echo=False,
            stream=True,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            repeat_penalty=repeat_penalty,
            stop=TOKEN_STOP,
        ):
            answer = output["choices"][0]["text"]
            history[-1][1] += answer
            # Stream the partial response to both the Chatbot and the state.
            yield history, history
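# Illustrative sketch (added for clarity, not executed by the Space): what
# apply_chat_template() produces for a single pending user turn. The model path
# below is a placeholder; the model itself is only loaded when response() runs.
#
#   chat = ChatLLM({"model_path": "model.gguf"})
#   prompt = chat.apply_chat_template([["Hi there", ""]], SYSTEM_PROMPT)
#
#   # prompt ==
#   # <|begin_of_text|><|start_header_id|>system<|end_header_id|>
#   #
#   # You are a helpful, smart, kind, and efficient AI assistant. ...<|eot_id|>
#   # <|start_header_id|>user<|end_header_id|>
#   #
#   # Hi there<|eot_id|>
#   # <|start_header_id|>assistant<|end_header_id|>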
def user(message, history):
    history = history or []
    # Append the user's message to the conversation history
    history.append([message, ""])
    return "", history


def clear_chat(chat_history_state, chat_message):
    chat_history_state = []
    chat_message = ""
    return chat_history_state, chat_message
def gui(llm_chat):
    with gr.Blocks(theme="NoCrypt/miku", css=css) as app:
        gr.Markdown("# Llama 3 70B Instruct GGUF")
        gr.Markdown(
            f"""
            ### This demo utilizes the repository ID {REPO_ID} with the model {MODEL_NAME}, powered by the LLaMA.cpp backend.
            """
        )
        with gr.Row():
            with gr.Column(scale=2):
                chatbot = gr.Chatbot(
                    label="Chat",
                    height=700,
                    avatar_images=(
                        "assets/avatar_user.jpeg",
                        "assets/avatar_llama.jpeg",
                    ),
                )
            with gr.Column(scale=1):
                with gr.Row():
                    message = gr.Textbox(
                        label="Message",
                        placeholder="Ask me anything.",
                        lines=3,
                    )
                with gr.Row():
                    submit = gr.Button(value="Send message", variant="primary")
                    clear = gr.Button(value="New chat", variant="primary")
                    stop = gr.Button(value="Stop", variant="secondary")
                with gr.Accordion("Contextual Prompt Editor"):
                    default_task = "Assistant"
                    task_prompts_gui = gr.Dropdown(
                        TASK_PROMPT,
                        value=default_task,
                        label="Prompt selector",
                        visible=True,
                        interactive=True,
                    )
                    system_msg = gr.Textbox(
                        TASK_PROMPT[default_task],
                        label="System Message",
                        placeholder="system prompt",
                        lines=4,
                    )

                    def task_selector(choice):
                        return gr.update(value=TASK_PROMPT[choice])

                    task_prompts_gui.change(
                        task_selector,
                        [task_prompts_gui],
                        [system_msg],
                    )
                with gr.Accordion("Advanced settings", open=False):
                    with gr.Column():
                        max_tokens = gr.Slider(
                            20, 4096, label="Max Tokens", step=20, value=400
                        )
                        temperature = gr.Slider(
                            0.2, 2.0, label="Temperature", step=0.1, value=0.8
                        )
                        top_p = gr.Slider(
                            0.0, 1.0, label="Top P", step=0.05, value=0.95
                        )
                        top_k = gr.Slider(0, 100, label="Top K", step=1, value=40)
                        repeat_penalty = gr.Slider(
                            0.0,
                            2.0,
                            label="Repetition Penalty",
                            step=0.1,
                            value=1.1,
                        )

        chat_history_state = gr.State()
        clear.click(
            clear_chat,
            inputs=[chat_history_state, message],
            outputs=[chat_history_state, message],
            queue=False,
        )
        clear.click(lambda: None, None, chatbot, queue=False)

        submit_click_event = submit.click(
            fn=user,
            inputs=[message, chat_history_state],
            outputs=[message, chat_history_state],
            queue=True,
        ).then(
            fn=llm_chat.response,
            inputs=[
                chat_history_state,
                system_msg,
                max_tokens,
                temperature,
                top_p,
                top_k,
                repeat_penalty,
            ],
            outputs=[chatbot, chat_history_state],
            queue=True,
        )
        stop.click(
            fn=None,
            inputs=None,
            outputs=None,
            cancels=[submit_click_event],
            queue=False,
        )
    return app
if __name__ == "__main__":
    model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_NAME)
    config_model = {
        "model_path": model_path,
        "n_ctx": MAX_CONTEXT_LENGTH,
        "n_gpu_layers": -1 if CUDA else 0,
    }
    llm_chat = ChatLLM(config_model)
    app = gui(llm_chat)
    app.queue(default_concurrency_limit=40)
    app.launch(
        max_threads=40,
        share=False,
        show_error=True,
        quiet=False,
        debug=True,
        allowed_paths=["./assets/"],
    )
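# Deployment notes (descriptive comments added for clarity): on first start the
# GGUF file is downloaded from REPO_ID into the local Hugging Face cache. With
# CUDA = True, n_gpu_layers=-1 offloads every layer to the GPU; setting
# CUDA = False falls back to CPU-only inference. The background image and
# avatars are read from ./assets, which is whitelisted via allowed_paths.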