import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

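# Model repository and quantized GGUF filename; hf_hub_download fetches the file from the
# Hugging Face Hub and caches it locally after the first run.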
MODEL_ID = "large-traversaal/Alif-1.0-8B-Instruct"
MODEL_FILE = "model-Q8_0.gguf"

model_path_file = hf_hub_download(MODEL_ID, filename=MODEL_FILE)

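# Load the model with llama-cpp-python. n_gpu_layers sets how many layers are offloaded to the
# GPU (only takes effect with a GPU-enabled llama-cpp-python build); n_ctx is the context window
# in tokens.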
llama = Llama(
    model_path=model_path_file,
    n_gpu_layers=40,
    n_threads=8,
    n_batch=512,
    n_ctx=4096,
    verbose=True
)


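# Chat function for gr.ChatInterface: builds an instruction-style prompt, runs llama.cpp
# generation, and yields the partial response so the UI updates as tokens stream in.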
def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
    # Alpaca-style prompt: the system prompt followed by the user's instruction.
    chat_prompt = f"{system_prompt}\n ### Instruction: {message}\n ### Response:"
    # Stream the completion; generation also stops at "Q:" or the first newline.
    response = llama(chat_prompt, temperature=temperature, max_tokens=max_new_tokens, top_k=top_k, repeat_penalty=repetition_penalty, top_p=top_p, stop=["Q:", "\n"], echo=False, stream=True)

    # Accumulate streamed chunks and yield the growing text so the chat updates live.
    text = ""
    for chunk in response:
        content = chunk["choices"][0]["text"]
        if content:
            text += content
            yield text


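# JavaScript hook run once when the page loads: shows a short usage notice to the visitor.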
on_load = """ |
|
async()=>{ alert("Welcome to the Traversaal Alif 1.0 Chatbot! This is an experimental AI model. Please use responsibly."); } |
|
""" |
|
|
|
placeholder = """ |
|
<center><h1>10 Questions</h1><br>Think of a person, place, or thing. I'll ask you 10 yes/no questions to try and guess it. |
|
</center> |
|
""" |
|
|
|
|
|
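# Gradio UI: a centered header plus a ChatInterface wired to generate_response, with the
# sampling parameters exposed in a collapsible accordion.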
with gr.Blocks(js=on_load, theme=gr.themes.Default()) as demo:
    with gr.Column(scale=1, elem_id="center-content"):
        gr.Markdown(
            """
<div style="text-align: center;">
<h1>Alif 1.0 Urdu & English Chatbot 🚀</h1>
<p>Alif 1.0 8B Instruct is an open-source model with highly advanced multilingual reasoning capabilities. It uses human-refined multilingual synthetic data paired with reasoning to enhance cultural nuance and reasoning in English and Urdu.</p>
</div>
            """,
        )

    chat = gr.ChatInterface(
        generate_response,
        examples=[
            ["شہر کراچی کے بارے میں بتاؤ"],  # "Tell me about the city of Karachi"
            ["قابل تجدید توانائی کیا ہے؟"],  # "What is renewable energy?"
            ["پاکستان کے بارے میں بتائیں"],  # "Tell me about Pakistan"
        ],
        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
        additional_inputs=[
            gr.Textbox(value="You are an Urdu Chatbot. Write an appropriate response for the given instruction in Urdu. Your response should be extremely comprehensive", label="System prompt", render=False),
            gr.Slider(0, 1, 0.8, label="Temperature", render=False),
            gr.Slider(128, 4096, 2048, label="Max new tokens", render=False),
            gr.Slider(1, 80, 40, step=1, label="Top K sampling", render=False),
            gr.Slider(0, 2, 1.1, label="Repetition penalty", render=False),
            gr.Slider(0, 1, 0.95, label="Top P sampling", render=False),
        ],
    )

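# Queue incoming requests (at most 10 waiting) and launch with a public share link.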
demo.queue(max_size=10).launch(share=True)
|