"""Gradio chat front-end for a local YugoGPT model served via llama-cpp-python."""

import re

import gradio as gr
from llama_cpp import Llama

# Model is loaded once at module import; n_ctx bounds the total prompt window,
# mlock/mmap keep the weights resident and memory-mapped.
llm = Llama(
    model_path="yugogpt-q4_0.gguf",
    n_ctx=2048,
    n_threads=4,
    n_batch=512,
    use_mlock=True,
    use_mmap=True,
)

# Matches a punctuation mark directly followed by a letter (no space, and not
# a digit — so decimals like "3.14" are left intact). Compiled once, reused
# on every streamed chunk.
_PUNCT_NEEDS_SPACE = re.compile(r"([.,!?:;])(?=[^\W\d\s])")


def format_chat_history(history):
    """Render prior (user, assistant) turns as "Pitanje:/Odgovor:" lines.

    Args:
        history: iterable of (user_msg, assistant_msg) pairs, as supplied by
            gr.ChatInterface.

    Returns:
        A single string with one "Pitanje: ...\nOdgovor: ...\n" entry per turn.
    """
    # str.join over a generator instead of += in a loop (avoids quadratic copies).
    return "".join(
        f"Pitanje: {user_msg}\nOdgovor: {assistant_msg}\n"
        for user_msg, assistant_msg in history
    )


def clean_text(text):
    """Normalize model output: drop the role marker, fix spacing.

    Removes a literal "ASSISTANT:" marker, inserts a space after punctuation
    only where one is missing (and only before a letter, so "3.14" survives),
    and collapses all whitespace runs to single spaces. Idempotent, so it is
    safe to call repeatedly on a growing streamed message.
    """
    text = text.replace("ASSISTANT:", "")
    text = _PUNCT_NEEDS_SPACE.sub(r"\1 ", text)
    # split()/join collapses runs of whitespace and trims the ends.
    return " ".join(text.split())


def chat(message, history):
    """Stream a model reply for *message*, yielding the partial text as it grows.

    Args:
        message: the user's current question.
        history: prior (user, assistant) pairs from gr.ChatInterface.

    Yields:
        Progressively longer cleaned versions of the reply (Gradio streaming
        protocol: each yield replaces the displayed message).
    """
    system_prompt = """Ti si YugoGPT, visoko precizan AI asistent.

OSNOVNI PRINCIPI:
- Dajem konkretne i tačne informacije
- Odgovaram samo o temama koje dobro poznajem
- Koristim jasan i precizan srpski jezik
- Fokusiram se na činjenice
- Odgovaram direktno i pozitivno
- Izbegavam nagađanja"""

    chat_history = format_chat_history(history)
    full_prompt = f"""SYSTEM: {system_prompt}

KONTEKST:
{chat_history}

Pitanje: {message}
Odgovor:"""

    response = llm(
        full_prompt,
        max_tokens=2048,
        temperature=0.1,
        top_p=0.1,
        repeat_penalty=1.2,
        top_k=20,
        stop=["Pitanje:", "\n\n"],
        stream=True,
    )

    # Accumulate the RAW stream and clean the whole buffer once per yield.
    # Cleaning each chunk individually would strip the leading/trailing
    # whitespace that carries word boundaries across chunk edges, gluing
    # adjacent words together.
    raw = ""
    for chunk in response:
        delta = chunk["choices"][0]["text"] if chunk else ""
        if delta:
            raw += delta
            yield clean_text(raw)


demo = gr.ChatInterface(
    fn=chat,
    title="YugoGPT Stručni Asistent",
    description="Precizan izvor informacija. PAŽNJA!!! ZNA DA LAŽE!!!",
    examples=[
        "Objasnite princip rada relacione baze podataka",
        "Kako funkcioniše HTTP protokol?",
        "Opišite osnovne komponente računara",
    ],
)

if __name__ == "__main__":
    # queue() enables generator streaming; bind on all interfaces, local only.
    demo.queue().launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )