import os
import time

import requests
import streamlit as st

# Get the Hugging Face API token from environment variables
HF_API_TOKEN = os.getenv("HF_API_KEY")
if not HF_API_TOKEN:
    raise ValueError("Hugging Face API Token is not set in the environment variables.")

# Hugging Face Inference API URLs and headers for the supported models
MISTRAL_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1"
MINICHAT_API_URL = "https://api-inference.huggingface.co/models/GeneZC/MiniChat-2-3B"
DIALOGPT_API_URL = "https://api-inference.huggingface.co/models/microsoft/DialoGPT-large"
PHI3_API_URL = "https://api-inference.huggingface.co/models/microsoft/Phi-3-mini-4k-instruct"
META_LLAMA_70B_API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-70B-Instruct"
META_LLAMA_8B_API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
GEMMA_27B_API_URL = "https://api-inference.huggingface.co/models/google/gemma-2-27b"
GEMMA_27B_IT_API_URL = "https://api-inference.huggingface.co/models/google/gemma-2-27b-it"

HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}


def query_mistral(payload):
    response = requests.post(MISTRAL_API_URL, headers=HEADERS, json=payload)
    st.write(f"Mistral API response: {response.json()}")  # Debugging log
    return response.json()


def query_minichat(payload):
    response = requests.post(MINICHAT_API_URL, headers=HEADERS, json=payload)
    return response.json()


def query_dialogpt(payload):
    response = requests.post(DIALOGPT_API_URL, headers=HEADERS, json=payload)
    return response.json()


def query_phi3(payload):
    response = requests.post(PHI3_API_URL, headers=HEADERS, json=payload)
    return response.json()


def query_meta_llama_70b(payload):
    response = requests.post(META_LLAMA_70B_API_URL, headers=HEADERS, json=payload)
    return response.json()


def query_meta_llama_8b(payload):
    response = requests.post(META_LLAMA_8B_API_URL, headers=HEADERS, json=payload)
    return response.json()


def query_gemma_27b(payload):
    response = requests.post(GEMMA_27B_API_URL, headers=HEADERS, json=payload)
    return response.json()


def query_gemma_27b_it(payload):
    response = requests.post(GEMMA_27B_IT_API_URL, headers=HEADERS, json=payload)
    return response.json()


def count_tokens(text):
    # Rough estimate: whitespace-separated words
    return len(text.split())


# Token limit handling (simple per-minute budget)
MAX_TOKENS_PER_MINUTE = 1000
token_count = 0
start_time = time.time()


def handle_token_limit(text):
    global token_count, start_time
    current_time = time.time()
    # Reset the budget once a minute has elapsed
    if current_time - start_time > 60:
        token_count = 0
        start_time = current_time
    token_count += count_tokens(text)
    if token_count > MAX_TOKENS_PER_MINUTE:
        raise ValueError("Token limit exceeded. Please wait before sending more messages.")
def add_message_to_conversation(user_message, bot_message, model_name):
    st.session_state.conversation.append((user_message, bot_message, model_name))


# Streamlit app
st.set_page_config(page_title="Multi-LLM Chatbot Interface", layout="wide")
st.title("Multi-LLM Chatbot Interface")
st.write("Multi-LLM Chatbot Interface by Thariq Arian")

# Single source of truth for the supported model names
MODEL_NAMES = [
    "Mistral-8x7B",
    "Meta-Llama-3-70B-Instruct",
    "Meta-Llama-3-8B-Instruct",
    "MiniChat-2-3B",
    "DialoGPT (GPT-2-1.5B)",
    "Phi-3-mini-4k-instruct",
    "Gemma-2-27B",
    "Gemma-2-27B-IT",
]

# Initialize session state for the conversation and per-model chat history
if "conversation" not in st.session_state:
    st.session_state.conversation = []
if "model_history" not in st.session_state:
    st.session_state.model_history = {model: [] for model in MODEL_NAMES}

# Dropdown for LLM selection
llm_selection = st.selectbox("Select Language Model", MODEL_NAMES)

# User input for question
question = st.text_input("Question", placeholder="Enter your question here...")

# Handle user input and LLM response
if st.button("Send") and question:
    try:
        handle_token_limit(question)
        with st.spinner("Waiting for the model to respond..."):
            chat_history = " ".join(st.session_state.model_history[llm_selection]) + f"User: {question}\n"

            if llm_selection == "Mistral-8x7B":
                mistral_response = query_mistral({"inputs": chat_history})
                if isinstance(mistral_response, list) and len(mistral_response) > 0:
                    mistral_answer = mistral_response[0].get("generated_text", "No response")
                else:
                    mistral_answer = "No response"
                add_message_to_conversation(question, mistral_answer, llm_selection)
                st.session_state.model_history[llm_selection].append(
                    f"User: {question}\nMistral-8x7B: {mistral_answer}\n"
                )

            elif llm_selection == "Meta-Llama-3-70B-Instruct":
                meta_llama_70b_response = query_meta_llama_70b({"inputs": chat_history})
                if isinstance(meta_llama_70b_response, dict) and "generated_text" in meta_llama_70b_response:
                    meta_llama_70b_answer = meta_llama_70b_response["generated_text"]
                elif isinstance(meta_llama_70b_response, list) and len(meta_llama_70b_response) > 0:
                    meta_llama_70b_answer = meta_llama_70b_response[0].get("generated_text", "No response")
                else:
                    meta_llama_70b_answer = "No response"
                add_message_to_conversation(question, meta_llama_70b_answer, llm_selection)
                st.session_state.model_history[llm_selection].append(
                    f"User: {question}\nMeta-Llama-3-70B-Instruct: {meta_llama_70b_answer}\n"
                )

            elif llm_selection == "Meta-Llama-3-8B-Instruct":
                meta_llama_8b_response = query_meta_llama_8b({"inputs": chat_history})
                if isinstance(meta_llama_8b_response, dict) and "generated_text" in meta_llama_8b_response:
                    meta_llama_8b_answer = meta_llama_8b_response["generated_text"]
                elif isinstance(meta_llama_8b_response, list) and len(meta_llama_8b_response) > 0:
                    meta_llama_8b_answer = meta_llama_8b_response[0].get("generated_text", "No response")
                else:
                    meta_llama_8b_answer = "No response"
                add_message_to_conversation(question, meta_llama_8b_answer, llm_selection)
                st.session_state.model_history[llm_selection].append(
                    f"User: {question}\nMeta-Llama-3-8B-Instruct: {meta_llama_8b_answer}\n"
                )

            elif llm_selection == "MiniChat-2-3B":
                minichat_response = query_minichat({"inputs": chat_history})
                # The Inference API returns an error dict while the model is still loading
                if isinstance(minichat_response, dict) and "is currently loading" in minichat_response.get("error", ""):
                    minichat_answer = (
                        f"Model is loading, please wait {minichat_response.get('estimated_time', 'a few')} seconds."
                    )
                elif isinstance(minichat_response, list) and len(minichat_response) > 0:
                    minichat_answer = minichat_response[0].get("generated_text", "No response")
                else:
                    minichat_answer = "No response"
                add_message_to_conversation(question, minichat_answer, llm_selection)
                st.session_state.model_history[llm_selection].append(
                    f"User: {question}\nMiniChat-2-3B: {minichat_answer}\n"
                )

            elif llm_selection == "DialoGPT (GPT-2-1.5B)":
                dialogpt_response = query_dialogpt({"inputs": chat_history})
                if isinstance(dialogpt_response, dict) and "generated_text" in dialogpt_response:
                    dialogpt_answer = dialogpt_response["generated_text"]
                elif isinstance(dialogpt_response, list) and len(dialogpt_response) > 0:
                    dialogpt_answer = dialogpt_response[0].get("generated_text", "No response")
                else:
                    dialogpt_answer = "No response"
                add_message_to_conversation(question, dialogpt_answer, llm_selection)
                st.session_state.model_history[llm_selection].append(
                    f"User: {question}\nDialoGPT (GPT-2-1.5B): {dialogpt_answer}\n"
                )

            elif llm_selection == "Phi-3-mini-4k-instruct":
                phi3_response = query_phi3({"inputs": chat_history})
                if isinstance(phi3_response, list) and len(phi3_response) > 0:
                    phi3_answer = phi3_response[0].get("generated_text", "No response")
                else:
                    phi3_answer = "No response"
                add_message_to_conversation(question, phi3_answer, llm_selection)
                st.session_state.model_history[llm_selection].append(
                    f"User: {question}\nPhi-3-mini-4k-instruct: {phi3_answer}\n"
                )

            elif llm_selection == "Gemma-2-27B":
                gemma_response = query_gemma_27b({"inputs": chat_history})
                if isinstance(gemma_response, dict) and "generated_text" in gemma_response:
                    gemma_answer = gemma_response["generated_text"]
                elif isinstance(gemma_response, list) and len(gemma_response) > 0:
                    gemma_answer = gemma_response[0].get("generated_text", "No response")
                else:
                    gemma_answer = "No response"
                add_message_to_conversation(question, gemma_answer, llm_selection)
                st.session_state.model_history[llm_selection].append(
                    f"User: {question}\nGemma-2-27B: {gemma_answer}\n"
                )

            elif llm_selection == "Gemma-2-27B-IT":
                gemma_27b_it_response = query_gemma_27b_it({"inputs": chat_history})
                if isinstance(gemma_27b_it_response, dict) and "generated_text" in gemma_27b_it_response:
                    gemma_27b_it_answer = gemma_27b_it_response["generated_text"]
                elif isinstance(gemma_27b_it_response, list) and len(gemma_27b_it_response) > 0:
                    gemma_27b_it_answer = gemma_27b_it_response[0].get("generated_text", "No response")
                else:
                    gemma_27b_it_answer = "No response"
                add_message_to_conversation(question, gemma_27b_it_answer, llm_selection)
                st.session_state.model_history[llm_selection].append(
                    f"User: {question}\nGemma-2-27B-IT: {gemma_27b_it_answer}\n"
                )
    except ValueError as e:
        st.error(str(e))

# Custom CSS for chat bubbles
# NOTE: the original style rules were lost; the class names and rules below are a
# minimal reconstruction matching the markup used to render the conversation.
st.markdown(
    """
    <style>
    .chat-container { display: flex; flex-direction: column; gap: 8px; }
    .chat-bubble { padding: 10px 14px; border-radius: 12px; max-width: 80%; }
    .user-bubble { background-color: #DCF8C6; align-self: flex-end; }
    .bot-bubble { background-color: #F1F0F0; align-self: flex-start; }
    </style>
    """,
    unsafe_allow_html=True,
)
# Display the conversation
# NOTE: the original <div> markup was lost; this is a minimal reconstruction using the class names defined above.
st.write('<div class="chat-container">', unsafe_allow_html=True)
for user_message, bot_message, model_name in st.session_state.conversation:
    st.write(f'<div class="chat-bubble user-bubble">You: {user_message}</div>', unsafe_allow_html=True)
    st.write(f'<div class="chat-bubble bot-bubble">{model_name}: {bot_message}</div>', unsafe_allow_html=True)
st.write('</div>', unsafe_allow_html=True)
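
# How to run (a sketch, not part of the original script): the app expects the HF_API_KEY
# environment variable to be set before startup; the file name "app.py" is an assumption.
#   export HF_API_KEY="hf_..."
#   streamlit run app.py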