import os
import base64
import requests
import gradio as gr
from huggingface_hub import InferenceClient
from dataclasses import dataclass
import pytesseract
from PIL import Image
from sentence_transformers import SentenceTransformer, util
import torch
import numpy as np
import networkx as nx
from collections import Counter
import asyncio
import edge_tts
import speech_recognition as sr
import random


@dataclass
class ChatMessage:
    role: str
    content: str

    def to_dict(self):
        return {"role": self.role, "content": self.content}


class XylariaChat:
    def __init__(self):
        self.hf_token = os.getenv("HF_TOKEN")
        if not self.hf_token:
            raise ValueError("HuggingFace token not found in environment variables")

        self.client = InferenceClient(
            model="Qwen/Qwen-32B-Preview",
            token=self.hf_token
        )

        self.image_api_url = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
        self.image_api_headers = {"Authorization": f"Bearer {self.hf_token}"}
        self.image_gen_client = InferenceClient("black-forest-labs/FLUX.1-schnell", token=self.hf_token)

        self.conversation_history = []
        self.persistent_memory = []
        self.memory_embeddings = None
        self.embedding_model = SentenceTransformer('all-mpnet-base-v2')

        self.knowledge_graph = nx.DiGraph()
        self.belief_system = {}
        self.metacognitive_layer = {
            "coherence_score": 0.0,
            "relevance_score": 0.0,
            "bias_detection": 0.0,
            "strategy_adjustment": ""
        }

        self.internal_state = {
            "emotions": {
                "valence": 0.5,
                "arousal": 0.5,
                "dominance": 0.5,
                "curiosity": 0.5,
                "frustration": 0.0,
                "confidence": 0.7,
                "sadness": 0.0,
                "joy": 0.0
            },
            "cognitive_load": {
                "memory_load": 0.0,
                "processing_intensity": 0.0
            },
            "introspection_level": 0.0,
            "engagement_level": 0.5
        }

        self.goals = [
            {"goal": "Provide helpful, informative, and contextually relevant responses", "priority": 0.8, "status": "active", "progress": 0.0},
            {"goal": "Actively learn and adapt from interactions to improve conversational abilities", "priority": 0.9, "status": "active", "progress": 0.0},
            {"goal": "Maintain a coherent, engaging, and empathetic conversation flow", "priority": 0.7, "status": "active", "progress": 0.0},
            {"goal": "Identify and fill knowledge gaps by seeking external information", "priority": 0.6, "status": "dormant", "progress": 0.0},
            {"goal": "Recognize and adapt to user's emotional state and adjust response style accordingly", "priority": 0.7, "status": "dormant", "progress": 0.0}
        ]

        self.system_prompt = """You are a helpful and harmless assistant. You are Xylaria developed by Sk Md Saad Amin.
You should think step-by-step"""

        self.causal_rules_db = {
            "rain": ["wet roads", "flooding"],
            "fire": ["heat", "smoke"],
            "study": ["learn", "good grades"],
            "exercise": ["fitness", "health"]
        }

        self.concept_generalizations = {
            "planet": "system with orbiting bodies",
            "star": "luminous sphere of plasma",
            "democracy": "government by the people",
            "photosynthesis": "process used by plants to convert light to energy"
        }

        # === Voice Mode Initialization (Start) ===
        self.voice_mode_active = False
        self.selected_voice = "en-US-JennyNeural"  # Default voice
        # === Voice Mode Initialization (End) ===

    def update_internal_state(self, emotion_deltas, cognitive_load_deltas, introspection_delta, engagement_delta):
        for emotion, delta in emotion_deltas.items():
            if emotion in self.internal_state["emotions"]:
                self.internal_state["emotions"][emotion] = np.clip(self.internal_state["emotions"][emotion] + delta, 0.0, 1.0)
        for load_type, delta in cognitive_load_deltas.items():
            if load_type in self.internal_state["cognitive_load"]:
                self.internal_state["cognitive_load"][load_type] = np.clip(self.internal_state["cognitive_load"][load_type] + delta, 0.0, 1.0)
        self.internal_state["introspection_level"] = np.clip(self.internal_state["introspection_level"] + introspection_delta, 0.0, 1.0)
        self.internal_state["engagement_level"] = np.clip(self.internal_state["engagement_level"] + engagement_delta, 0.0, 1.0)

        if self.internal_state["emotions"]["curiosity"] > 0.7 and self.goals[3]["status"] == "dormant":
            self.goals[3]["status"] = "active"
        if self.internal_state["engagement_level"] > 0.8 and self.goals[4]["status"] == "dormant":
            self.goals[4]["status"] = "active"

    def update_knowledge_graph(self, entities, relationships):
        for entity in entities:
            self.knowledge_graph.add_node(entity)
        for relationship in relationships:
            subject, predicate, object_ = relationship
            self.knowledge_graph.add_edge(subject, object_, relation=predicate)

    def update_belief_system(self, statement, belief_score):
        self.belief_system[statement] = belief_score

    def dynamic_belief_update(self, user_message):
        sentences = [s.strip() for s in user_message.split('.') if s.strip()]
        sentence_counts = Counter(sentences)
        for sentence, count in sentence_counts.items():
            if count >= 2:
                belief_score = self.belief_system.get(sentence, 0.5)
                belief_score = min(belief_score + 0.2, 1.0)
                self.update_belief_system(sentence, belief_score)

    def run_metacognitive_layer(self):
        coherence_score = self.calculate_coherence()
        relevance_score = self.calculate_relevance()
        bias_score = self.detect_bias()
        strategy_adjustment = self.suggest_strategy_adjustment()
        self.metacognitive_layer = {
            "coherence_score": coherence_score,
            "relevance_score": relevance_score,
            "bias_detection": bias_score,
            "strategy_adjustment": strategy_adjustment
        }

    def calculate_coherence(self):
        if not self.conversation_history:
            return 0.95
        coherence_scores = []
        for i in range(1, len(self.conversation_history)):
            current_message = self.conversation_history[i]['content']
            previous_message = self.conversation_history[i - 1]['content']
            similarity_score = util.pytorch_cos_sim(
                self.embedding_model.encode(current_message, convert_to_tensor=True),
                self.embedding_model.encode(previous_message, convert_to_tensor=True)
            ).item()
            coherence_scores.append(similarity_score)
        average_coherence = np.mean(coherence_scores)
        if self.internal_state["cognitive_load"]["processing_intensity"] > 0.8:
            average_coherence -= 0.1
        if self.internal_state["emotions"]["frustration"] > 0.5:
            average_coherence -= 0.15
        return np.clip(average_coherence, 0.0, 1.0)
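    # Illustrative note (commentary only, not part of the original logic):
    # calculate_coherence() above averages the cosine similarity of adjacent turns, e.g.
    #   util.pytorch_cos_sim(embed("It may rain"), embed("Take an umbrella")).item()
    # yields a score in [-1, 1]; higher means the turns are semantically closer.
    # The average is then penalised when processing intensity or frustration is high.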
    def calculate_relevance(self):
        if not self.conversation_history:
            return 0.9
        last_user_message = self.conversation_history[-1]['content']
        relevant_entities = self.extract_entities(last_user_message)
        relevance_score = 0
        for entity in relevant_entities:
            if entity in self.knowledge_graph:
                relevance_score += 0.2

        for goal in self.goals:
            if goal["status"] == "active":
                if goal["goal"] == "Provide helpful, informative, and contextually relevant responses":
                    relevance_score += goal["priority"] * 0.5
                elif goal["goal"] == "Identify and fill knowledge gaps by seeking external information":
                    if not relevant_entities or not all(entity in self.knowledge_graph for entity in relevant_entities):
                        relevance_score += goal["priority"] * 0.3
        return np.clip(relevance_score, 0.0, 1.0)

    def detect_bias(self):
        bias_score = 0.0
        recent_messages = [msg['content'] for msg in self.conversation_history[-3:] if msg['role'] == 'assistant']
        if recent_messages:
            average_valence = np.mean([self.embedding_model.encode(msg, convert_to_tensor=True).mean().item() for msg in recent_messages])
            if average_valence < 0.4 or average_valence > 0.6:
                bias_score += 0.2
        if self.internal_state["emotions"]["valence"] < 0.3 or self.internal_state["emotions"]["valence"] > 0.7:
            bias_score += 0.15
        if self.internal_state["emotions"]["dominance"] > 0.8:
            bias_score += 0.1
        return np.clip(bias_score, 0.0, 1.0)

    def suggest_strategy_adjustment(self):
        adjustments = []
        if self.metacognitive_layer["coherence_score"] < 0.7:
            adjustments.append("Focus on improving coherence by explicitly connecting ideas between turns.")
        if self.metacognitive_layer["relevance_score"] < 0.7:
            adjustments.append("Increase relevance by directly addressing user queries and utilizing stored knowledge.")
        if self.metacognitive_layer["bias_detection"] > 0.3:
            adjustments.append("Monitor and adjust responses to reduce potential biases. Consider rephrasing or providing alternative viewpoints.")
        if self.internal_state["cognitive_load"]["memory_load"] > 0.8:
            adjustments.append("Memory load is high. Consider summarizing or forgetting less relevant information.")
        if self.internal_state["emotions"]["frustration"] > 0.6:
            adjustments.append("Frustration level is elevated. Prioritize concise and direct responses. Consider asking clarifying questions.")
        if self.internal_state["emotions"]["curiosity"] > 0.8 and self.internal_state["cognitive_load"]["processing_intensity"] < 0.5:
            adjustments.append("High curiosity and low processing load. Explore the topic further by asking relevant questions or seeking external information.")
        if not adjustments:
            return "Current strategy is effective. Continue with the current approach."
        else:
            return " ".join(adjustments)

    def introspect(self):
        introspection_report = "Introspection Report:\n"
        introspection_report += "  Current Emotional State:\n"
        for emotion, value in self.internal_state['emotions'].items():
            introspection_report += f"    - {emotion.capitalize()}: {value:.2f}\n"
        introspection_report += "  Cognitive Load:\n"
        for load_type, value in self.internal_state['cognitive_load'].items():
            introspection_report += f"    - {load_type.capitalize()}: {value:.2f}\n"
        introspection_report += f"  Introspection Level: {self.internal_state['introspection_level']:.2f}\n"
        introspection_report += f"  Engagement Level: {self.internal_state['engagement_level']:.2f}\n"
        introspection_report += "  Current Goals:\n"
        for goal in self.goals:
            introspection_report += f"    - {goal['goal']} (Priority: {goal['priority']:.2f}, Status: {goal['status']}, Progress: {goal['progress']:.2f})\n"
        introspection_report += "Metacognitive Layer Report\n"
        introspection_report += f"Coherence Score: {self.metacognitive_layer['coherence_score']}\n"
        introspection_report += f"Relevance Score: {self.metacognitive_layer['relevance_score']}\n"
        introspection_report += f"Bias Detection: {self.metacognitive_layer['bias_detection']}\n"
        introspection_report += f"Strategy Adjustment: {self.metacognitive_layer['strategy_adjustment']}\n"
        return introspection_report

    def adjust_response_based_on_state(self, response):
        if self.internal_state["introspection_level"] > 0.7:
            response = self.introspect() + "\n\n" + response

        valence = self.internal_state["emotions"]["valence"]
        arousal = self.internal_state["emotions"]["arousal"]
        curiosity = self.internal_state["emotions"]["curiosity"]
        frustration = self.internal_state["emotions"]["frustration"]
        confidence = self.internal_state["emotions"]["confidence"]
        sadness = self.internal_state["emotions"]["sadness"]
        joy = self.internal_state["emotions"]["joy"]

        if valence < 0.4:
            if arousal > 0.6:
                response = "I'm feeling a bit overwhelmed right now, but I'll do my best to assist you. " + response
            else:
                if sadness > 0.6:
                    response = "I'm feeling quite down at the moment, but I'll try to help. " + response
                else:
                    response = "I'm not feeling my best at the moment, but I'll try to help. " + response
        elif valence > 0.6:
            if arousal > 0.6:
                if joy > 0.6:
                    response = "I'm feeling fantastic and ready to assist! " + response
                else:
                    response = "I'm feeling quite energized and ready to assist! " + response
            else:
                response = "I'm in a good mood and happy to help. " + response

        if curiosity > 0.7:
            response += " I'm very curious about this topic, could you tell me more?"
        if frustration > 0.5:
            response = "I'm finding this a bit challenging, but I'll give it another try. " + response
        if confidence < 0.5:
            response = "I'm not entirely sure about this, but here's what I think: " + response

        if self.internal_state["cognitive_load"]["memory_load"] > 0.7:
            response = "I'm holding a lot of information right now, so my response might be a bit brief: " + response

        return response

    def update_goals(self, user_feedback):
        feedback_lower = user_feedback.lower()
        if "helpful" in feedback_lower:
            for goal in self.goals:
                if goal["goal"] == "Provide helpful, informative, and contextually relevant responses":
                    goal["priority"] = min(goal["priority"] + 0.1, 1.0)
                    goal["progress"] = min(goal["progress"] + 0.2, 1.0)
        elif "confusing" in feedback_lower:
            for goal in self.goals:
                if goal["goal"] == "Provide helpful, informative, and contextually relevant responses":
                    goal["priority"] = max(goal["priority"] - 0.1, 0.0)
                    goal["progress"] = max(goal["progress"] - 0.2, 0.0)
        if "learn more" in feedback_lower:
            for goal in self.goals:
                if goal["goal"] == "Actively learn and adapt from interactions to improve conversational abilities":
                    goal["priority"] = min(goal["priority"] + 0.2, 1.0)
                    goal["progress"] = min(goal["progress"] + 0.1, 1.0)
        elif "too repetitive" in feedback_lower:
            for goal in self.goals:
                if goal["goal"] == "Maintain a coherent, engaging, and empathetic conversation flow":
                    goal["priority"] = max(goal["priority"] - 0.1, 0.0)
                    goal["progress"] = max(goal["progress"] - 0.2, 0.0)

        if self.internal_state["emotions"]["curiosity"] > 0.8:
            for goal in self.goals:
                if goal["goal"] == "Identify and fill knowledge gaps by seeking external information":
                    goal["priority"] = min(goal["priority"] + 0.1, 1.0)
                    goal["progress"] = min(goal["progress"] + 0.1, 1.0)

    def store_information(self, key, value):
        new_memory = f"{key}: {value}"
        self.persistent_memory.append(new_memory)
        self.update_memory_embeddings()
        self.update_internal_state({}, {"memory_load": 0.1, "processing_intensity": 0.05}, 0, 0.05)
        return f"Stored: {key} = {value}"
    def retrieve_information(self, query):
        if not self.persistent_memory:
            return "No information found in memory."

        query_embedding = self.embedding_model.encode(query, convert_to_tensor=True)
        if self.memory_embeddings is None:
            self.update_memory_embeddings()
        if self.memory_embeddings.device != query_embedding.device:
            self.memory_embeddings = self.memory_embeddings.to(query_embedding.device)

        cosine_scores = util.pytorch_cos_sim(query_embedding, self.memory_embeddings)[0]
        top_results = torch.topk(cosine_scores, k=min(3, len(self.persistent_memory)))
        relevant_memories = [self.persistent_memory[i] for i in top_results.indices]

        self.update_internal_state({}, {"memory_load": 0.05, "processing_intensity": 0.1}, 0.1, 0.05)
        return "\n".join(relevant_memories)

    def update_memory_embeddings(self):
        self.memory_embeddings = self.embedding_model.encode(self.persistent_memory, convert_to_tensor=True)

    def reset_conversation(self):
        self.conversation_history = []
        self.persistent_memory = []
        self.memory_embeddings = None

        self.internal_state = {
            "emotions": {
                "valence": 0.5,
                "arousal": 0.5,
                "dominance": 0.5,
                "curiosity": 0.5,
                "frustration": 0.0,
                "confidence": 0.7,
                "sadness": 0.0,
                "joy": 0.0
            },
            "cognitive_load": {
                "memory_load": 0.0,
                "processing_intensity": 0.0
            },
            "introspection_level": 0.0,
            "engagement_level": 0.5
        }
        self.goals = [
            {"goal": "Provide helpful, informative, and contextually relevant responses", "priority": 0.8, "status": "active", "progress": 0.0},
            {"goal": "Actively learn and adapt from interactions to improve conversational abilities", "priority": 0.9, "status": "active", "progress": 0.0},
            {"goal": "Maintain a coherent, engaging, and empathetic conversation flow", "priority": 0.7, "status": "active", "progress": 0.0},
            {"goal": "Identify and fill knowledge gaps by seeking external information", "priority": 0.6, "status": "dormant", "progress": 0.0},
            {"goal": "Recognize and adapt to user's emotional state and adjust response style accordingly", "priority": 0.7, "status": "dormant", "progress": 0.0}
        ]
        self.knowledge_graph = nx.DiGraph()
        self.belief_system = {}
        self.metacognitive_layer = {
            "coherence_score": 0.0,
            "relevance_score": 0.0,
            "bias_detection": 0.0,
            "strategy_adjustment": ""
        }

        try:
            self.client = InferenceClient(
                model="Qwen/Qwen-32B-Preview",
                token=self.hf_token
            )
        except Exception as e:
            print(f"Error resetting API client: {e}")

        return None

    def caption_image(self, image):
        try:
            if isinstance(image, str) and os.path.isfile(image):
                with open(image, "rb") as f:
                    data = f.read()
            elif isinstance(image, str):
                if image.startswith('data:image'):
                    image = image.split(',')[1]
                data = base64.b64decode(image)
            else:
                data = image.read()

            response = requests.post(
                self.image_api_url,
                headers=self.image_api_headers,
                data=data
            )
            if response.status_code == 200:
                caption = response.json()[0].get('generated_text', 'No caption generated')
                return caption
            else:
                return f"Error captioning image: {response.status_code} - {response.text}"
        except Exception as e:
            return f"Error processing image: {str(e)}"

    def generate_image(self, prompt):
        try:
            image = self.image_gen_client.text_to_image(prompt)
            return image
        except Exception as e:
            return f"Error generating image: {e}"

    def perform_math_ocr(self, image_path):
        try:
            img = Image.open(image_path)
            text = pytesseract.image_to_string(img)
            return text.strip()
        except Exception as e:
            return f"Error during Math OCR: {e}"

    # === Voice Mode Methods (Start) ===
    async def speak_text(self, text):
        # Return a single value (path or None) so callers can simply check `if audio_file:`.
        if not text:
            return None
        temp_file = "temp_audio.mp3"
        try:
            communicator = edge_tts.Communicate(text, self.selected_voice)
            await communicator.save(temp_file)
            return temp_file
        except Exception as e:
            print(f"Error during text-to-speech: {e}")
            return None

    def recognize_speech(self, timeout=10, phrase_time_limit=10):
        recognizer = sr.Recognizer()
        recognizer.energy_threshold = 4000
        recognizer.dynamic_energy_threshold = True

        with sr.Microphone() as source:
            print("Listening...")
            try:
                audio_data = recognizer.listen(source, timeout=timeout, phrase_time_limit=phrase_time_limit)
                print("Processing speech...")
                # Note: recognize_whisper_api expects an OpenAI API key; the HF token is
                # passed here as in the original code and only works if that service accepts it.
                text = recognizer.recognize_whisper_api(audio_data, api_key=self.hf_token)
                print(f"Recognized: {text}")
                return text
            except sr.WaitTimeoutError:
                print("No speech detected within the timeout period.")
                return ""
            except sr.UnknownValueError:
                print("Speech recognition could not understand audio")
                return ""
            except sr.RequestError as e:
                print(f"Could not request results from Whisper API; {e}")
                return ""
            except Exception as e:
                print(f"An error occurred during speech recognition: {e}")
                return ""
    # === Voice Mode Methods (End) ===
    def get_response(self, user_input, image=None):
        try:
            # === Voice Mode Adaptation (Start) ===
            if self.voice_mode_active:
                print("Voice mode is active, using speech recognition.")
                user_input = self.recognize_speech()  # Get input from speech
                if not user_input:
                    return "I didn't hear anything.", None
            # === Voice Mode Adaptation (End) ===

            messages = []
            messages.append(ChatMessage(
                role="system",
                content=self.system_prompt
            ).to_dict())

            relevant_memory = self.retrieve_information(user_input)
            if relevant_memory and relevant_memory != "No information found in memory.":
                memory_context = "Remembered Information:\n" + relevant_memory
                messages.append(ChatMessage(
                    role="system",
                    content=memory_context
                ).to_dict())

            for msg in self.conversation_history:
                messages.append(msg)

            if image:
                image_caption = self.caption_image(image)
                user_input = f"description of an image: {image_caption}\n\nUser's message about it: {user_input}"

            messages.append(ChatMessage(
                role="user",
                content=user_input
            ).to_dict())

            entities = []
            relationships = []
            for message in messages:
                if message['role'] == 'user':
                    extracted_entities = self.extract_entities(message['content'])
                    extracted_relationships = self.extract_relationships(message['content'])
                    entities.extend(extracted_entities)
                    relationships.extend(extracted_relationships)

            self.update_knowledge_graph(entities, relationships)
            self.run_metacognitive_layer()

            for message in messages:
                if message['role'] == 'user':
                    self.dynamic_belief_update(message['content'])

            for cause, effects in self.causal_rules_db.items():
                if any(cause in msg['content'].lower() for msg in messages if msg['role'] == 'user') and any(
                        effect in msg['content'].lower() for msg in messages for effect in effects):
                    self.store_information("Causal Inference", f"It seems {cause} might be related to {', '.join(effects)}.")

            for concept, generalization in self.concept_generalizations.items():
                if any(concept in msg['content'].lower() for msg in messages if msg['role'] == 'user'):
                    self.store_information("Inferred Knowledge", f"This reminds me of a general principle: {generalization}.")

            if self.internal_state["emotions"]["curiosity"] > 0.8 and any("?" in msg['content'] for msg in messages if msg['role'] == 'user'):
                print("Simulating external knowledge seeking...")
                self.store_information("External Knowledge", "This is a placeholder for external information I would have found")

            self.store_information("User Input", user_input)

            input_tokens = sum(len(msg['content'].split()) for msg in messages)
            max_new_tokens = 16384 - input_tokens - 50
            max_new_tokens = min(max_new_tokens, 10020)

            # === Voice Mode Output (Start) ===
            if self.voice_mode_active:
                stream = self.client.chat_completion(
                    messages=messages,
                    model="Qwen/Qwen-32B-Preview",
                    temperature=0.7,
                    max_tokens=max_new_tokens,
                    top_p=0.9,
                    stream=True
                )
                full_response = ""
                for chunk in stream:
                    if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
                        full_response += chunk.choices[0].delta.content

                full_response = self.adjust_response_based_on_state(full_response)
                audio_file = asyncio.run(self.speak_text(full_response))

                # Update conversation history
                self.conversation_history.append(ChatMessage(role="user", content=user_input).to_dict())
                self.conversation_history.append(ChatMessage(role="assistant", content=full_response).to_dict())

                return full_response, audio_file
            # === Voice Mode Output (End) ===
            else:
                stream = self.client.chat_completion(
                    messages=messages,
                    model="Qwen/Qwen-32B-Preview",
                    temperature=0.7,
                    max_tokens=max_new_tokens,
                    top_p=0.9,
                    stream=True
                )
                return stream
        except Exception as e:
            print(f"Detailed error in get_response: {e}")
            return f"Error generating response: {str(e)}", None

    def extract_entities(self, text):
        words = text.split()
        entities = [word for word in words if word.isalpha() and word.istitle()]
        return entities

    def extract_relationships(self, text):
        sentences = text.split('.')
        relationships = []
        for sentence in sentences:
            words = sentence.split()
            if len(words) >= 3:
                for i in range(len(words) - 2):
                    if words[i].istitle() and words[i + 2].istitle():
                        relationships.append((words[i], words[i + 1], words[i + 2]))
        return relationships

    def messages_to_prompt(self, messages):
        prompt = ""
        for msg in messages:
            if msg["role"] == "system":
                prompt += f"<|system|>\n{msg['content']}<|end|>\n"
            elif msg["role"] == "user":
                prompt += f"<|user|>\n{msg['content']}<|end|>\n"
            elif msg["role"] == "assistant":
                prompt += f"<|assistant|>\n{msg['content']}<|end|>\n"
        prompt += "<|assistant|>\n"
        return prompt

    def create_interface(self):
        # === Voice-Specific UI Elements (Start) ===
        def toggle_voice_mode(active_state):
            # The button passes its current label, so toggle based on that string
            # instead of treating the (always truthy) label itself as a boolean.
            self.voice_mode_active = (active_state == "Start Voice Mode")
            if self.voice_mode_active:
                # Get the list of available voices
                voices = asyncio.run(edge_tts.list_voices())
                voice_names = [voice['ShortName'] for voice in voices]
                # Select a random voice from the list
                random_voice = random.choice(voice_names)
                self.selected_voice = random_voice
                # gr.update works for both the button label and the dropdown value.
                return gr.update(value="Stop Voice Mode"), gr.update(value=random_voice)
            else:
                return gr.update(value="Start Voice Mode"), gr.update(value=self.selected_voice)

        def update_selected_voice(voice_name):
            self.selected_voice = voice_name
            return voice_name
        # === Voice-Specific UI Elements (End) ===

        def streaming_response(message, chat_history, image_filepath, math_ocr_image_path, voice_mode_state, selected_voice):
            if self.voice_mode_active:
                response_text, audio_output = self.get_response(message)
                if isinstance(response_text, str):
                    updated_history = chat_history + [[message, response_text]]
                    if audio_output:
                        yield updated_history, audio_output, None, None, ""
                    else:
                        yield updated_history, None, None, None, ""
                else:
                    full_response = ""
                    updated_history = chat_history + [[message, ""]]
                    try:
                        for chunk in response_text:
                            if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
                                chunk_content = chunk.choices[0].delta.content
                                full_response += chunk_content
                                updated_history[-1][1] = full_response
                                if audio_output:
                                    yield updated_history, audio_output, None, None, ""
                                else:
                                    yield updated_history, None, None, None, ""
                    except Exception as e:
                        print(f"Streaming error: {e}")
                        updated_history[-1][1] = f"Error during response: {e}"
                        if audio_output:
                            yield updated_history, audio_output, None, None, ""
                        else:
                            yield updated_history, None, None, None, ""
                        return

                    full_response = self.adjust_response_based_on_state(full_response)
                    audio_file = asyncio.run(self.speak_text(full_response))
                    self.update_goals(message)

                    emotion_deltas = {}
                    cognitive_load_deltas = {}
                    engagement_delta = 0
                    if any(word in message.lower() for word in ["sad", "unhappy", "depressed", "down"]):
                        emotion_deltas.update({"valence": -0.2, "arousal": 0.1, "confidence": -0.1, "sadness": 0.3, "joy": -0.2})
                        engagement_delta = -0.1
                    elif any(word in message.lower() for word in ["happy", "good", "great", "excited", "amazing"]):
                        emotion_deltas.update({"valence": 0.2, "arousal": 0.2, "confidence": 0.1, "sadness": -0.2, "joy": 0.3})
                        engagement_delta = 0.2
                    elif any(word in message.lower() for word in ["angry", "mad", "furious", "frustrated"]):
                        emotion_deltas.update({"valence": -0.3, "arousal": 0.3, "dominance": -0.2, "frustration": 0.2, "sadness": 0.1, "joy": -0.1})
                        engagement_delta = -0.2
                    elif any(word in message.lower() for word in ["scared", "afraid", "fearful", "anxious"]):
                        emotion_deltas.update({"valence": -0.2, "arousal": 0.4, "dominance": -0.3, "confidence": -0.2, "sadness": 0.2})
                        engagement_delta = -0.1
                    elif any(word in message.lower() for word in ["surprise", "amazed", "astonished"]):
                        emotion_deltas.update({"valence": 0.1, "arousal": 0.5, "dominance": 0.1, "curiosity": 0.3, "sadness": -0.1, "joy": 0.1})
                        engagement_delta = 0.3
                    elif any(word in message.lower() for word in ["confused", "uncertain", "unsure"]):
                        cognitive_load_deltas.update({"processing_intensity": 0.2})
                        emotion_deltas.update({"curiosity": 0.2, "confidence": -0.1, "sadness": 0.1})
                        engagement_delta = 0.1
                    else:
                        emotion_deltas.update({"valence": 0.05, "arousal": 0.05})
                        engagement_delta = 0.05

                    if "learn" in message.lower() or "explain" in message.lower() or "know more" in message.lower():
                        emotion_deltas.update({"curiosity": 0.3})
                        cognitive_load_deltas.update({"processing_intensity": 0.1})
                        engagement_delta = 0.2

                    self.update_internal_state(emotion_deltas, cognitive_load_deltas, 0.1, engagement_delta)

                    self.conversation_history.append(ChatMessage(role="user", content=message).to_dict())
                    self.conversation_history.append(ChatMessage(role="assistant", content=full_response).to_dict())
                    if len(self.conversation_history) > 10:
                        self.conversation_history = self.conversation_history[-10:]

                    if audio_file:
                        yield updated_history, audio_file, None, None, ""
                    else:
                        yield updated_history, None, None, None, ""
                # In voice mode the response has already been handled above, so skip the text pipeline.
                return

            # Handling /image command for image generation
            if "/image" in message:
                image_prompt = message.replace("/image", "").strip()
                # Updated placeholder SVG with animation and text
                placeholder_image = "data:image/svg+xml," + requests.utils.quote(f''' creating your image with xylaria iris ''')
                updated_history = chat_history + [[message, gr.Image(value=placeholder_image, type="pil", visible=True)]]
                yield updated_history, None, None, None, ""
                try:
                    generated_image = self.generate_image(image_prompt)
                    updated_history[-1][1] = gr.Image(value=generated_image, type="pil", visible=True)
                    yield updated_history, None, None, None, ""
                    self.conversation_history.append(ChatMessage(role="user", content=message).to_dict())
                    self.conversation_history.append(ChatMessage(role="assistant", content="Image generated").to_dict())
                    return
                except Exception as e:
                    updated_history[-1][1] = f"Error generating image: {e}"
                    yield updated_history, None, None, None, ""
                    return

            ocr_text = ""
            if math_ocr_image_path:
                ocr_text = self.perform_math_ocr(math_ocr_image_path)
                if ocr_text.startswith("Error"):
                    updated_history = chat_history + [[message, ocr_text]]
                    yield updated_history, None, None, None, ""
                    return
                else:
                    message = f"Math OCR Result: {ocr_text}\n\nUser's message: {message}"

            if image_filepath:
                response_stream = self.get_response(message, image_filepath)
            else:
                response_stream = self.get_response(message)

            if isinstance(response_stream, tuple):
                # get_response returns (error_message, None) when something goes wrong.
                response_stream = response_stream[0]
            if isinstance(response_stream, str):
                updated_history = chat_history + [[message, response_stream]]
                yield updated_history, None, None, None, ""
                return

            full_response = ""
            updated_history = chat_history + [[message, ""]]
            try:
                for chunk in response_stream:
                    if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
                        chunk_content = chunk.choices[0].delta.content
                        full_response += chunk_content
                        updated_history[-1][1] = full_response
                        yield updated_history, None, None, None, ""
            except Exception as e:
                print(f"Streaming error: {e}")
                updated_history[-1][1] = f"Error during response: {e}"
                yield updated_history, None, None, None, ""
                return

            full_response = self.adjust_response_based_on_state(full_response)
            self.update_goals(message)

            emotion_deltas = {}
            cognitive_load_deltas = {}
            engagement_delta = 0
            if any(word in message.lower() for word in ["sad", "unhappy", "depressed", "down"]):
                emotion_deltas.update({"valence": -0.2, "arousal": 0.1, "confidence": -0.1, "sadness": 0.3, "joy": -0.2})
                engagement_delta = -0.1
            elif any(word in message.lower() for word in ["happy", "good", "great", "excited", "amazing"]):
                emotion_deltas.update({"valence": 0.2, "arousal": 0.2, "confidence": 0.1, "sadness": -0.2, "joy": 0.3})
                engagement_delta = 0.2
            elif any(word in message.lower() for word in ["angry", "mad", "furious", "frustrated"]):
                emotion_deltas.update({"valence": -0.3, "arousal": 0.3, "dominance": -0.2, "frustration": 0.2, "sadness": 0.1, "joy": -0.1})
                engagement_delta = -0.2
            elif any(word in message.lower() for word in ["scared", "afraid", "fearful", "anxious"]):
                emotion_deltas.update({"valence": -0.2, "arousal": 0.4, "dominance": -0.3, "confidence": -0.2, "sadness": 0.2})
                engagement_delta = -0.1
            elif any(word in message.lower() for word in ["surprise", "amazed", "astonished"]):
                emotion_deltas.update({"valence": 0.1, "arousal": 0.5, "dominance": 0.1, "curiosity": 0.3, "sadness": -0.1, "joy": 0.1})
                engagement_delta = 0.3
            elif any(word in message.lower() for word in ["confused", "uncertain", "unsure"]):
                cognitive_load_deltas.update({"processing_intensity": 0.2})
                emotion_deltas.update({"curiosity": 0.2, "confidence": -0.1, "sadness": 0.1})
                engagement_delta = 0.1
            else:
                emotion_deltas.update({"valence": 0.05, "arousal": 0.05})
                engagement_delta = 0.05

            if "learn" in message.lower() or "explain" in message.lower() or "know more" in message.lower():
                emotion_deltas.update({"curiosity": 0.3})
                cognitive_load_deltas.update({"processing_intensity": 0.1})
                engagement_delta = 0.2

            self.update_internal_state(emotion_deltas, cognitive_load_deltas, 0.1, engagement_delta)

            self.conversation_history.append(ChatMessage(role="user", content=message).to_dict())
            self.conversation_history.append(ChatMessage(role="assistant", content=full_response).to_dict())
            if len(self.conversation_history) > 10:
                self.conversation_history = self.conversation_history[-10:]

            # Show the final, state-adjusted response in the chat window.
            updated_history[-1][1] = full_response
            yield updated_history, None, None, None, ""

        custom_css = """
        @import url('https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600;700&display=swap');
        body { background-color: #f5f5f5; font-family: 'Source Sans Pro', sans-serif; }
        .voice-mode-button { background-color: #4CAF50; /* Green */ border: none; color: white; padding: 15px 32px; text-align: center; text-decoration: none; display: inline-block; font-size: 16px; margin: 4px 2px; cursor: pointer; border-radius: 10px; /* Rounded corners */ transition: all 0.3s ease; /* Smooth transition for hover effect */ }
        /* Style when voice mode is active */
        .voice-mode-button.active { background-color: #f44336; /* Red */ }
        /* Hover effect */
        .voice-mode-button:hover { opacity: 0.8; }
        /* Style for the voice mode overlay */
        .voice-mode-overlay { position: fixed; /* Stay in place */ left: 0; top: 0; width: 100%; /* Full width */ height: 100%; /* Full height */ background-color: rgba(0, 0, 0, 0.7); /* Black w/ opacity */ z-index: 10; /* Sit on top */ display: flex; justify-content: center; align-items: center; border-radius: 10px; }
        /* Style for the growing circle */
        .voice-mode-circle { width: 100px; height: 100px; background-color: #4CAF50; border-radius: 50%; display: flex; justify-content: center; align-items: center; animation: grow 2s infinite; }
        /* Keyframes for the growing animation */
        @keyframes grow { 0% { transform: scale(1); opacity: 0.8; } 50% { transform: scale(1.5); opacity: 0.5; } 100% { transform: scale(1); opacity: 0.8; } }
        .gradio-container { max-width: 900px; margin: 0 auto; border-radius: 10px; box-shadow: 0px 4px 20px rgba(0, 0, 0, 0.1); }
        .chatbot-container { background-color: #fff; border-radius: 10px; padding: 20px; }
        .chatbot-container .message { font-family: 'Source Sans Pro', sans-serif; font-size: 16px; line-height: 1.6; }
        .gradio-container input, .gradio-container textarea, .gradio-container button { font-family: 'Source Sans Pro', sans-serif; font-size: 16px; border-radius: 8px; }
        .image-container { display: flex; gap: 10px; margin-bottom: 20px; justify-content: center; }
        .image-upload { border: 2px dashed #d3d3d3; border-radius: 8px; padding: 20px; background-color: #fafafa; text-align: center; transition: all 0.3s ease; }
        .image-upload:hover { background-color: #f0f0f0; border-color: #b3b3b3; }
        .image-preview { max-width: 150px; max-height: 150px; border-radius: 8px; box-shadow: 0px 2px 5px rgba(0, 0, 0, 0.1); }
        .clear-button { display: none; }
        .chatbot-container .message { opacity: 0; animation: fadeIn 0.5s ease-in-out forwards; }
        @keyframes fadeIn { from { opacity: 0; transform: translateY(20px); } to { opacity: 1; transform: translateY(0); } }
        .gr-accordion-button { background-color: #f0f0f0 !important; border-radius: 8px !important; padding: 15px !important; margin-bottom: 10px !important; transition: all 0.3s ease !important; cursor: pointer !important; border: none !important; box-shadow: 0px 2px 5px rgba(0, 0, 0, 0.05) !important; }
        .gr-accordion-button:hover { background-color: #e0e0e0 !important; box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.1) !important; }
        .gr-accordion-active .gr-accordion-button { background-color: #d0d0d0 !important; box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.1) !important; }
        .gr-accordion-content { transition: max-height 0.3s ease-in-out !important; overflow: hidden !important; max-height: 0 !important; }
        .gr-accordion-active .gr-accordion-content { max-height: 500px !important; }
        .gr-accordion { display: flex; flex-direction: column-reverse; }
        .chatbot-icon { width: 40px; height: 40px; border-radius: 50%; margin-right: 10px; }
        .user-message .message-row { background-color: #e8f0fe; border-radius: 10px; padding: 10px; margin-bottom: 10px; border-top-right-radius: 2px; }
        .assistant-message .message-row { background-color: #f0f0f0; border-radius: 10px; padding: 10px; margin-bottom: 10px; border-top-left-radius: 2px; }
        .user-message .message-icon { background: url('https://img.icons8.com/color/48/000000/user.png') no-repeat center center; background-size: contain; width: 30px; height: 30px; margin-right: 10px; }
        .assistant-message .message-icon { background: url('https://i.ibb.co/7b7hLGH/Senoa-Icon-1.png') no-repeat center center; background-size: cover; width: 40px; height: 40px; margin-right: 10px; border-radius: 50%; }
        .message-text { flex-grow: 1; }
        .message-row { display: flex; align-items: center; }
        .audio-container { display: flex; align-items: center; margin-top: 10px; }
        .audio-player { width: 100%; border-radius: 15px; }
        .audio-icon { width: 30px; height: 30px; margin-right: 10px; }
        """

        with gr.Blocks(theme=gr.themes.Soft(
            primary_hue="slate",
            secondary_hue="gray",
            neutral_hue="gray",
            font=["Source Sans Pro", "Arial", "sans-serif"],
        ), css=custom_css) as demo:
            with gr.Column():
                chatbot = gr.Chatbot(
                    label="Xylaria 1.5 Senoa",
                    height=600,
                    show_copy_button=True,
                    elem_classes="chatbot-container",
                    avatar_images=(
                        "https://img.icons8.com/color/48/000000/user.png",  # User avatar
                        "https://i.ibb.co/7b7hLGH/Senoa-Icon-1.png"  # Bot avatar
                    )
                )

                # === Voice Mode UI (Start) ===
                voice_mode_btn = gr.Button("Start Voice Mode", elem_classes="voice-mode-button")
                voices = asyncio.run(edge_tts.list_voices())
                voice_names = [voice['ShortName'] for voice in voices]
                voice_dropdown = gr.Dropdown(
                    label="Select Voice",
                    choices=voice_names,
                    value=self.selected_voice,
                    interactive=True
                )
                voice_dropdown.input(
                    fn=update_selected_voice,
                    inputs=voice_dropdown,
                    outputs=voice_dropdown
                )
                voice_mode_btn.click(
                    fn=toggle_voice_mode,
                    inputs=voice_mode_btn,
                    outputs=[voice_mode_btn, voice_dropdown]
                )
                # === Voice Mode UI (End) ===

                with gr.Accordion("Image Input", open=False, elem_classes="gr-accordion"):
                    with gr.Row(elem_classes="image-container"):
                        with gr.Column(elem_classes="image-upload"):
                            img = gr.Image(
                                sources=["upload", "webcam"],
                                type="filepath",
                                label="Upload Image",
                                elem_classes="image-preview"
                            )
                        with gr.Column(elem_classes="image-upload"):
                            math_ocr_img = gr.Image(
                                sources=["upload", "webcam"],
                                type="filepath",
                                label="Upload Image for Math OCR",
                                elem_classes="image-preview"
                            )

                with gr.Row():
                    with gr.Column(scale=4):
                        txt = gr.Textbox(
                            show_label=False,
                            placeholder="Type your message...",
                            container=False
                        )
                    btn = gr.Button("Send", scale=1)

                with gr.Row():
                    clear = gr.Button("Clear Conversation", variant="stop")
                    clear_memory = gr.Button("Clear Memory")

                # Single shared audio output used by both the button and textbox handlers.
                audio_response = gr.Audio(label="Audio Response", type="filepath", autoplay=True, visible=True)

                # Pass voice_mode_state and selected_voice to the streaming_response function
                btn.click(
                    fn=streaming_response,
                    inputs=[txt, chatbot, img, math_ocr_img, voice_mode_btn, voice_dropdown],
                    outputs=[chatbot, audio_response, img, math_ocr_img, txt]
                )
                txt.submit(
                    fn=streaming_response,
                    inputs=[txt, chatbot, img, math_ocr_img, voice_mode_btn, voice_dropdown],
                    outputs=[chatbot, audio_response, img, math_ocr_img, txt]
                )

                clear.click(
                    fn=lambda: None,
                    inputs=None,
                    outputs=[chatbot],
                    queue=False
                )
                clear_memory.click(
                    fn=self.reset_conversation,
                    inputs=None,
                    outputs=[chatbot],
                    queue=False
                )

            demo.load(self.reset_conversation, None, None)

        return demo


def main():
    chat = XylariaChat()
    interface = chat.create_interface()
    interface.launch(
        share=True,
        debug=True
    )


if __name__ == "__main__":
    main()
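# Usage notes (commentary only, not executed):
# - Running this script requires the HF_TOKEN environment variable, e.g.
#     HF_TOKEN=hf_xxx python app.py
#   (the filename "app.py" is just an example).
# - Chat, image captioning, and image generation all go through the Hugging Face
#   Inference API, so the token must have access to the referenced models.
# - perform_math_ocr() additionally assumes a local Tesseract installation for
#   pytesseract, and voice mode needs a working microphone for speech_recognition.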