import gradio as gr
from transformers import LlamaTokenizer, LlamaForCausalLM
from collections import Counter
import tempfile
import numpy as np

# Initialize LLaMA model for question answering.
# NOTE: 'huggingface/llama-7b' is not a public Hub repo id; point this at a
# LLaMA checkpoint you actually have access to (e.g. 'meta-llama/Llama-2-7b-hf').
llama_tokenizer = LlamaTokenizer.from_pretrained('huggingface/llama-7b')
llama_model = LlamaForCausalLM.from_pretrained('huggingface/llama-7b')

# Multimodal emotion detection. `process_video` and `process_audio_from_video`
# are assumed to be defined in your existing emotion-recognition code.
def transcribe_and_predict_video(video):
    # Process video frames for image-based emotion recognition
    image_emotion = process_video(video)

    # Process audio for text- and audio-based emotion recognition
    text_emotion, audio_emotion = process_audio_from_video(video)

    # Determine the overall emotion by majority vote across the three modalities
    # (Counter.most_common breaks ties in favour of the first label encountered)
    overall_emotion = Counter([text_emotion, audio_emotion, image_emotion]).most_common(1)[0][0]

    return overall_emotion

# Emotion-aware question answering with the LLM
def emotion_aware_qa(question, video):
    # Get the emotion from the video using the detection pipeline above
    detected_emotion = transcribe_and_predict_video(video)

    # Build a response context based on the detected emotion
    if detected_emotion == 'joy':
        emotion_context = "You're in a good mood! Let's keep the positivity going."
    elif detected_emotion == 'sadness':
        emotion_context = "It seems like you're feeling a bit down. Let me help with that."
    elif detected_emotion == 'anger':
        emotion_context = "I sense some frustration. Let's work through it together."
    elif detected_emotion == 'fear':
        emotion_context = "It sounds like you're anxious. How can I assist in calming things down?"
    elif detected_emotion == 'neutral':
        emotion_context = "You're feeling neutral. How can I help you today?"
    else:
        emotion_context = "You're in an uncertain emotional state. Let me guide you."

    # Prepend the emotion context to the user question to form the prompt
    prompt = f"{emotion_context} User asks: {question}"

    # Tokenize the prompt and generate a response from LLaMA
    inputs = llama_tokenizer(prompt, return_tensors="pt")
    outputs = llama_model.generate(**inputs, max_new_tokens=150)

    # Decode only the newly generated tokens so the echoed prompt is not returned
    answer = llama_tokenizer.decode(outputs[0][inputs['input_ids'].shape[-1]:], skip_special_tokens=True)

    return answer

# Gradio interface tying together the LLM and video emotion detection
def gradio_interface(question, video):
    return emotion_aware_qa(question, video)

iface = gr.Interface(
    fn=gradio_interface,
    inputs=["text", gr.Video()],
    outputs="text",
    title="Emotion-Aware Question Answering",
    description="Ask a question and get an emotion-aware response based on the video.",
)
iface.launch()
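
# ---------------------------------------------------------------------------
# Optional: querying the running app programmatically (hedged sketch, not part
# of the original app). Run this from a separate Python process while the
# server above is up. It assumes gradio_client >= 1.0 (where file inputs are
# wrapped with handle_file), the default local URL http://127.0.0.1:7860, the
# default "/predict" endpoint exposed by gr.Interface, and a hypothetical
# local video file "sample_clip.mp4"; adjust these for your setup.
# ---------------------------------------------------------------------------
# from gradio_client import Client, handle_file
#
# client = Client("http://127.0.0.1:7860")
# answer = client.predict(
#     "How should I plan a stressful week?",  # question (text input)
#     handle_file("sample_clip.mp4"),         # video input (hypothetical file)
#     api_name="/predict",
# )
# print(answer)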