Spaces:

Garvitj
/

emotion-llm

Sleeping

App Files Community

Garvitj committed on Jan 18

Commit

dac5e9b

verified ·

1 Parent(s): 9f87106

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -12

app.py CHANGED Viewed

@@ -187,17 +187,19 @@ def process_video(video_path):
-# Process audio from video and predict emotions
 def process_audio_from_video(video_path):
-    audio_path = video_path.replace(".mp4", ".wav")
     try:
-        # Extract audio using FFmpeg
-        ffmpeg.input(video_path).output(audio_path, format='wav', acodec='pcm_s16le', ac=1, ar='16000').run(overwrite_output=True)
         recognizer = sr.Recognizer()
-        with sr.AudioFile(audio_path) as source:
             audio_record = recognizer.record(source)
             text = recognizer.recognize_google(audio_record)
             pre_text = preprocess_text(text)
@@ -206,24 +208,45 @@ def process_audio_from_video(video_path):
             inp1 = np.array(padded_title_seq)
             text_prediction = text_model.predict(inp1)
-        os.remove(audio_path)
         max_index = text_prediction.argmax()
         text_emotion = {0: "anger", 1: "disgust", 2: "fear", 3: "joy", 4: "neutral", 5: "sadness", 6: "surprise"}[max_index]
-        # Load audio with pydub for NumPy conversion
-        audio_segment = AudioSegment.from_wav(audio_path)
-        sound_array = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
-        # Predict emotion from audio
-        audio_emotion = predict_emotion((16000, sound_array))
     except Exception as e:
-        print(f"Error processing audio: {e}")
         audio_emotion = "Error in audio processing"
     return text_emotion, audio_emotion
 # Main function to handle video emotion recognition
 def transcribe_and_predict_video(video):
     image_emotion = process_video(video)

 def process_audio_from_video(video_path):
+    text_emotion = "Error in text processing"  # Initialize text_emotion
     try:
+        # Load the video using an alternative library (e.g., ffmpeg or cv2)
+        import ffmpeg
+        audio_output = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
+        ffmpeg.input(video_path).output(audio_output, format="wav").run(quiet=True)
         recognizer = sr.Recognizer()
+        with sr.AudioFile(audio_output) as source:
             audio_record = recognizer.record(source)
             text = recognizer.recognize_google(audio_record)
             pre_text = preprocess_text(text)
             inp1 = np.array(padded_title_seq)
             text_prediction = text_model.predict(inp1)
+        os.remove(audio_output)
         max_index = text_prediction.argmax()
         text_emotion = {0: "anger", 1: "disgust", 2: "fear", 3: "joy", 4: "neutral", 5: "sadness", 6: "surprise"}[max_index]
+    except Exception as e:
+        print(f"Error processing text from audio: {e}")
+        text_emotion = "Error in text processing"
+    try:
+        # Extract audio features for emotion recognition
+        sample_rate, data = librosa.load(video_path, sr=None, mono=True)
+        data = data.flatten()
+        if data.dtype != np.float32:
+            data = data.astype(np.float32)
+        data = data / np.max(np.abs(data))
+        features = extract_features(data, sample_rate)
+        features = np.expand_dims(features, axis=0)
+        scaled_features = scaler.transform(features)
+        scaled_features = np.expand_dims(scaled_features, axis=2)
+        prediction = audio_model.predict(scaled_features)
+        emotion_index = np.argmax(prediction)
+        num_classes = len(encoder.categories_[0])
+        emotion_array = np.zeros((1, num_classes))
+        emotion_array[0, emotion_index] = 1
+        audio_emotion = encoder.inverse_transform(emotion_array)[0]
     except Exception as e:
+        print(f"Error processing audio features: {e}")
         audio_emotion = "Error in audio processing"
     return text_emotion, audio_emotion
 # Main function to handle video emotion recognition
 def transcribe_and_predict_video(video):
     image_emotion = process_video(video)