Garvitj committed on
Commit
dac5e9b
·
verified ·
1 Parent(s): 9f87106

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -12
app.py CHANGED
@@ -187,17 +187,19 @@ def process_video(video_path):
187
 
188
 
189
 
190
- # Process audio from video and predict emotions
191
  def process_audio_from_video(video_path):
192
- audio_path = video_path.replace(".mp4", ".wav")
193
 
194
  try:
195
- # Extract audio using FFmpeg
196
- ffmpeg.input(video_path).output(audio_path, format='wav', acodec='pcm_s16le', ac=1, ar='16000').run(overwrite_output=True)
 
 
 
197
 
198
  recognizer = sr.Recognizer()
199
 
200
- with sr.AudioFile(audio_path) as source:
201
  audio_record = recognizer.record(source)
202
  text = recognizer.recognize_google(audio_record)
203
  pre_text = preprocess_text(text)
@@ -206,24 +208,45 @@ def process_audio_from_video(video_path):
206
  inp1 = np.array(padded_title_seq)
207
  text_prediction = text_model.predict(inp1)
208
 
209
- os.remove(audio_path)
210
 
211
  max_index = text_prediction.argmax()
212
  text_emotion = {0: "anger", 1: "disgust", 2: "fear", 3: "joy", 4: "neutral", 5: "sadness", 6: "surprise"}[max_index]
213
 
214
- # Load audio with pydub for NumPy conversion
215
- audio_segment = AudioSegment.from_wav(audio_path)
216
- sound_array = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
 
 
 
 
 
 
 
 
 
217
 
218
- # Predict emotion from audio
219
- audio_emotion = predict_emotion((16000, sound_array))
 
 
 
 
 
 
 
 
 
 
 
220
 
221
  except Exception as e:
222
- print(f"Error processing audio: {e}")
223
  audio_emotion = "Error in audio processing"
224
 
225
  return text_emotion, audio_emotion
226
 
 
227
  # Main function to handle video emotion recognition
228
  def transcribe_and_predict_video(video):
229
  image_emotion = process_video(video)
 
187
 
188
 
189
 
 
190
  def process_audio_from_video(video_path):
191
+ text_emotion = "Error in text processing" # Initialize text_emotion
192
 
193
  try:
194
+ # Load the video using an alternative library (e.g., ffmpeg or cv2)
195
+ import ffmpeg
196
+
197
+ audio_output = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
198
+ ffmpeg.input(video_path).output(audio_output, format="wav").run(quiet=True)
199
 
200
  recognizer = sr.Recognizer()
201
 
202
+ with sr.AudioFile(audio_output) as source:
203
  audio_record = recognizer.record(source)
204
  text = recognizer.recognize_google(audio_record)
205
  pre_text = preprocess_text(text)
 
208
  inp1 = np.array(padded_title_seq)
209
  text_prediction = text_model.predict(inp1)
210
 
211
+ os.remove(audio_output)
212
 
213
  max_index = text_prediction.argmax()
214
  text_emotion = {0: "anger", 1: "disgust", 2: "fear", 3: "joy", 4: "neutral", 5: "sadness", 6: "surprise"}[max_index]
215
 
216
+ except Exception as e:
217
+ print(f"Error processing text from audio: {e}")
218
+ text_emotion = "Error in text processing"
219
+
220
+ try:
221
+ # Extract audio features for emotion recognition
222
+ sample_rate, data = librosa.load(video_path, sr=None, mono=True)
223
+ data = data.flatten()
224
+
225
+ if data.dtype != np.float32:
226
+ data = data.astype(np.float32)
227
+ data = data / np.max(np.abs(data))
228
 
229
+ features = extract_features(data, sample_rate)
230
+ features = np.expand_dims(features, axis=0)
231
+ scaled_features = scaler.transform(features)
232
+ scaled_features = np.expand_dims(scaled_features, axis=2)
233
+
234
+ prediction = audio_model.predict(scaled_features)
235
+ emotion_index = np.argmax(prediction)
236
+
237
+ num_classes = len(encoder.categories_[0])
238
+ emotion_array = np.zeros((1, num_classes))
239
+ emotion_array[0, emotion_index] = 1
240
+
241
+ audio_emotion = encoder.inverse_transform(emotion_array)[0]
242
 
243
  except Exception as e:
244
+ print(f"Error processing audio features: {e}")
245
  audio_emotion = "Error in audio processing"
246
 
247
  return text_emotion, audio_emotion
248
 
249
+
250
  # Main function to handle video emotion recognition
251
  def transcribe_and_predict_video(video):
252
  image_emotion = process_video(video)