Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -2,15 +2,16 @@ import gradio as gr
|
|
2 |
import numpy as np
|
3 |
import librosa
|
4 |
import cv2
|
5 |
-
import
|
6 |
import speech_recognition as sr
|
7 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
8 |
import tensorflow as tf
|
9 |
from tensorflow.keras.preprocessing.text import tokenizer_from_json
|
10 |
from tensorflow.keras.models import load_model
|
11 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
12 |
-
from tensorflow.keras.preprocessing.image import img_to_array
|
13 |
from collections import Counter
|
|
|
14 |
|
15 |
# Load necessary models and files
|
16 |
text_model = load_model('model_for_text_emotion_updated(1).keras') # Load your text emotion model
|
@@ -73,12 +74,13 @@ def process_video(video_path):
|
|
73 |
most_common_emotion = Counter(predictions).most_common(1)[0][0]
|
74 |
return emotion_mapping[most_common_emotion]
|
75 |
|
76 |
-
# Extract audio from video
|
77 |
def extract_audio_from_video(video_path):
|
78 |
-
video = mp.VideoFileClip(video_path)
|
79 |
-
audio = video.audio
|
80 |
audio_file = 'audio.wav'
|
81 |
-
|
|
|
|
|
|
|
82 |
return audio_file
|
83 |
|
84 |
def transcribe_audio(audio_file):
|
|
|
2 |
import numpy as np
|
3 |
import librosa
|
4 |
import cv2
|
5 |
+
import ffmpeg
|
6 |
import speech_recognition as sr
|
7 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
8 |
import tensorflow as tf
|
9 |
from tensorflow.keras.preprocessing.text import tokenizer_from_json
|
10 |
from tensorflow.keras.models import load_model
|
11 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
12 |
+
from tensorflow.keras.preprocessing.image import img_to_array
|
13 |
from collections import Counter
|
14 |
+
import os
|
15 |
|
16 |
# Load necessary models and files
|
17 |
text_model = load_model('model_for_text_emotion_updated(1).keras') # Load your text emotion model
|
|
|
74 |
most_common_emotion = Counter(predictions).most_common(1)[0][0]
|
75 |
return emotion_mapping[most_common_emotion]
|
76 |
|
77 |
+
# Extract audio from video using ffmpeg-python
def extract_audio_from_video(video_path, audio_file='audio.wav'):
    """Extract the audio track of a video file to a WAV file.

    Parameters
    ----------
    video_path : str
        Path to the input video file.
    audio_file : str, optional
        Destination path for the extracted audio. Defaults to
        ``'audio.wav'`` in the current working directory, preserving
        the original behavior for existing callers.

    Returns
    -------
    str
        The path of the written WAV file (``audio_file``).

    Raises
    ------
    ffmpeg.Error
        If the underlying ffmpeg process fails (e.g. missing input,
        unreadable stream).
    """
    # pcm_s16le (16-bit little-endian PCM) is the canonical uncompressed
    # WAV codec and is what speech_recognition expects downstream.
    (ffmpeg
        .input(video_path)
        .output(audio_file, format='wav', acodec='pcm_s16le')
        .run(overwrite_output=True))
    return audio_file
|
85 |
|
86 |
def transcribe_audio(audio_file):
|