Skriller0208 committed on
Commit
5ac1f2e
·
verified ·
1 Parent(s): cb3f8d2

Upload whisper_processor.py

Browse files
Files changed (1) hide show
  1. whisper_processor.py +71 -0
whisper_processor.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ import sys
3
+ import os
4
+ from pydub import AudioSegment
5
+ import io
6
+ import wave
7
+
8
def process_audio(wav_file, model_name, lang):
    """
    Run the external ``./main`` binary on a WAV file and return its cleaned output.

    The audio is loaded with pydub, resampled to 16 kHz when its frame rate
    differs, written to a private temporary WAV file and passed to ``./main``
    (presumably the whisper.cpp command-line tool -- confirm against the
    deployment). The temporary file is removed before the function returns.

    :param wav_file: Path to the WAV file
    :param model_name: Name of the model to use; resolved to
        ``models/ggml-<model_name>.bin`` relative to the working directory
    :param lang: Value passed to ``./main`` through its ``-l`` option
    :return: Standard output of the command with ``[BLANK_AUDIO]`` markers
        and surrounding whitespace removed
    :raises FileNotFoundError: if the model file or the WAV file does not exist
    :raises RuntimeError: if the command writes anything to stderr or exits
        with a non-zero status
    """
    # Function-scope import so this block does not depend on edits to the
    # module's import section.
    import tempfile

    model = f"models/ggml-{model_name}.bin"

    # Check if the model file exists
    if not os.path.exists(model):
        raise FileNotFoundError(f"Model file not found: {model} \n\nDownload a model with this command:\n\n> bash ./models/download-ggml-model.sh {model_name}\n\n")

    if not os.path.exists(wav_file):
        raise FileNotFoundError(f"WAV file not found: {wav_file}")

    # Load and resample the audio to 16 kHz if necessary
    audio = AudioSegment.from_wav(wav_file)
    if audio.frame_rate != 16000:
        print(f"Resampling {wav_file} to 16 kHz...")
        audio = audio.set_frame_rate(16000)

    # Encode once into memory; the buffer already holds a complete WAV at
    # the (possibly resampled) frame rate, so no second re-encode is needed.
    audio_buffer = io.BytesIO()
    audio.export(audio_buffer, format="wav")

    # A unique, private temp file avoids clashes between concurrent calls
    # and the predictable-name problem of a fixed path under /tmp.
    fd, temp_wav_path = tempfile.mkstemp(suffix=".wav")
    try:
        with os.fdopen(fd, "wb") as temp_wav:
            temp_wav.write(audio_buffer.getvalue())

        # Argument list + no shell: values are passed verbatim, so spaces or
        # shell metacharacters in them cannot alter the command.
        command = [
            "./main",
            "-m", model,
            "-f", temp_wav_path,
            "-l", str(lang),
            "-np",
            "-nt",
        ]
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    finally:
        # Best-effort cleanup; a failure to delete must not mask the real
        # outcome (or the real exception) of the processing step.
        try:
            os.remove(temp_wav_path)
        except OSError:
            pass

    # Any stderr output is treated as a failure, as before.
    if result.stderr:
        error_text = result.stderr.decode("utf-8", errors="replace")
        raise RuntimeError(f"Error processing audio: {error_text}")

    # A silent non-zero exit is also a failure rather than an empty result.
    if result.returncode != 0:
        raise RuntimeError(
            f"Error processing audio: ./main exited with status {result.returncode}"
        )

    # Undecodable bytes are replaced instead of raising UnicodeDecodeError.
    decoded_str = result.stdout.decode("utf-8", errors="replace").strip()
    processed_str = decoded_str.replace('[BLANK_AUDIO]', '').strip()

    return processed_str