Spaces:

Skriller0208
/

AudioValidation

Sleeping

App Files Files Community

AudioValidation / whisper_processor.py

Skriller0208

Update whisper_processor.py

bef3363 verified 7 months ago

raw

history blame contribute delete

2.71 kB

	import io
	import os
	import subprocess
	import sys
	import tempfile
	import wave

	from pydub import AudioSegment

	def process_audio(wav_file: str, model_name: str, lang: str) -> str:
	    """
	    Run the local ``./main`` executable on a WAV file and return its text output.

	    The audio is loaded with pydub, resampled to 16 kHz when its frame rate
	    differs, written to a uniquely named temporary WAV file and handed to
	    ``./main`` (presumably the whisper.cpp CLI -- confirm against the
	    deployment). The temporary file is removed before returning.

	    :param wav_file: Path to the WAV file
	    :param model_name: Name of the model to use; resolved to ``ggml-<model_name>.bin``
	        relative to the current working directory
	    :param lang: Language value passed to the executable's ``-l`` option
	    :return: Standard output of the executable with ``[BLANK_AUDIO]`` markers
	        and surrounding whitespace removed
	    :raises FileNotFoundError: if the model file or the WAV file does not exist
	    :raises Exception: if the executable writes to stderr or exits with a
	        non-zero status
	    """
	    model = f"ggml-{model_name}.bin"

	    # Check if the model file exists
	    if not os.path.exists(model):
	        raise FileNotFoundError(f"Model file not found: {model} \n\nDownload a model with this command:\n\n> bash ./models/download-ggml-model.sh {model_name}\n\n")

	    if not os.path.exists(wav_file):
	        raise FileNotFoundError(f"WAV file not found: {wav_file}")

	    # Load and resample the audio to 16 kHz if necessary
	    audio = AudioSegment.from_wav(wav_file)
	    if audio.frame_rate != 16000:
	        print(f"Resampling {wav_file} to 16 kHz...")
	        audio = audio.set_frame_rate(16000)

	    # Encode once in memory; the exported buffer is already a complete WAV
	    # file, so no second copy through the wave module is needed.
	    audio_buffer = io.BytesIO()
	    audio.export(audio_buffer, format="wav")

	    # A unique temporary file keeps concurrent calls from overwriting each
	    # other's audio. delete=False because the subprocess reopens it by name.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
	        temp_wav.write(audio_buffer.getvalue())
	        temp_wav_path = temp_wav.name

	    try:
	        # Pass an argument list without a shell so that no value can be
	        # interpreted as shell syntax.
	        command = [
	            "./main",
	            "-m", model,
	            "-f", temp_wav_path,
	            "-l", str(lang),
	            "-np",
	            "-nt",
	        ]
	        completed = subprocess.run(
	            command,
	            stdout=subprocess.PIPE,
	            stderr=subprocess.PIPE,
	        )
	    finally:
	        # Best-effort cleanup: a failed removal must not mask the real result.
	        try:
	            os.remove(temp_wav_path)
	        except OSError:
	            pass

	    # Any stderr output is treated as a failure (as before); a non-zero exit
	    # status is now also reported even when stderr is empty.
	    if completed.stderr or completed.returncode != 0:
	        detail = completed.stderr.decode('utf-8', errors='replace')
	        if not detail:
	            detail = f"exit code {completed.returncode}"
	        raise Exception(f"Error processing audio: {detail}")

	    # Process and return the output string
	    decoded_str = completed.stdout.decode('utf-8', errors='replace').strip()
	    processed_str = decoded_str.replace('[BLANK_AUDIO]', '').strip()

	    return processed_str