File size: 2,709 Bytes
5ac1f2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bef3363
5ac1f2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import io
import os
import subprocess
import sys
import tempfile
import wave

from pydub import AudioSegment

def process_audio(wav_file, model_name, lang):
    """
    Processes an audio file using a specified model and returns the processed string.

    The audio is loaded with pydub, resampled to 16 kHz when its frame rate
    differs, written to a uniquely named temporary WAV file, and handed to the
    local ``./main`` executable as
    ``./main -m ggml-<model_name>.bin -f <temp wav> -l <lang> -np -nt``.
    The temporary file is removed before this function returns or raises.

    :param wav_file: Path to the WAV file
    :param model_name: Name of the model to use; the model file is looked up
        as ``ggml-{model_name}.bin`` relative to the current working directory
    :param lang: Value passed to the ``-l`` option of ``./main``
    :return: Standard output of ``./main`` decoded as UTF-8, with every
        ``[BLANK_AUDIO]`` marker removed and surrounding whitespace stripped
    :raises FileNotFoundError: if the model file or the WAV file does not exist
    :raises RuntimeError: if ``./main`` cannot be started, exits with a
        non-zero status, or writes anything to standard error
    """
    model = f"ggml-{model_name}.bin"

    # Check if the model file exists
    if not os.path.exists(model):
        raise FileNotFoundError(f"Model file not found: {model} \n\nDownload a model with this command:\n\n> bash ./models/download-ggml-model.sh {model_name}\n\n")

    if not os.path.exists(wav_file):
        raise FileNotFoundError(f"WAV file not found: {wav_file}")

    # Load and resample the audio to 16 kHz if necessary
    audio = AudioSegment.from_wav(wav_file)
    if audio.frame_rate != 16000:
        print(f"Resampling {wav_file} to 16 kHz...")
        audio = audio.set_frame_rate(16000)

    # Encode the (possibly resampled) audio as WAV once, in memory.
    audio_buffer = io.BytesIO()
    audio.export(audio_buffer, format="wav")

    # A unique temp file avoids collisions between concurrent calls and a
    # predictable path in a shared directory. delete=False lets the child
    # process reopen the file by name on every platform; cleanup is explicit.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    try:
        with tmp:
            tmp.write(audio_buffer.getvalue())

        # Argument list + no shell: model_name and lang reach ./main verbatim
        # and can never be interpreted as shell syntax.
        command = [
            "./main",
            "-m", model,
            "-f", tmp.name,
            "-l", str(lang),
            "-np",
            "-nt",
        ]
        try:
            completed = subprocess.run(
                command,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except OSError as exc:
            # e.g. ./main is missing or not executable
            raise RuntimeError(f"Error processing audio: {exc}") from exc
    finally:
        os.remove(tmp.name)

    # Any stderr output is treated as a failure (as before); a non-zero exit
    # status is now a failure too, even when stderr is empty.
    stderr_text = completed.stderr.decode("utf-8", errors="replace")
    if completed.returncode != 0 or stderr_text:
        detail = stderr_text or f"./main exited with status {completed.returncode}"
        raise RuntimeError(f"Error processing audio: {detail}")

    # Process and return the output string
    decoded_str = completed.stdout.decode("utf-8").strip()
    return decoded_str.replace("[BLANK_AUDIO]", "").strip()