File size: 2,679 Bytes
cbad54b
 
 
 
 
 
 
13dd234
cbad54b
3df36f0
 
 
cbad54b
 
 
 
 
 
 
 
 
 
 
 
3df36f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cbad54b
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from streamlit_mic_recorder import mic_recorder
import streamlit as st
import io
from openai import OpenAI
import os


def _transcribe_with_retries(client, audio_bytes, language=None, max_attempts=3):
    """Send *audio_bytes* to the Whisper endpoint, retrying on any exception.

    Args:
        client: Object exposing ``audio.transcriptions.create`` (the OpenAI
            client cached in the Streamlit session state).
        audio_bytes: Raw recording, uploaded under the name ``audio.webm``.
        language: Optional language hint; left out of the request when None.
        max_attempts: Maximum number of calls before giving up.

    Returns:
        The ``text`` attribute of the transcription response, or None when
        every attempt raised.
    """
    request = {"model": "whisper-1"}
    if language is not None:
        # Leave `language` out of the request entirely when the caller did
        # not set one, rather than sending an explicit None.
        request["language"] = language

    for _ in range(max_attempts):
        # Build a fresh buffer for every attempt: a failed upload may already
        # have consumed part (or all) of the stream, and re-sending the same
        # file object could then transmit truncated or empty audio.
        audio_file = io.BytesIO(audio_bytes)
        audio_file.name = 'audio.webm'
        try:
            transcript = client.audio.transcriptions.create(file=audio_file, **request)
        except Exception as e:
            print(str(e))  # log the exception in the terminal
        else:
            return transcript.text
    return None


def whisper_stt(openai_api_key, start_prompt="Start recording", stop_prompt="Stop recording", just_once=False,
               use_container_width=False, language=None, callback=None, args=(), kwargs=None, key=None):
    """Render a microphone recorder and transcribe new recordings with Whisper.

    Args:
        openai_api_key: API key used to create the OpenAI client. When falsy,
            nothing is transcribed and None is returned.
        start_prompt: Forwarded to ``mic_recorder``.
        stop_prompt: Forwarded to ``mic_recorder``.
        just_once: Forwarded to ``mic_recorder``. In addition, when True the
            previous transcript is NOT returned again on runs that carry no
            new recording (None is returned instead).
        use_container_width: Forwarded to ``mic_recorder``.
        language: Optional language hint for the transcription request.
        callback: Optional callable invoked as ``callback(*args, **kwargs)``
            whenever a new recording is detected (even if its transcription
            failed).
        args: Positional arguments for *callback*.
        kwargs: Keyword arguments for *callback* (None means no kwargs).
        key: Optional widget key. When set, the result is also stored in
            ``st.session_state[key + '_output']``.

    Returns:
        The transcript text, or None when there is no recording, no API key,
        or all transcription attempts failed.
    """
    state = st.session_state

    if '_last_speech_to_text_transcript_id' not in state:
        state._last_speech_to_text_transcript_id = 0
    if '_last_speech_to_text_transcript' not in state:
        state._last_speech_to_text_transcript = None
    if key and key + '_output' not in state:
        state[key + '_output'] = None

    audio = mic_recorder(start_prompt=start_prompt, stop_prompt=stop_prompt, just_once=just_once,
                         use_container_width=use_container_width, format="webm", key=key)

    new_output = False
    output = None

    if audio is not None and openai_api_key:
        if 'openai_client' not in state:
            # NOTE: the client is cached for the whole session, so a later
            # change of `openai_api_key` is not picked up by this function.
            state.openai_client = OpenAI(api_key=openai_api_key)

        audio_id = audio['id']
        # A recording counts as new only if its id is greater than the last
        # one already processed in this session.
        new_output = audio_id > state._last_speech_to_text_transcript_id

        if new_output:
            # Record the id before transcribing so a failed recording is not
            # re-submitted on the next rerun.
            state._last_speech_to_text_transcript_id = audio_id
            text = _transcribe_with_retries(state.openai_client, audio['bytes'], language=language)
            if text is not None:
                output = text
                state._last_speech_to_text_transcript = text
        elif not just_once:
            # Same recording as on a previous run: hand back the cached text.
            output = state._last_speech_to_text_transcript

    if key:
        state[key + '_output'] = output
    if new_output and callback:
        callback(*args, **(kwargs or {}))
    return output