File size: 3,011 Bytes
55137ed
b9da6aa
55137ed
 
2c8b5b1
b9da6aa
55137ed
b9da6aa
55137ed
 
b9da6aa
2c8b5b1
 
 
 
 
 
 
 
55137ed
b9da6aa
 
 
 
 
55137ed
 
b9da6aa
55137ed
b9da6aa
 
 
2c8b5b1
b9da6aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55137ed
 
b9da6aa
 
 
 
 
55137ed
b9da6aa
 
 
 
 
 
55137ed
2c8b5b1
fce3faa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c8b5b1
55137ed
0d82e75
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import logging
import os
import subprocess
import sys
import tempfile

import speech_recognition as sr
import streamlit as st
from gtts import gTTS
from transformers import AutoModelForCausalLM, AutoTokenizer

# Set your Hugging Face API key
# Read the token from the HUGGING_FACE_API_KEY environment variable when it is
# set (and non-empty) so a real credential never has to be committed to the
# source; the original placeholder value stays as the fallback.
HUGGING_FACE_API_KEY = os.environ.get("HUGGING_FACE_API_KEY") or "voicebot"

# Load the model and tokenizer
@st.cache_resource
def load_model():
    """Load the chatbot model and its tokenizer from the Hugging Face Hub.

    The function is wrapped in ``st.cache_resource`` so the loaded objects
    can be reused instead of being rebuilt on every call.

    Returns:
        tuple: ``(model, tokenizer)`` for the checkpoint named below.
    """
    model_name = "declare-lab/tango-full"
    # NOTE(review): confirm this checkpoint can actually be loaded through
    # AutoModelForCausalLM / AutoTokenizer.
    # Send the token with BOTH downloads; previously only the model request
    # was authenticated, so the tokenizer fetch would be anonymous.
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=HUGGING_FACE_API_KEY)
    model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=HUGGING_FACE_API_KEY)
    return model, tokenizer


# Module-level handles used by get_response().
model, tokenizer = load_model()

# Function to get a response from the chatbot
def get_response(input_text, max_new_tokens=50):
    """Generate the chatbot's reply to *input_text*.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        input_text: The user's message.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt. Defaults to 50.

    Returns:
        str: The decoded reply without the prompt, or ``""`` when
        *input_text* is empty or only whitespace.
    """
    # Nothing to answer; avoid calling generate() on an empty prompt.
    if not input_text or not input_text.strip():
        return ""

    encoded = tokenizer(input_text, return_tensors='pt')
    input_ids = encoded["input_ids"]
    prompt_length = input_ids.shape[-1]

    # max_new_tokens bounds only the reply. The previous max_length=50 also
    # counted the prompt tokens, so a long message left no room for an answer.
    output_ids = model.generate(
        input_ids,
        attention_mask=encoded.get("attention_mask"),
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
    )

    # A causal LM returns prompt + continuation; keep only the continuation
    # so the reply does not echo the user's own message.
    reply_ids = output_ids[0][prompt_length:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True)

# Function to convert text to speech
def text_to_speech(text):
    """Synthesize *text* with gTTS and open the MP3 in the default player.

    The audio is written to a fresh ``.mp3`` file in the system temp
    directory and handed to the platform's file opener. The file is not
    deleted here because the player is launched without waiting for it.

    Args:
        text: The text to speak. Empty or whitespace-only text is ignored.

    Returns:
        None
    """
    # Nothing to synthesize.
    if not text or not text.strip():
        return

    tts = gTTS(text=text, lang='en')

    # Reserve a real ".mp3" path. Close our handle straight away so that both
    # gTTS and the external player can open the file (required on Windows).
    fd, mp3_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    tts.save(mp3_path)

    # Launch the opener with an argument list rather than a shell string, so
    # paths containing spaces work, and pick the opener per platform instead
    # of assuming the Windows-only `start` command.
    try:
        if sys.platform.startswith("win"):
            os.startfile(mp3_path)
        elif sys.platform == "darwin":
            subprocess.Popen(["open", mp3_path])
        else:
            subprocess.Popen(["xdg-open", mp3_path])
    except OSError as exc:
        # Playback stays best-effort, as it was with os.system().
        logging.warning("Could not start an audio player for %s: %s", mp3_path, exc)

# Speech Recognition Function
def recognize_speech():
    """Record one utterance from the microphone and transcribe it.

    Progress and the outcome are reported through Streamlit messages.

    Returns:
        The transcript returned by ``recognize_google``, or ``None`` when the
        audio could not be understood or the recognition request failed.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as mic:
        st.write("Listening...")
        captured = recognizer.listen(mic)
        st.write("Recognizing...")
        try:
            transcript = recognizer.recognize_google(captured)
        except sr.UnknownValueError:
            # The audio was captured but no words could be made out.
            st.error("Sorry, I could not understand the audio.")
        except sr.RequestError:
            # The recognition request itself failed.
            st.error("Could not request results from Google Speech Recognition service.")
        else:
            st.success(f"You said: {transcript}")
            return transcript
    return None

# Streamlit Interface
st.title("Voice-to-Text Chatbot")

# Take the message from the microphone when "Speak" was clicked on this run;
# otherwise fall back to the text box.
if st.button("Speak"):
    user_input = recognize_speech()
else:
    user_input = st.text_input("Type your message here:")

# Display response and convert to speech.
# recognize_speech() returns None on failure and the text box is empty until
# something is typed; both cases skip the reply.
if user_input:
    st.write("You: ", user_input)
    chatbot_response = get_response(user_input)
    st.write("Chatbot: ", chatbot_response)
    # Speak the reply exactly once (it used to be played twice in a row).
    text_to_speech(chatbot_response)

# Configure logging
logging.basicConfig(level=logging.INFO)


# Use logging instead of print for more structured output
def load_model():
    """Load the model and tokenizer, logging progress and failures.

    NOTE: this definition rebinds the name ``load_model``, replacing the
    ``@st.cache_resource``-decorated function defined earlier in this file;
    this version carries no caching decorator, so each call loads again.

    Returns:
        tuple: ``(model, tokenizer)``.

    Raises:
        Exception: Whatever the Hugging Face loaders raise; it is logged with
            its traceback and then re-raised unchanged.
    """
    model_name = "declare-lab/tango-full"
    logging.info("Loading model...")
    try:
        # Authenticate both downloads; previously only the model request
        # carried the token.
        tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=HUGGING_FACE_API_KEY)
        model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=HUGGING_FACE_API_KEY)
    except Exception as exc:
        # logging.exception keeps the traceback that the plain error() call
        # with an f-string used to drop.
        logging.exception("Error loading model: %s", exc)
        raise
    logging.info("Model loaded successfully.")
    return model, tokenizer

# Example usage in your Streamlit code
if __name__ == "__main__":
    # `model` and `tokenizer` are already bound by the module-level
    # `load_model()` call earlier in this script, so execution only gets here
    # after a successful load. Calling the loader again at this point would go
    # through the later, un-cached `load_model` definition and repeat the
    # whole load, replacing the objects already in use. Just record that
    # start-up finished.
    logging.info("Application started; model and tokenizer are loaded.")