# Project8-final / voice_utils.py
# Krithikesh77 - "Upload 6 files" (commit e1c7e5d, verified)
import requests
import base64
import os
from dotenv import load_dotenv
# Load variables from a .env file (if one is found) into the process
# environment so the os.getenv() lookup below can see them.
load_dotenv()
# Token sent as the Bearer credential to the Hugging Face inference endpoint
# in speech_to_text(); os.getenv() yields None when HF_API_KEY is not set.
HF_API_KEY = os.getenv("HF_API_KEY")
# -----------------------------
# Speech-to-Text (STT) using HuggingFace Whisper
# -----------------------------
def speech_to_text(audio_file, timeout=30):
    """Transcribe an audio file with the hosted ``openai/whisper-small`` model.

    The raw bytes of ``audio_file`` are POSTed to the Hugging Face inference
    endpoint and the ``"text"`` field of the JSON reply is returned.

    Args:
        audio_file: Path of the audio file to read (opened in binary mode).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The transcription string on success. If the reply has no ``"text"``
        field, ``"Sorry, I couldn’t transcribe that."``. If the request fails
        (network error, non-200 status, non-JSON body),
        ``"Speech recognition failed."``.

    Raises:
        OSError: If ``audio_file`` cannot be opened or read.
    """
    with open(audio_file, "rb") as f:
        audio_bytes = f.read()

    # Only send the Authorization header when a token is configured;
    # otherwise the literal string "Bearer None" would be sent.
    headers = {"Authorization": f"Bearer {HF_API_KEY}"} if HF_API_KEY else {}

    try:
        response = requests.post(
            "https://api-inference.huggingface.co/models/openai/whisper-small",
            headers=headers,
            data=audio_bytes,
            # Without a timeout, requests may block indefinitely.
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts get the same fallback as HTTP errors.
        print(f"STT request failed: {exc}")
        return "Speech recognition failed."

    if response.status_code != 200:
        print(f"STT request failed: {response.status_code} {response.text}")
        return "Speech recognition failed."

    try:
        result = response.json()
    except ValueError:
        # A 200 reply whose body is not JSON is still a failed transcription.
        print("STT request failed: response body was not valid JSON")
        return "Speech recognition failed."

    if not isinstance(result, dict):
        # Unexpected payload shape (e.g. a list): no "text" field to read.
        return "Sorry, I couldn’t transcribe that."
    return result.get("text", "Sorry, I couldn’t transcribe that.")
# -----------------------------
# Text-to-Speech (TTS) using Kitten TTS
# -----------------------------
def text_to_speech(text, output_path="output_audio.wav", timeout=30):
    """Synthesize speech for ``text`` and save it as an audio file.

    POSTs ``{"text": text}`` as JSON to the Kitten TTS URL, expects a JSON
    reply whose ``"audio"`` field is base64-encoded audio, decodes it and
    writes the bytes to ``output_path``.

    Args:
        text: The text to synthesize.
        output_path: Where to write the decoded audio bytes. Defaults to
            ``"output_audio.wav"`` in the current working directory.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``output_path`` on success, or ``None`` if the request fails or the
        reply cannot be decoded.

    Raises:
        OSError: If ``output_path`` cannot be opened for writing.
    """
    # NOTE(review): this looks like a repository file-download ("resolve")
    # URL rather than an inference endpoint -- confirm that it accepts POSTs
    # and answers with {"audio": <base64>} before relying on it.
    url = "https://huggingface.co/KittenML/kitten-tts-nano-0.1/resolve/main/tts"
    payload = {"text": text}

    try:
        # Without a timeout, requests may block indefinitely.
        response = requests.post(url, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts get the same fallback as HTTP errors.
        print(f"TTS request failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"TTS request failed: {response.status_code}")
        return None

    try:
        # ValueError: body is not JSON or the base64 data is malformed;
        # KeyError: no "audio" field; TypeError: payload/field has wrong type.
        audio_bytes = base64.b64decode(response.json()["audio"])
    except (KeyError, TypeError, ValueError) as exc:
        print(f"TTS response could not be decoded: {exc!r}")
        return None

    with open(output_path, "wb") as f:
        f.write(audio_bytes)
    return output_path