Spaces:
Sleeping
Sleeping
import requests | |
import base64 | |
import os | |
from dotenv import load_dotenv | |
# Pull variables from a local .env file into the process environment so the
# API key can be supplied without exporting it in the shell.
load_dotenv()

# Hugging Face API token; None when the variable is not set.
HF_API_KEY = os.environ.get("HF_API_KEY")
# ----------------------------- | |
# Speech-to-Text (STT) using HuggingFace Whisper | |
# ----------------------------- | |
def speech_to_text(audio_file, timeout=60):
    """Transcribe an audio file via the hosted ``openai/whisper-small`` model.

    Args:
        audio_file: Path of the audio file to upload; it is read in binary
            mode and sent as the raw request body.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` waits indefinitely on a stalled
            connection.

    Returns:
        The transcription from the response's ``"text"`` field, a "couldn't
        transcribe" message when that field is absent, or
        ``"Speech recognition failed."`` when the request itself fails.

    Raises:
        OSError: If ``audio_file`` cannot be opened or read.
    """
    failure_message = "Speech recognition failed."
    no_text_message = "Sorry, I couldn’t transcribe that."

    with open(audio_file, "rb") as f:
        audio_bytes = f.read()

    try:
        response = requests.post(
            "https://api-inference.huggingface.co/models/openai/whisper-small",
            headers={"Authorization": f"Bearer {HF_API_KEY}"},
            data=audio_bytes,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported the same way as a
        # non-200 response instead of crashing the caller.
        print(f"STT request failed: {exc}")
        return failure_message

    if response.status_code != 200:
        print(f"STT request failed: {response.status_code} {response.text}")
        return failure_message

    try:
        result = response.json()
    except ValueError:
        # A 200 response whose body is not JSON cannot be interpreted.
        print("STT request failed: response body was not valid JSON")
        return failure_message

    # Only a JSON object can carry a "text" field; anything else (e.g. a
    # list) is treated as "no transcription available".
    if not isinstance(result, dict):
        return no_text_message
    return result.get("text", no_text_message)
# ----------------------------- | |
# Text-to-Speech (TTS) using Kitten TTS | |
# ----------------------------- | |
def text_to_speech(text, output_path="output_audio.wav", timeout=60):
    """Synthesize speech for ``text`` and save it as an audio file.

    Args:
        text: The text to speak. Empty or whitespace-only text is rejected
            without contacting the service.
        output_path: Where the decoded audio is written. Defaults to
            ``"output_audio.wav"`` in the current working directory.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The path of the written audio file, or ``None`` when the text is
        empty, the request fails, or the response holds no decodable audio.

    Raises:
        OSError: If the output file cannot be written.
    """
    # Nothing to synthesize: skip the network round-trip entirely.
    if text is None or not str(text).strip():
        return None

    # NOTE(review): this looks like a repository file-resolve URL rather than
    # an inference endpoint -- confirm that it actually accepts POST requests.
    url = "https://huggingface.co/KittenML/kitten-tts-nano-0.1/resolve/main/tts"
    payload = {"text": text}

    try:
        response = requests.post(url, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts follow the same "return None"
        # contract as a non-200 response.
        print(f"TTS request failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"TTS request failed: {response.status_code}")
        return None

    try:
        # The response is expected to be a JSON object with a base64-encoded
        # "audio" field.
        audio_bytes = base64.b64decode(response.json()["audio"])
    except (KeyError, TypeError, ValueError) as exc:
        # Covers a missing "audio" key, a non-object JSON body, a non-JSON
        # body, and malformed base64 data.
        print(f"TTS request failed: unusable response ({exc})")
        return None

    with open(output_path, "wb") as f:
        f.write(audio_bytes)
    return output_path