Spaces:
Running
Running
import os | |
import gradio as gr | |
import torch | |
import whisper | |
from TTS.api import TTS | |
from torch.serialization import add_safe_globals | |
from TTS.tts.configs.xtts_config import XttsConfig | |
from TTS.tts.models.xtts import XttsAudioConfig, XttsArgs | |
from TTS.config.shared_configs import BaseDatasetConfig | |
from fetch_data import get_botpress_response | |
import requests, os, json | |
import warnings | |
import gradio as gr | |
import librosa | |
import torch | |
import numpy as np | |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor | |
import assemblyai as aai | |
warnings.filterwarnings("ignore") | |
# Load the wav2vec2 CTC model and its matching processor once at import time.
# Both come from the same pretrained checkpoint, so the name is spelled once.
_WAV2VEC2_CHECKPOINT = "facebook/wav2vec2-base-960h"
processor = Wav2Vec2Processor.from_pretrained(_WAV2VEC2_CHECKPOINT)
model = Wav2Vec2ForCTC.from_pretrained(_WAV2VEC2_CHECKPOINT)
def process_media(media_id, access_token, phone_no_id, phone_no, business_id):
    """Resolve a WhatsApp media ID to a transcription of its audio.

    The pipeline is: look up the media URL, download the voice note to disk,
    then transcribe the downloaded file.

    Args:
        media_id: Identifier of the media object to look up.
        access_token: Bearer token used for both the lookup and the download.
        phone_no_id: Phone number ID forwarded to the media URL lookup.
        phone_no: Accepted for interface compatibility; not used here.
        business_id: Accepted for interface compatibility; not used here.

    Returns:
        The transcription text, ``"Hi"`` when the transcription is empty, or
        a short failure message when the URL lookup or the download fails.
    """
    # Step 1: resolve the media ID into a downloadable URL.
    source_url = get_media_url(media_id, access_token, phone_no_id)
    if not source_url:
        return "Failed to fetch media URL."

    # Step 2: store the voice note locally so it can be transcribed.
    local_file = download_voice_note(source_url, access_token)
    if not local_file:
        return "Failed to download voice note."

    # Step 3: transcribe; an empty/None result falls back to a plain "Hi".
    return audio_transcribe(local_file) or "Hi"
def audio_transcribe(audio_path):
    """Transcribe an audio file with the AssemblyAI transcriber.

    The API key is taken from the ``ASSEMBLYAI_API_KEY`` environment variable
    rather than being embedded in the source, so the credential is never
    committed alongside the code.

    Args:
        audio_path: Location of the audio handed to the transcriber.

    Returns:
        The transcript text on success. On any failure (missing API key or an
        exception raised while transcribing) a string of the form
        ``"Error: <details>"`` is returned instead of raising.
    """
    # Secrets belong in the environment (e.g. a deployment secret), not in code.
    api_key = os.environ.get("ASSEMBLYAI_API_KEY")
    if not api_key:
        return "Error: ASSEMBLYAI_API_KEY environment variable is not set"

    try:
        aai.settings.api_key = api_key
        transcript = aai.Transcriber().transcribe(audio_path)
        return transcript.text
    except Exception as e:
        # Deliberate best-effort boundary: callers expect a string, never an
        # exception, so every failure is reported in-band.
        return f"Error: {e}"
def get_media_url(media_id, access_token, phone_no_id, timeout=30):
    """Fetch media URL from Facebook Graph API.

    Args:
        media_id: Identifier of the media object to look up.
        access_token: Bearer token sent in the ``Authorization`` header.
        phone_no_id: Value passed as the ``phone_number_id`` query parameter.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The ``url`` field of the JSON response, or ``None`` when the request
        fails, the status is not 200, the body is not valid JSON, or the
        field is absent.
    """
    url = f"https://graph.facebook.com/v21.0/{media_id}?phone_number_id={phone_no_id}"
    headers = {'Authorization': f'Bearer {access_token}'}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            return None
        return response.json().get('url', None)
    except (requests.exceptions.RequestException, ValueError):
        # Network failures, timeouts and malformed JSON all map to the same
        # "no URL" result that callers already check for.
        return None
def download_voice_note(url, access_token, timeout=30):
    """Download a voice note and store it in the current directory.

    The file is always written as ``voice_note.mp3`` in the current working
    directory, overwriting any previous download.

    Args:
        url: Location of the media to download.
        access_token: Bearer token sent in the ``Authorization`` header.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The path of the saved file, or ``None`` when the request fails or
        the response status is not 200.
    """
    headers = {"Authorization": f"Bearer {access_token}"}
    file_path = os.path.join(os.getcwd(), "voice_note.mp3")  # Save to current directory
    try:
        # A streamed response holds its connection open until it is closed;
        # the context manager releases it on every exit path.
        with requests.get(url, headers=headers, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"❌ Failed to download file. Status code: {response.status_code}")
                return None
            with open(file_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
    except requests.exceptions.RequestException as e:
        print(f"❌ Failed to download file. Error: {e}")
        return None
    print(f"✅ Download complete: {file_path}")
    return file_path
def upload_audio(audio_path, access_token, phone_no_id, timeout=60):
    """Upload an audio file to WhatsApp Business API.

    The file is sent as a multipart upload named ``output.ogg`` with the
    ``audio/ogg`` content type.

    Args:
        audio_path: Path of the local audio file to upload.
        access_token: Bearer token sent in the ``Authorization`` header.
        phone_no_id: Phone number ID used to build the media endpoint URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``id`` field of the JSON response, or ``None`` when the request
        fails, the body is not valid JSON, or the field is absent.

    Raises:
        OSError: If ``audio_path`` cannot be opened for reading.
    """
    url = f"https://graph.facebook.com/v21.0/{phone_no_id}/media"
    payload = {'messaging_product': 'whatsapp'}
    headers = {'Authorization': f'Bearer {access_token}'}
    try:
        # The file must stay open while the request body is being sent.
        with open(audio_path, 'rb') as audio_file:
            files = [('file', ('output.ogg', audio_file, 'audio/ogg'))]
            response = requests.post(
                url, headers=headers, data=payload, files=files, timeout=timeout
            )
    except requests.exceptions.RequestException as e:
        print(f"upload failed: {e}")
        return None
    print(f"response is {response.text}")
    try:
        return response.json().get('id', None)
    except ValueError:
        # Non-JSON body (e.g. a proxy error page): there is no media ID.
        return None
def send_audio(phone_no, audio_id, access_token, phone_no_id, timeout=30):
    """Send an audio message via WhatsApp API.

    Args:
        phone_no: Recipient phone number placed in the ``to`` field.
        audio_id: ID of previously uploaded audio to reference in the message.
        access_token: Bearer token sent in the ``Authorization`` header.
        phone_no_id: Phone number ID used to build the messages endpoint URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON response on success. On a request failure, an HTTP
        error status, or an undecodable body, the error message is returned
        as a string instead of raising.
    """
    url = f"https://graph.facebook.com/v21.0/{phone_no_id}/messages"
    payload = json.dumps({
        "messaging_product": "whatsapp",
        "recipient_type": "individual",
        "to": phone_no,
        "type": "audio",
        "audio": {"id": audio_id},
    })
    headers = {'Content-Type': 'application/json', 'Authorization': f'Bearer {access_token}'}
    try:
        response = requests.post(url, headers=headers, data=payload, timeout=timeout)
        response.raise_for_status()
        print(response.text)
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException.
        return str(e)
# Gradio Interface
# One textbox per process_media parameter, listed in the same order as the
# function's signature.
iface = gr.Interface(
    fn=process_media,
    inputs=[
        gr.Textbox(label=field_label)
        for field_label in (
            "Media ID",
            "Access Token",
            "Phone Number ID",
            "Recipient Phone Number",
            "Business ID",
        )
    ],
    outputs="text",
    title="WhatsApp Audio Processor",
    description="Enter Media ID, Access Token, Phone Number ID, and Recipient Phone Number to process and send audio.",
)

# Start the web UI as soon as the module is executed.
iface.launch(debug=True)