Ben Wiley
Adding HF Auth attempt
4739174
import os
import tempfile
from typing import Optional

import gradio as gr
import scipy.io.wavfile
import torchaudio
from huggingface_hub import HfApi
from pyannote.audio import Pipeline
from pyannote.audio.pipelines.utils.hook import ProgressHook
# Hugging Face access token passed to Pipeline.from_pretrained().
# It starts unset (None) and is assigned by login(). Because it is a
# module-level global it is process-wide: once set, it is used for every
# request handled by this process, not only for the session that logged in.
HUGGINGFACE_ACCESS_TOKEN = None
def perform_separation(audio_file_path: str):
    """Separate the speakers of an audio file and diarize it.

    Loads the ``pyannote/speech-separation-ami-1.0`` pipeline with the
    module-level ``HUGGINGFACE_ACCESS_TOKEN``, runs it on the given file and
    writes one ``<speaker label>.wav`` file per separated source into a fresh
    temporary directory.

    Args:
        audio_file_path: Path of the audio file to process.

    Returns:
        A ``(output_file_paths, message)`` tuple. On success
        ``output_file_paths`` lists the written WAV files and ``message`` is
        the diarization in RTTM format. On failure (not logged in, no input,
        pipeline loading error, processing error) the list is empty and
        ``message`` describes the problem.
    """
    # The pipeline's model card example writes the separated sources at
    # 16 kHz; the input is resampled to that rate up front so the rate fed to
    # the pipeline and the rate written to disk always agree.
    pipeline_sample_rate = 16000

    if not HUGGINGFACE_ACCESS_TOKEN:
        return [], "Please log in with your HuggingFace account first."
    if not audio_file_path:
        # The UI passes None when the button is clicked without any audio.
        return [], "Please provide an audio file first."

    # Instantiate the pipeline
    try:
        pipeline = Pipeline.from_pretrained(
            "pyannote/speech-separation-ami-1.0",
            use_auth_token=HUGGINGFACE_ACCESS_TOKEN,
        )
    except Exception as e:
        return [], f"Error loading pipeline: {str(e)}"
    if pipeline is None:
        # NOTE(review): pyannote appears to return None instead of raising
        # when the token has no access to the model -- confirm against the
        # installed pyannote.audio version.
        return [], (
            "Error loading pipeline: access was denied. Make sure your "
            "HuggingFace account has accepted the model's user conditions."
        )

    try:
        waveform, sample_rate = torchaudio.load(audio_file_path)
        if sample_rate != pipeline_sample_rate:
            waveform = torchaudio.functional.resample(
                waveform, sample_rate, pipeline_sample_rate
            )
            sample_rate = pipeline_sample_rate

        # Run the pipeline
        with ProgressHook() as hook:
            diarization, sources = pipeline(
                {"waveform": waveform, "sample_rate": sample_rate}, hook=hook
            )

        # Save separated sources as SPEAKER_XX.wav files. A per-call
        # directory keeps requests from overwriting each other's files.
        # The directory is left in place so the caller can still read the
        # files after this function returns.
        output_dir = tempfile.mkdtemp(prefix="speech_separation_")
        number_of_separated_sources = sources.data.shape[1]
        output_file_paths = []
        # zip() stops at the shorter side, so labels without a matching
        # separated source are skipped.
        for s, speaker in zip(
            range(number_of_separated_sources), diarization.labels()
        ):
            samples = sources.data[:, s]
            if hasattr(samples, "numpy"):
                # Only tensors need converting; NumPy arrays pass through.
                samples = samples.numpy()
            output_file_path = os.path.join(output_dir, f"{speaker}.wav")
            scipy.io.wavfile.write(output_file_path, sample_rate, samples)
            output_file_paths.append(output_file_path)

        # Generate RTTM content
        rttm_content = diarization.to_rttm()
    except Exception as e:
        # Same reporting style as the loading step: surface the problem in
        # the UI instead of crashing the request.
        return [], f"Error processing audio: {str(e)}"

    return output_file_paths, rttm_content
def gradio_wrapper(audio_file_path: str, request: gr.Request):
    """Adapt ``perform_separation`` to the fixed set of UI outputs.

    The interface wires up 10 speaker audio players followed by one RTTM
    text box, so this always returns exactly 11 values.

    Args:
        audio_file_path: Path of the uploaded audio file.
        request: Request object supplied by Gradio (currently unused).

    Returns:
        A list of 10 audio values (a file path, or ``None`` for an unused
        player) followed by the RTTM text or a status/error message.
    """
    speaker_slots = 10  # number of speaker players created by the UI

    if not HUGGINGFACE_ACCESS_TOKEN:
        return [None] * speaker_slots + [
            "Please log in with your HuggingFace account first."
        ]

    output_file_paths, rttm_content = perform_separation(audio_file_path)

    # Never return more values than there are output components.
    shown_paths = list(output_file_paths[:speaker_slots])
    # None clears a player; an empty string would be treated as a file path.
    unused_slots = [None] * (speaker_slots - len(shown_paths))
    return shown_paths + unused_slots + [rttm_content]
def login(
    request: gr.Request,
    oauth_token: Optional[gr.OAuthToken] = None,
    profile: Optional[gr.OAuthProfile] = None,
):
    """Store the visitor's Hugging Face token and return a status message.

    Gradio fills ``oauth_token`` and ``profile`` from their type hints when
    Hugging Face OAuth is enabled for the app; both are ``None`` for a
    visitor who is not logged in. The previous implementation read
    ``request.auth``, which is not a documented ``gr.Request`` attribute and
    does not carry the Hugging Face access token.

    Args:
        request: Request object supplied by Gradio; only used as a fallback
            source for the display name.
        oauth_token: OAuth token of the logged-in visitor, if any.
        profile: OAuth profile of the logged-in visitor, if any.

    Returns:
        A welcome message when a token was obtained, otherwise a prompt to
        log in.
    """
    global HUGGINGFACE_ACCESS_TOKEN

    token = getattr(oauth_token, "token", None)
    if not token:
        # Leave any previously stored token untouched: it is process-wide,
        # and an anonymous page load must not log another visitor out.
        return "Please log in with your HuggingFace account to use this app."

    HUGGINGFACE_ACCESS_TOKEN = token

    username = getattr(profile, "username", None) or getattr(
        request, "username", None
    )
    if username:
        return f"Welcome, {username}! You are now logged in."
    return "You are now logged in."
# User interface: a login button and status line, an audio input, one audio
# player per potential speaker, and a text box for the RTTM diarization.
with gr.Blocks() as demo:
    gr.Markdown("## Speech Separation and Diarization")
    gr.Markdown("Please log in with your HuggingFace account to use this app.")
    # "Sign in with Hugging Face" button. On a Space this also requires
    # `hf_oauth: true` in the Space's README metadata.
    gr.LoginButton()
    login_status = gr.Markdown()

    with gr.Row():
        input_audio = gr.Audio(label="Input Audio", type="filepath")

    with gr.Row():
        submit_button = gr.Button("Process Audio")

    # gradio_wrapper always returns 10 audio values followed by the RTTM
    # text, so the number of players here must stay at 10.
    max_speakers = 10
    outputs = [
        gr.Audio(label=f"Speaker {i+1}", type="filepath")
        for i in range(max_speakers)
    ]
    rttm_output = gr.Textbox(label="RTTM Output")

    # Refresh the login status every time the page is loaded.
    demo.load(login, inputs=None, outputs=login_status)
    submit_button.click(
        gradio_wrapper, inputs=[input_audio], outputs=outputs + [rttm_output]
    )

# `launch(auth=...)` only accepts username/password credentials or a
# callable; the former `auth={"hf_oauth": True}` dict is not a supported
# value, so it is dropped in favour of the login button above.
demo.launch()