"""Gradio app that identifies a speaker by comparing voice embeddings.

At start-up, one embedding is computed for every ``*.wav`` file found in
``known_speakers/``.  Each clip submitted through the UI is embedded with the
same model and matched against those references by cosine similarity.
"""

import os

import gradio as gr
import numpy as np
import torch
from pyannote.audio import Inference
from sklearn.metrics.pairwise import cosine_similarity

# Directory holding one reference ``<speaker name>.wav`` clip per known speaker.
KNOWN_SPEAKERS_DIR = "known_speakers"

# Hugging Face access token, read from the environment (on a Hugging Face
# Space this is populated from the Space secrets).
hf_token = os.getenv("HF_TOKEN")

# Load the embedding model with authentication.  window="whole" asks for a
# single embedding per audio file instead of one per sliding window.
model = Inference("pyannote/embedding", use_auth_token=hf_token, window="whole")


def _to_row_vector(embedding):
    """Return *embedding* as a ``(1, D)`` NumPy array.

    ``np.asarray`` accepts NumPy arrays as well as CPU torch tensors, so this
    works regardless of which of the two the model call returns.
    """
    return np.asarray(embedding).reshape(1, -1)


def _load_speaker_embeddings(directory):
    """Embed every ``.wav`` file in *directory*.

    Returns:
        dict mapping the file name without its extension to the embedding
        produced by ``model`` for that file.
    """
    embeddings = {}
    # Sorted so that the iteration order (and therefore tie-breaking between
    # equally similar speakers) does not depend on the file system.
    for filename in sorted(os.listdir(directory)):
        if filename.endswith(".wav"):
            # splitext drops only the trailing extension; str.replace would
            # also remove ".wav" occurring in the middle of the name.
            name = os.path.splitext(filename)[0]
            embeddings[name] = model(os.path.join(directory, filename))
    return embeddings


# Reference embeddings of the known speakers, computed once at start-up.
speaker_embeddings = _load_speaker_embeddings(KNOWN_SPEAKERS_DIR)


def identify_speaker(audio):
    """Identify which known speaker is most similar to the given clip.

    Args:
        audio: Path of the audio file to analyse, or ``None`` when the UI
            submitted no audio.

    Returns:
        A two-line message with the best-matching speaker name and its cosine
        similarity score.  When there are no known speakers the result is
        "Unknown" with a score of -1.  When *audio* is ``None`` a short
        message asking for a recording is returned instead.
    """
    if audio is None:
        return "No audio received. Please record a voice clip and try again."

    # Converted once here rather than on every loop iteration.
    input_vector = _to_row_vector(model(audio))

    best_score = -1
    best_speaker = "Unknown"
    for name, emb in speaker_embeddings.items():
        score = cosine_similarity(input_vector, _to_row_vector(emb))[0][0]
        # Strict comparison: on a tie the first speaker encountered wins.
        if score > best_score:
            best_score = score
            best_speaker = name

    return f"🧠 Identified Speaker: {best_speaker}\n🧪 Similarity Score: {best_score:.2f}"


# Build and launch the Gradio UI.
# NOTE(review): `source=` is the Gradio 3.x spelling; Gradio 4 renamed it to
# `sources=[...]` - confirm against the pinned Gradio version.
demo = gr.Interface(
    fn=identify_speaker,
    inputs=gr.Audio(
        source="microphone",
        type="filepath",
        label="🎙️ Upload or record voice",
    ),
    outputs="text",
    title="🎤 Speaker Identification App",
    description="Upload a voice clip to identify the speaker.",
)
demo.launch()