File size: 2,073 Bytes
d4cae71
 
 
 
 
7f87ea1
d4cae71
 
 
909dc8c
d4cae71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cdc5c5a
d4cae71
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import gradio as gr
import soundfile as sf
import wave
from pyannote.audio import Pipeline
import torch
import os

# Load the pretrained speaker-diarization pipeline once, at import time.
# The Hugging Face access token is read from the HF_AUTH_TOKEN environment
# variable (None is passed through when the variable is unset).
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.0",
    use_auth_token=os.environ.get("HF_AUTH_TOKEN"),
)

# Move the pipeline to the GPU when CUDA is available, otherwise to the CPU.
if torch.cuda.is_available():
    pipeline.to(torch.device("cuda"))
else:
    pipeline.to(torch.device("cpu"))

def process_audio(audio_file):
    """Return the percentage of speech time attributed to the first speaker.

    The module-level diarization ``pipeline`` is run on ``audio_file`` and the
    duration of every speaker turn is accumulated per speaker label. The
    "first speaker" is the label of the first turn yielded by the
    diarization result.

    Args:
        audio_file: Audio input handed straight to ``pipeline`` (the caller
            in this file passes a path to a WAV file).

    Returns:
        The first speaker's share of the summed turn durations, as a number
        between 0 and 100. Returns 0 when no speech turns were found.
    """
    diarization = pipeline(audio_file)

    speaker_durations = {}
    first_speaker = None

    # Walk the diarization result directly rather than writing it to a fixed
    # "audio.rttm" file and re-parsing the text: that round trip needed a
    # writable working directory, could be clobbered by an overlapping
    # request, and relied on hard-coded RTTM column positions.
    # NOTE(review): assumes `pipeline` returns a pyannote.core Annotation
    # (the original code already relied on its `write_rttm` method).
    for segment, _, speaker in diarization.itertracks(yield_label=True):
        if first_speaker is None:
            first_speaker = speaker
        speaker_durations[speaker] = (
            speaker_durations.get(speaker, 0.0) + segment.duration
        )

    total_duration = sum(speaker_durations.values())
    if total_duration <= 0:
        # No speech detected (or only zero-length turns): avoid dividing by 0.
        return 0

    return (speaker_durations[first_speaker] / total_duration) * 100

def record_and_process(audio):
    """Save the recorded audio to disk and report the first speaker's share.

    Args:
        audio: Either ``None`` (nothing was recorded) or a two-item sequence
            that unpacks into ``(sample_rate, audio_data)``.

    Returns:
        A human-readable message: either a prompt to try again when no audio
        was supplied, or the first speaker's talk-time percentage formatted
        to two decimal places.
    """
    if audio is None:
        return "No audio was recorded. Please try again."

    rate, samples = audio

    # The samples are written to a WAV file in the current working directory
    # so that process_audio can be given a file path.
    wav_path = "audio.wav"
    sf.write(wav_path, samples, rate)

    share = process_audio(wav_path)
    message = f"Percentage of time spoken by the first speaker: {share:.2f}%"
    return message

# HTML snippet passed as the interface description: a usage hint plus a link
# to a sample conversation recording for testing.
_DESCRIPTION_HTML = (
    "Make sure you are the first person to speak!<br>"
    "You can also use a sample audio file for testing: "
    "<a href='https://www.uclass.psychol.ucl.ac.uk/Release2/Conversation/AudioOnly/wav/M_0025_11y10m_1.wav' "
    "target='_blank'>sample audio</a>."
)

# Single-function UI: audio in (delivered as a numpy-typed value), text out.
# Flagging is turned off.
interface = gr.Interface(
    fn=record_and_process,
    inputs=gr.Audio(type="numpy"),
    outputs="text",
    title="See How Much You Talk in a Conversation",
    description=_DESCRIPTION_HTML,
    allow_flagging="never",
)

# Start the app as soon as the module is executed.
interface.launch()