vrajshroff committed on
Commit
d4cae71
·
verified ·
1 Parent(s): bdbee05

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -0
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import io
import os
import tempfile
import wave

import gradio as gr
import soundfile as sf
import torch
from pyannote.audio import Pipeline
6
+
7
# Load the pretrained speaker-diarization pipeline once, at import time.
# The Hugging Face access token is read from the HF_AUTH_TOKEN environment
# variable (os.getenv returns None when it is unset).
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.0",
    use_auth_token=os.getenv("HF_AUTH_TOKEN"),
)

# NOTE(review): pyannote.audio 3.x appears to return None (after printing a
# hint) when the gated model cannot be downloaded -- confirm against the
# installed version. Failing here gives a clearer error than the
# AttributeError that `pipeline.to(...)` would otherwise raise.
if pipeline is None:
    raise RuntimeError(
        "Could not load 'pyannote/speaker-diarization-3.0'; check that "
        "HF_AUTH_TOKEN is set and has access to the model."
    )

# Run on the GPU when one is available, otherwise fall back to the CPU.
pipeline.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
11
+
12
def _first_speaker_percentage(rttm_lines):
    """Return the first listed speaker's share (0-100) of the summed durations.

    Args:
        rttm_lines: Iterable of RTTM text lines. Whitespace-separated field 4
            is read as the segment duration and field 7 as the speaker name.

    Returns:
        The percentage of the total duration that belongs to the speaker of
        the first usable record, or 0 when the total duration is not positive.
    """
    speaker_durations = {}
    first_speaker = None

    for line in rttm_lines:
        fields = line.split()
        # Blank or truncated lines carry no speaker/duration; skip them
        # instead of failing with an IndexError.
        if len(fields) < 8:
            continue

        speaker = fields[7]
        if first_speaker is None:
            first_speaker = speaker
        speaker_durations[speaker] = (
            speaker_durations.get(speaker, 0) + float(fields[4])
        )

    total_duration = sum(speaker_durations.values())
    if total_duration <= 0:
        return 0
    return (speaker_durations[first_speaker] / total_duration) * 100


def process_audio(audio_file, diarize=None):
    """Diarize *audio_file* and report how much of the speech is the first speaker's.

    Args:
        audio_file: Audio input, passed unchanged to the diarization callable.
        diarize: Optional callable that takes ``audio_file`` and returns an
            object exposing ``write_rttm(file)``. Defaults to the module-level
            ``pipeline``.

    Returns:
        Percentage (0-100) of the total diarized duration attributed to the
        speaker of the first RTTM record, or 0 when nothing was diarized.
        Presumably the first record is the earliest segment -- confirm the
        ordering guarantee of ``write_rttm``.
    """
    if diarize is None:
        diarize = pipeline
    diarization = diarize(audio_file)

    # Keep the RTTM text in memory: a fixed "audio.rttm" in the working
    # directory would be shared by every call and left behind afterwards.
    rttm_buffer = io.StringIO()
    diarization.write_rttm(rttm_buffer)

    return _first_speaker_percentage(rttm_buffer.getvalue().splitlines())
40
+
41
def record_and_process(audio):
    """Gradio callback: save the recording to disk and report the first speaker's share.

    Args:
        audio: ``None`` when nothing was recorded, otherwise a
            ``(sample_rate, audio_data)`` pair as unpacked below.

    Returns:
        A user-facing message: either a prompt to retry, or the percentage of
        speaking time attributed to the first speaker, to two decimals.
    """
    if audio is None:
        return "No audio was recorded. Please try again."

    sample_rate, audio_data = audio

    # Use a private scratch directory per call: a fixed "audio.wav" in the
    # working directory would be shared by every call and never cleaned up.
    with tempfile.TemporaryDirectory() as scratch_dir:
        file_path = os.path.join(scratch_dir, "audio.wav")
        sf.write(file_path, audio_data, sample_rate)
        percentage = process_audio(file_path)

    return f"Percentage of time spoken by the first speaker: {percentage:.2f}%"
50
+
51
# Single-input, text-output Gradio UI around record_and_process.
# NOTE(review): the description promises the voice "never leaves your
# device", but record_and_process writes the audio to disk and runs the
# diarization pipeline in this process -- that only holds when the app runs
# locally. Confirm the wording for hosted deployments.
interface = gr.Interface(
    fn=record_and_process,
    inputs=gr.Audio(type="numpy"),
    outputs="text",
    title="See How Much You Talk in a Conversation",
    description=(
        "Make sure you are the first person to speak!<br>"
        "You can also use a sample audio file for testing: "
        "<a href='https://www.uclass.psychol.ucl.ac.uk/Release2/Conversation/AudioOnly/wav/M_0025_11y10m_1.wav' "
        "target='_blank'>sample audio</a>.<br>"
        "<u>Your voice never leaves your device.</u>"
    ),
    allow_flagging="never",
)

# Only start the web server when the file is executed as a script, so that
# importing this module (e.g. to reuse process_audio) does not block on launch().
if __name__ == "__main__":
    interface.launch()