awaisrwp committed on
Commit 76b1d4d · verified · 1 Parent(s): 0745f2e

deployment

Files changed (1)
app.py +38 -71
app.py CHANGED
@@ -1,21 +1,13 @@
-# import os
-# import speech_recognition as sr
-# import pickle
-# import nltk
-# from nltk.corpus import wordnet
 import pandas as pd
 import difflib
 import gradio as gr
 from transformers import pipeline
 import librosa
+import re

 # import numpy as np

-transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base")
-
-
-# nltk.download('wordnet')
-
+transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")


 class Model_Voice_Text():
@@ -28,6 +20,7 @@ class Model_Voice_Text():
     def __init__(self) -> None:
         # self.SR_obj = sr.Recognizer()
         self.KEYWORDS = ['suicide', 'urgent', 'poor', 'in-need', 'old', 'pregnant', 'refugee', 'new immigrant', 'patient', 'ill', 'sick', 'anxiety', 'anxious']
+        self.sins = [5678, 1967, 4530, 3986, 9750, 1065, 7134, 6410, 2906, 8056, 1307, 3503, 7708, 4980, 1248, 3491, 6157, 9242, 3198, 5632]
         # self.fuzzer = fuzz.Fuzz()

     # Define a function to find the number of times the word similar to the word stored in variable target_var, in a text stored in a variable named text_res
@@ -50,7 +43,35 @@ class Model_Voice_Text():

         # Return the number of similar words
         return len(similar_words)
-
+
+    def extract_phone_number(self, text):
+        # Define a regular expression pattern to match phone numbers
+        phone_pattern = re.compile(r'\b\d{7,}\b')
+
+        # Search for the phone number in the text
+        match = re.search(phone_pattern, text)
+
+        # Check if a match is found and return the phone number
+        if match:
+            return match.group()
+        else:
+            return "000"
+
+    def extract_sin(self, text):
+        # Define a regular expression pattern to match 4-digit SINs
+        sin_pattern = re.compile(r'\b\d{4}\b')
+
+        # Search for SIN candidates in the text
+        matches = re.findall(sin_pattern, text)
+        if matches:
+            return matches
+        else: return "Not detected"
+
+    def check_eligibility(self, sins_ex):
+        for number in sins_ex:
+            if number.isdigit() and int(number) in self.sins:  # findall returns strings; self.sins holds ints
+                return "Eligible"
+        return "Not Eligible"

     def matching_text(self, text):
         df = pd.DataFrame()
@@ -68,12 +89,17 @@ class Model_Voice_Text():
         if ret == []:
             ret.append("nothing found")

+        ph_num = self.extract_phone_number(text=text)

+        sin = self.extract_sin(text=text)
+
+        eligib = self.check_eligibility(sins_ex=sin)

         # initialize data of lists.
         data = {'Keywords': [ret],
                 'Phone Number': ph_num,
                 'SIN': sin,
+                'Eligible': eligib,
                 'text': text}
         df = pd.DataFrame(data)

@@ -81,10 +107,6 @@ class Model_Voice_Text():
         return df

     def transcribe(self, audio_f):
-        # sr, y = audio
-        # y = y.astype(np.float32)
-        # y /= np.max(np.abs(y))
-        # print(type(audio))
         text = ""

         # First load the file
@@ -104,70 +126,21 @@ class Model_Voice_Text():
                 buffer = samples_total - samples_wrote

             block = audio[samples_wrote : (samples_wrote + buffer)]
-            # out_filename = "split_" + str(counter) + "_" + audio_f
-
-            # Write 2 second segment
-            # sf.write(out_filename, block, sr)
-
-            # Transcribing the audio to text
             text += transcriber(block)["text"]
             counter += 1
             samples_wrote += buffer
-            # print(counter)
-            # print(text)

         return text

     def voice_to_text_s(self, audio):
-        # SR_obj = self.SR_obj
-        # info = sr.AudioFile(audio)
         tran_text = self.transcribe(audio)
         # print(tran_text)
         match_results = self.matching_text(tran_text.lower())
         return match_results

-        # print(info)
-
-        # with info as source:
-        # SR_obj.adjust_for_ambient_noise(source)
-        # audio_data = SR_obj.record(source,duration=100)
-        # result = SR_obj.recognize_google(audio_data)
-        # match_results = self.matching_text(result)
-        # return match_results
-
-
-    # def voice_to_text(self, voicefolder):
-    # SR_obj = self.SR_obj
-    # text_list = []
-    # res_list = []
-
-    # for subdir, dirs, files in os.walk(voicefolder):
-    # for file in files:
-    # print(os.path.join(subdir, file))
-    # info = sr.AudioFile(os.path.join(subdir, file))
-    # print(info)
-
-    # with info as source:
-    # SR_obj.adjust_for_ambient_noise(source)
-    # audio_data = SR_obj.record(source,duration=100)
-    # result = SR_obj.recognize_google(audio_data)
-    # text_list.append(result)
-    # match_results = self.matching_text(result)
-    # res_list.append([file, match_results, result])
-
-    # return(text_list, res_list)
-

 model = Model_Voice_Text()

-# path = "/home/si-lab/Desktop/Projects/DataSciencePrpjects/Voice_records"
-# text, results = model.voice_to_text(path)
-
-# f = open("demofile2.txt", "a")
-# f.write(text)
-# f.close()
-# df = pd.DataFrame(results)
-# df.to_csv("list.csv", index=False)

 demo = gr.Blocks()

@@ -187,10 +160,4 @@ with demo:
         ["Transcribe Microphone", "Transcribe Audio File"],
     )

-demo.launch(debug=True)
-
-# pickle.dump(model, open("voice_txt.pkl", "wb"))
-
-
-
-
+demo.launch(debug=True)
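As a rough, standalone sketch of what the new helpers do (not part of the commit): the sample transcript, phone number, and shortened sins list below are made up for illustration, and only the two regex patterns mirror the ones added above.

import re

phone_pattern = re.compile(r'\b\d{7,}\b')   # 7 or more consecutive digits -> phone number
sin_pattern = re.compile(r'\b\d{4}\b')      # exactly 4 digits -> SIN candidate
sins = [5678, 1967, 4530]                   # shortened, hypothetical stand-in for self.sins

text = "i am a refugee, my number is 4165551234 and my sin is 1967"

phone = re.search(phone_pattern, text)
print(phone.group() if phone else "000")    # 4165551234
candidates = re.findall(sin_pattern, text)
print(candidates)                           # ['1967']
print("Eligible" if any(s.isdigit() and int(s) in sins for s in candidates) else "Not Eligible")  # Eligible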