Spaces:

awaisrwp
/

care_taker

Runtime error

App Files Files Community

awaisrwp committed on Jan 24, 2024

Commit

76b1d4d

verified ·

1 Parent(s): 0745f2e

deployment

Browse files

Files changed (1) hide show

app.py +38 -71

app.py CHANGED Viewed

@@ -1,21 +1,13 @@
-# import os
-# import speech_recognition as sr
-# import pickle
-# import nltk
-# from nltk.corpus import wordnet
 import pandas as pd
 import difflib
 import gradio as gr
 from transformers import pipeline
 import librosa
 # import numpy as np
-transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base")
-# nltk.download('wordnet')
 class Model_Voice_Text():
@@ -28,6 +20,7 @@ class Model_Voice_Text():
     def __init__(self) -> None:
         # self.SR_obj = sr.Recognizer()
         self.KEYWORDS = ['suicide', 'urgent', 'poor', 'in-need', 'old', 'pregnant', 'refugee', 'new immigrant', 'patient', 'ill', 'sick', 'anxiety', 'anxious']
         # self.fuzzer = fuzz.Fuzz()
     # Define a function to find the number of times the word similar to the word stored in variable target_var, in a text stored in a variable named text_res
@@ -50,7 +43,35 @@ class Model_Voice_Text():
         # Return the number of similar words
         return len(similar_words)
     def matching_text(self, text):
         df = pd.DataFrame()
@@ -68,12 +89,17 @@ class Model_Voice_Text():
         if ret == []:
             ret.append("nothing found")
         # initialize data of lists.
         data = {'Keywords': [ret],
                 'Phone Number': ph_num,
                 'SIN': sin,
                 'text': text}
         df = pd.DataFrame(data)
@@ -81,10 +107,6 @@ class Model_Voice_Text():
         return df
     def transcribe(self, audio_f):
-        # sr, y = audio
-        # y = y.astype(np.float32)
-        # y /= np.max(np.abs(y))
-        # print(type(audio))
         text = ""
         # First load the file
@@ -104,70 +126,21 @@ class Model_Voice_Text():
                 buffer = samples_total - samples_wrote
             block = audio[samples_wrote : (samples_wrote + buffer)]
-            # out_filename = "split_" + str(counter) + "_" + audio_f
-            # Write 2 second segment
-            # sf.write(out_filename, block, sr)
-            # Transcribing the audio to text
             text += transcriber(block)["text"]
             counter += 1
             samples_wrote += buffer
-            # print(counter)
-            # print(text)
         return text
     def voice_to_text_s(self, audio):
-        # SR_obj = self.SR_obj
-        # info = sr.AudioFile(audio)
         tran_text = self.transcribe(audio)
         # print(tran_text)
         match_results = self.matching_text(tran_text.lower())
         return match_results
-        # print(info)
-        # with info as source:
-        #     SR_obj.adjust_for_ambient_noise(source)
-        #     audio_data = SR_obj.record(source,duration=100)
-        #     result = SR_obj.recognize_google(audio_data)
-        #     match_results = self.matching_text(result)
-        #     return match_results
-    # def voice_to_text(self, voicefolder):
-    #     SR_obj = self.SR_obj
-    #     text_list = []
-    #     res_list = []
-    #     for subdir, dirs, files in os.walk(voicefolder):
-    #         for file in files:
-    #             print(os.path.join(subdir, file))
-    #             info = sr.AudioFile(os.path.join(subdir, file))
-    #             print(info)
-    #             with info as source:
-    #                 SR_obj.adjust_for_ambient_noise(source)
-    #                 audio_data = SR_obj.record(source,duration=100)
-    #                 result = SR_obj.recognize_google(audio_data)
-    #                 text_list.append(result)
-    #                 match_results = self.matching_text(result)
-    #                 res_list.append([file, match_results, result])
-    #     return(text_list, res_list)
 model = Model_Voice_Text()
-# path = "/home/si-lab/Desktop/Projects/DataSciencePrpjects/Voice_records"
-# text, results = model.voice_to_text(path)
-# f = open("demofile2.txt", "a")
-# f.write(text)
-# f.close()
-# df = pd.DataFrame(results)
-# df.to_csv("list.csv", index=False)
 demo = gr.Blocks()
@@ -187,10 +160,4 @@ with demo:
         ["Transcribe Microphone", "Transcribe Audio File"],
     )
-demo.launch(debug=True)
-# pickle.dump(model, open("voice_txt.pkl", "wb"))

 import pandas as pd
 import difflib
 import gradio as gr
 from transformers import pipeline
 import librosa
+import re
 # import numpy as np
+transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
 class Model_Voice_Text():
     def __init__(self) -> None:
         # Set up the two lookup lists carried by the instance: KEYWORDS and sins.
         # self.SR_obj = sr.Recognizer()
         # Keyword vocabulary; presumably what matching_text searches the
         # transcript for -- its matching loop is not visible here, TODO confirm.
         self.KEYWORDS = ['suicide', 'urgent', 'poor', 'in-need', 'old', 'pregnant', 'refugee', 'new immigrant', 'patient', 'ill', 'sick', 'anxiety', 'anxious']
         # SIN values that check_eligibility() tests extracted numbers against.
         # They are stored as ints, whereas extract_sin() returns digit strings.
+        self.sins = [5678, 1967, 4530, 3986, 9750, 1065, 7134, 6410, 2906, 8056, 1307, 3503, 7708, 4980, 1248, 3491, 6157, 9242, 3198, 5632]
         # self.fuzzer = fuzz.Fuzz()
     # Count how many words in the text held in text_res are similar to the word held in target_var
         # Return the number of similar words
         return len(similar_words)
+    def extract_phone_number(self, text):
+        # Define a regular expression pattern to match phone numbers
+        phone_pattern = re.compile(r'\b\d{7,}\b')
+        # Search for the phone number in the text
+        match = re.search(phone_pattern, text)
+        # Check if a match is found and return the phone number
+        if match:
+            return match.group()
+        else:
+            return "000"
+    def extract_sin(self, text):
+        # Define a regular expression pattern to match phone numbers
+        sin_pattern = re.compile(r'\b\d{4}\b')
+        # Search for the phone number in the text
+        matches = re.findall(sin_pattern, text)
+        if matches:
+            return matches
+        else: return "Not detected"
+    def check_eligibility(self, sins_ex):
+        for number in sins_ex:
+            if number in self.sins:
+                return "Eligible"
+        return "Not Eligible"
     def matching_text(self, text):
         df = pd.DataFrame()
         if ret == []:
             ret.append("nothing found")
+        ph_num = self.extract_phone_number(text=text)
+        sin = self.extract_sin(text=text)
+        eligib = self.check_eligibility(sins_ex=sin)
         # initialize data of lists.
         data = {'Keywords': [ret],
                 'Phone Number': ph_num,
                 'SIN': sin,
+                'Eligible': eligib,
                 'text': text}
         df = pd.DataFrame(data)
         return df
     def transcribe(self, audio_f):
         text = ""
         # First load the file
                 buffer = samples_total - samples_wrote
             block = audio[samples_wrote : (samples_wrote + buffer)]
             text += transcriber(block)["text"]
             counter += 1
             samples_wrote += buffer
         return text
     def voice_to_text_s(self, audio):
         """Transcribe *audio* and run the matching step on the lowercased transcript.

         Returns whatever ``matching_text`` produces for that transcript.
         """
         transcript = self.transcribe(audio)
         return self.matching_text(transcript.lower())
 model = Model_Voice_Text()
 demo = gr.Blocks()
         ["Transcribe Microphone", "Transcribe Audio File"],
     )
+demo.launch(debug=True)