cdactvm committed on
Commit
4288e0d
·
verified ·
1 Parent(s): bb36909

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -91
app.py CHANGED
@@ -55,103 +55,103 @@ def apply_wiener_filter(audio):
55
 
56
 
57
 
58
def createlex(filename):
    """Load a tab-separated lexicon file into a dictionary.

    Every non-blank line must contain exactly one tab separating a key
    from its value. Blank (whitespace-only) lines are skipped so that a
    stray empty line does not abort loading.

    Args:
        filename: Path of the UTF-8 encoded lexicon file.

    Returns:
        A dict mapping the text before the tab to the text after it, both
        as strings. If a key occurs more than once, the last value wins.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            tab-separated fields.
    """
    data_dict = {}

    with open(filename, "r", encoding="utf-8") as f:
        for lineno, line in enumerate(f, start=1):
            stripped = line.strip()
            if not stripped:
                # Blank line: nothing to record.
                continue

            fields = stripped.split("\t")
            if len(fields) != 2:
                # Same exception type the bare tuple-unpacking raised, but
                # with enough context to locate the offending row.
                raise ValueError(
                    f"{filename}:{lineno}: expected 'key<TAB>value', "
                    f"got {len(fields)} field(s)"
                )

            key, value = fields
            data_dict[key] = value

    return data_dict
70
 
71
- lex=createlex("num_words_ta.txt")
72
 
73
def addnum(inlist):
    """Return the integer total of the items in *inlist*.

    Args:
        inlist: Iterable of values accepted by ``int()`` (for example
            numeric strings or integers).

    Returns:
        The sum of ``int(item)`` over all items; ``0`` for an empty input.

    Raises:
        ValueError: If an item cannot be converted by ``int()``.
    """
    # Use the builtin instead of a local accumulator that shadowed it.
    return sum(int(num) for num in inlist)
79
 
80
- from rapidfuzz import process
81
def get_val(word, lexicon):
    """Look up *word* in *lexicon* using fuzzy matching on the keys.

    The best-scoring key is accepted only if its similarity score reaches
    the threshold (80) and its length differs from ``len(word)`` by at
    most 4 characters, as measured by ``len()``.

    Args:
        word: The token to look up.
        lexicon: Mapping whose keys are candidate spellings and whose
            values are returned on a match.

    Returns:
        ``lexicon[best_key]`` when a key passes both checks, else ``None``.
    """
    threshold = 80  # Minimum similarity score
    length_difference = 4  # Maximum allowed |len(match) - len(word)|

    # Nothing to match against or nothing to match: skip the matcher.
    if not word or not lexicon:
        return None

    # Match against the keys only; RapidFuzz treats a mapping passed as
    # `choices` as key -> choice and would score the values instead.
    result = process.extractOne(word, lexicon.keys(), score_cutoff=threshold)
    if result is None:
        return None

    match = result[0]
    if abs(len(match) - len(word)) > length_difference:
        return None
    return lexicon[match]
100
-
101
def convert2num(input, lex):
    """Replace number words in *input* with their numeric values.

    The text is split on whitespace and each word is looked up with
    ``get_val(word, lex)``:

    * A matched word ending in 'த்து' or 'ற்று' is held back and its value
      queued (presumably these are combining forms that expect a following
      number word -- TODO confirm against the lexicon).
    * A matched word without that ending is added to any queued values
      via ``addnum`` and the total is emitted; with nothing queued its own
      value is emitted.
    * An unmatched word is emitted unchanged, preceded by the total of
      any queued values.

    Values still queued when the input ends are totalled and emitted.

    Args:
        input: The text to convert. (The name shadows the builtin but is
            part of the existing call interface, so it is kept.)
        lex: Lexicon passed through to ``get_val``.

    Returns:
        The converted text. Spacing matches the previous implementation:
        each emitted item is followed by a space, a standalone number is
        also preceded by one, and the result ends with one extra space.
    """
    combining_suffixes = ('த்து', 'ற்று')
    pieces = []
    pending = []  # values of held-back words awaiting the next number

    for word in input.split():
        numval = get_val(word, lex)

        if numval is None:
            # Ordinary word: flush any queued total first, then the word.
            if pending:
                pieces.append(str(addnum(pending)) + " ")
                pending = []
            pieces.append(word + " ")
        elif word.endswith(combining_suffixes):
            pending.append(numval)
        elif pending:
            pending.append(numval)
            pieces.append(str(addnum(pending)) + " ")
            pending = []
        else:
            pieces.append(" " + str(numval) + " ")

    # Flush explicitly at end of input. The earlier version appended a "#"
    # sentinel word and then removed every "#" from the output, which also
    # deleted "#" characters that were part of the caller's text.
    if pending:
        pieces.append(str(addnum(pending)) + " ")
    # Trailing space left where the old sentinel used to be, so callers
    # receive the same spacing as before.
    pieces.append(" ")

    return "".join(pieces)
153
 
154
- # Function to handle speech recognition
155
  def recognize_speech(audio_file):
156
  audio, sr = librosa.load(audio_file, sr=16000)
157
  audio = high_pass_filter(audio, sr)
 
55
 
56
 
57
 
58
+ # def createlex(filename):
59
+ # # Initialize an empty dictionary
60
+ # data_dict = {}
61
+
62
+ # # Open the file and read it line by line
63
+ # with open(filename, "r", encoding="utf-8") as f:
64
+ # for line in f:
65
+ # # Strip newline characters and split by tab
66
+ # key, value = line.strip().split("\t")
67
+ # # Add to dictionary
68
+ # data_dict[key] = value
69
+ # return data_dict
70
 
71
+ # lex=createlex("num_words_ta.txt")
72
 
73
+ # def addnum(inlist):
74
+ # sum=0
75
+ # for num in inlist:
76
+ # sum+=int(num)
77
 
78
+ # return sum
79
 
80
+ # from rapidfuzz import process
81
+ # def get_val(word, lexicon):
82
+ # threshold = 80 # Minimum similarity score
83
+ # length_difference = 4
84
+ # #length_range = (4, 6) # Acceptable character length range (min, max)
85
+
86
+ # # Find the best match above the similarity threshold
87
+ # result = process.extractOne(word, lexicon.keys(), score_cutoff=threshold)
88
+ # #print (result)
89
+ # if result:
90
+ # match, score, _ = result
91
+ # #print(lexicon[match])
92
+ # #return lexicon[match]
93
+ # if abs(len(match) - len(word)) <= length_difference:
94
+ # #if length_range[0] <= len(match) <= length_range[1]:
95
+ # return lexicon[match]
96
+ # else:
97
+ # return None
98
+ # else:
99
+ # return None
100
+
101
+ # def convert2num(input, lex):
102
+ # input += " #" # Add a period for termination
103
+ # words = input.split()
104
+ # i = 0
105
+ # num = 0
106
+ # outstr = ""
107
+ # digit_end = True
108
+ # numlist = []
109
+ # addflag = False
110
+
111
+ # # Process the words
112
+ # while i < len(words):
113
+ # #checkwordlist = handleSpecialnum(words[i])
114
 
115
+ # # Handle special numbers
116
+ # #if len(checkwordlist) == 2:
117
+ # # words[i] = checkwordlist[0]
118
+ # # words.insert(i + 1, checkwordlist[1]) # Collect new word for later processing
119
+
120
+ # # Get numerical value of the word
121
+ # numval = get_val(words[i], lex)
122
+ # if numval is not None:
123
+ # if words[i][-4:] in ('த்து', 'ற்று'):
124
+ # addflag = True
125
+ # numlist.append(numval)
126
+ # else:
127
+ # if addflag:
128
+ # numlist.append(numval)
129
+ # num = addnum(numlist)
130
+ # outstr += str(num) + " "
131
+ # addflag = False
132
+ # numlist = []
133
+ # else:
134
+ # outstr += " " + str(numval) + " "
135
+ # digit_end = False
136
+ # else:
137
+ # if addflag:
138
+ # num = addnum(numlist)
139
+ # outstr += str(num) + " " + words[i] + " "
140
+ # addflag = False
141
+ # numlist = []
142
+ # else:
143
+ # outstr += words[i] + " "
144
+ # if not digit_end:
145
+ # digit_end = True
146
+
147
+ # # Move to the next word
148
+ # i += 1
149
+
150
+ # # Final processing
151
+ # outstr = outstr.replace('#','') # Remove trailing spaces
152
+ # return outstr
153
 
154
+ # # Function to handle speech recognition
155
  def recognize_speech(audio_file):
156
  audio, sr = librosa.load(audio_file, sr=16000)
157
  audio = high_pass_filter(audio, sr)