Spaces:

cdactvm
/

Punjabi_ASR_Demo

Sleeping

App Files Files Community

cdactvm committed on Jan 24

Commit

4288e0d

verified ·

1 Parent(s): bb36909

Update app.py

Browse files

Files changed (1) hide show

app.py +91 -91

app.py CHANGED Viewed

@@ -55,103 +55,103 @@ def apply_wiener_filter(audio):
-def createlex(filename):
-# Initialize an empty dictionary
-    data_dict = {}
-# Open the file and read it line by line
-    with open(filename, "r", encoding="utf-8") as f:
-        for line in f:
-        # Strip newline characters and split by tab
-            key, value = line.strip().split("\t")
-        # Add to dictionary
-            data_dict[key] = value
-    return data_dict
-lex=createlex("num_words_ta.txt")
-def addnum(inlist):
-    sum=0
-    for num in inlist:
-        sum+=int(num)
-    return sum
-from rapidfuzz import process
-def get_val(word, lexicon):
-    threshold = 80  # Minimum similarity score
-    length_difference = 4
-    #length_range = (4, 6)  # Acceptable character length range (min, max)
-    # Find the best match above the similarity threshold
-    result = process.extractOne(word, lexicon.keys(), score_cutoff=threshold)
-    #print (result)
-    if result:
-        match, score, _ = result
-        #print(lexicon[match])
-        #return lexicon[match]
-        if abs(len(match) - len(word)) <= length_difference:
-        #if length_range[0] <= len(match) <= length_range[1]:
-            return lexicon[match]
-        else:
-            return None
-    else:
-        return None
-def convert2num(input, lex):
-    input += " #"  # Append a '#' sentinel word to mark the end of the input
-    words = input.split()
-    i = 0
-    num = 0
-    outstr = ""
-    digit_end = True
-    numlist = []
-    addflag = False
-    # Process the words
-    while i < len(words):
-        #checkwordlist = handleSpecialnum(words[i])
-        # Handle special numbers
-        #if len(checkwordlist) == 2:
-        #    words[i] = checkwordlist[0]
-        #    words.insert(i + 1, checkwordlist[1])  # Collect new word for later processing
-        # Get numerical value of the word
-        numval = get_val(words[i], lex)
-        if numval is not None:
-            if words[i][-4:] in ('த்து', 'ற்று'):
-                addflag = True
-                numlist.append(numval)
-            else:
-                if addflag:
-                    numlist.append(numval)
-                    num = addnum(numlist)
-                    outstr += str(num) + " "
-                    addflag = False
-                    numlist = []
-                else:
-                    outstr += " " + str(numval) + " "
-            digit_end = False
-        else:
-            if addflag:
-                num = addnum(numlist)
-                outstr += str(num) + " " + words[i] + " "
-                addflag = False
-                numlist = []
-            else:
-                outstr += words[i] + " "
-            if not digit_end:
-                digit_end = True
-        # Move to the next word
-        i += 1
-    # Final processing
-    outstr = outstr.replace('#','')  # Strip the '#' end-of-input sentinel (removes every '#' in the text)
-    return outstr
-# Function to handle speech recognition
 def recognize_speech(audio_file):
     audio, sr = librosa.load(audio_file, sr=16000)
     audio = high_pass_filter(audio, sr)

+# def createlex(filename):
+# # Initialize an empty dictionary
+#     data_dict = {}
+# # Open the file and read it line by line
+#     with open(filename, "r", encoding="utf-8") as f:
+#         for line in f:
+#         # Strip newline characters and split by tab
+#             key, value = line.strip().split("\t")
+#         # Add to dictionary
+#             data_dict[key] = value
+#     return data_dict
+# lex=createlex("num_words_ta.txt")
+# def addnum(inlist):
+#     sum=0
+#     for num in inlist:
+#         sum+=int(num)
+#     return sum
+# from rapidfuzz import process
+# def get_val(word, lexicon):
+#     threshold = 80  # Minimum similarity score
+#     length_difference = 4
+#     #length_range = (4, 6)  # Acceptable character length range (min, max)
+#     # Find the best match above the similarity threshold
+#     result = process.extractOne(word, lexicon.keys(), score_cutoff=threshold)
+#     #print (result)
+#     if result:
+#         match, score, _ = result
+#         #print(lexicon[match])
+#         #return lexicon[match]
+#         if abs(len(match) - len(word)) <= length_difference:
+#         #if length_range[0] <= len(match) <= length_range[1]:
+#             return lexicon[match]
+#         else:
+#             return None
+#     else:
+#         return None
+# def convert2num(input, lex):
+#     input += " #"  # Append a '#' sentinel word to mark the end of the input
+#     words = input.split()
+#     i = 0
+#     num = 0
+#     outstr = ""
+#     digit_end = True
+#     numlist = []
+#     addflag = False
+#     # Process the words
+#     while i < len(words):
+#         #checkwordlist = handleSpecialnum(words[i])
+#         # Handle special numbers
+#         #if len(checkwordlist) == 2:
+#         #    words[i] = checkwordlist[0]
+#         #    words.insert(i + 1, checkwordlist[1])  # Collect new word for later processing
+#         # Get numerical value of the word
+#         numval = get_val(words[i], lex)
+#         if numval is not None:
+#             if words[i][-4:] in ('த்து', 'ற்று'):
+#                 addflag = True
+#                 numlist.append(numval)
+#             else:
+#                 if addflag:
+#                     numlist.append(numval)
+#                     num = addnum(numlist)
+#                     outstr += str(num) + " "
+#                     addflag = False
+#                     numlist = []
+#                 else:
+#                     outstr += " " + str(numval) + " "
+#             digit_end = False
+#         else:
+#             if addflag:
+#                 num = addnum(numlist)
+#                 outstr += str(num) + " " + words[i] + " "
+#                 addflag = False
+#                 numlist = []
+#             else:
+#                 outstr += words[i] + " "
+#             if not digit_end:
+#                 digit_end = True
+#         # Move to the next word
+#         i += 1
+#     # Final processing
+#     outstr = outstr.replace('#','')  # Strip the '#' end-of-input sentinel (removes every '#' in the text)
+#     return outstr
+# # Function to handle speech recognition
 def recognize_speech(audio_file):
     audio, sr = librosa.load(audio_file, sr=16000)
     audio = high_pass_filter(audio, sr)