Spaces:

cdactvm
/

Tamil_ASR_Demo

Sleeping

Tamil_ASR_Demo / convert2list.py

Update convert2list.py

8811fa1 verified about 1 month ago

1.28 kB

	# import nbimporter
	import nbimporter
	from Text2List import text_to_list
	def convert_to_list(text, text_list):
	    """Segment ``text`` into known words using greedy longest-prefix matching.

	    The text is scanned left to right. At each position the longest entry
	    of ``text_list`` that starts there is emitted as its own token. Runs of
	    characters that match no entry are grouped together and emitted as a
	    single token.

	    Args:
	        text: The string to segment. A falsy value (``''`` or ``None``)
	            yields an empty result.
	        text_list: Iterable of candidate words. Empty strings are ignored.

	    Returns:
	        The tokens joined by single spaces, in their original order.
	    """
	    if not text:
	        return ''

	    # Longest candidates first so the first hit at a position is the longest
	    # match. Empty strings are dropped: they "match" everywhere without
	    # consuming any input, which would make the scan loop forever.
	    candidates = sorted(
	        (word for word in text_list if word), key=len, reverse=True
	    )

	    tokens = []
	    pending = []  # Unmatched characters seen since the last matched word.
	    pos = 0
	    end = len(text)

	    while pos < end:
	        # Test against an offset instead of re-slicing the remaining text on
	        # every step, which would copy the tail each time.
	        hit = next(
	            (word for word in candidates if text.startswith(word, pos)),
	            None,
	        )
	        if hit is None:
	            pending.append(text[pos])
	            pos += 1
	            continue

	        # Flush the unmatched run before the word that terminates it.
	        if pending:
	            tokens.append(''.join(pending))
	            pending.clear()
	        tokens.append(hit)
	        pos += len(hit)

	    # Trailing unmatched characters form the final token.
	    if pending:
	        tokens.append(''.join(pending))

	    return ' '.join(tokens)