Spaces:
				
			
			
	
			
			
		Build error
		
	
	
	
			
			
	
	
	
	
		
		
		Build error
		
	| import re | |
| import xml.etree.ElementTree as ET | |
| from xml.sax import saxutils | |
| #import nltk | |
| # Chunked generation originally from https://github.com/serp-ai/bark-with-voice-clone | |
def split_and_recombine_text(text, desired_length=100, max_length=150):
    # from https://github.com/neonbjb/tortoise-tts
    """Split text into chunks of a desired length, trying to keep sentences intact.

    Args:
        text: The input string. Whitespace runs are collapsed to a single
            space and curly double quotes are converted to ASCII ``"``
            before splitting.
        desired_length: Once the current chunk is at least this long, it is
            emitted at the next sentence boundary.
        max_length: When the current chunk reaches this length, a split is
            forced (at the last recorded sentence boundary if there is one,
            otherwise at a nearby punctuation mark or space).

    Returns:
        A list of stripped, non-empty chunks. Chunks made up only of
        whitespace and ``.,;:!?`` are dropped.
    """
    # normalize text, remove redundant whitespace and convert non-ascii quotes to ascii
    text = re.sub(r"\n\n+", "\n", text)
    text = re.sub(r"\s+", " ", text)
    text = re.sub(r"[“”]", '"', text)

    rv = []
    in_quote = False  # True while text[:pos + 1] contains an odd number of '"'
    current = ""
    split_pos = []  # positions of sentence boundaries seen in the current chunk
    pos = -1
    end_pos = len(text) - 1

    def seek(delta):
        """Move the cursor by *delta* characters, keeping `current` and `in_quote` in sync."""
        nonlocal pos, in_quote, current
        stepping_back = delta < 0
        for _ in range(abs(delta)):
            if stepping_back:
                # Undo the toggle for the character being removed. Checking
                # the character we land on instead would leave `in_quote`
                # inverted whenever a forced split ends on a quote.
                if text[pos] == '"':
                    in_quote = not in_quote
                pos -= 1
                current = current[:-1]
            else:
                pos += 1
                current += text[pos]
                if text[pos] == '"':
                    in_quote = not in_quote
        return text[pos]

    def peek(delta):
        """Return the character *delta* positions away, or "" when out of range."""
        p = pos + delta
        # NOTE: the bound is `p < end_pos`, so the final character of the text
        # is never returned. Left unchanged on purpose so that chunking at
        # the very end of the text stays as it was.
        return text[p] if p < end_pos and p >= 0 else ""

    def commit():
        """Emit the current chunk and start a new one."""
        nonlocal current, split_pos
        rv.append(current)
        current = ""
        split_pos = []

    while pos < end_pos:
        c = seek(1)
        # do we need to force a split?
        if len(current) >= max_length:
            if len(split_pos) > 0 and len(current) > (desired_length / 2):
                # we have at least one sentence and we are over half the desired length, seek back to the last split
                d = pos - split_pos[-1]
                seek(-d)
            else:
                # no full sentences, seek back until we are not in the middle of a word and split there
                while c not in "!?.,\n " and pos > 0 and len(current) > desired_length:
                    c = seek(-1)
            commit()
        # check for sentence boundaries
        elif not in_quote and (c in "!?]\n" or (c == "." and peek(1) in "\n ")):
            # seek forward if we have consecutive boundary markers but still within the max length
            while (
                pos < len(text) - 1 and len(current) < max_length and peek(1) in "!?.]"
            ):
                c = seek(1)
            split_pos.append(pos)
            if len(current) >= desired_length:
                commit()
        # treat end of quote as a boundary if its followed by a space or newline
        elif in_quote and peek(1) == '"' and peek(2) in "\n ":
            seek(2)
            split_pos.append(pos)
    rv.append(current)

    # clean up, remove lines with only whitespace or punctuation
    rv = [s.strip() for s in rv]
    rv = [s for s in rv if len(s) > 0 and not re.match(r"^[\s\.,;:!?]*$", s)]
    return rv
def is_ssml(value):
    """Report whether *value* parses as an XML document.

    Only well-formedness is checked (via ``ET.fromstring``); the content is
    not validated against the SSML schema.

    Returns:
        True if parsing succeeds, False if it raises ``ET.ParseError``.
    """
    try:
        ET.fromstring(value)
    except ET.ParseError:
        parsed_ok = False
    else:
        parsed_ok = True
    return parsed_ok
def build_ssml(rawtext, selected_voice):
    """Build an SSML document with one ``<voice>`` element per non-empty line.

    Args:
        rawtext: Plain text. It is split on newlines; each line is stripped
            and blank lines are skipped. Line content is XML-escaped.
        selected_voice: Value for the ``name`` attribute of every
            ``<voice>`` element. It is converted with ``str()`` and escaped
            as an XML attribute value.

    Returns:
        The SSML document as a string.
    """
    # quoteattr() escapes &, < and > and chooses a quote character that does
    # not clash with the value, so a voice name containing markup characters
    # cannot produce malformed XML. For ordinary names the result is the same
    # double-quoted attribute as before.
    voice_attr = saxutils.quoteattr(str(selected_voice))
    stripped_lines = (line.strip() for line in rawtext.split("\n"))
    joinedparts = "".join(
        f"\n<voice name={voice_attr}>{saxutils.escape(line)}</voice>"
        for line in stripped_lines
        if line
    )
    ssml = f"""<?xml version="1.0"?>
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
    http://www.w3.org/TR/speech-synthesis/synthesis.xsd"
    xml:lang="en-US">
{joinedparts}
</speak>
"""
    return ssml
def create_clips_from_ssml(ssmlinput):
    """Turn an SSML document into a list of ``(voice_name, text_chunk)`` tuples.

    Every ``<voice>`` element in the SSML synthesis namespace is visited. Its
    leading text is stripped and passed through ``split_and_recombine_text``;
    each resulting chunk longer than one character becomes one tuple.

    Args:
        ssmlinput: The SSML document as a string.

    Returns:
        A list of ``(voice_name, chunk)`` tuples in document order.

    Raises:
        xml.etree.ElementTree.ParseError: If *ssmlinput* is not well-formed XML.
        KeyError: If a ``<voice>`` element has no ``name`` attribute.
    """
    document_root = ET.fromstring(ssmlinput)
    clips = []
    for element in document_root.iter('{http://www.w3.org/2001/10/synthesis}voice'):
        speaker = element.attrib['name']
        content = (element.text or '').strip()
        if not content:
            continue
        # single-character chunks are skipped
        clips.extend(
            (speaker, chunk)
            for chunk in split_and_recombine_text(content)
            if len(chunk) > 1
        )
    return clips
