Spaces:
Running
on
Zero
Running
on
Zero
| # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| from .text_normlization import * | |
# Maps punctuation found in the input onto the small set of marks that
# replace_punctuation() keeps. Full-width (CJK) forms are written as \u escapes
# so they cannot be silently folded into their ASCII look-alikes; the ASCII
# keys (including the identity entries) are kept for backward compatibility.
rep_map = {
    ":": ",",
    ";": ",",
    ",": ",",
    "\uff1a": ",",  # fullwidth colon
    "\uff1b": ",",  # fullwidth semicolon
    "\uff0c": ",",  # fullwidth comma
    "。": ".",
    "!": "!",
    "?": "?",
    "\uff01": "!",  # fullwidth exclamation mark
    "\uff1f": "?",  # fullwidth question mark
    "\n": ".",
    "·": ",",
    "、": ",",
    "...": "…",
    "$": ".",
    "/": ",",
    "—": "-",
    "~": "…",
    "\uff5e": "…",  # fullwidth tilde
}

# Marks that survive the final filtering step in replace_punctuation().
_PUNCTUATION = ("!", "?", "…", ",", ".")

# Compiled once at import time; rep_map is treated as constant after import.
_REP_PATTERN = re.compile("|".join(re.escape(p) for p in rep_map))

# Matches every run of characters that is neither in the \u4e00-\u9fa5 range
# nor one of the kept punctuation marks.
_DISALLOWED_PATTERN = re.compile(
    r"[^\u4e00-\u9fa5" + re.escape("".join(_PUNCTUATION)) + r"]+"
)


def replace_punctuation(text):
    """Normalise punctuation in *text* and drop every other non-Chinese character.

    Steps, in order:
      1. Replace "嗯" with "恩" and "呣" with "母".
      2. Substitute every key of ``rep_map`` with its mapped value.
      3. Delete everything that is not in the range \\u4e00-\\u9fa5 and is not
         one of ``! ? … , .``. Note that "-" (produced from "—") is not in
         that set and is therefore removed as well.

    Args:
        text: Input string.

    Returns:
        The filtered string; an empty string if nothing survives.
    """
    text = text.replace("嗯", "恩").replace("呣", "母")
    replaced_text = _REP_PATTERN.sub(lambda m: rep_map[m.group()], text)
    return _DISALLOWED_PATTERN.sub("", replaced_text)
def text_normalize(text):
    """Normalise *text* and return it as one punctuation-cleaned string.

    The text is passed to ``TextNormalizer.normalize``; each sentence it yields
    is run through ``replace_punctuation`` and the results are concatenated in
    order with no separator.

    Args:
        text: Raw input text.

    Returns:
        The concatenated, cleaned sentences (an empty string when the
        normaliser yields no sentences).
    """
    # https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization
    tx = TextNormalizer()
    sentences = tx.normalize(text)
    # Join once instead of growing a string (and printing it) per sentence.
    return "".join(replace_punctuation(sentence) for sentence in sentences)