Spaces:
Sleeping
Sleeping
| import re | |
| import cn2an | |
| from style_bert_vits2.nlp.symbols import PUNCTUATIONS | |
# Punctuation found in raw text -> the symbol it is normalized to.
# replace_punctuation() matches every key literally and substitutes its value.
#
# Full-width (U+FFxx) forms are spelled as \uXXXX escapes on purpose: written
# as literal characters they are easy to confuse with, or get folded into,
# their ASCII look-alikes, which leaves duplicate or no-op keys behind and the
# full-width character itself unmapped.
# NOTE(review): the previous literal had duplicate "(" / ")" / "~" keys and
# identity entries for "," "!" "?"; presumably those were the full-width forms
# originally - confirm against the upstream table.
__REPLACE_MAP = {
    # Clause and sentence separators.
    ":": ",",
    "\uff1a": ",",  # FULLWIDTH COLON
    ";": ",",
    "\uff1b": ",",  # FULLWIDTH SEMICOLON
    "\uff0c": ",",  # FULLWIDTH COMMA
    "。": ".",
    "\uff01": "!",  # FULLWIDTH EXCLAMATION MARK
    "\uff1f": "?",  # FULLWIDTH QUESTION MARK
    "\n": ".",
    "·": ",",
    "、": ",",
    "...": "…",
    "$": ".",
    # Quotation marks.
    "“": "'",
    "”": "'",
    '"': "'",
    "‘": "'",
    "’": "'",
    # Brackets of every kind collapse to a quote mark.
    "(": "'",
    ")": "'",
    "\uff08": "'",  # FULLWIDTH LEFT PARENTHESIS
    "\uff09": "'",  # FULLWIDTH RIGHT PARENTHESIS
    "《": "'",
    "》": "'",
    "【": "'",
    "】": "'",
    "[": "'",
    "]": "'",
    # Dashes and tildes.
    "—": "-",
    "~": "-",
    "\uff5e": "-",  # FULLWIDTH TILDE
    # Corner brackets.
    "「": "'",
    "」": "'",
}
def normalize_text(text: str) -> str:
    """Spell out numbers in ``text`` and then normalize its punctuation.

    Each run of digits, optionally containing a single decimal point between
    digits, is passed to ``cn2an.an2cn`` and replaced by the string it
    returns. The converted text is then handed to ``replace_punctuation``.

    Args:
        text: Raw input text.

    Returns:
        The text after number conversion and punctuation normalization.
    """
    # One positional pass: every match is replaced exactly where it was found,
    # instead of re-searching the whole text for the first occurrence of each
    # number string.
    text = re.sub(
        r"\d+(?:\.?\d+)?",
        lambda match: cn2an.an2cn(match.group()),
        text,
    )
    return replace_punctuation(text)
def replace_punctuation(text: str) -> str:
    """Map punctuation to its normalized form and strip everything else.

    Processing order:
      1. "嗯" is replaced by "恩" and "呣" by "母".
      2. Every occurrence of a ``__REPLACE_MAP`` key is replaced by the
         value mapped to it.
      3. Every character that is neither in the range U+4E00..U+9FA5 nor
         one of ``PUNCTUATIONS`` is removed.

    Args:
        text: Text to clean.

    Returns:
        The cleaned text, containing only characters from the U+4E00..U+9FA5
        range and symbols listed in ``PUNCTUATIONS``.
    """
    text = text.replace("嗯", "恩").replace("呣", "母")

    # Keys are escaped because some of them ("...", "$", "(", "[") are regex
    # metacharacters.
    mapping_pattern = re.compile(
        "|".join(re.escape(key) for key in __REPLACE_MAP)
    )
    mapped = mapping_pattern.sub(
        lambda match: __REPLACE_MAP[match.group()], text
    )

    # Escape each allowed symbol before it goes into the character class, so a
    # hyphen, closing bracket or caret in PUNCTUATIONS is always taken
    # literally, whatever its position in that list.
    allowed = "".join(re.escape(symbol) for symbol in PUNCTUATIONS)
    return re.sub(r"[^\u4e00-\u9fa5" + allowed + r"]+", "", mapped)