# -*- coding: utf-8 -*-
# @Time : 2021/12/2 5:41 p.m.
# @Author : JianingWang
# @File : common.py


# Inclusive (first, last) codepoint ranges that count as "Chinese characters".
#
# This defines a "Chinese character" as anything in the CJK Unicode blocks:
#   https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
#
# Note that the CJK Unicode blocks do NOT cover all Japanese and Korean
# characters, despite the name. The modern Korean Hangul alphabet is a
# different block, as are Japanese Hiragana and Katakana. Those alphabets are
# used to write space-separated words, so they are not treated specially and
# are handled like all of the other languages.
_CJK_CODEPOINT_RANGES = (
    (0x4E00, 0x9FFF),    # CJK Unified Ideographs
    (0x3400, 0x4DBF),    # CJK Unified Ideographs Extension A
    (0x20000, 0x2A6DF),  # CJK Unified Ideographs Extension B
    (0x2A700, 0x2B73F),  # CJK Unified Ideographs Extension C
    (0x2B740, 0x2B81F),  # CJK Unified Ideographs Extension D
    (0x2B820, 0x2CEAF),  # CJK Unified Ideographs Extension E
    (0xF900, 0xFAFF),    # CJK Compatibility Ideographs
    (0x2F800, 0x2FA1F),  # CJK Compatibility Ideographs Supplement
)


def is_chinese_char(cp: int) -> bool:
    """Check whether ``cp`` is the codepoint of a CJK character.

    Args:
        cp: An integer Unicode codepoint, e.g. the result of ``ord(char)``.

    Returns:
        ``True`` if ``cp`` lies inside one of the CJK ideograph ranges listed
        in ``_CJK_CODEPOINT_RANGES``, ``False`` otherwise.
    """
    return any(first <= cp <= last for first, last in _CJK_CODEPOINT_RANGES)


def is_chinese(word: str) -> int:
    """Tell whether ``word`` consists solely of CJK characters.

    Args:
        word: The text to inspect, e.g. "180", "身高" or "神".

    Returns:
        ``1`` if ``word`` is non-empty and every character in it is a CJK
        character, ``0`` otherwise. Integers (not booleans) are returned to
        keep the historical return values of this function.
    """
    # An empty word contains no Chinese characters at all; without this guard
    # the all-characters check below would be vacuously true.
    if not word:
        return 0
    return int(all(is_chinese_char(ord(char)) for char in word))