Spaces:
Sleeping
Sleeping
File size: 1,309 Bytes
45311fe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# -*- coding: utf-8 -*-
# @Time : 2021/12/2 5:41 p.m.
# @Author : JianingWang
# @File : common.py
# Inclusive (first, last) codepoint ranges treated as "Chinese characters".
# See https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
_CJK_CODEPOINT_RANGES = (
    (0x4E00, 0x9FFF),    # CJK Unified Ideographs
    (0x3400, 0x4DBF),    # CJK Unified Ideographs Extension A
    (0x20000, 0x2A6DF),  # CJK Unified Ideographs Extension B
    (0x2A700, 0x2B73F),  # CJK Unified Ideographs Extension C
    (0x2B740, 0x2B81F),  # CJK Unified Ideographs Extension D
    (0x2B820, 0x2CEAF),  # CJK Unified Ideographs Extension E
    (0xF900, 0xFAFF),    # CJK Compatibility Ideographs
    (0x2F800, 0x2FA1F),  # CJK Compatibility Ideographs Supplement
)


def is_chinese_char(cp):
    """Tell whether the codepoint ``cp`` belongs to a CJK ideograph range.

    A "Chinese character" here means any codepoint inside one of the
    ranges listed in ``_CJK_CODEPOINT_RANGES`` (both ends inclusive).

    Despite the "CJK" name, these ranges do NOT cover every Japanese or
    Korean character: modern Korean Hangul and Japanese Hiragana/Katakana
    live in other blocks. Those scripts write space-separated words, so
    they get no special treatment and are handled like all other languages.

    Args:
        cp: Integer Unicode codepoint, e.g. the result of ``ord(char)``.

    Returns:
        ``True`` if ``cp`` lies inside any of the ranges, ``False`` otherwise.
    """
    return any(first <= cp <= last for first, last in _CJK_CODEPOINT_RANGES)
def is_chinese(word: str):
    """Report whether every character of ``word`` is a Chinese character.

    Each character is converted to its codepoint with ``ord`` and checked
    with ``is_chinese_char``; checking stops at the first character that
    fails.

    Args:
        word: Text to inspect, e.g. "180", "身高" or "神".

    Returns:
        ``1`` if all characters pass the check, ``0`` as soon as one does
        not. An empty ``word`` has no failing character, so it yields ``1``.
    """
    return 1 if all(is_chinese_char(ord(symbol)) for symbol in word) else 0
|