summaryrefslogtreecommitdiff
path: root/tools/check_character_names.py
blob: ca7effbe91e31746c059132144d8e49d79d36624 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import re
import unidecode

# Captures, from a character name, the case word (SMALL or CAPITAL) and the
# single-letter word that follows "LETTER" (optionally preceded and/or
# followed by other words, e.g. "TURNED M" or "A WITH GRAVE").
name_re = re.compile(r"LATIN (SMALL|CAPITAL) LETTER (?:.* )?([A-Z])(?: .*)?$")


def expected_letter(name):
    """Return the ASCII letter implied by a character name.

    Args:
        name: A character name such as ``"LATIN SMALL LETTER A WITH GRAVE"``;
            a trailing newline is tolerated.

    Returns:
        The single letter captured by ``name_re``, lower-cased when the name
        says ``SMALL``, or ``None`` when the name does not match the pattern.
    """
    match = name_re.search(name)
    if match is None:
        return None
    case, letter = match.groups()
    return letter.lower() if case == 'SMALL' else letter


def parse_line(line):
    """Split one tab-separated line into ``(code point, name)``.

    Args:
        line: A raw line of text, as read from the names file.

    Returns:
        A ``(int, str)`` tuple when the first field parses as a hexadecimal
        number and a second field exists; ``None`` otherwise.  The name is
        returned unstripped.
    """
    fields = line.split('\t')
    # A line without a tab has no name field; skip it instead of raising
    # IndexError further down.
    if len(fields) < 2:
        return None
    try:
        return int(fields[0], 16), fields[1]
    except ValueError:
        # First field is not a hex number, so this is not a character entry.
        return None


def check_names(lines, transliterate=None):
    """Compare transliterations against the letters implied by the names.

    For every line that parses as a character entry and whose name matches
    ``name_re``, the character is transliterated and compared with the letter
    taken from its name.  Each mismatch is printed to stdout as: the
    transliteration, the expected letter, the character, its code point as
    five hex digits, and the stripped name.

    Args:
        lines: Iterable of text lines (an open text file works).
        transliterate: Callable mapping a one-character string to its
            transliteration.  Defaults to ``unidecode.unidecode``.

    Returns:
        A ``(good, total)`` tuple: the number of matching entries and the
        number of entries checked.
    """
    if transliterate is None:
        transliterate = unidecode.unidecode

    total = 0
    good = 0
    for line in lines:
        entry = parse_line(line)
        if entry is None:
            continue
        cp, name = entry

        letter = expected_letter(name)
        if letter is None:
            continue

        char = chr(cp)
        letteru = transliterate(char)

        if letteru != letter:
            print(letteru, letter, char, "%05x" % cp, name.strip())
        else:
            good += 1

        total += 1

    return good, total


def main():
    """Check ``NamesList.txt`` in the current directory and print the score.

    Prints every mismatch followed by the percentage of checked entries whose
    transliteration equals the expected letter.
    """
    # Pin the encoding so the result does not depend on the platform locale.
    # errors="replace" keeps bytes that are not valid UTF-8 from aborting the
    # run.  NOTE(review): presumably such bytes only occur in older,
    # non-UTF-8 copies of the file -- confirm against the copy in use.
    with open("NamesList.txt", encoding="utf-8", errors="replace") as fp:
        good, total = check_names(fp)

    if total:
        print(100.0 * good / total)
    else:
        # Avoid ZeroDivisionError when nothing in the file matched.
        print("no matching LATIN letter entries found")


if __name__ == "__main__":
    main()