blob: ca7effbe91e31746c059132144d8e49d79d36624 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
|
import re
import unidecode
# Script: measure how often unidecode transliterates a Latin letter to the
# base letter spelled out in its Unicode character name. It scans
# NamesList.txt in the current directory, prints one line per mismatch, and
# finishes with the percentage of examined letters that agreed.

# Matches names such as "LATIN SMALL LETTER A WITH GRAVE": group 1 is the case
# keyword (SMALL/CAPITAL), group 2 a stand-alone one-letter word after "LETTER".
name_re = re.compile(r"LATIN (SMALL|CAPITAL) LETTER (?:.* )?([A-Z])(?: .*)?$")

total = 0  # Latin letters examined
good = 0   # of those, how many unidecode mapped to the expected letter

# Decode explicitly instead of relying on the platform default encoding.
# Only the ASCII code point and name fields are used, so errors="replace"
# keeps the scan going if annotation text elsewhere in the file is not valid
# UTF-8 (NOTE(review): older NamesList.txt releases were reportedly Latin-1).
with open("NamesList.txt", encoding="utf-8", errors="replace") as fp:
    for line in fp:
        fields = line.split('\t')
        try:
            cp = int(fields[0], 16)
        except ValueError:
            # Not a "<hex code point>\t<name>" entry (header, annotation, ...).
            continue
        if len(fields) < 2:
            # Hex value with no tab-separated name: nothing to compare.
            continue
        name = fields[1]
        match = name_re.search(name)
        if not match:
            continue
        cap, letter = match.groups()
        if cap == 'SMALL':
            letter = letter.lower()
        char = chr(cp)
        letteru = unidecode.unidecode(char)
        if letteru != letter:
            # Mismatch report: got, expected, character, code point, name.
            print(letteru, letter, char, "%05x" % cp, name.strip())
        else:
            good += 1
        total += 1

if not total:
    # Avoid an opaque ZeroDivisionError when the file held no matching names.
    raise SystemExit("no LATIN SMALL/CAPITAL LETTER names found in NamesList.txt")
print(100.0 * good / total)
|