diff options
author | Alon Bar-Lev <alon.barlev@gmail.com> | 2021-08-04 19:47:51 +0300 |
---|---|---|
committer | Alon Bar-Lev <alon.barlev@gmail.com> | 2021-08-31 20:42:16 +0300 |
commit | b27d0291e50dc78bf5425da6567d22c983e4ea25 (patch) | |
tree | 918618fb05cce4b23891ae477483efcc581b7fe8 | |
parent | 57a921f3f6b551b4bf4ff9298be8b23c1159c0b6 (diff) | |
download | unidecode-b27d0291e50dc78bf5425da6567d22c983e4ea25.tar.gz |
Improve Hebrew conversion
Clean up special, rarely used characters.
Bring regular characters closer to the formal document[1].
[1] https://hebrew-academy.org.il/wp-content/uploads/taatik-ivrit-latinit-1-1.pdf
-rw-r--r-- | unidecode/x005.py | 28 |
1 files changed, 14 insertions, 14 deletions
diff --git a/unidecode/x005.py b/unidecode/x005.py index ec85c09..8779da6 100644 --- a/unidecode/x005.py +++ b/unidecode/x005.py @@ -175,7 +175,7 @@ None, # 0x90 '', # 0xad '', # 0xae '', # 0xaf -'@', # 0xb0 +'', # 0xb0 'e', # 0xb1 'a', # 0xb2 'o', # 0xb3 @@ -187,14 +187,14 @@ None, # 0x90 'o', # 0xb9 'o', # 0xba 'u', # 0xbb -'\'', # 0xbc +'', # 0xbc '', # 0xbd '-', # 0xbe -'-', # 0xbf +'', # 0xbf '|', # 0xc0 '', # 0xc1 '', # 0xc2 -':', # 0xc3 +'.', # 0xc3 '', # 0xc4 '', # 0xc5 'n', # 0xc6 @@ -214,11 +214,11 @@ None, # 0xcf 'h', # 0xd4 'v', # 0xd5 'z', # 0xd6 -'KH', # 0xd7 -'t', # 0xd8 +'H', # 0xd7 +'T', # 0xd8 'y', # 0xd9 -'k', # 0xda -'k', # 0xdb +'KH', # 0xda +'KH', # 0xdb 'l', # 0xdc 'm', # 0xdd 'm', # 0xde @@ -230,7 +230,7 @@ None, # 0xcf 'p', # 0xe4 'TS', # 0xe5 'TS', # 0xe6 -'q', # 0xe7 +'k', # 0xe7 'r', # 0xe8 'SH', # 0xe9 't', # 0xea @@ -238,15 +238,15 @@ None, # 0xeb None, # 0xec None, # 0xed None, # 0xee -None, # 0xef +'YYY', # 0xef 'V', # 0xf0 'OY', # 0xf1 -'i', # 0xf2 +'EY', # 0xf2 '\'', # 0xf3 '"', # 0xf4 -'v', # 0xf5 -'n', # 0xf6 -'q', # 0xf7 +None, # 0xf5 +None, # 0xf6 +None, # 0xf7 None, # 0xf8 None, # 0xf9 None, # 0xfa |