diff options
Diffstat (limited to 'Doc/lib/libunicodedata.tex')
-rw-r--r-- | Doc/lib/libunicodedata.tex | 25 |
1 files changed, 22 insertions, 3 deletions
diff --git a/Doc/lib/libunicodedata.tex b/Doc/lib/libunicodedata.tex index dcbda773ba..435466a317 100644 --- a/Doc/lib/libunicodedata.tex +++ b/Doc/lib/libunicodedata.tex @@ -14,11 +14,11 @@ This module provides access to the Unicode Character Database which defines character properties for all Unicode characters. The data in this database is based on the \file{UnicodeData.txt} file version -4.1.0 which is publically available from \url{ftp://ftp.unicode.org/}. +4.1.0 which is publicly available from \url{ftp://ftp.unicode.org/}. The module uses the same names and symbols as defined by the UnicodeData File Format 4.1.0 (see -\url{http://www.unicode.org/Public/4.1-Update/UnicodeData-4.1.0.html}). It +\url{http://www.unicode.org/Public/4.1.0/ucd/UCD.html}). It defines the following functions: \begin{funcdesc}{lookup}{name} @@ -108,7 +108,7 @@ decomposition, and translates each character into its decomposed form. Normal form C (NFC) first applies a canonical decomposition, then composes pre-combined characters again. -In addition to these two forms, there two additional normal forms +In addition to these two forms, there are two additional normal forms based on compatibility equivalence. In Unicode, certain characters are supported which normally would be unified with other characters. For example, U+2160 (ROMAN NUMERAL ONE) is really the same thing as U+0049 @@ -139,3 +139,22 @@ the Unicode database (such as IDNA). \versionadded{2.5} \end{datadesc} + +Examples: + +\begin{verbatim} +>>> unicodedata.lookup('LEFT CURLY BRACKET') +u'{' +>>> unicodedata.name(u'/') +'SOLIDUS' +>>> unicodedata.decimal(u'9') +9 +>>> unicodedata.decimal(u'a') +Traceback (most recent call last): + File "<stdin>", line 1, in ? +ValueError: not a decimal +>>> unicodedata.category(u'A') # 'L'etter, 'u'ppercase +'Lu' +>>> unicodedata.bidirectional(u'\u0660') # 'A'rabic, 'N'umber +'AN' +\end{verbatim} |