summary refs log tree commit diff
path: root/Doc/lib/libunicodedata.tex
diff options
context:
space:
mode:
Diffstat (limited to 'Doc/lib/libunicodedata.tex')
-rw-r--r-- Doc/lib/libunicodedata.tex 25
1 files changed, 22 insertions, 3 deletions
diff --git a/Doc/lib/libunicodedata.tex b/Doc/lib/libunicodedata.tex
index dcbda773ba..435466a317 100644
--- a/Doc/lib/libunicodedata.tex
+++ b/Doc/lib/libunicodedata.tex
@@ -14,11 +14,11 @@
This module provides access to the Unicode Character Database which
defines character properties for all Unicode characters. The data in
this database is based on the \file{UnicodeData.txt} file version
-4.1.0 which is publically available from \url{ftp://ftp.unicode.org/}.
+4.1.0 which is publicly available from \url{ftp://ftp.unicode.org/}.
The module uses the same names and symbols as defined by the
UnicodeData File Format 4.1.0 (see
-\url{http://www.unicode.org/Public/4.1-Update/UnicodeData-4.1.0.html}). It
+\url{http://www.unicode.org/Public/4.1.0/ucd/UCD.html}). It
defines the following functions:
\begin{funcdesc}{lookup}{name}
@@ -108,7 +108,7 @@ decomposition, and translates each character into its decomposed form.
Normal form C (NFC) first applies a canonical decomposition, then
composes pre-combined characters again.
-In addition to these two forms, there two additional normal forms
+In addition to these two forms, there are two additional normal forms
based on compatibility equivalence. In Unicode, certain characters are
supported which normally would be unified with other characters. For
example, U+2160 (ROMAN NUMERAL ONE) is really the same thing as U+0049
@@ -139,3 +139,22 @@ the Unicode database (such as IDNA).
\versionadded{2.5}
\end{datadesc}
+
+Examples:
+
+\begin{verbatim}
+>>> unicodedata.lookup('LEFT CURLY BRACKET')
+u'{'
+>>> unicodedata.name(u'/')
+'SOLIDUS'
+>>> unicodedata.decimal(u'9')
+9
+>>> unicodedata.decimal(u'a')
+Traceback (most recent call last):
+  File "<stdin>", line 1, in ?
+ValueError: not a decimal
+>>> unicodedata.category(u'A') # 'L'etter, 'u'ppercase
+'Lu'
+>>> unicodedata.bidirectional(u'\u0660') # 'A'rabic, 'N'umber
+'AN'
+\end{verbatim}