summaryrefslogtreecommitdiff
path: root/Doc/lib/libunicodedata.tex
diff options
context:
space:
mode:
Diffstat (limited to 'Doc/lib/libunicodedata.tex')
-rw-r--r--Doc/lib/libunicodedata.tex160
1 files changed, 0 insertions, 160 deletions
diff --git a/Doc/lib/libunicodedata.tex b/Doc/lib/libunicodedata.tex
deleted file mode 100644
index 435466a317..0000000000
--- a/Doc/lib/libunicodedata.tex
+++ /dev/null
@@ -1,160 +0,0 @@
-\section{\module{unicodedata} ---
- Unicode Database}
-
-\declaremodule{standard}{unicodedata}
-\modulesynopsis{Access the Unicode Database.}
-\moduleauthor{Marc-Andre Lemburg}{mal@lemburg.com}
-\sectionauthor{Marc-Andre Lemburg}{mal@lemburg.com}
-\sectionauthor{Martin v. L\"owis}{martin@v.loewis.de}
-
-\index{Unicode}
-\index{character}
-\indexii{Unicode}{database}
-
-This module provides access to the Unicode Character Database which
-defines character properties for all Unicode characters. The data in
-this database is based on the \file{UnicodeData.txt} file version
-4.1.0 which is publicly available from \url{ftp://ftp.unicode.org/}.
-
-The module uses the same names and symbols as defined by the
-UnicodeData File Format 4.1.0 (see
-\url{http://www.unicode.org/Public/4.1.0/ucd/UCD.html}). It
-defines the following functions:
-
-\begin{funcdesc}{lookup}{name}
- Look up character by name. If a character with the
- given name is found, return the corresponding Unicode
- character. If not found, \exception{KeyError} is raised.
-\end{funcdesc}
-
-\begin{funcdesc}{name}{unichr\optional{, default}}
- Returns the name assigned to the Unicode character
- \var{unichr} as a string. If no name is defined,
- \var{default} is returned, or, if not given,
- \exception{ValueError} is raised.
-\end{funcdesc}
-
-\begin{funcdesc}{decimal}{unichr\optional{, default}}
- Returns the decimal value assigned to the Unicode character
- \var{unichr} as integer. If no such value is defined,
- \var{default} is returned, or, if not given,
- \exception{ValueError} is raised.
-\end{funcdesc}
-
-\begin{funcdesc}{digit}{unichr\optional{, default}}
- Returns the digit value assigned to the Unicode character
- \var{unichr} as integer. If no such value is defined,
- \var{default} is returned, or, if not given,
- \exception{ValueError} is raised.
-\end{funcdesc}
-
-\begin{funcdesc}{numeric}{unichr\optional{, default}}
- Returns the numeric value assigned to the Unicode character
- \var{unichr} as float. If no such value is defined, \var{default} is
- returned, or, if not given, \exception{ValueError} is raised.
-\end{funcdesc}
-
-\begin{funcdesc}{category}{unichr}
- Returns the general category assigned to the Unicode character
- \var{unichr} as string.
-\end{funcdesc}
-
-\begin{funcdesc}{bidirectional}{unichr}
- Returns the bidirectional category assigned to the Unicode character
- \var{unichr} as string. If no such value is defined, an empty string
- is returned.
-\end{funcdesc}
-
-\begin{funcdesc}{combining}{unichr}
- Returns the canonical combining class assigned to the Unicode
- character \var{unichr} as integer. Returns \code{0} if no combining
- class is defined.
-\end{funcdesc}
-
-\begin{funcdesc}{east_asian_width}{unichr}
- Returns the east asian width assigned to the Unicode character
- \var{unichr} as string.
-\versionadded{2.4}
-\end{funcdesc}
-
-\begin{funcdesc}{mirrored}{unichr}
- Returns the mirrored property assigned to the Unicode character
- \var{unichr} as integer. Returns \code{1} if the character has been
- identified as a ``mirrored'' character in bidirectional text,
- \code{0} otherwise.
-\end{funcdesc}
-
-\begin{funcdesc}{decomposition}{unichr}
- Returns the character decomposition mapping assigned to the Unicode
- character \var{unichr} as string. An empty string is returned in case
- no such mapping is defined.
-\end{funcdesc}
-
-\begin{funcdesc}{normalize}{form, unistr}
-
-Return the normal form \var{form} for the Unicode string \var{unistr}.
-Valid values for \var{form} are 'NFC', 'NFKC', 'NFD', and 'NFKD'.
-
-The Unicode standard defines various normalization forms of a Unicode
-string, based on the definition of canonical equivalence and
-compatibility equivalence. In Unicode, several characters can be
-expressed in various way. For example, the character U+00C7 (LATIN
-CAPITAL LETTER C WITH CEDILLA) can also be expressed as the sequence
-U+0043 (LATIN CAPITAL LETTER C) U+0327 (COMBINING CEDILLA).
-
-For each character, there are two normal forms: normal form C and
-normal form D. Normal form D (NFD) is also known as canonical
-decomposition, and translates each character into its decomposed form.
-Normal form C (NFC) first applies a canonical decomposition, then
-composes pre-combined characters again.
-
-In addition to these two forms, there are two additional normal forms
-based on compatibility equivalence. In Unicode, certain characters are
-supported which normally would be unified with other characters. For
-example, U+2160 (ROMAN NUMERAL ONE) is really the same thing as U+0049
-(LATIN CAPITAL LETTER I). However, it is supported in Unicode for
-compatibility with existing character sets (e.g. gb2312).
-
-The normal form KD (NFKD) will apply the compatibility decomposition,
-i.e. replace all compatibility characters with their equivalents. The
-normal form KC (NFKC) first applies the compatibility decomposition,
-followed by the canonical composition.
-
-\versionadded{2.3}
-\end{funcdesc}
-
-In addition, the module exposes the following constant:
-
-\begin{datadesc}{unidata_version}
-The version of the Unicode database used in this module.
-
-\versionadded{2.3}
-\end{datadesc}
-
-\begin{datadesc}{ucd_3_2_0}
-This is an object that has the same methods as the entire
-module, but uses the Unicode database version 3.2 instead,
-for applications that require this specific version of
-the Unicode database (such as IDNA).
-
-\versionadded{2.5}
-\end{datadesc}
-
-Examples:
-
-\begin{verbatim}
->>> unicodedata.lookup('LEFT CURLY BRACKET')
-u'{'
->>> unicodedata.name(u'/')
-'SOLIDUS'
->>> unicodedata.decimal(u'9')
-9
->>> unicodedata.decimal(u'a')
-Traceback (most recent call last):
- File "<stdin>", line 1, in ?
-ValueError: not a decimal
->>> unicodedata.category(u'A') # 'L'etter, 'u'ppercase
-'Lu'
->>> unicodedata.bidirectional(u'\u0660') # 'A'rabic, 'N'umber
-'AN'
-\end{verbatim}