summary | refs | log | tree | commit | diff
path: root/Include/unicodeobject.h
diff options
context:
space:
mode:
Diffstat (limited to 'Include/unicodeobject.h')
-rw-r--r-- Include/unicodeobject.h 319
1 files changed, 192 insertions, 127 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 64f30e5373..4d2a8e4d63 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -79,7 +79,7 @@ Copyright (c) Corporation for National Research Initiatives.
#define Py_UNICODE_WIDE
#endif
-/* Set these flags if the platform has "wchar.h", "wctype.h" and the
+/* Set these flags if the platform has "wchar.h" and the
wchar_t type is a 16-bit unsigned type */
/* #define HAVE_WCHAR_H */
/* #define HAVE_USABLE_WCHAR_T */
@@ -100,8 +100,8 @@ Copyright (c) Corporation for National Research Initiatives.
#endif
/* If the compiler provides a wchar_t type we try to support it
- through the interface functions PyUnicode_FromWideChar() and
- PyUnicode_AsWideChar(). */
+ through the interface functions PyUnicode_FromWideChar(),
+ PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
#ifdef HAVE_USABLE_WCHAR_T
# ifndef HAVE_WCHAR_H
@@ -131,7 +131,9 @@ typedef unsigned long Py_UCS4;
Python and represents a single Unicode element in the Unicode
type. */
+#ifndef Py_LIMITED_API
typedef PY_UNICODE_TYPE Py_UNICODE;
+#endif
/* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */
@@ -157,8 +159,10 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
# define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
+# define PyUnicode_AsWideCharString PyUnicodeUCS2_AsWideCharString
# define PyUnicode_ClearFreeList PyUnicodeUCS2_ClearFreelist
# define PyUnicode_Compare PyUnicodeUCS2_Compare
+# define PyUnicode_CompareWithASCIIString PyUnicodeUCS2_CompareWithASCIIString
# define PyUnicode_Concat PyUnicodeUCS2_Concat
# define PyUnicode_Append PyUnicodeUCS2_Append
# define PyUnicode_AppendAndDel PyUnicodeUCS2_AppendAndDel
@@ -200,6 +204,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
# define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter
+# define PyUnicode_FSDecoder PyUnicodeUCS2_FSDecoder
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
@@ -211,7 +216,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_Replace PyUnicodeUCS2_Replace
# define PyUnicode_Resize PyUnicodeUCS2_Resize
# define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare
-# define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
# define PyUnicode_Split PyUnicodeUCS2_Split
# define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
# define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
@@ -220,24 +224,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
# define _PyUnicode_Fini _PyUnicodeUCS2_Fini
# define _PyUnicode_Init _PyUnicodeUCS2_Init
-# define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha
-# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit
-# define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit
-# define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
-# define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
-# define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
-# define _PyUnicode_IsPrintable _PyUnicodeUCS2_IsPrintable
-# define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
-# define _PyUnicode_IsXidStart _PyUnicodeUCS2_IsXidStart
-# define _PyUnicode_IsXidContinue _PyUnicodeUCS2_IsXidContinue
-# define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
-# define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
-# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit
-# define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit
-# define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase
-# define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric
-# define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase
-# define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase
+# define PyUnicode_strdup PyUnicodeUCS2_strdup
#else
@@ -256,8 +243,10 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
# define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
+# define PyUnicode_AsWideCharString PyUnicodeUCS4_AsWideCharString
# define PyUnicode_ClearFreeList PyUnicodeUCS4_ClearFreelist
# define PyUnicode_Compare PyUnicodeUCS4_Compare
+# define PyUnicode_CompareWithASCIIString PyUnicodeUCS4_CompareWithASCIIString
# define PyUnicode_Concat PyUnicodeUCS4_Concat
# define PyUnicode_Append PyUnicodeUCS4_Append
# define PyUnicode_AppendAndDel PyUnicodeUCS4_AppendAndDel
@@ -299,6 +288,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
# define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter
+# define PyUnicode_FSDecoder PyUnicodeUCS4_FSDecoder
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
# define PyUnicode_GetSize PyUnicodeUCS4_GetSize
@@ -310,7 +300,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_Replace PyUnicodeUCS4_Replace
# define PyUnicode_Resize PyUnicodeUCS4_Resize
# define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare
-# define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding
# define PyUnicode_Split PyUnicodeUCS4_Split
# define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
# define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
@@ -319,69 +308,19 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
# define _PyUnicode_Fini _PyUnicodeUCS4_Fini
# define _PyUnicode_Init _PyUnicodeUCS4_Init
-# define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha
-# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit
-# define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit
-# define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
-# define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
-# define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
-# define _PyUnicode_IsPrintable _PyUnicodeUCS4_IsPrintable
-# define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
-# define _PyUnicode_IsXidStart _PyUnicodeUCS4_IsXidStart
-# define _PyUnicode_IsXidContinue _PyUnicodeUCS4_IsXidContinue
-# define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
-# define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
-# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit
-# define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit
-# define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase
-# define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric
-# define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase
-# define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase
-
+# define PyUnicode_strdup PyUnicodeUCS4_strdup
#endif
/* --- Internal Unicode Operations ---------------------------------------- */
-/* If you want Python to use the compiler's wctype.h functions instead
- of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or
- configure Python using --with-wctype-functions. This reduces the
- interpreter's code size. */
-
-#if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
-
-#include <wctype.h>
-
-#define Py_UNICODE_ISSPACE(ch) iswspace(ch)
-
-#define Py_UNICODE_ISLOWER(ch) iswlower(ch)
-#define Py_UNICODE_ISUPPER(ch) iswupper(ch)
-#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
-#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
-
-#define Py_UNICODE_TOLOWER(ch) towlower(ch)
-#define Py_UNICODE_TOUPPER(ch) towupper(ch)
-#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
-
-#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
-#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
-#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
-#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)
-
-#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
-#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
-#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
-
-#define Py_UNICODE_ISALPHA(ch) iswalpha(ch)
-
-#else
-
/* Since splitting on whitespace is an important use case, and
whitespace in most situations is solely ASCII whitespace, we
optimize for the common case by using a quick look-up table
_Py_ascii_whitespace (see below) with an inlined check.
*/
+#ifndef Py_LIMITED_API
#define Py_UNICODE_ISSPACE(ch) \
((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
@@ -405,8 +344,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
-#endif
-
#define Py_UNICODE_ISALNUM(ch) \
(Py_UNICODE_ISALPHA(ch) || \
Py_UNICODE_ISDECIMAL(ch) || \
@@ -428,6 +365,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
((*((string)->str + (offset)) == *((substring)->str)) && \
((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \
!memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE)))
+#endif /* Py_LIMITED_API */
#ifdef __cplusplus
extern "C" {
@@ -435,11 +373,12 @@ extern "C" {
/* --- Unicode Type ------------------------------------------------------- */
+#ifndef Py_LIMITED_API
typedef struct {
PyObject_HEAD
Py_ssize_t length; /* Length of raw Unicode data in buffer */
Py_UNICODE *str; /* Raw Unicode buffer */
- long hash; /* Hash value; -1 if not set */
+ Py_hash_t hash; /* Hash value; -1 if not set */
int state; /* != 0 if interned. In this case the two
* references from the dictionary to this object
* are *not* counted in ob_refcnt. */
@@ -447,6 +386,7 @@ typedef struct {
string, or NULL; this is used for
implementing the buffer protocol */
} PyUnicodeObject;
+#endif
PyAPI_DATA(PyTypeObject) PyUnicode_Type;
PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
@@ -460,6 +400,7 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
/* Fast access macros */
+#ifndef Py_LIMITED_API
#define PyUnicode_GET_SIZE(op) \
(assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length))
#define PyUnicode_GET_DATA_SIZE(op) \
@@ -468,6 +409,7 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
(assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->str))
#define PyUnicode_AS_DATA(op) \
(assert(PyUnicode_Check(op)),((const char *)((PyUnicodeObject *)(op))->str))
+#endif
/* --- Constants ---------------------------------------------------------- */
@@ -492,29 +434,33 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
The buffer is copied into the new object. */
+#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
const Py_UNICODE *u, /* Unicode buffer */
Py_ssize_t size /* size of buffer */
);
+#endif
/* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */
PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
- const char *u, /* char buffer */
- Py_ssize_t size /* size of buffer */
+ const char *u, /* UTF-8 encoded string */
+ Py_ssize_t size /* size of buffer */
);
/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
UTF-8 encoded bytes */
PyAPI_FUNC(PyObject*) PyUnicode_FromString(
- const char *u /* string */
+ const char *u /* UTF-8 encoded string */
);
/* Return a read-only pointer to the Unicode object's internal
Py_UNICODE buffer. */
+#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
PyObject *unicode /* Unicode object */
);
+#endif
/* Get the length of the Unicode object. */
@@ -522,8 +468,10 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
PyObject *unicode /* Unicode object */
);
+#ifndef Py_LIMITED_API
/* Get the maximum ordinal for a Unicode character. */
PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
+#endif
/* Resize an already allocated Unicode object to the new size length.
@@ -584,19 +532,31 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
register PyObject *obj /* Object */
);
-PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char*, va_list);
-PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char*, ...);
+PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
+ const char *format, /* ASCII-encoded string */
+ va_list vargs
+ );
+PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
+ const char *format, /* ASCII-encoded string */
+ ...
+ );
+#ifndef Py_LIMITED_API
/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj,
Py_UNICODE *format_spec,
Py_ssize_t format_spec_len);
+#endif
PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
-PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(const char *);
+PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
+ const char *u /* UTF-8 encoded string */
+ );
+#ifndef Py_LIMITED_API
PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
+#endif
/* Use only if you know it's a string */
#define PyUnicode_CHECK_INTERNED(op) (((PyUnicodeObject *)(op))->state)
@@ -628,11 +588,24 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
error. */
PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
- PyUnicodeObject *unicode, /* Unicode object */
+ PyObject *unicode, /* Unicode object */
register wchar_t *w, /* wchar_t buffer */
Py_ssize_t size /* size of buffer */
);
+/* Convert the Unicode object to a wide character string. The output string
+ always ends with a nul character. If size is not NULL, write the number of
+ wide characters (including the nul character) into *size.
+
+ On success, returns a buffer allocated by PyMem_Malloc() (use PyMem_Free()
+ to free it). On error, raises a MemoryError and returns NULL; *size is
+ then undefined. */
+
+PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
+ PyObject *unicode, /* Unicode object */
+ Py_ssize_t *size /* number of characters of the result */
+ );
+
#endif
/* --- Unicode ordinals --------------------------------------------------- */
@@ -693,9 +666,11 @@ PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
*/
+#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
PyObject *unicode,
const char *errors);
+#endif
/* Returns a pointer to the default encoding (UTF-8) of the
Unicode object unicode and the size of the encoded representation
@@ -711,9 +686,11 @@ PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
*/
+#ifndef Py_LIMITED_API
PyAPI_FUNC(char *) _PyUnicode_AsStringAndSize(
PyObject *unicode,
Py_ssize_t *size);
+#endif
/* Returns a pointer to the default encoding (UTF-8) of the
Unicode object unicode.
@@ -729,22 +706,14 @@ PyAPI_FUNC(char *) _PyUnicode_AsStringAndSize(
*/
+#ifndef Py_LIMITED_API
PyAPI_FUNC(char *) _PyUnicode_AsString(PyObject *unicode);
+#endif
/* Returns "utf-8". */
PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
-/* Sets the currently active default encoding.
-
- Returns 0 on success, -1 in case of an error.
-
- */
-
-PyAPI_FUNC(int) PyUnicode_SetDefaultEncoding(
- const char *encoding /* Encoding name in standard form */
- );
-
/* --- Generic Codecs ----------------------------------------------------- */
/* Create a Unicode object by decoding the encoded string s of the
@@ -778,12 +747,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
/* Encodes a Py_UNICODE buffer of the given size and returns a
Python string object. */
+#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_Encode(
const Py_UNICODE *s, /* Unicode char buffer */
Py_ssize_t size, /* number of Py_UNICODE chars to encode */
const char *encoding, /* encoding */
const char *errors /* error handling */
);
+#endif
/* Encodes a Unicode object and returns the result as Python
object. */
@@ -833,6 +804,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
Py_ssize_t *consumed /* bytes consumed */
);
+#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
const Py_UNICODE *data, /* Unicode char buffer */
Py_ssize_t length, /* number of Py_UNICODE chars to encode */
@@ -840,6 +812,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
const char *errors /* error handling */
);
+#endif
/* --- UTF-8 Codecs ------------------------------------------------------- */
@@ -860,11 +833,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
PyObject *unicode /* Unicode object */
);
+#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
const Py_UNICODE *data, /* Unicode char buffer */
Py_ssize_t length, /* number of Py_UNICODE chars to encode */
const char *errors /* error handling */
);
+#endif
/* --- UTF-32 Codecs ------------------------------------------------------ */
@@ -933,12 +908,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
*/
+#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
const Py_UNICODE *data, /* Unicode char buffer */
Py_ssize_t length, /* number of Py_UNICODE chars to encode */
const char *errors, /* error handling */
int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
);
+#endif
/* --- UTF-16 Codecs ------------------------------------------------------ */
@@ -1011,12 +988,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
*/
+#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
const Py_UNICODE *data, /* Unicode char buffer */
Py_ssize_t length, /* number of Py_UNICODE chars to encode */
const char *errors, /* error handling */
int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
);
+#endif
/* --- Unicode-Escape Codecs ---------------------------------------------- */
@@ -1030,10 +1009,12 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
PyObject *unicode /* Unicode object */
);
+#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
const Py_UNICODE *data, /* Unicode char buffer */
Py_ssize_t length /* Number of Py_UNICODE chars to encode */
);
+#endif
/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
@@ -1047,20 +1028,24 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
PyObject *unicode /* Unicode object */
);
+#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
const Py_UNICODE *data, /* Unicode char buffer */
Py_ssize_t length /* Number of Py_UNICODE chars to encode */
);
+#endif
/* --- Unicode Internal Codec ---------------------------------------------
Only for internal use in _codecsmodule.c */
+#ifndef Py_LIMITED_API
PyObject *_PyUnicode_DecodeUnicodeInternal(
const char *string,
Py_ssize_t length,
const char *errors
);
+#endif
/* --- Latin-1 Codecs -----------------------------------------------------
@@ -1078,11 +1063,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
PyObject *unicode /* Unicode object */
);
+#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
const Py_UNICODE *data, /* Unicode char buffer */
Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
const char *errors /* error handling */
);
+#endif
/* --- ASCII Codecs -------------------------------------------------------
@@ -1100,11 +1087,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
PyObject *unicode /* Unicode object */
);
+#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
const Py_UNICODE *data, /* Unicode char buffer */
Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
const char *errors /* error handling */
);
+#endif
/* --- Character Map Codecs -----------------------------------------------
@@ -1142,6 +1131,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
(unicode ordinal -> char ordinal) */
);
+#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
const Py_UNICODE *data, /* Unicode char buffer */
Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
@@ -1149,6 +1139,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
(unicode ordinal -> char ordinal) */
const char *errors /* error handling */
);
+#endif
/* Translate a Py_UNICODE buffer of the given length by applying a
character mapping table to it and return the resulting Unicode
@@ -1163,12 +1154,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
*/
+#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
const Py_UNICODE *data, /* Unicode char buffer */
Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
PyObject *table, /* Translate table */
const char *errors /* error handling */
);
+#endif
#ifdef MS_WIN32
@@ -1191,11 +1184,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
PyObject *unicode /* Unicode object */
);
+#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
const Py_UNICODE *data, /* Unicode char buffer */
Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
const char *errors /* error handling */
);
+#endif
#endif /* MS_WIN32 */
@@ -1223,27 +1218,47 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
*/
+#ifndef Py_LIMITED_API
PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
Py_UNICODE *s, /* Unicode buffer */
Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
char *output, /* Output buffer; must have size >= length */
const char *errors /* error handling */
);
+#endif
+
+/* Transforms code points that have the decimal digit property to the
+ corresponding ASCII digit code points.
+
+ Returns a new Unicode string on success, NULL on failure.
+*/
+
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
+ Py_UNICODE *s, /* Unicode buffer */
+ Py_ssize_t length /* Number of Py_UNICODE chars to transform */
+ );
+#endif
/* --- File system encoding ---------------------------------------------- */
-/* ParseTuple converter which converts a Unicode object into the file
- system encoding as a bytes object, using the "surrogateescape" error
- handler; bytes objects are output as-is. */
+/* ParseTuple converter: encode str objects to bytes using
+ PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
+/* ParseTuple converter: decode bytes objects to unicode using
+ PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
+
+PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
+
/* Decode a null-terminated string using Py_FileSystemDefaultEncoding
and the "surrogateescape" error handler.
- If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8.
+ If Py_FileSystemDefaultEncoding is not set, fall back to the locale
+ encoding.
- Use PyUnicode_DecodeFSDefaultAndSize() if you have the string length.
+ Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
*/
PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
@@ -1253,7 +1268,8 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
/* Decode a string using Py_FileSystemDefaultEncoding
and the "surrogateescape" error handler.
- If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8.
+ If Py_FileSystemDefaultEncoding is not set, fall back to the locale
+ encoding.
*/
PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
@@ -1261,6 +1277,17 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
Py_ssize_t size /* size */
);
+/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
+ "surrogateescape" error handler, and return bytes.
+
+ If Py_FileSystemDefaultEncoding is not set, fall back to the locale
+ encoding.
+*/
+
+PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
+ PyObject *unicode
+ );
+
/* --- Methods & Slots ----------------------------------------------------
These are capable of handling Unicode objects and strings on input
@@ -1430,7 +1457,7 @@ PyAPI_FUNC(int) PyUnicode_Compare(
PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
PyObject *left,
- const char *right
+ const char *right /* ASCII-encoded string */
);
/* Rich compare two strings and return one of the following:
@@ -1478,26 +1505,31 @@ PyAPI_FUNC(int) PyUnicode_Contains(
PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
+#ifndef Py_LIMITED_API
/* Externally visible for str.strip(unicode) */
PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
PyUnicodeObject *self,
int striptype,
PyObject *sepobj
);
+#endif
/* Using the current locale, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */
+#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer,
Py_ssize_t n_buffer,
Py_UNICODE *digits,
Py_ssize_t n_digits,
Py_ssize_t min_width);
+#endif
/* Using explicit passed-in values, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */
+#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(Py_UNICODE *buffer,
Py_ssize_t n_buffer,
Py_UNICODE *digits,
@@ -1505,10 +1537,12 @@ PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(Py_UNICODE *buffer,
Py_ssize_t min_width,
const char *grouping,
const char *thousands_sep);
+#endif
/* === Characters Type APIs =============================================== */
/* Helper array used by Py_UNICODE_ISSPACE(). */
+#ifndef Py_LIMITED_API
PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
/* These should not be used directly. Use the Py_UNICODE_IS* and
@@ -1519,91 +1553,122 @@ PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
*/
PyAPI_FUNC(int) _PyUnicode_IsLowercase(
- Py_UNICODE ch /* Unicode character */
+ Py_UCS4 ch /* Unicode character */
);
PyAPI_FUNC(int) _PyUnicode_IsUppercase(
- Py_UNICODE ch /* Unicode character */
+ Py_UCS4 ch /* Unicode character */
);
PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
- Py_UNICODE ch /* Unicode character */
+ Py_UCS4 ch /* Unicode character */
);
PyAPI_FUNC(int) _PyUnicode_IsXidStart(
- Py_UNICODE ch /* Unicode character */
+ Py_UCS4 ch /* Unicode character */
);
PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
- Py_UNICODE ch /* Unicode character */
+ Py_UCS4 ch /* Unicode character */
);
PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
- const Py_UNICODE ch /* Unicode character */
+ const Py_UCS4 ch /* Unicode character */
);
PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
- const Py_UNICODE ch /* Unicode character */
+ const Py_UCS4 ch /* Unicode character */
);
-PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase(
- Py_UNICODE ch /* Unicode character */
+PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
+ Py_UCS4 ch /* Unicode character */
);
-PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase(
- Py_UNICODE ch /* Unicode character */
+PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
+ Py_UCS4 ch /* Unicode character */
);
-PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase(
- Py_UNICODE ch /* Unicode character */
+PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
+ Py_UCS4 ch /* Unicode character */
);
PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
- Py_UNICODE ch /* Unicode character */
+ Py_UCS4 ch /* Unicode character */
);
PyAPI_FUNC(int) _PyUnicode_ToDigit(
- Py_UNICODE ch /* Unicode character */
+ Py_UCS4 ch /* Unicode character */
);
PyAPI_FUNC(double) _PyUnicode_ToNumeric(
- Py_UNICODE ch /* Unicode character */
+ Py_UCS4 ch /* Unicode character */
);
PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
- Py_UNICODE ch /* Unicode character */
+ Py_UCS4 ch /* Unicode character */
);
PyAPI_FUNC(int) _PyUnicode_IsDigit(
- Py_UNICODE ch /* Unicode character */
+ Py_UCS4 ch /* Unicode character */
);
PyAPI_FUNC(int) _PyUnicode_IsNumeric(
- Py_UNICODE ch /* Unicode character */
+ Py_UCS4 ch /* Unicode character */
);
PyAPI_FUNC(int) _PyUnicode_IsPrintable(
- Py_UNICODE ch /* Unicode character */
+ Py_UCS4 ch /* Unicode character */
);
PyAPI_FUNC(int) _PyUnicode_IsAlpha(
- Py_UNICODE ch /* Unicode character */
+ Py_UCS4 ch /* Unicode character */
);
-PyAPI_FUNC(size_t) Py_UNICODE_strlen(const Py_UNICODE *u);
+PyAPI_FUNC(size_t) Py_UNICODE_strlen(
+ const Py_UNICODE *u
+ );
PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
+ Py_UNICODE *s1,
+ const Py_UNICODE *s2);
+
+PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
Py_UNICODE *s1, const Py_UNICODE *s2);
PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
- Py_UNICODE *s1, const Py_UNICODE *s2, size_t n);
+ Py_UNICODE *s1,
+ const Py_UNICODE *s2,
+ size_t n);
PyAPI_FUNC(int) Py_UNICODE_strcmp(
- const Py_UNICODE *s1, const Py_UNICODE *s2);
+ const Py_UNICODE *s1,
+ const Py_UNICODE *s2
+ );
+
+PyAPI_FUNC(int) Py_UNICODE_strncmp(
+ const Py_UNICODE *s1,
+ const Py_UNICODE *s2,
+ size_t n
+ );
PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
- const Py_UNICODE *s, Py_UNICODE c
+ const Py_UNICODE *s,
+ Py_UNICODE c
+ );
+
+PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
+ const Py_UNICODE *s,
+ Py_UNICODE c
+ );
+
+/* Create a copy of a unicode string ending with a nul character. Return NULL
+ and raise a MemoryError exception on memory allocation failure, otherwise
+ return a newly allocated buffer (use PyMem_Free() to free the buffer). */
+
+PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
+ PyObject *unicode
);
+#endif /* Py_LIMITED_API */
#ifdef __cplusplus
}