diff options
Diffstat (limited to 'Include/unicodeobject.h')
-rw-r--r-- | Include/unicodeobject.h | 319 |
1 files changed, 192 insertions, 127 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 64f30e5373..4d2a8e4d63 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -79,7 +79,7 @@ Copyright (c) Corporation for National Research Initiatives. #define Py_UNICODE_WIDE #endif -/* Set these flags if the platform has "wchar.h", "wctype.h" and the +/* Set these flags if the platform has "wchar.h" and the wchar_t type is a 16-bit unsigned type */ /* #define HAVE_WCHAR_H */ /* #define HAVE_USABLE_WCHAR_T */ @@ -100,8 +100,8 @@ Copyright (c) Corporation for National Research Initiatives. #endif /* If the compiler provides a wchar_t type we try to support it - through the interface functions PyUnicode_FromWideChar() and - PyUnicode_AsWideChar(). */ + through the interface functions PyUnicode_FromWideChar(), + PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */ #ifdef HAVE_USABLE_WCHAR_T # ifndef HAVE_WCHAR_H @@ -131,7 +131,9 @@ typedef unsigned long Py_UCS4; Python and represents a single Unicode element in the Unicode type. */ +#ifndef Py_LIMITED_API typedef PY_UNICODE_TYPE Py_UNICODE; +#endif /* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */ @@ -157,8 +159,10 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString # define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar +# define PyUnicode_AsWideCharString PyUnicodeUCS2_AsWideCharString # define PyUnicode_ClearFreeList PyUnicodeUCS2_ClearFreelist # define PyUnicode_Compare PyUnicodeUCS2_Compare +# define PyUnicode_CompareWithASCIIString PyUnicodeUCS2_CompareWithASCIIString # define PyUnicode_Concat PyUnicodeUCS2_Concat # define PyUnicode_Append PyUnicodeUCS2_Append # define PyUnicode_AppendAndDel PyUnicodeUCS2_AppendAndDel @@ -200,6 +204,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar # define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter +# define PyUnicode_FSDecoder PyUnicodeUCS2_FSDecoder # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding # define PyUnicode_GetMax PyUnicodeUCS2_GetMax # define PyUnicode_GetSize PyUnicodeUCS2_GetSize @@ -211,7 +216,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_Replace PyUnicodeUCS2_Replace # define PyUnicode_Resize PyUnicodeUCS2_Resize # define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare -# define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding # define PyUnicode_Split PyUnicodeUCS2_Split # define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines # define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch @@ -220,24 +224,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString # define _PyUnicode_Fini _PyUnicodeUCS2_Fini # define _PyUnicode_Init _PyUnicodeUCS2_Init -# define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha -# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit -# define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit -# define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak -# define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase -# define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric -# define _PyUnicode_IsPrintable _PyUnicodeUCS2_IsPrintable -# define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase -# define _PyUnicode_IsXidStart _PyUnicodeUCS2_IsXidStart -# define _PyUnicode_IsXidContinue _PyUnicodeUCS2_IsXidContinue -# define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase -# define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace -# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit -# define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit -# define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase -# define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric -# define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase -# define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase +# define PyUnicode_strdup PyUnicodeUCS2_strdup #else @@ -256,8 +243,10 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString # define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar +# define PyUnicode_AsWideCharString PyUnicodeUCS4_AsWideCharString # define PyUnicode_ClearFreeList PyUnicodeUCS4_ClearFreelist # define PyUnicode_Compare PyUnicodeUCS4_Compare +# define PyUnicode_CompareWithASCIIString PyUnicodeUCS4_CompareWithASCIIString # define PyUnicode_Concat PyUnicodeUCS4_Concat # define PyUnicode_Append PyUnicodeUCS4_Append # define PyUnicode_AppendAndDel PyUnicodeUCS4_AppendAndDel @@ -299,6 +288,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar # define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter +# define PyUnicode_FSDecoder PyUnicodeUCS4_FSDecoder # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding # define PyUnicode_GetMax PyUnicodeUCS4_GetMax # define PyUnicode_GetSize PyUnicodeUCS4_GetSize @@ -310,7 +300,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_Replace PyUnicodeUCS4_Replace # define PyUnicode_Resize PyUnicodeUCS4_Resize # define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare -# define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding # define PyUnicode_Split PyUnicodeUCS4_Split # define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines # define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch @@ -319,69 +308,19 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString # define _PyUnicode_Fini _PyUnicodeUCS4_Fini # define _PyUnicode_Init _PyUnicodeUCS4_Init -# define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha -# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit -# define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit -# define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak -# define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase -# define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric -# define _PyUnicode_IsPrintable _PyUnicodeUCS4_IsPrintable -# define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase -# define _PyUnicode_IsXidStart _PyUnicodeUCS4_IsXidStart -# define _PyUnicode_IsXidContinue _PyUnicodeUCS4_IsXidContinue -# define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase -# define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace -# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit -# define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit -# define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase -# define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric -# define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase -# define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase - +# define PyUnicode_strdup PyUnicodeUCS4_strdup #endif /* --- Internal Unicode Operations ---------------------------------------- */ -/* If you want Python to use the compiler's wctype.h functions instead - of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or - configure Python using --with-wctype-functions. This reduces the - interpreter's code size. */ - -#if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS) - -#include <wctype.h> - -#define Py_UNICODE_ISSPACE(ch) iswspace(ch) - -#define Py_UNICODE_ISLOWER(ch) iswlower(ch) -#define Py_UNICODE_ISUPPER(ch) iswupper(ch) -#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) -#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch) - -#define Py_UNICODE_TOLOWER(ch) towlower(ch) -#define Py_UNICODE_TOUPPER(ch) towupper(ch) -#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch) - -#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch) -#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch) -#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch) -#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch) - -#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch) -#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch) -#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch) - -#define Py_UNICODE_ISALPHA(ch) iswalpha(ch) - -#else - /* Since splitting on whitespace is an important use case, and whitespace in most situations is solely ASCII whitespace, we optimize for the common case by using a quick look-up table _Py_ascii_whitespace (see below) with an inlined check. */ +#ifndef Py_LIMITED_API #define Py_UNICODE_ISSPACE(ch) \ ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch)) @@ -405,8 +344,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE; #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch) -#endif - #define Py_UNICODE_ISALNUM(ch) \ (Py_UNICODE_ISALPHA(ch) || \ Py_UNICODE_ISDECIMAL(ch) || \ @@ -428,6 +365,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; ((*((string)->str + (offset)) == *((substring)->str)) && \ ((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \ !memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE))) +#endif /* Py_LIMITED_API */ #ifdef __cplusplus extern "C" { @@ -435,11 +373,12 @@ extern "C" { /* --- Unicode Type ------------------------------------------------------- */ +#ifndef Py_LIMITED_API typedef struct { PyObject_HEAD Py_ssize_t length; /* Length of raw Unicode data in buffer */ Py_UNICODE *str; /* Raw Unicode buffer */ - long hash; /* Hash value; -1 if not set */ + Py_hash_t hash; /* Hash value; -1 if not set */ int state; /* != 0 if interned. In this case the two * references from the dictionary to this object * are *not* counted in ob_refcnt. */ @@ -447,6 +386,7 @@ typedef struct { string, or NULL; this is used for implementing the buffer protocol */ } PyUnicodeObject; +#endif PyAPI_DATA(PyTypeObject) PyUnicode_Type; PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; @@ -460,6 +400,7 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type) /* Fast access macros */ +#ifndef Py_LIMITED_API #define PyUnicode_GET_SIZE(op) \ (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length)) #define PyUnicode_GET_DATA_SIZE(op) \ @@ -468,6 +409,7 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->str)) #define PyUnicode_AS_DATA(op) \ (assert(PyUnicode_Check(op)),((const char *)((PyUnicodeObject *)(op))->str)) +#endif /* --- Constants ---------------------------------------------------------- */ @@ -492,29 +434,33 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; The buffer is copied into the new object. */ +#ifndef Py_LIMITED_API PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( const Py_UNICODE *u, /* Unicode buffer */ Py_ssize_t size /* size of buffer */ ); +#endif /* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */ PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize( - const char *u, /* char buffer */ - Py_ssize_t size /* size of buffer */ + const char *u, /* UTF-8 encoded string */ + Py_ssize_t size /* size of buffer */ ); /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated UTF-8 encoded bytes */ PyAPI_FUNC(PyObject*) PyUnicode_FromString( - const char *u /* string */ + const char *u /* UTF-8 encoded string */ ); /* Return a read-only pointer to the Unicode object's internal Py_UNICODE buffer. */ +#ifndef Py_LIMITED_API PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode( PyObject *unicode /* Unicode object */ ); +#endif /* Get the length of the Unicode object. */ @@ -522,8 +468,10 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize( PyObject *unicode /* Unicode object */ ); +#ifndef Py_LIMITED_API /* Get the maximum ordinal for a Unicode character. */ PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void); +#endif /* Resize an already allocated Unicode object to the new size length. @@ -584,19 +532,31 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromObject( register PyObject *obj /* Object */ ); -PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char*, va_list); -PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char*, ...); +PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV( + const char *format, /* ASCII-encoded string */ + va_list vargs + ); +PyAPI_FUNC(PyObject *) PyUnicode_FromFormat( + const char *format, /* ASCII-encoded string */ + ... + ); +#ifndef Py_LIMITED_API /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). */ PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj, Py_UNICODE *format_spec, Py_ssize_t format_spec_len); +#endif PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **); PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **); -PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(const char *); +PyAPI_FUNC(PyObject *) PyUnicode_InternFromString( + const char *u /* UTF-8 encoded string */ + ); +#ifndef Py_LIMITED_API PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void); +#endif /* Use only if you know it's a string */ #define PyUnicode_CHECK_INTERNED(op) (((PyUnicodeObject *)(op))->state) @@ -628,11 +588,24 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar( error. */ PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar( - PyUnicodeObject *unicode, /* Unicode object */ + PyObject *unicode, /* Unicode object */ register wchar_t *w, /* wchar_t buffer */ Py_ssize_t size /* size of buffer */ ); +/* Convert the Unicode object to a wide character string. The output string + always ends with a nul character. If size is not NULL, write the number of + wide characters (including the nul character) into *size. + + Returns a buffer allocated by PyMem_Alloc() (use PyMem_Free() to free it) + on success. On error, returns NULL, *size is undefined and raises a + MemoryError. */ + +PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString( + PyObject *unicode, /* Unicode object */ + Py_ssize_t *size /* number of characters of the result */ + ); + #endif /* --- Unicode ordinals --------------------------------------------------- */ @@ -693,9 +666,11 @@ PyAPI_FUNC(int) PyUnicode_ClearFreeList(void); */ +#ifndef Py_LIMITED_API PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString( PyObject *unicode, const char *errors); +#endif /* Returns a pointer to the default encoding (UTF-8) of the Unicode object unicode and the size of the encoded representation @@ -711,9 +686,11 @@ PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString( */ +#ifndef Py_LIMITED_API PyAPI_FUNC(char *) _PyUnicode_AsStringAndSize( PyObject *unicode, Py_ssize_t *size); +#endif /* Returns a pointer to the default encoding (UTF-8) of the Unicode object unicode. @@ -729,22 +706,14 @@ PyAPI_FUNC(char *) _PyUnicode_AsStringAndSize( */ +#ifndef Py_LIMITED_API PyAPI_FUNC(char *) _PyUnicode_AsString(PyObject *unicode); +#endif /* Returns "utf-8". */ PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void); -/* Sets the currently active default encoding. - - Returns 0 on success, -1 in case of an error. - - */ - -PyAPI_FUNC(int) PyUnicode_SetDefaultEncoding( - const char *encoding /* Encoding name in standard form */ - ); - /* --- Generic Codecs ----------------------------------------------------- */ /* Create a Unicode object by decoding the encoded string s of the @@ -778,12 +747,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode( /* Encodes a Py_UNICODE buffer of the given size and returns a Python string object. */ +#ifndef Py_LIMITED_API PyAPI_FUNC(PyObject*) PyUnicode_Encode( const Py_UNICODE *s, /* Unicode char buffer */ Py_ssize_t size, /* number of Py_UNICODE chars to encode */ const char *encoding, /* encoding */ const char *errors /* error handling */ ); +#endif /* Encodes a Unicode object and returns the result as Python object. */ @@ -833,6 +804,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful( Py_ssize_t *consumed /* bytes consumed */ ); +#ifndef Py_LIMITED_API PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7( const Py_UNICODE *data, /* Unicode char buffer */ Py_ssize_t length, /* number of Py_UNICODE chars to encode */ @@ -840,6 +812,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7( int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */ const char *errors /* error handling */ ); +#endif /* --- UTF-8 Codecs ------------------------------------------------------- */ @@ -860,11 +833,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String( PyObject *unicode /* Unicode object */ ); +#ifndef Py_LIMITED_API PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8( const Py_UNICODE *data, /* Unicode char buffer */ Py_ssize_t length, /* number of Py_UNICODE chars to encode */ const char *errors /* error handling */ ); +#endif /* --- UTF-32 Codecs ------------------------------------------------------ */ @@ -933,12 +908,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String( */ +#ifndef Py_LIMITED_API PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32( const Py_UNICODE *data, /* Unicode char buffer */ Py_ssize_t length, /* number of Py_UNICODE chars to encode */ const char *errors, /* error handling */ int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ ); +#endif /* --- UTF-16 Codecs ------------------------------------------------------ */ @@ -1011,12 +988,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String( */ +#ifndef Py_LIMITED_API PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16( const Py_UNICODE *data, /* Unicode char buffer */ Py_ssize_t length, /* number of Py_UNICODE chars to encode */ const char *errors, /* error handling */ int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ ); +#endif /* --- Unicode-Escape Codecs ---------------------------------------------- */ @@ -1030,10 +1009,12 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString( PyObject *unicode /* Unicode object */ ); +#ifndef Py_LIMITED_API PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape( const Py_UNICODE *data, /* Unicode char buffer */ Py_ssize_t length /* Number of Py_UNICODE chars to encode */ ); +#endif /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */ @@ -1047,20 +1028,24 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString( PyObject *unicode /* Unicode object */ ); +#ifndef Py_LIMITED_API PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape( const Py_UNICODE *data, /* Unicode char buffer */ Py_ssize_t length /* Number of Py_UNICODE chars to encode */ ); +#endif /* --- Unicode Internal Codec --------------------------------------------- Only for internal use in _codecsmodule.c */ +#ifndef Py_LIMITED_API PyObject *_PyUnicode_DecodeUnicodeInternal( const char *string, Py_ssize_t length, const char *errors ); +#endif /* --- Latin-1 Codecs ----------------------------------------------------- @@ -1078,11 +1063,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String( PyObject *unicode /* Unicode object */ ); +#ifndef Py_LIMITED_API PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1( const Py_UNICODE *data, /* Unicode char buffer */ Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ const char *errors /* error handling */ ); +#endif /* --- ASCII Codecs ------------------------------------------------------- @@ -1100,11 +1087,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString( PyObject *unicode /* Unicode object */ ); +#ifndef Py_LIMITED_API PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII( const Py_UNICODE *data, /* Unicode char buffer */ Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ const char *errors /* error handling */ ); +#endif /* --- Character Map Codecs ----------------------------------------------- @@ -1142,6 +1131,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString( (unicode ordinal -> char ordinal) */ ); +#ifndef Py_LIMITED_API PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap( const Py_UNICODE *data, /* Unicode char buffer */ Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ @@ -1149,6 +1139,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap( (unicode ordinal -> char ordinal) */ const char *errors /* error handling */ ); +#endif /* Translate a Py_UNICODE buffer of the given length by applying a character mapping table to it and return the resulting Unicode @@ -1163,12 +1154,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap( */ +#ifndef Py_LIMITED_API PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap( const Py_UNICODE *data, /* Unicode char buffer */ Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ PyObject *table, /* Translate table */ const char *errors /* error handling */ ); +#endif #ifdef MS_WIN32 @@ -1191,11 +1184,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString( PyObject *unicode /* Unicode object */ ); +#ifndef Py_LIMITED_API PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS( const Py_UNICODE *data, /* Unicode char buffer */ Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ const char *errors /* error handling */ ); +#endif #endif /* MS_WIN32 */ @@ -1223,27 +1218,47 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS( */ +#ifndef Py_LIMITED_API PyAPI_FUNC(int) PyUnicode_EncodeDecimal( Py_UNICODE *s, /* Unicode buffer */ Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ char *output, /* Output buffer; must have size >= length */ const char *errors /* error handling */ ); +#endif + +/* Transforms code points that have decimal digit property to the + corresponding ASCII digit code points. + + Returns a new Unicode string on success, NULL on failure. +*/ + +#ifndef Py_LIMITED_API +PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII( + Py_UNICODE *s, /* Unicode buffer */ + Py_ssize_t length /* Number of Py_UNICODE chars to transform */ + ); +#endif /* --- File system encoding ---------------------------------------------- */ -/* ParseTuple converter which converts a Unicode object into the file - system encoding as a bytes object, using the "surrogateescape" error - handler; bytes objects are output as-is. */ +/* ParseTuple converter: encode str objects to bytes using + PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */ PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*); +/* ParseTuple converter: decode bytes objects to unicode using + PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */ + +PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*); + /* Decode a null-terminated string using Py_FileSystemDefaultEncoding and the "surrogateescape" error handler. - If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8. + If Py_FileSystemDefaultEncoding is not set, fall back to the locale + encoding. - Use PyUnicode_DecodeFSDefaultAndSize() if you have the string length. + Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known. */ PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault( @@ -1253,7 +1268,8 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault( /* Decode a string using Py_FileSystemDefaultEncoding and the "surrogateescape" error handler. - If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8. + If Py_FileSystemDefaultEncoding is not set, fall back to the locale + encoding. */ PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize( @@ -1261,6 +1277,17 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize( Py_ssize_t size /* size */ ); +/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the + "surrogateescape" error handler, and return bytes. + + If Py_FileSystemDefaultEncoding is not set, fall back to the locale + encoding. +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault( + PyObject *unicode + ); + /* --- Methods & Slots ---------------------------------------------------- These are capable of handling Unicode objects and strings on input @@ -1430,7 +1457,7 @@ PyAPI_FUNC(int) PyUnicode_Compare( PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString( PyObject *left, - const char *right + const char *right /* ASCII-encoded string */ ); /* Rich compare two strings and return one of the following: @@ -1478,26 +1505,31 @@ PyAPI_FUNC(int) PyUnicode_Contains( PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s); +#ifndef Py_LIMITED_API /* Externally visible for str.strip(unicode) */ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip( PyUnicodeObject *self, int striptype, PyObject *sepobj ); +#endif /* Using the current locale, insert the thousands grouping into the string pointed to by buffer. For the argument descriptions, see Objects/stringlib/localeutil.h */ +#ifndef Py_LIMITED_API PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer, Py_ssize_t n_buffer, Py_UNICODE *digits, Py_ssize_t n_digits, Py_ssize_t min_width); +#endif /* Using explicit passed-in values, insert the thousands grouping into the string pointed to by buffer. For the argument descriptions, see Objects/stringlib/localeutil.h */ +#ifndef Py_LIMITED_API PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(Py_UNICODE *buffer, Py_ssize_t n_buffer, Py_UNICODE *digits, @@ -1505,10 +1537,12 @@ PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(Py_UNICODE *buffer, Py_ssize_t min_width, const char *grouping, const char *thousands_sep); +#endif /* === Characters Type APIs =============================================== */ /* Helper array used by Py_UNICODE_ISSPACE(). */ +#ifndef Py_LIMITED_API PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[]; /* These should not be used directly. Use the Py_UNICODE_IS* and @@ -1519,91 +1553,122 @@ PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[]; */ PyAPI_FUNC(int) _PyUnicode_IsLowercase( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsUppercase( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsTitlecase( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsXidStart( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsXidContinue( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsWhitespace( - const Py_UNICODE ch /* Unicode character */ + const Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsLinebreak( - const Py_UNICODE ch /* Unicode character */ + const Py_UCS4 ch /* Unicode character */ ); -PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase( - Py_UNICODE ch /* Unicode character */ +PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase( + Py_UCS4 ch /* Unicode character */ ); -PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase( - Py_UNICODE ch /* Unicode character */ +PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase( + Py_UCS4 ch /* Unicode character */ ); -PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase( - Py_UNICODE ch /* Unicode character */ +PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase( + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_ToDigit( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(double) _PyUnicode_ToNumeric( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsDigit( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsNumeric( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsPrintable( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsAlpha( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); -PyAPI_FUNC(size_t) Py_UNICODE_strlen(const Py_UNICODE *u); +PyAPI_FUNC(size_t) Py_UNICODE_strlen( + const Py_UNICODE *u + ); PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy( + Py_UNICODE *s1, + const Py_UNICODE *s2); + +PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat( Py_UNICODE *s1, const Py_UNICODE *s2); PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy( - Py_UNICODE *s1, const Py_UNICODE *s2, size_t n); + Py_UNICODE *s1, + const Py_UNICODE *s2, + size_t n); PyAPI_FUNC(int) Py_UNICODE_strcmp( - const Py_UNICODE *s1, const Py_UNICODE *s2); + const Py_UNICODE *s1, + const Py_UNICODE *s2 + ); + +PyAPI_FUNC(int) Py_UNICODE_strncmp( + const Py_UNICODE *s1, + const Py_UNICODE *s2, + size_t n + ); PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr( - const Py_UNICODE *s, Py_UNICODE c + const Py_UNICODE *s, + Py_UNICODE c + ); + +PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr( + const Py_UNICODE *s, + Py_UNICODE c + ); + +/* Create a copy of a unicode string ending with a nul character. Return NULL + and raise a MemoryError exception on memory allocation failure, otherwise + return a new allocated buffer (use PyMem_Free() to free the buffer). */ + +PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy( + PyObject *unicode ); +#endif /* Py_LIMITED_API */ #ifdef __cplusplus } |