diff options
Diffstat (limited to 'Include/unicodeobject.h')
| -rw-r--r-- | Include/unicodeobject.h | 96 |
1 files changed, 64 insertions, 32 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 2c496eaada..5711de0164 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -103,10 +103,6 @@ typedef wchar_t Py_UNICODE; # endif #endif -#if defined(MS_WINDOWS) -# define HAVE_MBCS -#endif - #ifdef HAVE_WCHAR_H /* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */ # ifdef _HAVE_BSDI @@ -117,21 +113,9 @@ typedef wchar_t Py_UNICODE; /* Py_UCS4 and Py_UCS2 are typedefs for the respective unicode representations. */ -#if SIZEOF_INT == 4 -typedef unsigned int Py_UCS4; -#elif SIZEOF_LONG == 4 -typedef unsigned long Py_UCS4; -#else -#error "Could not find a proper typedef for Py_UCS4" -#endif - -#if SIZEOF_SHORT == 2 -typedef unsigned short Py_UCS2; -#else -#error "Could not find a proper typedef for Py_UCS2" -#endif - -typedef unsigned char Py_UCS1; +typedef uint32_t Py_UCS4; +typedef uint16_t Py_UCS2; +typedef uint8_t Py_UCS1; /* --- Internal Unicode Operations ---------------------------------------- */ @@ -172,7 +156,7 @@ typedef unsigned char Py_UCS1; Py_UNICODE_ISNUMERIC(ch)) #define Py_UNICODE_COPY(target, source, length) \ - Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE)) + memcpy((target), (source), (length)*sizeof(Py_UNICODE)) #define Py_UNICODE_FILL(target, value, length) \ do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\ @@ -900,7 +884,7 @@ typedef struct { /* minimum character (default: 127, ASCII) */ Py_UCS4 min_char; - /* If non-zero, overallocate the buffer by 25% (default: 0). */ + /* If non-zero, overallocate the buffer (default: 0). */ unsigned char overallocate; /* If readonly is 1, buffer is a shared string (cannot be modified) @@ -934,6 +918,23 @@ PyAPI_FUNC(int) _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, Py_ssize_t length, Py_UCS4 maxchar); +/* Prepare the buffer to have at least the kind KIND. + For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will + support characters in range U+000-U+FFFF. + + Return 0 on success, raise an exception and return -1 on error. */ +#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \ + (assert((KIND) != PyUnicode_WCHAR_KIND), \ + (KIND) <= (WRITER)->kind \ + ? 0 \ + : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND))) + +/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind() + macro instead. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer, + enum PyUnicode_Kind kind); + /* Append a Unicode character. Return 0 on success, raise an exception and return -1 on error. */ PyAPI_FUNC(int) @@ -1170,22 +1171,30 @@ PyAPI_FUNC(PyObject*) PyUnicode_Decode( ); /* Decode a Unicode object unicode and return the result as Python - object. */ + object. + + This API is DEPRECATED. The only supported standard encoding is rot13. + Use PyCodec_Decode() to decode with rot13 and non-standard codecs + that decode from str. */ PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject( PyObject *unicode, /* Unicode object */ const char *encoding, /* encoding */ const char *errors /* error handling */ - ); + ) Py_DEPRECATED(3.6); /* Decode a Unicode object unicode and return the result as Unicode - object. */ + object. + + This API is DEPRECATED. The only supported standard encoding is rot13. + Use PyCodec_Decode() to decode with rot13 and non-standard codecs + that decode from str to str. */ PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode( PyObject *unicode, /* Unicode object */ const char *encoding, /* encoding */ const char *errors /* error handling */ - ); + ) Py_DEPRECATED(3.6); /* Encodes a Py_UNICODE buffer of the given size and returns a Python string object. */ @@ -1200,13 +1209,18 @@ PyAPI_FUNC(PyObject*) PyUnicode_Encode( #endif /* Encodes a Unicode object and returns the result as Python - object. */ + object. + + This API is DEPRECATED. It is superceeded by PyUnicode_AsEncodedString() + since all standard encodings (except rot13) encode str to bytes. + Use PyCodec_Encode() for encoding with rot13 and non-standard codecs + that encode form str to non-bytes. */ PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject( PyObject *unicode, /* Unicode object */ const char *encoding, /* encoding */ const char *errors /* error handling */ - ); + ) Py_DEPRECATED(3.6); /* Encodes a Unicode object and returns the result as Python string object. */ @@ -1218,13 +1232,17 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString( ); /* Encodes a Unicode object and returns the result as Unicode - object. */ + object. + + This API is DEPRECATED. The only supported standard encodings is rot13. + Use PyCodec_Encode() to encode with rot13 and non-standard codecs + that encode from str to str. */ PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode( PyObject *unicode, /* Unicode object */ const char *encoding, /* encoding */ const char *errors /* error handling */ - ); + ) Py_DEPRECATED(3.6); /* Build an encoding map. */ @@ -1640,13 +1658,13 @@ PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap( ); #endif -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS /* --- MBCS codecs for Windows -------------------------------------------- */ PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS( const char *string, /* MBCS encoded string */ - Py_ssize_t length, /* size of string */ + Py_ssize_t length, /* size of string */ const char *errors /* error handling */ ); @@ -1683,7 +1701,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage( const char *errors /* error handling */ ); -#endif /* HAVE_MBCS */ +#endif /* MS_WINDOWS */ /* --- Decimal Encoder ---------------------------------------------------- */ @@ -1938,6 +1956,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_Join( PyObject *seq /* Sequence object */ ); +#ifndef Py_LIMITED_API +PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray( + PyObject *separator, + PyObject **items, + Py_ssize_t seqlen + ); +#endif /* Py_LIMITED_API */ + /* Return 1 if substr matches str[start:end] at the given tail end, 0 otherwise. */ @@ -2248,11 +2274,17 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency( int check_content); #endif +#ifndef Py_LIMITED_API /* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/ PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*); /* Clear all static strings. */ PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void); +/* Fast equality check when the inputs are known to be exact unicode types + and where the hash values are equal (i.e. a very probable match) */ +PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *); +#endif /* !Py_LIMITED_API */ + #ifdef __cplusplus } #endif |
