diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-08-05 16:22:51 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-08-05 16:22:51 +0000 |
commit | cf46733632c7279a9fd0fe6ce26f9185a4ae82a9 (patch) | |
tree | da27775a2161723ef342e91af41a8b51fedef405 /subversion/include/private/svn_utf_private.h | |
parent | bb0ef45f7c46b0ae221b26265ef98a768c33f820 (diff) | |
download | subversion-tarball-master.tar.gz |
subversion-1.9.7HEADsubversion-1.9.7master
Diffstat (limited to 'subversion/include/private/svn_utf_private.h')
-rw-r--r-- | subversion/include/private/svn_utf_private.h | 176 |
1 files changed, 175 insertions, 1 deletions
diff --git a/subversion/include/private/svn_utf_private.h b/subversion/include/private/svn_utf_private.h index 9f5a4ad..4584944 100644 --- a/subversion/include/private/svn_utf_private.h +++ b/subversion/include/private/svn_utf_private.h @@ -21,7 +21,7 @@ * @endcopyright * * @file svn_utf_private.h - * @brief UTF validation routines + * @brief UTF validation and normalization routines */ #ifndef SVN_UTF_PRIVATE_H @@ -31,6 +31,8 @@ #include <apr_pools.h> #include "svn_types.h" +#include "svn_string.h" +#include "svn_string_private.h" #ifdef __cplusplus extern "C" { @@ -71,6 +73,18 @@ svn_utf__last_valid(const char *src, apr_size_t len); const char * svn_utf__last_valid2(const char *src, apr_size_t len); +/* Copy LENGTH bytes of SRC, converting characters as follows: + - Pass characters from the ASCII subset to the result + - Strip all combining marks from the string + - Represent other valid Unicode chars as {U+XXXX} + - Replace invalid Unicode chars with {U?XXXX} + - Represent chars that are not valid UTF-8 as ?\XX + - Replace codes outside the Unicode range with a sequence of ?\XX + - Represent the null byte as \0 + Allocate the result in POOL. */ +const char * +svn_utf__fuzzy_escape(const char *src, apr_size_t length, apr_pool_t *pool); + const char * svn_utf__cstring_from_utf8_fuzzy(const char *src, apr_pool_t *pool, @@ -80,6 +94,166 @@ svn_utf__cstring_from_utf8_fuzzy(const char *src, apr_pool_t *)); +#if defined(WIN32) +/* On Windows: Convert the UTF-8 string SRC to UTF-16. + If PREFIX is not NULL, prepend it to the converted result. + The result, if not empty, will be allocated in RESULT_POOL. */ +svn_error_t * +svn_utf__win32_utf8_to_utf16(const WCHAR **result, + const char *src, + const WCHAR *prefix, + apr_pool_t *result_pool); + +/* On Windows: Convert the UTF-16 string SRC to UTF-8. + If PREFIX is not NULL, prepend it to the converted result. + The result, if not empty, will be allocated in RESULT_POOL. 
*/ +svn_error_t * +svn_utf__win32_utf16_to_utf8(const char **result, + const WCHAR *src, + const char *prefix, + apr_pool_t *result_pool); +#endif /* WIN32*/ + + +/* A constant used for many length parameters in the utf8proc wrappers + * to indicate that the length of a string is unknown. */ +#define SVN_UTF__UNKNOWN_LENGTH ((apr_size_t) -1) + + +/* Compare two UTF-8 strings, ignoring normalization, using buffers + * BUF1 and BUF2 for temporary storage. If either of LEN1 or LEN2 is + * SVN_UTF__UNKNOWN_LENGTH, assume the associated string is + * null-terminated; otherwise, consider the string only up to the + * given length. + * + * Return compare value in *RESULT. + */ +svn_error_t * +svn_utf__normcmp(int *result, + const char *str1, apr_size_t len1, + const char *str2, apr_size_t len2, + svn_membuf_t *buf1, svn_membuf_t *buf2); + +/* Normalize the UTF-8 string STR to form C, using BUF for temporary + * storage. If LEN is SVN_UTF__UNKNOWN_LENGTH, assume STR is + * null-terminated; otherwise, consider the string only up to the + * given length. + * + * Return the normalized string in *RESULT, which shares storage with + * BUF and is valid only until the next time BUF is modified. + * + * A returned error may indicate that STR contains invalid UTF-8 or + * invalid Unicode codepoints. + */ +svn_error_t* +svn_utf__normalize(const char **result, + const char *str, apr_size_t len, + svn_membuf_t *buf); + +/* Check if STRING is a valid, NFC-normalized UTF-8 string. Note that + * a FALSE return value may indicate that STRING is not valid UTF-8 at + * all. + * + * Use SCRATCH_POOL for temporary allocations. + */ +svn_boolean_t +svn_utf__is_normalized(const char *string, apr_pool_t *scratch_pool); + +/* Encode an UCS-4 string to UTF-8, placing the result into BUFFER. + * While utf8proc does have a similar function, it does more checking + * and processing than we want here; this function does not attempt + * any normalizations but just encodes the individual code points. 
+ * The encoded string will always be NUL-terminated. + * + * Return the length of the result (excluding the NUL terminator) in + * *result_length. + * + * A returned error indicates that a codepoint is invalid. + */ +svn_error_t * +svn_utf__encode_ucs4_string(svn_membuf_t *buffer, + const apr_int32_t *ucs4str, + apr_size_t length, + apr_size_t *result_length); + +/* Pattern matching similar to the SQLite LIKE and GLOB + * operators. PATTERN, STRING and ESCAPE must all point to UTF-8 + * strings. Furthermore, ESCAPE, if provided, must be a character from + * the ASCII subset. + * + * If any of PATTERN_LEN, STRING_LEN or ESCAPE_LEN are + * SVN_UTF__UNKNOWN_LENGTH, assume the associated string is + * null-terminated; otherwise, consider the string only up to the + * given length. + * + * Use buffers PATTERN_BUF, STRING_BUF and TEMP_BUF for temporary storage. + * + * If SQL_LIKE is true, interpret PATTERN as a pattern used by the SQL + * LIKE operator and notice ESCAPE. Otherwise it's a Unix fileglob + * pattern, and ESCAPE must be NULL. + * + * Set *MATCH to the result of the comparison. +*/ +svn_error_t * +svn_utf__glob(svn_boolean_t *match, + const char *pattern, apr_size_t pattern_len, + const char *string, apr_size_t string_len, + const char *escape, apr_size_t escape_len, + svn_boolean_t sql_like, + svn_membuf_t *pattern_buf, + svn_membuf_t *string_buf, + svn_membuf_t *temp_buf); + +/* Return the compiled version of the wrapped utf8proc library. */ +const char * +svn_utf__utf8proc_compiled_version(void); + +/* Return the runtime version of the wrapped utf8proc library. */ +const char * +svn_utf__utf8proc_runtime_version(void); + +/* Convert an UTF-16 (or UCS-2) string to UTF-8, returning the pointer + * in RESULT. If BIG_ENDIAN is set, then UTF16STR is big-endian; + * otherwise, it's little-endian. 
+ * + * If UTF16LEN is SVN_UTF__UNKNOWN_LENGTH, then UTF16STR must be + * terminated with a zero; otherwise, it is the number of 16-bit codes + * to convert, and the source string may contain NUL values. + * + * Allocate RESULT in RESULT_POOL and use SCRATCH_POOL for + * intermediate allocation. + * + * This function combines UTF-16 surrogate pairs into single code + * points, but will leave single lead or trail surrogates unchanged. + */ +svn_error_t * +svn_utf__utf16_to_utf8(const svn_string_t **result, + const apr_uint16_t *utf16str, + apr_size_t utf16len, + svn_boolean_t big_endian, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool); + +/* Convert an UTF-32 string to UTF-8, returning the pointer in + * RESULT. If BIG_ENDIAN is set, then UTF32STR is big-endian; + * otherwise, it's little-endian. + * + * If UTF32LEN is SVN_UTF__UNKNOWN_LENGTH, then UTF32STR must be + * terminated with a zero; otherwise, it is the number of 32-bit codes + * to convert, and the source string may contain NUL values. + * + * Allocate RESULT in RESULT_POOL and use SCRATCH_POOL for + * intermediate allocation. + */ +svn_error_t * +svn_utf__utf32_to_utf8(const svn_string_t **result, + const apr_int32_t *utf32str, + apr_size_t utf32len, + svn_boolean_t big_endian, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool); + + #ifdef __cplusplus } #endif /* __cplusplus */ |