diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-08-05 16:22:51 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-08-05 16:22:51 +0000 |
commit | cf46733632c7279a9fd0fe6ce26f9185a4ae82a9 (patch) | |
tree | da27775a2161723ef342e91af41a8b51fedef405 /subversion/tests/libsvn_subr/utf-test.c | |
parent | bb0ef45f7c46b0ae221b26265ef98a768c33f820 (diff) | |
download | subversion-tarball-master.tar.gz |
subversion-1.9.7HEADsubversion-1.9.7master
Diffstat (limited to 'subversion/tests/libsvn_subr/utf-test.c')
-rw-r--r-- | subversion/tests/libsvn_subr/utf-test.c | 549 |
1 files changed, 546 insertions, 3 deletions
diff --git a/subversion/tests/libsvn_subr/utf-test.c b/subversion/tests/libsvn_subr/utf-test.c index 2028e14..dd81ccd 100644 --- a/subversion/tests/libsvn_subr/utf-test.c +++ b/subversion/tests/libsvn_subr/utf-test.c @@ -25,6 +25,7 @@ #include "svn_utf.h" #include "svn_pools.h" +#include "private/svn_string_private.h" #include "private/svn_utf_private.h" /* Random number seed. Yes, it's global, just pretend you can't see it. */ @@ -226,7 +227,7 @@ test_utf_cstring_to_utf8_ex2(apr_pool_t *pool) const char *expected_result; const char *from_page; } tests[] = { - {"ascii text\n", "ascii text\n", "unexistant-page"}, + {"ascii text\n", "ascii text\n", "unexistent-page"}, {"Edelwei\xdf", "Edelwei\xc3\x9f", "ISO-8859-1"} }; @@ -266,7 +267,7 @@ test_utf_cstring_from_utf8_ex2(apr_pool_t *pool) const char *expected_result; const char *to_page; } tests[] = { - {"ascii text\n", "ascii text\n", "unexistant-page"}, + {"ascii text\n", "ascii text\n", "unexistent-page"}, {"Edelwei\xc3\x9f", "Edelwei\xdf", "ISO-8859-1"} }; @@ -294,10 +295,540 @@ test_utf_cstring_from_utf8_ex2(apr_pool_t *pool) return SVN_NO_ERROR; } +/* Test normalization-independent UTF-8 string comparison */ +static svn_error_t * +test_utf_collated_compare(apr_pool_t *pool) +{ + /* Normalized: NFC */ + static const char nfc[] = + "\xe1\xb9\xa8" /* S with dot above and below */ + "\xc5\xaf" /* u with ring */ + "\xe1\xb8\x87" /* b with macron below */ + "\xe1\xb9\xbd" /* v with tilde */ + "\xe1\xb8\x9d" /* e with breve and cedilla */ + "\xc8\x91" /* r with double grave */ + "\xc5\xa1" /* s with caron */ + "\xe1\xb8\xaf" /* i with diaeresis and acute */ + "\xe1\xbb\x9d" /* o with grave and hook */ + "\xe1\xb9\x8b"; /* n with circumflex below */ + + /* Normalized: NFD */ + static const char nfd[] = + "S\xcc\xa3\xcc\x87" /* S with dot above and below */ + "u\xcc\x8a" /* u with ring */ + "b\xcc\xb1" /* b with macron below */ + "v\xcc\x83" /* v with tilde */ + "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */ + "r\xcc\x8f" /* r with double grave */ + "s\xcc\x8c" /* s with caron */ + "i\xcc\x88\xcc\x81" /* i with diaeresis and acute */ + "o\xcc\x9b\xcc\x80" /* o with grave and hook */ + "n\xcc\xad"; /* n with circumflex below */ + + /* Mixed, denormalized */ + static const char mixup[] = + "S\xcc\x87\xcc\xa3" /* S with dot above and below */ + "\xc5\xaf" /* u with ring */ + "b\xcc\xb1" /* b with macron below */ + "\xe1\xb9\xbd" /* v with tilde */ + "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */ + "\xc8\x91" /* r with double grave */ + "s\xcc\x8c" /* s with caron */ + "\xe1\xb8\xaf" /* i with diaeresis and acute */ + "o\xcc\x80\xcc\x9b" /* o with grave and hook */ + "\xe1\xb9\x8b"; /* n with circumflex below */ + + static const char longer[] = + "\xe1\xb9\xa8" /* S with dot above and below */ + "\xc5\xaf" /* u with ring */ + "\xe1\xb8\x87" /* b with macron below */ + "\xe1\xb9\xbd" /* v with tilde */ + "\xe1\xb8\x9d" /* e with breve and cedilla */ + "\xc8\x91" /* r with double grave */ + "\xc5\xa1" /* s with caron */ + "\xe1\xb8\xaf" /* i with diaeresis and acute */ + "\xe1\xbb\x9d" /* o with grave and hook */ + "\xe1\xb9\x8b" /* n with circumflex below */ + "X"; + + static const char shorter[] = + "\xe1\xb9\xa8" /* S with dot above and below */ + "\xc5\xaf" /* u with ring */ + "\xe1\xb8\x87" /* b with macron below */ + "\xe1\xb9\xbd" /* v with tilde */ + "\xe1\xb8\x9d" /* e with breve and cedilla */ + "\xc8\x91" /* r with double grave */ + "\xc5\xa1" /* s with caron */ + "\xe1\xb8\xaf" /* i with diaeresis and acute */ + "\xe1\xbb\x9d"; /* o with grave and hook */ + + static const char lowcase[] = + "s\xcc\x87\xcc\xa3" /* s with dot above and below */ + "\xc5\xaf" /* u with ring */ + "b\xcc\xb1" /* b with macron below */ + "\xe1\xb9\xbd" /* v with tilde */ + "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */ + "\xc8\x91" /* r with double grave */ + "s\xcc\x8c" /* s with caron */ + "\xe1\xb8\xaf" /* i with diaeresis and acute */ + "o\xcc\x80\xcc\x9b" /* o with grave and hook */ + "\xe1\xb9\x8b"; /* n with circumflex below */ + + static const struct utfcmp_test_t { + const char *stra; + char op; + const char *strb; + const char *taga; + const char *tagb; + } utfcmp_tests[] = { + /* Empty key */ + {"", '=', "", "empty", "empty"}, + {"", '<', "a", "empty", "nonempty"}, + {"a", '>', "", "nonempty", "empty"}, + + /* Deterministic ordering */ + {"a", '<', "b", "a", "b"}, + {"b", '<', "c", "b", "c"}, + {"a", '<', "c", "a", "c"}, + + /* Normalized equality */ + {nfc, '=', nfd, "nfc", "nfd"}, + {nfd, '=', nfc, "nfd", "nfc"}, + {nfc, '=', mixup, "nfc", "mixup"}, + {nfd, '=', mixup, "nfd", "mixup"}, + {mixup, '=', nfd, "mixup", "nfd"}, + {mixup, '=', nfc, "mixup", "nfc"}, + + /* Key length */ + {nfc, '<', longer, "nfc", "longer"}, + {longer, '>', nfc, "longer", "nfc"}, + {nfd, '>', shorter, "nfd", "shorter"}, + {shorter, '<', nfd, "shorter", "nfd"}, + {mixup, '<', lowcase, "mixup", "lowcase"}, + {lowcase, '>', mixup, "lowcase", "mixup"}, + + {NULL, 0, NULL, NULL, NULL} + }; + + + const struct utfcmp_test_t *ut; + svn_membuf_t bufa, bufb; + svn_membuf__create(&bufa, 0, pool); + svn_membuf__create(&bufb, 0, pool); + + srand(111); + for (ut = utfcmp_tests; ut->stra; ++ut) + { + const svn_boolean_t implicit_size = (rand() % 17) & 1; + const apr_size_t lena = (implicit_size + ? SVN_UTF__UNKNOWN_LENGTH : strlen(ut->stra)); + const apr_size_t lenb = (implicit_size + ? SVN_UTF__UNKNOWN_LENGTH : strlen(ut->strb)); + int result; + + SVN_ERR(svn_utf__normcmp(&result, + ut->stra, lena, ut->strb, lenb, + &bufa, &bufb)); + + /* UCS-4 debugging dump of the decomposed strings + { + const apr_int32_t *const ucsbufa = bufa.data; + const apr_int32_t *const ucsbufb = bufb.data; + apr_size_t i; + + printf("(%c)%7s %c %s\n", ut->op, + ut->taga, (!result ? '=' : (result < 0 ? '<' : '>')), ut->tagb); + + for (i = 0; i < bufa.size || i < bufb.size; ++i) + { + if (i < bufa.size && i < bufb.size) + printf(" U+%04X U+%04X\n", ucsbufa[i], ucsbufb[i]); + else if (i < bufa.size) + printf(" U+%04X\n", ucsbufa[i]); + else + printf(" U+%04X\n", ucsbufb[i]); + } + } + */ + + if (('=' == ut->op && 0 != result) + || ('<' == ut->op && 0 <= result) + || ('>' == ut->op && 0 >= result)) + { + return svn_error_createf + (SVN_ERR_TEST_FAILED, NULL, + "Ut->Op '%s' %c '%s' but '%s' %c '%s'", + ut->taga, ut->op, ut->tagb, + ut->taga, (!result ? '=' : (result < 0 ? '<' : '>')), ut->tagb); + } + } + + return SVN_NO_ERROR; +} + + + +static svn_error_t * +test_utf_pattern_match(apr_pool_t *pool) +{ + static const struct glob_test_t { + svn_boolean_t sql_like; + svn_boolean_t matches; + const char *pattern; + const char *string; + const char *escape; + } glob_tests[] = { +#define LIKE_MATCH TRUE, TRUE +#define LIKE_FAIL TRUE, FALSE +#define GLOB_MATCH FALSE, TRUE +#define GLOB_FAIL FALSE, FALSE + + {LIKE_FAIL, "", "test", NULL}, + {GLOB_FAIL, "", "test", NULL}, + {LIKE_FAIL, "", "%", NULL}, + {GLOB_FAIL, "", "*", NULL}, + {LIKE_FAIL, "test", "%", NULL}, + {GLOB_FAIL, "test", "*", NULL}, + {LIKE_MATCH, "test", "test", NULL}, + {GLOB_MATCH, "test", "test", NULL}, + {LIKE_MATCH, "t\xe1\xb8\x9dst", "te\xcc\xa7\xcc\x86st", NULL}, + {GLOB_MATCH, "te\xcc\xa7\xcc\x86st", "t\xe1\xb8\x9dst", NULL}, + + {LIKE_FAIL, "test", "test", "\xe1\xb8\x9d"}, /* escape char not ascii */ + {LIKE_FAIL, "test", "test", ""}, /* empty escape string */ + + {LIKE_MATCH, "te#st", "test", "#"}, + {LIKE_FAIL, "te#st", "test", NULL}, + {GLOB_MATCH, "te\\st", "test", NULL}, + {LIKE_MATCH, "te##st", "te#st", "#"}, + {LIKE_FAIL, "te##st", "te#st", NULL}, + {GLOB_MATCH, "te\\\\st", "te\\st", NULL}, + {GLOB_FAIL, "te\\\\st", "te\\st", "\\"}, /* escape char with glob */ + {LIKE_FAIL, "te#%t", "te%t", NULL}, + {LIKE_MATCH, "te#%t", "te%t", "#"}, + {GLOB_MATCH, "te\\*t", "te*t", NULL}, + {LIKE_FAIL, "te#%t", "test", NULL}, + {GLOB_FAIL, "te\\*t", "test", NULL}, + {LIKE_FAIL, "te#_t", "te_t", NULL}, + {LIKE_MATCH, "te#_t", "te_t", "#"}, + {GLOB_MATCH, "te\\?t", "te?t", NULL}, + {LIKE_FAIL, "te#_t", "test", NULL}, + {LIKE_FAIL, "te#_t", "test", "#"}, + {GLOB_FAIL, "te\\?t", "test", NULL}, + + {LIKE_MATCH, "_est", "test", NULL}, + {GLOB_MATCH, "?est", "test", NULL}, + {LIKE_MATCH, "te_t", "test", NULL}, + {GLOB_MATCH, "te?t", "test", NULL}, + {LIKE_MATCH, "tes_", "test", NULL}, + {GLOB_MATCH, "tes?", "test", NULL}, + {LIKE_FAIL, "test_", "test", NULL}, + {GLOB_FAIL, "test?", "test", NULL}, + + {LIKE_MATCH, "[s%n]", "[subversion]", NULL}, + {GLOB_FAIL, "[s*n]", "[subversion]", NULL}, + {LIKE_MATCH, "#[s%n]", "[subversion]", "#"}, + {GLOB_MATCH, "\\[s*n]", "[subversion]", NULL}, + + {GLOB_MATCH, ".[\\-\\t]", ".t", NULL}, + {GLOB_MATCH, "test*?*[a-z]*", "testgoop", NULL}, + {GLOB_MATCH, "te[^x]t", "test", NULL}, + {GLOB_MATCH, "te[^abc]t", "test", NULL}, + {GLOB_MATCH, "te[^x]t", "test", NULL}, + {GLOB_MATCH, "te[!x]t", "test", NULL}, + {GLOB_FAIL, "te[^x]t", "text", NULL}, + {GLOB_FAIL, "te[^\\x]t", "text", NULL}, + {GLOB_FAIL, "te[^x\\", "text", NULL}, + {GLOB_FAIL, "te[/]t", "text", NULL}, + {GLOB_MATCH, "te[r-t]t", "test", NULL}, + {GLOB_MATCH, "te[r-Tz]t", "tezt", NULL}, + {GLOB_FAIL, "te[R-T]t", "tent", NULL}, +/* {GLOB_MATCH, "tes[]t]", "test", NULL}, */ + {GLOB_MATCH, "tes[t-]", "test", NULL}, + {GLOB_MATCH, "tes[t-]]", "test]", NULL}, + {GLOB_FAIL, "tes[t-]]", "test", NULL}, + {GLOB_FAIL, "tes[u-]", "test", NULL}, + {GLOB_FAIL, "tes[t-]", "tes[t-]", NULL}, + {GLOB_MATCH, "test[/-/]", "test/", NULL}, + {GLOB_MATCH, "test[\\/-/]", "test/", NULL}, + {GLOB_MATCH, "test[/-\\/]", "test/", NULL}, + +#undef LIKE_MATCH +#undef LIKE_FAIL +#undef GLOB_MATCH +#undef GLOB_FAIL + + {FALSE, FALSE, NULL, NULL, NULL} + }; + + const struct glob_test_t *gt; + svn_membuf_t bufa, bufb, bufc; + svn_membuf__create(&bufa, 0, pool); + svn_membuf__create(&bufb, 0, pool); + svn_membuf__create(&bufc, 0, pool); + + srand(79); + for (gt = glob_tests; gt->pattern; ++gt) + { + const svn_boolean_t implicit_size = (rand() % 13) & 1; + const apr_size_t lenptn = (implicit_size + ? SVN_UTF__UNKNOWN_LENGTH + : strlen(gt->pattern)); + const apr_size_t lenstr = (implicit_size + ? SVN_UTF__UNKNOWN_LENGTH + : strlen(gt->string)); + const apr_size_t lenesc = (implicit_size + ? SVN_UTF__UNKNOWN_LENGTH + : (gt->escape ? strlen(gt->escape) : 0)); + svn_boolean_t match; + svn_error_t *err; + + + err = svn_utf__glob(&match, + gt->pattern, lenptn, + gt->string, lenstr, + gt->escape, lenesc, + gt->sql_like, &bufa, &bufb, &bufc); + + if (!gt->sql_like && gt->escape && !err) + return svn_error_create + (SVN_ERR_TEST_FAILED, err, "Failed to detect GLOB ESCAPE"); + + if ((err && gt->matches) + || (!err && !match != !gt->matches)) + { + if (gt->sql_like) + return svn_error_createf + (SVN_ERR_TEST_FAILED, err, + "Wrong result: %s'%s' LIKE '%s'%s%s%s%s", + (gt->matches ? "NOT " : ""), gt->string, gt->pattern, + (gt->escape ? " ESCAPE " : ""), (gt->escape ? "'" : ""), + (gt->escape ? gt->escape : ""), (gt->escape ? "'" : "")); + else + return svn_error_createf + (SVN_ERR_TEST_FAILED, err, "Wrong result: %s%s GLOB %s", + (gt->matches ? "NOT " : ""), gt->string, gt->pattern); + } + + if (err) + svn_error_clear(err); + } + + return SVN_NO_ERROR; +} + + +static svn_error_t * +test_utf_fuzzy_escape(apr_pool_t *pool) +{ + + /* Accented latin, mixed normalization */ + static const char mixup[] = + "S\xcc\x87\xcc\xa3" /* S with dot above and below */ + "\xc5\xaf" /* u with ring */ + "b\xcc\xb1" /* b with macron below */ + "\xe1\xb9\xbd" /* v with tilde */ + "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */ + "\xc8\x91" /* r with double grave */ + "s\xcc\x8c" /* s with caron */ + "\xe1\xb8\xaf" /* i with diaeresis and acute */ + "o\xcc\x80\xcc\x9b" /* o with grave and hook */ + "\xe1\xb9\x8b"; /* n with circumflex below */ + + /* As above, but latin lowercase 'o' replaced with Greek 'omicron' */ + static const char greekish[] = + "S\xcc\x87\xcc\xa3" /* S with dot above and below */ + "\xc5\xaf" /* u with ring */ + "b\xcc\xb1" /* b with macron below */ + "\xe1\xb9\xbd" /* v with tilde */ + "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */ + "\xc8\x91" /* r with double grave */ + "s\xcc\x8c" /* s with caron */ + "\xe1\xb8\xaf" /* i with diaeresis and acute */ + "\xce\xbf\xcc\x80\xcc\x9b" /* omicron with grave and hook */ + "\xe1\xb9\x8b"; /* n with circumflex below */ + + /* More interesting invalid characters. */ + static const char invalid[] = + "Not Unicode: \xef\xb7\x91;" /* U+FDD1 */ + "Out of range: \xf4\x90\x80\x81;" /* U+110001 */ + "Not UTF-8: \xe6;" + "Null byte: \0;"; + + const char *fuzzy; + + fuzzy = svn_utf__fuzzy_escape(mixup, strlen(mixup), pool); + SVN_TEST_ASSERT(0 == strcmp(fuzzy, "Subversion")); + + fuzzy = svn_utf__fuzzy_escape(greekish, strlen(greekish), pool); + SVN_TEST_ASSERT(0 == strcmp(fuzzy, "Subversi{U+03BF}n")); + + fuzzy = svn_utf__fuzzy_escape(invalid, sizeof(invalid) - 1, pool); + /*fprintf(stderr, "%s\n", fuzzy);*/ + SVN_TEST_ASSERT(0 == strcmp(fuzzy, + "Not Unicode: {U?FDD1};" + "Out of range: ?\\F4?\\90?\\80?\\81;" + "Not UTF-8: ?\\E6;" + "Null byte: \\0;")); + + return SVN_NO_ERROR; +} + +static svn_error_t * +test_utf_is_normalized(apr_pool_t *pool) +{ + /* Normalized: NFC */ + static const char nfc[] = + "\xe1\xb9\xa8" /* S with dot above and below */ + "\xc5\xaf" /* u with ring */ + "\xe1\xb8\x87" /* b with macron below */ + "\xe1\xb9\xbd" /* v with tilde */ + "\xe1\xb8\x9d" /* e with breve and cedilla */ + "\xc8\x91" /* r with double grave */ + "\xc5\xa1" /* s with caron */ + "\xe1\xb8\xaf" /* i with diaeresis and acute */ + "\xe1\xbb\x9d" /* o with grave and hook */ + "\xe1\xb9\x8b"; /* n with circumflex below */ + + /* Normalized: NFD */ + static const char nfd[] = + "S\xcc\xa3\xcc\x87" /* S with dot above and below */ + "u\xcc\x8a" /* u with ring */ + "b\xcc\xb1" /* b with macron below */ + "v\xcc\x83" /* v with tilde */ + "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */ + "r\xcc\x8f" /* r with double grave */ + "s\xcc\x8c" /* s with caron */ + "i\xcc\x88\xcc\x81" /* i with diaeresis and acute */ + "o\xcc\x9b\xcc\x80" /* o with grave and hook */ + "n\xcc\xad"; /* n with circumflex below */ + + /* Mixed, denormalized */ + static const char mixup[] = + "S\xcc\x87\xcc\xa3" /* S with dot above and below */ + "\xc5\xaf" /* u with ring */ + "b\xcc\xb1" /* b with macron below */ + "\xe1\xb9\xbd" /* v with tilde */ + "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */ + "\xc8\x91" /* r with double grave */ + "s\xcc\x8c" /* s with caron */ + "\xe1\xb8\xaf" /* i with diaeresis and acute */ + "o\xcc\x80\xcc\x9b" /* o with grave and hook */ + "\xe1\xb9\x8b"; /* n with circumflex below */ + + /* Invalid UTF-8 */ + static const char invalid[] = + "\xe1\xb9\xa8" /* S with dot above and below */ + "\xc5\xaf" /* u with ring */ + "\xe1\xb8\x87" /* b with macron below */ + "\xe1\xb9\xbd" /* v with tilde */ + "\xe1\xb8\x9d" /* e with breve and cedilla */ + "\xc8\x91" /* r with double grave */ + "\xc5\xa1" /* s with caron */ + "\xe1\xb8\xaf" /* i with diaeresis and acute */ + "\xe6" /* Invalid byte */ + "\xe1\xb9\x8b"; /* n with circumflex below */ + + SVN_ERR_ASSERT(svn_utf__is_normalized(nfc, pool)); + SVN_ERR_ASSERT(!svn_utf__is_normalized(nfd, pool)); + SVN_ERR_ASSERT(!svn_utf__is_normalized(mixup, pool)); + SVN_ERR_ASSERT(!svn_utf__is_normalized(invalid, pool)); + + return SVN_NO_ERROR; +} + + +static svn_error_t * +test_utf_conversions(apr_pool_t *pool) +{ + static const struct cvt_test_t + { + svn_boolean_t sixteenbit; + svn_boolean_t bigendian; + const char *source; + const char *result; + } tests[] = { + +#define UTF_32_LE FALSE, FALSE +#define UTF_32_BE FALSE, TRUE +#define UTF_16_LE TRUE, FALSE +#define UTF_16_BE TRUE, TRUE + + /* Normal character conversion */ + { UTF_32_LE, "t\0\0\0" "e\0\0\0" "s\0\0\0" "t\0\0\0" "\0\0\0\0", "test" }, + { UTF_32_BE, "\0\0\0t" "\0\0\0e" "\0\0\0s" "\0\0\0t" "\0\0\0\0", "test" }, + { UTF_16_LE, "t\0" "e\0" "s\0" "t\0" "\0\0", "test" }, + { UTF_16_BE, "\0t" "\0e" "\0s" "\0t" "\0\0", "test" }, + + /* Valid surrogate pairs */ + { UTF_16_LE, "\x00\xD8" "\x00\xDC" "\0\0", "\xf0\x90\x80\x80" }, /* U+010000 */ + { UTF_16_LE, "\x34\xD8" "\x1E\xDD" "\0\0", "\xf0\x9d\x84\x9e" }, /* U+01D11E */ + { UTF_16_LE, "\xFF\xDB" "\xFD\xDF" "\0\0", "\xf4\x8f\xbf\xbd" }, /* U+10FFFD */ + + { UTF_16_BE, "\xD8\x00" "\xDC\x00" "\0\0", "\xf0\x90\x80\x80" }, /* U+010000 */ + { UTF_16_BE, "\xD8\x34" "\xDD\x1E" "\0\0", "\xf0\x9d\x84\x9e" }, /* U+01D11E */ + { UTF_16_BE, "\xDB\xFF" "\xDF\xFD" "\0\0", "\xf4\x8f\xbf\xbd" }, /* U+10FFFD */ + + /* Swapped, single and trailing surrogate pairs */ + { UTF_16_LE, "*\0" "\x00\xDC" "\x00\xD8" "*\0\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*" }, + { UTF_16_LE, "*\0" "\x1E\xDD" "*\0\0\0", "*\xed\xb4\x9e*" }, + { UTF_16_LE, "*\0" "\xFF\xDB" "*\0\0\0", "*\xed\xaf\xbf*" }, + { UTF_16_LE, "\x1E\xDD" "\0\0", "\xed\xb4\x9e" }, + { UTF_16_LE, "\xFF\xDB" "\0\0", "\xed\xaf\xbf" }, + + { UTF_16_BE, "\0*" "\xDC\x00" "\xD8\x00" "\0*\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*" }, + { UTF_16_BE, "\0*" "\xDD\x1E" "\0*\0\0", "*\xed\xb4\x9e*" }, + { UTF_16_BE, "\0*" "\xDB\xFF" "\0*\0\0", "*\xed\xaf\xbf*" }, + { UTF_16_BE, "\xDD\x1E" "\0\0", "\xed\xb4\x9e" }, + { UTF_16_BE, "\xDB\xFF" "\0\0", "\xed\xaf\xbf" }, + +#undef UTF_32_LE +#undef UTF_32_BE +#undef UTF_16_LE +#undef UTF_16_BE + + { 0 } + }; + + const struct cvt_test_t *tc; + const svn_string_t *result; + int i; + + for (i = 1, tc = tests; tc->source; ++tc, ++i) + { + if (tc->sixteenbit) + SVN_ERR(svn_utf__utf16_to_utf8(&result, (const void*)tc->source, + SVN_UTF__UNKNOWN_LENGTH, + tc->bigendian, pool, pool)); + else + SVN_ERR(svn_utf__utf32_to_utf8(&result, (const void*)tc->source, + SVN_UTF__UNKNOWN_LENGTH, + tc->bigendian, pool, pool)); + SVN_ERR_ASSERT(0 == strcmp(result->data, tc->result)); + } + + /* Test counted strings with NUL characters */ + SVN_ERR(svn_utf__utf16_to_utf8( + &result, (void*)("x\0" "\0\0" "y\0" "*\0"), 3, + FALSE, pool, pool)); + SVN_ERR_ASSERT(0 == memcmp(result->data, "x\0y", 3)); + + SVN_ERR(svn_utf__utf32_to_utf8( + &result, + (void*)("\0\0\0x" "\0\0\0\0" "\0\0\0y" "\0\0\0*"), 3, + TRUE, pool, pool)); + SVN_ERR_ASSERT(0 == memcmp(result->data, "x\0y", 3)); + + return SVN_NO_ERROR; +} + + /* The test table. */ -struct svn_test_descriptor_t test_funcs[] = +static int max_threads = 1; + +static struct svn_test_descriptor_t test_funcs[] = { SVN_TEST_NULL, SVN_TEST_PASS2(utf_validate, @@ -308,5 +839,17 @@ struct svn_test_descriptor_t test_funcs[] = "test svn_utf_cstring_to_utf8_ex2"), SVN_TEST_PASS2(test_utf_cstring_from_utf8_ex2, "test svn_utf_cstring_from_utf8_ex2"), + SVN_TEST_PASS2(test_utf_collated_compare, + "test svn_utf__normcmp"), + SVN_TEST_PASS2(test_utf_pattern_match, + "test svn_utf__glob"), + SVN_TEST_PASS2(test_utf_fuzzy_escape, + "test svn_utf__fuzzy_escape"), + SVN_TEST_PASS2(test_utf_is_normalized, + "test svn_utf__is_normalized"), + SVN_TEST_PASS2(test_utf_conversions, + "test svn_utf__utf{16,32}_to_utf8"), SVN_TEST_NULL }; + +SVN_TEST_MAIN |