diff options
author | Emmanuele Bassi <ebassi@openedhand.com> | 2008-04-19 17:16:28 +0100 |
---|---|---|
committer | Emmanuele Bassi <ebassi@openedhand.com> | 2008-04-19 17:16:28 +0100 |
commit | 379a2fe972c6e7436be9f345fe018bf4fe575354 (patch) | |
tree | c0b14e62c020b57db5cbd4b9a4b9adf280a8a35c /json-glib/json-scanner.c | |
parent | 9429ecaa88cdb6705189b6970481911f11339ff6 (diff) | |
download | json-glib-379a2fe972c6e7436be9f345fe018bf4fe575354.tar.gz |
Add support for parsing \uXXXX into Unicode
JsonScanner was forked from GScanner in order to support the
JSON-specific Unicode escaping.
JsonScanner now intercepts the \u escape sequence and, if it
is followed by a hexadecimal digit, it will decode the Unicode
code point encoded in the following four hexadecimal digits and
append the resulting character to the string buffer.
This allows full compatibility with JSON.
Diffstat (limited to 'json-glib/json-scanner.c')
-rw-r--r-- | json-glib/json-scanner.c | 56 |
1 file changed, 50 insertions, 6 deletions
diff --git a/json-glib/json-scanner.c b/json-glib/json-scanner.c index acf3b6c..19e4b30 100644 --- a/json-glib/json-scanner.c +++ b/json-glib/json-scanner.c @@ -83,7 +83,7 @@ struct _JsonScannerConfig guint padding_dummy; }; -static const JsonScannerConfig json_scanner_config_template = +static JsonScannerConfig json_scanner_config_template = { ( " \t\r\n" ) /* cset_skip_characters */, ( @@ -186,6 +186,9 @@ static guchar json_scanner_peek_next_char (JsonScanner *scanner); static guchar json_scanner_get_char (JsonScanner *scanner, guint *line_p, guint *position_p); +static gunichar json_scanner_get_unichar (JsonScanner *scanner, + guint *line_p, + guint *position_p); static void json_scanner_msg_handler (JsonScanner *scanner, gchar *message, gboolean is_error); @@ -863,6 +866,36 @@ json_scanner_get_char (JsonScanner *scanner, return fchar; } +#define is_hex_digit(c) (((c) >= '0' && (c) <= '9') || \ + ((c) >= 'a' && (c) <= 'f') || \ + ((c) >= 'A' && (c) <= 'F')) +#define to_hex_digit(c) (((c) <= '9') ? 
(c) - '0' : ((c) & 7) + 9) + +static gunichar +json_scanner_get_unichar (JsonScanner *scanner, + guint *line_p, + guint *position_p) +{ + gunichar uchar; + gchar ch; + gint i; + + uchar = 0; + for (i = 0; i < 4; i++) + { + ch = json_scanner_get_char (scanner, line_p, position_p); + + if (is_hex_digit (ch)) + uchar += ((gunichar) to_hex_digit (ch) << ((3 - i) * 4)); + else + break; + } + + g_assert (g_unichar_validate (uchar)); + + return uchar; +} + void json_scanner_unexp_token (JsonScanner *scanner, GTokenType expected_token, @@ -1250,11 +1283,11 @@ json_scanner_get_token_i (JsonScanner *scanner, } static void -json_scanner_get_token_ll (JsonScanner *scanner, - GTokenType *token_p, - GTokenValue *value_p, - guint *line_p, - guint *position_p) +json_scanner_get_token_ll (JsonScanner *scanner, + GTokenType *token_p, + GTokenValue *value_p, + guint *line_p, + guint *position_p) { JsonScannerConfig *config; GTokenType token; @@ -1397,6 +1430,17 @@ json_scanner_get_token_ll (JsonScanner *scanner, case 'f': gstring = g_string_append_c (gstring, '\f'); break; + + case 'u': + fchar = json_scanner_peek_next_char (scanner); + if (is_hex_digit (fchar)) + { + gunichar ucs; + + ucs = json_scanner_get_unichar (scanner, line_p, position_p); + gstring = g_string_append_unichar (gstring, ucs); + } + break; case '0': case '1': |