Do not escape a unicode sequence when escaping JSON text.

Previously, any backslash in text being escaped for JSON was doubled so that the result was still valid JSON. However, this led to some perverse results in the case of Unicode sequences. These are now detected and the initial backslash is no longer escaped. All other backslashes are still escaped. No validity check is performed; all that is looked for is \uXXXX where each X is a hexadecimal digit. This is a change from the 9.2 and 9.3 behaviour, as noted in the release notes. Per complaint from Teodor Sigaev.
author: Andrew Dunstan <andrew@dunslane.net> 2014-06-03 16:11:31 -0400
committer: Andrew Dunstan <andrew@dunslane.net> 2014-06-03 16:11:31 -0400
commit: 0ad1a816320a2b539a51628e2a0b1e83ff096b1d (patch)
tree: b65753b2035c2e21b60504bd014d2df106dca7ff /src
parent: f30015b6d794c15d52abbb3df3a65081fbefb1ed (diff)
download: postgresql-0ad1a816320a2b539a51628e2a0b1e83ff096b1d.tar.gz
6 files changed, 62 insertions, 7 deletions
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index 2462111ecb..8ca1ede83f 100644
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -2315,7 +2315,26 @@ escape_json(StringInfo buf, const char *str)
 				appendStringInfoString(buf, "\\\"");
 				break;
 			case '\\':
-				appendStringInfoString(buf, "\\\\");
+				/*
+				 * Unicode escapes are passed through as is. There is no
+				 * requirement that they denote a valid character in the
+				 * server encoding - indeed that is a big part of their
+				 * usefulness.
+				 *
+				 * All we require is that they consist of \uXXXX where
+				 * the Xs are hexadecimal digits. It is the responsibility
+				 * of the caller of, say, to_json() to make sure that the
+				 * unicode escape is valid.
+				 *
+				 * In the case of a jsonb string value being escaped, the
+				 * only unicode escape that should be present is \u0000,
+				 * all the other unicode escapes will have been resolved.
+				 */
+				if (p[1] == 'u' && isxdigit(p[2]) && isxdigit(p[3])
+					&& isxdigit(p[4]) && isxdigit(p[5]))
+					appendStringInfoCharMacro(buf, *p);
+				else
+					appendStringInfoString(buf, "\\\\");
 				break;
 			default:
 				if ((unsigned char) *p < ' ')
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index c4dc8b0e3c..43341aa9bb 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -426,6 +426,20 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
 (1 row)
 
 COMMIT;
+-- unicode escape - backslash is not escaped
+select to_json(text '\uabcd');
+ to_json  
+----------
+ "\uabcd"
+(1 row)
+
+-- any other backslash is escaped
+select to_json(text '\abcd');
+ to_json  
+----------
+ "\\abcd"
+(1 row)
+
 --json_agg
 SELECT json_agg(q)
   FROM ( SELECT $$a$$ || x AS b, y AS c,
diff --git a/src/test/regress/expected/json_1.out b/src/test/regress/expected/json_1.out
index 629e98e6c5..953324637d 100644
--- a/src/test/regress/expected/json_1.out
+++ b/src/test/regress/expected/json_1.out
@@ -426,6 +426,20 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
 (1 row)
 
 COMMIT;
+-- unicode escape - backslash is not escaped
+select to_json(text '\uabcd');
+ to_json  
+----------
+ "\uabcd"
+(1 row)
+
+-- any other backslash is escaped
+select to_json(text '\abcd');
+ to_json  
+----------
+ "\\abcd"
+(1 row)
+
 --json_agg
 SELECT json_agg(q)
   FROM ( SELECT $$a$$ || x AS b, y AS c,
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index ae7c506811..1e46939b6f 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -61,9 +61,9 @@ LINE 1: SELECT '"\u000g"'::jsonb;
 DETAIL:  "\u" must be followed by four hexadecimal digits.
 CONTEXT:  JSON data, line 1: "\u000g...
 SELECT '"\u0000"'::jsonb;		-- OK, legal escape
-   jsonb   
------------
- "\\u0000"
+  jsonb   
+----------
+ "\u0000"
 (1 row)
 
 -- use octet_length here so we don't get an odd unicode char in the
diff --git a/src/test/regress/expected/jsonb_1.out b/src/test/regress/expected/jsonb_1.out
index 38a95b43f8..955dc424dc 100644
--- a/src/test/regress/expected/jsonb_1.out
+++ b/src/test/regress/expected/jsonb_1.out
@@ -61,9 +61,9 @@ LINE 1: SELECT '"\u000g"'::jsonb;
 DETAIL:  "\u" must be followed by four hexadecimal digits.
 CONTEXT:  JSON data, line 1: "\u000g...
 SELECT '"\u0000"'::jsonb;		-- OK, legal escape
-   jsonb   
------------
- "\\u0000"
+  jsonb   
+----------
+ "\u0000"
 (1 row)
 
 -- use octet_length here so we don't get an odd unicode char in the
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index 6c2faeccd3..3d5ed50126 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -111,6 +111,14 @@ SET LOCAL TIME ZONE -8;
 select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
 COMMIT;
 
+-- unicode escape - backslash is not escaped
+
+select to_json(text '\uabcd');
+
+-- any other backslash is escaped
+
+select to_json(text '\abcd');
+
 --json_agg
 
 SELECT json_agg(q)
author	Andrew Dunstan <andrew@dunslane.net>	2014-06-03 16:11:31 -0400
committer	Andrew Dunstan <andrew@dunslane.net>	2014-06-03 16:11:31 -0400
commit	0ad1a816320a2b539a51628e2a0b1e83ff096b1d (patch)
tree	b65753b2035c2e21b60504bd014d2df106dca7ff /src
parent	f30015b6d794c15d52abbb3df3a65081fbefb1ed (diff)
download	postgresql-0ad1a816320a2b539a51628e2a0b1e83ff096b1d.tar.gz