summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ext/json/ext/generator/fbuffer.c70
-rw-r--r--ext/json/ext/generator/fbuffer.h29
-rw-r--r--ext/json/ext/generator/generator.c412
-rw-r--r--ext/json/ext/generator/generator.h178
-rw-r--r--ext/json/ext/generator/unicode.c258
-rw-r--r--ext/json/ext/generator/unicode.h45
6 files changed, 516 insertions, 476 deletions
diff --git a/ext/json/ext/generator/fbuffer.c b/ext/json/ext/generator/fbuffer.c
deleted file mode 100644
index 521f9e1..0000000
--- a/ext/json/ext/generator/fbuffer.c
+++ /dev/null
@@ -1,70 +0,0 @@
-#include "fbuffer.h"
-
-inline FBuffer *fbuffer_alloc()
-{
- FBuffer *fb = ALLOC(FBuffer);
- memset((void *) fb, 0, sizeof(FBuffer));
- fb->initial_length = FBUFFER_INITIAL_LENGTH;
- return fb;
-}
-
-inline FBuffer *fbuffer_alloc_with_length(unsigned int initial_length)
-{
- assert(initial_length > 0);
- FBuffer *fb = ALLOC(FBuffer);
- memset((void *) fb, 0, sizeof(FBuffer));
- fb->initial_length = initial_length;
- return fb;
-}
-
-
-inline void fbuffer_free(FBuffer *fb)
-{
- if (fb->ptr) ruby_xfree(fb->ptr);
- ruby_xfree(fb);
-}
-
-inline void fbuffer_free_only_buffer(FBuffer *fb)
-{
- ruby_xfree(fb);
-}
-
-
-inline void fbuffer_clear(FBuffer *fb)
-{
- fb->len = 0;
-}
-
-static inline void fbuffer_inc_capa(FBuffer *fb, unsigned int requested)
-{
- unsigned int required;
-
- if (!fb->ptr) {
- fb->ptr = ALLOC_N(char, fb->initial_length);
- fb->capa = fb->initial_length;
- }
-
- for (required = fb->capa; requested > required - fb->len; required <<= 1);
-
- if (required > fb->capa) {
- fb->ptr = (char *) REALLOC_N((long*) fb->ptr, char, required);
- fb->capa = required;
- }
-}
-
-inline void fbuffer_append(FBuffer *fb, const char *newstr, unsigned int len)
-{
- if (len > 0) {
- fbuffer_inc_capa(fb, len);
- memcpy(fb->ptr + fb->len, newstr, len);
- fb->len += len;
- }
-}
-
-inline void fbuffer_append_char(FBuffer *fb, char newchr)
-{
- fbuffer_inc_capa(fb, 1);
- *(fb->ptr + fb->len) = newchr;
- fb->len++;
-}
-
diff --git a/ext/json/ext/generator/fbuffer.h b/ext/json/ext/generator/fbuffer.h
deleted file mode 100644
index f4baf62..0000000
--- a/ext/json/ext/generator/fbuffer.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef _FBUFFER_H_
-#define _FBUFFER_H_
-
-#include <assert.h>
-#include "ruby.h"
-
-typedef struct FBufferStruct {
- unsigned int initial_length;
- char *ptr;
- unsigned int len;
- unsigned int capa;
-} FBuffer;
-
-#define FBUFFER_INITIAL_LENGTH 4096
-
-#define FBUFFER_PTR(fb) (fb->ptr)
-#define FBUFFER_LEN(fb) (fb->len)
-#define FBUFFER_CAPA(fb) (fb->capa)
-#define FBUFFER_PAIR(fb) FBUFFER_PTR(fb), FBUFFER_LEN(fb)
-
-inline FBuffer *fbuffer_alloc();
-inline FBuffer *fbuffer_alloc_with_length(unsigned initial_length);
-inline void fbuffer_free(FBuffer *fb);
-inline void fbuffer_free_only_buffer(FBuffer *fb);
-inline void fbuffer_clear(FBuffer *fb);
-inline void fbuffer_append(FBuffer *fb, const char *newstr, unsigned int len);
-inline void fbuffer_append_char(FBuffer *fb, char newchr);
-
-#endif
diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c
index 7d2abd1..fabb0ea 100644
--- a/ext/json/ext/generator/generator.c
+++ b/ext/json/ext/generator/generator.c
@@ -1,82 +1,343 @@
-#include <string.h>
-#include "ruby.h"
+#include "generator.h"
-#if HAVE_RUBY_ST_H
-#include "ruby/st.h"
+#ifdef HAVE_RUBY_ENCODING_H
+static VALUE CEncoding_UTF_8;
+static ID i_encoding, i_encode;
#endif
-#if HAVE_ST_H
-#include "st.h"
-#endif
+static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject,
+ mHash, mArray, mInteger, mFloat, mString, mString_Extend,
+ mTrueClass, mFalseClass, mNilClass, eGeneratorError,
+ eNestingError, CRegexp_MULTILINE;
-#include "unicode.h"
-#include <math.h>
+static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
+ i_object_nl, i_array_nl, i_max_nesting,
+ i_allow_nan, i_ascii_only, i_pack, i_unpack, i_create_id, i_extend;
-#if HAVE_RUBY_RE_H
-#include "ruby/re.h"
-#endif
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ *
+ * Disclaimer
+ *
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ *
+ * Limitations on Rights to Redistribute This Code
+ *
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
-#if HAVE_RE_H
-#include "re.h"
-#endif
+/*
+ * Index into the table below with the first byte of a UTF-8 sequence to
+ * get the number of trailing bytes that are supposed to follow it.
+ * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
+ * left as-is for anyone who may want to do such conversion, which was
+ * allowed in earlier algorithms.
+ */
+static const char trailingBytesForUTF8[256] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
+};
-inline static VALUE cState_partial_generate(VALUE self, VALUE obj, VALUE depth);
-inline static VALUE cState_from_state_s(VALUE self, VALUE opts);
+/*
+ * Magic values subtracted from a buffer value during UTF8 conversion.
+ * This table contains as many values as there might be trailing bytes
+ * in a UTF-8 sequence.
+ */
+static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
+ 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
-#ifndef RHASH_TBL
-#define RHASH_TBL(hsh) (RHASH(hsh)->tbl)
-#endif
+/*
+ * Utility routine to tell whether a sequence of bytes is legal UTF-8.
+ * This must be called with the length pre-determined by the first byte.
+ * If not calling this from ConvertUTF8to*, then the length can be set by:
+ * length = trailingBytesForUTF8[*source]+1;
+ * and the sequence is illegal right away if there aren't that many bytes
+ * available.
+ * If presented with a length > 4, this returns 0. The Unicode
+ * definition of UTF-8 goes up to 4-byte sequences.
+ */
-#ifndef RHASH_SIZE
-#define RHASH_SIZE(hsh) (RHASH(hsh)->tbl->num_entries)
-#endif
+static unsigned char isLegalUTF8(const UTF8 *source, int length)
+{
+ UTF8 a;
+ const UTF8 *srcptr = source+length;
+ switch (length) {
+ default: return 0;
+ /* Everything else falls through when "1"... */
+ case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
+ case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
+ case 2: if ((a = (*--srcptr)) > 0xBF) return 0;
+
+ switch (*source) {
+ /* no fall-through in this inner switch */
+ case 0xE0: if (a < 0xA0) return 0; break;
+ case 0xED: if (a > 0x9F) return 0; break;
+ case 0xF0: if (a < 0x90) return 0; break;
+ case 0xF4: if (a > 0x8F) return 0; break;
+ default: if (a < 0x80) return 0;
+ }
-#ifndef RFLOAT_VALUE
-#define RFLOAT_VALUE(val) (RFLOAT(val)->value)
-#endif
+ case 1: if (*source >= 0x80 && *source < 0xC2) return 0;
+ }
+ if (*source > 0xF4) return 0;
+ return 1;
+}
-#ifdef HAVE_RUBY_ENCODING_H
-#include "ruby/encoding.h"
-#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding())
-static VALUE CEncoding_UTF_8;
-static ID i_encoding, i_encode;
+static void unicode_escape(char *buf, UTF16 character)
+{
+ const char *digits = "0123456789abcdef";
+
+ buf[2] = digits[character >> 12];
+ buf[3] = digits[(character >> 8) & 0xf];
+ buf[4] = digits[(character >> 4) & 0xf];
+ buf[5] = digits[character & 0xf];
+}
+
+
+static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character)
+{
+ unicode_escape(buf, character);
+ fbuffer_append(buffer, buf, 6);
+}
+
+static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string)
+{
+ const UTF8 *source = (UTF8 *) RSTRING_PTR(string);
+ const UTF8 *sourceEnd = source + RSTRING_LEN(string);
+ char buf[6] = { '\\', 'u' };
+
+ while (source < sourceEnd) {
+ UTF32 ch = 0;
+ unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
+ if (source + extraBytesToRead >= sourceEnd) {
+ rb_raise(rb_path2class("JSON::GeneratorError"),
+ "partial character in source, but hit end");
+ }
+ if (!isLegalUTF8(source, extraBytesToRead+1)) {
+ rb_raise(rb_path2class("JSON::GeneratorError"),
+ "source sequence is illegal/malformed utf-8");
+ }
+ /*
+ * The cases all fall through. See "Note A" below.
+ */
+ switch (extraBytesToRead) {
+ case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
+ case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
+ case 3: ch += *source++; ch <<= 6;
+ case 2: ch += *source++; ch <<= 6;
+ case 1: ch += *source++; ch <<= 6;
+ case 0: ch += *source++;
+ }
+ ch -= offsetsFromUTF8[extraBytesToRead];
+
+ if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
+ /* UTF-16 surrogate values are illegal in UTF-32 */
+ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+#if UNI_STRICT_CONVERSION
+ source -= (extraBytesToRead+1); /* return to the illegal value itself */
+ rb_raise(rb_path2class("JSON::GeneratorError"),
+ "source sequence is illegal/malformed utf-8");
+#else
+ unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
+#endif
+ } else {
+ /* normal case */
+ switch (ch) {
+ case '\n':
+ fbuffer_append(buffer, "\\n", 2);
+ break;
+ case '\r':
+ fbuffer_append(buffer, "\\r", 2);
+ break;
+ case '\\':
+ fbuffer_append(buffer, "\\\\", 2);
+ break;
+ case '"':
+ fbuffer_append(buffer, "\\\"", 2);
+ break;
+ case '\t':
+ fbuffer_append(buffer, "\\t", 2);
+ break;
+ case '\f':
+ fbuffer_append(buffer, "\\f", 2);
+ break;
+ case '\b':
+ fbuffer_append(buffer, "\\b", 2);
+ break;
+ default:
+ if (ch >= 0x20 && ch <= 0x7f) {
+ fbuffer_append_char(buffer, ch);
+ } else {
+ unicode_escape_to_buffer(buffer, buf, (UTF16) ch);
+ }
+ break;
+ }
+ }
+ } else if (ch > UNI_MAX_UTF16) {
+#if UNI_STRICT_CONVERSION
+ source -= (extraBytesToRead+1); /* return to the start */
+ rb_raise(rb_path2class("JSON::GeneratorError"),
+ "source sequence is illegal/malformed utf8");
#else
-#define FORCE_UTF8(obj)
+ unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
#endif
+ } else {
+ /* target is a character in range 0xFFFF - 0x10FFFF. */
+ ch -= halfBase;
+ unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START));
+ unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START));
+ }
+ }
+}
-static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject,
- mHash, mArray, mInteger, mFloat, mString, mString_Extend,
- mTrueClass, mFalseClass, mNilClass, eGeneratorError,
- eNestingError, CRegexp_MULTILINE;
+static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string)
+{
+ const char *ptr = RSTRING_PTR(string), *p;
+ int len = RSTRING_LEN(string), start = 0, end = 0;
+ const char *escape = NULL;
+ int escape_len;
+ unsigned char c;
+ char buf[6] = { '\\', 'u' };
+
+ for (start = 0, end = 0; end < len;) {
+ p = ptr + end;
+ c = (unsigned char) *p;
+ if (c < 0x20) {
+ switch (c) {
+ case '\n':
+ escape = "\\n";
+ escape_len = 2;
+ break;
+ case '\r':
+ escape = "\\r";
+ escape_len = 2;
+ break;
+ case '\t':
+ escape = "\\t";
+ escape_len = 2;
+ break;
+ case '\f':
+ escape = "\\f";
+ escape_len = 2;
+ break;
+ case '\b':
+ escape = "\\b";
+ escape_len = 2;
+ break;
+ default:
+ unicode_escape(buf, (UTF16) *p);
+ escape = buf;
+ escape_len = 6;
+ break;
+ }
+ } else {
+ switch (c) {
+ case '\\':
+ escape = "\\\\";
+ escape_len = 2;
+ break;
+ case '"':
+ escape = "\\\"";
+ escape_len = 2;
+ break;
+ default:
+ end++;
+ continue;
+ break;
+ }
+ }
+ fbuffer_append(buffer, ptr + start, end - start);
+ fbuffer_append(buffer, escape, escape_len);
+ start = ++end;
+ escape = NULL;
+ }
+ fbuffer_append(buffer, ptr + start, end - start);
+}
-static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
- i_object_nl, i_array_nl, i_max_nesting,
- i_allow_nan, i_ascii_only, i_pack, i_unpack, i_create_id, i_extend;
+/* fbuffer implementation */
-typedef struct JSON_Generator_StateStruct {
- char *indent;
- long indent_len;
- char *space;
- long space_len;
- char *space_before;
- long space_before_len;
- char *object_nl;
- long object_nl_len;
- char *array_nl;
- long array_nl_len;
- FBuffer *array_delim;
- FBuffer *object_delim;
- FBuffer *object_delim2;
- long max_nesting;
- char allow_nan;
- char ascii_only;
-} JSON_Generator_State;
-
-#define GET_STATE(self) \
- JSON_Generator_State *state; \
- Data_Get_Struct(self, JSON_Generator_State, state);
-
-#define RSTRING_PAIR(string) RSTRING_PTR(string), RSTRING_LEN(string)
+static FBuffer *fbuffer_alloc()
+{
+ FBuffer *fb = ALLOC(FBuffer);
+ memset((void *) fb, 0, sizeof(FBuffer));
+ fb->initial_length = FBUFFER_INITIAL_LENGTH;
+ return fb;
+}
+
+static FBuffer *fbuffer_alloc_with_length(unsigned int initial_length)
+{
+ assert(initial_length > 0);
+ FBuffer *fb = ALLOC(FBuffer);
+ memset((void *) fb, 0, sizeof(FBuffer));
+ fb->initial_length = initial_length;
+ return fb;
+}
+
+
+static void fbuffer_free(FBuffer *fb)
+{
+ if (fb->ptr) ruby_xfree(fb->ptr);
+ ruby_xfree(fb);
+}
+
+static void fbuffer_free_only_buffer(FBuffer *fb)
+{
+ ruby_xfree(fb);
+}
+
+static void fbuffer_clear(FBuffer *fb)
+{
+ fb->len = 0;
+}
+
+static void fbuffer_inc_capa(FBuffer *fb, unsigned int requested)
+{
+ unsigned int required;
+
+ if (!fb->ptr) {
+ fb->ptr = ALLOC_N(char, fb->initial_length);
+ fb->capa = fb->initial_length;
+ }
+
+ for (required = fb->capa; requested > required - fb->len; required <<= 1);
+
+ if (required > fb->capa) {
+ fb->ptr = (char *) REALLOC_N((long*) fb->ptr, char, required);
+ fb->capa = required;
+ }
+}
+
+static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned int len)
+{
+ if (len > 0) {
+ fbuffer_inc_capa(fb, len);
+ memcpy(fb->ptr + fb->len, newstr, len);
+ fb->len += len;
+ }
+}
+
+static void fbuffer_append_char(FBuffer *fb, char newchr)
+{
+ fbuffer_inc_capa(fb, 1);
+ *(fb->ptr + fb->len) = newchr;
+ fb->len++;
+}
/*
* Document-module: JSON::Ext::Generator
@@ -183,7 +444,8 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
* method should be used, if you want to convert raw strings to JSON
* instead of UTF-8 strings, e. g. binary data.
*/
-static VALUE mString_to_json_raw_object(VALUE self) {
+static VALUE mString_to_json_raw_object(VALUE self)
+{
VALUE ary;
VALUE result = rb_hash_new();
rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
@@ -198,7 +460,8 @@ static VALUE mString_to_json_raw_object(VALUE self) {
* This method creates a JSON text from the result of a call to
* to_json_raw_object of this String.
*/
-static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self) {
+static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self)
+{
VALUE obj = mString_to_json_raw_object(self);
Check_Type(obj, T_HASH);
return mHash_to_json(argc, argv, obj);
@@ -210,7 +473,8 @@ static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self) {
* Raw Strings are JSON Objects (the raw bytes are stored in an array for the
* key "raw"). The Ruby String can be created by this module method.
*/
-static VALUE mString_Extend_json_create(VALUE self, VALUE o) {
+static VALUE mString_Extend_json_create(VALUE self, VALUE o)
+{
VALUE ary;
Check_Type(o, T_HASH);
ary = rb_hash_aref(o, rb_str_new2("raw"));
@@ -272,7 +536,8 @@ static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self)
return cState_partial_generate(state, string, depth);
}
-static void State_free(JSON_Generator_State *state) {
+static void State_free(JSON_Generator_State *state)
+{
if (state->indent) ruby_xfree(state->indent);
if (state->space) ruby_xfree(state->space);
if (state->space_before) ruby_xfree(state->space_before);
@@ -400,7 +665,7 @@ static VALUE cState_to_h(VALUE self)
STR_SET_EMBED_LEN(str, 0);\
} while (0)
-inline static VALUE fbuffer2rstring(FBuffer *buffer)
+static VALUE fbuffer2rstring(FBuffer *buffer)
{
NEWOBJ(str, struct RString);
OBJSETUP(str, rb_cString, T_STRING);
@@ -413,8 +678,7 @@ inline static VALUE fbuffer2rstring(FBuffer *buffer)
return (VALUE) str;
}
#else
-
-inline static VALUE fbuffer2rstring(FBuffer *buffer)
+static VALUE fbuffer2rstring(FBuffer *buffer)
{
NEWOBJ(str, struct RString);
OBJSETUP(str, rb_cString, T_STRING);
@@ -427,7 +691,7 @@ inline static VALUE fbuffer2rstring(FBuffer *buffer)
}
#endif
-void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj, long depth)
+static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj, long depth)
{
VALUE tmp;
switch (TYPE(obj)) {
@@ -581,7 +845,7 @@ void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, V
* Generates a part of a JSON document from object +obj+ and returns the
* result.
*/
-inline static VALUE cState_partial_generate(VALUE self, VALUE obj, VALUE depth)
+static VALUE cState_partial_generate(VALUE self, VALUE obj, VALUE depth)
{
VALUE result;
FBuffer *buffer = fbuffer_alloc();
@@ -623,7 +887,7 @@ inline static VALUE cState_partial_generate(VALUE self, VALUE obj, VALUE depth)
* result. If no valid JSON document can be created this method raises a
* GeneratorError exception.
*/
-inline static VALUE cState_generate(VALUE self, VALUE obj)
+static VALUE cState_generate(VALUE self, VALUE obj)
{
VALUE result = cState_partial_generate(self, obj, Qnil);
VALUE re, args[2];
@@ -670,7 +934,7 @@ static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
* new State instance configured by _opts_, something else to create an
* unconfigured instance. If _opts_ is a State object, it is just returned.
*/
-inline static VALUE cState_from_state_s(VALUE self, VALUE opts)
+static VALUE cState_from_state_s(VALUE self, VALUE opts)
{
if (rb_obj_is_kind_of(opts, self)) {
return opts;
diff --git a/ext/json/ext/generator/generator.h b/ext/json/ext/generator/generator.h
new file mode 100644
index 0000000..b127f9d
--- /dev/null
+++ b/ext/json/ext/generator/generator.h
@@ -0,0 +1,178 @@
+#ifndef _GENERATOR_H_
+#define _GENERATOR_H_
+
+#include <string.h>
+#include <assert.h>
+#include <math.h>
+
+#include "ruby.h"
+
+#if HAVE_RUBY_ST_H
+#include "ruby/st.h"
+#endif
+
+#if HAVE_ST_H
+#include "st.h"
+#endif
+
+#if HAVE_RUBY_RE_H
+#include "ruby/re.h"
+#endif
+
+#if HAVE_RE_H
+#include "re.h"
+#endif
+
+#ifdef HAVE_RUBY_ENCODING_H
+#include "ruby/encoding.h"
+#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding())
+#else
+#define FORCE_UTF8(obj)
+#endif
+
+#ifndef RHASH_TBL
+#define RHASH_TBL(hsh) (RHASH(hsh)->tbl)
+#endif
+
+#ifndef RHASH_SIZE
+#define RHASH_SIZE(hsh) (RHASH(hsh)->tbl->num_entries)
+#endif
+
+#ifndef RFLOAT_VALUE
+#define RFLOAT_VALUE(val) (RFLOAT(val)->value)
+#endif
+
+#ifndef RARRAY_PTR
+#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr
+#endif
+#ifndef RARRAY_LEN
+#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len
+#endif
+#ifndef RSTRING_PTR
+#define RSTRING_PTR(string) RSTRING(string)->ptr
+#endif
+#ifndef RSTRING_LEN
+#define RSTRING_LEN(string) RSTRING(string)->len
+#endif
+
+#define RSTRING_PAIR(string) RSTRING_PTR(string), RSTRING_LEN(string)
+
+/* fbuffer implementation */
+
+typedef struct FBufferStruct {
+ unsigned int initial_length;
+ char *ptr;
+ unsigned int len;
+ unsigned int capa;
+} FBuffer;
+
+#define FBUFFER_INITIAL_LENGTH 4096
+
+#define FBUFFER_PTR(fb) (fb->ptr)
+#define FBUFFER_LEN(fb) (fb->len)
+#define FBUFFER_CAPA(fb) (fb->capa)
+#define FBUFFER_PAIR(fb) FBUFFER_PTR(fb), FBUFFER_LEN(fb)
+
+static FBuffer *fbuffer_alloc();
+static FBuffer *fbuffer_alloc_with_length(unsigned initial_length);
+static void fbuffer_free(FBuffer *fb);
+static void fbuffer_free_only_buffer(FBuffer *fb);
+static void fbuffer_clear(FBuffer *fb);
+static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned int len);
+static void fbuffer_append_char(FBuffer *fb, char newchr);
+
+/* unicode defintions */
+
+#define UNI_STRICT_CONVERSION 1
+
+typedef unsigned long UTF32; /* at least 32 bits */
+typedef unsigned short UTF16; /* at least 16 bits */
+typedef unsigned char UTF8; /* typically 8 bits */
+
+#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
+#define UNI_MAX_BMP (UTF32)0x0000FFFF
+#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
+#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
+#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
+
+#define UNI_SUR_HIGH_START (UTF32)0xD800
+#define UNI_SUR_HIGH_END (UTF32)0xDBFF
+#define UNI_SUR_LOW_START (UTF32)0xDC00
+#define UNI_SUR_LOW_END (UTF32)0xDFFF
+
+static const int halfShift = 10; /* used for shifting by 10 bits */
+
+static const UTF32 halfBase = 0x0010000UL;
+static const UTF32 halfMask = 0x3FFUL;
+
+static unsigned char isLegalUTF8(const UTF8 *source, int length);
+static void unicode_escape(char *buf, UTF16 character);
+static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character);
+static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string);
+static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string);
+
+/* ruby api and some helpers */
+
+typedef struct JSON_Generator_StateStruct {
+ char *indent;
+ long indent_len;
+ char *space;
+ long space_len;
+ char *space_before;
+ long space_before_len;
+ char *object_nl;
+ long object_nl_len;
+ char *array_nl;
+ long array_nl_len;
+ FBuffer *array_delim;
+ FBuffer *object_delim;
+ FBuffer *object_delim2;
+ long max_nesting;
+ char allow_nan;
+ char ascii_only;
+} JSON_Generator_State;
+
+#define GET_STATE(self) \
+ JSON_Generator_State *state; \
+ Data_Get_Struct(self, JSON_Generator_State, state);
+
+static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self);
+static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self);
+static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self);
+static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self);
+static VALUE mString_included_s(VALUE self, VALUE modul);
+static VALUE mString_to_json(int argc, VALUE *argv, VALUE self);
+static VALUE mString_to_json_raw_object(VALUE self);
+static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self);
+static VALUE mString_Extend_json_create(VALUE self, VALUE o);
+static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self);
+static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self);
+static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self);
+static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self);
+static void State_free(JSON_Generator_State *state);
+static JSON_Generator_State *State_allocate();
+static VALUE cState_s_allocate(VALUE klass);
+static VALUE cState_configure(VALUE self, VALUE opts);
+static VALUE cState_to_h(VALUE self);
+static VALUE fbuffer2rstring(FBuffer *buffer);
+static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj, long depth);
+static VALUE cState_partial_generate(VALUE self, VALUE obj, VALUE depth);
+static VALUE cState_generate(VALUE self, VALUE obj);
+static VALUE cState_initialize(int argc, VALUE *argv, VALUE self);
+static VALUE cState_from_state_s(VALUE self, VALUE opts);
+static VALUE cState_indent(VALUE self);
+static VALUE cState_indent_set(VALUE self, VALUE indent);
+static VALUE cState_space(VALUE self);
+static VALUE cState_space_set(VALUE self, VALUE space);
+static VALUE cState_space_before(VALUE self);
+static VALUE cState_space_before_set(VALUE self, VALUE space_before);
+static VALUE cState_object_nl(VALUE self);
+static VALUE cState_object_nl_set(VALUE self, VALUE object_nl);
+static VALUE cState_array_nl(VALUE self);
+static VALUE cState_array_nl_set(VALUE self, VALUE array_nl);
+static VALUE cState_max_nesting(VALUE self);
+static VALUE cState_max_nesting_set(VALUE self, VALUE depth);
+static VALUE cState_allow_nan_p(VALUE self);
+static VALUE cState_ascii_only_p(VALUE self);
+
+#endif
diff --git a/ext/json/ext/generator/unicode.c b/ext/json/ext/generator/unicode.c
deleted file mode 100644
index cece042..0000000
--- a/ext/json/ext/generator/unicode.c
+++ /dev/null
@@ -1,258 +0,0 @@
-#include "unicode.h"
-#include "fbuffer.h"
-
-/*
- * Copyright 2001-2004 Unicode, Inc.
- *
- * Disclaimer
- *
- * This source code is provided as is by Unicode, Inc. No claims are
- * made as to fitness for any particular purpose. No warranties of any
- * kind are expressed or implied. The recipient agrees to determine
- * applicability of information provided. If this file has been
- * purchased on magnetic or optical media from Unicode, Inc., the
- * sole remedy for any claim will be exchange of defective media
- * within 90 days of receipt.
- *
- * Limitations on Rights to Redistribute This Code
- *
- * Unicode, Inc. hereby grants the right to freely use the information
- * supplied in this file in the creation of products supporting the
- * Unicode Standard, and to make copies of this file in any form
- * for internal or external distribution as long as this notice
- * remains attached.
- */
-
-/*
- * Index into the table below with the first byte of a UTF-8 sequence to
- * get the number of trailing bytes that are supposed to follow it.
- * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
- * left as-is for anyone who may want to do such conversion, which was
- * allowed in earlier algorithms.
- */
-static const char trailingBytesForUTF8[256] = {
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
-};
-
-/*
- * Magic values subtracted from a buffer value during UTF8 conversion.
- * This table contains as many values as there might be trailing bytes
- * in a UTF-8 sequence.
- */
-static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
- 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
-
-/*
- * Utility routine to tell whether a sequence of bytes is legal UTF-8.
- * This must be called with the length pre-determined by the first byte.
- * If not calling this from ConvertUTF8to*, then the length can be set by:
- * length = trailingBytesForUTF8[*source]+1;
- * and the sequence is illegal right away if there aren't that many bytes
- * available.
- * If presented with a length > 4, this returns 0. The Unicode
- * definition of UTF-8 goes up to 4-byte sequences.
- */
-
-inline static unsigned char isLegalUTF8(const UTF8 *source, int length)
-{
- UTF8 a;
- const UTF8 *srcptr = source+length;
- switch (length) {
- default: return 0;
- /* Everything else falls through when "1"... */
- case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
- case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
- case 2: if ((a = (*--srcptr)) > 0xBF) return 0;
-
- switch (*source) {
- /* no fall-through in this inner switch */
- case 0xE0: if (a < 0xA0) return 0; break;
- case 0xED: if (a > 0x9F) return 0; break;
- case 0xF0: if (a < 0x90) return 0; break;
- case 0xF4: if (a > 0x8F) return 0; break;
- default: if (a < 0x80) return 0;
- }
-
- case 1: if (*source >= 0x80 && *source < 0xC2) return 0;
- }
- if (*source > 0xF4) return 0;
- return 1;
-}
-
-inline static void unicode_escape(char *buf, UTF16 character)
-{
- const char *digits = "0123456789abcdef";
-
- buf[2] = digits[character >> 12];
- buf[3] = digits[(character >> 8) & 0xf];
- buf[4] = digits[(character >> 4) & 0xf];
- buf[5] = digits[character & 0xf];
-}
-
-
-inline static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character)
-{
- unicode_escape(buf, character);
- fbuffer_append(buffer, buf, 6);
-}
-
-inline void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string)
-{
- const UTF8 *source = (UTF8 *) RSTRING_PTR(string);
- const UTF8 *sourceEnd = source + RSTRING_LEN(string);
- char buf[6] = { '\\', 'u' };
-
- while (source < sourceEnd) {
- UTF32 ch = 0;
- unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
- if (source + extraBytesToRead >= sourceEnd) {
- rb_raise(rb_path2class("JSON::GeneratorError"),
- "partial character in source, but hit end");
- }
- if (!isLegalUTF8(source, extraBytesToRead+1)) {
- rb_raise(rb_path2class("JSON::GeneratorError"),
- "source sequence is illegal/malformed utf-8");
- }
- /*
- * The cases all fall through. See "Note A" below.
- */
- switch (extraBytesToRead) {
- case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
- case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
- case 3: ch += *source++; ch <<= 6;
- case 2: ch += *source++; ch <<= 6;
- case 1: ch += *source++; ch <<= 6;
- case 0: ch += *source++;
- }
- ch -= offsetsFromUTF8[extraBytesToRead];
-
- if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
- /* UTF-16 surrogate values are illegal in UTF-32 */
- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
-#if UNI_STRICT_CONVERSION
- source -= (extraBytesToRead+1); /* return to the illegal value itself */
- rb_raise(rb_path2class("JSON::GeneratorError"),
- "source sequence is illegal/malformed utf-8");
-#else
- unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
-#endif
- } else {
- /* normal case */
- switch (ch) {
- case '\n':
- fbuffer_append(buffer, "\\n", 2);
- break;
- case '\r':
- fbuffer_append(buffer, "\\r", 2);
- break;
- case '\\':
- fbuffer_append(buffer, "\\\\", 2);
- break;
- case '"':
- fbuffer_append(buffer, "\\\"", 2);
- break;
- case '\t':
- fbuffer_append(buffer, "\\t", 2);
- break;
- case '\f':
- fbuffer_append(buffer, "\\f", 2);
- break;
- case '\b':
- fbuffer_append(buffer, "\\b", 2);
- break;
- default:
- if (ch >= 0x20 && ch <= 0x7f) {
- fbuffer_append_char(buffer, ch);
- } else {
- unicode_escape_to_buffer(buffer, buf, (UTF16) ch);
- }
- break;
- }
- }
- } else if (ch > UNI_MAX_UTF16) {
-#if UNI_STRICT_CONVERSION
- source -= (extraBytesToRead+1); /* return to the start */
- rb_raise(rb_path2class("JSON::GeneratorError"),
- "source sequence is illegal/malformed utf8");
-#else
- unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
-#endif
- } else {
- /* target is a character in range 0xFFFF - 0x10FFFF. */
- ch -= halfBase;
- unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START));
- unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START));
- }
- }
-}
-
-inline void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string)
-{
- const char *ptr = RSTRING_PTR(string), *p;
- int len = RSTRING_LEN(string), start = 0, end = 0;
- const char *escape = NULL;
- int escape_len;
- unsigned char c;
- char buf[6] = { '\\', 'u' };
-
- for (start = 0, end = 0; end < len;) {
- p = ptr + end;
- c = (unsigned char) *p;
- if (c < 0x20) {
- switch (c) {
- case '\n':
- escape = "\\n";
- escape_len = 2;
- break;
- case '\r':
- escape = "\\r";
- escape_len = 2;
- break;
- case '\t':
- escape = "\\t";
- escape_len = 2;
- break;
- case '\f':
- escape = "\\f";
- escape_len = 2;
- break;
- case '\b':
- escape = "\\b";
- escape_len = 2;
- break;
- default:
- unicode_escape(buf, (UTF16) *p);
- escape = buf;
- escape_len = 6;
- break;
- }
- } else {
- switch (c) {
- case '\\':
- escape = "\\\\";
- escape_len = 2;
- break;
- case '"':
- escape = "\\\"";
- escape_len = 2;
- break;
- default:
- end++;
- continue;
- break;
- }
- }
- fbuffer_append(buffer, ptr + start, end - start);
- fbuffer_append(buffer, escape, escape_len);
- start = ++end;
- escape = NULL;
- }
- fbuffer_append(buffer, ptr + start, end - start);
-}
diff --git a/ext/json/ext/generator/unicode.h b/ext/json/ext/generator/unicode.h
deleted file mode 100644
index 3837b32..0000000
--- a/ext/json/ext/generator/unicode.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#include "ruby.h"
-#include "fbuffer.h"
-
-#ifndef _GENERATOR_UNICODE_H_
-#define _GENERATOR_UNICODE_H_
-
-#define UNI_STRICT_CONVERSION 1
-
-typedef unsigned long UTF32; /* at least 32 bits */
-typedef unsigned short UTF16; /* at least 16 bits */
-typedef unsigned char UTF8; /* typically 8 bits */
-
-#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
-#define UNI_MAX_BMP (UTF32)0x0000FFFF
-#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
-#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
-#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
-
-#define UNI_SUR_HIGH_START (UTF32)0xD800
-#define UNI_SUR_HIGH_END (UTF32)0xDBFF
-#define UNI_SUR_LOW_START (UTF32)0xDC00
-#define UNI_SUR_LOW_END (UTF32)0xDFFF
-
-static const int halfShift = 10; /* used for shifting by 10 bits */
-
-static const UTF32 halfBase = 0x0010000UL;
-static const UTF32 halfMask = 0x3FFUL;
-
-inline void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string);
-inline void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string);
-
-#ifndef RARRAY_PTR
-#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr
-#endif
-#ifndef RARRAY_LEN
-#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len
-#endif
-#ifndef RSTRING_PTR
-#define RSTRING_PTR(string) RSTRING(string)->ptr
-#endif
-#ifndef RSTRING_LEN
-#define RSTRING_LEN(string) RSTRING(string)->len
-#endif
-
-#endif