6 files changed, 516 insertions, 476 deletions
diff --git a/ext/json/ext/generator/fbuffer.c b/ext/json/ext/generator/fbuffer.c
deleted file mode 100644
index 521f9e1..0000000
--- a/ext/json/ext/generator/fbuffer.c
+++ /dev/null
@@ -1,70 +0,0 @@
-#include "fbuffer.h"
-
-inline FBuffer *fbuffer_alloc()
-{
-    FBuffer *fb = ALLOC(FBuffer);
-    memset((void *) fb, 0, sizeof(FBuffer));
-    fb->initial_length = FBUFFER_INITIAL_LENGTH;
-    return fb;
-}
-
-inline FBuffer *fbuffer_alloc_with_length(unsigned int initial_length)
-{
-    assert(initial_length > 0);
-    FBuffer *fb = ALLOC(FBuffer);
-    memset((void *) fb, 0, sizeof(FBuffer));
-    fb->initial_length = initial_length;
-    return fb;
-}
-
-
-inline void fbuffer_free(FBuffer *fb)
-{
-    if (fb->ptr) ruby_xfree(fb->ptr);
-    ruby_xfree(fb);
-}
-
-inline void fbuffer_free_only_buffer(FBuffer *fb)
-{
-    ruby_xfree(fb);
-}
-
-
-inline void fbuffer_clear(FBuffer *fb)
-{
-    fb->len = 0;
-}
-
-static inline void fbuffer_inc_capa(FBuffer *fb, unsigned int requested)
-{
-    unsigned int required;
-
-    if (!fb->ptr) {
-        fb->ptr = ALLOC_N(char, fb->initial_length);
-        fb->capa = fb->initial_length;
-    }
-
-    for (required = fb->capa; requested > required - fb->len; required <<= 1);
-
-    if (required > fb->capa) {
-        fb->ptr = (char *) REALLOC_N((long*) fb->ptr, char, required);
-        fb->capa = required;
-    }
-}
-
-inline void fbuffer_append(FBuffer *fb, const char *newstr, unsigned int len)
-{
-    if (len > 0) {
-        fbuffer_inc_capa(fb, len);
-        memcpy(fb->ptr + fb->len, newstr, len);
-        fb->len += len;
-    }
-}
-
-inline void fbuffer_append_char(FBuffer *fb, char newchr)
-{
-    fbuffer_inc_capa(fb, 1);
-    *(fb->ptr + fb->len) = newchr;
-    fb->len++;
-}
-
diff --git a/ext/json/ext/generator/fbuffer.h b/ext/json/ext/generator/fbuffer.h
deleted file mode 100644
index f4baf62..0000000
--- a/ext/json/ext/generator/fbuffer.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef _FBUFFER_H_
-#define _FBUFFER_H_
-
-#include <assert.h>
-#include "ruby.h"
-
-typedef struct FBufferStruct {
-    unsigned int initial_length;
-    char *ptr;
-    unsigned int len;
-    unsigned int capa;
-} FBuffer;
-
-#define FBUFFER_INITIAL_LENGTH 4096
-
-#define FBUFFER_PTR(fb) (fb->ptr)
-#define FBUFFER_LEN(fb) (fb->len)
-#define FBUFFER_CAPA(fb) (fb->capa)
-#define FBUFFER_PAIR(fb) FBUFFER_PTR(fb), FBUFFER_LEN(fb)
-
-inline FBuffer *fbuffer_alloc();
-inline FBuffer *fbuffer_alloc_with_length(unsigned initial_length);
-inline void fbuffer_free(FBuffer *fb);
-inline void fbuffer_free_only_buffer(FBuffer *fb);
-inline void fbuffer_clear(FBuffer *fb);
-inline void fbuffer_append(FBuffer *fb, const char *newstr, unsigned int len);
-inline void fbuffer_append_char(FBuffer *fb, char newchr);
-
-#endif
diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c
index 7d2abd1..fabb0ea 100644
--- a/ext/json/ext/generator/generator.c
+++ b/ext/json/ext/generator/generator.c
@@ -1,82 +1,343 @@
-#include <string.h>
-#include "ruby.h"
+#include "generator.h"
 
-#if HAVE_RUBY_ST_H
-#include "ruby/st.h"
+#ifdef HAVE_RUBY_ENCODING_H
+static VALUE CEncoding_UTF_8;
+static ID i_encoding, i_encode;
 #endif
 
-#if HAVE_ST_H
-#include "st.h"
-#endif
+static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject,
+             mHash, mArray, mInteger, mFloat, mString, mString_Extend,
+             mTrueClass, mFalseClass, mNilClass, eGeneratorError,
+             eNestingError, CRegexp_MULTILINE;
 
-#include "unicode.h"
-#include <math.h>
+static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
+          i_object_nl, i_array_nl, i_max_nesting,
+          i_allow_nan, i_ascii_only, i_pack, i_unpack, i_create_id, i_extend;
 
-#if HAVE_RUBY_RE_H
-#include "ruby/re.h"
-#endif
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ * 
+ * Disclaimer
+ * 
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ * 
+ * Limitations on Rights to Redistribute This Code
+ * 
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
 
-#if HAVE_RE_H
-#include "re.h"
-#endif
+/*
+ * Index into the table below with the first byte of a UTF-8 sequence to
+ * get the number of trailing bytes that are supposed to follow it.
+ * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
+ * left as-is for anyone who may want to do such conversion, which was
+ * allowed in earlier algorithms.
+ */
+static const char trailingBytesForUTF8[256] = {
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
+};
 
-inline static VALUE cState_partial_generate(VALUE self, VALUE obj, VALUE depth);
-inline static VALUE cState_from_state_s(VALUE self, VALUE opts);
+/*
+ * Magic values subtracted from a buffer value during UTF8 conversion.
+ * This table contains as many values as there might be trailing bytes
+ * in a UTF-8 sequence.
+ */
+static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 
+		     0x03C82080UL, 0xFA082080UL, 0x82082080UL };
 
-#ifndef RHASH_TBL
-#define RHASH_TBL(hsh) (RHASH(hsh)->tbl)
-#endif
+/*
+ * Utility routine to tell whether a sequence of bytes is legal UTF-8.
+ * This must be called with the length pre-determined by the first byte.
+ * If not calling this from ConvertUTF8to*, then the length can be set by:
+ *  length = trailingBytesForUTF8[*source]+1;
+ * and the sequence is illegal right away if there aren't that many bytes
+ * available.
+ * If presented with a length > 4, this returns 0.  The Unicode
+ * definition of UTF-8 goes up to 4-byte sequences.
+ */
 
-#ifndef RHASH_SIZE
-#define RHASH_SIZE(hsh) (RHASH(hsh)->tbl->num_entries)
-#endif
+static unsigned char isLegalUTF8(const UTF8 *source, int length)
+{
+    UTF8 a;
+    const UTF8 *srcptr = source+length;
+    switch (length) {
+        default: return 0;
+                 /* Everything else falls through when "1"... */
+        case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
+        case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
+        case 2: if ((a = (*--srcptr)) > 0xBF) return 0;
+
+                    switch (*source) {
+                        /* no fall-through in this inner switch */
+                        case 0xE0: if (a < 0xA0) return 0; break;
+                        case 0xED: if (a > 0x9F) return 0; break;
+                        case 0xF0: if (a < 0x90) return 0; break;
+                        case 0xF4: if (a > 0x8F) return 0; break;
+                        default:   if (a < 0x80) return 0;
+                    }
 
-#ifndef RFLOAT_VALUE
-#define RFLOAT_VALUE(val) (RFLOAT(val)->value)
-#endif
+        case 1: if (*source >= 0x80 && *source < 0xC2) return 0;
+    }
+    if (*source > 0xF4) return 0;
+    return 1;
+}
 
-#ifdef HAVE_RUBY_ENCODING_H
-#include "ruby/encoding.h"
-#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding())
-static VALUE CEncoding_UTF_8;
-static ID i_encoding, i_encode;
+static void unicode_escape(char *buf, UTF16 character)
+{
+    const char *digits = "0123456789abcdef";
+
+    buf[2] = digits[character >> 12];
+    buf[3] = digits[(character >> 8) & 0xf];
+    buf[4] = digits[(character >> 4) & 0xf];
+    buf[5] = digits[character & 0xf];
+}
+
+
+static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character)
+{
+    unicode_escape(buf, character);
+    fbuffer_append(buffer, buf, 6);
+}
+
+static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string)
+{
+    const UTF8 *source = (UTF8 *) RSTRING_PTR(string);
+    const UTF8 *sourceEnd = source + RSTRING_LEN(string);
+    char buf[6] = { '\\', 'u' };
+
+    while (source < sourceEnd) {
+        UTF32 ch = 0;
+        unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
+        if (source + extraBytesToRead >= sourceEnd) {
+            rb_raise(rb_path2class("JSON::GeneratorError"),
+                    "partial character in source, but hit end");
+        }
+        if (!isLegalUTF8(source, extraBytesToRead+1)) {
+            rb_raise(rb_path2class("JSON::GeneratorError"),
+                    "source sequence is illegal/malformed utf-8");
+        }
+        /*
+         * The cases all fall through. See "Note A" below.
+         */
+        switch (extraBytesToRead) {
+            case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
+            case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
+            case 3: ch += *source++; ch <<= 6;
+            case 2: ch += *source++; ch <<= 6;
+            case 1: ch += *source++; ch <<= 6;
+            case 0: ch += *source++;
+        }
+        ch -= offsetsFromUTF8[extraBytesToRead];
+
+        if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
+            /* UTF-16 surrogate values are illegal in UTF-32 */
+            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+#if UNI_STRICT_CONVERSION
+                source -= (extraBytesToRead+1); /* return to the illegal value itself */
+                rb_raise(rb_path2class("JSON::GeneratorError"),
+                        "source sequence is illegal/malformed utf-8");
+#else
+                unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
+#endif
+            } else {
+                /* normal case */
+                switch (ch) {
+                    case '\n':
+                        fbuffer_append(buffer, "\\n", 2);
+                        break;
+                    case '\r':
+                        fbuffer_append(buffer, "\\r", 2);
+                        break;
+                    case '\\':
+                        fbuffer_append(buffer, "\\\\", 2);
+                        break;
+                    case '"':
+                        fbuffer_append(buffer, "\\\"", 2);
+                        break;
+                    case '\t':
+                        fbuffer_append(buffer, "\\t", 2);
+                        break;
+                    case '\f':
+                        fbuffer_append(buffer, "\\f", 2);
+                        break;
+                    case '\b':
+                        fbuffer_append(buffer, "\\b", 2);
+                        break;
+                    default:
+                        if (ch >= 0x20 && ch <= 0x7f) {
+                            fbuffer_append_char(buffer, ch);
+                        } else {
+                            unicode_escape_to_buffer(buffer, buf, (UTF16) ch);
+                        }
+                        break;
+                }
+            }
+        } else if (ch > UNI_MAX_UTF16) {
+#if UNI_STRICT_CONVERSION
+            source -= (extraBytesToRead+1); /* return to the start */
+            rb_raise(rb_path2class("JSON::GeneratorError"),
+                    "source sequence is illegal/malformed utf8");
 #else
-#define FORCE_UTF8(obj)
+            unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
 #endif
+        } else {
+            /* target is a character in range 0xFFFF - 0x10FFFF. */
+            ch -= halfBase;
+            unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START));
+            unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START));
+        }
+    }
+}
 
-static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject,
-             mHash, mArray, mInteger, mFloat, mString, mString_Extend,
-             mTrueClass, mFalseClass, mNilClass, eGeneratorError,
-             eNestingError, CRegexp_MULTILINE;
+static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string)
+{
+    const char *ptr = RSTRING_PTR(string), *p;
+    int len = RSTRING_LEN(string), start = 0, end = 0;
+    const char *escape = NULL;
+    int escape_len;
+    unsigned char c;
+    char buf[6] = { '\\', 'u' };
+
+    for (start = 0, end = 0; end < len;) {
+        p = ptr + end;
+        c = (unsigned char) *p;
+        if (c < 0x20) {
+            switch (c) {
+                case '\n':
+                    escape = "\\n";
+                    escape_len = 2;
+                    break;
+                case '\r':
+                    escape = "\\r";
+                    escape_len = 2;
+                    break;
+                case '\t':
+                    escape = "\\t";
+                    escape_len = 2;
+                    break;
+                case '\f':
+                    escape = "\\f";
+                    escape_len = 2;
+                    break;
+                case '\b':
+                    escape = "\\b";
+                    escape_len = 2;
+                    break;
+                default:
+                    unicode_escape(buf, (UTF16) *p);
+                    escape = buf;
+                    escape_len = 6;
+                    break;
+            }
+        } else {
+            switch (c) {
+                case '\\':
+                    escape = "\\\\";
+                    escape_len = 2;
+                    break;
+                case '"':
+                    escape =  "\\\"";
+                    escape_len = 2;
+                    break;
+                default:
+                    end++;
+                    continue;
+                    break;
+            }
+        }
+        fbuffer_append(buffer, ptr + start, end - start);
+        fbuffer_append(buffer, escape, escape_len);
+        start = ++end;
+        escape = NULL;
+    }
+    fbuffer_append(buffer, ptr + start, end - start);
+}
 
-static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
-          i_object_nl, i_array_nl, i_max_nesting,
-          i_allow_nan, i_ascii_only, i_pack, i_unpack, i_create_id, i_extend;
+/* fbuffer implementation */
 
-typedef struct JSON_Generator_StateStruct {
-    char *indent;
-    long indent_len;
-    char *space;
-    long space_len;
-    char *space_before;
-    long space_before_len;
-    char *object_nl;
-    long object_nl_len;
-    char *array_nl;
-    long array_nl_len;
-    FBuffer *array_delim;
-    FBuffer *object_delim;
-    FBuffer *object_delim2;
-    long max_nesting;
-    char allow_nan;
-    char ascii_only;
-} JSON_Generator_State;
-
-#define GET_STATE(self)                       \
-    JSON_Generator_State *state;              \
-    Data_Get_Struct(self, JSON_Generator_State, state);
-
-#define RSTRING_PAIR(string) RSTRING_PTR(string), RSTRING_LEN(string)
+static FBuffer *fbuffer_alloc()
+{
+    FBuffer *fb = ALLOC(FBuffer);
+    memset((void *) fb, 0, sizeof(FBuffer));
+    fb->initial_length = FBUFFER_INITIAL_LENGTH;
+    return fb;
+}
+
+static FBuffer *fbuffer_alloc_with_length(unsigned int initial_length)
+{
+    assert(initial_length > 0);
+    FBuffer *fb = ALLOC(FBuffer);
+    memset((void *) fb, 0, sizeof(FBuffer));
+    fb->initial_length = initial_length;
+    return fb;
+}
+
+
+static void fbuffer_free(FBuffer *fb)
+{
+    if (fb->ptr) ruby_xfree(fb->ptr);
+    ruby_xfree(fb);
+}
+
+static void fbuffer_free_only_buffer(FBuffer *fb)
+{
+    ruby_xfree(fb);
+}
+
+static void fbuffer_clear(FBuffer *fb)
+{
+    fb->len = 0;
+}
+
+static void fbuffer_inc_capa(FBuffer *fb, unsigned int requested)
+{
+    unsigned int required;
+
+    if (!fb->ptr) {
+        fb->ptr = ALLOC_N(char, fb->initial_length);
+        fb->capa = fb->initial_length;
+    }
+
+    for (required = fb->capa; requested > required - fb->len; required <<= 1);
+
+    if (required > fb->capa) {
+        fb->ptr = (char *) REALLOC_N((long*) fb->ptr, char, required);
+        fb->capa = required;
+    }
+}
+
+static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned int len)
+{
+    if (len > 0) {
+        fbuffer_inc_capa(fb, len);
+        memcpy(fb->ptr + fb->len, newstr, len);
+        fb->len += len;
+    }
+}
+
+static void fbuffer_append_char(FBuffer *fb, char newchr)
+{
+    fbuffer_inc_capa(fb, 1);
+    *(fb->ptr + fb->len) = newchr;
+    fb->len++;
+}
 
 /* 
  * Document-module: JSON::Ext::Generator
@@ -183,7 +444,8 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
  * method should be used, if you want to convert raw strings to JSON
  * instead of UTF-8 strings, e. g. binary data.
  */
-static VALUE mString_to_json_raw_object(VALUE self) {
+static VALUE mString_to_json_raw_object(VALUE self)
+{
     VALUE ary;
     VALUE result = rb_hash_new();
     rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
@@ -198,7 +460,8 @@ static VALUE mString_to_json_raw_object(VALUE self) {
  * This method creates a JSON text from the result of a call to
  * to_json_raw_object of this String.
  */
-static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self) {
+static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self)
+{
     VALUE obj = mString_to_json_raw_object(self);
     Check_Type(obj, T_HASH);
     return mHash_to_json(argc, argv, obj);
@@ -210,7 +473,8 @@ static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self) {
  * Raw Strings are JSON Objects (the raw bytes are stored in an array for the
  * key "raw"). The Ruby String can be created by this module method.
  */
-static VALUE mString_Extend_json_create(VALUE self, VALUE o) {
+static VALUE mString_Extend_json_create(VALUE self, VALUE o)
+{
     VALUE ary;
     Check_Type(o, T_HASH);
     ary = rb_hash_aref(o, rb_str_new2("raw"));
@@ -272,7 +536,8 @@ static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self)
     return cState_partial_generate(state, string, depth);
 }
 
-static void State_free(JSON_Generator_State *state) {
+static void State_free(JSON_Generator_State *state)
+{
     if (state->indent) ruby_xfree(state->indent);
     if (state->space) ruby_xfree(state->space);
     if (state->space_before) ruby_xfree(state->space_before);
@@ -400,7 +665,7 @@ static VALUE cState_to_h(VALUE self)
     STR_SET_EMBED_LEN(str, 0);\
 } while (0)
 
-inline static VALUE fbuffer2rstring(FBuffer *buffer)
+static VALUE fbuffer2rstring(FBuffer *buffer)
 {
     NEWOBJ(str, struct RString);
     OBJSETUP(str, rb_cString, T_STRING);
@@ -413,8 +678,7 @@ inline static VALUE fbuffer2rstring(FBuffer *buffer)
     return (VALUE) str;
 }
 #else
-
-inline static VALUE fbuffer2rstring(FBuffer *buffer)
+static VALUE fbuffer2rstring(FBuffer *buffer)
 {
     NEWOBJ(str, struct RString);
     OBJSETUP(str, rb_cString, T_STRING);
@@ -427,7 +691,7 @@ inline static VALUE fbuffer2rstring(FBuffer *buffer)
 }
 #endif
 
-void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj, long depth)
+static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj, long depth)
 {
     VALUE tmp;
     switch (TYPE(obj)) {
@@ -581,7 +845,7 @@ void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, V
  * Generates a part of a JSON document from object +obj+ and returns the
  * result.
  */
-inline static VALUE cState_partial_generate(VALUE self, VALUE obj, VALUE depth)
+static VALUE cState_partial_generate(VALUE self, VALUE obj, VALUE depth)
 {
     VALUE result;
     FBuffer *buffer = fbuffer_alloc();
@@ -623,7 +887,7 @@ inline static VALUE cState_partial_generate(VALUE self, VALUE obj, VALUE depth)
  * result. If no valid JSON document can be created this method raises a
  * GeneratorError exception.
  */
-inline static VALUE cState_generate(VALUE self, VALUE obj)
+static VALUE cState_generate(VALUE self, VALUE obj)
 {
     VALUE result = cState_partial_generate(self, obj, Qnil);
     VALUE re, args[2];
@@ -670,7 +934,7 @@ static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
  * new State instance configured by _opts_, something else to create an
  * unconfigured instance. If _opts_ is a State object, it is just returned.
  */
-inline static VALUE cState_from_state_s(VALUE self, VALUE opts)
+static VALUE cState_from_state_s(VALUE self, VALUE opts)
 {
     if (rb_obj_is_kind_of(opts, self)) {
         return opts;
diff --git a/ext/json/ext/generator/generator.h b/ext/json/ext/generator/generator.h
new file mode 100644
index 0000000..b127f9d
--- /dev/null
+++ b/ext/json/ext/generator/generator.h
@@ -0,0 +1,178 @@
+#ifndef _GENERATOR_H_
+#define _GENERATOR_H_
+
+#include <string.h>
+#include <assert.h>
+#include <math.h>
+
+#include "ruby.h"
+
+#if HAVE_RUBY_ST_H
+#include "ruby/st.h"
+#endif
+
+#if HAVE_ST_H
+#include "st.h"
+#endif
+
+#if HAVE_RUBY_RE_H
+#include "ruby/re.h"
+#endif
+
+#if HAVE_RE_H
+#include "re.h"
+#endif
+
+#ifdef HAVE_RUBY_ENCODING_H
+#include "ruby/encoding.h"
+#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding())
+#else
+#define FORCE_UTF8(obj)
+#endif
+
+#ifndef RHASH_TBL
+#define RHASH_TBL(hsh) (RHASH(hsh)->tbl)
+#endif
+
+#ifndef RHASH_SIZE
+#define RHASH_SIZE(hsh) (RHASH(hsh)->tbl->num_entries)
+#endif
+
+#ifndef RFLOAT_VALUE
+#define RFLOAT_VALUE(val) (RFLOAT(val)->value)
+#endif
+
+#ifndef RARRAY_PTR
+#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr
+#endif
+#ifndef RARRAY_LEN
+#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len
+#endif
+#ifndef RSTRING_PTR
+#define RSTRING_PTR(string) RSTRING(string)->ptr
+#endif
+#ifndef RSTRING_LEN
+#define RSTRING_LEN(string) RSTRING(string)->len
+#endif
+
+#define RSTRING_PAIR(string) RSTRING_PTR(string), RSTRING_LEN(string)
+
+/* fbuffer implementation */
+
+typedef struct FBufferStruct {
+    unsigned int initial_length;
+    char *ptr;
+    unsigned int len;
+    unsigned int capa;
+} FBuffer;
+
+#define FBUFFER_INITIAL_LENGTH 4096
+
+#define FBUFFER_PTR(fb) (fb->ptr)
+#define FBUFFER_LEN(fb) (fb->len)
+#define FBUFFER_CAPA(fb) (fb->capa)
+#define FBUFFER_PAIR(fb) FBUFFER_PTR(fb), FBUFFER_LEN(fb)
+
+static FBuffer *fbuffer_alloc();
+static FBuffer *fbuffer_alloc_with_length(unsigned initial_length);
+static void fbuffer_free(FBuffer *fb);
+static void fbuffer_free_only_buffer(FBuffer *fb);
+static void fbuffer_clear(FBuffer *fb);
+static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned int len);
+static void fbuffer_append_char(FBuffer *fb, char newchr);
+
+/* unicode defintions */
+
+#define UNI_STRICT_CONVERSION 1
+
+typedef unsigned long	UTF32;	/* at least 32 bits */
+typedef unsigned short	UTF16;	/* at least 16 bits */
+typedef unsigned char	UTF8;	/* typically 8 bits */
+
+#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
+#define UNI_MAX_BMP (UTF32)0x0000FFFF
+#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
+#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
+#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
+
+#define UNI_SUR_HIGH_START  (UTF32)0xD800
+#define UNI_SUR_HIGH_END    (UTF32)0xDBFF
+#define UNI_SUR_LOW_START   (UTF32)0xDC00
+#define UNI_SUR_LOW_END     (UTF32)0xDFFF
+
+static const int halfShift  = 10; /* used for shifting by 10 bits */
+
+static const UTF32 halfBase = 0x0010000UL;
+static const UTF32 halfMask = 0x3FFUL;
+
+static unsigned char isLegalUTF8(const UTF8 *source, int length);
+static void unicode_escape(char *buf, UTF16 character);
+static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character);
+static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string);
+static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string);
+
+/* ruby api and some helpers */
+
+typedef struct JSON_Generator_StateStruct {
+    char *indent;
+    long indent_len;
+    char *space;
+    long space_len;
+    char *space_before;
+    long space_before_len;
+    char *object_nl;
+    long object_nl_len;
+    char *array_nl;
+    long array_nl_len;
+    FBuffer *array_delim;
+    FBuffer *object_delim;
+    FBuffer *object_delim2;
+    long max_nesting;
+    char allow_nan;
+    char ascii_only;
+} JSON_Generator_State;
+
+#define GET_STATE(self)                       \
+    JSON_Generator_State *state;              \
+    Data_Get_Struct(self, JSON_Generator_State, state);
+
+static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self);
+static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self);
+static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self);
+static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self);
+static VALUE mString_included_s(VALUE self, VALUE modul);
+static VALUE mString_to_json(int argc, VALUE *argv, VALUE self);
+static VALUE mString_to_json_raw_object(VALUE self);
+static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self);
+static VALUE mString_Extend_json_create(VALUE self, VALUE o);
+static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self);
+static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self);
+static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self);
+static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self);
+static void State_free(JSON_Generator_State *state);
+static JSON_Generator_State *State_allocate();
+static VALUE cState_s_allocate(VALUE klass);
+static VALUE cState_configure(VALUE self, VALUE opts);
+static VALUE cState_to_h(VALUE self);
+static VALUE fbuffer2rstring(FBuffer *buffer);
+static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj, long depth);
+static VALUE cState_partial_generate(VALUE self, VALUE obj, VALUE depth);
+static VALUE cState_generate(VALUE self, VALUE obj);
+static VALUE cState_initialize(int argc, VALUE *argv, VALUE self);
+static VALUE cState_from_state_s(VALUE self, VALUE opts);
+static VALUE cState_indent(VALUE self);
+static VALUE cState_indent_set(VALUE self, VALUE indent);
+static VALUE cState_space(VALUE self);
+static VALUE cState_space_set(VALUE self, VALUE space);
+static VALUE cState_space_before(VALUE self);
+static VALUE cState_space_before_set(VALUE self, VALUE space_before);
+static VALUE cState_object_nl(VALUE self);
+static VALUE cState_object_nl_set(VALUE self, VALUE object_nl);
+static VALUE cState_array_nl(VALUE self);
+static VALUE cState_array_nl_set(VALUE self, VALUE array_nl);
+static VALUE cState_max_nesting(VALUE self);
+static VALUE cState_max_nesting_set(VALUE self, VALUE depth);
+static VALUE cState_allow_nan_p(VALUE self);
+static VALUE cState_ascii_only_p(VALUE self);
+
+#endif
diff --git a/ext/json/ext/generator/unicode.c b/ext/json/ext/generator/unicode.c
deleted file mode 100644
index cece042..0000000
--- a/ext/json/ext/generator/unicode.c
+++ /dev/null
@@ -1,258 +0,0 @@
-#include "unicode.h"
-#include "fbuffer.h"
-
-/*
- * Copyright 2001-2004 Unicode, Inc.
- * 
- * Disclaimer
- * 
- * This source code is provided as is by Unicode, Inc. No claims are
- * made as to fitness for any particular purpose. No warranties of any
- * kind are expressed or implied. The recipient agrees to determine
- * applicability of information provided. If this file has been
- * purchased on magnetic or optical media from Unicode, Inc., the
- * sole remedy for any claim will be exchange of defective media
- * within 90 days of receipt.
- * 
- * Limitations on Rights to Redistribute This Code
- * 
- * Unicode, Inc. hereby grants the right to freely use the information
- * supplied in this file in the creation of products supporting the
- * Unicode Standard, and to make copies of this file in any form
- * for internal or external distribution as long as this notice
- * remains attached.
- */
-
-/*
- * Index into the table below with the first byte of a UTF-8 sequence to
- * get the number of trailing bytes that are supposed to follow it.
- * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
- * left as-is for anyone who may want to do such conversion, which was
- * allowed in earlier algorithms.
- */
-static const char trailingBytesForUTF8[256] = {
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
-};
-
-/*
- * Magic values subtracted from a buffer value during UTF8 conversion.
- * This table contains as many values as there might be trailing bytes
- * in a UTF-8 sequence.
- */
-static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 
-		     0x03C82080UL, 0xFA082080UL, 0x82082080UL };
-
-/*
- * Utility routine to tell whether a sequence of bytes is legal UTF-8.
- * This must be called with the length pre-determined by the first byte.
- * If not calling this from ConvertUTF8to*, then the length can be set by:
- *  length = trailingBytesForUTF8[*source]+1;
- * and the sequence is illegal right away if there aren't that many bytes
- * available.
- * If presented with a length > 4, this returns 0.  The Unicode
- * definition of UTF-8 goes up to 4-byte sequences.
- */
-
-inline static unsigned char isLegalUTF8(const UTF8 *source, int length)
-{
-    UTF8 a;
-    const UTF8 *srcptr = source+length;
-    switch (length) {
-        default: return 0;
-                 /* Everything else falls through when "1"... */
-        case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
-        case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
-        case 2: if ((a = (*--srcptr)) > 0xBF) return 0;
-
-                    switch (*source) {
-                        /* no fall-through in this inner switch */
-                        case 0xE0: if (a < 0xA0) return 0; break;
-                        case 0xED: if (a > 0x9F) return 0; break;
-                        case 0xF0: if (a < 0x90) return 0; break;
-                        case 0xF4: if (a > 0x8F) return 0; break;
-                        default:   if (a < 0x80) return 0;
-                    }
-
-        case 1: if (*source >= 0x80 && *source < 0xC2) return 0;
-    }
-    if (*source > 0xF4) return 0;
-    return 1;
-}
-
-inline static void unicode_escape(char *buf, UTF16 character)
-{
-    const char *digits = "0123456789abcdef";
-
-    buf[2] = digits[character >> 12];
-    buf[3] = digits[(character >> 8) & 0xf];
-    buf[4] = digits[(character >> 4) & 0xf];
-    buf[5] = digits[character & 0xf];
-}
-
-
-inline static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character)
-{
-    unicode_escape(buf, character);
-    fbuffer_append(buffer, buf, 6);
-}
-
-inline void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string)
-{
-    const UTF8 *source = (UTF8 *) RSTRING_PTR(string);
-    const UTF8 *sourceEnd = source + RSTRING_LEN(string);
-    char buf[6] = { '\\', 'u' };
-
-    while (source < sourceEnd) {
-        UTF32 ch = 0;
-        unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
-        if (source + extraBytesToRead >= sourceEnd) {
-            rb_raise(rb_path2class("JSON::GeneratorError"),
-                    "partial character in source, but hit end");
-        }
-        if (!isLegalUTF8(source, extraBytesToRead+1)) {
-            rb_raise(rb_path2class("JSON::GeneratorError"),
-                    "source sequence is illegal/malformed utf-8");
-        }
-        /*
-         * The cases all fall through. See "Note A" below.
-         */
-        switch (extraBytesToRead) {
-            case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
-            case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
-            case 3: ch += *source++; ch <<= 6;
-            case 2: ch += *source++; ch <<= 6;
-            case 1: ch += *source++; ch <<= 6;
-            case 0: ch += *source++;
-        }
-        ch -= offsetsFromUTF8[extraBytesToRead];
-
-        if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
-            /* UTF-16 surrogate values are illegal in UTF-32 */
-            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
-#if UNI_STRICT_CONVERSION
-                source -= (extraBytesToRead+1); /* return to the illegal value itself */
-                rb_raise(rb_path2class("JSON::GeneratorError"),
-                        "source sequence is illegal/malformed utf-8");
-#else
-                unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
-#endif
-            } else {
-                /* normal case */
-                switch (ch) {
-                    case '\n':
-                        fbuffer_append(buffer, "\\n", 2);
-                        break;
-                    case '\r':
-                        fbuffer_append(buffer, "\\r", 2);
-                        break;
-                    case '\\':
-                        fbuffer_append(buffer, "\\\\", 2);
-                        break;
-                    case '"':
-                        fbuffer_append(buffer, "\\\"", 2);
-                        break;
-                    case '\t':
-                        fbuffer_append(buffer, "\\t", 2);
-                        break;
-                    case '\f':
-                        fbuffer_append(buffer, "\\f", 2);
-                        break;
-                    case '\b':
-                        fbuffer_append(buffer, "\\b", 2);
-                        break;
-                    default:
-                        if (ch >= 0x20 && ch <= 0x7f) {
-                            fbuffer_append_char(buffer, ch);
-                        } else {
-                            unicode_escape_to_buffer(buffer, buf, (UTF16) ch);
-                        }
-                        break;
-                }
-            }
-        } else if (ch > UNI_MAX_UTF16) {
-#if UNI_STRICT_CONVERSION
-            source -= (extraBytesToRead+1); /* return to the start */
-            rb_raise(rb_path2class("JSON::GeneratorError"),
-                    "source sequence is illegal/malformed utf8");
-#else
-            unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
-#endif
-        } else {
-            /* target is a character in range 0xFFFF - 0x10FFFF. */
-            ch -= halfBase;
-            unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START));
-            unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START));
-        }
-    }
-}
-
-inline void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string)
-{
-    const char *ptr = RSTRING_PTR(string), *p;
-    int len = RSTRING_LEN(string), start = 0, end = 0;
-    const char *escape = NULL;
-    int escape_len;
-    unsigned char c;
-    char buf[6] = { '\\', 'u' };
-
-    for (start = 0, end = 0; end < len;) {
-        p = ptr + end;
-        c = (unsigned char) *p;
-        if (c < 0x20) {
-            switch (c) {
-                case '\n':
-                    escape = "\\n";
-                    escape_len = 2;
-                    break;
-                case '\r':
-                    escape = "\\r";
-                    escape_len = 2;
-                    break;
-                case '\t':
-                    escape = "\\t";
-                    escape_len = 2;
-                    break;
-                case '\f':
-                    escape = "\\f";
-                    escape_len = 2;
-                    break;
-                case '\b':
-                    escape = "\\b";
-                    escape_len = 2;
-                    break;
-                default:
-                    unicode_escape(buf, (UTF16) *p);
-                    escape = buf;
-                    escape_len = 6;
-                    break;
-            }
-        } else {
-            switch (c) {
-                case '\\':
-                    escape = "\\\\";
-                    escape_len = 2;
-                    break;
-                case '"':
-                    escape =  "\\\"";
-                    escape_len = 2;
-                    break;
-                default:
-                    end++;
-                    continue;
-                    break;
-            }
-        }
-        fbuffer_append(buffer, ptr + start, end - start);
-        fbuffer_append(buffer, escape, escape_len);
-        start = ++end;
-        escape = NULL;
-    }
-    fbuffer_append(buffer, ptr + start, end - start);
-}
diff --git a/ext/json/ext/generator/unicode.h b/ext/json/ext/generator/unicode.h
deleted file mode 100644
index 3837b32..0000000
--- a/ext/json/ext/generator/unicode.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#include "ruby.h"
-#include "fbuffer.h"
-
-#ifndef _GENERATOR_UNICODE_H_
-#define _GENERATOR_UNICODE_H_
-
-#define UNI_STRICT_CONVERSION 1
-
-typedef unsigned long	UTF32;	/* at least 32 bits */
-typedef unsigned short	UTF16;	/* at least 16 bits */
-typedef unsigned char	UTF8;	/* typically 8 bits */
-
-#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
-#define UNI_MAX_BMP (UTF32)0x0000FFFF
-#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
-#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
-#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
-
-#define UNI_SUR_HIGH_START  (UTF32)0xD800
-#define UNI_SUR_HIGH_END    (UTF32)0xDBFF
-#define UNI_SUR_LOW_START   (UTF32)0xDC00
-#define UNI_SUR_LOW_END     (UTF32)0xDFFF
-
-static const int halfShift  = 10; /* used for shifting by 10 bits */
-
-static const UTF32 halfBase = 0x0010000UL;
-static const UTF32 halfMask = 0x3FFUL;
-
-inline void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string);
-inline void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string);
-
-#ifndef RARRAY_PTR
-#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr
-#endif
-#ifndef RARRAY_LEN
-#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len
-#endif
-#ifndef RSTRING_PTR
-#define RSTRING_PTR(string) RSTRING(string)->ptr
-#endif
-#ifndef RSTRING_LEN
-#define RSTRING_LEN(string) RSTRING(string)->len
-#endif
-
-#endif