diff options
author | Florian Frank <flori@ping.de> | 2009-11-07 04:46:56 +0100 |
---|---|---|
committer | Florian Frank <flori@ping.de> | 2009-11-07 15:13:21 +0100 |
commit | 67602c351b6c1655f853f0ca69341035e9b2c04b (patch) | |
tree | aa0c57dad0a195a7622bd8827bbdf0db9a480461 | |
parent | 1153dfc4c65846e737060fb13d15452bfe408502 (diff) | |
download | json-67602c351b6c1655f853f0ca69341035e9b2c04b.tar.gz |
use one compilation unit to support inlining
-rw-r--r-- | ext/json/ext/generator/fbuffer.c | 70 | ||||
-rw-r--r-- | ext/json/ext/generator/fbuffer.h | 29 | ||||
-rw-r--r-- | ext/json/ext/generator/generator.c | 412 | ||||
-rw-r--r-- | ext/json/ext/generator/generator.h | 178 | ||||
-rw-r--r-- | ext/json/ext/generator/unicode.c | 258 | ||||
-rw-r--r-- | ext/json/ext/generator/unicode.h | 45 |
6 files changed, 516 insertions, 476 deletions
diff --git a/ext/json/ext/generator/fbuffer.c b/ext/json/ext/generator/fbuffer.c deleted file mode 100644 index 521f9e1..0000000 --- a/ext/json/ext/generator/fbuffer.c +++ /dev/null @@ -1,70 +0,0 @@ -#include "fbuffer.h" - -inline FBuffer *fbuffer_alloc() -{ - FBuffer *fb = ALLOC(FBuffer); - memset((void *) fb, 0, sizeof(FBuffer)); - fb->initial_length = FBUFFER_INITIAL_LENGTH; - return fb; -} - -inline FBuffer *fbuffer_alloc_with_length(unsigned int initial_length) -{ - assert(initial_length > 0); - FBuffer *fb = ALLOC(FBuffer); - memset((void *) fb, 0, sizeof(FBuffer)); - fb->initial_length = initial_length; - return fb; -} - - -inline void fbuffer_free(FBuffer *fb) -{ - if (fb->ptr) ruby_xfree(fb->ptr); - ruby_xfree(fb); -} - -inline void fbuffer_free_only_buffer(FBuffer *fb) -{ - ruby_xfree(fb); -} - - -inline void fbuffer_clear(FBuffer *fb) -{ - fb->len = 0; -} - -static inline void fbuffer_inc_capa(FBuffer *fb, unsigned int requested) -{ - unsigned int required; - - if (!fb->ptr) { - fb->ptr = ALLOC_N(char, fb->initial_length); - fb->capa = fb->initial_length; - } - - for (required = fb->capa; requested > required - fb->len; required <<= 1); - - if (required > fb->capa) { - fb->ptr = (char *) REALLOC_N((long*) fb->ptr, char, required); - fb->capa = required; - } -} - -inline void fbuffer_append(FBuffer *fb, const char *newstr, unsigned int len) -{ - if (len > 0) { - fbuffer_inc_capa(fb, len); - memcpy(fb->ptr + fb->len, newstr, len); - fb->len += len; - } -} - -inline void fbuffer_append_char(FBuffer *fb, char newchr) -{ - fbuffer_inc_capa(fb, 1); - *(fb->ptr + fb->len) = newchr; - fb->len++; -} - diff --git a/ext/json/ext/generator/fbuffer.h b/ext/json/ext/generator/fbuffer.h deleted file mode 100644 index f4baf62..0000000 --- a/ext/json/ext/generator/fbuffer.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef _FBUFFER_H_ -#define _FBUFFER_H_ - -#include <assert.h> -#include "ruby.h" - -typedef struct FBufferStruct { - unsigned int initial_length; - char *ptr; - unsigned int len; - unsigned int capa; -} FBuffer; - -#define FBUFFER_INITIAL_LENGTH 4096 - -#define FBUFFER_PTR(fb) (fb->ptr) -#define FBUFFER_LEN(fb) (fb->len) -#define FBUFFER_CAPA(fb) (fb->capa) -#define FBUFFER_PAIR(fb) FBUFFER_PTR(fb), FBUFFER_LEN(fb) - -inline FBuffer *fbuffer_alloc(); -inline FBuffer *fbuffer_alloc_with_length(unsigned initial_length); -inline void fbuffer_free(FBuffer *fb); -inline void fbuffer_free_only_buffer(FBuffer *fb); -inline void fbuffer_clear(FBuffer *fb); -inline void fbuffer_append(FBuffer *fb, const char *newstr, unsigned int len); -inline void fbuffer_append_char(FBuffer *fb, char newchr); - -#endif diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 7d2abd1..fabb0ea 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -1,82 +1,343 @@ -#include <string.h> -#include "ruby.h" +#include "generator.h" -#if HAVE_RUBY_ST_H -#include "ruby/st.h" +#ifdef HAVE_RUBY_ENCODING_H +static VALUE CEncoding_UTF_8; +static ID i_encoding, i_encode; #endif -#if HAVE_ST_H -#include "st.h" -#endif +static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject, + mHash, mArray, mInteger, mFloat, mString, mString_Extend, + mTrueClass, mFalseClass, mNilClass, eGeneratorError, + eNestingError, CRegexp_MULTILINE; -#include "unicode.h" -#include <math.h> +static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before, + i_object_nl, i_array_nl, i_max_nesting, + i_allow_nan, i_ascii_only, i_pack, i_unpack, i_create_id, i_extend; -#if HAVE_RUBY_RE_H -#include "ruby/re.h" -#endif +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ -#if HAVE_RE_H -#include "re.h" -#endif +/* + * Index into the table below with the first byte of a UTF-8 sequence to + * get the number of trailing bytes that are supposed to follow it. + * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is + * left as-is for anyone who may want to do such conversion, which was + * allowed in earlier algorithms. + */ +static const char trailingBytesForUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 +}; -inline static VALUE cState_partial_generate(VALUE self, VALUE obj, VALUE depth); -inline static VALUE cState_from_state_s(VALUE self, VALUE opts); +/* + * Magic values subtracted from a buffer value during UTF8 conversion. + * This table contains as many values as there might be trailing bytes + * in a UTF-8 sequence. + */ +static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, + 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; -#ifndef RHASH_TBL -#define RHASH_TBL(hsh) (RHASH(hsh)->tbl) -#endif +/* + * Utility routine to tell whether a sequence of bytes is legal UTF-8. + * This must be called with the length pre-determined by the first byte. + * If not calling this from ConvertUTF8to*, then the length can be set by: + * length = trailingBytesForUTF8[*source]+1; + * and the sequence is illegal right away if there aren't that many bytes + * available. + * If presented with a length > 4, this returns 0. The Unicode + * definition of UTF-8 goes up to 4-byte sequences. + */ -#ifndef RHASH_SIZE -#define RHASH_SIZE(hsh) (RHASH(hsh)->tbl->num_entries) -#endif +static unsigned char isLegalUTF8(const UTF8 *source, int length) +{ + UTF8 a; + const UTF8 *srcptr = source+length; + switch (length) { + default: return 0; + /* Everything else falls through when "1"... */ + case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; + case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; + case 2: if ((a = (*--srcptr)) > 0xBF) return 0; + + switch (*source) { + /* no fall-through in this inner switch */ + case 0xE0: if (a < 0xA0) return 0; break; + case 0xED: if (a > 0x9F) return 0; break; + case 0xF0: if (a < 0x90) return 0; break; + case 0xF4: if (a > 0x8F) return 0; break; + default: if (a < 0x80) return 0; + } -#ifndef RFLOAT_VALUE -#define RFLOAT_VALUE(val) (RFLOAT(val)->value) -#endif + case 1: if (*source >= 0x80 && *source < 0xC2) return 0; + } + if (*source > 0xF4) return 0; + return 1; +} -#ifdef HAVE_RUBY_ENCODING_H -#include "ruby/encoding.h" -#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding()) -static VALUE CEncoding_UTF_8; -static ID i_encoding, i_encode; +static void unicode_escape(char *buf, UTF16 character) +{ + const char *digits = "0123456789abcdef"; + + buf[2] = digits[character >> 12]; + buf[3] = digits[(character >> 8) & 0xf]; + buf[4] = digits[(character >> 4) & 0xf]; + buf[5] = digits[character & 0xf]; +} + + +static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character) +{ + unicode_escape(buf, character); + fbuffer_append(buffer, buf, 6); +} + +static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string) +{ + const UTF8 *source = (UTF8 *) RSTRING_PTR(string); + const UTF8 *sourceEnd = source + RSTRING_LEN(string); + char buf[6] = { '\\', 'u' }; + + while (source < sourceEnd) { + UTF32 ch = 0; + unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; + if (source + extraBytesToRead >= sourceEnd) { + rb_raise(rb_path2class("JSON::GeneratorError"), + "partial character in source, but hit end"); + } + if (!isLegalUTF8(source, extraBytesToRead+1)) { + rb_raise(rb_path2class("JSON::GeneratorError"), + "source sequence is illegal/malformed utf-8"); + } + /* + * The cases all fall through. See "Note A" below. + */ + switch (extraBytesToRead) { + case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 3: ch += *source++; ch <<= 6; + case 2: ch += *source++; ch <<= 6; + case 1: ch += *source++; ch <<= 6; + case 0: ch += *source++; + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { +#if UNI_STRICT_CONVERSION + source -= (extraBytesToRead+1); /* return to the illegal value itself */ + rb_raise(rb_path2class("JSON::GeneratorError"), + "source sequence is illegal/malformed utf-8"); +#else + unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR); +#endif + } else { + /* normal case */ + switch (ch) { + case '\n': + fbuffer_append(buffer, "\\n", 2); + break; + case '\r': + fbuffer_append(buffer, "\\r", 2); + break; + case '\\': + fbuffer_append(buffer, "\\\\", 2); + break; + case '"': + fbuffer_append(buffer, "\\\"", 2); + break; + case '\t': + fbuffer_append(buffer, "\\t", 2); + break; + case '\f': + fbuffer_append(buffer, "\\f", 2); + break; + case '\b': + fbuffer_append(buffer, "\\b", 2); + break; + default: + if (ch >= 0x20 && ch <= 0x7f) { + fbuffer_append_char(buffer, ch); + } else { + unicode_escape_to_buffer(buffer, buf, (UTF16) ch); + } + break; + } + } + } else if (ch > UNI_MAX_UTF16) { +#if UNI_STRICT_CONVERSION + source -= (extraBytesToRead+1); /* return to the start */ + rb_raise(rb_path2class("JSON::GeneratorError"), + "source sequence is illegal/malformed utf8"); #else -#define FORCE_UTF8(obj) + unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR); #endif + } else { + /* target is a character in range 0xFFFF - 0x10FFFF. */ + ch -= halfBase; + unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START)); + unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START)); + } + } +} -static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject, - mHash, mArray, mInteger, mFloat, mString, mString_Extend, - mTrueClass, mFalseClass, mNilClass, eGeneratorError, - eNestingError, CRegexp_MULTILINE; +static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string) +{ + const char *ptr = RSTRING_PTR(string), *p; + int len = RSTRING_LEN(string), start = 0, end = 0; + const char *escape = NULL; + int escape_len; + unsigned char c; + char buf[6] = { '\\', 'u' }; + + for (start = 0, end = 0; end < len;) { + p = ptr + end; + c = (unsigned char) *p; + if (c < 0x20) { + switch (c) { + case '\n': + escape = "\\n"; + escape_len = 2; + break; + case '\r': + escape = "\\r"; + escape_len = 2; + break; + case '\t': + escape = "\\t"; + escape_len = 2; + break; + case '\f': + escape = "\\f"; + escape_len = 2; + break; + case '\b': + escape = "\\b"; + escape_len = 2; + break; + default: + unicode_escape(buf, (UTF16) *p); + escape = buf; + escape_len = 6; + break; + } + } else { + switch (c) { + case '\\': + escape = "\\\\"; + escape_len = 2; + break; + case '"': + escape = "\\\""; + escape_len = 2; + break; + default: + end++; + continue; + break; + } + } + fbuffer_append(buffer, ptr + start, end - start); + fbuffer_append(buffer, escape, escape_len); + start = ++end; + escape = NULL; + } + fbuffer_append(buffer, ptr + start, end - start); +} -static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before, - i_object_nl, i_array_nl, i_max_nesting, - i_allow_nan, i_ascii_only, i_pack, i_unpack, i_create_id, i_extend; +/* fbuffer implementation */ -typedef struct JSON_Generator_StateStruct { - char *indent; - long indent_len; - char *space; - long space_len; - char *space_before; - long space_before_len; - char *object_nl; - long object_nl_len; - char *array_nl; - long array_nl_len; - FBuffer *array_delim; - FBuffer *object_delim; - FBuffer *object_delim2; - long max_nesting; - char allow_nan; - char ascii_only; -} JSON_Generator_State; - -#define GET_STATE(self) \ - JSON_Generator_State *state; \ - Data_Get_Struct(self, JSON_Generator_State, state); - -#define RSTRING_PAIR(string) RSTRING_PTR(string), RSTRING_LEN(string) +static FBuffer *fbuffer_alloc() +{ + FBuffer *fb = ALLOC(FBuffer); + memset((void *) fb, 0, sizeof(FBuffer)); + fb->initial_length = FBUFFER_INITIAL_LENGTH; + return fb; +} + +static FBuffer *fbuffer_alloc_with_length(unsigned int initial_length) +{ + assert(initial_length > 0); + FBuffer *fb = ALLOC(FBuffer); + memset((void *) fb, 0, sizeof(FBuffer)); + fb->initial_length = initial_length; + return fb; +} + + +static void fbuffer_free(FBuffer *fb) +{ + if (fb->ptr) ruby_xfree(fb->ptr); + ruby_xfree(fb); +} + +static void fbuffer_free_only_buffer(FBuffer *fb) +{ + ruby_xfree(fb); +} + +static void fbuffer_clear(FBuffer *fb) +{ + fb->len = 0; +} + +static void fbuffer_inc_capa(FBuffer *fb, unsigned int requested) +{ + unsigned int required; + + if (!fb->ptr) { + fb->ptr = ALLOC_N(char, fb->initial_length); + fb->capa = fb->initial_length; + } + + for (required = fb->capa; requested > required - fb->len; required <<= 1); + + if (required > fb->capa) { + fb->ptr = (char *) REALLOC_N((long*) fb->ptr, char, required); + fb->capa = required; + } +} + +static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned int len) +{ + if (len > 0) { + fbuffer_inc_capa(fb, len); + memcpy(fb->ptr + fb->len, newstr, len); + fb->len += len; + } +} + +static void fbuffer_append_char(FBuffer *fb, char newchr) +{ + fbuffer_inc_capa(fb, 1); + *(fb->ptr + fb->len) = newchr; + fb->len++; +} /* * Document-module: JSON::Ext::Generator @@ -183,7 +444,8 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self) * method should be used, if you want to convert raw strings to JSON * instead of UTF-8 strings, e. g. binary data. */ -static VALUE mString_to_json_raw_object(VALUE self) { +static VALUE mString_to_json_raw_object(VALUE self) +{ VALUE ary; VALUE result = rb_hash_new(); rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self))); @@ -198,7 +460,8 @@ static VALUE mString_to_json_raw_object(VALUE self) { * This method creates a JSON text from the result of a call to * to_json_raw_object of this String. */ -static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self) { +static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self) +{ VALUE obj = mString_to_json_raw_object(self); Check_Type(obj, T_HASH); return mHash_to_json(argc, argv, obj); @@ -210,7 +473,8 @@ static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self) { * Raw Strings are JSON Objects (the raw bytes are stored in an array for the * key "raw"). The Ruby String can be created by this module method. */ -static VALUE mString_Extend_json_create(VALUE self, VALUE o) { +static VALUE mString_Extend_json_create(VALUE self, VALUE o) +{ VALUE ary; Check_Type(o, T_HASH); ary = rb_hash_aref(o, rb_str_new2("raw")); @@ -272,7 +536,8 @@ static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self) return cState_partial_generate(state, string, depth); } -static void State_free(JSON_Generator_State *state) { +static void State_free(JSON_Generator_State *state) +{ if (state->indent) ruby_xfree(state->indent); if (state->space) ruby_xfree(state->space); if (state->space_before) ruby_xfree(state->space_before); @@ -400,7 +665,7 @@ static VALUE cState_to_h(VALUE self) STR_SET_EMBED_LEN(str, 0);\ } while (0) -inline static VALUE fbuffer2rstring(FBuffer *buffer) +static VALUE fbuffer2rstring(FBuffer *buffer) { NEWOBJ(str, struct RString); OBJSETUP(str, rb_cString, T_STRING); @@ -413,8 +678,7 @@ inline static VALUE fbuffer2rstring(FBuffer *buffer) return (VALUE) str; } #else - -inline static VALUE fbuffer2rstring(FBuffer *buffer) +static VALUE fbuffer2rstring(FBuffer *buffer) { NEWOBJ(str, struct RString); OBJSETUP(str, rb_cString, T_STRING); @@ -427,7 +691,7 @@ inline static VALUE fbuffer2rstring(FBuffer *buffer) } #endif -void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj, long depth) +static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj, long depth) { VALUE tmp; switch (TYPE(obj)) { @@ -581,7 +845,7 @@ void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, V * Generates a part of a JSON document from object +obj+ and returns the * result. */ -inline static VALUE cState_partial_generate(VALUE self, VALUE obj, VALUE depth) +static VALUE cState_partial_generate(VALUE self, VALUE obj, VALUE depth) { VALUE result; FBuffer *buffer = fbuffer_alloc(); @@ -623,7 +887,7 @@ inline static VALUE cState_partial_generate(VALUE self, VALUE obj, VALUE depth) * result. If no valid JSON document can be created this method raises a * GeneratorError exception. */ -inline static VALUE cState_generate(VALUE self, VALUE obj) +static VALUE cState_generate(VALUE self, VALUE obj) { VALUE result = cState_partial_generate(self, obj, Qnil); VALUE re, args[2]; @@ -670,7 +934,7 @@ static VALUE cState_initialize(int argc, VALUE *argv, VALUE self) * new State instance configured by _opts_, something else to create an * unconfigured instance. If _opts_ is a State object, it is just returned. */ -inline static VALUE cState_from_state_s(VALUE self, VALUE opts) +static VALUE cState_from_state_s(VALUE self, VALUE opts) { if (rb_obj_is_kind_of(opts, self)) { return opts; diff --git a/ext/json/ext/generator/generator.h b/ext/json/ext/generator/generator.h new file mode 100644 index 0000000..b127f9d --- /dev/null +++ b/ext/json/ext/generator/generator.h @@ -0,0 +1,178 @@ +#ifndef _GENERATOR_H_ +#define _GENERATOR_H_ + +#include <string.h> +#include <assert.h> +#include <math.h> + +#include "ruby.h" + +#if HAVE_RUBY_ST_H +#include "ruby/st.h" +#endif + +#if HAVE_ST_H +#include "st.h" +#endif + +#if HAVE_RUBY_RE_H +#include "ruby/re.h" +#endif + +#if HAVE_RE_H +#include "re.h" +#endif + +#ifdef HAVE_RUBY_ENCODING_H +#include "ruby/encoding.h" +#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding()) +#else +#define FORCE_UTF8(obj) +#endif + +#ifndef RHASH_TBL +#define RHASH_TBL(hsh) (RHASH(hsh)->tbl) +#endif + +#ifndef RHASH_SIZE +#define RHASH_SIZE(hsh) (RHASH(hsh)->tbl->num_entries) +#endif + +#ifndef RFLOAT_VALUE +#define RFLOAT_VALUE(val) (RFLOAT(val)->value) +#endif + +#ifndef RARRAY_PTR +#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr +#endif +#ifndef RARRAY_LEN +#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len +#endif +#ifndef RSTRING_PTR +#define RSTRING_PTR(string) RSTRING(string)->ptr +#endif +#ifndef RSTRING_LEN +#define RSTRING_LEN(string) RSTRING(string)->len +#endif + +#define RSTRING_PAIR(string) RSTRING_PTR(string), RSTRING_LEN(string) + +/* fbuffer implementation */ + +typedef struct FBufferStruct { + unsigned int initial_length; + char *ptr; + unsigned int len; + unsigned int capa; +} FBuffer; + +#define FBUFFER_INITIAL_LENGTH 4096 + +#define FBUFFER_PTR(fb) (fb->ptr) +#define FBUFFER_LEN(fb) (fb->len) +#define FBUFFER_CAPA(fb) (fb->capa) +#define FBUFFER_PAIR(fb) FBUFFER_PTR(fb), FBUFFER_LEN(fb) + +static FBuffer *fbuffer_alloc(); +static FBuffer *fbuffer_alloc_with_length(unsigned initial_length); +static void fbuffer_free(FBuffer *fb); +static void fbuffer_free_only_buffer(FBuffer *fb); +static void fbuffer_clear(FBuffer *fb); +static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned int len); +static void fbuffer_append_char(FBuffer *fb, char newchr); + +/* unicode defintions */ + +#define UNI_STRICT_CONVERSION 1 + +typedef unsigned long UTF32; /* at least 32 bits */ +typedef unsigned short UTF16; /* at least 16 bits */ +typedef unsigned char UTF8; /* typically 8 bits */ + +#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD +#define UNI_MAX_BMP (UTF32)0x0000FFFF +#define UNI_MAX_UTF16 (UTF32)0x0010FFFF +#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF +#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF + +#define UNI_SUR_HIGH_START (UTF32)0xD800 +#define UNI_SUR_HIGH_END (UTF32)0xDBFF +#define UNI_SUR_LOW_START (UTF32)0xDC00 +#define UNI_SUR_LOW_END (UTF32)0xDFFF + +static const int halfShift = 10; /* used for shifting by 10 bits */ + +static const UTF32 halfBase = 0x0010000UL; +static const UTF32 halfMask = 0x3FFUL; + +static unsigned char isLegalUTF8(const UTF8 *source, int length); +static void unicode_escape(char *buf, UTF16 character); +static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character); +static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string); +static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string); + +/* ruby api and some helpers */ + +typedef struct JSON_Generator_StateStruct { + char *indent; + long indent_len; + char *space; + long space_len; + char *space_before; + long space_before_len; + char *object_nl; + long object_nl_len; + char *array_nl; + long array_nl_len; + FBuffer *array_delim; + FBuffer *object_delim; + FBuffer *object_delim2; + long max_nesting; + char allow_nan; + char ascii_only; +} JSON_Generator_State; + +#define GET_STATE(self) \ + JSON_Generator_State *state; \ + Data_Get_Struct(self, JSON_Generator_State, state); + +static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self); +static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self); +static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self); +static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self); +static VALUE mString_included_s(VALUE self, VALUE modul); +static VALUE mString_to_json(int argc, VALUE *argv, VALUE self); +static VALUE mString_to_json_raw_object(VALUE self); +static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self); +static VALUE mString_Extend_json_create(VALUE self, VALUE o); +static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self); +static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self); +static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self); +static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self); +static void State_free(JSON_Generator_State *state); +static JSON_Generator_State *State_allocate(); +static VALUE cState_s_allocate(VALUE klass); +static VALUE cState_configure(VALUE self, VALUE opts); +static VALUE cState_to_h(VALUE self); +static VALUE fbuffer2rstring(FBuffer *buffer); +static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj, long depth); +static VALUE cState_partial_generate(VALUE self, VALUE obj, VALUE depth); +static VALUE cState_generate(VALUE self, VALUE obj); +static VALUE cState_initialize(int argc, VALUE *argv, VALUE self); +static VALUE cState_from_state_s(VALUE self, VALUE opts); +static VALUE cState_indent(VALUE self); +static VALUE cState_indent_set(VALUE self, VALUE indent); +static VALUE cState_space(VALUE self); +static VALUE cState_space_set(VALUE self, VALUE space); +static VALUE cState_space_before(VALUE self); +static VALUE cState_space_before_set(VALUE self, VALUE space_before); +static VALUE cState_object_nl(VALUE self); +static VALUE cState_object_nl_set(VALUE self, VALUE object_nl); +static VALUE cState_array_nl(VALUE self); +static VALUE cState_array_nl_set(VALUE self, VALUE array_nl); +static VALUE cState_max_nesting(VALUE self); +static VALUE cState_max_nesting_set(VALUE self, VALUE depth); +static VALUE cState_allow_nan_p(VALUE self); +static VALUE cState_ascii_only_p(VALUE self); + +#endif diff --git a/ext/json/ext/generator/unicode.c b/ext/json/ext/generator/unicode.c deleted file mode 100644 index cece042..0000000 --- a/ext/json/ext/generator/unicode.c +++ /dev/null @@ -1,258 +0,0 @@ -#include "unicode.h" -#include "fbuffer.h" - -/* - * Copyright 2001-2004 Unicode, Inc. - * - * Disclaimer - * - * This source code is provided as is by Unicode, Inc. No claims are - * made as to fitness for any particular purpose. No warranties of any - * kind are expressed or implied. The recipient agrees to determine - * applicability of information provided. If this file has been - * purchased on magnetic or optical media from Unicode, Inc., the - * sole remedy for any claim will be exchange of defective media - * within 90 days of receipt. - * - * Limitations on Rights to Redistribute This Code - * - * Unicode, Inc. hereby grants the right to freely use the information - * supplied in this file in the creation of products supporting the - * Unicode Standard, and to make copies of this file in any form - * for internal or external distribution as long as this notice - * remains attached. - */ - -/* - * Index into the table below with the first byte of a UTF-8 sequence to - * get the number of trailing bytes that are supposed to follow it. - * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is - * left as-is for anyone who may want to do such conversion, which was - * allowed in earlier algorithms. - */ -static const char trailingBytesForUTF8[256] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 -}; - -/* - * Magic values subtracted from a buffer value during UTF8 conversion. - * This table contains as many values as there might be trailing bytes - * in a UTF-8 sequence. - */ -static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, - 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; - -/* - * Utility routine to tell whether a sequence of bytes is legal UTF-8. - * This must be called with the length pre-determined by the first byte. - * If not calling this from ConvertUTF8to*, then the length can be set by: - * length = trailingBytesForUTF8[*source]+1; - * and the sequence is illegal right away if there aren't that many bytes - * available. - * If presented with a length > 4, this returns 0. The Unicode - * definition of UTF-8 goes up to 4-byte sequences. - */ - -inline static unsigned char isLegalUTF8(const UTF8 *source, int length) -{ - UTF8 a; - const UTF8 *srcptr = source+length; - switch (length) { - default: return 0; - /* Everything else falls through when "1"... */ - case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; - case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; - case 2: if ((a = (*--srcptr)) > 0xBF) return 0; - - switch (*source) { - /* no fall-through in this inner switch */ - case 0xE0: if (a < 0xA0) return 0; break; - case 0xED: if (a > 0x9F) return 0; break; - case 0xF0: if (a < 0x90) return 0; break; - case 0xF4: if (a > 0x8F) return 0; break; - default: if (a < 0x80) return 0; - } - - case 1: if (*source >= 0x80 && *source < 0xC2) return 0; - } - if (*source > 0xF4) return 0; - return 1; -} - -inline static void unicode_escape(char *buf, UTF16 character) -{ - const char *digits = "0123456789abcdef"; - - buf[2] = digits[character >> 12]; - buf[3] = digits[(character >> 8) & 0xf]; - buf[4] = digits[(character >> 4) & 0xf]; - buf[5] = digits[character & 0xf]; -} - - -inline static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character) -{ - unicode_escape(buf, character); - fbuffer_append(buffer, buf, 6); -} - -inline void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string) -{ - const UTF8 *source = (UTF8 *) RSTRING_PTR(string); - const UTF8 *sourceEnd = source + RSTRING_LEN(string); - char buf[6] = { '\\', 'u' }; - - while (source < sourceEnd) { - UTF32 ch = 0; - unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; - if (source + extraBytesToRead >= sourceEnd) { - rb_raise(rb_path2class("JSON::GeneratorError"), - "partial character in source, but hit end"); - } - if (!isLegalUTF8(source, extraBytesToRead+1)) { - rb_raise(rb_path2class("JSON::GeneratorError"), - "source sequence is illegal/malformed utf-8"); - } - /* - * The cases all fall through. See "Note A" below. - */ - switch (extraBytesToRead) { - case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ - case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ - case 3: ch += *source++; ch <<= 6; - case 2: ch += *source++; ch <<= 6; - case 1: ch += *source++; ch <<= 6; - case 0: ch += *source++; - } - ch -= offsetsFromUTF8[extraBytesToRead]; - - if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ - /* UTF-16 surrogate values are illegal in UTF-32 */ - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { -#if UNI_STRICT_CONVERSION - source -= (extraBytesToRead+1); /* return to the illegal value itself */ - rb_raise(rb_path2class("JSON::GeneratorError"), - "source sequence is illegal/malformed utf-8"); -#else - unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR); -#endif - } else { - /* normal case */ - switch (ch) { - case '\n': - fbuffer_append(buffer, "\\n", 2); - break; - case '\r': - fbuffer_append(buffer, "\\r", 2); - break; - case '\\': - fbuffer_append(buffer, "\\\\", 2); - break; - case '"': - fbuffer_append(buffer, "\\\"", 2); - break; - case '\t': - fbuffer_append(buffer, "\\t", 2); - break; - case '\f': - fbuffer_append(buffer, "\\f", 2); - break; - case '\b': - fbuffer_append(buffer, "\\b", 2); - break; - default: - if (ch >= 0x20 && ch <= 0x7f) { - fbuffer_append_char(buffer, ch); - } else { - unicode_escape_to_buffer(buffer, buf, (UTF16) ch); - } - break; - } - } - } else if (ch > UNI_MAX_UTF16) { -#if UNI_STRICT_CONVERSION - source -= (extraBytesToRead+1); /* return to the start */ - rb_raise(rb_path2class("JSON::GeneratorError"), - "source sequence is illegal/malformed utf8"); -#else - unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR); -#endif - } else { - /* target is a character in range 0xFFFF - 0x10FFFF. */ - ch -= halfBase; - unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START)); - unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START)); - } - } -} - -inline void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string) -{ - const char *ptr = RSTRING_PTR(string), *p; - int len = RSTRING_LEN(string), start = 0, end = 0; - const char *escape = NULL; - int escape_len; - unsigned char c; - char buf[6] = { '\\', 'u' }; - - for (start = 0, end = 0; end < len;) { - p = ptr + end; - c = (unsigned char) *p; - if (c < 0x20) { - switch (c) { - case '\n': - escape = "\\n"; - escape_len = 2; - break; - case '\r': - escape = "\\r"; - escape_len = 2; - break; - case '\t': - escape = "\\t"; - escape_len = 2; - break; - case '\f': - escape = "\\f"; - escape_len = 2; - break; - case '\b': - escape = "\\b"; - escape_len = 2; - break; - default: - unicode_escape(buf, (UTF16) *p); - escape = buf; - escape_len = 6; - break; - } - } else { - switch (c) { - case '\\': - escape = "\\\\"; - escape_len = 2; - break; - case '"': - escape = "\\\""; - escape_len = 2; - break; - default: - end++; - continue; - break; - } - } - fbuffer_append(buffer, ptr + start, end - start); - fbuffer_append(buffer, escape, escape_len); - start = ++end; - escape = NULL; - } - fbuffer_append(buffer, ptr + start, end - start); -} diff --git a/ext/json/ext/generator/unicode.h b/ext/json/ext/generator/unicode.h deleted file mode 100644 index 3837b32..0000000 --- a/ext/json/ext/generator/unicode.h +++ /dev/null @@ -1,45 +0,0 @@ -#include "ruby.h" -#include "fbuffer.h" - -#ifndef _GENERATOR_UNICODE_H_ -#define _GENERATOR_UNICODE_H_ - -#define UNI_STRICT_CONVERSION 1 - -typedef unsigned long UTF32; /* at least 32 bits */ -typedef unsigned short UTF16; /* at least 16 bits */ -typedef unsigned char UTF8; /* typically 8 bits */ - -#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD -#define UNI_MAX_BMP (UTF32)0x0000FFFF -#define UNI_MAX_UTF16 (UTF32)0x0010FFFF -#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF -#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF - -#define UNI_SUR_HIGH_START (UTF32)0xD800 -#define UNI_SUR_HIGH_END (UTF32)0xDBFF -#define UNI_SUR_LOW_START (UTF32)0xDC00 -#define UNI_SUR_LOW_END (UTF32)0xDFFF - -static const int halfShift = 10; /* used for shifting by 10 bits */ - -static const UTF32 halfBase = 0x0010000UL; -static const UTF32 halfMask = 0x3FFUL; - -inline void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string); -inline void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string); - -#ifndef RARRAY_PTR -#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr -#endif -#ifndef RARRAY_LEN -#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len -#endif -#ifndef RSTRING_PTR -#define RSTRING_PTR(string) RSTRING(string)->ptr -#endif -#ifndef RSTRING_LEN -#define RSTRING_LEN(string) RSTRING(string)->len -#endif - -#endif |