diff options
| author | Bruce Momjian <bruce@momjian.us> | 2002-03-05 05:52:50 +0000 |
|---|---|---|
| committer | Bruce Momjian <bruce@momjian.us> | 2002-03-05 05:52:50 +0000 |
| commit | a8bd7e1c6e026678019b2f25cffc0a94ce62b24b (patch) | |
| tree | 0334b3c7648b888f1c416579e8ca29fcdadb4a6e /src/backend/utils/mb/wchar.c | |
| parent | 03194432de712f7afb4ddc2ade2bc44f0536dae1 (diff) | |
| download | postgresql-a8bd7e1c6e026678019b2f25cffc0a94ce62b24b.tar.gz | |
> Tatsuo Ishii wrote:
> > > > It was made to cope with encoding such as an Asian bloc in 7.2Beta2.
> > > >
> > > > Added ServerEncoding
> > > > Korean (JOHAB), Thai (WIN874),
> > > > Vietnamese (TCVN), Arabic (WIN1256)
> > > >
> > > > Added ClientEncoding
> > > > Simplified Chinese (GBK), Korean (UHC)
> > > >
> > > >
> > > >
> http://www.sankyo-unyu.co.jp/Pool/postgresql-7.2b2.newencoding.diff.tar.gz
> > > > (608K)
> > >
> > > Looks good. I need some people to review this for me.
> >
> > For me they look good too. The only missing part is a
> > documentation. I will ask him to write it up. If he couldn't, I will
> > do it for him.
> > > The diff is 3mb
> > > but appears to address only additions to multibyte. I have attached a
> > > list of files it modifies. Also, look at the sizes of the mb/
> > > directory. It is getting large:
> > >
> > > 4 ./CVS
> > > 6 ./Unicode/CVS
> > > 3433 ./Unicode
> > > 6197 .
> >
> > Yes. We definitely need the on-the-fly encoding addition capability:
> > i.e. CREATE CHRACTER SET in the future...
> > --
> > Tatsuo Ishii
> >
> >
Address chainge.
http://www.sankyo-unyu.co.jp/Pool/postgresql-7.2.newencoding.diff.gz
Add PsqlODBC and document ...etc patch.
Eiji Tokuya
Diffstat (limited to 'src/backend/utils/mb/wchar.c')
| -rw-r--r-- | src/backend/utils/mb/wchar.c | 105 |
1 files changed, 82 insertions, 23 deletions
diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c index 4f6a54c8ee..16aed72fa7 100644 --- a/src/backend/utils/mb/wchar.c +++ b/src/backend/utils/mb/wchar.c @@ -1,7 +1,7 @@ /* * conversion functions between pg_wchar and multi-byte streams. * Tatsuo Ishii - * $Id: wchar.c,v 1.26 2001/10/28 06:25:56 momjian Exp $ + * $Id: wchar.c,v 1.27 2002/03/05 05:52:44 momjian Exp $ * * WIN1250 client encoding updated by Pavel Behal * @@ -251,6 +251,21 @@ pg_euctw_mblen(const unsigned char *s) } /* + * JOHAB + */ +static int +pg_johab2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ + return (pg_euc2wchar_with_len(from, to, len)); +} + +static int +pg_johab_mblen(const unsigned char *s) +{ + return (pg_euc_mblen(s)); +} + +/* * convert UTF-8 string to pg_wchar (UCS-2) * caller should allocate enough space for "to" * len: length of from. @@ -457,34 +472,78 @@ pg_big5_mblen(const unsigned char *s) return (len); } +/* + * GBK + */ +static int +pg_gbk_mblen(const unsigned char *s) +{ + int len; + + if (*s > 0x7f) + { /* kanji? */ + len = 2; + } + else + { /* should be ASCII */ + len = 1; + } + return (len); +} + +/* + * UHC + */ +static int +pg_uhc_mblen(const unsigned char *s) +{ + int len; + + if (*s > 0x7f) + { /* 2byte? */ + len = 2; + } + else + { /* should be ASCII */ + len = 1; + } + return (len); +} + pg_wchar_tbl pg_wchar_table[] = { {pg_ascii2wchar_with_len, pg_ascii_mblen, 1}, /* 0; PG_SQL_ASCII */ {pg_eucjp2wchar_with_len, pg_eucjp_mblen, 3}, /* 1; PG_EUC_JP */ {pg_euccn2wchar_with_len, pg_euccn_mblen, 3}, /* 2; PG_EUC_CN */ {pg_euckr2wchar_with_len, pg_euckr_mblen, 3}, /* 3; PG_EUC_KR */ {pg_euctw2wchar_with_len, pg_euctw_mblen, 3}, /* 4; PG_EUC_TW */ - {pg_utf2wchar_with_len, pg_utf_mblen, 3}, /* 5; PG_UNICODE */ - {pg_mule2wchar_with_len, pg_mule_mblen, 3}, /* 6; PG_MULE_INTERNAL */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 7; PG_LATIN1 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 8; PG_LATIN2 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 9; PG_LATIN3 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 10; PG_LATIN4 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 11; PG_LATIN5 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 12; PG_KOI8 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 13; PG_WIN1251 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 14; PG_ALT */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 15; ISO-8859-5 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 16; ISO-8859-6 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 17; ISO-8859-7 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 18; ISO-8859-8 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 19; ISO-8859-10 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 20; ISO-8859-13 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 21; ISO-8859-14 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 22; ISO-8859-15 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 23; ISO-8859-16 */ - {0, pg_sjis_mblen, 2}, /* 24; PG_SJIS */ - {0, pg_big5_mblen, 2}, /* 25; PG_BIG5 */ - {pg_latin12wchar_with_len, pg_latin1_mblen, 1} /* 26; PG_WIN1250 */ + {pg_johab2wchar_with_len, pg_johab_mblen, 3}, /* 5; PG_JOHAB */ + {pg_utf2wchar_with_len, pg_utf_mblen, 3}, /* 6; PG_UNICODE */ + {pg_mule2wchar_with_len, pg_mule_mblen, 3}, /* 7; PG_MULE_INTERNAL */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 8; PG_LATIN1 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 9; PG_LATIN2 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 10; PG_LATIN3 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 11; PG_LATIN4 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 12; PG_LATIN5 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 13; PG_LATIN6 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 14; PG_LATIN7 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 15; PG_LATIN8 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 16; PG_LATIN9 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 17; PG_LATIN10 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 18; PG_WIN1256 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 19; PG_TCVN */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 20; PG_WIN874 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 21; PG_KOI8 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 22; PG_WIN1251 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 23; PG_ALT */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 24; ISO-8859-5 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 25; ISO-8859-6 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 26; ISO-8859-7 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 27; ISO-8859-8 */ + {0, pg_sjis_mblen, 2}, /* 28; PG_SJIS */ + {0, pg_big5_mblen, 2}, /* 29; PG_BIG5 */ + {0, pg_gbk_mblen, 2}, /* 30; PG_GBK */ + {0, pg_uhc_mblen, 2}, /* 31; PG_UHC */ + {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 32; PG_WIN1250 */ }; /* returns the byte length of a word for mule internal code */ |
