diff options
author | Mariusz Pluciński <mplucinski@mplucinski.com> | 2014-06-24 19:21:43 +0200 |
---|---|---|
committer | Will Estes <westes575@gmail.com> | 2014-11-30 19:22:43 -0500 |
commit | 227e731b7686d79902c31756e11e7104070f4c2b (patch) | |
tree | c417e60a930cb21a3c7dbb2616b80e10ddc7fb89 /src | |
parent | 4d6089fc838072d6a057257254107fdf84ff690f (diff) | |
download | flex-git-unicode.tar.gz |
Make charset support working with C++ scanners too [branch: unicode]
Diffstat (limited to 'src')
-rw-r--r-- | src/FlexLexer.h | 17 | ||||
-rw-r--r-- | src/flex.skl | 123 | ||||
-rw-r--r-- | src/main.c | 5 |
3 files changed, 118 insertions, 27 deletions
diff --git a/src/FlexLexer.h b/src/FlexLexer.h index ccda8ae..a9aa4f2 100644 --- a/src/FlexLexer.h +++ b/src/FlexLexer.h @@ -96,8 +96,16 @@ public: int debug() const { return yy_flex_debug; } void set_debug( int flag ) { yy_flex_debug = flag; } +#ifdef YY_CHARSET + void set_charset(char *charset); + char* get_charset(); +#endif + protected: YY_CHAR* yytext; +#ifdef YY_CHARSET + char *yycharset; +#endif int yyleng; int yylineno; // only maintained if you use %option yylineno int yy_flex_debug; // only has effect with -d or "%option debug" @@ -158,6 +166,15 @@ protected: yy_state_type yy_try_NUL_trans( yy_state_type current_state ); int yy_get_next_buffer(); +#ifdef YY_CHARSET + size_t yycharset_convert(char* source, size_t source_bytes, YY_CHAR* target, + size_t target_length, size_t* converted_bytes); + virtual size_t yycharset_handler(char *charset, + char *source, size_t source_bytes, + YY_CHAR *target, size_t target_length, + size_t *converted_bytes); +#endif + FLEX_STD istream* yyin; // input source for default LexerInput FLEX_STD ostream* yyout; // output sink for default LexerOutput diff --git a/src/flex.skl b/src/flex.skl index 5738453..fae28c1 100644 --- a/src/flex.skl +++ b/src/flex.skl @@ -925,8 +925,13 @@ m4_ifdef( [[<M4_YY_BISON_LLOC>]], m4_ifdef( [[M4_YY_CHARSET]], [[ char *yycharset_r; /** current charset name */ +]]) + +m4_ifdef( [[M4_YY_CXX]],,[[ +m4_ifdef( [[M4_YY_CHARSET]],[[ yycharset_handler_t yycharset_handler_r; /** charset handle function */ ]]) +]]) }; /* end struct yyguts_t */ ]]) @@ -1052,19 +1057,20 @@ m4_ifdef( [[M4_YY_REENTRANT]],[[ m4_ifdef( [[M4_YY_CHARSET]],[[ m4_ifdef( [[M4_YY_NO_GET_CHARSET]],,[[ char *yyget_charset M4_YY_PARAMS( M4_YY_PROTO_ONLY_ARG ); +%if-c-only yycharset_handler_t yyget_charset_handler M4_YY_PARAMS( M4_YY_PROTO_ONLY_ARG ); +%endif ]]) ]]) ]]) m4_ifdef( [[M4_YY_REENTRANT]],[[ -/* YY_REENTRANT */ m4_ifdef( [[M4_YY_CHARSET]], [[ -/* YY_CHARSET */ m4_ifdef( [[M4_YY_NO_SET_CHARSET]],,[[ -/* !YY_NO_SET_CHARSET */ 
void yyset_charset M4_YY_PARAMS( char *charset M4_YY_PROTO_LAST_ARG ); +%if-c-only void yyset_charset_handler M4_YY_PARAMS( yycharset_handler_t charset_handler M4_YY_PROTO_LAST_ARG ); +%endif ]]) ]]) ]]) @@ -1694,10 +1700,18 @@ m4_ifdef( [[M4_YY_CHARSET]],[[ /* yycharset_convert - convert incoming data from arbitrary * charset into internal representation */ +%if-c-only static size_t yycharset_convert YYFARGS5( char*, source, size_t, source_bytes, YY_CHAR*, target, size_t, target_length, size_t*, converted_bytes) { +%endif +%if-c++-only +size_t yyFlexLexer::yycharset_convert( + char* source, size_t source_bytes, + YY_CHAR* target, size_t target_length, + size_t* converted_bytes) { +%endif M4_YY_DECL_GUTS_VAR(); if(strcmp(yycharset, "M4_YY_CHARSET_SOURCE")==0) { if(target_length < source_bytes) @@ -1705,15 +1719,28 @@ static size_t yycharset_convert YYFARGS5( strncpy((char*)target, source, source_bytes); *converted_bytes = source_bytes; return source_bytes; - } else if(yycharset_handler) + } +%if-c-only + else if(yycharset_handler) +%endif return yycharset_handler(yycharset, source, source_bytes, target, target_length, converted_bytes M4_YY_CALL_LAST_ARG); - else { - char msg[256]; - snprintf(msg, sizeof(msg), - "Unsupported character encoding: %s", yycharset); - YY_FATAL_ERROR(msg); - } + +/* Code below just outputs an error message saying that selected encoding + * is not supported. 
In C scanner it is an end part of yycharset_convert, + * while in C++ scanner it is a default implementation of yycharset_handler */ +%if-c++-only +} + +size_t yyFlexLexer::yycharset_handler(char *charset, + char *source, size_t source_bytes, + YY_CHAR *target, size_t target_length, + size_t *converted_bytes) { +%endif + char msg[256]; + snprintf(msg, sizeof(msg), + "Unsupported character encoding: %s", yycharset); + YY_FATAL_ERROR(msg); return 0; } ]]) @@ -2727,33 +2754,55 @@ int yyget_column YYFARGS0(void) } ]]) ]]) +%endif -m4_ifdef( [[M4_YY_REENTRANT]],[[ -m4_ifdef( [[M4_YY_CHARSET]], [[ -m4_ifdef( [[M4_YY_NO_GET_CHARSET]],,[[ -/** Get the currently set charset name +m4_ifdef([[M4_YY_CHARSET]],[[ + m4_ifdef([[M4_YY_REENTRANT]],[[ + + m4_ifdef([[M4_YY_NO_GET_CHARSET]],,[[ + m4_define([[M4_YY_GET_CHARSET]],[[ + char* yyget_charset YYFARGS0(void) + ]]) + ]]) + + m4_ifdef([[M4_YY_NO_GET_CHARSET_HANDLER]],,[[ + m4_define([[M4_YY_GET_CHARSET_HANDLER]],[[ + yycharset_handler_t yyget_charset_handler YYFARGS0(void) + ]]) + ]]) + ]]) + + m4_ifdef([[M4_YY_CXX]], [[ + m4_define( [[M4_YY_GET_CHARSET]], [[ + char *FlexLexer::get_charset() + ]]) + ]]) +]]) + +m4_ifdef( [[M4_YY_GET_CHARSET]], [[ +/** Get the current charset name * M4_YY_DOC_PARAM + * @return charset name */ -char *yyget_charset YYFARGS0(void) +M4_YY_GET_CHARSET { M4_YY_DECL_GUTS_VAR(); return yycharset; } ]]) -m4_ifdef( [[M4_YY_NO_GET_CHARSET_HANDLER]],,[[ +m4_ifdef( [[M4_YY_GET_CHARSET_HANDLER]],[[ /** Get the currently set charset handler * M4_YY_DOC_PARAM */ -yycharset_handler_t yyget_charset_handler YYFARGS0(void) +M4_YY_GET_CHARSET_HANDLER { M4_YY_DECL_GUTS_VAR(); return yycharset_handler; } ]]) -]]) -]]) +%if-c-only m4_ifdef( [[M4_YY_NO_GET_IN]],, [[ /** Get the input stream. 
@@ -2859,22 +2908,44 @@ void yyset_column YYFARGS1( int , _column_no) } ]]) ]]) +%endif -m4_ifdef( [[M4_YY_REENTRANT]],[[ -m4_ifdef( [[M4_YY_CHARSET]], [[ -m4_ifdef( [[M4_YY_NO_SET_CHARSET]],,[[ +m4_ifdef([[M4_YY_CHARSET]], [[ + m4_ifdef([[M4_YY_REENTRANT]],[[ + + m4_ifdef([[M4_YY_NO_SET_CHARSET]],,[[ + m4_define([[M4_YY_SET_CHARSET]], [[ + void yyset_charset YYFARGS1(char*, charset) + ]]) + ]]) + + m4_ifdef([[M4_YY_NO_SET_CHARSET_HANDLER]],,[[ + m4_define([[M4_YY_SET_CHARSET_HANDLER]],[[ + void yyset_charset_handler YYFARGS1(yycharset_handler_t, charset_handler) + ]]) + ]]) + ]]) + + m4_ifdef( [[M4_YY_CXX]],[[ + m4_define( [[M4_YY_SET_CHARSET]], [[ + void FlexLexer::set_charset(char *charset) + ]]) + ]]) +]]) + +m4_ifdef( [[M4_YY_SET_CHARSET]],[[ /** Set the current charset name * @param charset charset name * M4_YY_DOC_PARAM */ -void yyset_charset YYFARGS1( char*, charset) +M4_YY_SET_CHARSET { M4_YY_DECL_GUTS_VAR(); yycharset = strdup(charset); } ]]) -m4_ifdef( [[M4_YY_NO_SET_CHARSET_HANDLER]],,[[ +m4_ifdef( [[M4_YY_SET_CHARSET_HANDLER]],[[ /** Set the current charset handler * @param charset_handler handler function * M4_YY_DOC_PARAM @@ -2885,10 +2956,8 @@ void yyset_charset_handler YYFARGS1( yycharset_handler_t, charset_handler) yycharset_handler = charset_handler; } ]]) -]]) -]]) - +%if-c-only m4_ifdef( [[M4_YY_NO_SET_IN]],, [[ /** Set the input stream. This does not discard the current @@ -304,6 +304,8 @@ void check_options () if (C_plus_plus && bison_bridge_lval) flexerror (_("bison bridge not supported for the C++ scanner.")); + if(C_plus_plus) + buf_m4_define( &m4defs_buf, "M4_YY_CXX", NULL); if (useecs) { /* Set up doubly-linked equivalence classes. */ @@ -1663,6 +1665,9 @@ void readin () } OUT_END_CODE (); + if(charset_enabled) + outn ("#define YY_CHARSET"); + if (C_plus_plus) { outn ("#define yytext_ptr yytext"); |