diff options
author | Lorry Tar Creator <lorry-tar-importer@baserock.org> | 2013-03-14 05:42:27 +0000 |
---|---|---|
committer | <> | 2013-04-03 16:25:08 +0000 |
commit | c4dd7a1a684490673e25aaf4fabec5df138854c4 (patch) | |
tree | 4d57c44caae4480efff02b90b9be86f44bf25409 /ext/intl/normalizer | |
download | php2-master.tar.gz |
Imported from /home/lorry/working-area/delta_php2/php-5.4.13.tar.bz2. (refs: HEAD, php-5.4.13, master)
Diffstat (limited to 'ext/intl/normalizer')
-rw-r--r-- | ext/intl/normalizer/normalizer.c | 68 | ||||
-rw-r--r-- | ext/intl/normalizer/normalizer.h | 37 | ||||
-rw-r--r-- | ext/intl/normalizer/normalizer_class.c | 81 | ||||
-rw-r--r-- | ext/intl/normalizer/normalizer_class.h | 43 | ||||
-rw-r--r-- | ext/intl/normalizer/normalizer_normalize.c | 258 | ||||
-rw-r--r-- | ext/intl/normalizer/normalizer_normalize.h | 25 |
6 files changed, 512 insertions, 0 deletions
diff --git a/ext/intl/normalizer/normalizer.c b/ext/intl/normalizer/normalizer.c new file mode 100644 index 0000000..033b05c --- /dev/null +++ b/ext/intl/normalizer/normalizer.c @@ -0,0 +1,68 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Ed Batutis <ed@batutis.com> | + +----------------------------------------------------------------------+ + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "normalizer_class.h" +#include "normalizer.h" + +#include <unicode/utypes.h> +#include <unicode/unorm.h> +#include <unicode/ustring.h> + +/* {{{ normalizer_register_constants + * Register constants common for the both (OO and procedural) + * APIs. 
+ */ +void normalizer_register_constants( INIT_FUNC_ARGS ) +{ + if( !Normalizer_ce_ptr ) + { + zend_error( E_ERROR, "Normalizer class not defined" ); + return; + } + + #define NORMALIZER_EXPOSE_CONST(x) REGISTER_LONG_CONSTANT(#x, x, CONST_CS) + #define NORMALIZER_EXPOSE_CLASS_CONST(x) zend_declare_class_constant_long( Normalizer_ce_ptr, ZEND_STRS( #x ) - 1, NORMALIZER_##x TSRMLS_CC ); + #define NORMALIZER_EXPOSE_CUSTOM_CLASS_CONST(name, value) zend_declare_class_constant_long( Normalizer_ce_ptr, ZEND_STRS( name ) - 1, value TSRMLS_CC ); + + /* Normalization form constants */ + NORMALIZER_EXPOSE_CLASS_CONST( NONE ); + NORMALIZER_EXPOSE_CLASS_CONST( FORM_D ); + NORMALIZER_EXPOSE_CLASS_CONST( NFD ); + NORMALIZER_EXPOSE_CLASS_CONST( FORM_KD ); + NORMALIZER_EXPOSE_CLASS_CONST( NFKD ); + NORMALIZER_EXPOSE_CLASS_CONST( FORM_C ); + NORMALIZER_EXPOSE_CLASS_CONST( NFC ); + NORMALIZER_EXPOSE_CLASS_CONST( FORM_KC ); + NORMALIZER_EXPOSE_CLASS_CONST( NFKC ); + + #undef NORMALIZER_EXPOSE_CUSTOM_CLASS_CONST + #undef NORMALIZER_EXPOSE_CLASS_CONST + #undef NORMALIZER_EXPOSE_CONST +} +/* }}} */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 fdm=marker + * vim<600: noet sw=4 ts=4 + */ diff --git a/ext/intl/normalizer/normalizer.h b/ext/intl/normalizer/normalizer.h new file mode 100644 index 0000000..eca9abe --- /dev/null +++ b/ext/intl/normalizer/normalizer.h @@ -0,0 +1,37 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | 
license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Ed Batutis <ed@batutis.com> | + +----------------------------------------------------------------------+ + */ + +#ifndef NORMALIZER_NORMALIZER_H +#define NORMALIZER_NORMALIZER_H + +#include <php.h> +#include <unicode/utypes.h> +#include <unicode/unorm.h> + +#define NORMALIZER_NONE UNORM_NONE +#define NORMALIZER_FORM_D UNORM_NFD +#define NORMALIZER_NFD UNORM_NFD +#define NORMALIZER_FORM_KD UNORM_NFKD +#define NORMALIZER_NFKD UNORM_NFKD +#define NORMALIZER_FORM_C UNORM_NFC +#define NORMALIZER_NFC UNORM_NFC +#define NORMALIZER_FORM_KC UNORM_NFKC +#define NORMALIZER_NFKC UNORM_NFKC +#define NORMALIZER_DEFAULT UNORM_DEFAULT + +void normalizer_register_constants( INIT_FUNC_ARGS ); + +#endif // NORMALIZER_NORMALIZER_H diff --git a/ext/intl/normalizer/normalizer_class.c b/ext/intl/normalizer/normalizer_class.c new file mode 100644 index 0000000..154d877 --- /dev/null +++ b/ext/intl/normalizer/normalizer_class.c @@ -0,0 +1,81 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ + | Authors: Ed Batutis <ed@batutis.com> | + +----------------------------------------------------------------------+ + */ + +#include "normalizer_class.h" +#include "php_intl.h" +#include "normalizer_normalize.h" +#include "intl_error.h" + +#include <unicode/unorm.h> + +zend_class_entry *Normalizer_ce_ptr = NULL; + +/* + * 'Normalizer' class registration structures & functions + */ + +/* {{{ Normalizer methods arguments info */ + +ZEND_BEGIN_ARG_INFO_EX( normalizer_3_args, 0, 0, 3 ) + ZEND_ARG_INFO( 0, arg1 ) + ZEND_ARG_INFO( 0, arg2 ) + ZEND_ARG_INFO( 0, arg3 ) +ZEND_END_ARG_INFO() + +/* }}} */ + +/* {{{ Normalizer_class_functions + * Every 'Normalizer' class method has an entry in this table + */ + +zend_function_entry Normalizer_class_functions[] = { + ZEND_FENTRY( normalize, ZEND_FN( normalizer_normalize ), normalizer_3_args, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC ) + ZEND_FENTRY( isNormalized, ZEND_FN( normalizer_is_normalized ), normalizer_3_args, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC ) + PHP_FE_END +}; +/* }}} */ + +/* {{{ normalizer_register_Normalizer_class + * Initialize 'Normalizer' class + */ +void normalizer_register_Normalizer_class( TSRMLS_D ) +{ + zend_class_entry ce; + + /* Create and register 'Normalizer' class. */ + INIT_CLASS_ENTRY( ce, "Normalizer", Normalizer_class_functions ); + ce.create_object = NULL; + Normalizer_ce_ptr = zend_register_internal_class( &ce TSRMLS_CC ); + + /* Declare 'Normalizer' class properties. */ + if( !Normalizer_ce_ptr ) + { + zend_error( E_ERROR, + "Normalizer: attempt to create properties " + "on a non-registered class." 
); + return; + } +} +/* }}} */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 fdm=marker + * vim<600: noet sw=4 ts=4 + */ diff --git a/ext/intl/normalizer/normalizer_class.h b/ext/intl/normalizer/normalizer_class.h new file mode 100644 index 0000000..4d3f7d2 --- /dev/null +++ b/ext/intl/normalizer/normalizer_class.h @@ -0,0 +1,43 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ + | Authors: Ed Batutis <ed@batutis.com> | + +----------------------------------------------------------------------+ + */ + +#ifndef NORMALIZER_CLASS_H +#define NORMALIZER_CLASS_H + +#include <php.h> + +#include "intl_common.h" +#include "intl_error.h" + +#include <unicode/unorm.h> + +typedef struct { + zend_object zo; + + // error value not used currently + intl_error err; + +} Normalizer_object; + +#define NORMALIZER_ERROR(co) (co)->err +#define NORMALIZER_ERROR_P(co) &(NORMALIZER_ERROR(co)) + +#define NORMALIZER_ERROR_CODE(co) INTL_ERROR_CODE(NORMALIZER_ERROR(co)) +#define NORMALIZER_ERROR_CODE_P(co) &(INTL_ERROR_CODE(NORMALIZER_ERROR(co))) + +void normalizer_register_Normalizer_class( TSRMLS_D ); +extern zend_class_entry *Normalizer_ce_ptr; +#endif // #ifndef NORMALIZER_CLASS_H diff --git a/ext/intl/normalizer/normalizer_normalize.c b/ext/intl/normalizer/normalizer_normalize.c new file mode 100644 index 0000000..f46285e --- /dev/null +++ b/ext/intl/normalizer/normalizer_normalize.c @@ -0,0 +1,258 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ + | Authors: Ed Batutis <ed@batutis.com> | + +----------------------------------------------------------------------+ + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "php_intl.h" +#include "unicode/unorm.h" +#include "normalizer.h" +#include "normalizer_class.h" +#include "normalizer_normalize.h" +#include "intl_convert.h" + +/* {{{ proto string Normalizer::normalize( string $input [, string $form = FORM_C] ) + * Normalize a string. }}} */ +/* {{{ proto string normalizer_normalize( string $input [, string $form = FORM_C] ) + * Normalize a string. + */ +PHP_FUNCTION( normalizer_normalize ) +{ + char* input = NULL; + /* form is optional, defaults to FORM_C */ + long form = NORMALIZER_DEFAULT; + int input_len = 0; + + UChar* uinput = NULL; + int uinput_len = 0; + int expansion_factor = 1; + UErrorCode status = U_ZERO_ERROR; + + UChar* uret_buf = NULL; + int uret_len = 0; + + char* ret_buf = NULL; + int32_t ret_len = 0; + + int32_t size_needed; + + intl_error_reset( NULL TSRMLS_CC ); + + /* Parse parameters. */ + if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "s|l", + &input, &input_len, &form ) == FAILURE ) + { + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "normalizer_normalize: unable to parse input params", 0 TSRMLS_CC ); + + RETURN_FALSE; + } + + expansion_factor = 1; + + switch(form) { + case NORMALIZER_NONE: + break; + case NORMALIZER_FORM_D: + expansion_factor = 3; + break; + case NORMALIZER_FORM_KD: + expansion_factor = 3; + break; + case NORMALIZER_FORM_C: + case NORMALIZER_FORM_KC: + break; + default: + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "normalizer_normalize: illegal normalization form", 0 TSRMLS_CC ); + RETURN_FALSE; + } + + /* + * Normalize string (converting it to UTF-16 first). + */ + + /* First convert the string to UTF-16. 
*/ + intl_convert_utf8_to_utf16(&uinput, &uinput_len, input, input_len, &status ); + + if( U_FAILURE( status ) ) + { + /* Set global error code. */ + intl_error_set_code( NULL, status TSRMLS_CC ); + + /* Set error messages. */ + intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC ); + if (uinput) { + efree( uinput ); + } + RETURN_FALSE; + } + + + /* Allocate memory for the destination buffer for normalization */ + uret_len = uinput_len * expansion_factor; + uret_buf = eumalloc( uret_len + 1 ); + + /* normalize */ + size_needed = unorm_normalize( uinput, uinput_len, form, (int32_t) 0 /* options */, uret_buf, uret_len, &status); + + /* Bail out if an unexpected error occurred. + * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough). + * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string is empty). + */ + if( U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR && status != U_STRING_NOT_TERMINATED_WARNING ) { + efree( uret_buf ); + efree( uinput ); + RETURN_NULL(); + } + + if ( size_needed > uret_len ) { + /* realloc does not seem to work properly - memory is corrupted + * uret_buf = eurealloc(uret_buf, size_needed + 1); + */ + efree( uret_buf ); + uret_buf = eumalloc( size_needed + 1 ); + uret_len = size_needed; + + status = U_ZERO_ERROR; + + /* try normalize again */ + size_needed = unorm_normalize( uinput, uinput_len, form, (int32_t) 0 /* options */, uret_buf, uret_len, &status); + + /* Bail out if an unexpected error occurred. */ + if( U_FAILURE(status) ) { + /* Set error messages. */ + intl_error_set_custom_msg( NULL,"Error normalizing string", 0 TSRMLS_CC ); + efree( uret_buf ); + efree( uinput ); + RETURN_FALSE; + } + } + + efree( uinput ); + + /* the buffer we actually used */ + uret_len = size_needed; + + /* Convert normalized string from UTF-16 to UTF-8. 
*/ + intl_convert_utf16_to_utf8( &ret_buf, &ret_len, uret_buf, uret_len, &status ); + efree( uret_buf ); + if( U_FAILURE( status ) ) + { + intl_error_set( NULL, status, + "normalizer_normalize: error converting normalized text UTF-8", 0 TSRMLS_CC ); + RETURN_FALSE; + } + + /* Return it. */ + RETVAL_STRINGL( ret_buf, ret_len, FALSE ); +} +/* }}} */ + +/* {{{ proto bool Normalizer::isNormalized( string $input [, string $form = FORM_C] ) + * Test if a string is in a given normalization form. }}} */ +/* {{{ proto bool normalizer_is_normalize( string $input [, string $form = FORM_C] ) + * Test if a string is in a given normalization form. + */ +PHP_FUNCTION( normalizer_is_normalized ) +{ + char* input = NULL; + /* form is optional, defaults to FORM_C */ + long form = NORMALIZER_DEFAULT; + int input_len = 0; + + UChar* uinput = NULL; + int uinput_len = 0; + UErrorCode status = U_ZERO_ERROR; + + UBool uret = FALSE; + + intl_error_reset( NULL TSRMLS_CC ); + + /* Parse parameters. */ + if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "s|l", + &input, &input_len, &form) == FAILURE ) + { + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "normalizer_is_normalized: unable to parse input params", 0 TSRMLS_CC ); + + RETURN_FALSE; + } + + switch(form) { + /* case NORMALIZER_NONE: not allowed - doesn't make sense */ + + case NORMALIZER_FORM_D: + case NORMALIZER_FORM_KD: + case NORMALIZER_FORM_C: + case NORMALIZER_FORM_KC: + break; + default: + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "normalizer_normalize: illegal normalization form", 0 TSRMLS_CC ); + RETURN_FALSE; + } + + + /* + * Test normalization of string (converting it to UTF-16 first). + */ + + /* First convert the string to UTF-16. */ + intl_convert_utf8_to_utf16(&uinput, &uinput_len, input, input_len, &status ); + + if( U_FAILURE( status ) ) + { + /* Set global error code. */ + intl_error_set_code( NULL, status TSRMLS_CC ); + + /* Set error messages. 
*/ + intl_error_set_custom_msg( NULL, "Error converting string to UTF-16.", 0 TSRMLS_CC ); + if (uinput) { + efree( uinput ); + } + RETURN_FALSE; + } + + + /* test string */ + uret = unorm_isNormalizedWithOptions( uinput, uinput_len, form, (int32_t) 0 /* options */, &status); + + efree( uinput ); + + /* Bail out if an unexpected error occurred. */ + if( U_FAILURE(status) ) { + /* Set error messages. */ + intl_error_set_custom_msg( NULL,"Error testing if string is the given normalization form.", 0 TSRMLS_CC ); + RETURN_FALSE; + } + + if ( uret ) + RETURN_TRUE; + + RETURN_FALSE; +} +/* }}} */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 fdm=marker + * vim<600: noet sw=4 ts=4 + */ diff --git a/ext/intl/normalizer/normalizer_normalize.h b/ext/intl/normalizer/normalizer_normalize.h new file mode 100644 index 0000000..41c31f7 --- /dev/null +++ b/ext/intl/normalizer/normalizer_normalize.h @@ -0,0 +1,25 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Ed Batutis <ed@batutis.com> | + +----------------------------------------------------------------------+ + */ + +#ifndef NORMALIZER_NORMALIZE_H +#define NORMALIZER_NORMALIZE_H + +#include <php.h> + +PHP_FUNCTION( normalizer_normalize ); +PHP_FUNCTION( normalizer_is_normalized ); + +#endif // NORMALIZER_NORMALIZE_H |