summaryrefslogtreecommitdiff
path: root/ext/intl/normalizer
diff options
context:
space:
mode:
authorLorry Tar Creator <lorry-tar-importer@baserock.org>2013-03-14 05:42:27 +0000
committer <>2013-04-03 16:25:08 +0000
commitc4dd7a1a684490673e25aaf4fabec5df138854c4 (patch)
tree4d57c44caae4480efff02b90b9be86f44bf25409 /ext/intl/normalizer
downloadphp2-master.tar.gz
Imported from /home/lorry/working-area/delta_php2/php-5.4.13.tar.bz2.HEADphp-5.4.13master
Diffstat (limited to 'ext/intl/normalizer')
-rw-r--r--ext/intl/normalizer/normalizer.c68
-rw-r--r--ext/intl/normalizer/normalizer.h37
-rw-r--r--ext/intl/normalizer/normalizer_class.c81
-rw-r--r--ext/intl/normalizer/normalizer_class.h43
-rw-r--r--ext/intl/normalizer/normalizer_normalize.c258
-rw-r--r--ext/intl/normalizer/normalizer_normalize.h25
6 files changed, 512 insertions, 0 deletions
diff --git a/ext/intl/normalizer/normalizer.c b/ext/intl/normalizer/normalizer.c
new file mode 100644
index 0000000..033b05c
--- /dev/null
+++ b/ext/intl/normalizer/normalizer.c
@@ -0,0 +1,68 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Ed Batutis <ed@batutis.com> |
+ +----------------------------------------------------------------------+
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "normalizer_class.h"
+#include "normalizer.h"
+
+#include <unicode/utypes.h>
+#include <unicode/unorm.h>
+#include <unicode/ustring.h>
+
+/* {{{ normalizer_register_constants
+ * Declare the normalization form constants on the 'Normalizer' class.
+ * Reports E_ERROR and returns if the class entry has not been set up yet.
+ */
+void normalizer_register_constants( INIT_FUNC_ARGS )
+{
+	/* One table row per constant: name, name length without NUL, NORMALIZER_<name> value. */
+	#define NORMALIZER_FORM_ROW(x) { #x, sizeof( #x ) - 1, NORMALIZER_##x }
+	static const struct { const char *name; size_t name_len; long value; } form_table[] = {
+		NORMALIZER_FORM_ROW( NONE ),
+		NORMALIZER_FORM_ROW( FORM_D ),
+		NORMALIZER_FORM_ROW( NFD ),
+		NORMALIZER_FORM_ROW( FORM_KD ),
+		NORMALIZER_FORM_ROW( NFKD ),
+		NORMALIZER_FORM_ROW( FORM_C ),
+		NORMALIZER_FORM_ROW( NFC ),
+		NORMALIZER_FORM_ROW( FORM_KC ),
+		NORMALIZER_FORM_ROW( NFKC ),
+	};
+	#undef NORMALIZER_FORM_ROW
+	size_t idx;
+
+	if( Normalizer_ce_ptr == NULL ) {
+		zend_error( E_ERROR, "Normalizer class not defined" );
+		return;
+	}
+
+	for( idx = 0; idx < sizeof( form_table ) / sizeof( form_table[0] ); idx++ ) {
+		zend_declare_class_constant_long( Normalizer_ce_ptr, form_table[idx].name, form_table[idx].name_len, form_table[idx].value TSRMLS_CC );
+	}
+}
+/* }}} */
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ * vim600: noet sw=4 ts=4 fdm=marker
+ * vim<600: noet sw=4 ts=4
+ */
diff --git a/ext/intl/normalizer/normalizer.h b/ext/intl/normalizer/normalizer.h
new file mode 100644
index 0000000..eca9abe
--- /dev/null
+++ b/ext/intl/normalizer/normalizer.h
@@ -0,0 +1,37 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Ed Batutis <ed@batutis.com> |
+ +----------------------------------------------------------------------+
+ */
+
+#ifndef NORMALIZER_NORMALIZER_H
+#define NORMALIZER_NORMALIZER_H
+
+#include <php.h>
+#include <unicode/utypes.h>
+#include <unicode/unorm.h>
+/* Normalization forms as aliases of ICU's UNORM_* values; FORM_x and NFx name the same form. */
+#define NORMALIZER_NONE UNORM_NONE
+#define NORMALIZER_FORM_D UNORM_NFD
+#define NORMALIZER_NFD UNORM_NFD
+#define NORMALIZER_FORM_KD UNORM_NFKD
+#define NORMALIZER_NFKD UNORM_NFKD
+#define NORMALIZER_FORM_C UNORM_NFC
+#define NORMALIZER_NFC UNORM_NFC
+#define NORMALIZER_FORM_KC UNORM_NFKC
+#define NORMALIZER_NFKC UNORM_NFKC
+#define NORMALIZER_DEFAULT UNORM_DEFAULT
+/* Declares the form constants on the 'Normalizer' class; defined in normalizer.c. */
+void normalizer_register_constants( INIT_FUNC_ARGS );
+
+#endif /* NORMALIZER_NORMALIZER_H */
diff --git a/ext/intl/normalizer/normalizer_class.c b/ext/intl/normalizer/normalizer_class.c
new file mode 100644
index 0000000..154d877
--- /dev/null
+++ b/ext/intl/normalizer/normalizer_class.c
@@ -0,0 +1,81 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Ed Batutis <ed@batutis.com> |
+ +----------------------------------------------------------------------+
+ */
+
+#include "normalizer_class.h"
+#include "php_intl.h"
+#include "normalizer_normalize.h"
+#include "intl_error.h"
+
+#include <unicode/unorm.h>
+
+/* Class entry of 'Normalizer'; NULL until normalizer_register_Normalizer_class() assigns it. */
+zend_class_entry *Normalizer_ce_ptr = NULL;
+/*
+ * 'Normalizer' class registration structures & functions
+ */
+
+/* {{{ Normalizer methods arguments info
+ * Both methods parse "s|l": $input is required, $form is optional.
+ * The historical name normalizer_3_args is kept for existing references. */
+ZEND_BEGIN_ARG_INFO_EX( normalizer_3_args, 0, 0, 1 )
+	ZEND_ARG_INFO( 0, input )
+	ZEND_ARG_INFO( 0, form )
+ZEND_END_ARG_INFO()
+
+/* }}} */
+
+/* {{{ Normalizer_class_functions
+ * Method table of the 'Normalizer' class: each public static method is
+ * mapped onto the procedural function that implements it.
+ */
+zend_function_entry Normalizer_class_functions[] = {
+	ZEND_ME_MAPPING( normalize, normalizer_normalize, normalizer_3_args, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC )
+	ZEND_ME_MAPPING( isNormalized, normalizer_is_normalized, normalizer_3_args, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC )
+	ZEND_FE_END
+};
+/* }}} */
+
+/* {{{ normalizer_register_Normalizer_class
+ * Register the internal 'Normalizer' class and publish its class entry
+ * through Normalizer_ce_ptr.
+ */
+void normalizer_register_Normalizer_class( TSRMLS_D )
+{
+	zend_class_entry class_def;
+
+	/* Describe the class; no custom create_object handler is installed. */
+	INIT_CLASS_ENTRY( class_def, "Normalizer", Normalizer_class_functions );
+	class_def.create_object = NULL;
+
+	Normalizer_ce_ptr = zend_register_internal_class( &class_def TSRMLS_CC );
+	if( Normalizer_ce_ptr != NULL ) {
+		return;
+	}
+
+	/* Registration produced no class entry: report it with E_ERROR. */
+	zend_error( E_ERROR, "Normalizer: attempt to create properties "
+		"on a non-registered class." );
+}
+/* }}} */
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ * vim600: noet sw=4 ts=4 fdm=marker
+ * vim<600: noet sw=4 ts=4
+ */
diff --git a/ext/intl/normalizer/normalizer_class.h b/ext/intl/normalizer/normalizer_class.h
new file mode 100644
index 0000000..4d3f7d2
--- /dev/null
+++ b/ext/intl/normalizer/normalizer_class.h
@@ -0,0 +1,43 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Ed Batutis <ed@batutis.com> |
+ +----------------------------------------------------------------------+
+ */
+
+#ifndef NORMALIZER_CLASS_H
+#define NORMALIZER_CLASS_H
+
+#include <php.h>
+
+#include "intl_common.h"
+#include "intl_error.h"
+
+#include <unicode/unorm.h>
+
+/* Object layout declared for 'Normalizer': a zend_object followed by an intl_error slot. */
+typedef struct {
+	zend_object zo;
+	/* error value not used currently */
+	intl_error err;
+} Normalizer_object;
+
+/* Error accessors taking a Normalizer_object pointer. Each expansion is wrapped in
+ * parentheses so it stays one operand when combined with ->, . or a unary operator. */
+#define NORMALIZER_ERROR(co) ((co)->err)
+#define NORMALIZER_ERROR_P(co) (&(NORMALIZER_ERROR(co)))
+#define NORMALIZER_ERROR_CODE(co) (INTL_ERROR_CODE(NORMALIZER_ERROR(co)))
+#define NORMALIZER_ERROR_CODE_P(co) (&(INTL_ERROR_CODE(NORMALIZER_ERROR(co))))
+
+void normalizer_register_Normalizer_class( TSRMLS_D );
+extern zend_class_entry *Normalizer_ce_ptr;
+#endif /* NORMALIZER_CLASS_H */
diff --git a/ext/intl/normalizer/normalizer_normalize.c b/ext/intl/normalizer/normalizer_normalize.c
new file mode 100644
index 0000000..f46285e
--- /dev/null
+++ b/ext/intl/normalizer/normalizer_normalize.c
@@ -0,0 +1,258 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Ed Batutis <ed@batutis.com> |
+ +----------------------------------------------------------------------+
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "php_intl.h"
+#include "unicode/unorm.h"
+#include "normalizer.h"
+#include "normalizer_class.h"
+#include "normalizer_normalize.h"
+#include "intl_convert.h"
+
+/* {{{ proto string Normalizer::normalize( string $input [, int $form = FORM_C] )
+ * Normalize a string. }}} */
+/* {{{ proto string normalizer_normalize( string $input [, int $form = FORM_C] )
+ * UTF-8 in, UTF-8 out; normalization itself runs on UTF-16 via unorm_normalize().
+ * Returns FALSE if parsing, the form check, a conversion or the retried
+ * normalization fails, and NULL if the first pass fails for a reason other than
+ * a too-small buffer. Every failure sets the global intl error.
+ */
+PHP_FUNCTION( normalizer_normalize )
+{
+	char* input = NULL;
+	/* form is optional, defaults to FORM_C */
+	long form = NORMALIZER_DEFAULT;
+	int input_len = 0;
+
+	UChar* uinput = NULL;
+	int uinput_len = 0;
+	int expansion_factor = 1;
+	UErrorCode status = U_ZERO_ERROR;
+
+	UChar* uret_buf = NULL;
+	int uret_len = 0;
+
+	char* ret_buf = NULL;
+	int32_t ret_len = 0;
+
+	int32_t size_needed;
+
+	intl_error_reset( NULL TSRMLS_CC );
+
+	/* Parse parameters. */
+	if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "s|l",
+		&input, &input_len, &form ) == FAILURE )
+	{
+		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
+			"normalizer_normalize: unable to parse input params", 0 TSRMLS_CC );
+		RETURN_FALSE;
+	}
+
+	/* Validate the form and pick the initial output size: FORM_D and FORM_KD
+	 * start with a buffer three times the input, the other forms with 1x. */
+	switch(form) {
+		case NORMALIZER_NONE:
+		case NORMALIZER_FORM_C:
+		case NORMALIZER_FORM_KC:
+			break;
+		case NORMALIZER_FORM_D:
+		case NORMALIZER_FORM_KD:
+			expansion_factor = 3;
+			break;
+		default:
+			intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
+				"normalizer_normalize: illegal normalization form", 0 TSRMLS_CC );
+			RETURN_FALSE;
+	}
+
+	/* Normalize string: first convert it to UTF-16. */
+	intl_convert_utf8_to_utf16(&uinput, &uinput_len, input, input_len, &status );
+
+	if( U_FAILURE( status ) )
+	{
+		/* Set global error code. */
+		intl_error_set_code( NULL, status TSRMLS_CC );
+		/* Set error messages. */
+		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
+		if (uinput) {
+			efree( uinput );
+		}
+		RETURN_FALSE;
+	}
+
+	/* Do not let uinput_len * expansion_factor (+1 for the allocation) exceed
+	 * int32_t: for such inputs start at 1x and rely on the retry below. */
+	if( expansion_factor > 1 && uinput_len > ( INT32_MAX - 1 ) / expansion_factor ) {
+		expansion_factor = 1;
+	}
+
+	/* Allocate memory for the destination buffer for normalization */
+	uret_len = uinput_len * expansion_factor;
+	uret_buf = eumalloc( uret_len + 1 );
+
+	/* normalize */
+	size_needed = unorm_normalize( uinput, uinput_len, form, (int32_t) 0 /* options */, uret_buf, uret_len, &status);
+
+	/* Bail out if an unexpected error occurred.
+	 * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough;
+	 * ICU warnings such as U_STRING_NOT_TERMINATED_WARNING never satisfy
+	 * U_FAILURE.) NULL is still returned here, but the error is now recorded.
+	 */
+	if( U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR ) {
+		intl_error_set( NULL, status, "Error normalizing string", 0 TSRMLS_CC );
+		efree( uret_buf );
+		efree( uinput );
+		RETURN_NULL();
+	}
+
+	if ( size_needed > uret_len ) {
+		/* Free and allocate afresh instead of eurealloc(): realloc did not
+		 * seem to work properly here (memory was corrupted).
+		 */
+		efree( uret_buf );
+		uret_buf = eumalloc( size_needed + 1 );
+		uret_len = size_needed;
+
+		status = U_ZERO_ERROR;
+
+		/* try normalize again */
+		size_needed = unorm_normalize( uinput, uinput_len, form, (int32_t) 0 /* options */, uret_buf, uret_len, &status);
+
+		/* Bail out if an unexpected error occurred; record code and message. */
+		if( U_FAILURE(status) ) {
+			intl_error_set( NULL, status, "Error normalizing string", 0 TSRMLS_CC );
+			efree( uret_buf );
+			efree( uinput );
+			RETURN_FALSE;
+		}
+	}
+
+	efree( uinput );
+
+	/* the buffer we actually used */
+	uret_len = size_needed;
+
+	/* Convert normalized string from UTF-16 to UTF-8. */
+	intl_convert_utf16_to_utf8( &ret_buf, &ret_len, uret_buf, uret_len, &status );
+	efree( uret_buf );
+	if( U_FAILURE( status ) )
+	{
+		intl_error_set( NULL, status,
+			"normalizer_normalize: error converting normalized text UTF-8", 0 TSRMLS_CC );
+		RETURN_FALSE;
+	}
+
+	/* Return it. */
+	RETVAL_STRINGL( ret_buf, ret_len, FALSE );
+}
+/* }}} */
+
+/* {{{ proto bool Normalizer::isNormalized( string $input [, int $form = FORM_C] )
+ * Test if a string is in a given normalization form. }}} */
+/* {{{ proto bool normalizer_is_normalized( string $input [, int $form = FORM_C] )
+ * Test if a string is in a given normalization form.
+ * Returns TRUE only when unorm_isNormalizedWithOptions() reports the input as
+ * normalized. Returns FALSE otherwise, including on unparsable arguments, a
+ * form other than D/KD/C/KC, or an ICU error (the global intl error is set).
+ */
+PHP_FUNCTION( normalizer_is_normalized )
+{
+	char* input = NULL;
+	/* form is optional, defaults to FORM_C */
+	long form = NORMALIZER_DEFAULT;
+	int input_len = 0;
+
+	UChar* uinput = NULL;
+	int uinput_len = 0;
+	UErrorCode status = U_ZERO_ERROR;
+
+	UBool uret = FALSE;
+
+	intl_error_reset( NULL TSRMLS_CC );
+
+	/* Parse parameters. */
+	if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "s|l",
+		&input, &input_len, &form) == FAILURE )
+	{
+		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
+			"normalizer_is_normalized: unable to parse input params", 0 TSRMLS_CC );
+
+		RETURN_FALSE;
+	}
+
+	switch(form) {
+		/* case NORMALIZER_NONE: not allowed - doesn't make sense */
+
+		case NORMALIZER_FORM_D:
+		case NORMALIZER_FORM_KD:
+		case NORMALIZER_FORM_C:
+		case NORMALIZER_FORM_KC:
+			break;
+		default:
+			intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
+				"normalizer_is_normalized: illegal normalization form", 0 TSRMLS_CC );
+			RETURN_FALSE;
+	}
+
+
+	/*
+	 * Test normalization of string (converting it to UTF-16 first).
+	 */
+
+	/* First convert the string to UTF-16. */
+	intl_convert_utf8_to_utf16(&uinput, &uinput_len, input, input_len, &status );
+
+	if( U_FAILURE( status ) )
+	{
+		/* Set global error code. */
+		intl_error_set_code( NULL, status TSRMLS_CC );
+
+		/* Set error messages. */
+		intl_error_set_custom_msg( NULL, "Error converting string to UTF-16.", 0 TSRMLS_CC );
+		if (uinput) {
+			efree( uinput );
+		}
+		RETURN_FALSE;
+	}
+
+
+	/* test string */
+	uret = unorm_isNormalizedWithOptions( uinput, uinput_len, form, (int32_t) 0 /* options */, &status);
+
+	efree( uinput );
+
+	/* Bail out if an unexpected error occurred. */
+	if( U_FAILURE(status) ) {
+		/* Record the ICU error code together with the message. */
+		intl_error_set( NULL, status, "Error testing if string is the given normalization form.", 0 TSRMLS_CC );
+		RETURN_FALSE;
+	}
+
+	RETURN_BOOL( uret );
+}
+/* }}} */
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ * vim600: noet sw=4 ts=4 fdm=marker
+ * vim<600: noet sw=4 ts=4
+ */
diff --git a/ext/intl/normalizer/normalizer_normalize.h b/ext/intl/normalizer/normalizer_normalize.h
new file mode 100644
index 0000000..41c31f7
--- /dev/null
+++ b/ext/intl/normalizer/normalizer_normalize.h
@@ -0,0 +1,25 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Ed Batutis <ed@batutis.com> |
+ +----------------------------------------------------------------------+
+ */
+
+#ifndef NORMALIZER_NORMALIZE_H
+#define NORMALIZER_NORMALIZE_H
+
+#include <php.h>
+/* Procedural functions; Normalizer::normalize() and Normalizer::isNormalized() are bound to them. */
+PHP_FUNCTION( normalizer_normalize );
+PHP_FUNCTION( normalizer_is_normalized );
+
+#endif /* NORMALIZER_NORMALIZE_H */