1 files changed, 258 insertions, 0 deletions
diff --git a/ext/intl/normalizer/normalizer_normalize.c b/ext/intl/normalizer/normalizer_normalize.c
new file mode 100644
index 0000000..f46285e
--- /dev/null
+++ b/ext/intl/normalizer/normalizer_normalize.c
@@ -0,0 +1,258 @@
+/*
+   +----------------------------------------------------------------------+
+   | PHP Version 5														  |
+   +----------------------------------------------------------------------+
+   | This source file is subject to version 3.01 of the PHP license,	  |
+   | that is bundled with this package in the file LICENSE, and is		  |
+   | available through the world-wide-web at the following url:			  |
+   | http://www.php.net/license/3_01.txt								  |
+   | If you did not receive a copy of the PHP license and are unable to   |
+   | obtain it through the world-wide-web, please send a note to		  |
+   | license@php.net so we can mail you a copy immediately.				  |
+   +----------------------------------------------------------------------+
+   | Authors: Ed Batutis <ed@batutis.com>								  |
+   +----------------------------------------------------------------------+
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "php_intl.h"
+#include "unicode/unorm.h"
+#include "normalizer.h"
+#include "normalizer_class.h"
+#include "normalizer_normalize.h"
+#include "intl_convert.h"
+
+/* {{{ proto string Normalizer::normalize( string $input [, string $form = FORM_C] )
+ * Normalize a string. }}} */
+/* {{{ proto string normalizer_normalize( string $input [, string $form = FORM_C] )
+ * Normalize a string.
+ */
+PHP_FUNCTION( normalizer_normalize )
+{
+	char*			input = NULL;
+	/* form is optional, defaults to FORM_C */
+	long			form = NORMALIZER_DEFAULT;
+	int			input_len = 0;
+		
+	UChar*			uinput = NULL;
+	int			uinput_len = 0;
+	int			expansion_factor = 1;
+	UErrorCode		status = U_ZERO_ERROR;
+		
+	UChar*			uret_buf = NULL;
+	int			uret_len = 0;
+		
+	char*			ret_buf = NULL;
+	int32_t			ret_len = 0;
+
+	int32_t			size_needed;
+		
+	intl_error_reset( NULL TSRMLS_CC );
+
+	/* Parse parameters. */
+	if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "s|l",
+				&input, &input_len, &form ) == FAILURE )
+	{
+		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
+						 "normalizer_normalize: unable to parse input params", 0 TSRMLS_CC );
+
+		RETURN_FALSE;
+	}
+
+	expansion_factor = 1;
+
+	switch(form) {
+		case NORMALIZER_NONE:
+			break;
+		case NORMALIZER_FORM_D:
+			expansion_factor = 3;
+			break;
+		case NORMALIZER_FORM_KD:
+			expansion_factor = 3;
+			break;
+		case NORMALIZER_FORM_C:
+		case NORMALIZER_FORM_KC:
+			break;
+		default:
+			intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
+						"normalizer_normalize: illegal normalization form", 0 TSRMLS_CC );
+			RETURN_FALSE;
+	}
+
+	/*
+	 * Normalize string (converting it to UTF-16 first).
+	 */
+
+	/* First convert the string to UTF-16. */
+	intl_convert_utf8_to_utf16(&uinput, &uinput_len, input, input_len, &status );
+
+	if( U_FAILURE( status ) )
+	{
+		/* Set global error code. */
+		intl_error_set_code( NULL, status TSRMLS_CC );
+
+		/* Set error messages. */
+		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
+		if (uinput) {
+			efree( uinput );
+		}
+		RETURN_FALSE;
+	}
+
+
+	/* Allocate memory for the destination buffer for normalization */
+	uret_len = uinput_len * expansion_factor;
+	uret_buf = eumalloc( uret_len + 1 );
+
+	/* normalize */
+	size_needed = unorm_normalize( uinput, uinput_len, form, (int32_t) 0 /* options */, uret_buf, uret_len, &status);
+	
+	/* Bail out if an unexpected error occurred.
+	 * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
+	 * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string is empty).
+	 */	
+	if( U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR && status != U_STRING_NOT_TERMINATED_WARNING ) {
+		efree( uret_buf );
+		efree( uinput );
+		RETURN_NULL();
+	}
+
+	if ( size_needed > uret_len ) {
+		/* realloc does not seem to work properly - memory is corrupted
+		 * uret_buf =  eurealloc(uret_buf, size_needed + 1);
+		 */
+		efree( uret_buf );
+		uret_buf = eumalloc( size_needed + 1 );
+		uret_len = size_needed;
+
+		status = U_ZERO_ERROR;
+
+		/* try normalize again */
+		size_needed = unorm_normalize( uinput, uinput_len, form, (int32_t) 0 /* options */, uret_buf, uret_len, &status);
+
+		/* Bail out if an unexpected error occurred. */
+		if( U_FAILURE(status)  ) {
+			/* Set error messages. */
+			intl_error_set_custom_msg( NULL,"Error normalizing string", 0 TSRMLS_CC );
+			efree( uret_buf );
+			efree( uinput );
+			RETURN_FALSE;
+		}
+	}
+
+	efree( uinput );
+
+	/* the buffer we actually used */
+	uret_len = size_needed;
+
+	/* Convert normalized string from UTF-16 to UTF-8. */
+	intl_convert_utf16_to_utf8( &ret_buf, &ret_len, uret_buf, uret_len, &status );
+	efree( uret_buf );
+	if( U_FAILURE( status ) )
+	{
+		intl_error_set( NULL, status,
+				"normalizer_normalize: error converting normalized text UTF-8", 0 TSRMLS_CC );
+		RETURN_FALSE;
+	}
+
+	/* Return it. */
+	RETVAL_STRINGL( ret_buf, ret_len, FALSE );
+}
+/* }}} */
+
+/* {{{ proto bool Normalizer::isNormalized( string $input [, string $form = FORM_C] )
+ * Test if a string is in a given normalization form. }}} */
+/* {{{ proto bool normalizer_is_normalize( string $input [, string $form = FORM_C] )
+ * Test if a string is in a given normalization form.
+ */
+PHP_FUNCTION( normalizer_is_normalized )
+{
+	char*	 	input = NULL;
+	/* form is optional, defaults to FORM_C */
+	long		form = NORMALIZER_DEFAULT;
+	int		input_len = 0;
+
+	UChar*	 	uinput = NULL;
+	int		uinput_len = 0;
+	UErrorCode	status = U_ZERO_ERROR;
+		
+	UBool		uret = FALSE;
+		
+	intl_error_reset( NULL TSRMLS_CC );
+
+	/* Parse parameters. */
+	if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "s|l",
+				&input, &input_len, &form) == FAILURE )
+	{
+		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
+				"normalizer_is_normalized: unable to parse input params", 0 TSRMLS_CC );
+
+		RETURN_FALSE;
+	}
+
+	switch(form) {
+		/* case NORMALIZER_NONE: not allowed - doesn't make sense */
+
+		case NORMALIZER_FORM_D:
+		case NORMALIZER_FORM_KD:
+		case NORMALIZER_FORM_C:
+		case NORMALIZER_FORM_KC:
+			break;
+		default:
+			intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
+						"normalizer_normalize: illegal normalization form", 0 TSRMLS_CC );
+			RETURN_FALSE;
+	}
+
+
+	/*
+	 * Test normalization of string (converting it to UTF-16 first).
+	 */
+
+	/* First convert the string to UTF-16. */
+	intl_convert_utf8_to_utf16(&uinput, &uinput_len, input, input_len, &status );
+
+	if( U_FAILURE( status ) )
+	{
+		/* Set global error code. */
+		intl_error_set_code( NULL, status TSRMLS_CC );
+
+		/* Set error messages. */
+		intl_error_set_custom_msg( NULL, "Error converting string to UTF-16.", 0 TSRMLS_CC );
+		if (uinput) {
+			efree( uinput );
+		}
+		RETURN_FALSE;
+	}
+
+
+	/* test string */
+	uret = unorm_isNormalizedWithOptions( uinput, uinput_len, form, (int32_t) 0 /* options */, &status);
+	
+	efree( uinput );
+
+	/* Bail out if an unexpected error occurred. */
+	if( U_FAILURE(status)  ) {
+		/* Set error messages. */
+		intl_error_set_custom_msg( NULL,"Error testing if string is the given normalization form.", 0 TSRMLS_CC );
+		RETURN_FALSE;
+	}
+
+	if ( uret )
+		RETURN_TRUE;
+				
+	RETURN_FALSE;
+}
+/* }}} */
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ * vim600: noet sw=4 ts=4 fdm=marker
+ * vim<600: noet sw=4 ts=4
+ */