diff options
author | Lorry Tar Creator <lorry-tar-importer@baserock.org> | 2013-03-14 05:42:27 +0000 |
---|---|---|
committer | <> | 2013-04-03 16:25:08 +0000 |
commit | c4dd7a1a684490673e25aaf4fabec5df138854c4 (patch) | |
tree | 4d57c44caae4480efff02b90b9be86f44bf25409 /ext/intl/collator/collator_sort.c | |
download | php2-master.tar.gz |
Imported from /home/lorry/working-area/delta_php2/php-5.4.13.tar.bz2.HEADphp-5.4.13master
Diffstat (limited to 'ext/intl/collator/collator_sort.c')
-rw-r--r-- | ext/intl/collator/collator_sort.c | 621 |
1 files changed, 621 insertions, 0 deletions
diff --git a/ext/intl/collator/collator_sort.c b/ext/intl/collator/collator_sort.c new file mode 100644 index 0000000..0785111 --- /dev/null +++ b/ext/intl/collator/collator_sort.c @@ -0,0 +1,621 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Vadim Savchuk <vsavchuk@productengine.com> | + | Dmitry Lakhtyuk <dlakhtyuk@productengine.com> | + +----------------------------------------------------------------------+ + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "php_intl.h" +#include "collator.h" +#include "collator_class.h" +#include "collator_sort.h" +#include "collator_convert.h" +#include "intl_convert.h" + +#if !defined(HAVE_PTRDIFF_T) && !defined(_PTRDIFF_T_DEFINED) +typedef long ptrdiff_t; +#endif + +/** + * Declare 'index' which will point to sort key in sort key + * buffer. + */ +typedef struct _collator_sort_key_index { + char* key; /* pointer to sort key */ + zval** zstr; /* pointer to original string(hash-item) */ +} collator_sort_key_index_t; + +ZEND_EXTERN_MODULE_GLOBALS( intl ) + +static const size_t DEF_SORT_KEYS_BUF_SIZE = 1048576; +static const size_t DEF_SORT_KEYS_BUF_INCREMENT = 1048576; + +static const size_t DEF_SORT_KEYS_INDX_BUF_SIZE = 1048576; +static const size_t DEF_SORT_KEYS_INDX_BUF_INCREMENT = 1048576; + +static const size_t DEF_UTF16_BUF_SIZE = 1024; + +/* {{{ collator_regular_compare_function */ +static int collator_regular_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC) +{ + Collator_object* co = NULL; + + int rc = SUCCESS; + + zval* str1 = collator_convert_object_to_string( op1 TSRMLS_CC ); + zval* str2 = collator_convert_object_to_string( op2 TSRMLS_CC ); + + zval* num1 = NULL; + zval* num2 = NULL; + zval* norm1 = NULL; + zval* norm2 = NULL; + + /* If both args are strings AND either of args is not numeric string + * then use ICU-compare. Otherwise PHP-compare. */ + if( Z_TYPE_P(str1) == IS_STRING && Z_TYPE_P(str2) == IS_STRING && + ( str1 == ( num1 = collator_convert_string_to_number_if_possible( str1 ) ) || + str2 == ( num2 = collator_convert_string_to_number_if_possible( str2 ) ) ) ) + { + /* Fetch collator object. */ + co = (Collator_object *) zend_object_store_get_object( INTL_G(current_collator) TSRMLS_CC ); + + if (!co || !co->ucoll) { + intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) TSRMLS_CC ); + intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ), + "Object not initialized", 0 TSRMLS_CC ); + php_error_docref(NULL TSRMLS_CC, E_RECOVERABLE_ERROR, "Object not initialized"); + } + + /* Compare the strings using ICU. */ + result->value.lval = ucol_strcoll( + co->ucoll, + INTL_Z_STRVAL_P(str1), INTL_Z_STRLEN_P(str1), + INTL_Z_STRVAL_P(str2), INTL_Z_STRLEN_P(str2) ); + result->type = IS_LONG; + } + else + { + /* num1 is set if str1 and str2 are strings. */ + if( num1 ) + { + if( num1 == str1 ) + { + /* str1 is string but not numeric string + * just convert it to utf8. + */ + norm1 = collator_convert_zstr_utf16_to_utf8( str1 ); + + /* num2 is not set but str2 is string => do normalization. */ + norm2 = collator_normalize_sort_argument( str2 ); + } + else + { + /* str1 is numeric strings => passthru to PHP-compare. */ + zval_add_ref( &num1 ); + norm1 = num1; + + /* str2 is numeric strings => passthru to PHP-compare. */ + zval_add_ref( &num2 ); + norm2 = num2; + } + } + else + { + /* num1 is not set if str1 or str2 is not a string => do normalization. */ + norm1 = collator_normalize_sort_argument( str1 ); + + /* if num1 is not set then num2 is not set as well => do normalization. */ + norm2 = collator_normalize_sort_argument( str2 ); + } + + rc = compare_function( result, norm1, norm2 TSRMLS_CC ); + + zval_ptr_dtor( &norm1 ); + zval_ptr_dtor( &norm2 ); + } + + if( num1 ) + zval_ptr_dtor( &num1 ); + + if( num2 ) + zval_ptr_dtor( &num2 ); + + zval_ptr_dtor( &str1 ); + zval_ptr_dtor( &str2 ); + + return rc; +} +/* }}} */ + +/* {{{ collator_numeric_compare_function + * Convert input args to double and compare it. + */ +static int collator_numeric_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC) +{ + int rc = SUCCESS; + zval* num1 = NULL; + zval* num2 = NULL; + + if( Z_TYPE_P(op1) == IS_STRING ) + { + num1 = collator_convert_string_to_double( op1 ); + op1 = num1; + } + + if( Z_TYPE_P(op2) == IS_STRING ) + { + num2 = collator_convert_string_to_double( op2 ); + op2 = num2; + } + + rc = numeric_compare_function( result, op1, op2 TSRMLS_CC); + + if( num1 ) + zval_ptr_dtor( &num1 ); + if( num2 ) + zval_ptr_dtor( &num2 ); + + return rc; +} +/* }}} */ + +/* {{{ collator_icu_compare_function + * Direct use of ucol_strcoll. +*/ +static int collator_icu_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC) +{ + int rc = SUCCESS; + Collator_object* co = NULL; + zval* str1 = NULL; + zval* str2 = NULL; + + str1 = collator_make_printable_zval( op1 ); + str2 = collator_make_printable_zval( op2 ); + + /* Fetch collator object. */ + co = (Collator_object *) zend_object_store_get_object( INTL_G(current_collator) TSRMLS_CC ); + + /* Compare the strings using ICU. */ + result->value.lval = ucol_strcoll( + co->ucoll, + INTL_Z_STRVAL_P(str1), INTL_Z_STRLEN_P(str1), + INTL_Z_STRVAL_P(str2), INTL_Z_STRLEN_P(str2) ); + result->type = IS_LONG; + + zval_ptr_dtor( &str1 ); + zval_ptr_dtor( &str2 ); + + return rc; +} +/* }}} */ + +/* {{{ collator_compare_func + * Taken from PHP5 source (array_data_compare). + */ +static int collator_compare_func( const void* a, const void* b TSRMLS_DC ) +{ + Bucket *f; + Bucket *s; + zval result; + zval *first; + zval *second; + + f = *((Bucket **) a); + s = *((Bucket **) b); + + first = *((zval **) f->pData); + second = *((zval **) s->pData); + + if( INTL_G(compare_func)( &result, first, second TSRMLS_CC) == FAILURE ) + return 0; + + if( Z_TYPE(result) == IS_DOUBLE ) + { + if( Z_DVAL(result) < 0 ) + return -1; + else if( Z_DVAL(result) > 0 ) + return 1; + else + return 0; + } + + convert_to_long(&result); + + if( Z_LVAL(result) < 0 ) + return -1; + else if( Z_LVAL(result) > 0 ) + return 1; + + return 0; +} +/* }}} */ + +/* {{{ collator_cmp_sort_keys + * Compare sort keys + */ +static int collator_cmp_sort_keys( const void *p1, const void *p2 TSRMLS_DC ) +{ + char* key1 = ((collator_sort_key_index_t*)p1)->key; + char* key2 = ((collator_sort_key_index_t*)p2)->key; + + return strcmp( key1, key2 ); +} +/* }}} */ + +/* {{{ collator_get_compare_function + * Choose compare function according to sort flags. + */ +static collator_compare_func_t collator_get_compare_function( const long sort_flags ) +{ + collator_compare_func_t func; + + switch( sort_flags ) + { + case COLLATOR_SORT_NUMERIC: + func = collator_numeric_compare_function; + break; + + case COLLATOR_SORT_STRING: + func = collator_icu_compare_function; + break; + + case COLLATOR_SORT_REGULAR: + default: + func = collator_regular_compare_function; + break; + } + + return func; +} +/* }}} */ + +/* {{{ collator_sort_internal + * Common code shared by collator_sort() and collator_asort() API functions. + */ +static void collator_sort_internal( int renumber, INTERNAL_FUNCTION_PARAMETERS ) +{ + zval* array = NULL; + HashTable* hash = NULL; + zval* saved_collator = NULL; + long sort_flags = COLLATOR_SORT_REGULAR; + + COLLATOR_METHOD_INIT_VARS + + /* Parse parameters. */ + if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "Oa|l", + &object, Collator_ce_ptr, &array, &sort_flags ) == FAILURE ) + { + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "collator_sort_internal: unable to parse input params", 0 TSRMLS_CC ); + + RETURN_FALSE; + } + + /* Fetch the object. */ + COLLATOR_METHOD_FETCH_OBJECT; + + /* Set 'compare function' according to sort flags. */ + INTL_G(compare_func) = collator_get_compare_function( sort_flags ); + + hash = HASH_OF( array ); + + /* Convert strings in the specified array from UTF-8 to UTF-16. */ + collator_convert_hash_from_utf8_to_utf16( hash, COLLATOR_ERROR_CODE_P( co ) ); + COLLATOR_CHECK_STATUS( co, "Error converting hash from UTF-8 to UTF-16" ); + + /* Save specified collator in the request-global (?) variable. */ + saved_collator = INTL_G( current_collator ); + INTL_G( current_collator ) = object; + + /* Sort specified array. */ + zend_hash_sort( hash, zend_qsort, collator_compare_func, renumber TSRMLS_CC ); + + /* Restore saved collator. */ + INTL_G( current_collator ) = saved_collator; + + /* Convert strings in the specified array back to UTF-8. */ + collator_convert_hash_from_utf16_to_utf8( hash, COLLATOR_ERROR_CODE_P( co ) ); + COLLATOR_CHECK_STATUS( co, "Error converting hash from UTF-16 to UTF-8" ); + + RETURN_TRUE; +} +/* }}} */ + +/* {{{ proto bool Collator::sort( Collator $coll, array(string) $arr [, int $sort_flags] ) + * Sort array using specified collator. }}} */ +/* {{{ proto bool collator_sort( Collator $coll, array(string) $arr [, int $sort_flags] ) + * Sort array using specified collator. + */ +PHP_FUNCTION( collator_sort ) +{ + collator_sort_internal( TRUE, INTERNAL_FUNCTION_PARAM_PASSTHRU ); +} +/* }}} */ + +/* {{{ proto bool Collator::sortWithSortKeys( Collator $coll, array(string) $arr ) + * Equivalent to standard PHP sort using Collator. + * Uses ICU ucol_getSortKey for performance. }}} */ +/* {{{ proto bool collator_sort_with_sort_keys( Collator $coll, array(string) $arr ) + * Equivalent to standard PHP sort using Collator. + * Uses ICU ucol_getSortKey for performance. + */ +PHP_FUNCTION( collator_sort_with_sort_keys ) +{ + zval* array = NULL; + HashTable* hash = NULL; + zval** hashData = NULL; /* currently processed item of input hash */ + + char* sortKeyBuf = NULL; /* buffer to store sort keys */ + uint32_t sortKeyBufSize = DEF_SORT_KEYS_BUF_SIZE; /* buffer size */ + ptrdiff_t sortKeyBufOffset = 0; /* pos in buffer to store sort key */ + int32_t sortKeyLen = 0; /* the length of currently processing key */ + uint32_t bufLeft = 0; + uint32_t bufIncrement = 0; + + collator_sort_key_index_t* sortKeyIndxBuf = NULL; /* buffer to store 'indexes' which will be passed to 'qsort' */ + uint32_t sortKeyIndxBufSize = DEF_SORT_KEYS_INDX_BUF_SIZE; + uint32_t sortKeyIndxSize = sizeof( collator_sort_key_index_t ); + + uint32_t sortKeyCount = 0; + uint32_t j = 0; + + UChar* utf16_buf = NULL; /* tmp buffer to hold current processing string in utf-16 */ + int utf16_buf_size = DEF_UTF16_BUF_SIZE; /* the length of utf16_buf */ + int utf16_len = 0; /* length of converted string */ + + HashTable* sortedHash = NULL; + + COLLATOR_METHOD_INIT_VARS + + /* Parse parameters. */ + if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "Oa", + &object, Collator_ce_ptr, &array ) == FAILURE ) + { + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "collator_sort_with_sort_keys: unable to parse input params", 0 TSRMLS_CC ); + + RETURN_FALSE; + } + + /* Fetch the object. */ + COLLATOR_METHOD_FETCH_OBJECT; + + if (!co || !co->ucoll) { + intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) TSRMLS_CC ); + intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ), + "Object not initialized", 0 TSRMLS_CC ); + php_error_docref(NULL TSRMLS_CC, E_RECOVERABLE_ERROR, "Object not initialized"); + + RETURN_FALSE; + } + + /* + * Sort specified array. + */ + hash = HASH_OF( array ); + + if( !hash || zend_hash_num_elements( hash ) == 0 ) + RETURN_TRUE; + + /* Create bufers */ + sortKeyBuf = ecalloc( sortKeyBufSize, sizeof( char ) ); + sortKeyIndxBuf = ecalloc( sortKeyIndxBufSize, sizeof( uint8_t ) ); + utf16_buf = eumalloc( utf16_buf_size ); + + /* Iterate through input hash and create a sort key for each value. */ + zend_hash_internal_pointer_reset( hash ); + while( zend_hash_get_current_data( hash, (void**) &hashData ) == SUCCESS ) + { + /* Convert current hash item from UTF-8 to UTF-16LE and save the result to utf16_buf. */ + + utf16_len = utf16_buf_size; + + /* Process string values only. */ + if( Z_TYPE_PP( hashData ) == IS_STRING ) + { + intl_convert_utf8_to_utf16( &utf16_buf, &utf16_len, Z_STRVAL_PP( hashData ), Z_STRLEN_PP( hashData ), COLLATOR_ERROR_CODE_P( co ) ); + + if( U_FAILURE( COLLATOR_ERROR_CODE( co ) ) ) + { + intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) TSRMLS_CC ); + intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ), "Sort with sort keys failed", 0 TSRMLS_CC ); + + if( utf16_buf ) + efree( utf16_buf ); + + efree( sortKeyIndxBuf ); + efree( sortKeyBuf ); + + RETURN_FALSE; + } + } + else + { + /* Set empty string */ + utf16_len = 0; + utf16_buf[utf16_len] = 0; + } + + if( (utf16_len + 1) > utf16_buf_size ) + utf16_buf_size = utf16_len + 1; + + /* Get sort key, reallocating the buffer if needed. */ + bufLeft = sortKeyBufSize - sortKeyBufOffset; + + sortKeyLen = ucol_getSortKey( co->ucoll, + utf16_buf, + utf16_len, + (uint8_t*)sortKeyBuf + sortKeyBufOffset, + bufLeft ); + + /* check for sortKeyBuf overflow, increasing its size of the buffer if needed */ + if( sortKeyLen > bufLeft ) + { + bufIncrement = ( sortKeyLen > DEF_SORT_KEYS_BUF_INCREMENT ) ? sortKeyLen : DEF_SORT_KEYS_BUF_INCREMENT; + + sortKeyBufSize += bufIncrement; + bufLeft += bufIncrement; + + sortKeyBuf = erealloc( sortKeyBuf, sortKeyBufSize ); + + sortKeyLen = ucol_getSortKey( co->ucoll, utf16_buf, utf16_len, (uint8_t*)sortKeyBuf + sortKeyBufOffset, bufLeft ); + } + + /* check sortKeyIndxBuf overflow, increasing its size of the buffer if needed */ + if( ( sortKeyCount + 1 ) * sortKeyIndxSize > sortKeyIndxBufSize ) + { + bufIncrement = ( sortKeyIndxSize > DEF_SORT_KEYS_INDX_BUF_INCREMENT ) ? sortKeyIndxSize : DEF_SORT_KEYS_INDX_BUF_INCREMENT; + + sortKeyIndxBufSize += bufIncrement; + + sortKeyIndxBuf = erealloc( sortKeyIndxBuf, sortKeyIndxBufSize ); + } + + sortKeyIndxBuf[sortKeyCount].key = (char*)sortKeyBufOffset; /* remeber just offset, cause address */ + /* of 'sortKeyBuf' may be changed due to realloc. */ + sortKeyIndxBuf[sortKeyCount].zstr = hashData; + + sortKeyBufOffset += sortKeyLen; + ++sortKeyCount; + + zend_hash_move_forward( hash ); + } + + /* update ptrs to point to valid keys. */ + for( j = 0; j < sortKeyCount; j++ ) + sortKeyIndxBuf[j].key = sortKeyBuf + (ptrdiff_t)sortKeyIndxBuf[j].key; + + /* sort it */ + zend_qsort( sortKeyIndxBuf, sortKeyCount, sortKeyIndxSize, collator_cmp_sort_keys TSRMLS_CC ); + + /* for resulting hash we'll assign new hash keys rather then reordering */ + ALLOC_HASHTABLE( sortedHash ); + zend_hash_init( sortedHash, 0, NULL, ZVAL_PTR_DTOR, 0 ); + + for( j = 0; j < sortKeyCount; j++ ) + { + zval_add_ref( sortKeyIndxBuf[j].zstr ); + zend_hash_next_index_insert( sortedHash, sortKeyIndxBuf[j].zstr, sizeof(zval **), NULL ); + } + + /* Save sorted hash into return variable. */ + zval_dtor( array ); + (array)->value.ht = sortedHash; + (array)->type = IS_ARRAY; + + if( utf16_buf ) + efree( utf16_buf ); + + efree( sortKeyIndxBuf ); + efree( sortKeyBuf ); + + RETURN_TRUE; +} +/* }}} */ + +/* {{{ proto bool Collator::asort( Collator $coll, array(string) $arr ) + * Sort array using specified collator, maintaining index association. }}} */ +/* {{{ proto bool collator_asort( Collator $coll, array(string) $arr ) + * Sort array using specified collator, maintaining index association. + */ +PHP_FUNCTION( collator_asort ) +{ + collator_sort_internal( FALSE, INTERNAL_FUNCTION_PARAM_PASSTHRU ); +} +/* }}} */ + +/* {{{ proto bool Collator::getSortKey( Collator $coll, string $str ) + * Get a sort key for a string from a Collator. }}} */ +/* {{{ proto bool collator_get_sort_key( Collator $coll, string $str ) + * Get a sort key for a string from a Collator. }}} */ +PHP_FUNCTION( collator_get_sort_key ) +{ + char* str = NULL; + int str_len = 0; + UChar* ustr = NULL; + int ustr_len = 0; + uint8_t* key = NULL; + int key_len = 0; + + COLLATOR_METHOD_INIT_VARS + + /* Parse parameters. */ + if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "Os", + &object, Collator_ce_ptr, &str, &str_len ) == FAILURE ) + { + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "collator_get_sort_key: unable to parse input params", 0 TSRMLS_CC ); + + RETURN_FALSE; + } + + /* Fetch the object. */ + COLLATOR_METHOD_FETCH_OBJECT; + + if (!co || !co->ucoll) { + intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) TSRMLS_CC ); + intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ), + "Object not initialized", 0 TSRMLS_CC ); + php_error_docref(NULL TSRMLS_CC, E_RECOVERABLE_ERROR, "Object not initialized"); + + RETURN_FALSE; + } + + /* + * Compare given strings (converting them to UTF-16 first). + */ + + /* First convert the strings to UTF-16. */ + intl_convert_utf8_to_utf16( + &ustr, &ustr_len, str, str_len, COLLATOR_ERROR_CODE_P( co ) ); + if( U_FAILURE( COLLATOR_ERROR_CODE( co ) ) ) + { + /* Set global error code. */ + intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) TSRMLS_CC ); + + /* Set error messages. */ + intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ), + "Error converting first argument to UTF-16", 0 TSRMLS_CC ); + efree( ustr ); + RETURN_FALSE; + } + + /* ucol_getSortKey is exception in that the key length includes the + * NUL terminator*/ + key_len = ucol_getSortKey(co->ucoll, ustr, ustr_len, key, 0); + if(!key_len) { + efree( ustr ); + RETURN_FALSE; + } + key = emalloc(key_len); + key_len = ucol_getSortKey(co->ucoll, ustr, ustr_len, key, key_len); + efree( ustr ); + if(!key_len) { + RETURN_FALSE; + } + RETURN_STRINGL((char *)key, key_len - 1, 0); +} +/* }}} */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 fdm=marker + * vim<600: noet sw=4 ts=4 + */ |