diff options
| -rw-r--r-- | ext/standard/Makefile.in | 2 | ||||
| -rw-r--r-- | ext/standard/basic_functions.c | 1 | ||||
| -rw-r--r-- | ext/standard/levenshtein.c | 117 | ||||
| -rw-r--r-- | ext/standard/php_string.h | 1 | 
4 files changed, 120 insertions, 1 deletions
| diff --git a/ext/standard/Makefile.in b/ext/standard/Makefile.in index 221922f52c..d83ed1a49a 100644 --- a/ext/standard/Makefile.in +++ b/ext/standard/Makefile.in @@ -7,7 +7,7 @@ LTLIBRARY_SOURCES=\  	link.c mail.c math.c md5.c metaphone.c microtime.c pack.c pageinfo.c \  	parsedate.c quot_print.c rand.c reg.c soundex.c string.c \  	syslog.c type.c uniqid.c url.c url_scanner.c var.c output.c assert.c \ -	strnatcmp.c +	strnatcmp.c levenshtein.c  include $(top_srcdir)/build/dynlib.mk diff --git a/ext/standard/basic_functions.c b/ext/standard/basic_functions.c index 4a1aa32a0d..8a45900d67 100644 --- a/ext/standard/basic_functions.c +++ b/ext/standard/basic_functions.c @@ -162,6 +162,7 @@ function_entry basic_functions[] = {  	PHP_FE(implode,									NULL)  	PHP_FE(setlocale,								NULL)  	PHP_FE(soundex,									NULL) +	PHP_FE(levdist,									NULL)  	PHP_FE(chr,										NULL)  	PHP_FE(ord,										NULL)  	PHP_FE(parse_str,								NULL) diff --git a/ext/standard/levenshtein.c b/ext/standard/levenshtein.c new file mode 100644 index 0000000000..946f36466c --- /dev/null +++ b/ext/standard/levenshtein.c @@ -0,0 +1,117 @@ +/* +   +----------------------------------------------------------------------+ +   | PHP version 4.0                                                      | +   +----------------------------------------------------------------------+ +   | Copyright (c) 1997, 1998, 1999, 2000 The PHP Group                   | +   +----------------------------------------------------------------------+ +   | This source file is subject to version 2.02 of the PHP license,      | +   | that is bundled with this package in the file LICENSE, and is        | +   | available at through the world-wide-web at                           | +   | http://www.php.net/license/2_02.txt.                                 | +   | If you did not receive a copy of the PHP license and are unable to   | +   | obtain it through the world-wide-web, please send a note to          | +   | license@php.net so we can mail you a copy immediately.               | +   +----------------------------------------------------------------------+ +   | Author: Bjørn Borud - Guardian Networks AS <borud@guardian.no>       | +   +----------------------------------------------------------------------+ + */ +/* $Id$ */ + +#include "php.h" +#include <stdlib.h> +#include <errno.h> +#include <ctype.h> +#include "php_string.h" + +int calc_levdist(const char *s1, const char *s2) /* faster, but obfuscated */ +{ +	register char *p1,*p2; +	register int i,j,n; +	int l1=0,l2=0; +	char r[512]; + +	/* skip equal start sequence, if any */ +	while(*s1==*s2) { +		if(!*s1) break; +		s1++; s2++; +	} +	 +	/* if we already used up one string, then +      the result is the length of the other */ +	if(*s1=='\0') return strlen(s2); +	if(*s2=='\0') return strlen(s1); + +	/* length count */ +	while(*s1++) l1++; +	while(*s2++) l2++; +	 +	/* cut of equal tail sequence, if any */ +	while(*--s1 == *--s2) { +		l1--; l2--;		 +	} +	 + +	/* reset pointers, adjust length */ +	s1-=l1++; +	s2-=l2++; + +	/* possible dist to great? */ + 	if(abs(l1-l2)>=255) return -1; + +	/* swap if l2 longer than l1 */ +	if(l1<l2) { +		(long)s1 ^= (long)s2; (long)s2 ^= (long)s1; (long)s1 ^= (long)s2; +		l1 ^= l2; l2 ^= l1; l1 ^= l2; +	} + +	 +	/* fill initial row */ +	n=(*s1!=*s2); +	for(i=0,p1=r;i<l1;i++,*p1++=n++,p1++) {/*empty*/} +	 +	/* calc. rowwise */ +	for(j=1;j<l2;j++) { +		/* init pointers and col#0 */ +		p1 = r + !(j&1); +		p2 = r + (j&1); +		n=*p1+1; +		*p2++=n;p2++; +		s2++; +		 +		/* foreach column */ +		for(i=1;i<l1;i++) { +			if(*p1<n) n=*p1+(*(s1+i)!=*(s2)); /* replace cheaper than delete? */ +			p1++; +			if(*++p1<n) n=*p1+1; /* insert cheaper then insert ? */ +			*p2++=n++; /* update field and cost for next col's delete */ +			p2++; +		}	 +	} + +	/* return result */ +	return n-1; +} + + +/* {{{ proto int levdist(string str1, string str2) +   Calculate Levenshtein distance between two strings */ +PHP_FUNCTION(levdist){ +	zval **str1, **str2; +	int l; + +	if (ARG_COUNT(ht) != 2 || zend_get_parameters_ex(2, &str1, &str2) == FAILURE) { +		WRONG_PARAM_COUNT; +	} +	convert_to_string_ex(str1); +	convert_to_string_ex(str2); +	 +	l = calc_levdist((*str1)->value.str.val, (*str2)->value.str.val); + +	if(l<0) { +		php_error(E_WARNING,"levdist(): argument string(s) to long"); +	} + +	RETURN_LONG(l); +} +/* }}} */ + diff --git a/ext/standard/php_string.h b/ext/standard/php_string.h index e71cf8f997..537c405fe7 100644 --- a/ext/standard/php_string.h +++ b/ext/standard/php_string.h @@ -43,6 +43,7 @@ PHP_FUNCTION(chop);  PHP_FUNCTION(trim);  PHP_FUNCTION(ltrim);  PHP_FUNCTION(soundex); +PHP_FUNCTION(levdist);  PHP_FUNCTION(count_chars);  PHP_FUNCTION(explode); | 
