summaryrefslogtreecommitdiff
path: root/doc/src/sgml/fuzzystrmatch.sgml
diff options
context:
space:
mode:
Diffstat (limited to 'doc/src/sgml/fuzzystrmatch.sgml')
-rw-r--r--doc/src/sgml/fuzzystrmatch.sgml122
1 files changed, 122 insertions, 0 deletions
diff --git a/doc/src/sgml/fuzzystrmatch.sgml b/doc/src/sgml/fuzzystrmatch.sgml
new file mode 100644
index 0000000000..666e031c0d
--- /dev/null
+++ b/doc/src/sgml/fuzzystrmatch.sgml
@@ -0,0 +1,122 @@
+
+<sect1 id="fuzzystrmatch">
+ <title>fuzzystrmatch</title>
+
+ <para>
+ This section describes the fuzzystrmatch module which provides different
+ functions to determine similarities and distance between strings.
+ </para>
+
+ <sect2>
+ <title>Soundex</title>
+ <para>
+ The Soundex system is a method of matching similar sounding names
+ (or any words) to the same code. It was initially used by the
+ United States Census in 1880, 1900, and 1910, but it has little use
+ beyond English names (or the English pronunciation of names), and
+ it is not a linguistic tool.
+ </para>
+ <para>
+ When comparing two soundex values to determine similarity, the
+ difference function reports how close the match is on a scale
+ from zero to four, with zero being no match and four being an
+ exact match.
+ </para>
+ <para>
+ The following are some usage examples:
+ </para>
+ <programlisting>
+SELECT soundex('hello world!');
+
+SELECT soundex('Anne'), soundex('Ann'), difference('Anne', 'Ann');
+SELECT soundex('Anne'), soundex('Andrew'), difference('Anne', 'Andrew');
+SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
+
+CREATE TABLE s (nm text);
+
+INSERT INTO s VALUES ('john');
+INSERT INTO s VALUES ('joan');
+INSERT INTO s VALUES ('wobbly');
+INSERT INTO s VALUES ('jack');
+
+SELECT * FROM s WHERE soundex(nm) = soundex('john');
+
+SELECT a.nm, b.nm FROM s a, s b WHERE soundex(a.nm) = soundex(b.nm) AND a.oid <> b.oid;
+
+CREATE FUNCTION text_sx_eq(text, text) RETURNS boolean AS
+'select soundex($1) = soundex($2)'
+LANGUAGE SQL;
+
+CREATE FUNCTION text_sx_lt(text, text) RETURNS boolean AS
+'select soundex($1) < soundex($2)'
+LANGUAGE SQL;
+
+CREATE FUNCTION text_sx_gt(text, text) RETURNS boolean AS
+'select soundex($1) > soundex($2)'
+LANGUAGE SQL;
+
+CREATE FUNCTION text_sx_le(text, text) RETURNS boolean AS
+'select soundex($1) <= soundex($2)'
+LANGUAGE SQL;
+
+CREATE FUNCTION text_sx_ge(text, text) RETURNS boolean AS
+'select soundex($1) >= soundex($2)'
+LANGUAGE SQL;
+
+CREATE FUNCTION text_sx_ne(text, text) RETURNS boolean AS
+'select soundex($1) <> soundex($2)'
+LANGUAGE SQL;
+
+DROP OPERATOR #= (text, text);
+
+CREATE OPERATOR #= (leftarg=text, rightarg=text, procedure=text_sx_eq, commutator = #=);
+
+SELECT * FROM s WHERE text_sx_eq(nm, 'john');
+
+SELECT * FROM s WHERE s.nm #= 'john';
+
+SELECT * FROM s WHERE difference(s.nm, 'john') > 2;
+ </programlisting>
+ </sect2>
+
+ <sect2>
+ <title>levenshtein</title>
+ <para>
+ This function calculates the levenshtein distance between two strings:
+ </para>
+ <programlisting>
+ int levenshtein(text source, text target)
+ </programlisting>
+ <para>
+ Both <literal>source</literal> and <literal>target</literal> can be any
+ NOT NULL string with a maximum of 255 characters.
+ </para>
+ <para>
+ Example:
+ </para>
+ <programlisting>
+ SELECT levenshtein('GUMBO','GAMBOL');
+ </programlisting>
+ </sect2>
+
+ <sect2>
+ <title>metaphone</title>
+ <para>
+ This function calculates and returns the metaphone code of an input string:
+ </para>
+ <programlisting>
+ text metahpone(text source, int max_output_length)
+ </programlisting>
+ <para>
+ <literal>source</literal> has to be a NOT NULL string with a maximum of
+ 255 characters. <literal>max_output_length</literal> fixes the maximum
+ length of the output metaphone code; if longer, the output is truncated
+ to this length.
+ </para>
+ <para>Example</para>
+ <programlisting>
+ SELECT metaphone('GUMBO',4);
+ </programlisting>
+ </sect2>
+
+</sect1>