summaryrefslogtreecommitdiff
path: root/doc/src/sgml/isn.sgml
diff options
context:
space:
mode:
Diffstat (limited to 'doc/src/sgml/isn.sgml')
-rw-r--r--doc/src/sgml/isn.sgml502
1 files changed, 502 insertions, 0 deletions
diff --git a/doc/src/sgml/isn.sgml b/doc/src/sgml/isn.sgml
new file mode 100644
index 0000000000..c6fef47f08
--- /dev/null
+++ b/doc/src/sgml/isn.sgml
@@ -0,0 +1,502 @@
+<sect1 id="isn">
+ <title>isn</title>
+
+ <indexterm zone="isn">
+ <primary>isn</primary>
+ </indexterm>
+
+ <para>
+ The <literal>isn</literal> module adds data types for the following
+ international-standard namespaces: EAN13, UPC, ISBN (books), ISMN (music),
+ and ISSN (serials). This module is inspired by Garrett A. Wollman's
+ isbn_issn code.
+ </para>
+ <para>
+ This module validates the numbers and automatically adds the correct
+ hyphenation to them. It also supports the new ISBN-13
+ numbers to be used starting in January 2007.
+ </para>
+
+ <para>
+ Premises:
+ </para>
+
+ <orderedlist>
+ <listitem>
+ <para>ISBN13, ISMN13, ISSN13 numbers are all EAN13 numbers</para>
+ </listitem>
+ <listitem>
+ <para>EAN13 numbers aren't always ISBN13, ISMN13 or ISSN13 (some are)</para>
+ </listitem>
+ <listitem>
+ <para>some ISBN13 numbers can be displayed as ISBN</para>
+ </listitem>
+ <listitem>
+ <para>some ISMN13 numbers can be displayed as ISMN</para>
+ </listitem>
+ <listitem>
+ <para>some ISSN13 numbers can be displayed as ISSN</para>
+ </listitem>
+ <listitem>
+ <para>all UPC, ISBN, ISMN and ISSN can be represented as EAN13 numbers</para>
+ </listitem>
+ </orderedlist>
+
+ <note>
+ <para>
+ All types are internally represented as 64-bit integers,
+ and internally all are consistently interchangeable.
+ </para>
+ </note>
+ <note>
+ <para>
+ We have two operator classes (for btree and for hash) so each data type
+ can be indexed for faster access.
+ </para>
+ </note>
+
+ <sect2>
+ <title>Data types</title>
+
+ <para>
+ We have the following data types:
+ </para>
+
+ <table>
+ <title>Data types</title>
+ <tgroup cols="2">
+ <thead>
+ <row>
+ <entry><para>Data type</para></entry>
+ <entry><para>Description</para></entry>
+ </row>
+ </thead>
+ <tbody>
+ <row>
+ <entry><para><literal>EAN13</literal></para></entry>
+ <entry>
+ <para>
+ European Article Numbers. This type will always show the EAN13-display
+ format. The output function for this is <literal>ean13_out()</literal>.
+ </para>
+ </entry>
+ </row>
+
+ <row>
+ <entry><para><literal>ISBN13</literal></para></entry>
+ <entry>
+ <para>
+ For International Standard Book Numbers to be displayed in
+ the new EAN13-display format.
+ </para>
+ </entry>
+ </row>
+
+ <row>
+ <entry><para><literal>ISMN13</literal></para></entry>
+ <entry>
+ <para>
+ For International Standard Music Numbers to be displayed in
+ the new EAN13-display format.
+ </para>
+ </entry>
+ </row>
+ <row>
+ <entry><para><literal>ISSN13</literal></para></entry>
+ <entry>
+ <para>
+ For International Standard Serial Numbers to be displayed in the new
+ EAN13-display format.
+ </para>
+ </entry>
+ </row>
+ <row>
+ <entry><para><literal>ISBN</literal></para></entry>
+ <entry>
+ <para>
+ For International Standard Book Numbers to be displayed in the current
+ short-display format.
+ </para>
+ </entry>
+ </row>
+ <row>
+ <entry><para><literal>ISMN</literal></para></entry>
+ <entry>
+ <para>
+ For International Standard Music Numbers to be displayed in the
+ current short-display format.
+ </para>
+ </entry>
+ </row>
+ <row>
+ <entry><para><literal>ISSN</literal></para></entry>
+ <entry>
+ <para>
+ For International Standard Serial Numbers to be displayed in the
+ current short-display format. These types will display the short
+ version of the ISxN (ISxN 10) whenever it's possible, and it will
+ show ISxN 13 when it's impossible to show the short version. The
+ output function to do this is <literal>isn_out()</literal>.
+ </para>
+ </entry>
+ </row>
+ <row>
+ <entry><para><literal>UPC</literal></para></entry>
+ <entry>
+ <para>
+ For Universal Product Codes. UPC numbers are a subset of the EAN13
+ numbers (they are basically EAN13 without the first '0' digit).
+ The output function to do this is also <literal>isn_out()</literal>.
+ </para>
+ </entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <note>
+ <para>
+ <literal>EAN13</literal>, <literal>ISBN13</literal>,
+ <literal>ISMN13</literal> and <literal>ISSN13</literal> types will always
+ display the long version of the ISxN (EAN13). The output function to do
+ this is <literal>ean13_out()</literal>.
+ </para>
+ <para>
+ The need for these types is just for displaying in different ways the same
+ data: <literal>ISBN13</literal> is actually the same as
+ <literal>ISBN</literal>, <literal>ISMN13=ISMN</literal> and
+ <literal>ISSN13=ISSN</literal>.
+ </para>
+ </note>
+ </sect2>
+
+ <sect2>
+ <title>Input functions</title>
+
+ <para>
+ We have the following input functions:
+ </para>
+
+ <table>
+ <title>Input functions</title>
+ <tgroup cols="2">
+ <thead>
+ <row>
+ <entry>Function</entry>
+ <entry>Description</entry>
+ </row>
+ </thead>
+ <tbody>
+ <row>
+ <entry><para><literal>ean13_in()</literal></para></entry>
+ <entry>
+ <para>
+ To take a string and return an EAN13.
+ </para>
+ </entry>
+ </row>
+
+ <row>
+ <entry><para><literal>isbn_in()</literal></para></entry>
+ <entry>
+ <para>
+ To take a string and return valid ISBN or ISBN13 numbers.
+ </para>
+ </entry>
+ </row>
+
+ <row>
+ <entry><para><literal>ismn_in()</literal></para></entry>
+ <entry>
+ <para>
+ To take a string and return valid ISMN or ISMN13 numbers.
+ </para>
+ </entry>
+ </row>
+
+ <row>
+ <entry><para><literal>issn_in()</literal></para></entry>
+ <entry>
+ <para>
+ To take a string and return valid ISSN or ISSN13 numbers.
+ </para>
+ </entry>
+ </row>
+ <row>
+ <entry><para><literal>upc_in()</literal></para></entry>
+ <entry>
+ <para>
+ To take a string and return a UPC code.
+ </para>
+ </entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </sect2>
+
+ <sect2>
+ <title>Casts</title>
+
+ <para>
+ We are able to cast from:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+ ISBN13 -> EAN13
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ ISMN13 -> EAN13
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ ISSN13 -> EAN13
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ ISBN -> EAN13
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ ISMN -> EAN13
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ ISSN -> EAN13
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ UPC -> EAN13
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ ISBN &lt;-&gt; ISBN13
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ ISMN &lt;-&gt; ISMN13
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ ISSN &lt;-&gt; ISSN13
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect2>
+
+ <sect2>
+ <title>C API</title>
+ <para>
+ The C API is implemented as:
+ </para>
+ <programlisting>
+ extern Datum isn_out(PG_FUNCTION_ARGS);
+ extern Datum ean13_out(PG_FUNCTION_ARGS);
+ extern Datum ean13_in(PG_FUNCTION_ARGS);
+ extern Datum isbn_in(PG_FUNCTION_ARGS);
+ extern Datum ismn_in(PG_FUNCTION_ARGS);
+ extern Datum issn_in(PG_FUNCTION_ARGS);
+ extern Datum upc_in(PG_FUNCTION_ARGS);
+ </programlisting>
+
+ <para>
+ On success:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+ <literal>isn_out()</literal> takes any of our types and returns a string containing
+ the shortest possible representation of the number.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>ean13_out()</literal> takes any of our types and returns the
+ EAN13 (long) representation of the number.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>ean13_in()</literal> takes a string and returns an EAN13, which, as stated in
+ premise (2), may or may not also be one of the other types, but is certainly an EAN13
+ number. It succeeds only if the string is a valid EAN13 number; otherwise it fails.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>isbn_in()</literal> takes a string and returns an ISBN/ISBN13. It succeeds only if
+ the string is really an ISBN/ISBN13; otherwise it fails.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>ismn_in()</literal> takes a string and returns an ISMN/ISMN13. It succeeds only if
+ the string is really an ISMN/ISMN13; otherwise it fails.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>issn_in()</literal> takes a string and returns an ISSN/ISSN13. It succeeds only if
+ the string is really an ISSN/ISSN13; otherwise it fails.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>upc_in()</literal> takes a string and returns a UPC. It succeeds only if the
+ string is really a UPC; otherwise it fails.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ (on failure, the functions 'ereport' the error)
+ </para>
+ </sect2>
+
+ <sect2>
+ <title>Testing functions</title>
+ <table>
+ <title>Testing functions</title>
+ <tgroup cols="2">
+ <thead>
+ <row>
+ <entry><para>Function</para></entry>
+ <entry><para>Description</para></entry>
+ </row>
+ </thead>
+ <tbody>
+ <row>
+ <entry><para><literal>isn_weak(boolean)</literal></para></entry>
+ <entry><para>Sets the weak input mode.</para></entry>
+ </row>
+ <row>
+ <entry><para><literal>isn_weak()</literal></para></entry>
+ <entry><para>Gets the current status of the weak mode.</para></entry>
+ </row>
+ <row>
+ <entry><para><literal>make_valid()</literal></para></entry>
+ <entry><para>Validates an invalid number (deleting the invalid flag).</para></entry>
+ </row>
+ <row>
+ <entry><para><literal>is_valid()</literal></para></entry>
+ <entry><para>Checks for the invalid flag presence.</para></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para>
+ <literal>Weak</literal> mode makes it possible to insert invalid data into
+ a table. Invalid here means that the check digit is wrong, not that digits are missing.
+ </para>
+ <para>
+ Why would you want to use the weak mode? Well, it could be that
+ you have a huge collection of ISBN numbers, and that there are so many of
+ them that for weird reasons some have the wrong check digit (perhaps the
+ numbers were scanned from a printed list and the OCR got the numbers wrong,
+ perhaps the numbers were manually captured... who knows). Anyway, the thing
+ is you might want to clean the mess up, but you still want to be able to have
+ all the numbers in your database and maybe use an external tool to access
+ the invalid numbers in the database so you can verify the information and
+ validate it more easily, for example by selecting all the invalid numbers in the table.
+ </para>
+ <para>
+ When you insert invalid numbers in a table using the weak mode, the number
+ will be inserted with the corrected check digit, but it will be flagged
+ with an exclamation mark ('!') at the end (e.g. 0-11-000322-5!).
+ </para>
+ <para>
+ You can also force the insertion of invalid numbers even when not in weak mode,
+ by appending the '!' character at the end of the number.
+ </para>
+ </sect2>
+
+ <sect2>
+ <title>Examples</title>
+ <programlisting>
+--Using the types directly:
+SELECT isbn('978-0-393-04002-9');
+SELECT isbn13('0901690546');
+SELECT issn('1436-4522');
+
+--Casting types:
+-- note that you can only cast from ean13 to another type when the
+-- number would be valid in the realm of the target type;
+-- thus, the following will NOT work: select isbn(ean13('0220356483481'));
+-- but these will:
+SELECT upc(ean13('0220356483481'));
+SELECT ean13(upc('220356483481'));
+
+--Create a table with a single column to hold ISBN numbers:
+CREATE TABLE test ( id isbn );
+INSERT INTO test VALUES('9780393040029');
+
+--Automatically calculating check digits (observe the '?'):
+INSERT INTO test VALUES('220500896?');
+INSERT INTO test VALUES('978055215372?');
+
+SELECT issn('3251231?');
+SELECT ismn('979047213542?');
+
+--Using the weak mode:
+SELECT isn_weak(true);
+INSERT INTO test VALUES('978-0-11-000533-4');
+INSERT INTO test VALUES('9780141219307');
+INSERT INTO test VALUES('2-205-00876-X');
+SELECT isn_weak(false);
+
+SELECT id FROM test WHERE NOT is_valid(id);
+UPDATE test SET id=make_valid(id) WHERE id = '2-205-00876-X!';
+
+SELECT * FROM test;
+
+SELECT isbn13(id) FROM test;
+ </programlisting>
+ </sect2>
+
+ <sect2>
+ <title>Bibliography</title>
+ <para>
+ The information to implement this module was collected through
+ several sites, including:
+ </para>
+ <programlisting>
+ http://www.isbn-international.org/
+ http://www.issn.org/
+ http://www.ismn-international.org/
+ http://www.wikipedia.org/
+ </programlisting>
+ <para>
+ The prefixes used for hyphenation were also compiled from:
+ </para>
+ <programlisting>
+ http://www.gs1.org/productssolutions/idkeys/support/prefix_list.html
+ http://www.isbn-international.org/en/identifiers.html
+ http://www.ismn-international.org/ranges.html
+ </programlisting>
+ <para>
+ Care was taken during the creation of the algorithms and they
+ were meticulously verified against the suggested algorithms
+ in the official ISBN, ISMN, ISSN User Manuals.
+ </para>
+ </sect2>
+
+ <sect2>
+ <title>Author</title>
+ <para>
+ Germán Méndez Bravo (Kronuz), 2004 - 2006
+ </para>
+ </sect2>
+</sect1>
+