summaryrefslogtreecommitdiff
path: root/ext/standard/html_tables
diff options
context:
space:
mode:
Diffstat (limited to 'ext/standard/html_tables')
-rw-r--r--ext/standard/html_tables/ents_basic.txt5
-rw-r--r--ext/standard/html_tables/ents_basic_apos.txt5
-rw-r--r--ext/standard/html_tables/ents_html401.txt253
-rw-r--r--ext/standard/html_tables/ents_html5.txt2125
-rw-r--r--ext/standard/html_tables/ents_xhtml.txt253
-rw-r--r--ext/standard/html_tables/html_table_gen.php812
-rw-r--r--ext/standard/html_tables/mappings/8859-1.TXT303
-rw-r--r--ext/standard/html_tables/mappings/8859-15.TXT303
-rw-r--r--ext/standard/html_tables/mappings/8859-5.TXT303
-rw-r--r--ext/standard/html_tables/mappings/CP1251.TXT274
-rw-r--r--ext/standard/html_tables/mappings/CP1252.TXT274
-rw-r--r--ext/standard/html_tables/mappings/CP866.TXT275
-rw-r--r--ext/standard/html_tables/mappings/KOI8-R.TXT302
-rw-r--r--ext/standard/html_tables/mappings/ROMAN.TXT370
14 files changed, 5857 insertions, 0 deletions
diff --git a/ext/standard/html_tables/ents_basic.txt b/ext/standard/html_tables/ents_basic.txt
new file mode 100644
index 0000000..3a2ec93
--- /dev/null
+++ b/ext/standard/html_tables/ents_basic.txt
@@ -0,0 +1,5 @@
+quot 22
+amp 26
+#039 27
+lt 3C
+gt 3E
diff --git a/ext/standard/html_tables/ents_basic_apos.txt b/ext/standard/html_tables/ents_basic_apos.txt
new file mode 100644
index 0000000..6a0f307
--- /dev/null
+++ b/ext/standard/html_tables/ents_basic_apos.txt
@@ -0,0 +1,5 @@
+quot 22
+amp 26
+apos 27
+lt 3C
+gt 3E
diff --git a/ext/standard/html_tables/ents_html401.txt b/ext/standard/html_tables/ents_html401.txt
new file mode 100644
index 0000000..7e1564b
--- /dev/null
+++ b/ext/standard/html_tables/ents_html401.txt
@@ -0,0 +1,253 @@
+#039 0027 //artificial; there's no &apos; in HTML 4.01
+nbsp 00A0
+iexcl 00A1
+cent 00A2
+pound 00A3
+curren 00A4
+yen 00A5
+brvbar 00A6
+sect 00A7
+uml 00A8
+copy 00A9
+ordf 00AA
+laquo 00AB
+not 00AC
+shy 00AD
+reg 00AE
+macr 00AF
+deg 00B0
+plusmn 00B1
+sup2 00B2
+sup3 00B3
+acute 00B4
+micro 00B5
+para 00B6
+middot 00B7
+cedil 00B8
+sup1 00B9
+ordm 00BA
+raquo 00BB
+frac14 00BC
+frac12 00BD
+frac34 00BE
+iquest 00BF
+Agrave 00C0
+Aacute 00C1
+Acirc 00C2
+Atilde 00C3
+Auml 00C4
+Aring 00C5
+AElig 00C6
+Ccedil 00C7
+Egrave 00C8
+Eacute 00C9
+Ecirc 00CA
+Euml 00CB
+Igrave 00CC
+Iacute 00CD
+Icirc 00CE
+Iuml 00CF
+ETH 00D0
+Ntilde 00D1
+Ograve 00D2
+Oacute 00D3
+Ocirc 00D4
+Otilde 00D5
+Ouml 00D6
+times 00D7
+Oslash 00D8
+Ugrave 00D9
+Uacute 00DA
+Ucirc 00DB
+Uuml 00DC
+Yacute 00DD
+THORN 00DE
+szlig 00DF
+agrave 00E0
+aacute 00E1
+acirc 00E2
+atilde 00E3
+auml 00E4
+aring 00E5
+aelig 00E6
+ccedil 00E7
+egrave 00E8
+eacute 00E9
+ecirc 00EA
+euml 00EB
+igrave 00EC
+iacute 00ED
+icirc 00EE
+iuml 00EF
+eth 00F0
+ntilde 00F1
+ograve 00F2
+oacute 00F3
+ocirc 00F4
+otilde 00F5
+ouml 00F6
+divide 00F7
+oslash 00F8
+ugrave 00F9
+uacute 00FA
+ucirc 00FB
+uuml 00FC
+yacute 00FD
+thorn 00FE
+yuml 00FF
+fnof 0192
+Alpha 0391
+Beta 0392
+Gamma 0393
+Delta 0394
+Epsilon 0395
+Zeta 0396
+Eta 0397
+Theta 0398
+Iota 0399
+Kappa 039A
+Lambda 039B
+Mu 039C
+Nu 039D
+Xi 039E
+Omicron 039F
+Pi 03A0
+Rho 03A1
+Sigma 03A3
+Tau 03A4
+Upsilon 03A5
+Phi 03A6
+Chi 03A7
+Psi 03A8
+Omega 03A9
+alpha 03B1
+beta 03B2
+gamma 03B3
+delta 03B4
+epsilon 03B5
+zeta 03B6
+eta 03B7
+theta 03B8
+iota 03B9
+kappa 03BA
+lambda 03BB
+mu 03BC
+nu 03BD
+xi 03BE
+omicron 03BF
+pi 03C0
+rho 03C1
+sigmaf 03C2
+sigma 03C3
+tau 03C4
+upsilon 03C5
+phi 03C6
+chi 03C7
+psi 03C8
+omega 03C9
+thetasym 03D1
+upsih 03D2
+piv 03D6
+bull 2022
+hellip 2026
+prime 2032
+Prime 2033
+oline 203E
+frasl 2044
+weierp 2118
+image 2111
+real 211C
+trade 2122
+alefsym 2135
+larr 2190
+uarr 2191
+rarr 2192
+darr 2193
+harr 2194
+crarr 21B5
+lArr 21D0
+uArr 21D1
+rArr 21D2
+dArr 21D3
+hArr 21D4
+forall 2200
+part 2202
+exist 2203
+empty 2205
+nabla 2207
+isin 2208
+notin 2209
+ni 220B
+prod 220F
+sum 2211
+minus 2212
+lowast 2217
+radic 221A
+prop 221D
+infin 221E
+ang 2220
+and 2227
+or 2228
+cap 2229
+cup 222A
+int 222B
+there4 2234
+sim 223C
+cong 2245
+asymp 2248
+ne 2260
+equiv 2261
+le 2264
+ge 2265
+sub 2282
+sup 2283
+nsub 2284
+sube 2286
+supe 2287
+oplus 2295
+otimes 2297
+perp 22A5
+sdot 22C5
+lceil 2308
+rceil 2309
+lfloor 230A
+rfloor 230B
+lang 2329
+rang 232A
+loz 25CA
+spades 2660
+clubs 2663
+hearts 2665
+diams 2666
+quot 0022
+amp 0026
+lt 003C
+gt 003E
+OElig 0152
+oelig 0153
+Scaron 0160
+scaron 0161
+Yuml 0178
+circ 02C6
+tilde 02DC
+ensp 2002
+emsp 2003
+thinsp 2009
+zwnj 200C
+zwj 200D
+lrm 200E
+rlm 200F
+ndash 2013
+mdash 2014
+lsquo 2018
+rsquo 2019
+sbquo 201A
+ldquo 201C
+rdquo 201D
+bdquo 201E
+dagger 2020
+Dagger 2021
+permil 2030
+lsaquo 2039
+rsaquo 203A
+euro 20AC
\ No newline at end of file
diff --git a/ext/standard/html_tables/ents_html5.txt b/ext/standard/html_tables/ents_html5.txt
new file mode 100644
index 0000000..18defb2
--- /dev/null
+++ b/ext/standard/html_tables/ents_html5.txt
@@ -0,0 +1,2125 @@
+AElig 000C6
+AMP 00026
+Aacute 000C1
+Abreve 00102
+Acirc 000C2
+Acy 00410
+Afr 1D504
+Agrave 000C0
+Alpha 00391
+Amacr 00100
+And 02A53
+Aogon 00104
+Aopf 1D538
+ApplyFunction 02061
+Aring 000C5
+Ascr 1D49C
+Assign 02254
+Atilde 000C3
+Auml 000C4
+Backslash 02216
+Barv 02AE7
+Barwed 02306
+Bcy 00411
+Because 02235
+Bernoullis 0212C
+Beta 00392
+Bfr 1D505
+Bopf 1D539
+Breve 002D8
+Bscr 0212C
+Bumpeq 0224E
+CHcy 00427
+COPY 000A9
+Cacute 00106
+Cap 022D2
+CapitalDifferentialD 02145
+Cayleys 0212D
+Ccaron 0010C
+Ccedil 000C7
+Ccirc 00108
+Cconint 02230
+Cdot 0010A
+Cedilla 000B8
+CenterDot 000B7
+Cfr 0212D
+Chi 003A7
+CircleDot 02299
+CircleMinus 02296
+CirclePlus 02295
+CircleTimes 02297
+ClockwiseContourIntegral 02232
+CloseCurlyDoubleQuote 0201D
+CloseCurlyQuote 02019
+Colon 02237
+Colone 02A74
+Congruent 02261
+Conint 0222F
+ContourIntegral 0222E
+Copf 02102
+Coproduct 02210
+CounterClockwiseContourIntegral 02233
+Cross 02A2F
+Cscr 1D49E
+Cup 022D3
+CupCap 0224D
+DD 02145
+DDotrahd 02911
+DJcy 00402
+DScy 00405
+DZcy 0040F
+Dagger 02021
+Darr 021A1
+Dashv 02AE4
+Dcaron 0010E
+Dcy 00414
+Del 02207
+Delta 00394
+Dfr 1D507
+DiacriticalAcute 000B4
+DiacriticalDot 002D9
+DiacriticalDoubleAcute 002DD
+DiacriticalGrave 00060
+DiacriticalTilde 002DC
+Diamond 022C4
+DifferentialD 02146
+Dopf 1D53B
+Dot 000A8
+DotDot 020DC
+DotEqual 02250
+DoubleContourIntegral 0222F
+DoubleDot 000A8
+DoubleDownArrow 021D3
+DoubleLeftArrow 021D0
+DoubleLeftRightArrow 021D4
+DoubleLeftTee 02AE4
+DoubleLongLeftArrow 027F8
+DoubleLongLeftRightArrow 027FA
+DoubleLongRightArrow 027F9
+DoubleRightArrow 021D2
+DoubleRightTee 022A8
+DoubleUpArrow 021D1
+DoubleUpDownArrow 021D5
+DoubleVerticalBar 02225
+DownArrow 02193
+DownArrowBar 02913
+DownArrowUpArrow 021F5
+DownBreve 00311
+DownLeftRightVector 02950
+DownLeftTeeVector 0295E
+DownLeftVector 021BD
+DownLeftVectorBar 02956
+DownRightTeeVector 0295F
+DownRightVector 021C1
+DownRightVectorBar 02957
+DownTee 022A4
+DownTeeArrow 021A7
+Downarrow 021D3
+Dscr 1D49F
+Dstrok 00110
+ENG 0014A
+ETH 000D0
+Eacute 000C9
+Ecaron 0011A
+Ecirc 000CA
+Ecy 0042D
+Edot 00116
+Efr 1D508
+Egrave 000C8
+Element 02208
+Emacr 00112
+EmptySmallSquare 025FB
+EmptyVerySmallSquare 025AB
+Eogon 00118
+Eopf 1D53C
+Epsilon 00395
+Equal 02A75
+EqualTilde 02242
+Equilibrium 021CC
+Escr 02130
+Esim 02A73
+Eta 00397
+Euml 000CB
+Exists 02203
+ExponentialE 02147
+Fcy 00424
+Ffr 1D509
+FilledSmallSquare 025FC
+FilledVerySmallSquare 025AA
+Fopf 1D53D
+ForAll 02200
+Fouriertrf 02131
+Fscr 02131
+GJcy 00403
+GT 0003E
+Gamma 00393
+Gammad 003DC
+Gbreve 0011E
+Gcedil 00122
+Gcirc 0011C
+Gcy 00413
+Gdot 00120
+Gfr 1D50A
+Gg 022D9
+Gopf 1D53E
+GreaterEqual 02265
+GreaterEqualLess 022DB
+GreaterFullEqual 02267
+GreaterGreater 02AA2
+GreaterLess 02277
+GreaterSlantEqual 02A7E
+GreaterTilde 02273
+Gscr 1D4A2
+Gt 0226B
+HARDcy 0042A
+Hacek 002C7
+Hat 0005E
+Hcirc 00124
+Hfr 0210C
+HilbertSpace 0210B
+Hopf 0210D
+HorizontalLine 02500
+Hscr 0210B
+Hstrok 00126
+HumpDownHump 0224E
+HumpEqual 0224F
+IEcy 00415
+IJlig 00132
+IOcy 00401
+Iacute 000CD
+Icirc 000CE
+Icy 00418
+Idot 00130
+Ifr 02111
+Igrave 000CC
+Im 02111
+Imacr 0012A
+ImaginaryI 02148
+Implies 021D2
+Int 0222C
+Integral 0222B
+Intersection 022C2
+InvisibleComma 02063
+InvisibleTimes 02062
+Iogon 0012E
+Iopf 1D540
+Iota 00399
+Iscr 02110
+Itilde 00128
+Iukcy 00406
+Iuml 000CF
+Jcirc 00134
+Jcy 00419
+Jfr 1D50D
+Jopf 1D541
+Jscr 1D4A5
+Jsercy 00408
+Jukcy 00404
+KHcy 00425
+KJcy 0040C
+Kappa 0039A
+Kcedil 00136
+Kcy 0041A
+Kfr 1D50E
+Kopf 1D542
+Kscr 1D4A6
+LJcy 00409
+LT 0003C
+Lacute 00139
+Lambda 0039B
+Lang 027EA
+Laplacetrf 02112
+Larr 0219E
+Lcaron 0013D
+Lcedil 0013B
+Lcy 0041B
+LeftAngleBracket 027E8
+LeftArrow 02190
+LeftArrowBar 021E4
+LeftArrowRightArrow 021C6
+LeftCeiling 02308
+LeftDoubleBracket 027E6
+LeftDownTeeVector 02961
+LeftDownVector 021C3
+LeftDownVectorBar 02959
+LeftFloor 0230A
+LeftRightArrow 02194
+LeftRightVector 0294E
+LeftTee 022A3
+LeftTeeArrow 021A4
+LeftTeeVector 0295A
+LeftTriangle 022B2
+LeftTriangleBar 029CF
+LeftTriangleEqual 022B4
+LeftUpDownVector 02951
+LeftUpTeeVector 02960
+LeftUpVector 021BF
+LeftUpVectorBar 02958
+LeftVector 021BC
+LeftVectorBar 02952
+Leftarrow 021D0
+Leftrightarrow 021D4
+LessEqualGreater 022DA
+LessFullEqual 02266
+LessGreater 02276
+LessLess 02AA1
+LessSlantEqual 02A7D
+LessTilde 02272
+Lfr 1D50F
+Ll 022D8
+Lleftarrow 021DA
+Lmidot 0013F
+LongLeftArrow 027F5
+LongLeftRightArrow 027F7
+LongRightArrow 027F6
+Longleftarrow 027F8
+Longleftrightarrow 027FA
+Longrightarrow 027F9
+Lopf 1D543
+LowerLeftArrow 02199
+LowerRightArrow 02198
+Lscr 02112
+Lsh 021B0
+Lstrok 00141
+Lt 0226A
+Map 02905
+Mcy 0041C
+MediumSpace 0205F
+Mellintrf 02133
+Mfr 1D510
+MinusPlus 02213
+Mopf 1D544
+Mscr 02133
+Mu 0039C
+NJcy 0040A
+Nacute 00143
+Ncaron 00147
+Ncedil 00145
+Ncy 0041D
+NegativeMediumSpace 0200B
+NegativeThickSpace 0200B
+NegativeThinSpace 0200B
+NegativeVeryThinSpace 0200B
+NestedGreaterGreater 0226B
+NestedLessLess 0226A
+NewLine 0000A
+Nfr 1D511
+NoBreak 02060
+NonBreakingSpace 000A0
+Nopf 02115
+Not 02AEC
+NotCongruent 02262
+NotCupCap 0226D
+NotDoubleVerticalBar 02226
+NotElement 02209
+NotEqual 02260
+NotEqualTilde 02242 00338
+NotExists 02204
+NotGreater 0226F
+NotGreaterEqual 02271
+NotGreaterFullEqual 02267 00338
+NotGreaterGreater 0226B 00338
+NotGreaterLess 02279
+NotGreaterSlantEqual 02A7E 00338
+NotGreaterTilde 02275
+NotHumpDownHump 0224E 00338
+NotHumpEqual 0224F 00338
+NotLeftTriangle 022EA
+NotLeftTriangleBar 029CF 00338
+NotLeftTriangleEqual 022EC
+NotLess 0226E
+NotLessEqual 02270
+NotLessGreater 02278
+NotLessLess 0226A 00338
+NotLessSlantEqual 02A7D 00338
+NotLessTilde 02274
+NotNestedGreaterGreater 02AA2 00338
+NotNestedLessLess 02AA1 00338
+NotPrecedes 02280
+NotPrecedesEqual 02AAF 00338
+NotPrecedesSlantEqual 022E0
+NotReverseElement 0220C
+NotRightTriangle 022EB
+NotRightTriangleBar 029D0 00338
+NotRightTriangleEqual 022ED
+NotSquareSubset 0228F 00338
+NotSquareSubsetEqual 022E2
+NotSquareSuperset 02290 00338
+NotSquareSupersetEqual 022E3
+NotSubset 02282 020D2
+NotSubsetEqual 02288
+NotSucceeds 02281
+NotSucceedsEqual 02AB0 00338
+NotSucceedsSlantEqual 022E1
+NotSucceedsTilde 0227F 00338
+NotSuperset 02283 020D2
+NotSupersetEqual 02289
+NotTilde 02241
+NotTildeEqual 02244
+NotTildeFullEqual 02247
+NotTildeTilde 02249
+NotVerticalBar 02224
+Nscr 1D4A9
+Ntilde 000D1
+Nu 0039D
+OElig 00152
+Oacute 000D3
+Ocirc 000D4
+Ocy 0041E
+Odblac 00150
+Ofr 1D512
+Ograve 000D2
+Omacr 0014C
+Omega 003A9
+Omicron 0039F
+Oopf 1D546
+OpenCurlyDoubleQuote 0201C
+OpenCurlyQuote 02018
+Or 02A54
+Oscr 1D4AA
+Oslash 000D8
+Otilde 000D5
+Otimes 02A37
+Ouml 000D6
+OverBar 0203E
+OverBrace 023DE
+OverBracket 023B4
+OverParenthesis 023DC
+PartialD 02202
+Pcy 0041F
+Pfr 1D513
+Phi 003A6
+Pi 003A0
+PlusMinus 000B1
+Poincareplane 0210C
+Popf 02119
+Pr 02ABB
+Precedes 0227A
+PrecedesEqual 02AAF
+PrecedesSlantEqual 0227C
+PrecedesTilde 0227E
+Prime 02033
+Product 0220F
+Proportion 02237
+Proportional 0221D
+Pscr 1D4AB
+Psi 003A8
+QUOT 00022
+Qfr 1D514
+Qopf 0211A
+Qscr 1D4AC
+RBarr 02910
+REG 000AE
+Racute 00154
+Rang 027EB
+Rarr 021A0
+Rarrtl 02916
+Rcaron 00158
+Rcedil 00156
+Rcy 00420
+Re 0211C
+ReverseElement 0220B
+ReverseEquilibrium 021CB
+ReverseUpEquilibrium 0296F
+Rfr 0211C
+Rho 003A1
+RightAngleBracket 027E9
+RightArrow 02192
+RightArrowBar 021E5
+RightArrowLeftArrow 021C4
+RightCeiling 02309
+RightDoubleBracket 027E7
+RightDownTeeVector 0295D
+RightDownVector 021C2
+RightDownVectorBar 02955
+RightFloor 0230B
+RightTee 022A2
+RightTeeArrow 021A6
+RightTeeVector 0295B
+RightTriangle 022B3
+RightTriangleBar 029D0
+RightTriangleEqual 022B5
+RightUpDownVector 0294F
+RightUpTeeVector 0295C
+RightUpVector 021BE
+RightUpVectorBar 02954
+RightVector 021C0
+RightVectorBar 02953
+Rightarrow 021D2
+Ropf 0211D
+RoundImplies 02970
+Rrightarrow 021DB
+Rscr 0211B
+Rsh 021B1
+RuleDelayed 029F4
+SHCHcy 00429
+SHcy 00428
+SOFTcy 0042C
+Sacute 0015A
+Sc 02ABC
+Scaron 00160
+Scedil 0015E
+Scirc 0015C
+Scy 00421
+Sfr 1D516
+ShortDownArrow 02193
+ShortLeftArrow 02190
+ShortRightArrow 02192
+ShortUpArrow 02191
+Sigma 003A3
+SmallCircle 02218
+Sopf 1D54A
+Sqrt 0221A
+Square 025A1
+SquareIntersection 02293
+SquareSubset 0228F
+SquareSubsetEqual 02291
+SquareSuperset 02290
+SquareSupersetEqual 02292
+SquareUnion 02294
+Sscr 1D4AE
+Star 022C6
+Sub 022D0
+Subset 022D0
+SubsetEqual 02286
+Succeeds 0227B
+SucceedsEqual 02AB0
+SucceedsSlantEqual 0227D
+SucceedsTilde 0227F
+SuchThat 0220B
+Sum 02211
+Sup 022D1
+Superset 02283
+SupersetEqual 02287
+Supset 022D1
+THORN 000DE
+TRADE 02122
+TSHcy 0040B
+TScy 00426
+Tab 00009
+Tau 003A4
+Tcaron 00164
+Tcedil 00162
+Tcy 00422
+Tfr 1D517
+Therefore 02234
+Theta 00398
+ThickSpace 0205F 0200A
+ThinSpace 02009
+Tilde 0223C
+TildeEqual 02243
+TildeFullEqual 02245
+TildeTilde 02248
+Topf 1D54B
+TripleDot 020DB
+Tscr 1D4AF
+Tstrok 00166
+Uacute 000DA
+Uarr 0219F
+Uarrocir 02949
+Ubrcy 0040E
+Ubreve 0016C
+Ucirc 000DB
+Ucy 00423
+Udblac 00170
+Ufr 1D518
+Ugrave 000D9
+Umacr 0016A
+UnderBar 0005F
+UnderBrace 023DF
+UnderBracket 023B5
+UnderParenthesis 023DD
+Union 022C3
+UnionPlus 0228E
+Uogon 00172
+Uopf 1D54C
+UpArrow 02191
+UpArrowBar 02912
+UpArrowDownArrow 021C5
+UpDownArrow 02195
+UpEquilibrium 0296E
+UpTee 022A5
+UpTeeArrow 021A5
+Uparrow 021D1
+Updownarrow 021D5
+UpperLeftArrow 02196
+UpperRightArrow 02197
+Upsi 003D2
+Upsilon 003A5
+Uring 0016E
+Uscr 1D4B0
+Utilde 00168
+Uuml 000DC
+VDash 022AB
+Vbar 02AEB
+Vcy 00412
+Vdash 022A9
+Vdashl 02AE6
+Vee 022C1
+Verbar 02016
+Vert 02016
+VerticalBar 02223
+VerticalLine 0007C
+VerticalSeparator 02758
+VerticalTilde 02240
+VeryThinSpace 0200A
+Vfr 1D519
+Vopf 1D54D
+Vscr 1D4B1
+Vvdash 022AA
+Wcirc 00174
+Wedge 022C0
+Wfr 1D51A
+Wopf 1D54E
+Wscr 1D4B2
+Xfr 1D51B
+Xi 0039E
+Xopf 1D54F
+Xscr 1D4B3
+YAcy 0042F
+YIcy 00407
+YUcy 0042E
+Yacute 000DD
+Ycirc 00176
+Ycy 0042B
+Yfr 1D51C
+Yopf 1D550
+Yscr 1D4B4
+Yuml 00178
+ZHcy 00416
+Zacute 00179
+Zcaron 0017D
+Zcy 00417
+Zdot 0017B
+ZeroWidthSpace 0200B
+Zeta 00396
+Zfr 02128
+Zopf 02124
+Zscr 1D4B5
+aacute 000E1
+abreve 00103
+ac 0223E
+acE 0223E 00333
+acd 0223F
+acirc 000E2
+acute 000B4
+acy 00430
+aelig 000E6
+af 02061
+afr 1D51E
+agrave 000E0
+alefsym 02135
+aleph 02135
+alpha 003B1
+amacr 00101
+amalg 02A3F
+amp 00026
+and 02227
+andand 02A55
+andd 02A5C
+andslope 02A58
+andv 02A5A
+ang 02220
+ange 029A4
+angle 02220
+angmsd 02221
+angmsdaa 029A8
+angmsdab 029A9
+angmsdac 029AA
+angmsdad 029AB
+angmsdae 029AC
+angmsdaf 029AD
+angmsdag 029AE
+angmsdah 029AF
+angrt 0221F
+angrtvb 022BE
+angrtvbd 0299D
+angsph 02222
+angst 000C5
+angzarr 0237C
+aogon 00105
+aopf 1D552
+ap 02248
+apE 02A70
+apacir 02A6F
+ape 0224A
+apid 0224B
+apos 00027
+approx 02248
+approxeq 0224A
+aring 000E5
+ascr 1D4B6
+ast 0002A
+asymp 02248
+asympeq 0224D
+atilde 000E3
+auml 000E4
+awconint 02233
+awint 02A11
+bNot 02AED
+backcong 0224C
+backepsilon 003F6
+backprime 02035
+backsim 0223D
+backsimeq 022CD
+barvee 022BD
+barwed 02305
+barwedge 02305
+bbrk 023B5
+bbrktbrk 023B6
+bcong 0224C
+bcy 00431
+bdquo 0201E
+becaus 02235
+because 02235
+bemptyv 029B0
+bepsi 003F6
+bernou 0212C
+beta 003B2
+beth 02136
+between 0226C
+bfr 1D51F
+bigcap 022C2
+bigcirc 025EF
+bigcup 022C3
+bigodot 02A00
+bigoplus 02A01
+bigotimes 02A02
+bigsqcup 02A06
+bigstar 02605
+bigtriangledown 025BD
+bigtriangleup 025B3
+biguplus 02A04
+bigvee 022C1
+bigwedge 022C0
+bkarow 0290D
+blacklozenge 029EB
+blacksquare 025AA
+blacktriangle 025B4
+blacktriangledown 025BE
+blacktriangleleft 025C2
+blacktriangleright 025B8
+blank 02423
+blk12 02592
+blk14 02591
+blk34 02593
+block 02588
+bne 0003D 020E5
+bnequiv 02261 020E5
+bnot 02310
+bopf 1D553
+bot 022A5
+bottom 022A5
+bowtie 022C8
+boxDL 02557
+boxDR 02554
+boxDl 02556
+boxDr 02553
+boxH 02550
+boxHD 02566
+boxHU 02569
+boxHd 02564
+boxHu 02567
+boxUL 0255D
+boxUR 0255A
+boxUl 0255C
+boxUr 02559
+boxV 02551
+boxVH 0256C
+boxVL 02563
+boxVR 02560
+boxVh 0256B
+boxVl 02562
+boxVr 0255F
+boxbox 029C9
+boxdL 02555
+boxdR 02552
+boxdl 02510
+boxdr 0250C
+boxh 02500
+boxhD 02565
+boxhU 02568
+boxhd 0252C
+boxhu 02534
+boxminus 0229F
+boxplus 0229E
+boxtimes 022A0
+boxuL 0255B
+boxuR 02558
+boxul 02518
+boxur 02514
+boxv 02502
+boxvH 0256A
+boxvL 02561
+boxvR 0255E
+boxvh 0253C
+boxvl 02524
+boxvr 0251C
+bprime 02035
+breve 002D8
+brvbar 000A6
+bscr 1D4B7
+bsemi 0204F
+bsim 0223D
+bsime 022CD
+bsol 0005C
+bsolb 029C5
+bsolhsub 027C8
+bull 02022
+bullet 02022
+bump 0224E
+bumpE 02AAE
+bumpe 0224F
+bumpeq 0224F
+cacute 00107
+cap 02229
+capand 02A44
+capbrcup 02A49
+capcap 02A4B
+capcup 02A47
+capdot 02A40
+caps 02229 0FE00
+caret 02041
+caron 002C7
+ccaps 02A4D
+ccaron 0010D
+ccedil 000E7
+ccirc 00109
+ccups 02A4C
+ccupssm 02A50
+cdot 0010B
+cedil 000B8
+cemptyv 029B2
+cent 000A2
+centerdot 000B7
+cfr 1D520
+chcy 00447
+check 02713
+checkmark 02713
+chi 003C7
+cir 025CB
+cirE 029C3
+circ 002C6
+circeq 02257
+circlearrowleft 021BA
+circlearrowright 021BB
+circledR 000AE
+circledS 024C8
+circledast 0229B
+circledcirc 0229A
+circleddash 0229D
+cire 02257
+cirfnint 02A10
+cirmid 02AEF
+cirscir 029C2
+clubs 02663
+clubsuit 02663
+colon 0003A
+colone 02254
+coloneq 02254
+comma 0002C
+commat 00040
+comp 02201
+compfn 02218
+complement 02201
+complexes 02102
+cong 02245
+congdot 02A6D
+conint 0222E
+copf 1D554
+coprod 02210
+copy 000A9
+copysr 02117
+crarr 021B5
+cross 02717
+cscr 1D4B8
+csub 02ACF
+csube 02AD1
+csup 02AD0
+csupe 02AD2
+ctdot 022EF
+cudarrl 02938
+cudarrr 02935
+cuepr 022DE
+cuesc 022DF
+cularr 021B6
+cularrp 0293D
+cup 0222A
+cupbrcap 02A48
+cupcap 02A46
+cupcup 02A4A
+cupdot 0228D
+cupor 02A45
+cups 0222A 0FE00
+curarr 021B7
+curarrm 0293C
+curlyeqprec 022DE
+curlyeqsucc 022DF
+curlyvee 022CE
+curlywedge 022CF
+curren 000A4
+curvearrowleft 021B6
+curvearrowright 021B7
+cuvee 022CE
+cuwed 022CF
+cwconint 02232
+cwint 02231
+cylcty 0232D
+dArr 021D3
+dHar 02965
+dagger 02020
+daleth 02138
+darr 02193
+dash 02010
+dashv 022A3
+dbkarow 0290F
+dblac 002DD
+dcaron 0010F
+dcy 00434
+dd 02146
+ddagger 02021
+ddarr 021CA
+ddotseq 02A77
+deg 000B0
+delta 003B4
+demptyv 029B1
+dfisht 0297F
+dfr 1D521
+dharl 021C3
+dharr 021C2
+diam 022C4
+diamond 022C4
+diamondsuit 02666
+diams 02666
+die 000A8
+digamma 003DD
+disin 022F2
+div 000F7
+divide 000F7
+divideontimes 022C7
+divonx 022C7
+djcy 00452
+dlcorn 0231E
+dlcrop 0230D
+dollar 00024
+dopf 1D555
+dot 002D9
+doteq 02250
+doteqdot 02251
+dotminus 02238
+dotplus 02214
+dotsquare 022A1
+doublebarwedge 02306
+downarrow 02193
+downdownarrows 021CA
+downharpoonleft 021C3
+downharpoonright 021C2
+drbkarow 02910
+drcorn 0231F
+drcrop 0230C
+dscr 1D4B9
+dscy 00455
+dsol 029F6
+dstrok 00111
+dtdot 022F1
+dtri 025BF
+dtrif 025BE
+duarr 021F5
+duhar 0296F
+dwangle 029A6
+dzcy 0045F
+dzigrarr 027FF
+eDDot 02A77
+eDot 02251
+eacute 000E9
+easter 02A6E
+ecaron 0011B
+ecir 02256
+ecirc 000EA
+ecolon 02255
+ecy 0044D
+edot 00117
+ee 02147
+efDot 02252
+efr 1D522
+eg 02A9A
+egrave 000E8
+egs 02A96
+egsdot 02A98
+el 02A99
+elinters 023E7
+ell 02113
+els 02A95
+elsdot 02A97
+emacr 00113
+empty 02205
+emptyset 02205
+emptyv 02205
+emsp 02003
+emsp13 02004
+emsp14 02005
+eng 0014B
+ensp 02002
+eogon 00119
+eopf 1D556
+epar 022D5
+eparsl 029E3
+eplus 02A71
+epsi 003B5
+epsilon 003B5
+epsiv 003F5
+eqcirc 02256
+eqcolon 02255
+eqsim 02242
+eqslantgtr 02A96
+eqslantless 02A95
+equals 0003D
+equest 0225F
+equiv 02261
+equivDD 02A78
+eqvparsl 029E5
+erDot 02253
+erarr 02971
+escr 0212F
+esdot 02250
+esim 02242
+eta 003B7
+eth 000F0
+euml 000EB
+euro 020AC
+excl 00021
+exist 02203
+expectation 02130
+exponentiale 02147
+fallingdotseq 02252
+fcy 00444
+female 02640
+ffilig 0FB03
+fflig 0FB00
+ffllig 0FB04
+ffr 1D523
+filig 0FB01
+fjlig 00066 0006A
+flat 0266D
+fllig 0FB02
+fltns 025B1
+fnof 00192
+fopf 1D557
+forall 02200
+fork 022D4
+forkv 02AD9
+fpartint 02A0D
+frac12 000BD
+frac13 02153
+frac14 000BC
+frac15 02155
+frac16 02159
+frac18 0215B
+frac23 02154
+frac25 02156
+frac34 000BE
+frac35 02157
+frac38 0215C
+frac45 02158
+frac56 0215A
+frac58 0215D
+frac78 0215E
+frasl 02044
+frown 02322
+fscr 1D4BB
+gE 02267
+gEl 02A8C
+gacute 001F5
+gamma 003B3
+gammad 003DD
+gap 02A86
+gbreve 0011F
+gcirc 0011D
+gcy 00433
+gdot 00121
+ge 02265
+gel 022DB
+geq 02265
+geqq 02267
+geqslant 02A7E
+ges 02A7E
+gescc 02AA9
+gesdot 02A80
+gesdoto 02A82
+gesdotol 02A84
+gesl 022DB 0FE00
+gesles 02A94
+gfr 1D524
+gg 0226B
+ggg 022D9
+gimel 02137
+gjcy 00453
+gl 02277
+glE 02A92
+gla 02AA5
+glj 02AA4
+gnE 02269
+gnap 02A8A
+gnapprox 02A8A
+gne 02A88
+gneq 02A88
+gneqq 02269
+gnsim 022E7
+gopf 1D558
+grave 00060
+gscr 0210A
+gsim 02273
+gsime 02A8E
+gsiml 02A90
+gt 0003E
+gtcc 02AA7
+gtcir 02A7A
+gtdot 022D7
+gtlPar 02995
+gtquest 02A7C
+gtrapprox 02A86
+gtrarr 02978
+gtrdot 022D7
+gtreqless 022DB
+gtreqqless 02A8C
+gtrless 02277
+gtrsim 02273
+gvertneqq 02269 0FE00
+gvnE 02269 0FE00
+hArr 021D4
+hairsp 0200A
+half 000BD
+hamilt 0210B
+hardcy 0044A
+harr 02194
+harrcir 02948
+harrw 021AD
+hbar 0210F
+hcirc 00125
+hearts 02665
+heartsuit 02665
+hellip 02026
+hercon 022B9
+hfr 1D525
+hksearow 02925
+hkswarow 02926
+hoarr 021FF
+homtht 0223B
+hookleftarrow 021A9
+hookrightarrow 021AA
+hopf 1D559
+horbar 02015
+hscr 1D4BD
+hslash 0210F
+hstrok 00127
+hybull 02043
+hyphen 02010
+iacute 000ED
+ic 02063
+icirc 000EE
+icy 00438
+iecy 00435
+iexcl 000A1
+iff 021D4
+ifr 1D526
+igrave 000EC
+ii 02148
+iiiint 02A0C
+iiint 0222D
+iinfin 029DC
+iiota 02129
+ijlig 00133
+imacr 0012B
+image 02111
+imagline 02110
+imagpart 02111
+imath 00131
+imof 022B7
+imped 001B5
+in 02208
+incare 02105
+infin 0221E
+infintie 029DD
+inodot 00131
+int 0222B
+intcal 022BA
+integers 02124
+intercal 022BA
+intlarhk 02A17
+intprod 02A3C
+iocy 00451
+iogon 0012F
+iopf 1D55A
+iota 003B9
+iprod 02A3C
+iquest 000BF
+iscr 1D4BE
+isin 02208
+isinE 022F9
+isindot 022F5
+isins 022F4
+isinsv 022F3
+isinv 02208
+it 02062
+itilde 00129
+iukcy 00456
+iuml 000EF
+jcirc 00135
+jcy 00439
+jfr 1D527
+jmath 00237
+jopf 1D55B
+jscr 1D4BF
+jsercy 00458
+jukcy 00454
+kappa 003BA
+kappav 003F0
+kcedil 00137
+kcy 0043A
+kfr 1D528
+kgreen 00138
+khcy 00445
+kjcy 0045C
+kopf 1D55C
+kscr 1D4C0
+lAarr 021DA
+lArr 021D0
+lAtail 0291B
+lBarr 0290E
+lE 02266
+lEg 02A8B
+lHar 02962
+lacute 0013A
+laemptyv 029B4
+lagran 02112
+lambda 003BB
+lang 027E8
+langd 02991
+langle 027E8
+lap 02A85
+laquo 000AB
+larr 02190
+larrb 021E4
+larrbfs 0291F
+larrfs 0291D
+larrhk 021A9
+larrlp 021AB
+larrpl 02939
+larrsim 02973
+larrtl 021A2
+lat 02AAB
+latail 02919
+late 02AAD
+lates 02AAD 0FE00
+lbarr 0290C
+lbbrk 02772
+lbrace 0007B
+lbrack 0005B
+lbrke 0298B
+lbrksld 0298F
+lbrkslu 0298D
+lcaron 0013E
+lcedil 0013C
+lceil 02308
+lcub 0007B
+lcy 0043B
+ldca 02936
+ldquo 0201C
+ldquor 0201E
+ldrdhar 02967
+ldrushar 0294B
+ldsh 021B2
+le 02264
+leftarrow 02190
+leftarrowtail 021A2
+leftharpoondown 021BD
+leftharpoonup 021BC
+leftleftarrows 021C7
+leftrightarrow 02194
+leftrightarrows 021C6
+leftrightharpoons 021CB
+leftrightsquigarrow 021AD
+leftthreetimes 022CB
+leg 022DA
+leq 02264
+leqq 02266
+leqslant 02A7D
+les 02A7D
+lescc 02AA8
+lesdot 02A7F
+lesdoto 02A81
+lesdotor 02A83
+lesg 022DA 0FE00
+lesges 02A93
+lessapprox 02A85
+lessdot 022D6
+lesseqgtr 022DA
+lesseqqgtr 02A8B
+lessgtr 02276
+lesssim 02272
+lfisht 0297C
+lfloor 0230A
+lfr 1D529
+lg 02276
+lgE 02A91
+lhard 021BD
+lharu 021BC
+lharul 0296A
+lhblk 02584
+ljcy 00459
+ll 0226A
+llarr 021C7
+llcorner 0231E
+llhard 0296B
+lltri 025FA
+lmidot 00140
+lmoust 023B0
+lmoustache 023B0
+lnE 02268
+lnap 02A89
+lnapprox 02A89
+lne 02A87
+lneq 02A87
+lneqq 02268
+lnsim 022E6
+loang 027EC
+loarr 021FD
+lobrk 027E6
+longleftarrow 027F5
+longleftrightarrow 027F7
+longmapsto 027FC
+longrightarrow 027F6
+looparrowleft 021AB
+looparrowright 021AC
+lopar 02985
+lopf 1D55D
+loplus 02A2D
+lotimes 02A34
+lowast 02217
+lowbar 0005F
+loz 025CA
+lozenge 025CA
+lozf 029EB
+lpar 00028
+lparlt 02993
+lrarr 021C6
+lrcorner 0231F
+lrhar 021CB
+lrhard 0296D
+lrm 0200E
+lrtri 022BF
+lsaquo 02039
+lscr 1D4C1
+lsh 021B0
+lsim 02272
+lsime 02A8D
+lsimg 02A8F
+lsqb 0005B
+lsquo 02018
+lsquor 0201A
+lstrok 00142
+lt 0003C
+ltcc 02AA6
+ltcir 02A79
+ltdot 022D6
+lthree 022CB
+ltimes 022C9
+ltlarr 02976
+ltquest 02A7B
+ltrPar 02996
+ltri 025C3
+ltrie 022B4
+ltrif 025C2
+lurdshar 0294A
+luruhar 02966
+lvertneqq 02268 0FE00
+lvnE 02268 0FE00
+mDDot 0223A
+macr 000AF
+male 02642
+malt 02720
+maltese 02720
+map 021A6
+mapsto 021A6
+mapstodown 021A7
+mapstoleft 021A4
+mapstoup 021A5
+marker 025AE
+mcomma 02A29
+mcy 0043C
+mdash 02014
+measuredangle 02221
+mfr 1D52A
+mho 02127
+micro 000B5
+mid 02223
+midast 0002A
+midcir 02AF0
+middot 000B7
+minus 02212
+minusb 0229F
+minusd 02238
+minusdu 02A2A
+mlcp 02ADB
+mldr 02026
+mnplus 02213
+models 022A7
+mopf 1D55E
+mp 02213
+mscr 1D4C2
+mstpos 0223E
+mu 003BC
+multimap 022B8
+mumap 022B8
+nGg 022D9 00338
+nGt 0226B 020D2
+nGtv 0226B 00338
+nLeftarrow 021CD
+nLeftrightarrow 021CE
+nLl 022D8 00338
+nLt 0226A 020D2
+nLtv 0226A 00338
+nRightarrow 021CF
+nVDash 022AF
+nVdash 022AE
+nabla 02207
+nacute 00144
+nang 02220 020D2
+nap 02249
+napE 02A70 00338
+napid 0224B 00338
+napos 00149
+napprox 02249
+natur 0266E
+natural 0266E
+naturals 02115
+nbsp 000A0
+nbump 0224E 00338
+nbumpe 0224F 00338
+ncap 02A43
+ncaron 00148
+ncedil 00146
+ncong 02247
+ncongdot 02A6D 00338
+ncup 02A42
+ncy 0043D
+ndash 02013
+ne 02260
+neArr 021D7
+nearhk 02924
+nearr 02197
+nearrow 02197
+nedot 02250 00338
+nequiv 02262
+nesear 02928
+nesim 02242 00338
+nexist 02204
+nexists 02204
+nfr 1D52B
+ngE 02267 00338
+nge 02271
+ngeq 02271
+ngeqq 02267 00338
+ngeqslant 02A7E 00338
+nges 02A7E 00338
+ngsim 02275
+ngt 0226F
+ngtr 0226F
+nhArr 021CE
+nharr 021AE
+nhpar 02AF2
+ni 0220B
+nis 022FC
+nisd 022FA
+niv 0220B
+njcy 0045A
+nlArr 021CD
+nlE 02266 00338
+nlarr 0219A
+nldr 02025
+nle 02270
+nleftarrow 0219A
+nleftrightarrow 021AE
+nleq 02270
+nleqq 02266 00338
+nleqslant 02A7D 00338
+nles 02A7D 00338
+nless 0226E
+nlsim 02274
+nlt 0226E
+nltri 022EA
+nltrie 022EC
+nmid 02224
+nopf 1D55F
+not 000AC
+notin 02209
+notinE 022F9 00338
+notindot 022F5 00338
+notinva 02209
+notinvb 022F7
+notinvc 022F6
+notni 0220C
+notniva 0220C
+notnivb 022FE
+notnivc 022FD
+npar 02226
+nparallel 02226
+nparsl 02AFD 020E5
+npart 02202 00338
+npolint 02A14
+npr 02280
+nprcue 022E0
+npre 02AAF 00338
+nprec 02280
+npreceq 02AAF 00338
+nrArr 021CF
+nrarr 0219B
+nrarrc 02933 00338
+nrarrw 0219D 00338
+nrightarrow 0219B
+nrtri 022EB
+nrtrie 022ED
+nsc 02281
+nsccue 022E1
+nsce 02AB0 00338
+nscr 1D4C3
+nshortmid 02224
+nshortparallel 02226
+nsim 02241
+nsime 02244
+nsimeq 02244
+nsmid 02224
+nspar 02226
+nsqsube 022E2
+nsqsupe 022E3
+nsub 02284
+nsubE 02AC5 00338
+nsube 02288
+nsubset 02282 020D2
+nsubseteq 02288
+nsubseteqq 02AC5 00338
+nsucc 02281
+nsucceq 02AB0 00338
+nsup 02285
+nsupE 02AC6 00338
+nsupe 02289
+nsupset 02283 020D2
+nsupseteq 02289
+nsupseteqq 02AC6 00338
+ntgl 02279
+ntilde 000F1
+ntlg 02278
+ntriangleleft 022EA
+ntrianglelefteq 022EC
+ntriangleright 022EB
+ntrianglerighteq 022ED
+nu 003BD
+num 00023
+numero 02116
+numsp 02007
+nvDash 022AD
+nvHarr 02904
+nvap 0224D 020D2
+nvdash 022AC
+nvge 02265 020D2
+nvgt 0003E 020D2
+nvinfin 029DE
+nvlArr 02902
+nvle 02264 020D2
+nvlt 0003C 020D2
+nvltrie 022B4 020D2
+nvrArr 02903
+nvrtrie 022B5 020D2
+nvsim 0223C 020D2
+nwArr 021D6
+nwarhk 02923
+nwarr 02196
+nwarrow 02196
+nwnear 02927
+oS 024C8
+oacute 000F3
+oast 0229B
+ocir 0229A
+ocirc 000F4
+ocy 0043E
+odash 0229D
+odblac 00151
+odiv 02A38
+odot 02299
+odsold 029BC
+oelig 00153
+ofcir 029BF
+ofr 1D52C
+ogon 002DB
+ograve 000F2
+ogt 029C1
+ohbar 029B5
+ohm 003A9
+oint 0222E
+olarr 021BA
+olcir 029BE
+olcross 029BB
+oline 0203E
+olt 029C0
+omacr 0014D
+omega 003C9
+omicron 003BF
+omid 029B6
+ominus 02296
+oopf 1D560
+opar 029B7
+operp 029B9
+oplus 02295
+or 02228
+orarr 021BB
+ord 02A5D
+order 02134
+orderof 02134
+ordf 000AA
+ordm 000BA
+origof 022B6
+oror 02A56
+orslope 02A57
+orv 02A5B
+oscr 02134
+oslash 000F8
+osol 02298
+otilde 000F5
+otimes 02297
+otimesas 02A36
+ouml 000F6
+ovbar 0233D
+par 02225
+para 000B6
+parallel 02225
+parsim 02AF3
+parsl 02AFD
+part 02202
+pcy 0043F
+percnt 00025
+period 0002E
+permil 02030
+perp 022A5
+pertenk 02031
+pfr 1D52D
+phi 003C6
+phiv 003D5
+phmmat 02133
+phone 0260E
+pi 003C0
+pitchfork 022D4
+piv 003D6
+planck 0210F
+planckh 0210E
+plankv 0210F
+plus 0002B
+plusacir 02A23
+plusb 0229E
+pluscir 02A22
+plusdo 02214
+plusdu 02A25
+pluse 02A72
+plusmn 000B1
+plussim 02A26
+plustwo 02A27
+pm 000B1
+pointint 02A15
+popf 1D561
+pound 000A3
+pr 0227A
+prE 02AB3
+prap 02AB7
+prcue 0227C
+pre 02AAF
+prec 0227A
+precapprox 02AB7
+preccurlyeq 0227C
+preceq 02AAF
+precnapprox 02AB9
+precneqq 02AB5
+precnsim 022E8
+precsim 0227E
+prime 02032
+primes 02119
+prnE 02AB5
+prnap 02AB9
+prnsim 022E8
+prod 0220F
+profalar 0232E
+profline 02312
+profsurf 02313
+prop 0221D
+propto 0221D
+prsim 0227E
+prurel 022B0
+pscr 1D4C5
+psi 003C8
+puncsp 02008
+qfr 1D52E
+qint 02A0C
+qopf 1D562
+qprime 02057
+qscr 1D4C6
+quaternions 0210D
+quatint 02A16
+quest 0003F
+questeq 0225F
+quot 00022
+rAarr 021DB
+rArr 021D2
+rAtail 0291C
+rBarr 0290F
+rHar 02964
+race 0223D 00331
+racute 00155
+radic 0221A
+raemptyv 029B3
+rang 027E9
+rangd 02992
+range 029A5
+rangle 027E9
+raquo 000BB
+rarr 02192
+rarrap 02975
+rarrb 021E5
+rarrbfs 02920
+rarrc 02933
+rarrfs 0291E
+rarrhk 021AA
+rarrlp 021AC
+rarrpl 02945
+rarrsim 02974
+rarrtl 021A3
+rarrw 0219D
+ratail 0291A
+ratio 02236
+rationals 0211A
+rbarr 0290D
+rbbrk 02773
+rbrace 0007D
+rbrack 0005D
+rbrke 0298C
+rbrksld 0298E
+rbrkslu 02990
+rcaron 00159
+rcedil 00157
+rceil 02309
+rcub 0007D
+rcy 00440
+rdca 02937
+rdldhar 02969
+rdquo 0201D
+rdquor 0201D
+rdsh 021B3
+real 0211C
+realine 0211B
+realpart 0211C
+reals 0211D
+rect 025AD
+reg 000AE
+rfisht 0297D
+rfloor 0230B
+rfr 1D52F
+rhard 021C1
+rharu 021C0
+rharul 0296C
+rho 003C1
+rhov 003F1
+rightarrow 02192
+rightarrowtail 021A3
+rightharpoondown 021C1
+rightharpoonup 021C0
+rightleftarrows 021C4
+rightleftharpoons 021CC
+rightrightarrows 021C9
+rightsquigarrow 0219D
+rightthreetimes 022CC
+ring 002DA
+risingdotseq 02253
+rlarr 021C4
+rlhar 021CC
+rlm 0200F
+rmoust 023B1
+rmoustache 023B1
+rnmid 02AEE
+roang 027ED
+roarr 021FE
+robrk 027E7
+ropar 02986
+ropf 1D563
+roplus 02A2E
+rotimes 02A35
+rpar 00029
+rpargt 02994
+rppolint 02A12
+rrarr 021C9
+rsaquo 0203A
+rscr 1D4C7
+rsh 021B1
+rsqb 0005D
+rsquo 02019
+rsquor 02019
+rthree 022CC
+rtimes 022CA
+rtri 025B9
+rtrie 022B5
+rtrif 025B8
+rtriltri 029CE
+ruluhar 02968
+rx 0211E
+sacute 0015B
+sbquo 0201A
+sc 0227B
+scE 02AB4
+scap 02AB8
+scaron 00161
+sccue 0227D
+sce 02AB0
+scedil 0015F
+scirc 0015D
+scnE 02AB6
+scnap 02ABA
+scnsim 022E9
+scpolint 02A13
+scsim 0227F
+scy 00441
+sdot 022C5
+sdotb 022A1
+sdote 02A66
+seArr 021D8
+searhk 02925
+searr 02198
+searrow 02198
+sect 000A7
+semi 0003B
+seswar 02929
+setminus 02216
+setmn 02216
+sext 02736
+sfr 1D530
+sfrown 02322
+sharp 0266F
+shchcy 00449
+shcy 00448
+shortmid 02223
+shortparallel 02225
+shy 000AD
+sigma 003C3
+sigmaf 003C2
+sigmav 003C2
+sim 0223C
+simdot 02A6A
+sime 02243
+simeq 02243
+simg 02A9E
+simgE 02AA0
+siml 02A9D
+simlE 02A9F
+simne 02246
+simplus 02A24
+simrarr 02972
+slarr 02190
+smallsetminus 02216
+smashp 02A33
+smeparsl 029E4
+smid 02223
+smile 02323
+smt 02AAA
+smte 02AAC
+smtes 02AAC 0FE00
+softcy 0044C
+sol 0002F
+solb 029C4
+solbar 0233F
+sopf 1D564
+spades 02660
+spadesuit 02660
+spar 02225
+sqcap 02293
+sqcaps 02293 0FE00
+sqcup 02294
+sqcups 02294 0FE00
+sqsub 0228F
+sqsube 02291
+sqsubset 0228F
+sqsubseteq 02291
+sqsup 02290
+sqsupe 02292
+sqsupset 02290
+sqsupseteq 02292
+squ 025A1
+square 025A1
+squarf 025AA
+squf 025AA
+srarr 02192
+sscr 1D4C8
+ssetmn 02216
+ssmile 02323
+sstarf 022C6
+star 02606
+starf 02605
+straightepsilon 003F5
+straightphi 003D5
+strns 000AF
+sub 02282
+subE 02AC5
+subdot 02ABD
+sube 02286
+subedot 02AC3
+submult 02AC1
+subnE 02ACB
+subne 0228A
+subplus 02ABF
+subrarr 02979
+subset 02282
+subseteq 02286
+subseteqq 02AC5
+subsetneq 0228A
+subsetneqq 02ACB
+subsim 02AC7
+subsub 02AD5
+subsup 02AD3
+succ 0227B
+succapprox 02AB8
+succcurlyeq 0227D
+succeq 02AB0
+succnapprox 02ABA
+succneqq 02AB6
+succnsim 022E9
+succsim 0227F
+sum 02211
+sung 0266A
+sup 02283
+sup1 000B9
+sup2 000B2
+sup3 000B3
+supE 02AC6
+supdot 02ABE
+supdsub 02AD8
+supe 02287
+supedot 02AC4
+suphsol 027C9
+suphsub 02AD7
+suplarr 0297B
+supmult 02AC2
+supnE 02ACC
+supne 0228B
+supplus 02AC0
+supset 02283
+supseteq 02287
+supseteqq 02AC6
+supsetneq 0228B
+supsetneqq 02ACC
+supsim 02AC8
+supsub 02AD4
+supsup 02AD6
+swArr 021D9
+swarhk 02926
+swarr 02199
+swarrow 02199
+swnwar 0292A
+szlig 000DF
+target 02316
+tau 003C4
+tbrk 023B4
+tcaron 00165
+tcedil 00163
+tcy 00442
+tdot 020DB
+telrec 02315
+tfr 1D531
+there4 02234
+therefore 02234
+theta 003B8
+thetasym 003D1
+thetav 003D1
+thickapprox 02248
+thicksim 0223C
+thinsp 02009
+thkap 02248
+thksim 0223C
+thorn 000FE
+tilde 002DC
+times 000D7
+timesb 022A0
+timesbar 02A31
+timesd 02A30
+tint 0222D
+toea 02928
+top 022A4
+topbot 02336
+topcir 02AF1
+topf 1D565
+topfork 02ADA
+tosa 02929
+tprime 02034
+trade 02122
+triangle 025B5
+triangledown 025BF
+triangleleft 025C3
+trianglelefteq 022B4
+triangleq 0225C
+triangleright 025B9
+trianglerighteq 022B5
+tridot 025EC
+trie 0225C
+triminus 02A3A
+triplus 02A39
+trisb 029CD
+tritime 02A3B
+trpezium 023E2
+tscr 1D4C9
+tscy 00446
+tshcy 0045B
+tstrok 00167
+twixt 0226C
+twoheadleftarrow 0219E
+twoheadrightarrow 021A0
+uArr 021D1
+uHar 02963
+uacute 000FA
+uarr 02191
+ubrcy 0045E
+ubreve 0016D
+ucirc 000FB
+ucy 00443
+udarr 021C5
+udblac 00171
+udhar 0296E
+ufisht 0297E
+ufr 1D532
+ugrave 000F9
+uharl 021BF
+uharr 021BE
+uhblk 02580
+ulcorn 0231C
+ulcorner 0231C
+ulcrop 0230F
+ultri 025F8
+umacr 0016B
+uml 000A8
+uogon 00173
+uopf 1D566
+uparrow 02191
+updownarrow 02195
+upharpoonleft 021BF
+upharpoonright 021BE
+uplus 0228E
+upsi 003C5
+upsih 003D2
+upsilon 003C5
+upuparrows 021C8
+urcorn 0231D
+urcorner 0231D
+urcrop 0230E
+uring 0016F
+urtri 025F9
+uscr 1D4CA
+utdot 022F0
+utilde 00169
+utri 025B5
+utrif 025B4
+uuarr 021C8
+uuml 000FC
+uwangle 029A7
+vArr 021D5
+vBar 02AE8
+vBarv 02AE9
+vDash 022A8
+vangrt 0299C
+varepsilon 003F5
+varkappa 003F0
+varnothing 02205
+varphi 003D5
+varpi 003D6
+varpropto 0221D
+varr 02195
+varrho 003F1
+varsigma 003C2
+varsubsetneq 0228A 0FE00
+varsubsetneqq 02ACB 0FE00
+varsupsetneq 0228B 0FE00
+varsupsetneqq 02ACC 0FE00
+vartheta 003D1
+vartriangleleft 022B2
+vartriangleright 022B3
+vcy 00432
+vdash 022A2
+vee 02228
+veebar 022BB
+veeeq 0225A
+vellip 022EE
+verbar 0007C
+vert 0007C
+vfr 1D533
+vltri 022B2
+vnsub 02282 020D2
+vnsup 02283 020D2
+vopf 1D567
+vprop 0221D
+vrtri 022B3
+vscr 1D4CB
+vsubnE 02ACB 0FE00
+vsubne 0228A 0FE00
+vsupnE 02ACC 0FE00
+vsupne 0228B 0FE00
+vzigzag 0299A
+wcirc 00175
+wedbar 02A5F
+wedge 02227
+wedgeq 02259
+weierp 02118
+wfr 1D534
+wopf 1D568
+wp 02118
+wr 02240
+wreath 02240
+wscr 1D4CC
+xcap 022C2
+xcirc 025EF
+xcup 022C3
+xdtri 025BD
+xfr 1D535
+xhArr 027FA
+xharr 027F7
+xi 003BE
+xlArr 027F8
+xlarr 027F5
+xmap 027FC
+xnis 022FB
+xodot 02A00
+xopf 1D569
+xoplus 02A01
+xotime 02A02
+xrArr 027F9
+xrarr 027F6
+xscr 1D4CD
+xsqcup 02A06
+xuplus 02A04
+xutri 025B3
+xvee 022C1
+xwedge 022C0
+yacute 000FD
+yacy 0044F
+ycirc 00177
+ycy 0044B
+yen 000A5
+yfr 1D536
+yicy 00457
+yopf 1D56A
+yscr 1D4CE
+yucy 0044E
+yuml 000FF
+zacute 0017A
+zcaron 0017E
+zcy 00437
+zdot 0017C
+zeetrf 02128
+zeta 003B6
+zfr 1D537
+zhcy 00436
+zigrarr 021DD
+zopf 1D56B
+zscr 1D4CF
+zwj 0200D
+zwnj 0200C \ No newline at end of file
diff --git a/ext/standard/html_tables/ents_xhtml.txt b/ext/standard/html_tables/ents_xhtml.txt
new file mode 100644
index 0000000..81800bc
--- /dev/null
+++ b/ext/standard/html_tables/ents_xhtml.txt
@@ -0,0 +1,253 @@
+nbsp 00A0
+iexcl 00A1
+cent 00A2
+pound 00A3
+curren 00A4
+yen 00A5
+brvbar 00A6
+sect 00A7
+uml 00A8
+copy 00A9
+ordf 00AA
+laquo 00AB
+not 00AC
+shy 00AD
+reg 00AE
+macr 00AF
+deg 00B0
+plusmn 00B1
+sup2 00B2
+sup3 00B3
+acute 00B4
+micro 00B5
+para 00B6
+middot 00B7
+cedil 00B8
+sup1 00B9
+ordm 00BA
+raquo 00BB
+frac14 00BC
+frac12 00BD
+frac34 00BE
+iquest 00BF
+Agrave 00C0
+Aacute 00C1
+Acirc 00C2
+Atilde 00C3
+Auml 00C4
+Aring 00C5
+AElig 00C6
+Ccedil 00C7
+Egrave 00C8
+Eacute 00C9
+Ecirc 00CA
+Euml 00CB
+Igrave 00CC
+Iacute 00CD
+Icirc 00CE
+Iuml 00CF
+ETH 00D0
+Ntilde 00D1
+Ograve 00D2
+Oacute 00D3
+Ocirc 00D4
+Otilde 00D5
+Ouml 00D6
+times 00D7
+Oslash 00D8
+Ugrave 00D9
+Uacute 00DA
+Ucirc 00DB
+Uuml 00DC
+Yacute 00DD
+THORN 00DE
+szlig 00DF
+agrave 00E0
+aacute 00E1
+acirc 00E2
+atilde 00E3
+auml 00E4
+aring 00E5
+aelig 00E6
+ccedil 00E7
+egrave 00E8
+eacute 00E9
+ecirc 00EA
+euml 00EB
+igrave 00EC
+iacute 00ED
+icirc 00EE
+iuml 00EF
+eth 00F0
+ntilde 00F1
+ograve 00F2
+oacute 00F3
+ocirc 00F4
+otilde 00F5
+ouml 00F6
+divide 00F7
+oslash 00F8
+ugrave 00F9
+uacute 00FA
+ucirc 00FB
+uuml 00FC
+yacute 00FD
+thorn 00FE
+yuml 00FF
+quot 0022
+amp 0026
+lt 003C
+gt 003E
+apos 0027
+OElig 0152
+oelig 0153
+Scaron 0160
+scaron 0161
+Yuml 0178
+circ 02C6
+tilde 02DC
+ensp 2002
+emsp 2003
+thinsp 2009
+zwnj 200C
+zwj 200D
+lrm 200E
+rlm 200F
+ndash 2013
+mdash 2014
+lsquo 2018
+rsquo 2019
+sbquo 201A
+ldquo 201C
+rdquo 201D
+bdquo 201E
+dagger 2020
+Dagger 2021
+permil 2030
+lsaquo 2039
+rsaquo 203A
+euro 20AC
+fnof 0192
+Alpha 0391
+Beta 0392
+Gamma 0393
+Delta 0394
+Epsilon 0395
+Zeta 0396
+Eta 0397
+Theta 0398
+Iota 0399
+Kappa 039A
+Lambda 039B
+Mu 039C
+Nu 039D
+Xi 039E
+Omicron 039F
+Pi 03A0
+Rho 03A1
+Sigma 03A3
+Tau 03A4
+Upsilon 03A5
+Phi 03A6
+Chi 03A7
+Psi 03A8
+Omega 03A9
+alpha 03B1
+beta 03B2
+gamma 03B3
+delta 03B4
+epsilon 03B5
+zeta 03B6
+eta 03B7
+theta 03B8
+iota 03B9
+kappa 03BA
+lambda 03BB
+mu 03BC
+nu 03BD
+xi 03BE
+omicron 03BF
+pi 03C0
+rho 03C1
+sigmaf 03C2
+sigma 03C3
+tau 03C4
+upsilon 03C5
+phi 03C6
+chi 03C7
+psi 03C8
+omega 03C9
+thetasym 03D1
+upsih 03D2
+piv 03D6
+bull 2022
+hellip 2026
+prime 2032
+Prime 2033
+oline 203E
+frasl 2044
+weierp 2118
+image 2111
+real 211C
+trade 2122
+alefsym 2135
+larr 2190
+uarr 2191
+rarr 2192
+darr 2193
+harr 2194
+crarr 21B5
+lArr 21D0
+uArr 21D1
+rArr 21D2
+dArr 21D3
+hArr 21D4
+forall 2200
+part 2202
+exist 2203
+empty 2205
+nabla 2207
+isin 2208
+notin 2209
+ni 220B
+prod 220F
+sum 2211
+minus 2212
+lowast 2217
+radic 221A
+prop 221D
+infin 221E
+ang 2220
+and 2227
+or 2228
+cap 2229
+cup 222A
+int 222B
+there4 2234
+sim 223C
+cong 2245
+asymp 2248
+ne 2260
+equiv 2261
+le 2264
+ge 2265
+sub 2282
+sup 2283
+nsub 2284
+sube 2286
+supe 2287
+oplus 2295
+otimes 2297
+perp 22A5
+sdot 22C5
+lceil 2308
+rceil 2309
+lfloor 230A
+rfloor 230B
+lang 2329
+rang 232A
+loz 25CA
+spades 2660
+clubs 2663
+hearts 2665
+diams 2666 \ No newline at end of file
diff --git a/ext/standard/html_tables/html_table_gen.php b/ext/standard/html_tables/html_table_gen.php
new file mode 100644
index 0000000..7e7314f
--- /dev/null
+++ b/ext/standard/html_tables/html_table_gen.php
@@ -0,0 +1,812 @@
+<?php
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | Copyright (c) 1997-2010 The PHP Group |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Gustavo Lopes <cataphract@php.net> |
+ +----------------------------------------------------------------------+
+*/
+
+/* This file prints to stdout the contents of ext/standard/html_tables.h */
+/* put together with glue; have patience */
+
+$t = <<<CODE
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | Copyright (c) 1997-%s The PHP Group |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+*/
+
+/* \$Id$ */
+
+#ifndef HTML_TABLES_H
+#define HTML_TABLES_H
+
+/**************************************************************************
+***************************************************************************
+** THIS FILE IS AUTOMATICALLY GENERATED. DO NOT MODIFY IT. **
+***************************************************************************
+** Please change html_tables/html_table_gen.php instead and then **
+** run it in order to generate this file **
+***************************************************************************
+**************************************************************************/
+
+enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251,
+ cs_8859_5, cs_cp866, cs_macroman, cs_koi8r, cs_big5,
+ cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp,
+ cs_numelems /* used to count the number of charsets */
+ };
+#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_8859_1)
+#define CHARSET_SINGLE_BYTE(cs) ((cs) > cs_utf_8 && (cs) < cs_big5)
+#define CHARSET_PARTIAL_SUPPORT(cs) ((cs) >= cs_big5)
+
+static const struct {
+ const char *codeset;
+ enum entity_charset charset;
+} charset_map[] = {
+ { "ISO-8859-1", cs_8859_1 },
+ { "ISO8859-1", cs_8859_1 },
+ { "ISO-8859-15", cs_8859_15 },
+ { "ISO8859-15", cs_8859_15 },
+ { "utf-8", cs_utf_8 },
+ { "cp1252", cs_cp1252 },
+ { "Windows-1252", cs_cp1252 },
+ { "1252", cs_cp1252 },
+ { "BIG5", cs_big5 },
+ { "950", cs_big5 },
+ { "GB2312", cs_gb2312 },
+ { "936", cs_gb2312 },
+ { "BIG5-HKSCS", cs_big5hkscs },
+ { "Shift_JIS", cs_sjis },
+ { "SJIS", cs_sjis },
+ { "932", cs_sjis },
+ { "EUCJP", cs_eucjp },
+ { "EUC-JP", cs_eucjp },
+ { "KOI8-R", cs_koi8r },
+ { "koi8-ru", cs_koi8r },
+ { "koi8r", cs_koi8r },
+ { "cp1251", cs_cp1251 },
+ { "Windows-1251", cs_cp1251 },
+ { "win-1251", cs_cp1251 },
+ { "iso8859-5", cs_8859_5 },
+ { "iso-8859-5", cs_8859_5 },
+ { "cp866", cs_cp866 },
+ { "866", cs_cp866 },
+ { "ibm866", cs_cp866 },
+ { "MacRoman", cs_macroman },
+ { NULL }
+};
+
+/* longest entity name length excluding & and ; */
+#define LONGEST_ENTITY_LENGTH 31
+
+/* Definitions for mappings *to* Unicode.
+ * The origin charset must have at most 256 code points.
+ * The multi-byte encodings are not supported */
+typedef struct {
+ unsigned short uni_cp[64];
+} enc_to_uni_stage2;
+
+typedef struct {
+ const enc_to_uni_stage2 *inner[4];
+} enc_to_uni;
+
+/* bits 7-8 bits (only single bytes encodings supported )*/
+#define ENT_ENC_TO_UNI_STAGE1(k) ((k & 0xC0) >> 6)
+/* bits 1-6 */
+#define ENT_ENC_TO_UNI_STAGE2(k) ((k) & 0x3F)
+
+
+CODE;
+
+echo sprintf($t, date("Y"));
+
+/* Single-byte encodings for which "to Unicode" tables are generated.
+ *  ident     - suffix used in the generated C identifiers
+ *  enumid    - position of the charset in enum entity_charset (see the
+ *              template above); used to index enc_to_uni_index
+ *  enumident - name of the charset's enum member without the "cs_" prefix,
+ *              given where it differs from ident (informational only; it
+ *              is not read by the code below)
+ *  name      - human readable name used in the generated comments
+ *  file      - mapping table, relative to the current working directory */
+$encodings = array(
+	array(
+		"ident" => "iso88591",
+		"enumid" => 1,
+		"name" => "ISO-8859-1",
+		"file" => "mappings/8859-1.TXT",
+	),
+	array(
+		"ident" => "iso88595",
+		"enumid" => 5,
+		"name" => "ISO-8859-5",
+		"file" => "mappings/8859-5.TXT",
+	),
+	array(
+		"ident" => "iso885915",
+		"enumid" => 3,
+		"name" => "ISO-8859-15",
+		"file" => "mappings/8859-15.TXT",
+	),
+	array(
+		"ident" => "win1252",
+		"enumid" => 2,
+		"enumident" => "cp1252",
+		"name" => "Windows-1252",
+		"file" => "mappings/CP1252.TXT",
+	),
+	array(
+		"ident" => "win1251",
+		"enumid" => 4,
+		"enumident" => "cp1251", /* was "cp1252": copy-paste slip */
+		"name" => "Windows-1251",
+		"file" => "mappings/CP1251.TXT",
+	),
+	array(
+		"ident" => "koi8r",
+		"enumid" => 8,
+		"name" => "KOI8-R",
+		"file" => "mappings/KOI8-R.TXT",
+	),
+	array(
+		"ident" => "cp866",
+		"enumid" => 6,
+		"name" => "CP-866",
+		"file" => "mappings/CP866.TXT",
+	),
+	array(
+		"ident" => "macroman",
+		"enumid" => 7,
+		"name" => "MacRoman",
+		"file" => "mappings/ROMAN.TXT",
+	),
+);
+
+/* Stage 2 tables emitted so far and, in parallel (same index), the C
+ * identifier under which each one was emitted. Later encodings reuse an
+ * identical table instead of emitting a duplicate. */
+$prevStage2 = array();
+$prevStage2Names = array();
+
+/* For every encoding: read its mapping file, split the 256 code units into
+ * four 64-entry stage 2 tables, emit the tables that have not been emitted
+ * before, then emit the stage 1 table pointing at the four stage 2 tables. */
+foreach ($encodings as $e) {
+	echo
+"/* {{{ Mappings *to* Unicode for {$e['name']} */\n\n";
+
+	/* process file: collect (encoding byte, Unicode code point) hex pairs */
+	$map = array();
+	/* $e['file'], not $e{'file'}: the curly-brace offset syntax is
+	 * deprecated in PHP 7.4 and a parse error in PHP 8 */
+	$lines = explode("\n", file_get_contents($e['file']));
+	foreach ($lines as $l) {
+		if (preg_match("/^0x([0-9A-Z]{2})\t0x([0-9A-Z]{2,})/i", $l, $matches))
+			$map[] = array($matches[1], $matches[2]);
+	}
+
+	$mappy = array();
+	foreach ($map as $v) { $mappy[hexdec($v[0])] = hexdec($v[1]); }
+
+	$mstable = array("ident" => $e['ident']);
+	/* calculate two-stage tables; NULL marks a byte with no mapping */
+	for ($i = 0; $i < 4; $i++) {
+		for ($j = 0; $j < 64; $j++) {
+			$cp = $i << 6 | $j;
+			$mstable[$i][$j] = isset($mappy[$cp]) ? $mappy[$cp] : NULL;
+		}
+	}
+
+	echo
+"/* {{{ Stage 2 tables for {$e['name']} */\n\n";
+
+	/* C identifier of the stage 2 table used for each of the 4 slots */
+	$s2tables_names = array();
+	for ($i = 0; $i < 4; $i++) {
+		/* Strict search, so that NULL (no mapping) is never considered
+		 * equal to a mapping to U+0000. */
+		$found = array_keys($prevStage2, $mstable[$i], true);
+		if ($found !== array()) {
+			/* Reuse the identifier recorded when the table was emitted
+			 * (previously derived from a fractional array index, which
+			 * could name the wrong encoding). */
+			$s2tables_names[$i] = $prevStage2Names[$found[0]];
+			continue;
+		}
+
+		$s2tables_names[$i] =
+			sprintf("enc_to_uni_s2_%s_%02X", $e['ident'], $i << 6);
+
+		echo "static const enc_to_uni_stage2 {$s2tables_names[$i]} = { {\n";
+		for ($j = 0; $j < 64; $j++) {
+			if ($j == 0) echo "\t";
+			elseif ($j % 6 == 0) echo "\n\t";
+			else echo " ";
+			if ($mstable[$i][$j] !== NULL)
+				echo sprintf("0x%04X,", $mstable[$i][$j]);
+			else
+				echo "0xFFFF,"; /* special value; indicates no mapping */
+		}
+		echo "\n} };\n\n";
+
+		$prevStage2[] = $mstable[$i];
+		$prevStage2Names[] = $s2tables_names[$i];
+	}
+
+	echo
+"/* end of stage 2 tables for {$e['name']} }}} */\n\n";
+
+	echo
+"/* {{{ Stage 1 table for {$e['name']} */\n";
+
+	echo
+"static const enc_to_uni enc_to_uni_{$e['ident']} = { {
+\t&{$s2tables_names[0]},
+\t&{$s2tables_names[1]},
+\t&{$s2tables_names[2]},
+\t&{$s2tables_names[3]} }
+};
+";
+
+	echo
+"/* end of stage 1 table for {$e['name']} }}} */\n\n";
+}
+
+$maxencnum = max(array_map(function($e) { return $e['enumid']; }, $encodings));
+$a = range(0, $maxencnum);
+foreach ($encodings as $e) { $a[$e['enumid']] = $e['ident']; }
+
+ echo
+"/* {{{ Index of tables for encoding conversion */
+static const enc_to_uni *const enc_to_uni_index[cs_numelems] = {\n";
+
+foreach ($a as $k => $v) {
+ if (is_numeric($v))
+ echo "\tNULL,\n";
+ else
+ echo "\t&enc_to_uni_$v,\n";
+}
+
+ echo
+"};
+/* }}} */\n";
+
+$t = <<<CODE
+
+/* Definitions for mappings *from* Unicode */
+
+typedef struct {
+ unsigned short un_code_point; /* we don't need bigger */
+ unsigned char cs_code; /* currently, we only have maps to single-byte encodings */
+} uni_to_enc;
+
+
+CODE;
+
+echo $t;
+
+$encodings = array(
+ array(
+ "ident" => "iso885915",
+ "name" => "ISO-8859-15",
+ "file" => "mappings/8859-15.TXT",
+ "range" => array(0xA4, 0xBE),
+ ),
+ array(
+ "ident" => "win1252",
+ "name" => "Windows-1252",
+ "file" => "mappings/CP1252.TXT",
+ "range" => array(0x80, 0x9F),
+ ),
+ array(
+ "ident" => "win1251",
+ "name" => "Windows-1251",
+ "file" => "mappings/CP1251.TXT",
+ "range" => array(0x80, 0xFF),
+ ),
+ array(
+ "ident" => "koi8r",
+ "name" => "KOI8-R",
+ "file" => "mappings/KOI8-R.TXT",
+ "range" => array(0x80, 0xFF),
+ ),
+ array(
+ "ident" => "cp866",
+ "name" => "CP-866",
+ "file" => "mappings/CP866.TXT",
+ "range" => array(0x80, 0xFF),
+ ),
+ array(
+ "ident" => "macroman",
+ "name" => "MacRoman",
+ "file" => "mappings/ROMAN.TXT",
+ "range" => array(0x80, 0xFF),
+ ),
+);
+
+/* For every encoding: read its mapping file and emit a unimap_<ident>
+ * array of (Unicode code point, encoding byte) pairs, restricted to the
+ * bytes inside the encoding's "range" and ordered by code point. */
+foreach ($encodings as $e) {
+	echo
+"/* {{{ Mappings *from* Unicode for {$e['name']} */\n";
+
+	/* process file: (encoding byte, code point, character name) triples */
+	$map = array();
+	/* $e['file'], not $e{'file'}: the curly-brace offset syntax is
+	 * deprecated in PHP 7.4 and a parse error in PHP 8 */
+	$lines = explode("\n", file_get_contents($e['file']));
+	foreach ($lines as $l) {
+		if (preg_match("/^0x([0-9A-Z]{2})\t0x([0-9A-Z]{2,})\s+#\s*(.*)$/i", $l, $matches))
+			$map[] = array($matches[1], $matches[2], rtrim($matches[3]));
+	}
+
+	/* keyed by code point; keep only bytes within [range[0], range[1]] */
+	$mappy = array();
+	foreach ($map as $v) {
+		if (hexdec($v[0]) >= $e['range'][0] && hexdec($v[0]) <= $e['range'][1])
+			$mappy[hexdec($v[1])] = array(hexdec($v[0]), strtolower($v[2]));
+	}
+	ksort($mappy);
+
+	echo
+"static const uni_to_enc unimap_{$e['ident']}[] = {\n";
+
+	foreach ($mappy as $k => $v) {
+		echo "\t{ ", sprintf("0x%04X", $k), ", ", sprintf("0x%02X", $v[0]), " },\t/* ",
+			$v[1], " */\n";
+	}
+	echo "};\n";
+
+	/* closing fold marker (was emitted as a second opening "{{{") */
+	echo
+"/* end of mappings *from* Unicode for {$e['name']} }}} */\n\n";
+}
+
+$data = file_get_contents("ents_html5.txt");
+$pass2 = false;
+$name = "HTML5";
+$ident = "html5";
+again:
+
+$t = <<<'CODE'
+/* HTML 5 has many more named entities.
+ * Some of them map to two unicode code points, not one.
+ * We're going to use a three-stage table (with an extra one for the entities
+ * with two code points). */
+
+#define ENT_STAGE1_INDEX(k) (((k) & 0xFFF000) >> 12) /* > 1D, we have no mapping */
+#define ENT_STAGE2_INDEX(k) (((k) & 0xFC0) >> 6)
+#define ENT_STAGE3_INDEX(k) ((k) & 0x3F)
+#define ENT_CODE_POINT_FROM_STAGES(i,j,k) (((i) << 12) | ((j) << 6) | (k))
+
+/* Table should be organized with a leading row telling the size of
+ * the table and the default entity (maybe NULL) and the rest being
+ * normal rows ordered by code point so that we can do a binary search */
+typedef union {
+ struct {
+ unsigned size; /* number of remaining entries in the table */
+ const char *default_entity;
+ unsigned short default_entity_len;
+ } leading_entry;
+ struct {
+ unsigned second_cp; /* second code point */
+ const char *entity;
+ unsigned short entity_len;
+ } normal_entry;
+} entity_multicodepoint_row;
+
+/* blocks of these should start at code points k where k % 0xFC0 == 0 */
+typedef struct {
+ char ambiguous; /* if 0 look into entity */
+ union {
+ struct {
+ const char *entity; /* may be NULL */
+ unsigned short entity_len;
+ } ent;
+ const entity_multicodepoint_row *multicodepoint_table;
+ } data;
+} entity_stage3_row;
+
+/* Calculate k & 0x3F Use as offset */
+typedef const entity_stage3_row *entity_stage2_row; /* 64 elements */
+
+/* Calculate k & 0xFC0 >> 6. Use as offset */
+typedef const entity_stage3_row *const *entity_stage1_row; /* 64 elements */
+
+/* For stage 1, Calculate k & 0xFFF000 >> 3*4.
+ * If larger than 1D, we have no mapping. Otherwise lookup that index */
+
+typedef struct {
+ const entity_stage1_row *ms_table;
+ /* for tables with only basic entities, this member is to be accessed
+ * directly for better performance: */
+ const entity_stage3_row *table;
+} entity_table_opt;
+
+/* Replaced "GT" > "gt" and "QUOT" > "quot" for consistency's sake. */
+
+
+CODE;
+
+if (!$pass2)
+ echo $t;
+
+$dp = array();
+
+foreach (explode("\n", $data) as $l) {
+ if (preg_match('/^(#?[a-z0-9]+)\s+([a-f0-9]+) ([a-f0-9]+)/i', $l, $matches)) {
+ //echo sprintf("\t{\"%-21s 1, 0x%05d},\n", $matches[1].",", $matches[2]);
+ $dp[] = array($matches[1], $matches[2], $matches[3]);
+ } else if (preg_match('/^(#?[a-z0-9]+)\s+([a-f0-9]+)/i', $l, $matches)) {
+ $dp[] = array($matches[1], $matches[2]);
+ }
+}
+
+$origdp = $dp;
+
+usort($dp, function($a, $b) { return hexdec($a[1])-hexdec($b[1]); });
+
+$multicp_rows = array();
+foreach ($dp as $el) {
+ if (count($el) == 3) {
+ $multicp_rows[$el[1]] = array();
+ }
+}
+
+foreach ($dp as $el) {
+ if (key_exists($el[1], $multicp_rows)) {
+ if (count($el) == 3)
+ $multicp_rows[$el[1]][$el[2]] = $el[0];
+ else
+ $multicp_rows[$el[1]]["default"] = $el[0];
+ }
+}
+
+if ($pass2 < 2)
+ echo "/* {{{ Start of $name multi-stage table for codepoint -> entity */", "\n\n";
+else
+ echo "/* {{{ Start of $name table for codepoint -> entity */", "\n\n";
+
+if (empty($multicp_rows))
+ goto skip_multicp;
+
+ksort($multicp_rows);
+foreach ($multicp_rows as &$v) { ksort($v); }
+
+echo
+"/* {{{ Start of double code point tables for $name */", "\n\n";
+
+foreach ($multicp_rows as $k => $v) {
+ echo "static const entity_multicodepoint_row multi_cp_{$ident}_",
+ sprintf("%05s", $k), "[] = {", "\n";
+ if (key_exists("default", $v)) {
+ if ($v['default'] == 'GT') /* hack to make > translate to &gt; not GT; */
+ $v['default'] = "gt";
+ echo "\t{ {", sprintf("%02d", count($v) - 1),
+ ",\t\t", sprintf("\"%-21s", $v["default"].'",'), "\t",
+ sprintf("% 2d", strlen($v["default"])), '} },', "\n";
+ } else {
+ echo "\t{ {", sprintf("%02d", count($v)),
+ ",\t\t", sprintf("%-22s", 'NULL'), ",\t0} },\n";
+ }
+ unset($v["default"]);
+ foreach ($v as $l => $w) {
+ echo "\t{ {", sprintf("0x%05s", $l), ",\t", sprintf("\"%-21s", $w.'",'), "\t",
+ sprintf("% 2d", strlen($w)), '} },', "\n";
+ }
+ echo "};\n";
+}
+echo "\n/* End of double code point tables }}} */", "\n\n";
+
+skip_multicp:
+
+if ($pass2 < 2)
+ echo "/* {{{ Stage 3 Tables for $name */", "\n\n";
+
+$t = <<<CODE
+static const entity_stage3_row empty_stage3_table[] = {
+ /* 64 elements */
+ {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
+ {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
+ {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
+ {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
+ {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
+ {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
+ {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
+ {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
+ {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
+ {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
+ {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
+ {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
+ {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
+ {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
+ {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
+ {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } },
+};
+
+CODE;
+
+if (!$pass2)
+ echo $t;
+
+$mstable = array();
+foreach ($dp as $el) {
+ $s1 = (hexdec($el[1]) & 0xFFF000) >> 12;
+ $s2 = (hexdec($el[1]) & 0xFC0) >> 6;
+ $s3 = hexdec($el[1]) & 0x3F;
+ if (key_exists($el[1], $multicp_rows)) {
+ $mstable[$s1][$s2][$s3] = "";
+ } else {
+ $mstable[$s1][$s2][$s3] = $el[0];
+ }
+}
+
+/* Emit one 64-row stage 3 table for every (stage 1, stage 2) index pair
+ * that has at least one entry in $mstable; pairs without entries emit
+ * nothing here. */
+for ($i = 0; $i < 0x1E; $i++) {
+ for ($k = 0; $k < 64; $k++) {
+ $any3 = false;
+ $col3 = array();
+ /* gather the 64 rows; null marks a code point without an entity */
+ for ($l = 0; $l < 64; $l++) {
+ if (isset($mstable[$i][$k][$l])) {
+ $any3 = true;
+ $col3[$l] = $mstable[$i][$k][$l];
+ } else {
+ $col3[$l] = null;
+ }
+ }
+ if ($any3) {
+ echo "static const entity_stage3_row stage3_table_{$ident}_",
+ sprintf("%02X%03X", $i, $k << 6), "[] = {\n";
+ foreach ($col3 as $y => $z) {
+ /* four rows per output line */
+ if ($y == 0) echo "\t";
+ elseif ($y % 4 == 0) echo "\n\t";
+ else echo " ";
+ if ($z === NULL)
+ echo "{0, { {NULL, 0} } },";
+ elseif ($z === "QUOT") /* hack to translate " into &quot;, not &QUOT; */
+ echo "{0, { {\"quot\", 4} } },";
+ elseif ($z !== "")
+ echo "{0, { {\"$z\", ", strlen($z), "} } },";
+ else
+ /* "" was stored in $mstable for code points that have a
+ * multi code point table: emit a row pointing at it */
+ echo "{1, { {(void *)", sprintf("multi_cp_{$ident}_%05X",
+ ($i << 12) | ($k << 6) | $y ), "} } },";
+
+ }
+ echo "\n};\n\n";
+ }
+ }
+}
+
+if ($pass2 < 2)
+ echo "/* end of stage 3 Tables for $name }}} */", "\n\n";
+
+if ($pass2 > 1)
+ goto hashtables;
+
+echo
+"/* {{{ Stage 2 Tables for $name */", "\n\n";
+
+$t = <<<CODE
+static const entity_stage2_row empty_stage2_table[] = {
+ empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
+ empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
+ empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
+ empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
+ empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
+ empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
+ empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
+ empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
+ empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
+ empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
+ empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
+ empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
+ empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
+ empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
+ empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
+ empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table,
+};
+
+CODE;
+
+if (!$pass2)
+ echo $t;
+
+for ($i = 0; $i < 0x1E; $i++) {
+ $any = false;
+ for ($k = 0; $k < 64; $k++) {
+ if (isset($mstable[$i][$k]))
+ $any = true;
+ }
+ if ($any) {
+ echo "static const entity_stage2_row stage2_table_{$ident}_",
+ sprintf("%02X000", $i), "[] = {\n";
+ for ($k = 0; $k < 64; $k++) {
+ if ($k == 0) echo "\t";
+ elseif ($k % 4 == 0) echo "\n\t";
+ else echo " ";
+ if (isset($mstable[$i][$k])) {
+ echo sprintf("stage3_table_{$ident}_%05X", ($i << 12) | ($k << 6)), ",";
+ } else {
+ echo "empty_stage3_table", ",";
+ }
+ }
+ echo "\n};\n\n";
+ }
+}
+
+echo
+"/* end of stage 2 tables for $name }}} */", "\n\n";
+
+echo "static const entity_stage1_row entity_ms_table_{$ident}[] = {\n";
+for ($i = 0; $i < 0x1E; $i++) {
+ if (isset($mstable[$i]))
+ echo "\t", sprintf("stage2_table_{$ident}_%02X000", $i), ",\n";
+ else
+ echo "\tempty_stage2_table,\n";
+}
+echo "};\n\n";
+
+echo
+"/* end of $name multi-stage table for codepoint -> entity }}} */\n\n";
+
+/* commented-out; this enabled binary search, which turned out to be
+ * significantly slower than the hash tables for html 5 entities */
+//echo
+//"/* {{{ HTML 5 tables for entity -> codepoint */", "\n\n";
+
+//$t = <<<CODE
+//typedef struct {
+// const char *entity;
+// unsigned short entity_len;
+// unsigned int codepoint1;
+// unsigned int codepoint2;
+//} entity_cp_map;
+//
+//#define ENTITY_CP_MAP_CMP(l, lsize, r, rsize) \
+// ( ((lsize)==(rsize)) ? (memcmp((l), (r), (lsize))) : ((lsize)-(rsize)) )
+//
+//static const entity_cp_map html5_ent_cp_map[] = {
+//
+//CODE;
+//echo $t;
+//
+//$dp = $origdp;
+//usort($dp, function($a, $b) { $d = strlen($a[0])-strlen($b[0]);
+// return $d==0?strcmp($a[0], $b[0]):$d; });
+//
+//$k = 0;
+//foreach ($dp as $o) {
+// if ($k == 0) echo "\t";
+// elseif ($k % 3 == 0) echo "\n\t";
+// else echo " ";
+// if (isset($o[2]))
+// echo sprintf('{"%s", %d, 0x%X, 0x%X},', $o[0], strlen($o[0]),
+// hexdec($o[1]), hexdec($o[2]));
+// else
+// echo sprintf('{"%s", %d, 0x%X, 0},', $o[0], strlen($o[0]),
+// hexdec($o[1]));
+//
+// if (isset($o[2])) {
+// $entlen = strlen($o[0]) + 2;
+// $utf8len = strlen(
+// mb_convert_encoding("&#x{$o[1]};&#x{$o[2]};", "UTF-8", "HTML-ENTITIES"));
+// if ($utf8len > $entlen*1.2) {
+// die("violated assumption for traverse_for_entities");
+// }
+// }
+//
+// $k++;
+//}
+//echo "\n};\n\n";
+//
+//echo "static const size_t html5_ent_cp_map_size = $k;\n\n";
+//
+//echo
+//"/* end of HTML 5 tables for entity -> codepoint }}} */\n\n";
+
+hashtables:
+
+echo
+"/* {{{ $name hash table for entity -> codepoint */", "\n\n";
+
+$t = <<<CODE
+typedef struct {
+ const char *entity;
+ unsigned short entity_len;
+ unsigned int codepoint1;
+ unsigned int codepoint2;
+} entity_cp_map;
+
+typedef const entity_cp_map *entity_ht_bucket;
+
+typedef struct {
+ unsigned num_elems; /* power of 2 */
+ const entity_ht_bucket *buckets; /* .num_elems elements */
+} entity_ht;
+
+static const entity_cp_map ht_bucket_empty[] = { {NULL, 0, 0, 0} };
+
+CODE;
+
+if (!$pass2)
+ echo $t;
+
+/**
+ * Hashes an entity name for the bucket assignment done below.
+ *
+ * Starts from 5381 and, for each byte of $str in order, replaces the hash
+ * with (hash * 33 + byte), truncated to the low 32 bits.
+ * NOTE(review): presumably this has to match the hash function the C
+ * code uses when it looks up the generated buckets -- confirm.
+ *
+ * @param string $str entity name
+ * @return int hash value masked with 0xFFFFFFFF
+ */
+function hashfun($str)
+{
+	$h = 5381;
+	$len = strlen($str);
+
+	for ($idx = 0; $idx < $len; $idx++) {
+		/* ($h << 5) + $h is $h * 33 */
+		$shifted = (int)($h << 5);
+		$sum = (int)($shifted + $h);
+		$h = (int)($sum + ord($str[$idx])) & 0xFFFFFFFF;
+	}
+
+	return $h;
+}
+
+$numelems = max(pow(2, ceil(log(1.5*count($origdp))/log(2))),16);
+$mask = $numelems - 1;
+$hashes = array();
+foreach ($origdp as $e) {
+ $hashes[hashfun($e[0]) & $mask][] = $e;
+ if (isset($e[2])) {
+ $entlen = strlen($e[0]) + 2;
+ $utf8len = strlen(
+ mb_convert_encoding("&#x{$e[1]};&#x{$e[2]};", "UTF-8", "HTML-ENTITIES"));
+ if ($utf8len > $entlen*1.2) {
+ die("violated assumption for traverse_for_entities");
+ }
+ }
+}
+
+for ($i = 0; $i < $numelems; $i++) {
+ if (empty($hashes[$i]))
+ continue;
+ echo "static const entity_cp_map ht_bucket_{$ident}_", sprintf("%03X", $i) ,"[] = {";
+ foreach ($hashes[$i] as $h) {
+ if (isset($h[2])) {
+ echo sprintf(' {"%s", %d, 0x%05X, 0x%05X},',
+ $h[0], strlen($h[0]), hexdec($h[1]), hexdec($h[2]));
+ } else {
+ echo sprintf(' {"%s", %d, 0x%05X, 0},',
+ $h[0], strlen($h[0]), hexdec($h[1]));
+ }
+ }
+ echo " {NULL, 0, 0, 0} };\n";
+}
+echo "\n";
+
+echo
+"static const entity_cp_map *const ht_buckets_{$ident}[] = {\n";
+
+for ($i = 0; $i < $numelems; $i++) {
+ if ($i == 0) echo "\t";
+ elseif ($i % 4 == 0) echo "\n\t";
+ else echo " ";
+ if (empty($hashes[$i]))
+ echo "ht_bucket_empty,";
+ else
+ echo "ht_bucket_{$ident}_", sprintf("%03X", $i), ",";
+}
+echo "\n};\n\n";
+
+echo
+"static const entity_ht ent_ht_{$ident} = {
+ ", sprintf("0x%X", $numelems), ",
+ ht_buckets_{$ident}
+};\n\n";
+
+echo
+"/* end of $name hash table for entity -> codepoint }}} */\n\n";
+
+if (!$pass2) {
+ $data = file_get_contents("ents_html401.txt");
+ $pass2 = 1;
+ $name = "HTML 4.01";
+ $ident = "html4";
+ goto again;
+} elseif ($pass2 == 1) {
+ $data = file_get_contents("ents_basic.txt");
+ $pass2 = 2;
+ $name = "Basic entities (no apos)";
+ $ident = "be_noapos";
+ goto again;
+} elseif ($pass2 == 2) {
+ $data = file_get_contents("ents_basic_apos.txt");
+ $pass2 = 3;
+ $name = "Basic entities (with apos)";
+ $ident = "be_apos";
+ goto again;
+}
+
+echo "#endif /* HTML_TABLES_H */\n";
diff --git a/ext/standard/html_tables/mappings/8859-1.TXT b/ext/standard/html_tables/mappings/8859-1.TXT
new file mode 100644
index 0000000..473ecab
--- /dev/null
+++ b/ext/standard/html_tables/mappings/8859-1.TXT
@@ -0,0 +1,303 @@
+#
+# Name: ISO/IEC 8859-1:1998 to Unicode
+# Unicode version: 3.0
+# Table version: 1.0
+# Table format: Format A
+# Date: 1999 July 27
+# Authors: Ken Whistler <kenw@sybase.com>
+#
+# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved.
+#
+# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
+# No claims are made as to fitness for any particular purpose. No
+# warranties of any kind are expressed or implied. The recipient
+# agrees to determine applicability of information provided. If this
+# file has been provided on optical media by Unicode, Inc., the sole
+# remedy for any claim will be exchange of defective media within 90
+# days of receipt.
+#
+# Unicode, Inc. hereby grants the right to freely use the information
+# supplied in this file in the creation of products supporting the
+# Unicode Standard, and to make copies of this file in any form for
+# internal or external distribution as long as this notice remains
+# attached.
+#
+# General notes:
+#
+# This table contains the data the Unicode Consortium has on how
+# ISO/IEC 8859-1:1998 characters map into Unicode.
+#
+# Format: Three tab-separated columns
+# Column #1 is the ISO/IEC 8859-1 code (in hex as 0xXX)
+# Column #2 is the Unicode (in hex as 0xXXXX)
+# Column #3 the Unicode name (follows a comment sign, '#')
+#
+# The entries are in ISO/IEC 8859-1 order.
+#
+# Version history
+# 1.0 version updates 0.1 version by adding mappings for all
+# control characters.
+#
+# Updated versions of this file may be found in:
+# <ftp://ftp.unicode.org/Public/MAPPINGS/>
+#
+# Any comments or problems, contact <errata@unicode.org>
+# Please note that <errata@unicode.org> is an archival address;
+# notices will be checked, but do not expect an immediate response.
+#
+0x00 0x0000 # NULL
+0x01 0x0001 # START OF HEADING
+0x02 0x0002 # START OF TEXT
+0x03 0x0003 # END OF TEXT
+0x04 0x0004 # END OF TRANSMISSION
+0x05 0x0005 # ENQUIRY
+0x06 0x0006 # ACKNOWLEDGE
+0x07 0x0007 # BELL
+0x08 0x0008 # BACKSPACE
+0x09 0x0009 # HORIZONTAL TABULATION
+0x0A 0x000A # LINE FEED
+0x0B 0x000B # VERTICAL TABULATION
+0x0C 0x000C # FORM FEED
+0x0D 0x000D # CARRIAGE RETURN
+0x0E 0x000E # SHIFT OUT
+0x0F 0x000F # SHIFT IN
+0x10 0x0010 # DATA LINK ESCAPE
+0x11 0x0011 # DEVICE CONTROL ONE
+0x12 0x0012 # DEVICE CONTROL TWO
+0x13 0x0013 # DEVICE CONTROL THREE
+0x14 0x0014 # DEVICE CONTROL FOUR
+0x15 0x0015 # NEGATIVE ACKNOWLEDGE
+0x16 0x0016 # SYNCHRONOUS IDLE
+0x17 0x0017 # END OF TRANSMISSION BLOCK
+0x18 0x0018 # CANCEL
+0x19 0x0019 # END OF MEDIUM
+0x1A 0x001A # SUBSTITUTE
+0x1B 0x001B # ESCAPE
+0x1C 0x001C # FILE SEPARATOR
+0x1D 0x001D # GROUP SEPARATOR
+0x1E 0x001E # RECORD SEPARATOR
+0x1F 0x001F # UNIT SEPARATOR
+0x20 0x0020 # SPACE
+0x21 0x0021 # EXCLAMATION MARK
+0x22 0x0022 # QUOTATION MARK
+0x23 0x0023 # NUMBER SIGN
+0x24 0x0024 # DOLLAR SIGN
+0x25 0x0025 # PERCENT SIGN
+0x26 0x0026 # AMPERSAND
+0x27 0x0027 # APOSTROPHE
+0x28 0x0028 # LEFT PARENTHESIS
+0x29 0x0029 # RIGHT PARENTHESIS
+0x2A 0x002A # ASTERISK
+0x2B 0x002B # PLUS SIGN
+0x2C 0x002C # COMMA
+0x2D 0x002D # HYPHEN-MINUS
+0x2E 0x002E # FULL STOP
+0x2F 0x002F # SOLIDUS
+0x30 0x0030 # DIGIT ZERO
+0x31 0x0031 # DIGIT ONE
+0x32 0x0032 # DIGIT TWO
+0x33 0x0033 # DIGIT THREE
+0x34 0x0034 # DIGIT FOUR
+0x35 0x0035 # DIGIT FIVE
+0x36 0x0036 # DIGIT SIX
+0x37 0x0037 # DIGIT SEVEN
+0x38 0x0038 # DIGIT EIGHT
+0x39 0x0039 # DIGIT NINE
+0x3A 0x003A # COLON
+0x3B 0x003B # SEMICOLON
+0x3C 0x003C # LESS-THAN SIGN
+0x3D 0x003D # EQUALS SIGN
+0x3E 0x003E # GREATER-THAN SIGN
+0x3F 0x003F # QUESTION MARK
+0x40 0x0040 # COMMERCIAL AT
+0x41 0x0041 # LATIN CAPITAL LETTER A
+0x42 0x0042 # LATIN CAPITAL LETTER B
+0x43 0x0043 # LATIN CAPITAL LETTER C
+0x44 0x0044 # LATIN CAPITAL LETTER D
+0x45 0x0045 # LATIN CAPITAL LETTER E
+0x46 0x0046 # LATIN CAPITAL LETTER F
+0x47 0x0047 # LATIN CAPITAL LETTER G
+0x48 0x0048 # LATIN CAPITAL LETTER H
+0x49 0x0049 # LATIN CAPITAL LETTER I
+0x4A 0x004A # LATIN CAPITAL LETTER J
+0x4B 0x004B # LATIN CAPITAL LETTER K
+0x4C 0x004C # LATIN CAPITAL LETTER L
+0x4D 0x004D # LATIN CAPITAL LETTER M
+0x4E 0x004E # LATIN CAPITAL LETTER N
+0x4F 0x004F # LATIN CAPITAL LETTER O
+0x50 0x0050 # LATIN CAPITAL LETTER P
+0x51 0x0051 # LATIN CAPITAL LETTER Q
+0x52 0x0052 # LATIN CAPITAL LETTER R
+0x53 0x0053 # LATIN CAPITAL LETTER S
+0x54 0x0054 # LATIN CAPITAL LETTER T
+0x55 0x0055 # LATIN CAPITAL LETTER U
+0x56 0x0056 # LATIN CAPITAL LETTER V
+0x57 0x0057 # LATIN CAPITAL LETTER W
+0x58 0x0058 # LATIN CAPITAL LETTER X
+0x59 0x0059 # LATIN CAPITAL LETTER Y
+0x5A 0x005A # LATIN CAPITAL LETTER Z
+0x5B 0x005B # LEFT SQUARE BRACKET
+0x5C 0x005C # REVERSE SOLIDUS
+0x5D 0x005D # RIGHT SQUARE BRACKET
+0x5E 0x005E # CIRCUMFLEX ACCENT
+0x5F 0x005F # LOW LINE
+0x60 0x0060 # GRAVE ACCENT
+0x61 0x0061 # LATIN SMALL LETTER A
+0x62 0x0062 # LATIN SMALL LETTER B
+0x63 0x0063 # LATIN SMALL LETTER C
+0x64 0x0064 # LATIN SMALL LETTER D
+0x65 0x0065 # LATIN SMALL LETTER E
+0x66 0x0066 # LATIN SMALL LETTER F
+0x67 0x0067 # LATIN SMALL LETTER G
+0x68 0x0068 # LATIN SMALL LETTER H
+0x69 0x0069 # LATIN SMALL LETTER I
+0x6A 0x006A # LATIN SMALL LETTER J
+0x6B 0x006B # LATIN SMALL LETTER K
+0x6C 0x006C # LATIN SMALL LETTER L
+0x6D 0x006D # LATIN SMALL LETTER M
+0x6E 0x006E # LATIN SMALL LETTER N
+0x6F 0x006F # LATIN SMALL LETTER O
+0x70 0x0070 # LATIN SMALL LETTER P
+0x71 0x0071 # LATIN SMALL LETTER Q
+0x72 0x0072 # LATIN SMALL LETTER R
+0x73 0x0073 # LATIN SMALL LETTER S
+0x74 0x0074 # LATIN SMALL LETTER T
+0x75 0x0075 # LATIN SMALL LETTER U
+0x76 0x0076 # LATIN SMALL LETTER V
+0x77 0x0077 # LATIN SMALL LETTER W
+0x78 0x0078 # LATIN SMALL LETTER X
+0x79 0x0079 # LATIN SMALL LETTER Y
+0x7A 0x007A # LATIN SMALL LETTER Z
+0x7B 0x007B # LEFT CURLY BRACKET
+0x7C 0x007C # VERTICAL LINE
+0x7D 0x007D # RIGHT CURLY BRACKET
+0x7E 0x007E # TILDE
+0x7F 0x007F # DELETE
+0x80 0x0080 # <control>
+0x81 0x0081 # <control>
+0x82 0x0082 # <control>
+0x83 0x0083 # <control>
+0x84 0x0084 # <control>
+0x85 0x0085 # <control>
+0x86 0x0086 # <control>
+0x87 0x0087 # <control>
+0x88 0x0088 # <control>
+0x89 0x0089 # <control>
+0x8A 0x008A # <control>
+0x8B 0x008B # <control>
+0x8C 0x008C # <control>
+0x8D 0x008D # <control>
+0x8E 0x008E # <control>
+0x8F 0x008F # <control>
+0x90 0x0090 # <control>
+0x91 0x0091 # <control>
+0x92 0x0092 # <control>
+0x93 0x0093 # <control>
+0x94 0x0094 # <control>
+0x95 0x0095 # <control>
+0x96 0x0096 # <control>
+0x97 0x0097 # <control>
+0x98 0x0098 # <control>
+0x99 0x0099 # <control>
+0x9A 0x009A # <control>
+0x9B 0x009B # <control>
+0x9C 0x009C # <control>
+0x9D 0x009D # <control>
+0x9E 0x009E # <control>
+0x9F 0x009F # <control>
+0xA0 0x00A0 # NO-BREAK SPACE
+0xA1 0x00A1 # INVERTED EXCLAMATION MARK
+0xA2 0x00A2 # CENT SIGN
+0xA3 0x00A3 # POUND SIGN
+0xA4 0x00A4 # CURRENCY SIGN
+0xA5 0x00A5 # YEN SIGN
+0xA6 0x00A6 # BROKEN BAR
+0xA7 0x00A7 # SECTION SIGN
+0xA8 0x00A8 # DIAERESIS
+0xA9 0x00A9 # COPYRIGHT SIGN
+0xAA 0x00AA # FEMININE ORDINAL INDICATOR
+0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xAC 0x00AC # NOT SIGN
+0xAD 0x00AD # SOFT HYPHEN
+0xAE 0x00AE # REGISTERED SIGN
+0xAF 0x00AF # MACRON
+0xB0 0x00B0 # DEGREE SIGN
+0xB1 0x00B1 # PLUS-MINUS SIGN
+0xB2 0x00B2 # SUPERSCRIPT TWO
+0xB3 0x00B3 # SUPERSCRIPT THREE
+0xB4 0x00B4 # ACUTE ACCENT
+0xB5 0x00B5 # MICRO SIGN
+0xB6 0x00B6 # PILCROW SIGN
+0xB7 0x00B7 # MIDDLE DOT
+0xB8 0x00B8 # CEDILLA
+0xB9 0x00B9 # SUPERSCRIPT ONE
+0xBA 0x00BA # MASCULINE ORDINAL INDICATOR
+0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xBC 0x00BC # VULGAR FRACTION ONE QUARTER
+0xBD 0x00BD # VULGAR FRACTION ONE HALF
+0xBE 0x00BE # VULGAR FRACTION THREE QUARTERS
+0xBF 0x00BF # INVERTED QUESTION MARK
+0xC0 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE
+0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE
+0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0xC3 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE
+0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
+0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE
+0xC6 0x00C6 # LATIN CAPITAL LETTER AE
+0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA
+0xC8 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE
+0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE
+0xCA 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS
+0xCC 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE
+0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE
+0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS
+0xD0 0x00D0 # LATIN CAPITAL LETTER ETH (Icelandic)
+0xD1 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE
+0xD2 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE
+0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE
+0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE
+0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS
+0xD7 0x00D7 # MULTIPLICATION SIGN
+0xD8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE
+0xD9 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE
+0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE
+0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS
+0xDD 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE
+0xDE 0x00DE # LATIN CAPITAL LETTER THORN (Icelandic)
+0xDF 0x00DF # LATIN SMALL LETTER SHARP S (German)
+0xE0 0x00E0 # LATIN SMALL LETTER A WITH GRAVE
+0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE
+0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX
+0xE3 0x00E3 # LATIN SMALL LETTER A WITH TILDE
+0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS
+0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE
+0xE6 0x00E6 # LATIN SMALL LETTER AE
+0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA
+0xE8 0x00E8 # LATIN SMALL LETTER E WITH GRAVE
+0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE
+0xEA 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX
+0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS
+0xEC 0x00EC # LATIN SMALL LETTER I WITH GRAVE
+0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE
+0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX
+0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS
+0xF0 0x00F0 # LATIN SMALL LETTER ETH (Icelandic)
+0xF1 0x00F1 # LATIN SMALL LETTER N WITH TILDE
+0xF2 0x00F2 # LATIN SMALL LETTER O WITH GRAVE
+0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE
+0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX
+0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE
+0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS
+0xF7 0x00F7 # DIVISION SIGN
+0xF8 0x00F8 # LATIN SMALL LETTER O WITH STROKE
+0xF9 0x00F9 # LATIN SMALL LETTER U WITH GRAVE
+0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE
+0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX
+0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS
+0xFD 0x00FD # LATIN SMALL LETTER Y WITH ACUTE
+0xFE 0x00FE # LATIN SMALL LETTER THORN (Icelandic)
+0xFF 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS
diff --git a/ext/standard/html_tables/mappings/8859-15.TXT b/ext/standard/html_tables/mappings/8859-15.TXT
new file mode 100644
index 0000000..ab2f32f
--- /dev/null
+++ b/ext/standard/html_tables/mappings/8859-15.TXT
@@ -0,0 +1,303 @@
+#
+# Name: ISO/IEC 8859-15:1999 to Unicode
+# Unicode version: 3.0
+# Table version: 1.0
+# Table format: Format A
+# Date: 1999 July 27
+# Authors: Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/>
+# Ken Whistler <kenw@sybase.com>
+#
+# Copyright (c) 1998 - 1999 Unicode, Inc. All Rights reserved.
+#
+# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
+# No claims are made as to fitness for any particular purpose. No
+# warranties of any kind are expressed or implied. The recipient
+# agrees to determine applicability of information provided. If this
+# file has been provided on optical media by Unicode, Inc., the sole
+# remedy for any claim will be exchange of defective media within 90
+# days of receipt.
+#
+# Unicode, Inc. hereby grants the right to freely use the information
+# supplied in this file in the creation of products supporting the
+# Unicode Standard, and to make copies of this file in any form for
+# internal or external distribution as long as this notice remains
+# attached.
+#
+# General notes:
+#
+# This table contains the data the Unicode Consortium has on how
+# ISO/IEC 8859-15:1999 characters map into Unicode.
+#
+# Format: Three tab-separated columns
+# Column #1 is the ISO/IEC 8859-15 code (in hex as 0xXX)
+# Column #2 is the Unicode (in hex as 0xXXXX)
+# Column #3 the Unicode name (follows a comment sign, '#')
+#
+# The entries are in ISO/IEC 8859-15 order.
+#
+# Version history
+#
+# Updated versions of this file may be found in:
+# <ftp://ftp.unicode.org/Public/MAPPINGS/>
+#
+# Any comments or problems, contact <errata@unicode.org>
+# Please note that <errata@unicode.org> is an archival address;
+# notices will be checked, but do not expect an immediate response.
+#
+0x00 0x0000 # NULL
+0x01 0x0001 # START OF HEADING
+0x02 0x0002 # START OF TEXT
+0x03 0x0003 # END OF TEXT
+0x04 0x0004 # END OF TRANSMISSION
+0x05 0x0005 # ENQUIRY
+0x06 0x0006 # ACKNOWLEDGE
+0x07 0x0007 # BELL
+0x08 0x0008 # BACKSPACE
+0x09 0x0009 # HORIZONTAL TABULATION
+0x0A 0x000A # LINE FEED
+0x0B 0x000B # VERTICAL TABULATION
+0x0C 0x000C # FORM FEED
+0x0D 0x000D # CARRIAGE RETURN
+0x0E 0x000E # SHIFT OUT
+0x0F 0x000F # SHIFT IN
+0x10 0x0010 # DATA LINK ESCAPE
+0x11 0x0011 # DEVICE CONTROL ONE
+0x12 0x0012 # DEVICE CONTROL TWO
+0x13 0x0013 # DEVICE CONTROL THREE
+0x14 0x0014 # DEVICE CONTROL FOUR
+0x15 0x0015 # NEGATIVE ACKNOWLEDGE
+0x16 0x0016 # SYNCHRONOUS IDLE
+0x17 0x0017 # END OF TRANSMISSION BLOCK
+0x18 0x0018 # CANCEL
+0x19 0x0019 # END OF MEDIUM
+0x1A 0x001A # SUBSTITUTE
+0x1B 0x001B # ESCAPE
+0x1C 0x001C # FILE SEPARATOR
+0x1D 0x001D # GROUP SEPARATOR
+0x1E 0x001E # RECORD SEPARATOR
+0x1F 0x001F # UNIT SEPARATOR
+0x20 0x0020 # SPACE
+0x21 0x0021 # EXCLAMATION MARK
+0x22 0x0022 # QUOTATION MARK
+0x23 0x0023 # NUMBER SIGN
+0x24 0x0024 # DOLLAR SIGN
+0x25 0x0025 # PERCENT SIGN
+0x26 0x0026 # AMPERSAND
+0x27 0x0027 # APOSTROPHE
+0x28 0x0028 # LEFT PARENTHESIS
+0x29 0x0029 # RIGHT PARENTHESIS
+0x2A 0x002A # ASTERISK
+0x2B 0x002B # PLUS SIGN
+0x2C 0x002C # COMMA
+0x2D 0x002D # HYPHEN-MINUS
+0x2E 0x002E # FULL STOP
+0x2F 0x002F # SOLIDUS
+0x30 0x0030 # DIGIT ZERO
+0x31 0x0031 # DIGIT ONE
+0x32 0x0032 # DIGIT TWO
+0x33 0x0033 # DIGIT THREE
+0x34 0x0034 # DIGIT FOUR
+0x35 0x0035 # DIGIT FIVE
+0x36 0x0036 # DIGIT SIX
+0x37 0x0037 # DIGIT SEVEN
+0x38 0x0038 # DIGIT EIGHT
+0x39 0x0039 # DIGIT NINE
+0x3A 0x003A # COLON
+0x3B 0x003B # SEMICOLON
+0x3C 0x003C # LESS-THAN SIGN
+0x3D 0x003D # EQUALS SIGN
+0x3E 0x003E # GREATER-THAN SIGN
+0x3F 0x003F # QUESTION MARK
+0x40 0x0040 # COMMERCIAL AT
+0x41 0x0041 # LATIN CAPITAL LETTER A
+0x42 0x0042 # LATIN CAPITAL LETTER B
+0x43 0x0043 # LATIN CAPITAL LETTER C
+0x44 0x0044 # LATIN CAPITAL LETTER D
+0x45 0x0045 # LATIN CAPITAL LETTER E
+0x46 0x0046 # LATIN CAPITAL LETTER F
+0x47 0x0047 # LATIN CAPITAL LETTER G
+0x48 0x0048 # LATIN CAPITAL LETTER H
+0x49 0x0049 # LATIN CAPITAL LETTER I
+0x4A 0x004A # LATIN CAPITAL LETTER J
+0x4B 0x004B # LATIN CAPITAL LETTER K
+0x4C 0x004C # LATIN CAPITAL LETTER L
+0x4D 0x004D # LATIN CAPITAL LETTER M
+0x4E 0x004E # LATIN CAPITAL LETTER N
+0x4F 0x004F # LATIN CAPITAL LETTER O
+0x50 0x0050 # LATIN CAPITAL LETTER P
+0x51 0x0051 # LATIN CAPITAL LETTER Q
+0x52 0x0052 # LATIN CAPITAL LETTER R
+0x53 0x0053 # LATIN CAPITAL LETTER S
+0x54 0x0054 # LATIN CAPITAL LETTER T
+0x55 0x0055 # LATIN CAPITAL LETTER U
+0x56 0x0056 # LATIN CAPITAL LETTER V
+0x57 0x0057 # LATIN CAPITAL LETTER W
+0x58 0x0058 # LATIN CAPITAL LETTER X
+0x59 0x0059 # LATIN CAPITAL LETTER Y
+0x5A 0x005A # LATIN CAPITAL LETTER Z
+0x5B 0x005B # LEFT SQUARE BRACKET
+0x5C 0x005C # REVERSE SOLIDUS
+0x5D 0x005D # RIGHT SQUARE BRACKET
+0x5E 0x005E # CIRCUMFLEX ACCENT
+0x5F 0x005F # LOW LINE
+0x60 0x0060 # GRAVE ACCENT
+0x61 0x0061 # LATIN SMALL LETTER A
+0x62 0x0062 # LATIN SMALL LETTER B
+0x63 0x0063 # LATIN SMALL LETTER C
+0x64 0x0064 # LATIN SMALL LETTER D
+0x65 0x0065 # LATIN SMALL LETTER E
+0x66 0x0066 # LATIN SMALL LETTER F
+0x67 0x0067 # LATIN SMALL LETTER G
+0x68 0x0068 # LATIN SMALL LETTER H
+0x69 0x0069 # LATIN SMALL LETTER I
+0x6A 0x006A # LATIN SMALL LETTER J
+0x6B 0x006B # LATIN SMALL LETTER K
+0x6C 0x006C # LATIN SMALL LETTER L
+0x6D 0x006D # LATIN SMALL LETTER M
+0x6E 0x006E # LATIN SMALL LETTER N
+0x6F 0x006F # LATIN SMALL LETTER O
+0x70 0x0070 # LATIN SMALL LETTER P
+0x71 0x0071 # LATIN SMALL LETTER Q
+0x72 0x0072 # LATIN SMALL LETTER R
+0x73 0x0073 # LATIN SMALL LETTER S
+0x74 0x0074 # LATIN SMALL LETTER T
+0x75 0x0075 # LATIN SMALL LETTER U
+0x76 0x0076 # LATIN SMALL LETTER V
+0x77 0x0077 # LATIN SMALL LETTER W
+0x78 0x0078 # LATIN SMALL LETTER X
+0x79 0x0079 # LATIN SMALL LETTER Y
+0x7A 0x007A # LATIN SMALL LETTER Z
+0x7B 0x007B # LEFT CURLY BRACKET
+0x7C 0x007C # VERTICAL LINE
+0x7D 0x007D # RIGHT CURLY BRACKET
+0x7E 0x007E # TILDE
+0x7F 0x007F # DELETE
+0x80 0x0080 # <control>
+0x81 0x0081 # <control>
+0x82 0x0082 # <control>
+0x83 0x0083 # <control>
+0x84 0x0084 # <control>
+0x85 0x0085 # <control>
+0x86 0x0086 # <control>
+0x87 0x0087 # <control>
+0x88 0x0088 # <control>
+0x89 0x0089 # <control>
+0x8A 0x008A # <control>
+0x8B 0x008B # <control>
+0x8C 0x008C # <control>
+0x8D 0x008D # <control>
+0x8E 0x008E # <control>
+0x8F 0x008F # <control>
+0x90 0x0090 # <control>
+0x91 0x0091 # <control>
+0x92 0x0092 # <control>
+0x93 0x0093 # <control>
+0x94 0x0094 # <control>
+0x95 0x0095 # <control>
+0x96 0x0096 # <control>
+0x97 0x0097 # <control>
+0x98 0x0098 # <control>
+0x99 0x0099 # <control>
+0x9A 0x009A # <control>
+0x9B 0x009B # <control>
+0x9C 0x009C # <control>
+0x9D 0x009D # <control>
+0x9E 0x009E # <control>
+0x9F 0x009F # <control>
+0xA0 0x00A0 # NO-BREAK SPACE
+0xA1 0x00A1 # INVERTED EXCLAMATION MARK
+0xA2 0x00A2 # CENT SIGN
+0xA3 0x00A3 # POUND SIGN
+0xA4 0x20AC # EURO SIGN
+0xA5 0x00A5 # YEN SIGN
+0xA6 0x0160 # LATIN CAPITAL LETTER S WITH CARON
+0xA7 0x00A7 # SECTION SIGN
+0xA8 0x0161 # LATIN SMALL LETTER S WITH CARON
+0xA9 0x00A9 # COPYRIGHT SIGN
+0xAA 0x00AA # FEMININE ORDINAL INDICATOR
+0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xAC 0x00AC # NOT SIGN
+0xAD 0x00AD # SOFT HYPHEN
+0xAE 0x00AE # REGISTERED SIGN
+0xAF 0x00AF # MACRON
+0xB0 0x00B0 # DEGREE SIGN
+0xB1 0x00B1 # PLUS-MINUS SIGN
+0xB2 0x00B2 # SUPERSCRIPT TWO
+0xB3 0x00B3 # SUPERSCRIPT THREE
+0xB4 0x017D # LATIN CAPITAL LETTER Z WITH CARON
+0xB5 0x00B5 # MICRO SIGN
+0xB6 0x00B6 # PILCROW SIGN
+0xB7 0x00B7 # MIDDLE DOT
+0xB8 0x017E # LATIN SMALL LETTER Z WITH CARON
+0xB9 0x00B9 # SUPERSCRIPT ONE
+0xBA 0x00BA # MASCULINE ORDINAL INDICATOR
+0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xBC 0x0152 # LATIN CAPITAL LIGATURE OE
+0xBD 0x0153 # LATIN SMALL LIGATURE OE
+0xBE 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS
+0xBF 0x00BF # INVERTED QUESTION MARK
+0xC0 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE
+0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE
+0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0xC3 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE
+0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
+0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE
+0xC6 0x00C6 # LATIN CAPITAL LETTER AE
+0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA
+0xC8 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE
+0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE
+0xCA 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS
+0xCC 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE
+0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE
+0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS
+0xD0 0x00D0 # LATIN CAPITAL LETTER ETH
+0xD1 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE
+0xD2 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE
+0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE
+0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE
+0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS
+0xD7 0x00D7 # MULTIPLICATION SIGN
+0xD8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE
+0xD9 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE
+0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE
+0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS
+0xDD 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE
+0xDE 0x00DE # LATIN CAPITAL LETTER THORN
+0xDF 0x00DF # LATIN SMALL LETTER SHARP S
+0xE0 0x00E0 # LATIN SMALL LETTER A WITH GRAVE
+0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE
+0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX
+0xE3 0x00E3 # LATIN SMALL LETTER A WITH TILDE
+0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS
+0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE
+0xE6 0x00E6 # LATIN SMALL LETTER AE
+0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA
+0xE8 0x00E8 # LATIN SMALL LETTER E WITH GRAVE
+0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE
+0xEA 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX
+0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS
+0xEC 0x00EC # LATIN SMALL LETTER I WITH GRAVE
+0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE
+0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX
+0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS
+0xF0 0x00F0 # LATIN SMALL LETTER ETH
+0xF1 0x00F1 # LATIN SMALL LETTER N WITH TILDE
+0xF2 0x00F2 # LATIN SMALL LETTER O WITH GRAVE
+0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE
+0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX
+0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE
+0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS
+0xF7 0x00F7 # DIVISION SIGN
+0xF8 0x00F8 # LATIN SMALL LETTER O WITH STROKE
+0xF9 0x00F9 # LATIN SMALL LETTER U WITH GRAVE
+0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE
+0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX
+0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS
+0xFD 0x00FD # LATIN SMALL LETTER Y WITH ACUTE
+0xFE 0x00FE # LATIN SMALL LETTER THORN
+0xFF 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS
+
diff --git a/ext/standard/html_tables/mappings/8859-5.TXT b/ext/standard/html_tables/mappings/8859-5.TXT
new file mode 100644
index 0000000..a7ed1ce
--- /dev/null
+++ b/ext/standard/html_tables/mappings/8859-5.TXT
@@ -0,0 +1,303 @@
+#
+# Name: ISO 8859-5:1999 to Unicode
+# Unicode version: 3.0
+# Table version: 1.0
+# Table format: Format A
+# Date: 1999 July 27
+# Authors: Ken Whistler <kenw@sybase.com>
+#
+# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved.
+#
+# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
+# No claims are made as to fitness for any particular purpose. No
+# warranties of any kind are expressed or implied. The recipient
+# agrees to determine applicability of information provided. If this
+# file has been provided on optical media by Unicode, Inc., the sole
+# remedy for any claim will be exchange of defective media within 90
+# days of receipt.
+#
+# Unicode, Inc. hereby grants the right to freely use the information
+# supplied in this file in the creation of products supporting the
+# Unicode Standard, and to make copies of this file in any form for
+# internal or external distribution as long as this notice remains
+# attached.
+#
+# General notes:
+#
+# This table contains the data the Unicode Consortium has on how
+# ISO/IEC 8859-5:1999 characters map into Unicode.
+#
+# Format: Three tab-separated columns
+# Column #1 is the ISO/IEC 8859-5 code (in hex as 0xXX)
+# Column #2 is the Unicode (in hex as 0xXXXX)
+# Column #3 the Unicode name (follows a comment sign, '#')
+#
+# The entries are in ISO/IEC 8859-5 order.
+#
+# Version history
+# 1.0 version updates 0.1 version by adding mappings for all
+# control characters.
+#
+# Updated versions of this file may be found in:
+# <ftp://ftp.unicode.org/Public/MAPPINGS/>
+#
+# Any comments or problems, contact <errata@unicode.org>
+# Please note that <errata@unicode.org> is an archival address;
+# notices will be checked, but do not expect an immediate response.
+#
+0x00 0x0000 # NULL
+0x01 0x0001 # START OF HEADING
+0x02 0x0002 # START OF TEXT
+0x03 0x0003 # END OF TEXT
+0x04 0x0004 # END OF TRANSMISSION
+0x05 0x0005 # ENQUIRY
+0x06 0x0006 # ACKNOWLEDGE
+0x07 0x0007 # BELL
+0x08 0x0008 # BACKSPACE
+0x09 0x0009 # HORIZONTAL TABULATION
+0x0A 0x000A # LINE FEED
+0x0B 0x000B # VERTICAL TABULATION
+0x0C 0x000C # FORM FEED
+0x0D 0x000D # CARRIAGE RETURN
+0x0E 0x000E # SHIFT OUT
+0x0F 0x000F # SHIFT IN
+0x10 0x0010 # DATA LINK ESCAPE
+0x11 0x0011 # DEVICE CONTROL ONE
+0x12 0x0012 # DEVICE CONTROL TWO
+0x13 0x0013 # DEVICE CONTROL THREE
+0x14 0x0014 # DEVICE CONTROL FOUR
+0x15 0x0015 # NEGATIVE ACKNOWLEDGE
+0x16 0x0016 # SYNCHRONOUS IDLE
+0x17 0x0017 # END OF TRANSMISSION BLOCK
+0x18 0x0018 # CANCEL
+0x19 0x0019 # END OF MEDIUM
+0x1A 0x001A # SUBSTITUTE
+0x1B 0x001B # ESCAPE
+0x1C 0x001C # FILE SEPARATOR
+0x1D 0x001D # GROUP SEPARATOR
+0x1E 0x001E # RECORD SEPARATOR
+0x1F 0x001F # UNIT SEPARATOR
+0x20 0x0020 # SPACE
+0x21 0x0021 # EXCLAMATION MARK
+0x22 0x0022 # QUOTATION MARK
+0x23 0x0023 # NUMBER SIGN
+0x24 0x0024 # DOLLAR SIGN
+0x25 0x0025 # PERCENT SIGN
+0x26 0x0026 # AMPERSAND
+0x27 0x0027 # APOSTROPHE
+0x28 0x0028 # LEFT PARENTHESIS
+0x29 0x0029 # RIGHT PARENTHESIS
+0x2A 0x002A # ASTERISK
+0x2B 0x002B # PLUS SIGN
+0x2C 0x002C # COMMA
+0x2D 0x002D # HYPHEN-MINUS
+0x2E 0x002E # FULL STOP
+0x2F 0x002F # SOLIDUS
+0x30 0x0030 # DIGIT ZERO
+0x31 0x0031 # DIGIT ONE
+0x32 0x0032 # DIGIT TWO
+0x33 0x0033 # DIGIT THREE
+0x34 0x0034 # DIGIT FOUR
+0x35 0x0035 # DIGIT FIVE
+0x36 0x0036 # DIGIT SIX
+0x37 0x0037 # DIGIT SEVEN
+0x38 0x0038 # DIGIT EIGHT
+0x39 0x0039 # DIGIT NINE
+0x3A 0x003A # COLON
+0x3B 0x003B # SEMICOLON
+0x3C 0x003C # LESS-THAN SIGN
+0x3D 0x003D # EQUALS SIGN
+0x3E 0x003E # GREATER-THAN SIGN
+0x3F 0x003F # QUESTION MARK
+0x40 0x0040 # COMMERCIAL AT
+0x41 0x0041 # LATIN CAPITAL LETTER A
+0x42 0x0042 # LATIN CAPITAL LETTER B
+0x43 0x0043 # LATIN CAPITAL LETTER C
+0x44 0x0044 # LATIN CAPITAL LETTER D
+0x45 0x0045 # LATIN CAPITAL LETTER E
+0x46 0x0046 # LATIN CAPITAL LETTER F
+0x47 0x0047 # LATIN CAPITAL LETTER G
+0x48 0x0048 # LATIN CAPITAL LETTER H
+0x49 0x0049 # LATIN CAPITAL LETTER I
+0x4A 0x004A # LATIN CAPITAL LETTER J
+0x4B 0x004B # LATIN CAPITAL LETTER K
+0x4C 0x004C # LATIN CAPITAL LETTER L
+0x4D 0x004D # LATIN CAPITAL LETTER M
+0x4E 0x004E # LATIN CAPITAL LETTER N
+0x4F 0x004F # LATIN CAPITAL LETTER O
+0x50 0x0050 # LATIN CAPITAL LETTER P
+0x51 0x0051 # LATIN CAPITAL LETTER Q
+0x52 0x0052 # LATIN CAPITAL LETTER R
+0x53 0x0053 # LATIN CAPITAL LETTER S
+0x54 0x0054 # LATIN CAPITAL LETTER T
+0x55 0x0055 # LATIN CAPITAL LETTER U
+0x56 0x0056 # LATIN CAPITAL LETTER V
+0x57 0x0057 # LATIN CAPITAL LETTER W
+0x58 0x0058 # LATIN CAPITAL LETTER X
+0x59 0x0059 # LATIN CAPITAL LETTER Y
+0x5A 0x005A # LATIN CAPITAL LETTER Z
+0x5B 0x005B # LEFT SQUARE BRACKET
+0x5C 0x005C # REVERSE SOLIDUS
+0x5D 0x005D # RIGHT SQUARE BRACKET
+0x5E 0x005E # CIRCUMFLEX ACCENT
+0x5F 0x005F # LOW LINE
+0x60 0x0060 # GRAVE ACCENT
+0x61 0x0061 # LATIN SMALL LETTER A
+0x62 0x0062 # LATIN SMALL LETTER B
+0x63 0x0063 # LATIN SMALL LETTER C
+0x64 0x0064 # LATIN SMALL LETTER D
+0x65 0x0065 # LATIN SMALL LETTER E
+0x66 0x0066 # LATIN SMALL LETTER F
+0x67 0x0067 # LATIN SMALL LETTER G
+0x68 0x0068 # LATIN SMALL LETTER H
+0x69 0x0069 # LATIN SMALL LETTER I
+0x6A 0x006A # LATIN SMALL LETTER J
+0x6B 0x006B # LATIN SMALL LETTER K
+0x6C 0x006C # LATIN SMALL LETTER L
+0x6D 0x006D # LATIN SMALL LETTER M
+0x6E 0x006E # LATIN SMALL LETTER N
+0x6F 0x006F # LATIN SMALL LETTER O
+0x70 0x0070 # LATIN SMALL LETTER P
+0x71 0x0071 # LATIN SMALL LETTER Q
+0x72 0x0072 # LATIN SMALL LETTER R
+0x73 0x0073 # LATIN SMALL LETTER S
+0x74 0x0074 # LATIN SMALL LETTER T
+0x75 0x0075 # LATIN SMALL LETTER U
+0x76 0x0076 # LATIN SMALL LETTER V
+0x77 0x0077 # LATIN SMALL LETTER W
+0x78 0x0078 # LATIN SMALL LETTER X
+0x79 0x0079 # LATIN SMALL LETTER Y
+0x7A 0x007A # LATIN SMALL LETTER Z
+0x7B 0x007B # LEFT CURLY BRACKET
+0x7C 0x007C # VERTICAL LINE
+0x7D 0x007D # RIGHT CURLY BRACKET
+0x7E 0x007E # TILDE
+0x7F 0x007F # DELETE
+0x80 0x0080 # <control>
+0x81 0x0081 # <control>
+0x82 0x0082 # <control>
+0x83 0x0083 # <control>
+0x84 0x0084 # <control>
+0x85 0x0085 # <control>
+0x86 0x0086 # <control>
+0x87 0x0087 # <control>
+0x88 0x0088 # <control>
+0x89 0x0089 # <control>
+0x8A 0x008A # <control>
+0x8B 0x008B # <control>
+0x8C 0x008C # <control>
+0x8D 0x008D # <control>
+0x8E 0x008E # <control>
+0x8F 0x008F # <control>
+0x90 0x0090 # <control>
+0x91 0x0091 # <control>
+0x92 0x0092 # <control>
+0x93 0x0093 # <control>
+0x94 0x0094 # <control>
+0x95 0x0095 # <control>
+0x96 0x0096 # <control>
+0x97 0x0097 # <control>
+0x98 0x0098 # <control>
+0x99 0x0099 # <control>
+0x9A 0x009A # <control>
+0x9B 0x009B # <control>
+0x9C 0x009C # <control>
+0x9D 0x009D # <control>
+0x9E 0x009E # <control>
+0x9F 0x009F # <control>
+0xA0 0x00A0 # NO-BREAK SPACE
+0xA1 0x0401 # CYRILLIC CAPITAL LETTER IO
+0xA2 0x0402 # CYRILLIC CAPITAL LETTER DJE
+0xA3 0x0403 # CYRILLIC CAPITAL LETTER GJE
+0xA4 0x0404 # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+0xA5 0x0405 # CYRILLIC CAPITAL LETTER DZE
+0xA6 0x0406 # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+0xA7 0x0407 # CYRILLIC CAPITAL LETTER YI
+0xA8 0x0408 # CYRILLIC CAPITAL LETTER JE
+0xA9 0x0409 # CYRILLIC CAPITAL LETTER LJE
+0xAA 0x040A # CYRILLIC CAPITAL LETTER NJE
+0xAB 0x040B # CYRILLIC CAPITAL LETTER TSHE
+0xAC 0x040C # CYRILLIC CAPITAL LETTER KJE
+0xAD 0x00AD # SOFT HYPHEN
+0xAE 0x040E # CYRILLIC CAPITAL LETTER SHORT U
+0xAF 0x040F # CYRILLIC CAPITAL LETTER DZHE
+0xB0 0x0410 # CYRILLIC CAPITAL LETTER A
+0xB1 0x0411 # CYRILLIC CAPITAL LETTER BE
+0xB2 0x0412 # CYRILLIC CAPITAL LETTER VE
+0xB3 0x0413 # CYRILLIC CAPITAL LETTER GHE
+0xB4 0x0414 # CYRILLIC CAPITAL LETTER DE
+0xB5 0x0415 # CYRILLIC CAPITAL LETTER IE
+0xB6 0x0416 # CYRILLIC CAPITAL LETTER ZHE
+0xB7 0x0417 # CYRILLIC CAPITAL LETTER ZE
+0xB8 0x0418 # CYRILLIC CAPITAL LETTER I
+0xB9 0x0419 # CYRILLIC CAPITAL LETTER SHORT I
+0xBA 0x041A # CYRILLIC CAPITAL LETTER KA
+0xBB 0x041B # CYRILLIC CAPITAL LETTER EL
+0xBC 0x041C # CYRILLIC CAPITAL LETTER EM
+0xBD 0x041D # CYRILLIC CAPITAL LETTER EN
+0xBE 0x041E # CYRILLIC CAPITAL LETTER O
+0xBF 0x041F # CYRILLIC CAPITAL LETTER PE
+0xC0 0x0420 # CYRILLIC CAPITAL LETTER ER
+0xC1 0x0421 # CYRILLIC CAPITAL LETTER ES
+0xC2 0x0422 # CYRILLIC CAPITAL LETTER TE
+0xC3 0x0423 # CYRILLIC CAPITAL LETTER U
+0xC4 0x0424 # CYRILLIC CAPITAL LETTER EF
+0xC5 0x0425 # CYRILLIC CAPITAL LETTER HA
+0xC6 0x0426 # CYRILLIC CAPITAL LETTER TSE
+0xC7 0x0427 # CYRILLIC CAPITAL LETTER CHE
+0xC8 0x0428 # CYRILLIC CAPITAL LETTER SHA
+0xC9 0x0429 # CYRILLIC CAPITAL LETTER SHCHA
+0xCA 0x042A # CYRILLIC CAPITAL LETTER HARD SIGN
+0xCB 0x042B # CYRILLIC CAPITAL LETTER YERU
+0xCC 0x042C # CYRILLIC CAPITAL LETTER SOFT SIGN
+0xCD 0x042D # CYRILLIC CAPITAL LETTER E
+0xCE 0x042E # CYRILLIC CAPITAL LETTER YU
+0xCF 0x042F # CYRILLIC CAPITAL LETTER YA
+0xD0 0x0430 # CYRILLIC SMALL LETTER A
+0xD1 0x0431 # CYRILLIC SMALL LETTER BE
+0xD2 0x0432 # CYRILLIC SMALL LETTER VE
+0xD3 0x0433 # CYRILLIC SMALL LETTER GHE
+0xD4 0x0434 # CYRILLIC SMALL LETTER DE
+0xD5 0x0435 # CYRILLIC SMALL LETTER IE
+0xD6 0x0436 # CYRILLIC SMALL LETTER ZHE
+0xD7 0x0437 # CYRILLIC SMALL LETTER ZE
+0xD8 0x0438 # CYRILLIC SMALL LETTER I
+0xD9 0x0439 # CYRILLIC SMALL LETTER SHORT I
+0xDA 0x043A # CYRILLIC SMALL LETTER KA
+0xDB 0x043B # CYRILLIC SMALL LETTER EL
+0xDC 0x043C # CYRILLIC SMALL LETTER EM
+0xDD 0x043D # CYRILLIC SMALL LETTER EN
+0xDE 0x043E # CYRILLIC SMALL LETTER O
+0xDF 0x043F # CYRILLIC SMALL LETTER PE
+0xE0 0x0440 # CYRILLIC SMALL LETTER ER
+0xE1 0x0441 # CYRILLIC SMALL LETTER ES
+0xE2 0x0442 # CYRILLIC SMALL LETTER TE
+0xE3 0x0443 # CYRILLIC SMALL LETTER U
+0xE4 0x0444 # CYRILLIC SMALL LETTER EF
+0xE5 0x0445 # CYRILLIC SMALL LETTER HA
+0xE6 0x0446 # CYRILLIC SMALL LETTER TSE
+0xE7 0x0447 # CYRILLIC SMALL LETTER CHE
+0xE8 0x0448 # CYRILLIC SMALL LETTER SHA
+0xE9 0x0449 # CYRILLIC SMALL LETTER SHCHA
+0xEA 0x044A # CYRILLIC SMALL LETTER HARD SIGN
+0xEB 0x044B # CYRILLIC SMALL LETTER YERU
+0xEC 0x044C # CYRILLIC SMALL LETTER SOFT SIGN
+0xED 0x044D # CYRILLIC SMALL LETTER E
+0xEE 0x044E # CYRILLIC SMALL LETTER YU
+0xEF 0x044F # CYRILLIC SMALL LETTER YA
+0xF0 0x2116 # NUMERO SIGN
+0xF1 0x0451 # CYRILLIC SMALL LETTER IO
+0xF2 0x0452 # CYRILLIC SMALL LETTER DJE
+0xF3 0x0453 # CYRILLIC SMALL LETTER GJE
+0xF4 0x0454 # CYRILLIC SMALL LETTER UKRAINIAN IE
+0xF5 0x0455 # CYRILLIC SMALL LETTER DZE
+0xF6 0x0456 # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+0xF7 0x0457 # CYRILLIC SMALL LETTER YI
+0xF8 0x0458 # CYRILLIC SMALL LETTER JE
+0xF9 0x0459 # CYRILLIC SMALL LETTER LJE
+0xFA 0x045A # CYRILLIC SMALL LETTER NJE
+0xFB 0x045B # CYRILLIC SMALL LETTER TSHE
+0xFC 0x045C # CYRILLIC SMALL LETTER KJE
+0xFD 0x00A7 # SECTION SIGN
+0xFE 0x045E # CYRILLIC SMALL LETTER SHORT U
+0xFF 0x045F # CYRILLIC SMALL LETTER DZHE
diff --git a/ext/standard/html_tables/mappings/CP1251.TXT b/ext/standard/html_tables/mappings/CP1251.TXT
new file mode 100644
index 0000000..4d9b355
--- /dev/null
+++ b/ext/standard/html_tables/mappings/CP1251.TXT
@@ -0,0 +1,274 @@
+#
+# Name: cp1251 to Unicode table
+# Unicode version: 2.0
+# Table version: 2.01
+# Table format: Format A
+# Date: 04/15/98
+#
+# Contact: Shawn.Steele@microsoft.com
+#
+# General notes: none
+#
+# Format: Three tab-separated columns
+# Column #1 is the cp1251 code (in hex)
+# Column #2 is the Unicode (in hex as 0xXXXX)
+# Column #3 is the Unicode name (follows a comment sign, '#')
+#
+# The entries are in cp1251 order
+#
+0x00 0x0000 #NULL
+0x01 0x0001 #START OF HEADING
+0x02 0x0002 #START OF TEXT
+0x03 0x0003 #END OF TEXT
+0x04 0x0004 #END OF TRANSMISSION
+0x05 0x0005 #ENQUIRY
+0x06 0x0006 #ACKNOWLEDGE
+0x07 0x0007 #BELL
+0x08 0x0008 #BACKSPACE
+0x09 0x0009 #HORIZONTAL TABULATION
+0x0A 0x000A #LINE FEED
+0x0B 0x000B #VERTICAL TABULATION
+0x0C 0x000C #FORM FEED
+0x0D 0x000D #CARRIAGE RETURN
+0x0E 0x000E #SHIFT OUT
+0x0F 0x000F #SHIFT IN
+0x10 0x0010 #DATA LINK ESCAPE
+0x11 0x0011 #DEVICE CONTROL ONE
+0x12 0x0012 #DEVICE CONTROL TWO
+0x13 0x0013 #DEVICE CONTROL THREE
+0x14 0x0014 #DEVICE CONTROL FOUR
+0x15 0x0015 #NEGATIVE ACKNOWLEDGE
+0x16 0x0016 #SYNCHRONOUS IDLE
+0x17 0x0017 #END OF TRANSMISSION BLOCK
+0x18 0x0018 #CANCEL
+0x19 0x0019 #END OF MEDIUM
+0x1A 0x001A #SUBSTITUTE
+0x1B 0x001B #ESCAPE
+0x1C 0x001C #FILE SEPARATOR
+0x1D 0x001D #GROUP SEPARATOR
+0x1E 0x001E #RECORD SEPARATOR
+0x1F 0x001F #UNIT SEPARATOR
+0x20 0x0020 #SPACE
+0x21 0x0021 #EXCLAMATION MARK
+0x22 0x0022 #QUOTATION MARK
+0x23 0x0023 #NUMBER SIGN
+0x24 0x0024 #DOLLAR SIGN
+0x25 0x0025 #PERCENT SIGN
+0x26 0x0026 #AMPERSAND
+0x27 0x0027 #APOSTROPHE
+0x28 0x0028 #LEFT PARENTHESIS
+0x29 0x0029 #RIGHT PARENTHESIS
+0x2A 0x002A #ASTERISK
+0x2B 0x002B #PLUS SIGN
+0x2C 0x002C #COMMA
+0x2D 0x002D #HYPHEN-MINUS
+0x2E 0x002E #FULL STOP
+0x2F 0x002F #SOLIDUS
+0x30 0x0030 #DIGIT ZERO
+0x31 0x0031 #DIGIT ONE
+0x32 0x0032 #DIGIT TWO
+0x33 0x0033 #DIGIT THREE
+0x34 0x0034 #DIGIT FOUR
+0x35 0x0035 #DIGIT FIVE
+0x36 0x0036 #DIGIT SIX
+0x37 0x0037 #DIGIT SEVEN
+0x38 0x0038 #DIGIT EIGHT
+0x39 0x0039 #DIGIT NINE
+0x3A 0x003A #COLON
+0x3B 0x003B #SEMICOLON
+0x3C 0x003C #LESS-THAN SIGN
+0x3D 0x003D #EQUALS SIGN
+0x3E 0x003E #GREATER-THAN SIGN
+0x3F 0x003F #QUESTION MARK
+0x40 0x0040 #COMMERCIAL AT
+0x41 0x0041 #LATIN CAPITAL LETTER A
+0x42 0x0042 #LATIN CAPITAL LETTER B
+0x43 0x0043 #LATIN CAPITAL LETTER C
+0x44 0x0044 #LATIN CAPITAL LETTER D
+0x45 0x0045 #LATIN CAPITAL LETTER E
+0x46 0x0046 #LATIN CAPITAL LETTER F
+0x47 0x0047 #LATIN CAPITAL LETTER G
+0x48 0x0048 #LATIN CAPITAL LETTER H
+0x49 0x0049 #LATIN CAPITAL LETTER I
+0x4A 0x004A #LATIN CAPITAL LETTER J
+0x4B 0x004B #LATIN CAPITAL LETTER K
+0x4C 0x004C #LATIN CAPITAL LETTER L
+0x4D 0x004D #LATIN CAPITAL LETTER M
+0x4E 0x004E #LATIN CAPITAL LETTER N
+0x4F 0x004F #LATIN CAPITAL LETTER O
+0x50 0x0050 #LATIN CAPITAL LETTER P
+0x51 0x0051 #LATIN CAPITAL LETTER Q
+0x52 0x0052 #LATIN CAPITAL LETTER R
+0x53 0x0053 #LATIN CAPITAL LETTER S
+0x54 0x0054 #LATIN CAPITAL LETTER T
+0x55 0x0055 #LATIN CAPITAL LETTER U
+0x56 0x0056 #LATIN CAPITAL LETTER V
+0x57 0x0057 #LATIN CAPITAL LETTER W
+0x58 0x0058 #LATIN CAPITAL LETTER X
+0x59 0x0059 #LATIN CAPITAL LETTER Y
+0x5A 0x005A #LATIN CAPITAL LETTER Z
+0x5B 0x005B #LEFT SQUARE BRACKET
+0x5C 0x005C #REVERSE SOLIDUS
+0x5D 0x005D #RIGHT SQUARE BRACKET
+0x5E 0x005E #CIRCUMFLEX ACCENT
+0x5F 0x005F #LOW LINE
+0x60 0x0060 #GRAVE ACCENT
+0x61 0x0061 #LATIN SMALL LETTER A
+0x62 0x0062 #LATIN SMALL LETTER B
+0x63 0x0063 #LATIN SMALL LETTER C
+0x64 0x0064 #LATIN SMALL LETTER D
+0x65 0x0065 #LATIN SMALL LETTER E
+0x66 0x0066 #LATIN SMALL LETTER F
+0x67 0x0067 #LATIN SMALL LETTER G
+0x68 0x0068 #LATIN SMALL LETTER H
+0x69 0x0069 #LATIN SMALL LETTER I
+0x6A 0x006A #LATIN SMALL LETTER J
+0x6B 0x006B #LATIN SMALL LETTER K
+0x6C 0x006C #LATIN SMALL LETTER L
+0x6D 0x006D #LATIN SMALL LETTER M
+0x6E 0x006E #LATIN SMALL LETTER N
+0x6F 0x006F #LATIN SMALL LETTER O
+0x70 0x0070 #LATIN SMALL LETTER P
+0x71 0x0071 #LATIN SMALL LETTER Q
+0x72 0x0072 #LATIN SMALL LETTER R
+0x73 0x0073 #LATIN SMALL LETTER S
+0x74 0x0074 #LATIN SMALL LETTER T
+0x75 0x0075 #LATIN SMALL LETTER U
+0x76 0x0076 #LATIN SMALL LETTER V
+0x77 0x0077 #LATIN SMALL LETTER W
+0x78 0x0078 #LATIN SMALL LETTER X
+0x79 0x0079 #LATIN SMALL LETTER Y
+0x7A 0x007A #LATIN SMALL LETTER Z
+0x7B 0x007B #LEFT CURLY BRACKET
+0x7C 0x007C #VERTICAL LINE
+0x7D 0x007D #RIGHT CURLY BRACKET
+0x7E 0x007E #TILDE
+0x7F 0x007F #DELETE
+0x80 0x0402 #CYRILLIC CAPITAL LETTER DJE
+0x81 0x0403 #CYRILLIC CAPITAL LETTER GJE
+0x82 0x201A #SINGLE LOW-9 QUOTATION MARK
+0x83 0x0453 #CYRILLIC SMALL LETTER GJE
+0x84 0x201E #DOUBLE LOW-9 QUOTATION MARK
+0x85 0x2026 #HORIZONTAL ELLIPSIS
+0x86 0x2020 #DAGGER
+0x87 0x2021 #DOUBLE DAGGER
+0x88 0x20AC #EURO SIGN
+0x89 0x2030 #PER MILLE SIGN
+0x8A 0x0409 #CYRILLIC CAPITAL LETTER LJE
+0x8B 0x2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+0x8C 0x040A #CYRILLIC CAPITAL LETTER NJE
+0x8D 0x040C #CYRILLIC CAPITAL LETTER KJE
+0x8E 0x040B #CYRILLIC CAPITAL LETTER TSHE
+0x8F 0x040F #CYRILLIC CAPITAL LETTER DZHE
+0x90 0x0452 #CYRILLIC SMALL LETTER DJE
+0x91 0x2018 #LEFT SINGLE QUOTATION MARK
+0x92 0x2019 #RIGHT SINGLE QUOTATION MARK
+0x93 0x201C #LEFT DOUBLE QUOTATION MARK
+0x94 0x201D #RIGHT DOUBLE QUOTATION MARK
+0x95 0x2022 #BULLET
+0x96 0x2013 #EN DASH
+0x97 0x2014 #EM DASH
+0x98 #UNDEFINED
+0x99 0x2122 #TRADE MARK SIGN
+0x9A 0x0459 #CYRILLIC SMALL LETTER LJE
+0x9B 0x203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+0x9C 0x045A #CYRILLIC SMALL LETTER NJE
+0x9D 0x045C #CYRILLIC SMALL LETTER KJE
+0x9E 0x045B #CYRILLIC SMALL LETTER TSHE
+0x9F 0x045F #CYRILLIC SMALL LETTER DZHE
+0xA0 0x00A0 #NO-BREAK SPACE
+0xA1 0x040E #CYRILLIC CAPITAL LETTER SHORT U
+0xA2 0x045E #CYRILLIC SMALL LETTER SHORT U
+0xA3 0x0408 #CYRILLIC CAPITAL LETTER JE
+0xA4 0x00A4 #CURRENCY SIGN
+0xA5 0x0490 #CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+0xA6 0x00A6 #BROKEN BAR
+0xA7 0x00A7 #SECTION SIGN
+0xA8 0x0401 #CYRILLIC CAPITAL LETTER IO
+0xA9 0x00A9 #COPYRIGHT SIGN
+0xAA 0x0404 #CYRILLIC CAPITAL LETTER UKRAINIAN IE
+0xAB 0x00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xAC 0x00AC #NOT SIGN
+0xAD 0x00AD #SOFT HYPHEN
+0xAE 0x00AE #REGISTERED SIGN
+0xAF 0x0407 #CYRILLIC CAPITAL LETTER YI
+0xB0 0x00B0 #DEGREE SIGN
+0xB1 0x00B1 #PLUS-MINUS SIGN
+0xB2 0x0406 #CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+0xB3 0x0456 #CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+0xB4 0x0491 #CYRILLIC SMALL LETTER GHE WITH UPTURN
+0xB5 0x00B5 #MICRO SIGN
+0xB6 0x00B6 #PILCROW SIGN
+0xB7 0x00B7 #MIDDLE DOT
+0xB8 0x0451 #CYRILLIC SMALL LETTER IO
+0xB9 0x2116 #NUMERO SIGN
+0xBA 0x0454 #CYRILLIC SMALL LETTER UKRAINIAN IE
+0xBB 0x00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xBC 0x0458 #CYRILLIC SMALL LETTER JE
+0xBD 0x0405 #CYRILLIC CAPITAL LETTER DZE
+0xBE 0x0455 #CYRILLIC SMALL LETTER DZE
+0xBF 0x0457 #CYRILLIC SMALL LETTER YI
+0xC0 0x0410 #CYRILLIC CAPITAL LETTER A
+0xC1 0x0411 #CYRILLIC CAPITAL LETTER BE
+0xC2 0x0412 #CYRILLIC CAPITAL LETTER VE
+0xC3 0x0413 #CYRILLIC CAPITAL LETTER GHE
+0xC4 0x0414 #CYRILLIC CAPITAL LETTER DE
+0xC5 0x0415 #CYRILLIC CAPITAL LETTER IE
+0xC6 0x0416 #CYRILLIC CAPITAL LETTER ZHE
+0xC7 0x0417 #CYRILLIC CAPITAL LETTER ZE
+0xC8 0x0418 #CYRILLIC CAPITAL LETTER I
+0xC9 0x0419 #CYRILLIC CAPITAL LETTER SHORT I
+0xCA 0x041A #CYRILLIC CAPITAL LETTER KA
+0xCB 0x041B #CYRILLIC CAPITAL LETTER EL
+0xCC 0x041C #CYRILLIC CAPITAL LETTER EM
+0xCD 0x041D #CYRILLIC CAPITAL LETTER EN
+0xCE 0x041E #CYRILLIC CAPITAL LETTER O
+0xCF 0x041F #CYRILLIC CAPITAL LETTER PE
+0xD0 0x0420 #CYRILLIC CAPITAL LETTER ER
+0xD1 0x0421 #CYRILLIC CAPITAL LETTER ES
+0xD2 0x0422 #CYRILLIC CAPITAL LETTER TE
+0xD3 0x0423 #CYRILLIC CAPITAL LETTER U
+0xD4 0x0424 #CYRILLIC CAPITAL LETTER EF
+0xD5 0x0425 #CYRILLIC CAPITAL LETTER HA
+0xD6 0x0426 #CYRILLIC CAPITAL LETTER TSE
+0xD7 0x0427 #CYRILLIC CAPITAL LETTER CHE
+0xD8 0x0428 #CYRILLIC CAPITAL LETTER SHA
+0xD9 0x0429 #CYRILLIC CAPITAL LETTER SHCHA
+0xDA 0x042A #CYRILLIC CAPITAL LETTER HARD SIGN
+0xDB 0x042B #CYRILLIC CAPITAL LETTER YERU
+0xDC 0x042C #CYRILLIC CAPITAL LETTER SOFT SIGN
+0xDD 0x042D #CYRILLIC CAPITAL LETTER E
+0xDE 0x042E #CYRILLIC CAPITAL LETTER YU
+0xDF 0x042F #CYRILLIC CAPITAL LETTER YA
+0xE0 0x0430 #CYRILLIC SMALL LETTER A
+0xE1 0x0431 #CYRILLIC SMALL LETTER BE
+0xE2 0x0432 #CYRILLIC SMALL LETTER VE
+0xE3 0x0433 #CYRILLIC SMALL LETTER GHE
+0xE4 0x0434 #CYRILLIC SMALL LETTER DE
+0xE5 0x0435 #CYRILLIC SMALL LETTER IE
+0xE6 0x0436 #CYRILLIC SMALL LETTER ZHE
+0xE7 0x0437 #CYRILLIC SMALL LETTER ZE
+0xE8 0x0438 #CYRILLIC SMALL LETTER I
+0xE9 0x0439 #CYRILLIC SMALL LETTER SHORT I
+0xEA 0x043A #CYRILLIC SMALL LETTER KA
+0xEB 0x043B #CYRILLIC SMALL LETTER EL
+0xEC 0x043C #CYRILLIC SMALL LETTER EM
+0xED 0x043D #CYRILLIC SMALL LETTER EN
+0xEE 0x043E #CYRILLIC SMALL LETTER O
+0xEF 0x043F #CYRILLIC SMALL LETTER PE
+0xF0 0x0440 #CYRILLIC SMALL LETTER ER
+0xF1 0x0441 #CYRILLIC SMALL LETTER ES
+0xF2 0x0442 #CYRILLIC SMALL LETTER TE
+0xF3 0x0443 #CYRILLIC SMALL LETTER U
+0xF4 0x0444 #CYRILLIC SMALL LETTER EF
+0xF5 0x0445 #CYRILLIC SMALL LETTER HA
+0xF6 0x0446 #CYRILLIC SMALL LETTER TSE
+0xF7 0x0447 #CYRILLIC SMALL LETTER CHE
+0xF8 0x0448 #CYRILLIC SMALL LETTER SHA
+0xF9 0x0449 #CYRILLIC SMALL LETTER SHCHA
+0xFA 0x044A #CYRILLIC SMALL LETTER HARD SIGN
+0xFB 0x044B #CYRILLIC SMALL LETTER YERU
+0xFC 0x044C #CYRILLIC SMALL LETTER SOFT SIGN
+0xFD 0x044D #CYRILLIC SMALL LETTER E
+0xFE 0x044E #CYRILLIC SMALL LETTER YU
+0xFF 0x044F #CYRILLIC SMALL LETTER YA
diff --git a/ext/standard/html_tables/mappings/CP1252.TXT b/ext/standard/html_tables/mappings/CP1252.TXT
new file mode 100644
index 0000000..8ff4b20
--- /dev/null
+++ b/ext/standard/html_tables/mappings/CP1252.TXT
@@ -0,0 +1,274 @@
+#
+# Name: cp1252 to Unicode table
+# Unicode version: 2.0
+# Table version: 2.01
+# Table format: Format A
+# Date: 04/15/98
+#
+# Contact: Shawn.Steele@microsoft.com
+#
+# General notes: none
+#
+# Format: Three tab-separated columns
+# Column #1 is the cp1252 code (in hex)
+# Column #2 is the Unicode (in hex as 0xXXXX)
+# Column #3 is the Unicode name (follows a comment sign, '#')
+#
+# The entries are in cp1252 order
+#
+0x00 0x0000 #NULL
+0x01 0x0001 #START OF HEADING
+0x02 0x0002 #START OF TEXT
+0x03 0x0003 #END OF TEXT
+0x04 0x0004 #END OF TRANSMISSION
+0x05 0x0005 #ENQUIRY
+0x06 0x0006 #ACKNOWLEDGE
+0x07 0x0007 #BELL
+0x08 0x0008 #BACKSPACE
+0x09 0x0009 #HORIZONTAL TABULATION
+0x0A 0x000A #LINE FEED
+0x0B 0x000B #VERTICAL TABULATION
+0x0C 0x000C #FORM FEED
+0x0D 0x000D #CARRIAGE RETURN
+0x0E 0x000E #SHIFT OUT
+0x0F 0x000F #SHIFT IN
+0x10 0x0010 #DATA LINK ESCAPE
+0x11 0x0011 #DEVICE CONTROL ONE
+0x12 0x0012 #DEVICE CONTROL TWO
+0x13 0x0013 #DEVICE CONTROL THREE
+0x14 0x0014 #DEVICE CONTROL FOUR
+0x15 0x0015 #NEGATIVE ACKNOWLEDGE
+0x16 0x0016 #SYNCHRONOUS IDLE
+0x17 0x0017 #END OF TRANSMISSION BLOCK
+0x18 0x0018 #CANCEL
+0x19 0x0019 #END OF MEDIUM
+0x1A 0x001A #SUBSTITUTE
+0x1B 0x001B #ESCAPE
+0x1C 0x001C #FILE SEPARATOR
+0x1D 0x001D #GROUP SEPARATOR
+0x1E 0x001E #RECORD SEPARATOR
+0x1F 0x001F #UNIT SEPARATOR
+0x20 0x0020 #SPACE
+0x21 0x0021 #EXCLAMATION MARK
+0x22 0x0022 #QUOTATION MARK
+0x23 0x0023 #NUMBER SIGN
+0x24 0x0024 #DOLLAR SIGN
+0x25 0x0025 #PERCENT SIGN
+0x26 0x0026 #AMPERSAND
+0x27 0x0027 #APOSTROPHE
+0x28 0x0028 #LEFT PARENTHESIS
+0x29 0x0029 #RIGHT PARENTHESIS
+0x2A 0x002A #ASTERISK
+0x2B 0x002B #PLUS SIGN
+0x2C 0x002C #COMMA
+0x2D 0x002D #HYPHEN-MINUS
+0x2E 0x002E #FULL STOP
+0x2F 0x002F #SOLIDUS
+0x30 0x0030 #DIGIT ZERO
+0x31 0x0031 #DIGIT ONE
+0x32 0x0032 #DIGIT TWO
+0x33 0x0033 #DIGIT THREE
+0x34 0x0034 #DIGIT FOUR
+0x35 0x0035 #DIGIT FIVE
+0x36 0x0036 #DIGIT SIX
+0x37 0x0037 #DIGIT SEVEN
+0x38 0x0038 #DIGIT EIGHT
+0x39 0x0039 #DIGIT NINE
+0x3A 0x003A #COLON
+0x3B 0x003B #SEMICOLON
+0x3C 0x003C #LESS-THAN SIGN
+0x3D 0x003D #EQUALS SIGN
+0x3E 0x003E #GREATER-THAN SIGN
+0x3F 0x003F #QUESTION MARK
+0x40 0x0040 #COMMERCIAL AT
+0x41 0x0041 #LATIN CAPITAL LETTER A
+0x42 0x0042 #LATIN CAPITAL LETTER B
+0x43 0x0043 #LATIN CAPITAL LETTER C
+0x44 0x0044 #LATIN CAPITAL LETTER D
+0x45 0x0045 #LATIN CAPITAL LETTER E
+0x46 0x0046 #LATIN CAPITAL LETTER F
+0x47 0x0047 #LATIN CAPITAL LETTER G
+0x48 0x0048 #LATIN CAPITAL LETTER H
+0x49 0x0049 #LATIN CAPITAL LETTER I
+0x4A 0x004A #LATIN CAPITAL LETTER J
+0x4B 0x004B #LATIN CAPITAL LETTER K
+0x4C 0x004C #LATIN CAPITAL LETTER L
+0x4D 0x004D #LATIN CAPITAL LETTER M
+0x4E 0x004E #LATIN CAPITAL LETTER N
+0x4F 0x004F #LATIN CAPITAL LETTER O
+0x50 0x0050 #LATIN CAPITAL LETTER P
+0x51 0x0051 #LATIN CAPITAL LETTER Q
+0x52 0x0052 #LATIN CAPITAL LETTER R
+0x53 0x0053 #LATIN CAPITAL LETTER S
+0x54 0x0054 #LATIN CAPITAL LETTER T
+0x55 0x0055 #LATIN CAPITAL LETTER U
+0x56 0x0056 #LATIN CAPITAL LETTER V
+0x57 0x0057 #LATIN CAPITAL LETTER W
+0x58 0x0058 #LATIN CAPITAL LETTER X
+0x59 0x0059 #LATIN CAPITAL LETTER Y
+0x5A 0x005A #LATIN CAPITAL LETTER Z
+0x5B 0x005B #LEFT SQUARE BRACKET
+0x5C 0x005C #REVERSE SOLIDUS
+0x5D 0x005D #RIGHT SQUARE BRACKET
+0x5E 0x005E #CIRCUMFLEX ACCENT
+0x5F 0x005F #LOW LINE
+0x60 0x0060 #GRAVE ACCENT
+0x61 0x0061 #LATIN SMALL LETTER A
+0x62 0x0062 #LATIN SMALL LETTER B
+0x63 0x0063 #LATIN SMALL LETTER C
+0x64 0x0064 #LATIN SMALL LETTER D
+0x65 0x0065 #LATIN SMALL LETTER E
+0x66 0x0066 #LATIN SMALL LETTER F
+0x67 0x0067 #LATIN SMALL LETTER G
+0x68 0x0068 #LATIN SMALL LETTER H
+0x69 0x0069 #LATIN SMALL LETTER I
+0x6A 0x006A #LATIN SMALL LETTER J
+0x6B 0x006B #LATIN SMALL LETTER K
+0x6C 0x006C #LATIN SMALL LETTER L
+0x6D 0x006D #LATIN SMALL LETTER M
+0x6E 0x006E #LATIN SMALL LETTER N
+0x6F 0x006F #LATIN SMALL LETTER O
+0x70 0x0070 #LATIN SMALL LETTER P
+0x71 0x0071 #LATIN SMALL LETTER Q
+0x72 0x0072 #LATIN SMALL LETTER R
+0x73 0x0073 #LATIN SMALL LETTER S
+0x74 0x0074 #LATIN SMALL LETTER T
+0x75 0x0075 #LATIN SMALL LETTER U
+0x76 0x0076 #LATIN SMALL LETTER V
+0x77 0x0077 #LATIN SMALL LETTER W
+0x78 0x0078 #LATIN SMALL LETTER X
+0x79 0x0079 #LATIN SMALL LETTER Y
+0x7A 0x007A #LATIN SMALL LETTER Z
+0x7B 0x007B #LEFT CURLY BRACKET
+0x7C 0x007C #VERTICAL LINE
+0x7D 0x007D #RIGHT CURLY BRACKET
+0x7E 0x007E #TILDE
+0x7F 0x007F #DELETE
+0x80 0x20AC #EURO SIGN
+0x81 #UNDEFINED
+0x82 0x201A #SINGLE LOW-9 QUOTATION MARK
+0x83 0x0192 #LATIN SMALL LETTER F WITH HOOK
+0x84 0x201E #DOUBLE LOW-9 QUOTATION MARK
+0x85 0x2026 #HORIZONTAL ELLIPSIS
+0x86 0x2020 #DAGGER
+0x87 0x2021 #DOUBLE DAGGER
+0x88 0x02C6 #MODIFIER LETTER CIRCUMFLEX ACCENT
+0x89 0x2030 #PER MILLE SIGN
+0x8A 0x0160 #LATIN CAPITAL LETTER S WITH CARON
+0x8B 0x2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+0x8C 0x0152 #LATIN CAPITAL LIGATURE OE
+0x8D #UNDEFINED
+0x8E 0x017D #LATIN CAPITAL LETTER Z WITH CARON
+0x8F #UNDEFINED
+0x90 #UNDEFINED
+0x91 0x2018 #LEFT SINGLE QUOTATION MARK
+0x92 0x2019 #RIGHT SINGLE QUOTATION MARK
+0x93 0x201C #LEFT DOUBLE QUOTATION MARK
+0x94 0x201D #RIGHT DOUBLE QUOTATION MARK
+0x95 0x2022 #BULLET
+0x96 0x2013 #EN DASH
+0x97 0x2014 #EM DASH
+0x98 0x02DC #SMALL TILDE
+0x99 0x2122 #TRADE MARK SIGN
+0x9A 0x0161 #LATIN SMALL LETTER S WITH CARON
+0x9B 0x203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+0x9C 0x0153 #LATIN SMALL LIGATURE OE
+0x9D #UNDEFINED
+0x9E 0x017E #LATIN SMALL LETTER Z WITH CARON
+0x9F 0x0178 #LATIN CAPITAL LETTER Y WITH DIAERESIS
+0xA0 0x00A0 #NO-BREAK SPACE
+0xA1 0x00A1 #INVERTED EXCLAMATION MARK
+0xA2 0x00A2 #CENT SIGN
+0xA3 0x00A3 #POUND SIGN
+0xA4 0x00A4 #CURRENCY SIGN
+0xA5 0x00A5 #YEN SIGN
+0xA6 0x00A6 #BROKEN BAR
+0xA7 0x00A7 #SECTION SIGN
+0xA8 0x00A8 #DIAERESIS
+0xA9 0x00A9 #COPYRIGHT SIGN
+0xAA 0x00AA #FEMININE ORDINAL INDICATOR
+0xAB 0x00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xAC 0x00AC #NOT SIGN
+0xAD 0x00AD #SOFT HYPHEN
+0xAE 0x00AE #REGISTERED SIGN
+0xAF 0x00AF #MACRON
+0xB0 0x00B0 #DEGREE SIGN
+0xB1 0x00B1 #PLUS-MINUS SIGN
+0xB2 0x00B2 #SUPERSCRIPT TWO
+0xB3 0x00B3 #SUPERSCRIPT THREE
+0xB4 0x00B4 #ACUTE ACCENT
+0xB5 0x00B5 #MICRO SIGN
+0xB6 0x00B6 #PILCROW SIGN
+0xB7 0x00B7 #MIDDLE DOT
+0xB8 0x00B8 #CEDILLA
+0xB9 0x00B9 #SUPERSCRIPT ONE
+0xBA 0x00BA #MASCULINE ORDINAL INDICATOR
+0xBB 0x00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xBC 0x00BC #VULGAR FRACTION ONE QUARTER
+0xBD 0x00BD #VULGAR FRACTION ONE HALF
+0xBE 0x00BE #VULGAR FRACTION THREE QUARTERS
+0xBF 0x00BF #INVERTED QUESTION MARK
+0xC0 0x00C0 #LATIN CAPITAL LETTER A WITH GRAVE
+0xC1 0x00C1 #LATIN CAPITAL LETTER A WITH ACUTE
+0xC2 0x00C2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0xC3 0x00C3 #LATIN CAPITAL LETTER A WITH TILDE
+0xC4 0x00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS
+0xC5 0x00C5 #LATIN CAPITAL LETTER A WITH RING ABOVE
+0xC6 0x00C6 #LATIN CAPITAL LETTER AE
+0xC7 0x00C7 #LATIN CAPITAL LETTER C WITH CEDILLA
+0xC8 0x00C8 #LATIN CAPITAL LETTER E WITH GRAVE
+0xC9 0x00C9 #LATIN CAPITAL LETTER E WITH ACUTE
+0xCA 0x00CA #LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+0xCB 0x00CB #LATIN CAPITAL LETTER E WITH DIAERESIS
+0xCC 0x00CC #LATIN CAPITAL LETTER I WITH GRAVE
+0xCD 0x00CD #LATIN CAPITAL LETTER I WITH ACUTE
+0xCE 0x00CE #LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0xCF 0x00CF #LATIN CAPITAL LETTER I WITH DIAERESIS
+0xD0 0x00D0 #LATIN CAPITAL LETTER ETH
+0xD1 0x00D1 #LATIN CAPITAL LETTER N WITH TILDE
+0xD2 0x00D2 #LATIN CAPITAL LETTER O WITH GRAVE
+0xD3 0x00D3 #LATIN CAPITAL LETTER O WITH ACUTE
+0xD4 0x00D4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0xD5 0x00D5 #LATIN CAPITAL LETTER O WITH TILDE
+0xD6 0x00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS
+0xD7 0x00D7 #MULTIPLICATION SIGN
+0xD8 0x00D8 #LATIN CAPITAL LETTER O WITH STROKE
+0xD9 0x00D9 #LATIN CAPITAL LETTER U WITH GRAVE
+0xDA 0x00DA #LATIN CAPITAL LETTER U WITH ACUTE
+0xDB 0x00DB #LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0xDC 0x00DC #LATIN CAPITAL LETTER U WITH DIAERESIS
+0xDD 0x00DD #LATIN CAPITAL LETTER Y WITH ACUTE
+0xDE 0x00DE #LATIN CAPITAL LETTER THORN
+0xDF 0x00DF #LATIN SMALL LETTER SHARP S
+0xE0 0x00E0 #LATIN SMALL LETTER A WITH GRAVE
+0xE1 0x00E1 #LATIN SMALL LETTER A WITH ACUTE
+0xE2 0x00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX
+0xE3 0x00E3 #LATIN SMALL LETTER A WITH TILDE
+0xE4 0x00E4 #LATIN SMALL LETTER A WITH DIAERESIS
+0xE5 0x00E5 #LATIN SMALL LETTER A WITH RING ABOVE
+0xE6 0x00E6 #LATIN SMALL LETTER AE
+0xE7 0x00E7 #LATIN SMALL LETTER C WITH CEDILLA
+0xE8 0x00E8 #LATIN SMALL LETTER E WITH GRAVE
+0xE9 0x00E9 #LATIN SMALL LETTER E WITH ACUTE
+0xEA 0x00EA #LATIN SMALL LETTER E WITH CIRCUMFLEX
+0xEB 0x00EB #LATIN SMALL LETTER E WITH DIAERESIS
+0xEC 0x00EC #LATIN SMALL LETTER I WITH GRAVE
+0xED 0x00ED #LATIN SMALL LETTER I WITH ACUTE
+0xEE 0x00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX
+0xEF 0x00EF #LATIN SMALL LETTER I WITH DIAERESIS
+0xF0 0x00F0 #LATIN SMALL LETTER ETH
+0xF1 0x00F1 #LATIN SMALL LETTER N WITH TILDE
+0xF2 0x00F2 #LATIN SMALL LETTER O WITH GRAVE
+0xF3 0x00F3 #LATIN SMALL LETTER O WITH ACUTE
+0xF4 0x00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX
+0xF5 0x00F5 #LATIN SMALL LETTER O WITH TILDE
+0xF6 0x00F6 #LATIN SMALL LETTER O WITH DIAERESIS
+0xF7 0x00F7 #DIVISION SIGN
+0xF8 0x00F8 #LATIN SMALL LETTER O WITH STROKE
+0xF9 0x00F9 #LATIN SMALL LETTER U WITH GRAVE
+0xFA 0x00FA #LATIN SMALL LETTER U WITH ACUTE
+0xFB 0x00FB #LATIN SMALL LETTER U WITH CIRCUMFLEX
+0xFC 0x00FC #LATIN SMALL LETTER U WITH DIAERESIS
+0xFD 0x00FD #LATIN SMALL LETTER Y WITH ACUTE
+0xFE 0x00FE #LATIN SMALL LETTER THORN
+0xFF 0x00FF #LATIN SMALL LETTER Y WITH DIAERESIS
diff --git a/ext/standard/html_tables/mappings/CP866.TXT b/ext/standard/html_tables/mappings/CP866.TXT
new file mode 100644
index 0000000..b0213a1
--- /dev/null
+++ b/ext/standard/html_tables/mappings/CP866.TXT
@@ -0,0 +1,275 @@
+#
+# Name: cp866_DOSCyrillicRussian to Unicode table
+# Unicode version: 2.0
+# Table version: 2.00
+# Table format: Format A
+# Date: 04/24/96
+# Contact: Shawn.Steele@microsoft.com
+#
+# General notes: none
+#
+# Format: Three tab-separated columns
+# Column #1 is the cp866_DOSCyrillicRussian code (in hex)
+# Column #2 is the Unicode (in hex as 0xXXXX)
+# Column #3 is the Unicode name (follows a comment sign, '#')
+#
+# The entries are in cp866_DOSCyrillicRussian order
+#
+0x00 0x0000 #NULL
+0x01 0x0001 #START OF HEADING
+0x02 0x0002 #START OF TEXT
+0x03 0x0003 #END OF TEXT
+0x04 0x0004 #END OF TRANSMISSION
+0x05 0x0005 #ENQUIRY
+0x06 0x0006 #ACKNOWLEDGE
+0x07 0x0007 #BELL
+0x08 0x0008 #BACKSPACE
+0x09 0x0009 #HORIZONTAL TABULATION
+0x0a 0x000a #LINE FEED
+0x0b 0x000b #VERTICAL TABULATION
+0x0c 0x000c #FORM FEED
+0x0d 0x000d #CARRIAGE RETURN
+0x0e 0x000e #SHIFT OUT
+0x0f 0x000f #SHIFT IN
+0x10 0x0010 #DATA LINK ESCAPE
+0x11 0x0011 #DEVICE CONTROL ONE
+0x12 0x0012 #DEVICE CONTROL TWO
+0x13 0x0013 #DEVICE CONTROL THREE
+0x14 0x0014 #DEVICE CONTROL FOUR
+0x15 0x0015 #NEGATIVE ACKNOWLEDGE
+0x16 0x0016 #SYNCHRONOUS IDLE
+0x17 0x0017 #END OF TRANSMISSION BLOCK
+0x18 0x0018 #CANCEL
+0x19 0x0019 #END OF MEDIUM
+0x1a 0x001a #SUBSTITUTE
+0x1b 0x001b #ESCAPE
+0x1c 0x001c #FILE SEPARATOR
+0x1d 0x001d #GROUP SEPARATOR
+0x1e 0x001e #RECORD SEPARATOR
+0x1f 0x001f #UNIT SEPARATOR
+0x20 0x0020 #SPACE
+0x21 0x0021 #EXCLAMATION MARK
+0x22 0x0022 #QUOTATION MARK
+0x23 0x0023 #NUMBER SIGN
+0x24 0x0024 #DOLLAR SIGN
+0x25 0x0025 #PERCENT SIGN
+0x26 0x0026 #AMPERSAND
+0x27 0x0027 #APOSTROPHE
+0x28 0x0028 #LEFT PARENTHESIS
+0x29 0x0029 #RIGHT PARENTHESIS
+0x2a 0x002a #ASTERISK
+0x2b 0x002b #PLUS SIGN
+0x2c 0x002c #COMMA
+0x2d 0x002d #HYPHEN-MINUS
+0x2e 0x002e #FULL STOP
+0x2f 0x002f #SOLIDUS
+0x30 0x0030 #DIGIT ZERO
+0x31 0x0031 #DIGIT ONE
+0x32 0x0032 #DIGIT TWO
+0x33 0x0033 #DIGIT THREE
+0x34 0x0034 #DIGIT FOUR
+0x35 0x0035 #DIGIT FIVE
+0x36 0x0036 #DIGIT SIX
+0x37 0x0037 #DIGIT SEVEN
+0x38 0x0038 #DIGIT EIGHT
+0x39 0x0039 #DIGIT NINE
+0x3a 0x003a #COLON
+0x3b 0x003b #SEMICOLON
+0x3c 0x003c #LESS-THAN SIGN
+0x3d 0x003d #EQUALS SIGN
+0x3e 0x003e #GREATER-THAN SIGN
+0x3f 0x003f #QUESTION MARK
+0x40 0x0040 #COMMERCIAL AT
+0x41 0x0041 #LATIN CAPITAL LETTER A
+0x42 0x0042 #LATIN CAPITAL LETTER B
+0x43 0x0043 #LATIN CAPITAL LETTER C
+0x44 0x0044 #LATIN CAPITAL LETTER D
+0x45 0x0045 #LATIN CAPITAL LETTER E
+0x46 0x0046 #LATIN CAPITAL LETTER F
+0x47 0x0047 #LATIN CAPITAL LETTER G
+0x48 0x0048 #LATIN CAPITAL LETTER H
+0x49 0x0049 #LATIN CAPITAL LETTER I
+0x4a 0x004a #LATIN CAPITAL LETTER J
+0x4b 0x004b #LATIN CAPITAL LETTER K
+0x4c 0x004c #LATIN CAPITAL LETTER L
+0x4d 0x004d #LATIN CAPITAL LETTER M
+0x4e 0x004e #LATIN CAPITAL LETTER N
+0x4f 0x004f #LATIN CAPITAL LETTER O
+0x50 0x0050 #LATIN CAPITAL LETTER P
+0x51 0x0051 #LATIN CAPITAL LETTER Q
+0x52 0x0052 #LATIN CAPITAL LETTER R
+0x53 0x0053 #LATIN CAPITAL LETTER S
+0x54 0x0054 #LATIN CAPITAL LETTER T
+0x55 0x0055 #LATIN CAPITAL LETTER U
+0x56 0x0056 #LATIN CAPITAL LETTER V
+0x57 0x0057 #LATIN CAPITAL LETTER W
+0x58 0x0058 #LATIN CAPITAL LETTER X
+0x59 0x0059 #LATIN CAPITAL LETTER Y
+0x5a 0x005a #LATIN CAPITAL LETTER Z
+0x5b 0x005b #LEFT SQUARE BRACKET
+0x5c 0x005c #REVERSE SOLIDUS
+0x5d 0x005d #RIGHT SQUARE BRACKET
+0x5e 0x005e #CIRCUMFLEX ACCENT
+0x5f 0x005f #LOW LINE
+0x60 0x0060 #GRAVE ACCENT
+0x61 0x0061 #LATIN SMALL LETTER A
+0x62 0x0062 #LATIN SMALL LETTER B
+0x63 0x0063 #LATIN SMALL LETTER C
+0x64 0x0064 #LATIN SMALL LETTER D
+0x65 0x0065 #LATIN SMALL LETTER E
+0x66 0x0066 #LATIN SMALL LETTER F
+0x67 0x0067 #LATIN SMALL LETTER G
+0x68 0x0068 #LATIN SMALL LETTER H
+0x69 0x0069 #LATIN SMALL LETTER I
+0x6a 0x006a #LATIN SMALL LETTER J
+0x6b 0x006b #LATIN SMALL LETTER K
+0x6c 0x006c #LATIN SMALL LETTER L
+0x6d 0x006d #LATIN SMALL LETTER M
+0x6e 0x006e #LATIN SMALL LETTER N
+0x6f 0x006f #LATIN SMALL LETTER O
+0x70 0x0070 #LATIN SMALL LETTER P
+0x71 0x0071 #LATIN SMALL LETTER Q
+0x72 0x0072 #LATIN SMALL LETTER R
+0x73 0x0073 #LATIN SMALL LETTER S
+0x74 0x0074 #LATIN SMALL LETTER T
+0x75 0x0075 #LATIN SMALL LETTER U
+0x76 0x0076 #LATIN SMALL LETTER V
+0x77 0x0077 #LATIN SMALL LETTER W
+0x78 0x0078 #LATIN SMALL LETTER X
+0x79 0x0079 #LATIN SMALL LETTER Y
+0x7a 0x007a #LATIN SMALL LETTER Z
+0x7b 0x007b #LEFT CURLY BRACKET
+0x7c 0x007c #VERTICAL LINE
+0x7d 0x007d #RIGHT CURLY BRACKET
+0x7e 0x007e #TILDE
+0x7f 0x007f #DELETE
+0x80 0x0410 #CYRILLIC CAPITAL LETTER A
+0x81 0x0411 #CYRILLIC CAPITAL LETTER BE
+0x82 0x0412 #CYRILLIC CAPITAL LETTER VE
+0x83 0x0413 #CYRILLIC CAPITAL LETTER GHE
+0x84 0x0414 #CYRILLIC CAPITAL LETTER DE
+0x85 0x0415 #CYRILLIC CAPITAL LETTER IE
+0x86 0x0416 #CYRILLIC CAPITAL LETTER ZHE
+0x87 0x0417 #CYRILLIC CAPITAL LETTER ZE
+0x88 0x0418 #CYRILLIC CAPITAL LETTER I
+0x89 0x0419 #CYRILLIC CAPITAL LETTER SHORT I
+0x8a 0x041a #CYRILLIC CAPITAL LETTER KA
+0x8b 0x041b #CYRILLIC CAPITAL LETTER EL
+0x8c 0x041c #CYRILLIC CAPITAL LETTER EM
+0x8d 0x041d #CYRILLIC CAPITAL LETTER EN
+0x8e 0x041e #CYRILLIC CAPITAL LETTER O
+0x8f 0x041f #CYRILLIC CAPITAL LETTER PE
+0x90 0x0420 #CYRILLIC CAPITAL LETTER ER
+0x91 0x0421 #CYRILLIC CAPITAL LETTER ES
+0x92 0x0422 #CYRILLIC CAPITAL LETTER TE
+0x93 0x0423 #CYRILLIC CAPITAL LETTER U
+0x94 0x0424 #CYRILLIC CAPITAL LETTER EF
+0x95 0x0425 #CYRILLIC CAPITAL LETTER HA
+0x96 0x0426 #CYRILLIC CAPITAL LETTER TSE
+0x97 0x0427 #CYRILLIC CAPITAL LETTER CHE
+0x98 0x0428 #CYRILLIC CAPITAL LETTER SHA
+0x99 0x0429 #CYRILLIC CAPITAL LETTER SHCHA
+0x9a 0x042a #CYRILLIC CAPITAL LETTER HARD SIGN
+0x9b 0x042b #CYRILLIC CAPITAL LETTER YERU
+0x9c 0x042c #CYRILLIC CAPITAL LETTER SOFT SIGN
+0x9d 0x042d #CYRILLIC CAPITAL LETTER E
+0x9e 0x042e #CYRILLIC CAPITAL LETTER YU
+0x9f 0x042f #CYRILLIC CAPITAL LETTER YA
+0xa0 0x0430 #CYRILLIC SMALL LETTER A
+0xa1 0x0431 #CYRILLIC SMALL LETTER BE
+0xa2 0x0432 #CYRILLIC SMALL LETTER VE
+0xa3 0x0433 #CYRILLIC SMALL LETTER GHE
+0xa4 0x0434 #CYRILLIC SMALL LETTER DE
+0xa5 0x0435 #CYRILLIC SMALL LETTER IE
+0xa6 0x0436 #CYRILLIC SMALL LETTER ZHE
+0xa7 0x0437 #CYRILLIC SMALL LETTER ZE
+0xa8 0x0438 #CYRILLIC SMALL LETTER I
+0xa9 0x0439 #CYRILLIC SMALL LETTER SHORT I
+0xaa 0x043a #CYRILLIC SMALL LETTER KA
+0xab 0x043b #CYRILLIC SMALL LETTER EL
+0xac 0x043c #CYRILLIC SMALL LETTER EM
+0xad 0x043d #CYRILLIC SMALL LETTER EN
+0xae 0x043e #CYRILLIC SMALL LETTER O
+0xaf 0x043f #CYRILLIC SMALL LETTER PE
+0xb0 0x2591 #LIGHT SHADE
+0xb1 0x2592 #MEDIUM SHADE
+0xb2 0x2593 #DARK SHADE
+0xb3 0x2502 #BOX DRAWINGS LIGHT VERTICAL
+0xb4 0x2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT
+0xb5 0x2561 #BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+0xb6 0x2562 #BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+0xb7 0x2556 #BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+0xb8 0x2555 #BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+0xb9 0x2563 #BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+0xba 0x2551 #BOX DRAWINGS DOUBLE VERTICAL
+0xbb 0x2557 #BOX DRAWINGS DOUBLE DOWN AND LEFT
+0xbc 0x255d #BOX DRAWINGS DOUBLE UP AND LEFT
+0xbd 0x255c #BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+0xbe 0x255b #BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+0xbf 0x2510 #BOX DRAWINGS LIGHT DOWN AND LEFT
+0xc0 0x2514 #BOX DRAWINGS LIGHT UP AND RIGHT
+0xc1 0x2534 #BOX DRAWINGS LIGHT UP AND HORIZONTAL
+0xc2 0x252c #BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+0xc3 0x251c #BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+0xc4 0x2500 #BOX DRAWINGS LIGHT HORIZONTAL
+0xc5 0x253c #BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+0xc6 0x255e #BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+0xc7 0x255f #BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+0xc8 0x255a #BOX DRAWINGS DOUBLE UP AND RIGHT
+0xc9 0x2554 #BOX DRAWINGS DOUBLE DOWN AND RIGHT
+0xca 0x2569 #BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+0xcb 0x2566 #BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+0xcc 0x2560 #BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+0xcd 0x2550 #BOX DRAWINGS DOUBLE HORIZONTAL
+0xce 0x256c #BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+0xcf 0x2567 #BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+0xd0 0x2568 #BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+0xd1 0x2564 #BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+0xd2 0x2565 #BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+0xd3 0x2559 #BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+0xd4 0x2558 #BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+0xd5 0x2552 #BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+0xd6 0x2553 #BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+0xd7 0x256b #BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+0xd8 0x256a #BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+0xd9 0x2518 #BOX DRAWINGS LIGHT UP AND LEFT
+0xda 0x250c #BOX DRAWINGS LIGHT DOWN AND RIGHT
+0xdb 0x2588 #FULL BLOCK
+0xdc 0x2584 #LOWER HALF BLOCK
+0xdd 0x258c #LEFT HALF BLOCK
+0xde 0x2590 #RIGHT HALF BLOCK
+0xdf 0x2580 #UPPER HALF BLOCK
+0xe0 0x0440 #CYRILLIC SMALL LETTER ER
+0xe1 0x0441 #CYRILLIC SMALL LETTER ES
+0xe2 0x0442 #CYRILLIC SMALL LETTER TE
+0xe3 0x0443 #CYRILLIC SMALL LETTER U
+0xe4 0x0444 #CYRILLIC SMALL LETTER EF
+0xe5 0x0445 #CYRILLIC SMALL LETTER HA
+0xe6 0x0446 #CYRILLIC SMALL LETTER TSE
+0xe7 0x0447 #CYRILLIC SMALL LETTER CHE
+0xe8 0x0448 #CYRILLIC SMALL LETTER SHA
+0xe9 0x0449 #CYRILLIC SMALL LETTER SHCHA
+0xea 0x044a #CYRILLIC SMALL LETTER HARD SIGN
+0xeb 0x044b #CYRILLIC SMALL LETTER YERU
+0xec 0x044c #CYRILLIC SMALL LETTER SOFT SIGN
+0xed 0x044d #CYRILLIC SMALL LETTER E
+0xee 0x044e #CYRILLIC SMALL LETTER YU
+0xef 0x044f #CYRILLIC SMALL LETTER YA
+0xf0 0x0401 #CYRILLIC CAPITAL LETTER IO
+0xf1 0x0451 #CYRILLIC SMALL LETTER IO
+0xf2 0x0404 #CYRILLIC CAPITAL LETTER UKRAINIAN IE
+0xf3 0x0454 #CYRILLIC SMALL LETTER UKRAINIAN IE
+0xf4 0x0407 #CYRILLIC CAPITAL LETTER YI
+0xf5 0x0457 #CYRILLIC SMALL LETTER YI
+0xf6 0x040e #CYRILLIC CAPITAL LETTER SHORT U
+0xf7 0x045e #CYRILLIC SMALL LETTER SHORT U
+0xf8 0x00b0 #DEGREE SIGN
+0xf9 0x2219 #BULLET OPERATOR
+0xfa 0x00b7 #MIDDLE DOT
+0xfb 0x221a #SQUARE ROOT
+0xfc 0x2116 #NUMERO SIGN
+0xfd 0x00a4 #CURRENCY SIGN
+0xfe 0x25a0 #BLACK SQUARE
+0xff 0x00a0 #NO-BREAK SPACE
+
+ \ No newline at end of file
diff --git a/ext/standard/html_tables/mappings/KOI8-R.TXT b/ext/standard/html_tables/mappings/KOI8-R.TXT
new file mode 100644
index 0000000..5105610
--- /dev/null
+++ b/ext/standard/html_tables/mappings/KOI8-R.TXT
@@ -0,0 +1,302 @@
+#
+# Name: KOI8-R (RFC1489) to Unicode
+# Unicode version: 3.0
+# Table version: 1.0
+# Table format: Format A
+# Date: 18 August 1999
+# Authors: Helmut Richter <richter@lrz.de>
+#
+# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved.
+#
+# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
+# No claims are made as to fitness for any particular purpose. No
+# warranties of any kind are expressed or implied. The recipient
+# agrees to determine applicability of information provided. If this
+# file has been provided on optical media by Unicode, Inc., the sole
+# remedy for any claim will be exchange of defective media within 90
+# days of receipt.
+#
+# Unicode, Inc. hereby grants the right to freely use the information
+# supplied in this file in the creation of products supporting the
+# Unicode Standard, and to make copies of this file in any form for
+# internal or external distribution as long as this notice remains
+# attached.
+#
+# General notes:
+#
+# This table contains the data the Unicode Consortium has on how
+# KOI8-R characters map into Unicode. The underlying document is the
+# mapping described in RFC 1489. No statements are made as to whether
+# this mapping is the same as the mapping defined as "Code Page 878"
+# with some vendors.
+#
+# Format: Three tab-separated columns
+# Column #1 is the KOI8-R code (in hex as 0xXX)
+# Column #2 is the Unicode (in hex as 0xXXXX)
+# Column #3 the Unicode name (follows a comment sign, '#')
+#
+# The entries are in KOI8-R order.
+#
+# Version history
+# 1.0 version: created.
+#
+# Any comments or problems, contact <errata@unicode.org>
+# Please note that <errata@unicode.org> is an archival address;
+# notices will be checked, but do not expect an immediate response.
+#
+0x00 0x0000 # NULL
+0x01 0x0001 # START OF HEADING
+0x02 0x0002 # START OF TEXT
+0x03 0x0003 # END OF TEXT
+0x04 0x0004 # END OF TRANSMISSION
+0x05 0x0005 # ENQUIRY
+0x06 0x0006 # ACKNOWLEDGE
+0x07 0x0007 # BELL
+0x08 0x0008 # BACKSPACE
+0x09 0x0009 # HORIZONTAL TABULATION
+0x0A 0x000A # LINE FEED
+0x0B 0x000B # VERTICAL TABULATION
+0x0C 0x000C # FORM FEED
+0x0D 0x000D # CARRIAGE RETURN
+0x0E 0x000E # SHIFT OUT
+0x0F 0x000F # SHIFT IN
+0x10 0x0010 # DATA LINK ESCAPE
+0x11 0x0011 # DEVICE CONTROL ONE
+0x12 0x0012 # DEVICE CONTROL TWO
+0x13 0x0013 # DEVICE CONTROL THREE
+0x14 0x0014 # DEVICE CONTROL FOUR
+0x15 0x0015 # NEGATIVE ACKNOWLEDGE
+0x16 0x0016 # SYNCHRONOUS IDLE
+0x17 0x0017 # END OF TRANSMISSION BLOCK
+0x18 0x0018 # CANCEL
+0x19 0x0019 # END OF MEDIUM
+0x1A 0x001A # SUBSTITUTE
+0x1B 0x001B # ESCAPE
+0x1C 0x001C # FILE SEPARATOR
+0x1D 0x001D # GROUP SEPARATOR
+0x1E 0x001E # RECORD SEPARATOR
+0x1F 0x001F # UNIT SEPARATOR
+0x20 0x0020 # SPACE
+0x21 0x0021 # EXCLAMATION MARK
+0x22 0x0022 # QUOTATION MARK
+0x23 0x0023 # NUMBER SIGN
+0x24 0x0024 # DOLLAR SIGN
+0x25 0x0025 # PERCENT SIGN
+0x26 0x0026 # AMPERSAND
+0x27 0x0027 # APOSTROPHE
+0x28 0x0028 # LEFT PARENTHESIS
+0x29 0x0029 # RIGHT PARENTHESIS
+0x2A 0x002A # ASTERISK
+0x2B 0x002B # PLUS SIGN
+0x2C 0x002C # COMMA
+0x2D 0x002D # HYPHEN-MINUS
+0x2E 0x002E # FULL STOP
+0x2F 0x002F # SOLIDUS
+0x30 0x0030 # DIGIT ZERO
+0x31 0x0031 # DIGIT ONE
+0x32 0x0032 # DIGIT TWO
+0x33 0x0033 # DIGIT THREE
+0x34 0x0034 # DIGIT FOUR
+0x35 0x0035 # DIGIT FIVE
+0x36 0x0036 # DIGIT SIX
+0x37 0x0037 # DIGIT SEVEN
+0x38 0x0038 # DIGIT EIGHT
+0x39 0x0039 # DIGIT NINE
+0x3A 0x003A # COLON
+0x3B 0x003B # SEMICOLON
+0x3C 0x003C # LESS-THAN SIGN
+0x3D 0x003D # EQUALS SIGN
+0x3E 0x003E # GREATER-THAN SIGN
+0x3F 0x003F # QUESTION MARK
+0x40 0x0040 # COMMERCIAL AT
+0x41 0x0041 # LATIN CAPITAL LETTER A
+0x42 0x0042 # LATIN CAPITAL LETTER B
+0x43 0x0043 # LATIN CAPITAL LETTER C
+0x44 0x0044 # LATIN CAPITAL LETTER D
+0x45 0x0045 # LATIN CAPITAL LETTER E
+0x46 0x0046 # LATIN CAPITAL LETTER F
+0x47 0x0047 # LATIN CAPITAL LETTER G
+0x48 0x0048 # LATIN CAPITAL LETTER H
+0x49 0x0049 # LATIN CAPITAL LETTER I
+0x4A 0x004A # LATIN CAPITAL LETTER J
+0x4B 0x004B # LATIN CAPITAL LETTER K
+0x4C 0x004C # LATIN CAPITAL LETTER L
+0x4D 0x004D # LATIN CAPITAL LETTER M
+0x4E 0x004E # LATIN CAPITAL LETTER N
+0x4F 0x004F # LATIN CAPITAL LETTER O
+0x50 0x0050 # LATIN CAPITAL LETTER P
+0x51 0x0051 # LATIN CAPITAL LETTER Q
+0x52 0x0052 # LATIN CAPITAL LETTER R
+0x53 0x0053 # LATIN CAPITAL LETTER S
+0x54 0x0054 # LATIN CAPITAL LETTER T
+0x55 0x0055 # LATIN CAPITAL LETTER U
+0x56 0x0056 # LATIN CAPITAL LETTER V
+0x57 0x0057 # LATIN CAPITAL LETTER W
+0x58 0x0058 # LATIN CAPITAL LETTER X
+0x59 0x0059 # LATIN CAPITAL LETTER Y
+0x5A 0x005A # LATIN CAPITAL LETTER Z
+0x5B 0x005B # LEFT SQUARE BRACKET
+0x5C 0x005C # REVERSE SOLIDUS
+0x5D 0x005D # RIGHT SQUARE BRACKET
+0x5E 0x005E # CIRCUMFLEX ACCENT
+0x5F 0x005F # LOW LINE
+0x60 0x0060 # GRAVE ACCENT
+0x61 0x0061 # LATIN SMALL LETTER A
+0x62 0x0062 # LATIN SMALL LETTER B
+0x63 0x0063 # LATIN SMALL LETTER C
+0x64 0x0064 # LATIN SMALL LETTER D
+0x65 0x0065 # LATIN SMALL LETTER E
+0x66 0x0066 # LATIN SMALL LETTER F
+0x67 0x0067 # LATIN SMALL LETTER G
+0x68 0x0068 # LATIN SMALL LETTER H
+0x69 0x0069 # LATIN SMALL LETTER I
+0x6A 0x006A # LATIN SMALL LETTER J
+0x6B 0x006B # LATIN SMALL LETTER K
+0x6C 0x006C # LATIN SMALL LETTER L
+0x6D 0x006D # LATIN SMALL LETTER M
+0x6E 0x006E # LATIN SMALL LETTER N
+0x6F 0x006F # LATIN SMALL LETTER O
+0x70 0x0070 # LATIN SMALL LETTER P
+0x71 0x0071 # LATIN SMALL LETTER Q
+0x72 0x0072 # LATIN SMALL LETTER R
+0x73 0x0073 # LATIN SMALL LETTER S
+0x74 0x0074 # LATIN SMALL LETTER T
+0x75 0x0075 # LATIN SMALL LETTER U
+0x76 0x0076 # LATIN SMALL LETTER V
+0x77 0x0077 # LATIN SMALL LETTER W
+0x78 0x0078 # LATIN SMALL LETTER X
+0x79 0x0079 # LATIN SMALL LETTER Y
+0x7A 0x007A # LATIN SMALL LETTER Z
+0x7B 0x007B # LEFT CURLY BRACKET
+0x7C 0x007C # VERTICAL LINE
+0x7D 0x007D # RIGHT CURLY BRACKET
+0x7E 0x007E # TILDE
+0x7F 0x007F # DELETE
+0x80 0x2500 # BOX DRAWINGS LIGHT HORIZONTAL
+0x81 0x2502 # BOX DRAWINGS LIGHT VERTICAL
+0x82 0x250C # BOX DRAWINGS LIGHT DOWN AND RIGHT
+0x83 0x2510 # BOX DRAWINGS LIGHT DOWN AND LEFT
+0x84 0x2514 # BOX DRAWINGS LIGHT UP AND RIGHT
+0x85 0x2518 # BOX DRAWINGS LIGHT UP AND LEFT
+0x86 0x251C # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+0x87 0x2524 # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+0x88 0x252C # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+0x89 0x2534 # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+0x8A 0x253C # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+0x8B 0x2580 # UPPER HALF BLOCK
+0x8C 0x2584 # LOWER HALF BLOCK
+0x8D 0x2588 # FULL BLOCK
+0x8E 0x258C # LEFT HALF BLOCK
+0x8F 0x2590 # RIGHT HALF BLOCK
+0x90 0x2591 # LIGHT SHADE
+0x91 0x2592 # MEDIUM SHADE
+0x92 0x2593 # DARK SHADE
+0x93 0x2320 # TOP HALF INTEGRAL
+0x94 0x25A0 # BLACK SQUARE
+0x95 0x2219 # BULLET OPERATOR
+0x96 0x221A # SQUARE ROOT
+0x97 0x2248 # ALMOST EQUAL TO
+0x98 0x2264 # LESS-THAN OR EQUAL TO
+0x99 0x2265 # GREATER-THAN OR EQUAL TO
+0x9A 0x00A0 # NO-BREAK SPACE
+0x9B 0x2321 # BOTTOM HALF INTEGRAL
+0x9C 0x00B0 # DEGREE SIGN
+0x9D 0x00B2 # SUPERSCRIPT TWO
+0x9E 0x00B7 # MIDDLE DOT
+0x9F 0x00F7 # DIVISION SIGN
+0xA0 0x2550 # BOX DRAWINGS DOUBLE HORIZONTAL
+0xA1 0x2551 # BOX DRAWINGS DOUBLE VERTICAL
+0xA2 0x2552 # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+0xA3 0x0451 # CYRILLIC SMALL LETTER IO
+0xA4 0x2553 # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+0xA5 0x2554 # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+0xA6 0x2555 # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+0xA7 0x2556 # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+0xA8 0x2557 # BOX DRAWINGS DOUBLE DOWN AND LEFT
+0xA9 0x2558 # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+0xAA 0x2559 # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+0xAB 0x255A # BOX DRAWINGS DOUBLE UP AND RIGHT
+0xAC 0x255B # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+0xAD 0x255C # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+0xAE 0x255D # BOX DRAWINGS DOUBLE UP AND LEFT
+0xAF 0x255E # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+0xB0 0x255F # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+0xB1 0x2560 # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+0xB2 0x2561 # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+0xB3 0x0401 # CYRILLIC CAPITAL LETTER IO
+0xB4 0x2562 # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+0xB5 0x2563 # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+0xB6 0x2564 # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+0xB7 0x2565 # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+0xB8 0x2566 # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+0xB9 0x2567 # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+0xBA 0x2568 # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+0xBB 0x2569 # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+0xBC 0x256A # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+0xBD 0x256B # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+0xBE 0x256C # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+0xBF 0x00A9 # COPYRIGHT SIGN
+0xC0 0x044E # CYRILLIC SMALL LETTER YU
+0xC1 0x0430 # CYRILLIC SMALL LETTER A
+0xC2 0x0431 # CYRILLIC SMALL LETTER BE
+0xC3 0x0446 # CYRILLIC SMALL LETTER TSE
+0xC4 0x0434 # CYRILLIC SMALL LETTER DE
+0xC5 0x0435 # CYRILLIC SMALL LETTER IE
+0xC6 0x0444 # CYRILLIC SMALL LETTER EF
+0xC7 0x0433 # CYRILLIC SMALL LETTER GHE
+0xC8 0x0445 # CYRILLIC SMALL LETTER HA
+0xC9 0x0438 # CYRILLIC SMALL LETTER I
+0xCA 0x0439 # CYRILLIC SMALL LETTER SHORT I
+0xCB 0x043A # CYRILLIC SMALL LETTER KA
+0xCC 0x043B # CYRILLIC SMALL LETTER EL
+0xCD 0x043C # CYRILLIC SMALL LETTER EM
+0xCE 0x043D # CYRILLIC SMALL LETTER EN
+0xCF 0x043E # CYRILLIC SMALL LETTER O
+0xD0 0x043F # CYRILLIC SMALL LETTER PE
+0xD1 0x044F # CYRILLIC SMALL LETTER YA
+0xD2 0x0440 # CYRILLIC SMALL LETTER ER
+0xD3 0x0441 # CYRILLIC SMALL LETTER ES
+0xD4 0x0442 # CYRILLIC SMALL LETTER TE
+0xD5 0x0443 # CYRILLIC SMALL LETTER U
+0xD6 0x0436 # CYRILLIC SMALL LETTER ZHE
+0xD7 0x0432 # CYRILLIC SMALL LETTER VE
+0xD8 0x044C # CYRILLIC SMALL LETTER SOFT SIGN
+0xD9 0x044B # CYRILLIC SMALL LETTER YERU
+0xDA 0x0437 # CYRILLIC SMALL LETTER ZE
+0xDB 0x0448 # CYRILLIC SMALL LETTER SHA
+0xDC 0x044D # CYRILLIC SMALL LETTER E
+0xDD 0x0449 # CYRILLIC SMALL LETTER SHCHA
+0xDE 0x0447 # CYRILLIC SMALL LETTER CHE
+0xDF 0x044A # CYRILLIC SMALL LETTER HARD SIGN
+0xE0 0x042E # CYRILLIC CAPITAL LETTER YU
+0xE1 0x0410 # CYRILLIC CAPITAL LETTER A
+0xE2 0x0411 # CYRILLIC CAPITAL LETTER BE
+0xE3 0x0426 # CYRILLIC CAPITAL LETTER TSE
+0xE4 0x0414 # CYRILLIC CAPITAL LETTER DE
+0xE5 0x0415 # CYRILLIC CAPITAL LETTER IE
+0xE6 0x0424 # CYRILLIC CAPITAL LETTER EF
+0xE7 0x0413 # CYRILLIC CAPITAL LETTER GHE
+0xE8 0x0425 # CYRILLIC CAPITAL LETTER HA
+0xE9 0x0418 # CYRILLIC CAPITAL LETTER I
+0xEA 0x0419 # CYRILLIC CAPITAL LETTER SHORT I
+0xEB 0x041A # CYRILLIC CAPITAL LETTER KA
+0xEC 0x041B # CYRILLIC CAPITAL LETTER EL
+0xED 0x041C # CYRILLIC CAPITAL LETTER EM
+0xEE 0x041D # CYRILLIC CAPITAL LETTER EN
+0xEF 0x041E # CYRILLIC CAPITAL LETTER O
+0xF0 0x041F # CYRILLIC CAPITAL LETTER PE
+0xF1 0x042F # CYRILLIC CAPITAL LETTER YA
+0xF2 0x0420 # CYRILLIC CAPITAL LETTER ER
+0xF3 0x0421 # CYRILLIC CAPITAL LETTER ES
+0xF4 0x0422 # CYRILLIC CAPITAL LETTER TE
+0xF5 0x0423 # CYRILLIC CAPITAL LETTER U
+0xF6 0x0416 # CYRILLIC CAPITAL LETTER ZHE
+0xF7 0x0412 # CYRILLIC CAPITAL LETTER VE
+0xF8 0x042C # CYRILLIC CAPITAL LETTER SOFT SIGN
+0xF9 0x042B # CYRILLIC CAPITAL LETTER YERU
+0xFA 0x0417 # CYRILLIC CAPITAL LETTER ZE
+0xFB 0x0428 # CYRILLIC CAPITAL LETTER SHA
+0xFC 0x042D # CYRILLIC CAPITAL LETTER E
+0xFD 0x0429 # CYRILLIC CAPITAL LETTER SHCHA
+0xFE 0x0427 # CYRILLIC CAPITAL LETTER CHE
+0xFF 0x042A # CYRILLIC CAPITAL LETTER HARD SIGN
diff --git a/ext/standard/html_tables/mappings/ROMAN.TXT b/ext/standard/html_tables/mappings/ROMAN.TXT
new file mode 100644
index 0000000..5b3b8b4
--- /dev/null
+++ b/ext/standard/html_tables/mappings/ROMAN.TXT
@@ -0,0 +1,370 @@
+#=======================================================================
+# File name: ROMAN.TXT
+#
+# Contents: Map (external version) from Mac OS Roman
+# character set to Unicode 2.1 and later.
+#
+# Copyright: (c) 1994-2002, 2005 by Apple Computer, Inc., all rights
+# reserved.
+#
+# Contact: charsets@apple.com
+#
+# Changes:
+#
+# c02 2005-Apr-05 Update header comments. Matches internal xml
+# <c1.1> and Text Encoding Converter 2.0.
+# b4,c1 2002-Dec-19 Update URLs, notes. Matches internal
+# utom<b5>.
+# b03 1999-Sep-22 Update contact e-mail address. Matches
+# internal utom<b4>, ufrm<b3>, and Text
+# Encoding Converter version 1.5.
+# b02 1998-Aug-18 Encoding changed for Mac OS 8.5; change
+# mapping of 0xDB from CURRENCY SIGN to
+# EURO SIGN. Matches internal utom<b3>,
+# ufrm<b3>.
+# n08 1998-Feb-05 Minor update to header comments
+# n06 1997-Dec-14 Add warning about future changes to 0xDB
+# from CURRENCY SIGN to EURO SIGN. Clarify
+# some header information
+# n04 1997-Dec-01 Update to match internal utom<n3>, ufrm<n22>:
+# Change standard mapping for 0xBD from U+2126
+# to its canonical decomposition, U+03A9.
+# n03 1995-Apr-15 First version (after fixing some typos).
+# Matches internal ufrm<n9>.
+#
+# Standard header:
+# ----------------
+#
+# Apple, the Apple logo, and Macintosh are trademarks of Apple
+# Computer, Inc., registered in the United States and other countries.
+# Unicode is a trademark of Unicode Inc. For the sake of brevity,
+# throughout this document, "Macintosh" can be used to refer to
+# Macintosh computers and "Unicode" can be used to refer to the
+# Unicode standard.
+#
+# Apple Computer, Inc. ("Apple") makes no warranty or representation,
+# either express or implied, with respect to this document and the
+# included data, its quality, accuracy, or fitness for a particular
+# purpose. In no event will Apple be liable for direct, indirect,
+# special, incidental, or consequential damages resulting from any
+# defect or inaccuracy in this document or the included data.
+#
+# These mapping tables and character lists are subject to change.
+# The latest tables should be available from the following:
+#
+# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
+#
+# For general information about Mac OS encodings and these mapping
+# tables, see the file "README.TXT".
+#
+# Format:
+# -------
+#
+# Three tab-separated columns;
+# '#' begins a comment which continues to the end of the line.
+# Column #1 is the Mac OS Roman code (in hex as 0xNN)
+# Column #2 is the corresponding Unicode (in hex as 0xNNNN)
+# Column #3 is a comment containing the Unicode name
+#
+# The entries are in Mac OS Roman code order.
+#
+# One of these mappings requires the use of a corporate character.
+# See the file "CORPCHAR.TXT" and notes below.
+#
+# Control character mappings are not shown in this table, following
+# the conventions of the standard UTC mapping tables. However, the
+# Mac OS Roman character set uses the standard control characters at
+# 0x00-0x1F and 0x7F.
+#
+# Notes on Mac OS Roman:
+# ----------------------
+#
+# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
+# environments, it is only supported directly in programming
+# interfaces for QuickDraw Text, the Script Manager, and related
+# Text Utilities. For other purposes it is supported via transcoding
+# to and from Unicode.
+#
+# This character set is used for at least the following Mac OS
+# localizations: U.S., British, Canadian French, French, Swiss
+# French, German, Swiss German, Italian, Swiss Italian, Dutch,
+# Swedish, Norwegian, Danish, Finnish, Spanish, Catalan,
+# Portuguese, Brazilian, and the default International system.
+#
+# Variants of Mac OS Roman are used for Croatian, Icelandic,
+# Turkish, Romanian, and other encodings. Separate mapping tables
+# are available for these encodings.
+#
+# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was
+# mapped to U+00A4. In Mac OS 8.5 and later versions, code point
+# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard
+# Apple fonts are updated for Mac OS 8.5 to reflect this. There is
+# a "currency sign" variant of the Mac OS Roman encoding that still
+# maps 0xDB to U+00A4; this can be used for older fonts.
+#
+# Before Mac OS 8.5, the ROM bitmap versions of the fonts Chicago,
+# New York, Geneva, and Monaco did not implement the full Mac OS
+# Roman character set; they only supported character codes up to
+# 0xD8. The TrueType versions of these fonts have always implemented
+# the full character set, as with the bitmap and TrueType versions
+# of the other standard Roman fonts.
+#
+# In all Mac OS encodings, fonts such as Chicago which are used
+# as "system" fonts (for menus, dialogs, etc.) have four glyphs
+# at code points 0x11-0x14 for transient use by the Menu Manager.
+# These glyphs are not intended as characters for use in normal
+# text, and the associated code points are not generally
+# interpreted as associated with these glyphs; they are usually
+# interpreted (if at all) as the control codes DC1-DC4.
+#
+# Unicode mapping issues and notes:
+# ---------------------------------
+#
+# The following corporate zone Unicode character is used in this
+# mapping:
+#
+# 0xF8FF Apple logo
+#
+# NOTE: The graphic image associated with the Apple logo character
+# is not authorized for use without permission of Apple, and
+# unauthorized use might constitute trademark infringement.
+#
+# Details of mapping changes in each version:
+# -------------------------------------------
+#
+# Changes from version n08 to version b02:
+#
+# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from
+# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC).
+#
+# Changes from version n03 to version n04:
+#
+# - Change mapping of 0xBD from U+2126 to its canonical
+# decomposition, U+03A9.
+#
+##################
+
+0x20 0x0020 # SPACE
+0x21 0x0021 # EXCLAMATION MARK
+0x22 0x0022 # QUOTATION MARK
+0x23 0x0023 # NUMBER SIGN
+0x24 0x0024 # DOLLAR SIGN
+0x25 0x0025 # PERCENT SIGN
+0x26 0x0026 # AMPERSAND
+0x27 0x0027 # APOSTROPHE
+0x28 0x0028 # LEFT PARENTHESIS
+0x29 0x0029 # RIGHT PARENTHESIS
+0x2A 0x002A # ASTERISK
+0x2B 0x002B # PLUS SIGN
+0x2C 0x002C # COMMA
+0x2D 0x002D # HYPHEN-MINUS
+0x2E 0x002E # FULL STOP
+0x2F 0x002F # SOLIDUS
+0x30 0x0030 # DIGIT ZERO
+0x31 0x0031 # DIGIT ONE
+0x32 0x0032 # DIGIT TWO
+0x33 0x0033 # DIGIT THREE
+0x34 0x0034 # DIGIT FOUR
+0x35 0x0035 # DIGIT FIVE
+0x36 0x0036 # DIGIT SIX
+0x37 0x0037 # DIGIT SEVEN
+0x38 0x0038 # DIGIT EIGHT
+0x39 0x0039 # DIGIT NINE
+0x3A 0x003A # COLON
+0x3B 0x003B # SEMICOLON
+0x3C 0x003C # LESS-THAN SIGN
+0x3D 0x003D # EQUALS SIGN
+0x3E 0x003E # GREATER-THAN SIGN
+0x3F 0x003F # QUESTION MARK
+0x40 0x0040 # COMMERCIAL AT
+0x41 0x0041 # LATIN CAPITAL LETTER A
+0x42 0x0042 # LATIN CAPITAL LETTER B
+0x43 0x0043 # LATIN CAPITAL LETTER C
+0x44 0x0044 # LATIN CAPITAL LETTER D
+0x45 0x0045 # LATIN CAPITAL LETTER E
+0x46 0x0046 # LATIN CAPITAL LETTER F
+0x47 0x0047 # LATIN CAPITAL LETTER G
+0x48 0x0048 # LATIN CAPITAL LETTER H
+0x49 0x0049 # LATIN CAPITAL LETTER I
+0x4A 0x004A # LATIN CAPITAL LETTER J
+0x4B 0x004B # LATIN CAPITAL LETTER K
+0x4C 0x004C # LATIN CAPITAL LETTER L
+0x4D 0x004D # LATIN CAPITAL LETTER M
+0x4E 0x004E # LATIN CAPITAL LETTER N
+0x4F 0x004F # LATIN CAPITAL LETTER O
+0x50 0x0050 # LATIN CAPITAL LETTER P
+0x51 0x0051 # LATIN CAPITAL LETTER Q
+0x52 0x0052 # LATIN CAPITAL LETTER R
+0x53 0x0053 # LATIN CAPITAL LETTER S
+0x54 0x0054 # LATIN CAPITAL LETTER T
+0x55 0x0055 # LATIN CAPITAL LETTER U
+0x56 0x0056 # LATIN CAPITAL LETTER V
+0x57 0x0057 # LATIN CAPITAL LETTER W
+0x58 0x0058 # LATIN CAPITAL LETTER X
+0x59 0x0059 # LATIN CAPITAL LETTER Y
+0x5A 0x005A # LATIN CAPITAL LETTER Z
+0x5B 0x005B # LEFT SQUARE BRACKET
+0x5C 0x005C # REVERSE SOLIDUS
+0x5D 0x005D # RIGHT SQUARE BRACKET
+0x5E 0x005E # CIRCUMFLEX ACCENT
+0x5F 0x005F # LOW LINE
+0x60 0x0060 # GRAVE ACCENT
+0x61 0x0061 # LATIN SMALL LETTER A
+0x62 0x0062 # LATIN SMALL LETTER B
+0x63 0x0063 # LATIN SMALL LETTER C
+0x64 0x0064 # LATIN SMALL LETTER D
+0x65 0x0065 # LATIN SMALL LETTER E
+0x66 0x0066 # LATIN SMALL LETTER F
+0x67 0x0067 # LATIN SMALL LETTER G
+0x68 0x0068 # LATIN SMALL LETTER H
+0x69 0x0069 # LATIN SMALL LETTER I
+0x6A 0x006A # LATIN SMALL LETTER J
+0x6B 0x006B # LATIN SMALL LETTER K
+0x6C 0x006C # LATIN SMALL LETTER L
+0x6D 0x006D # LATIN SMALL LETTER M
+0x6E 0x006E # LATIN SMALL LETTER N
+0x6F 0x006F # LATIN SMALL LETTER O
+0x70 0x0070 # LATIN SMALL LETTER P
+0x71 0x0071 # LATIN SMALL LETTER Q
+0x72 0x0072 # LATIN SMALL LETTER R
+0x73 0x0073 # LATIN SMALL LETTER S
+0x74 0x0074 # LATIN SMALL LETTER T
+0x75 0x0075 # LATIN SMALL LETTER U
+0x76 0x0076 # LATIN SMALL LETTER V
+0x77 0x0077 # LATIN SMALL LETTER W
+0x78 0x0078 # LATIN SMALL LETTER X
+0x79 0x0079 # LATIN SMALL LETTER Y
+0x7A 0x007A # LATIN SMALL LETTER Z
+0x7B 0x007B # LEFT CURLY BRACKET
+0x7C 0x007C # VERTICAL LINE
+0x7D 0x007D # RIGHT CURLY BRACKET
+0x7E 0x007E # TILDE
+#
+0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
+0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE
+0x82 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA
+0x83 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE
+0x84 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE
+0x85 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS
+0x86 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS
+0x87 0x00E1 # LATIN SMALL LETTER A WITH ACUTE
+0x88 0x00E0 # LATIN SMALL LETTER A WITH GRAVE
+0x89 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX
+0x8A 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS
+0x8B 0x00E3 # LATIN SMALL LETTER A WITH TILDE
+0x8C 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE
+0x8D 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA
+0x8E 0x00E9 # LATIN SMALL LETTER E WITH ACUTE
+0x8F 0x00E8 # LATIN SMALL LETTER E WITH GRAVE
+0x90 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX
+0x91 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS
+0x92 0x00ED # LATIN SMALL LETTER I WITH ACUTE
+0x93 0x00EC # LATIN SMALL LETTER I WITH GRAVE
+0x94 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX
+0x95 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS
+0x96 0x00F1 # LATIN SMALL LETTER N WITH TILDE
+0x97 0x00F3 # LATIN SMALL LETTER O WITH ACUTE
+0x98 0x00F2 # LATIN SMALL LETTER O WITH GRAVE
+0x99 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX
+0x9A 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS
+0x9B 0x00F5 # LATIN SMALL LETTER O WITH TILDE
+0x9C 0x00FA # LATIN SMALL LETTER U WITH ACUTE
+0x9D 0x00F9 # LATIN SMALL LETTER U WITH GRAVE
+0x9E 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX
+0x9F 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS
+0xA0 0x2020 # DAGGER
+0xA1 0x00B0 # DEGREE SIGN
+0xA2 0x00A2 # CENT SIGN
+0xA3 0x00A3 # POUND SIGN
+0xA4 0x00A7 # SECTION SIGN
+0xA5 0x2022 # BULLET
+0xA6 0x00B6 # PILCROW SIGN
+0xA7 0x00DF # LATIN SMALL LETTER SHARP S
+0xA8 0x00AE # REGISTERED SIGN
+0xA9 0x00A9 # COPYRIGHT SIGN
+0xAA 0x2122 # TRADE MARK SIGN
+0xAB 0x00B4 # ACUTE ACCENT
+0xAC 0x00A8 # DIAERESIS
+0xAD 0x2260 # NOT EQUAL TO
+0xAE 0x00C6 # LATIN CAPITAL LETTER AE
+0xAF 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE
+0xB0 0x221E # INFINITY
+0xB1 0x00B1 # PLUS-MINUS SIGN
+0xB2 0x2264 # LESS-THAN OR EQUAL TO
+0xB3 0x2265 # GREATER-THAN OR EQUAL TO
+0xB4 0x00A5 # YEN SIGN
+0xB5 0x00B5 # MICRO SIGN
+0xB6 0x2202 # PARTIAL DIFFERENTIAL
+0xB7 0x2211 # N-ARY SUMMATION
+0xB8 0x220F # N-ARY PRODUCT
+0xB9 0x03C0 # GREEK SMALL LETTER PI
+0xBA 0x222B # INTEGRAL
+0xBB 0x00AA # FEMININE ORDINAL INDICATOR
+0xBC 0x00BA # MASCULINE ORDINAL INDICATOR
+0xBD 0x03A9 # GREEK CAPITAL LETTER OMEGA
+0xBE 0x00E6 # LATIN SMALL LETTER AE
+0xBF 0x00F8 # LATIN SMALL LETTER O WITH STROKE
+0xC0 0x00BF # INVERTED QUESTION MARK
+0xC1 0x00A1 # INVERTED EXCLAMATION MARK
+0xC2 0x00AC # NOT SIGN
+0xC3 0x221A # SQUARE ROOT
+0xC4 0x0192 # LATIN SMALL LETTER F WITH HOOK
+0xC5 0x2248 # ALMOST EQUAL TO
+0xC6 0x2206 # INCREMENT
+0xC7 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xC8 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xC9 0x2026 # HORIZONTAL ELLIPSIS
+0xCA 0x00A0 # NO-BREAK SPACE
+0xCB 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE
+0xCC 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE
+0xCD 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE
+0xCE 0x0152 # LATIN CAPITAL LIGATURE OE
+0xCF 0x0153 # LATIN SMALL LIGATURE OE
+0xD0 0x2013 # EN DASH
+0xD1 0x2014 # EM DASH
+0xD2 0x201C # LEFT DOUBLE QUOTATION MARK
+0xD3 0x201D # RIGHT DOUBLE QUOTATION MARK
+0xD4 0x2018 # LEFT SINGLE QUOTATION MARK
+0xD5 0x2019 # RIGHT SINGLE QUOTATION MARK
+0xD6 0x00F7 # DIVISION SIGN
+0xD7 0x25CA # LOZENGE
+0xD8 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS
+0xD9 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS
+0xDA 0x2044 # FRACTION SLASH
+0xDB 0x20AC # EURO SIGN
+0xDC 0x2039 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+0xDD 0x203A # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+0xDE 0xFB01 # LATIN SMALL LIGATURE FI
+0xDF 0xFB02 # LATIN SMALL LIGATURE FL
+0xE0 0x2021 # DOUBLE DAGGER
+0xE1 0x00B7 # MIDDLE DOT
+0xE2 0x201A # SINGLE LOW-9 QUOTATION MARK
+0xE3 0x201E # DOUBLE LOW-9 QUOTATION MARK
+0xE4 0x2030 # PER MILLE SIGN
+0xE5 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0xE6 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+0xE7 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE
+0xE8 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS
+0xE9 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE
+0xEA 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE
+0xEB 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0xEC 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS
+0xED 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE
+0xEE 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE
+0xEF 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0xF0 0xF8FF # Apple logo
+0xF1 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE
+0xF2 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE
+0xF3 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0xF4 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE
+0xF5 0x0131 # LATIN SMALL LETTER DOTLESS I
+0xF6 0x02C6 # MODIFIER LETTER CIRCUMFLEX ACCENT
+0xF7 0x02DC # SMALL TILDE
+0xF8 0x00AF # MACRON
+0xF9 0x02D8 # BREVE
+0xFA 0x02D9 # DOT ABOVE
+0xFB 0x02DA # RING ABOVE
+0xFC 0x00B8 # CEDILLA
+0xFD 0x02DD # DOUBLE ACUTE ACCENT
+0xFE 0x02DB # OGONEK
+0xFF 0x02C7 # CARON