diff options
Diffstat (limited to 'ext/standard/html_tables')
-rw-r--r-- | ext/standard/html_tables/ents_basic.txt | 5 | ||||
-rw-r--r-- | ext/standard/html_tables/ents_basic_apos.txt | 5 | ||||
-rw-r--r-- | ext/standard/html_tables/ents_html401.txt | 253 | ||||
-rw-r--r-- | ext/standard/html_tables/ents_html5.txt | 2125 | ||||
-rw-r--r-- | ext/standard/html_tables/ents_xhtml.txt | 253 | ||||
-rw-r--r-- | ext/standard/html_tables/html_table_gen.php | 812 | ||||
-rw-r--r-- | ext/standard/html_tables/mappings/8859-1.TXT | 303 | ||||
-rw-r--r-- | ext/standard/html_tables/mappings/8859-15.TXT | 303 | ||||
-rw-r--r-- | ext/standard/html_tables/mappings/8859-5.TXT | 303 | ||||
-rw-r--r-- | ext/standard/html_tables/mappings/CP1251.TXT | 274 | ||||
-rw-r--r-- | ext/standard/html_tables/mappings/CP1252.TXT | 274 | ||||
-rw-r--r-- | ext/standard/html_tables/mappings/CP866.TXT | 275 | ||||
-rw-r--r-- | ext/standard/html_tables/mappings/KOI8-R.TXT | 302 | ||||
-rw-r--r-- | ext/standard/html_tables/mappings/ROMAN.TXT | 370 |
14 files changed, 5857 insertions, 0 deletions
diff --git a/ext/standard/html_tables/ents_basic.txt b/ext/standard/html_tables/ents_basic.txt new file mode 100644 index 0000000..3a2ec93 --- /dev/null +++ b/ext/standard/html_tables/ents_basic.txt @@ -0,0 +1,5 @@ +quot 22 +amp 26 +#039 27 +lt 3C +gt 3E diff --git a/ext/standard/html_tables/ents_basic_apos.txt b/ext/standard/html_tables/ents_basic_apos.txt new file mode 100644 index 0000000..6a0f307 --- /dev/null +++ b/ext/standard/html_tables/ents_basic_apos.txt @@ -0,0 +1,5 @@ +quot 22 +amp 26 +apos 27 +lt 3C +gt 3E diff --git a/ext/standard/html_tables/ents_html401.txt b/ext/standard/html_tables/ents_html401.txt new file mode 100644 index 0000000..7e1564b --- /dev/null +++ b/ext/standard/html_tables/ents_html401.txt @@ -0,0 +1,253 @@ +#039 0027 //artifical; there's no ' in HTML 4.01 +nbsp 00A0 +iexcl 00A1 +cent 00A2 +pound 00A3 +curren 00A4 +yen 00A5 +brvbar 00A6 +sect 00A7 +uml 00A8 +copy 00A9 +ordf 00AA +laquo 00AB +not 00AC +shy 00AD +reg 00AE +macr 00AF +deg 00B0 +plusmn 00B1 +sup2 00B2 +sup3 00B3 +acute 00B4 +micro 00B5 +para 00B6 +middot 00B7 +cedil 00B8 +sup1 00B9 +ordm 00BA +raquo 00BB +frac14 00BC +frac12 00BD +frac34 00BE +iquest 00BF +Agrave 00C0 +Aacute 00C1 +Acirc 00C2 +Atilde 00C3 +Auml 00C4 +Aring 00C5 +AElig 00C6 +Ccedil 00C7 +Egrave 00C8 +Eacute 00C9 +Ecirc 00CA +Euml 00CB +Igrave 00CC +Iacute 00CD +Icirc 00CE +Iuml 00CF +ETH 00D0 +Ntilde 00D1 +Ograve 00D2 +Oacute 00D3 +Ocirc 00D4 +Otilde 00D5 +Ouml 00D6 +times 00D7 +Oslash 00D8 +Ugrave 00D9 +Uacute 00DA +Ucirc 00DB +Uuml 00DC +Yacute 00DD +THORN 00DE +szlig 00DF +agrave 00E0 +aacute 00E1 +acirc 00E2 +atilde 00E3 +auml 00E4 +aring 00E5 +aelig 00E6 +ccedil 00E7 +egrave 00E8 +eacute 00E9 +ecirc 00EA +euml 00EB +igrave 00EC +iacute 00ED +icirc 00EE +iuml 00EF +eth 00F0 +ntilde 00F1 +ograve 00F2 +oacute 00F3 +ocirc 00F4 +otilde 00F5 +ouml 00F6 +divide 00F7 +oslash 00F8 +ugrave 00F9 +uacute 00FA +ucirc 00FB +uuml 00FC +yacute 00FD +thorn 00FE +yuml 00FF +fnof 0192 +Alpha 0391 +Beta 0392 +Gamma 0393 +Delta 0394 +Epsilon 0395 +Zeta 0396 +Eta 0397 +Theta 0398 +Iota 0399 +Kappa 039A +Lambda 039B +Mu 039C +Nu 039D +Xi 039E +Omicron 039F +Pi 03A0 +Rho 03A1 +Sigma 03A3 +Tau 03A4 +Upsilon 03A5 +Phi 03A6 +Chi 03A7 +Psi 03A8 +Omega 03A9 +alpha 03B1 +beta 03B2 +gamma 03B3 +delta 03B4 +epsilon 03B5 +zeta 03B6 +eta 03B7 +theta 03B8 +iota 03B9 +kappa 03BA +lambda 03BB +mu 03BC +nu 03BD +xi 03BE +omicron 03BF +pi 03C0 +rho 03C1 +sigmaf 03C2 +sigma 03C3 +tau 03C4 +upsilon 03C5 +phi 03C6 +chi 03C7 +psi 03C8 +omega 03C9 +thetasym 03D1 +upsih 03D2 +piv 03D6 +bull 2022 +hellip 2026 +prime 2032 +Prime 2033 +oline 203E +frasl 2044 +weierp 2118 +image 2111 +real 211C +trade 2122 +alefsym 2135 +larr 2190 +uarr 2191 +rarr 2192 +darr 2193 +harr 2194 +crarr 21B5 +lArr 21D0 +uArr 21D1 +rArr 21D2 +dArr 21D3 +hArr 21D4 +forall 2200 +part 2202 +exist 2203 +empty 2205 +nabla 2207 +isin 2208 +notin 2209 +ni 220B +prod 220F +sum 2211 +minus 2212 +lowast 2217 +radic 221A +prop 221D +infin 221E +ang 2220 +and 2227 +or 2228 +cap 2229 +cup 222A +int 222B +there4 2234 +sim 223C +cong 2245 +asymp 2248 +ne 2260 +equiv 2261 +le 2264 +ge 2265 +sub 2282 +sup 2283 +nsub 2284 +sube 2286 +supe 2287 +oplus 2295 +otimes 2297 +perp 22A5 +sdot 22C5 +lceil 2308 +rceil 2309 +lfloor 230A +rfloor 230B +lang 2329 +rang 232A +loz 25CA +spades 2660 +clubs 2663 +hearts 2665 +diams 2666 +quot 0022 +amp 0026 +lt 003C +gt 003E +OElig 0152 +oelig 0153 +Scaron 0160 +scaron 0161 +Yuml 0178 +circ 02C6 +tilde 02DC +ensp 2002 +emsp 2003 +thinsp 2009 +zwnj 200C +zwj 200D +lrm 200E +rlm 200F +ndash 2013 +mdash 2014 +lsquo 2018 +rsquo 2019 +sbquo 201A +ldquo 201C +rdquo 201D +bdquo 201E +dagger 2020 +Dagger 2021 +permil 2030 +lsaquo 2039 +rsaquo 203A +euro 20AC
\ No newline at end of file diff --git a/ext/standard/html_tables/ents_html5.txt b/ext/standard/html_tables/ents_html5.txt new file mode 100644 index 0000000..18defb2 --- /dev/null +++ b/ext/standard/html_tables/ents_html5.txt @@ -0,0 +1,2125 @@ +AElig 000C6 +AMP 00026 +Aacute 000C1 +Abreve 00102 +Acirc 000C2 +Acy 00410 +Afr 1D504 +Agrave 000C0 +Alpha 00391 +Amacr 00100 +And 02A53 +Aogon 00104 +Aopf 1D538 +ApplyFunction 02061 +Aring 000C5 +Ascr 1D49C +Assign 02254 +Atilde 000C3 +Auml 000C4 +Backslash 02216 +Barv 02AE7 +Barwed 02306 +Bcy 00411 +Because 02235 +Bernoullis 0212C +Beta 00392 +Bfr 1D505 +Bopf 1D539 +Breve 002D8 +Bscr 0212C +Bumpeq 0224E +CHcy 00427 +COPY 000A9 +Cacute 00106 +Cap 022D2 +CapitalDifferentialD 02145 +Cayleys 0212D +Ccaron 0010C +Ccedil 000C7 +Ccirc 00108 +Cconint 02230 +Cdot 0010A +Cedilla 000B8 +CenterDot 000B7 +Cfr 0212D +Chi 003A7 +CircleDot 02299 +CircleMinus 02296 +CirclePlus 02295 +CircleTimes 02297 +ClockwiseContourIntegral 02232 +CloseCurlyDoubleQuote 0201D +CloseCurlyQuote 02019 +Colon 02237 +Colone 02A74 +Congruent 02261 +Conint 0222F +ContourIntegral 0222E +Copf 02102 +Coproduct 02210 +CounterClockwiseContourIntegral 02233 +Cross 02A2F +Cscr 1D49E +Cup 022D3 +CupCap 0224D +DD 02145 +DDotrahd 02911 +DJcy 00402 +DScy 00405 +DZcy 0040F +Dagger 02021 +Darr 021A1 +Dashv 02AE4 +Dcaron 0010E +Dcy 00414 +Del 02207 +Delta 00394 +Dfr 1D507 +DiacriticalAcute 000B4 +DiacriticalDot 002D9 +DiacriticalDoubleAcute 002DD +DiacriticalGrave 00060 +DiacriticalTilde 002DC +Diamond 022C4 +DifferentialD 02146 +Dopf 1D53B +Dot 000A8 +DotDot 020DC +DotEqual 02250 +DoubleContourIntegral 0222F +DoubleDot 000A8 +DoubleDownArrow 021D3 +DoubleLeftArrow 021D0 +DoubleLeftRightArrow 021D4 +DoubleLeftTee 02AE4 +DoubleLongLeftArrow 027F8 +DoubleLongLeftRightArrow 027FA +DoubleLongRightArrow 027F9 +DoubleRightArrow 021D2 +DoubleRightTee 022A8 +DoubleUpArrow 021D1 +DoubleUpDownArrow 021D5 +DoubleVerticalBar 02225 +DownArrow 02193 +DownArrowBar 02913 +DownArrowUpArrow 021F5 +DownBreve 00311 +DownLeftRightVector 02950 +DownLeftTeeVector 0295E +DownLeftVector 021BD +DownLeftVectorBar 02956 +DownRightTeeVector 0295F +DownRightVector 021C1 +DownRightVectorBar 02957 +DownTee 022A4 +DownTeeArrow 021A7 +Downarrow 021D3 +Dscr 1D49F +Dstrok 00110 +ENG 0014A +ETH 000D0 +Eacute 000C9 +Ecaron 0011A +Ecirc 000CA +Ecy 0042D +Edot 00116 +Efr 1D508 +Egrave 000C8 +Element 02208 +Emacr 00112 +EmptySmallSquare 025FB +EmptyVerySmallSquare 025AB +Eogon 00118 +Eopf 1D53C +Epsilon 00395 +Equal 02A75 +EqualTilde 02242 +Equilibrium 021CC +Escr 02130 +Esim 02A73 +Eta 00397 +Euml 000CB +Exists 02203 +ExponentialE 02147 +Fcy 00424 +Ffr 1D509 +FilledSmallSquare 025FC +FilledVerySmallSquare 025AA +Fopf 1D53D +ForAll 02200 +Fouriertrf 02131 +Fscr 02131 +GJcy 00403 +GT 0003E +Gamma 00393 +Gammad 003DC +Gbreve 0011E +Gcedil 00122 +Gcirc 0011C +Gcy 00413 +Gdot 00120 +Gfr 1D50A +Gg 022D9 +Gopf 1D53E +GreaterEqual 02265 +GreaterEqualLess 022DB +GreaterFullEqual 02267 +GreaterGreater 02AA2 +GreaterLess 02277 +GreaterSlantEqual 02A7E +GreaterTilde 02273 +Gscr 1D4A2 +Gt 0226B +HARDcy 0042A +Hacek 002C7 +Hat 0005E +Hcirc 00124 +Hfr 0210C +HilbertSpace 0210B +Hopf 0210D +HorizontalLine 02500 +Hscr 0210B +Hstrok 00126 +HumpDownHump 0224E +HumpEqual 0224F +IEcy 00415 +IJlig 00132 +IOcy 00401 +Iacute 000CD +Icirc 000CE +Icy 00418 +Idot 00130 +Ifr 02111 +Igrave 000CC +Im 02111 +Imacr 0012A +ImaginaryI 02148 +Implies 021D2 +Int 0222C +Integral 0222B +Intersection 022C2 +InvisibleComma 02063 +InvisibleTimes 02062 +Iogon 0012E +Iopf 1D540 +Iota 00399 +Iscr 02110 +Itilde 00128 +Iukcy 00406 +Iuml 000CF +Jcirc 00134 +Jcy 00419 +Jfr 1D50D +Jopf 1D541 +Jscr 1D4A5 +Jsercy 00408 +Jukcy 00404 +KHcy 00425 +KJcy 0040C +Kappa 0039A +Kcedil 00136 +Kcy 0041A +Kfr 1D50E +Kopf 1D542 +Kscr 1D4A6 +LJcy 00409 +LT 0003C +Lacute 00139 +Lambda 0039B +Lang 027EA +Laplacetrf 02112 +Larr 0219E +Lcaron 0013D +Lcedil 0013B +Lcy 0041B +LeftAngleBracket 027E8 +LeftArrow 02190 +LeftArrowBar 021E4 +LeftArrowRightArrow 021C6 +LeftCeiling 02308 +LeftDoubleBracket 027E6 +LeftDownTeeVector 02961 +LeftDownVector 021C3 +LeftDownVectorBar 02959 +LeftFloor 0230A +LeftRightArrow 02194 +LeftRightVector 0294E +LeftTee 022A3 +LeftTeeArrow 021A4 +LeftTeeVector 0295A +LeftTriangle 022B2 +LeftTriangleBar 029CF +LeftTriangleEqual 022B4 +LeftUpDownVector 02951 +LeftUpTeeVector 02960 +LeftUpVector 021BF +LeftUpVectorBar 02958 +LeftVector 021BC +LeftVectorBar 02952 +Leftarrow 021D0 +Leftrightarrow 021D4 +LessEqualGreater 022DA +LessFullEqual 02266 +LessGreater 02276 +LessLess 02AA1 +LessSlantEqual 02A7D +LessTilde 02272 +Lfr 1D50F +Ll 022D8 +Lleftarrow 021DA +Lmidot 0013F +LongLeftArrow 027F5 +LongLeftRightArrow 027F7 +LongRightArrow 027F6 +Longleftarrow 027F8 +Longleftrightarrow 027FA +Longrightarrow 027F9 +Lopf 1D543 +LowerLeftArrow 02199 +LowerRightArrow 02198 +Lscr 02112 +Lsh 021B0 +Lstrok 00141 +Lt 0226A +Map 02905 +Mcy 0041C +MediumSpace 0205F +Mellintrf 02133 +Mfr 1D510 +MinusPlus 02213 +Mopf 1D544 +Mscr 02133 +Mu 0039C +NJcy 0040A +Nacute 00143 +Ncaron 00147 +Ncedil 00145 +Ncy 0041D +NegativeMediumSpace 0200B +NegativeThickSpace 0200B +NegativeThinSpace 0200B +NegativeVeryThinSpace 0200B +NestedGreaterGreater 0226B +NestedLessLess 0226A +NewLine 0000A +Nfr 1D511 +NoBreak 02060 +NonBreakingSpace 000A0 +Nopf 02115 +Not 02AEC +NotCongruent 02262 +NotCupCap 0226D +NotDoubleVerticalBar 02226 +NotElement 02209 +NotEqual 02260 +NotEqualTilde 02242 00338 +NotExists 02204 +NotGreater 0226F +NotGreaterEqual 02271 +NotGreaterFullEqual 02267 00338 +NotGreaterGreater 0226B 00338 +NotGreaterLess 02279 +NotGreaterSlantEqual 02A7E 00338 +NotGreaterTilde 02275 +NotHumpDownHump 0224E 00338 +NotHumpEqual 0224F 00338 +NotLeftTriangle 022EA +NotLeftTriangleBar 029CF 00338 +NotLeftTriangleEqual 022EC +NotLess 0226E +NotLessEqual 02270 +NotLessGreater 02278 +NotLessLess 0226A 00338 +NotLessSlantEqual 02A7D 00338 +NotLessTilde 02274 +NotNestedGreaterGreater 02AA2 00338 +NotNestedLessLess 02AA1 00338 +NotPrecedes 02280 +NotPrecedesEqual 02AAF 00338 +NotPrecedesSlantEqual 022E0 +NotReverseElement 0220C +NotRightTriangle 022EB +NotRightTriangleBar 029D0 00338 +NotRightTriangleEqual 022ED +NotSquareSubset 0228F 00338 +NotSquareSubsetEqual 022E2 +NotSquareSuperset 02290 00338 +NotSquareSupersetEqual 022E3 +NotSubset 02282 020D2 +NotSubsetEqual 02288 +NotSucceeds 02281 +NotSucceedsEqual 02AB0 00338 +NotSucceedsSlantEqual 022E1 +NotSucceedsTilde 0227F 00338 +NotSuperset 02283 020D2 +NotSupersetEqual 02289 +NotTilde 02241 +NotTildeEqual 02244 +NotTildeFullEqual 02247 +NotTildeTilde 02249 +NotVerticalBar 02224 +Nscr 1D4A9 +Ntilde 000D1 +Nu 0039D +OElig 00152 +Oacute 000D3 +Ocirc 000D4 +Ocy 0041E +Odblac 00150 +Ofr 1D512 +Ograve 000D2 +Omacr 0014C +Omega 003A9 +Omicron 0039F +Oopf 1D546 +OpenCurlyDoubleQuote 0201C +OpenCurlyQuote 02018 +Or 02A54 +Oscr 1D4AA +Oslash 000D8 +Otilde 000D5 +Otimes 02A37 +Ouml 000D6 +OverBar 0203E +OverBrace 023DE +OverBracket 023B4 +OverParenthesis 023DC +PartialD 02202 +Pcy 0041F +Pfr 1D513 +Phi 003A6 +Pi 003A0 +PlusMinus 000B1 +Poincareplane 0210C +Popf 02119 +Pr 02ABB +Precedes 0227A +PrecedesEqual 02AAF +PrecedesSlantEqual 0227C +PrecedesTilde 0227E +Prime 02033 +Product 0220F +Proportion 02237 +Proportional 0221D +Pscr 1D4AB +Psi 003A8 +QUOT 00022 +Qfr 1D514 +Qopf 0211A +Qscr 1D4AC +RBarr 02910 +REG 000AE +Racute 00154 +Rang 027EB +Rarr 021A0 +Rarrtl 02916 +Rcaron 00158 +Rcedil 00156 +Rcy 00420 +Re 0211C +ReverseElement 0220B +ReverseEquilibrium 021CB +ReverseUpEquilibrium 0296F +Rfr 0211C +Rho 003A1 +RightAngleBracket 027E9 +RightArrow 02192 +RightArrowBar 021E5 +RightArrowLeftArrow 021C4 +RightCeiling 02309 +RightDoubleBracket 027E7 +RightDownTeeVector 0295D +RightDownVector 021C2 +RightDownVectorBar 02955 +RightFloor 0230B +RightTee 022A2 +RightTeeArrow 021A6 +RightTeeVector 0295B +RightTriangle 022B3 +RightTriangleBar 029D0 +RightTriangleEqual 022B5 +RightUpDownVector 0294F +RightUpTeeVector 0295C +RightUpVector 021BE +RightUpVectorBar 02954 +RightVector 021C0 +RightVectorBar 02953 +Rightarrow 021D2 +Ropf 0211D +RoundImplies 02970 +Rrightarrow 021DB +Rscr 0211B +Rsh 021B1 +RuleDelayed 029F4 +SHCHcy 00429 +SHcy 00428 +SOFTcy 0042C +Sacute 0015A +Sc 02ABC +Scaron 00160 +Scedil 0015E +Scirc 0015C +Scy 00421 +Sfr 1D516 +ShortDownArrow 02193 +ShortLeftArrow 02190 +ShortRightArrow 02192 +ShortUpArrow 02191 +Sigma 003A3 +SmallCircle 02218 +Sopf 1D54A +Sqrt 0221A +Square 025A1 +SquareIntersection 02293 +SquareSubset 0228F +SquareSubsetEqual 02291 +SquareSuperset 02290 +SquareSupersetEqual 02292 +SquareUnion 02294 +Sscr 1D4AE +Star 022C6 +Sub 022D0 +Subset 022D0 +SubsetEqual 02286 +Succeeds 0227B +SucceedsEqual 02AB0 +SucceedsSlantEqual 0227D +SucceedsTilde 0227F +SuchThat 0220B +Sum 02211 +Sup 022D1 +Superset 02283 +SupersetEqual 02287 +Supset 022D1 +THORN 000DE +TRADE 02122 +TSHcy 0040B +TScy 00426 +Tab 00009 +Tau 003A4 +Tcaron 00164 +Tcedil 00162 +Tcy 00422 +Tfr 1D517 +Therefore 02234 +Theta 00398 +ThickSpace 0205F 0200A +ThinSpace 02009 +Tilde 0223C +TildeEqual 02243 +TildeFullEqual 02245 +TildeTilde 02248 +Topf 1D54B +TripleDot 020DB +Tscr 1D4AF +Tstrok 00166 +Uacute 000DA +Uarr 0219F +Uarrocir 02949 +Ubrcy 0040E +Ubreve 0016C +Ucirc 000DB +Ucy 00423 +Udblac 00170 +Ufr 1D518 +Ugrave 000D9 +Umacr 0016A +UnderBar 0005F +UnderBrace 023DF +UnderBracket 023B5 +UnderParenthesis 023DD +Union 022C3 +UnionPlus 0228E +Uogon 00172 +Uopf 1D54C +UpArrow 02191 +UpArrowBar 02912 +UpArrowDownArrow 021C5 +UpDownArrow 02195 +UpEquilibrium 0296E +UpTee 022A5 +UpTeeArrow 021A5 +Uparrow 021D1 +Updownarrow 021D5 +UpperLeftArrow 02196 +UpperRightArrow 02197 +Upsi 003D2 +Upsilon 003A5 +Uring 0016E +Uscr 1D4B0 +Utilde 00168 +Uuml 000DC +VDash 022AB +Vbar 02AEB +Vcy 00412 +Vdash 022A9 +Vdashl 02AE6 +Vee 022C1 +Verbar 02016 +Vert 02016 +VerticalBar 02223 +VerticalLine 0007C +VerticalSeparator 02758 +VerticalTilde 02240 +VeryThinSpace 0200A +Vfr 1D519 +Vopf 1D54D +Vscr 1D4B1 +Vvdash 022AA +Wcirc 00174 +Wedge 022C0 +Wfr 1D51A +Wopf 1D54E +Wscr 1D4B2 +Xfr 1D51B +Xi 0039E +Xopf 1D54F +Xscr 1D4B3 +YAcy 0042F +YIcy 00407 +YUcy 0042E +Yacute 000DD +Ycirc 00176 +Ycy 0042B +Yfr 1D51C +Yopf 1D550 +Yscr 1D4B4 +Yuml 00178 +ZHcy 00416 +Zacute 00179 +Zcaron 0017D +Zcy 00417 +Zdot 0017B +ZeroWidthSpace 0200B +Zeta 00396 +Zfr 02128 +Zopf 02124 +Zscr 1D4B5 +aacute 000E1 +abreve 00103 +ac 0223E +acE 0223E 00333 +acd 0223F +acirc 000E2 +acute 000B4 +acy 00430 +aelig 000E6 +af 02061 +afr 1D51E +agrave 000E0 +alefsym 02135 +aleph 02135 +alpha 003B1 +amacr 00101 +amalg 02A3F +amp 00026 +and 02227 +andand 02A55 +andd 02A5C +andslope 02A58 +andv 02A5A +ang 02220 +ange 029A4 +angle 02220 +angmsd 02221 +angmsdaa 029A8 +angmsdab 029A9 +angmsdac 029AA +angmsdad 029AB +angmsdae 029AC +angmsdaf 029AD +angmsdag 029AE +angmsdah 029AF +angrt 0221F +angrtvb 022BE +angrtvbd 0299D +angsph 02222 +angst 000C5 +angzarr 0237C +aogon 00105 +aopf 1D552 +ap 02248 +apE 02A70 +apacir 02A6F +ape 0224A +apid 0224B +apos 00027 +approx 02248 +approxeq 0224A +aring 000E5 +ascr 1D4B6 +ast 0002A +asymp 02248 +asympeq 0224D +atilde 000E3 +auml 000E4 +awconint 02233 +awint 02A11 +bNot 02AED +backcong 0224C +backepsilon 003F6 +backprime 02035 +backsim 0223D +backsimeq 022CD +barvee 022BD +barwed 02305 +barwedge 02305 +bbrk 023B5 +bbrktbrk 023B6 +bcong 0224C +bcy 00431 +bdquo 0201E +becaus 02235 +because 02235 +bemptyv 029B0 +bepsi 003F6 +bernou 0212C +beta 003B2 +beth 02136 +between 0226C +bfr 1D51F +bigcap 022C2 +bigcirc 025EF +bigcup 022C3 +bigodot 02A00 +bigoplus 02A01 +bigotimes 02A02 +bigsqcup 02A06 +bigstar 02605 +bigtriangledown 025BD +bigtriangleup 025B3 +biguplus 02A04 +bigvee 022C1 +bigwedge 022C0 +bkarow 0290D +blacklozenge 029EB +blacksquare 025AA +blacktriangle 025B4 +blacktriangledown 025BE +blacktriangleleft 025C2 +blacktriangleright 025B8 +blank 02423 +blk12 02592 +blk14 02591 +blk34 02593 +block 02588 +bne 0003D 020E5 +bnequiv 02261 020E5 +bnot 02310 +bopf 1D553 +bot 022A5 +bottom 022A5 +bowtie 022C8 +boxDL 02557 +boxDR 02554 +boxDl 02556 +boxDr 02553 +boxH 02550 +boxHD 02566 +boxHU 02569 +boxHd 02564 +boxHu 02567 +boxUL 0255D +boxUR 0255A +boxUl 0255C +boxUr 02559 +boxV 02551 +boxVH 0256C +boxVL 02563 +boxVR 02560 +boxVh 0256B +boxVl 02562 +boxVr 0255F +boxbox 029C9 +boxdL 02555 +boxdR 02552 +boxdl 02510 +boxdr 0250C +boxh 02500 +boxhD 02565 +boxhU 02568 +boxhd 0252C +boxhu 02534 +boxminus 0229F +boxplus 0229E +boxtimes 022A0 +boxuL 0255B +boxuR 02558 +boxul 02518 +boxur 02514 +boxv 02502 +boxvH 0256A +boxvL 02561 +boxvR 0255E +boxvh 0253C +boxvl 02524 +boxvr 0251C +bprime 02035 +breve 002D8 +brvbar 000A6 +bscr 1D4B7 +bsemi 0204F +bsim 0223D +bsime 022CD +bsol 0005C +bsolb 029C5 +bsolhsub 027C8 +bull 02022 +bullet 02022 +bump 0224E +bumpE 02AAE +bumpe 0224F +bumpeq 0224F +cacute 00107 +cap 02229 +capand 02A44 +capbrcup 02A49 +capcap 02A4B +capcup 02A47 +capdot 02A40 +caps 02229 0FE00 +caret 02041 +caron 002C7 +ccaps 02A4D +ccaron 0010D +ccedil 000E7 +ccirc 00109 +ccups 02A4C +ccupssm 02A50 +cdot 0010B +cedil 000B8 +cemptyv 029B2 +cent 000A2 +centerdot 000B7 +cfr 1D520 +chcy 00447 +check 02713 +checkmark 02713 +chi 003C7 +cir 025CB +cirE 029C3 +circ 002C6 +circeq 02257 +circlearrowleft 021BA +circlearrowright 021BB +circledR 000AE +circledS 024C8 +circledast 0229B +circledcirc 0229A +circleddash 0229D +cire 02257 +cirfnint 02A10 +cirmid 02AEF +cirscir 029C2 +clubs 02663 +clubsuit 02663 +colon 0003A +colone 02254 +coloneq 02254 +comma 0002C +commat 00040 +comp 02201 +compfn 02218 +complement 02201 +complexes 02102 +cong 02245 +congdot 02A6D +conint 0222E +copf 1D554 +coprod 02210 +copy 000A9 +copysr 02117 +crarr 021B5 +cross 02717 +cscr 1D4B8 +csub 02ACF +csube 02AD1 +csup 02AD0 +csupe 02AD2 +ctdot 022EF +cudarrl 02938 +cudarrr 02935 +cuepr 022DE +cuesc 022DF +cularr 021B6 +cularrp 0293D +cup 0222A +cupbrcap 02A48 +cupcap 02A46 +cupcup 02A4A +cupdot 0228D +cupor 02A45 +cups 0222A 0FE00 +curarr 021B7 +curarrm 0293C +curlyeqprec 022DE +curlyeqsucc 022DF +curlyvee 022CE +curlywedge 022CF +curren 000A4 +curvearrowleft 021B6 +curvearrowright 021B7 +cuvee 022CE +cuwed 022CF +cwconint 02232 +cwint 02231 +cylcty 0232D +dArr 021D3 +dHar 02965 +dagger 02020 +daleth 02138 +darr 02193 +dash 02010 +dashv 022A3 +dbkarow 0290F +dblac 002DD +dcaron 0010F +dcy 00434 +dd 02146 +ddagger 02021 +ddarr 021CA +ddotseq 02A77 +deg 000B0 +delta 003B4 +demptyv 029B1 +dfisht 0297F +dfr 1D521 +dharl 021C3 +dharr 021C2 +diam 022C4 +diamond 022C4 +diamondsuit 02666 +diams 02666 +die 000A8 +digamma 003DD +disin 022F2 +div 000F7 +divide 000F7 +divideontimes 022C7 +divonx 022C7 +djcy 00452 +dlcorn 0231E +dlcrop 0230D +dollar 00024 +dopf 1D555 +dot 002D9 +doteq 02250 +doteqdot 02251 +dotminus 02238 +dotplus 02214 +dotsquare 022A1 +doublebarwedge 02306 +downarrow 02193 +downdownarrows 021CA +downharpoonleft 021C3 +downharpoonright 021C2 +drbkarow 02910 +drcorn 0231F +drcrop 0230C +dscr 1D4B9 +dscy 00455 +dsol 029F6 +dstrok 00111 +dtdot 022F1 +dtri 025BF +dtrif 025BE +duarr 021F5 +duhar 0296F +dwangle 029A6 +dzcy 0045F +dzigrarr 027FF +eDDot 02A77 +eDot 02251 +eacute 000E9 +easter 02A6E +ecaron 0011B +ecir 02256 +ecirc 000EA +ecolon 02255 +ecy 0044D +edot 00117 +ee 02147 +efDot 02252 +efr 1D522 +eg 02A9A +egrave 000E8 +egs 02A96 +egsdot 02A98 +el 02A99 +elinters 023E7 +ell 02113 +els 02A95 +elsdot 02A97 +emacr 00113 +empty 02205 +emptyset 02205 +emptyv 02205 +emsp 02003 +emsp13 02004 +emsp14 02005 +eng 0014B +ensp 02002 +eogon 00119 +eopf 1D556 +epar 022D5 +eparsl 029E3 +eplus 02A71 +epsi 003B5 +epsilon 003B5 +epsiv 003F5 +eqcirc 02256 +eqcolon 02255 +eqsim 02242 +eqslantgtr 02A96 +eqslantless 02A95 +equals 0003D +equest 0225F +equiv 02261 +equivDD 02A78 +eqvparsl 029E5 +erDot 02253 +erarr 02971 +escr 0212F +esdot 02250 +esim 02242 +eta 003B7 +eth 000F0 +euml 000EB +euro 020AC +excl 00021 +exist 02203 +expectation 02130 +exponentiale 02147 +fallingdotseq 02252 +fcy 00444 +female 02640 +ffilig 0FB03 +fflig 0FB00 +ffllig 0FB04 +ffr 1D523 +filig 0FB01 +fjlig 00066 0006A +flat 0266D +fllig 0FB02 +fltns 025B1 +fnof 00192 +fopf 1D557 +forall 02200 +fork 022D4 +forkv 02AD9 +fpartint 02A0D +frac12 000BD +frac13 02153 +frac14 000BC +frac15 02155 +frac16 02159 +frac18 0215B +frac23 02154 +frac25 02156 +frac34 000BE +frac35 02157 +frac38 0215C +frac45 02158 +frac56 0215A +frac58 0215D +frac78 0215E +frasl 02044 +frown 02322 +fscr 1D4BB +gE 02267 +gEl 02A8C +gacute 001F5 +gamma 003B3 +gammad 003DD +gap 02A86 +gbreve 0011F +gcirc 0011D +gcy 00433 +gdot 00121 +ge 02265 +gel 022DB +geq 02265 +geqq 02267 +geqslant 02A7E +ges 02A7E +gescc 02AA9 +gesdot 02A80 +gesdoto 02A82 +gesdotol 02A84 +gesl 022DB 0FE00 +gesles 02A94 +gfr 1D524 +gg 0226B +ggg 022D9 +gimel 02137 +gjcy 00453 +gl 02277 +glE 02A92 +gla 02AA5 +glj 02AA4 +gnE 02269 +gnap 02A8A +gnapprox 02A8A +gne 02A88 +gneq 02A88 +gneqq 02269 +gnsim 022E7 +gopf 1D558 +grave 00060 +gscr 0210A +gsim 02273 +gsime 02A8E +gsiml 02A90 +gt 0003E +gtcc 02AA7 +gtcir 02A7A +gtdot 022D7 +gtlPar 02995 +gtquest 02A7C +gtrapprox 02A86 +gtrarr 02978 +gtrdot 022D7 +gtreqless 022DB +gtreqqless 02A8C +gtrless 02277 +gtrsim 02273 +gvertneqq 02269 0FE00 +gvnE 02269 0FE00 +hArr 021D4 +hairsp 0200A +half 000BD +hamilt 0210B +hardcy 0044A +harr 02194 +harrcir 02948 +harrw 021AD +hbar 0210F +hcirc 00125 +hearts 02665 +heartsuit 02665 +hellip 02026 +hercon 022B9 +hfr 1D525 +hksearow 02925 +hkswarow 02926 +hoarr 021FF +homtht 0223B +hookleftarrow 021A9 +hookrightarrow 021AA +hopf 1D559 +horbar 02015 +hscr 1D4BD +hslash 0210F +hstrok 00127 +hybull 02043 +hyphen 02010 +iacute 000ED +ic 02063 +icirc 000EE +icy 00438 +iecy 00435 +iexcl 000A1 +iff 021D4 +ifr 1D526 +igrave 000EC +ii 02148 +iiiint 02A0C +iiint 0222D +iinfin 029DC +iiota 02129 +ijlig 00133 +imacr 0012B +image 02111 +imagline 02110 +imagpart 02111 +imath 00131 +imof 022B7 +imped 001B5 +in 02208 +incare 02105 +infin 0221E +infintie 029DD +inodot 00131 +int 0222B +intcal 022BA +integers 02124 +intercal 022BA +intlarhk 02A17 +intprod 02A3C +iocy 00451 +iogon 0012F +iopf 1D55A +iota 003B9 +iprod 02A3C +iquest 000BF +iscr 1D4BE +isin 02208 +isinE 022F9 +isindot 022F5 +isins 022F4 +isinsv 022F3 +isinv 02208 +it 02062 +itilde 00129 +iukcy 00456 +iuml 000EF +jcirc 00135 +jcy 00439 +jfr 1D527 +jmath 00237 +jopf 1D55B +jscr 1D4BF +jsercy 00458 +jukcy 00454 +kappa 003BA +kappav 003F0 +kcedil 00137 +kcy 0043A +kfr 1D528 +kgreen 00138 +khcy 00445 +kjcy 0045C +kopf 1D55C +kscr 1D4C0 +lAarr 021DA +lArr 021D0 +lAtail 0291B +lBarr 0290E +lE 02266 +lEg 02A8B +lHar 02962 +lacute 0013A +laemptyv 029B4 +lagran 02112 +lambda 003BB +lang 027E8 +langd 02991 +langle 027E8 +lap 02A85 +laquo 000AB +larr 02190 +larrb 021E4 +larrbfs 0291F +larrfs 0291D +larrhk 021A9 +larrlp 021AB +larrpl 02939 +larrsim 02973 +larrtl 021A2 +lat 02AAB +latail 02919 +late 02AAD +lates 02AAD 0FE00 +lbarr 0290C +lbbrk 02772 +lbrace 0007B +lbrack 0005B +lbrke 0298B +lbrksld 0298F +lbrkslu 0298D +lcaron 0013E +lcedil 0013C +lceil 02308 +lcub 0007B +lcy 0043B +ldca 02936 +ldquo 0201C +ldquor 0201E +ldrdhar 02967 +ldrushar 0294B +ldsh 021B2 +le 02264 +leftarrow 02190 +leftarrowtail 021A2 +leftharpoondown 021BD +leftharpoonup 021BC +leftleftarrows 021C7 +leftrightarrow 02194 +leftrightarrows 021C6 +leftrightharpoons 021CB +leftrightsquigarrow 021AD +leftthreetimes 022CB +leg 022DA +leq 02264 +leqq 02266 +leqslant 02A7D +les 02A7D +lescc 02AA8 +lesdot 02A7F +lesdoto 02A81 +lesdotor 02A83 +lesg 022DA 0FE00 +lesges 02A93 +lessapprox 02A85 +lessdot 022D6 +lesseqgtr 022DA +lesseqqgtr 02A8B +lessgtr 02276 +lesssim 02272 +lfisht 0297C +lfloor 0230A +lfr 1D529 +lg 02276 +lgE 02A91 +lhard 021BD +lharu 021BC +lharul 0296A +lhblk 02584 +ljcy 00459 +ll 0226A +llarr 021C7 +llcorner 0231E +llhard 0296B +lltri 025FA +lmidot 00140 +lmoust 023B0 +lmoustache 023B0 +lnE 02268 +lnap 02A89 +lnapprox 02A89 +lne 02A87 +lneq 02A87 +lneqq 02268 +lnsim 022E6 +loang 027EC +loarr 021FD +lobrk 027E6 +longleftarrow 027F5 +longleftrightarrow 027F7 +longmapsto 027FC +longrightarrow 027F6 +looparrowleft 021AB +looparrowright 021AC +lopar 02985 +lopf 1D55D +loplus 02A2D +lotimes 02A34 +lowast 02217 +lowbar 0005F +loz 025CA +lozenge 025CA +lozf 029EB +lpar 00028 +lparlt 02993 +lrarr 021C6 +lrcorner 0231F +lrhar 021CB +lrhard 0296D +lrm 0200E +lrtri 022BF +lsaquo 02039 +lscr 1D4C1 +lsh 021B0 +lsim 02272 +lsime 02A8D +lsimg 02A8F +lsqb 0005B +lsquo 02018 +lsquor 0201A +lstrok 00142 +lt 0003C +ltcc 02AA6 +ltcir 02A79 +ltdot 022D6 +lthree 022CB +ltimes 022C9 +ltlarr 02976 +ltquest 02A7B +ltrPar 02996 +ltri 025C3 +ltrie 022B4 +ltrif 025C2 +lurdshar 0294A +luruhar 02966 +lvertneqq 02268 0FE00 +lvnE 02268 0FE00 +mDDot 0223A +macr 000AF +male 02642 +malt 02720 +maltese 02720 +map 021A6 +mapsto 021A6 +mapstodown 021A7 +mapstoleft 021A4 +mapstoup 021A5 +marker 025AE +mcomma 02A29 +mcy 0043C +mdash 02014 +measuredangle 02221 +mfr 1D52A +mho 02127 +micro 000B5 +mid 02223 +midast 0002A +midcir 02AF0 +middot 000B7 +minus 02212 +minusb 0229F +minusd 02238 +minusdu 02A2A +mlcp 02ADB +mldr 02026 +mnplus 02213 +models 022A7 +mopf 1D55E +mp 02213 +mscr 1D4C2 +mstpos 0223E +mu 003BC +multimap 022B8 +mumap 022B8 +nGg 022D9 00338 +nGt 0226B 020D2 +nGtv 0226B 00338 +nLeftarrow 021CD +nLeftrightarrow 021CE +nLl 022D8 00338 +nLt 0226A 020D2 +nLtv 0226A 00338 +nRightarrow 021CF +nVDash 022AF +nVdash 022AE +nabla 02207 +nacute 00144 +nang 02220 020D2 +nap 02249 +napE 02A70 00338 +napid 0224B 00338 +napos 00149 +napprox 02249 +natur 0266E +natural 0266E +naturals 02115 +nbsp 000A0 +nbump 0224E 00338 +nbumpe 0224F 00338 +ncap 02A43 +ncaron 00148 +ncedil 00146 +ncong 02247 +ncongdot 02A6D 00338 +ncup 02A42 +ncy 0043D +ndash 02013 +ne 02260 +neArr 021D7 +nearhk 02924 +nearr 02197 +nearrow 02197 +nedot 02250 00338 +nequiv 02262 +nesear 02928 +nesim 02242 00338 +nexist 02204 +nexists 02204 +nfr 1D52B +ngE 02267 00338 +nge 02271 +ngeq 02271 +ngeqq 02267 00338 +ngeqslant 02A7E 00338 +nges 02A7E 00338 +ngsim 02275 +ngt 0226F +ngtr 0226F +nhArr 021CE +nharr 021AE +nhpar 02AF2 +ni 0220B +nis 022FC +nisd 022FA +niv 0220B +njcy 0045A +nlArr 021CD +nlE 02266 00338 +nlarr 0219A +nldr 02025 +nle 02270 +nleftarrow 0219A +nleftrightarrow 021AE +nleq 02270 +nleqq 02266 00338 +nleqslant 02A7D 00338 +nles 02A7D 00338 +nless 0226E +nlsim 02274 +nlt 0226E +nltri 022EA +nltrie 022EC +nmid 02224 +nopf 1D55F +not 000AC +notin 02209 +notinE 022F9 00338 +notindot 022F5 00338 +notinva 02209 +notinvb 022F7 +notinvc 022F6 +notni 0220C +notniva 0220C +notnivb 022FE +notnivc 022FD +npar 02226 +nparallel 02226 +nparsl 02AFD 020E5 +npart 02202 00338 +npolint 02A14 +npr 02280 +nprcue 022E0 +npre 02AAF 00338 +nprec 02280 +npreceq 02AAF 00338 +nrArr 021CF +nrarr 0219B +nrarrc 02933 00338 +nrarrw 0219D 00338 +nrightarrow 0219B +nrtri 022EB +nrtrie 022ED +nsc 02281 +nsccue 022E1 +nsce 02AB0 00338 +nscr 1D4C3 +nshortmid 02224 +nshortparallel 02226 +nsim 02241 +nsime 02244 +nsimeq 02244 +nsmid 02224 +nspar 02226 +nsqsube 022E2 +nsqsupe 022E3 +nsub 02284 +nsubE 02AC5 00338 +nsube 02288 +nsubset 02282 020D2 +nsubseteq 02288 +nsubseteqq 02AC5 00338 +nsucc 02281 +nsucceq 02AB0 00338 +nsup 02285 +nsupE 02AC6 00338 +nsupe 02289 +nsupset 02283 020D2 +nsupseteq 02289 +nsupseteqq 02AC6 00338 +ntgl 02279 +ntilde 000F1 +ntlg 02278 +ntriangleleft 022EA +ntrianglelefteq 022EC +ntriangleright 022EB +ntrianglerighteq 022ED +nu 003BD +num 00023 +numero 02116 +numsp 02007 +nvDash 022AD +nvHarr 02904 +nvap 0224D 020D2 +nvdash 022AC +nvge 02265 020D2 +nvgt 0003E 020D2 +nvinfin 029DE +nvlArr 02902 +nvle 02264 020D2 +nvlt 0003C 020D2 +nvltrie 022B4 020D2 +nvrArr 02903 +nvrtrie 022B5 020D2 +nvsim 0223C 020D2 +nwArr 021D6 +nwarhk 02923 +nwarr 02196 +nwarrow 02196 +nwnear 02927 +oS 024C8 +oacute 000F3 +oast 0229B +ocir 0229A +ocirc 000F4 +ocy 0043E +odash 0229D +odblac 00151 +odiv 02A38 +odot 02299 +odsold 029BC +oelig 00153 +ofcir 029BF +ofr 1D52C +ogon 002DB +ograve 000F2 +ogt 029C1 +ohbar 029B5 +ohm 003A9 +oint 0222E +olarr 021BA +olcir 029BE +olcross 029BB +oline 0203E +olt 029C0 +omacr 0014D +omega 003C9 +omicron 003BF +omid 029B6 +ominus 02296 +oopf 1D560 +opar 029B7 +operp 029B9 +oplus 02295 +or 02228 +orarr 021BB +ord 02A5D +order 02134 +orderof 02134 +ordf 000AA +ordm 000BA +origof 022B6 +oror 02A56 +orslope 02A57 +orv 02A5B +oscr 02134 +oslash 000F8 +osol 02298 +otilde 000F5 +otimes 02297 +otimesas 02A36 +ouml 000F6 +ovbar 0233D +par 02225 +para 000B6 +parallel 02225 +parsim 02AF3 +parsl 02AFD +part 02202 +pcy 0043F +percnt 00025 +period 0002E +permil 02030 +perp 022A5 +pertenk 02031 +pfr 1D52D +phi 003C6 +phiv 003D5 +phmmat 02133 +phone 0260E +pi 003C0 +pitchfork 022D4 +piv 003D6 +planck 0210F +planckh 0210E +plankv 0210F +plus 0002B +plusacir 02A23 +plusb 0229E +pluscir 02A22 +plusdo 02214 +plusdu 02A25 +pluse 02A72 +plusmn 000B1 +plussim 02A26 +plustwo 02A27 +pm 000B1 +pointint 02A15 +popf 1D561 +pound 000A3 +pr 0227A +prE 02AB3 +prap 02AB7 +prcue 0227C +pre 02AAF +prec 0227A +precapprox 02AB7 +preccurlyeq 0227C +preceq 02AAF +precnapprox 02AB9 +precneqq 02AB5 +precnsim 022E8 +precsim 0227E +prime 02032 +primes 02119 +prnE 02AB5 +prnap 02AB9 +prnsim 022E8 +prod 0220F +profalar 0232E +profline 02312 +profsurf 02313 +prop 0221D +propto 0221D +prsim 0227E +prurel 022B0 +pscr 1D4C5 +psi 003C8 +puncsp 02008 +qfr 1D52E +qint 02A0C +qopf 1D562 +qprime 02057 +qscr 1D4C6 +quaternions 0210D +quatint 02A16 +quest 0003F +questeq 0225F +quot 00022 +rAarr 021DB +rArr 021D2 +rAtail 0291C +rBarr 0290F +rHar 02964 +race 0223D 00331 +racute 00155 +radic 0221A +raemptyv 029B3 +rang 027E9 +rangd 02992 +range 029A5 +rangle 027E9 +raquo 000BB +rarr 02192 +rarrap 02975 +rarrb 021E5 +rarrbfs 02920 +rarrc 02933 +rarrfs 0291E +rarrhk 021AA +rarrlp 021AC +rarrpl 02945 +rarrsim 02974 +rarrtl 021A3 +rarrw 0219D +ratail 0291A +ratio 02236 +rationals 0211A +rbarr 0290D +rbbrk 02773 +rbrace 0007D +rbrack 0005D +rbrke 0298C +rbrksld 0298E +rbrkslu 02990 +rcaron 00159 +rcedil 00157 +rceil 02309 +rcub 0007D +rcy 00440 +rdca 02937 +rdldhar 02969 +rdquo 0201D +rdquor 0201D +rdsh 021B3 +real 0211C +realine 0211B +realpart 0211C +reals 0211D +rect 025AD +reg 000AE +rfisht 0297D +rfloor 0230B +rfr 1D52F +rhard 021C1 +rharu 021C0 +rharul 0296C +rho 003C1 +rhov 003F1 +rightarrow 02192 +rightarrowtail 021A3 +rightharpoondown 021C1 +rightharpoonup 021C0 +rightleftarrows 021C4 +rightleftharpoons 021CC +rightrightarrows 021C9 +rightsquigarrow 0219D +rightthreetimes 022CC +ring 002DA +risingdotseq 02253 +rlarr 021C4 +rlhar 021CC +rlm 0200F +rmoust 023B1 +rmoustache 023B1 +rnmid 02AEE +roang 027ED +roarr 021FE +robrk 027E7 +ropar 02986 +ropf 1D563 +roplus 02A2E +rotimes 02A35 +rpar 00029 +rpargt 02994 +rppolint 02A12 +rrarr 021C9 +rsaquo 0203A +rscr 1D4C7 +rsh 021B1 +rsqb 0005D +rsquo 02019 +rsquor 02019 +rthree 022CC +rtimes 022CA +rtri 025B9 +rtrie 022B5 +rtrif 025B8 +rtriltri 029CE +ruluhar 02968 +rx 0211E +sacute 0015B +sbquo 0201A +sc 0227B +scE 02AB4 +scap 02AB8 +scaron 00161 +sccue 0227D +sce 02AB0 +scedil 0015F +scirc 0015D +scnE 02AB6 +scnap 02ABA +scnsim 022E9 +scpolint 02A13 +scsim 0227F +scy 00441 +sdot 022C5 +sdotb 022A1 +sdote 02A66 +seArr 021D8 +searhk 02925 +searr 02198 +searrow 02198 +sect 000A7 +semi 0003B +seswar 02929 +setminus 02216 +setmn 02216 +sext 02736 +sfr 1D530 +sfrown 02322 +sharp 0266F +shchcy 00449 +shcy 00448 +shortmid 02223 +shortparallel 02225 +shy 000AD +sigma 003C3 +sigmaf 003C2 +sigmav 003C2 +sim 0223C +simdot 02A6A +sime 02243 +simeq 02243 +simg 02A9E +simgE 02AA0 +siml 02A9D +simlE 02A9F +simne 02246 +simplus 02A24 +simrarr 02972 +slarr 02190 +smallsetminus 02216 +smashp 02A33 +smeparsl 029E4 +smid 02223 +smile 02323 +smt 02AAA +smte 02AAC +smtes 02AAC 0FE00 +softcy 0044C +sol 0002F +solb 029C4 +solbar 0233F +sopf 1D564 +spades 02660 +spadesuit 02660 +spar 02225 +sqcap 02293 +sqcaps 02293 0FE00 +sqcup 02294 +sqcups 02294 0FE00 +sqsub 0228F +sqsube 02291 +sqsubset 0228F +sqsubseteq 02291 +sqsup 02290 +sqsupe 02292 +sqsupset 02290 +sqsupseteq 02292 +squ 025A1 +square 025A1 +squarf 025AA +squf 025AA +srarr 02192 +sscr 1D4C8 +ssetmn 02216 +ssmile 02323 +sstarf 022C6 +star 02606 +starf 02605 +straightepsilon 003F5 +straightphi 003D5 +strns 000AF +sub 02282 +subE 02AC5 +subdot 02ABD +sube 02286 +subedot 02AC3 +submult 02AC1 +subnE 02ACB +subne 0228A +subplus 02ABF +subrarr 02979 +subset 02282 +subseteq 02286 +subseteqq 02AC5 +subsetneq 0228A +subsetneqq 02ACB +subsim 02AC7 +subsub 02AD5 +subsup 02AD3 +succ 0227B +succapprox 02AB8 +succcurlyeq 0227D +succeq 02AB0 +succnapprox 02ABA +succneqq 02AB6 +succnsim 022E9 +succsim 0227F +sum 02211 +sung 0266A +sup 02283 +sup1 000B9 +sup2 000B2 +sup3 000B3 +supE 02AC6 +supdot 02ABE +supdsub 02AD8 +supe 02287 +supedot 02AC4 +suphsol 027C9 +suphsub 02AD7 +suplarr 0297B +supmult 02AC2 +supnE 02ACC +supne 0228B +supplus 02AC0 +supset 02283 +supseteq 02287 +supseteqq 02AC6 +supsetneq 0228B +supsetneqq 02ACC +supsim 02AC8 +supsub 02AD4 +supsup 02AD6 +swArr 021D9 +swarhk 02926 +swarr 02199 +swarrow 02199 +swnwar 0292A +szlig 000DF +target 02316 +tau 003C4 +tbrk 023B4 +tcaron 00165 +tcedil 00163 +tcy 00442 +tdot 020DB +telrec 02315 +tfr 1D531 +there4 02234 +therefore 02234 +theta 003B8 +thetasym 003D1 +thetav 003D1 +thickapprox 02248 +thicksim 0223C +thinsp 02009 +thkap 02248 +thksim 0223C +thorn 000FE +tilde 002DC +times 000D7 +timesb 022A0 +timesbar 02A31 +timesd 02A30 +tint 0222D +toea 02928 +top 022A4 +topbot 02336 +topcir 02AF1 +topf 1D565 +topfork 02ADA +tosa 02929 +tprime 02034 +trade 02122 +triangle 025B5 +triangledown 025BF +triangleleft 025C3 +trianglelefteq 022B4 +triangleq 0225C +triangleright 025B9 +trianglerighteq 022B5 +tridot 025EC +trie 0225C +triminus 02A3A +triplus 02A39 +trisb 029CD +tritime 02A3B +trpezium 023E2 +tscr 1D4C9 +tscy 00446 +tshcy 0045B +tstrok 00167 +twixt 0226C +twoheadleftarrow 0219E +twoheadrightarrow 021A0 +uArr 021D1 +uHar 02963 +uacute 000FA +uarr 02191 +ubrcy 0045E +ubreve 0016D +ucirc 000FB +ucy 00443 +udarr 021C5 +udblac 00171 +udhar 0296E +ufisht 0297E +ufr 1D532 +ugrave 000F9 +uharl 021BF +uharr 021BE +uhblk 02580 +ulcorn 0231C +ulcorner 0231C +ulcrop 0230F +ultri 025F8 +umacr 0016B +uml 000A8 +uogon 00173 +uopf 1D566 +uparrow 02191 +updownarrow 02195 +upharpoonleft 021BF +upharpoonright 021BE +uplus 0228E +upsi 003C5 +upsih 003D2 +upsilon 003C5 +upuparrows 021C8 +urcorn 0231D +urcorner 0231D +urcrop 0230E +uring 0016F +urtri 025F9 +uscr 1D4CA +utdot 022F0 +utilde 00169 +utri 025B5 +utrif 025B4 +uuarr 021C8 +uuml 000FC +uwangle 029A7 +vArr 021D5 +vBar 02AE8 +vBarv 02AE9 +vDash 022A8 +vangrt 0299C +varepsilon 003F5 +varkappa 003F0 +varnothing 02205 +varphi 003D5 +varpi 003D6 +varpropto 0221D +varr 02195 +varrho 003F1 +varsigma 003C2 +varsubsetneq 0228A 0FE00 +varsubsetneqq 02ACB 0FE00 +varsupsetneq 0228B 0FE00 +varsupsetneqq 02ACC 0FE00 +vartheta 003D1 +vartriangleleft 022B2 +vartriangleright 022B3 +vcy 00432 +vdash 022A2 +vee 02228 +veebar 022BB +veeeq 0225A +vellip 022EE +verbar 0007C +vert 0007C +vfr 1D533 +vltri 022B2 +vnsub 02282 020D2 +vnsup 02283 020D2 +vopf 1D567 +vprop 0221D +vrtri 022B3 +vscr 1D4CB +vsubnE 02ACB 0FE00 +vsubne 0228A 0FE00 +vsupnE 02ACC 0FE00 +vsupne 0228B 0FE00 +vzigzag 0299A +wcirc 00175 +wedbar 02A5F +wedge 02227 +wedgeq 02259 +weierp 02118 +wfr 1D534 +wopf 1D568 +wp 02118 +wr 02240 +wreath 02240 +wscr 1D4CC +xcap 022C2 +xcirc 025EF +xcup 022C3 +xdtri 025BD +xfr 1D535 +xhArr 027FA +xharr 027F7 +xi 003BE +xlArr 027F8 +xlarr 027F5 +xmap 027FC +xnis 022FB +xodot 02A00 +xopf 1D569 +xoplus 02A01 +xotime 02A02 +xrArr 027F9 +xrarr 027F6 +xscr 1D4CD +xsqcup 02A06 +xuplus 02A04 +xutri 025B3 +xvee 022C1 +xwedge 022C0 +yacute 000FD +yacy 0044F +ycirc 00177 +ycy 0044B +yen 000A5 +yfr 1D536 +yicy 00457 +yopf 1D56A +yscr 1D4CE +yucy 0044E +yuml 000FF +zacute 0017A +zcaron 0017E +zcy 00437 +zdot 0017C +zeetrf 02128 +zeta 003B6 +zfr 1D537 +zhcy 00436 +zigrarr 021DD +zopf 1D56B +zscr 1D4CF +zwj 0200D +zwnj 0200C
\ No newline at end of file diff --git a/ext/standard/html_tables/ents_xhtml.txt b/ext/standard/html_tables/ents_xhtml.txt new file mode 100644 index 0000000..81800bc --- /dev/null +++ b/ext/standard/html_tables/ents_xhtml.txt @@ -0,0 +1,253 @@ +nbsp 00A0 +iexcl 00A1 +cent 00A2 +pound 00A3 +curren 00A4 +yen 00A5 +brvbar 00A6 +sect 00A7 +uml 00A8 +copy 00A9 +ordf 00AA +laquo 00AB +not 00AC +shy 00AD +reg 00AE +macr 00AF +deg 00B0 +plusmn 00B1 +sup2 00B2 +sup3 00B3 +acute 00B4 +micro 00B5 +para 00B6 +middot 00B7 +cedil 00B8 +sup1 00B9 +ordm 00BA +raquo 00BB +frac14 00BC +frac12 00BD +frac34 00BE +iquest 00BF +Agrave 00C0 +Aacute 00C1 +Acirc 00C2 +Atilde 00C3 +Auml 00C4 +Aring 00C5 +AElig 00C6 +Ccedil 00C7 +Egrave 00C8 +Eacute 00C9 +Ecirc 00CA +Euml 00CB +Igrave 00CC +Iacute 00CD +Icirc 00CE +Iuml 00CF +ETH 00D0 +Ntilde 00D1 +Ograve 00D2 +Oacute 00D3 +Ocirc 00D4 +Otilde 00D5 +Ouml 00D6 +times 00D7 +Oslash 00D8 +Ugrave 00D9 +Uacute 00DA +Ucirc 00DB +Uuml 00DC +Yacute 00DD +THORN 00DE +szlig 00DF +agrave 00E0 +aacute 00E1 +acirc 00E2 +atilde 00E3 +auml 00E4 +aring 00E5 +aelig 00E6 +ccedil 00E7 +egrave 00E8 +eacute 00E9 +ecirc 00EA +euml 00EB +igrave 00EC +iacute 00ED +icirc 00EE +iuml 00EF +eth 00F0 +ntilde 00F1 +ograve 00F2 +oacute 00F3 +ocirc 00F4 +otilde 00F5 +ouml 00F6 +divide 00F7 +oslash 00F8 +ugrave 00F9 +uacute 00FA +ucirc 00FB +uuml 00FC +yacute 00FD +thorn 00FE +yuml 00FF +quot 0022 +amp 0026 +lt 003C +gt 003E +apos 0027 +OElig 0152 +oelig 0153 +Scaron 0160 +scaron 0161 +Yuml 0178 +circ 02C6 +tilde 02DC +ensp 2002 +emsp 2003 +thinsp 2009 +zwnj 200C +zwj 200D +lrm 200E +rlm 200F +ndash 2013 +mdash 2014 +lsquo 2018 +rsquo 2019 +sbquo 201A +ldquo 201C +rdquo 201D +bdquo 201E +dagger 2020 +Dagger 2021 +permil 2030 +lsaquo 2039 +rsaquo 203A +euro 20AC +fnof 0192 +Alpha 0391 +Beta 0392 +Gamma 0393 +Delta 0394 +Epsilon 0395 +Zeta 0396 +Eta 0397 +Theta 0398 +Iota 0399 +Kappa 039A +Lambda 039B +Mu 039C +Nu 039D +Xi 039E +Omicron 039F +Pi 03A0 +Rho 03A1 +Sigma 03A3 +Tau 03A4 +Upsilon 03A5 +Phi 03A6 +Chi 03A7 +Psi 03A8 +Omega 03A9 +alpha 03B1 +beta 03B2 +gamma 03B3 +delta 03B4 +epsilon 03B5 +zeta 03B6 +eta 03B7 +theta 03B8 +iota 03B9 +kappa 03BA +lambda 03BB +mu 03BC +nu 03BD +xi 03BE +omicron 03BF +pi 03C0 +rho 03C1 +sigmaf 03C2 +sigma 03C3 +tau 03C4 +upsilon 03C5 +phi 03C6 +chi 03C7 +psi 03C8 +omega 03C9 +thetasym 03D1 +upsih 03D2 +piv 03D6 +bull 2022 +hellip 2026 +prime 2032 +Prime 2033 +oline 203E +frasl 2044 +weierp 2118 +image 2111 +real 211C +trade 2122 +alefsym 2135 +larr 2190 +uarr 2191 +rarr 2192 +darr 2193 +harr 2194 +crarr 21B5 +lArr 21D0 +uArr 21D1 +rArr 21D2 +dArr 21D3 +hArr 21D4 +forall 2200 +part 2202 +exist 2203 +empty 2205 +nabla 2207 +isin 2208 +notin 2209 +ni 220B +prod 220F +sum 2211 +minus 2212 +lowast 2217 +radic 221A +prop 221D +infin 221E +ang 2220 +and 2227 +or 2228 +cap 2229 +cup 222A +int 222B +there4 2234 +sim 223C +cong 2245 +asymp 2248 +ne 2260 +equiv 2261 +le 2264 +ge 2265 +sub 2282 +sup 2283 +nsub 2284 +sube 2286 +supe 2287 +oplus 2295 +otimes 2297 +perp 22A5 +sdot 22C5 +lceil 2308 +rceil 2309 +lfloor 230A +rfloor 230B +lang 2329 +rang 232A +loz 25CA +spades 2660 +clubs 2663 +hearts 2665 +diams 2666
\ No newline at end of file diff --git a/ext/standard/html_tables/html_table_gen.php b/ext/standard/html_tables/html_table_gen.php new file mode 100644 index 0000000..7e7314f --- /dev/null +++ b/ext/standard/html_tables/html_table_gen.php @@ -0,0 +1,812 @@ +<?php +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2010 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Gustavo Lopes <cataphract@php.net> | + +----------------------------------------------------------------------+ +*/ + +/* This file prints to stdout the contents of ext/standard/html_tables.h */ +/* put together with glue; have patience */ + +$t = <<<CODE +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-%s The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ +*/ + +/* \$Id$ */ + +#ifndef HTML_TABLES_H +#define HTML_TABLES_H + +/************************************************************************** +*************************************************************************** +** THIS FILE IS AUTOMATICALLY GENERATED. DO NOT MODIFY IT. ** +*************************************************************************** +** Please change html_tables/html_table_gen.php instead and then ** +** run it in order to generate this file ** +*************************************************************************** +**************************************************************************/ + +enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251, + cs_8859_5, cs_cp866, cs_macroman, cs_koi8r, cs_big5, + cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp, + cs_numelems /* used to count the number of charsets */ + }; +#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_8859_1) +#define CHARSET_SINGLE_BYTE(cs) ((cs) > cs_utf_8 && (cs) < cs_big5) +#define CHARSET_PARTIAL_SUPPORT(cs) ((cs) >= cs_big5) + +static const struct { + const char *codeset; + enum entity_charset charset; +} charset_map[] = { + { "ISO-8859-1", cs_8859_1 }, + { "ISO8859-1", cs_8859_1 }, + { "ISO-8859-15", cs_8859_15 }, + { "ISO8859-15", cs_8859_15 }, + { "utf-8", cs_utf_8 }, + { "cp1252", cs_cp1252 }, + { "Windows-1252", cs_cp1252 }, + { "1252", cs_cp1252 }, + { "BIG5", cs_big5 }, + { "950", cs_big5 }, + { "GB2312", cs_gb2312 }, + { "936", cs_gb2312 }, + { "BIG5-HKSCS", cs_big5hkscs }, + { "Shift_JIS", cs_sjis }, + { "SJIS", cs_sjis }, + { "932", cs_sjis }, + { "EUCJP", cs_eucjp }, + { "EUC-JP", cs_eucjp }, + { "KOI8-R", cs_koi8r }, + { "koi8-ru", cs_koi8r }, + { "koi8r", cs_koi8r }, + { "cp1251", cs_cp1251 }, + { "Windows-1251", cs_cp1251 }, + { "win-1251", cs_cp1251 }, + { "iso8859-5", cs_8859_5 }, + { "iso-8859-5", cs_8859_5 }, + { "cp866", cs_cp866 }, + { "866", cs_cp866 }, + { "ibm866", cs_cp866 }, + { "MacRoman", cs_macroman }, + { NULL } +}; + +/* longest entity name length excluding & and ; */ +#define LONGEST_ENTITY_LENGTH 31 + +/* Definitions for mappings *to* Unicode. + * The origin charset must have at most 256 code points. + * The multi-byte encodings are not supported */ +typedef struct { + unsigned short uni_cp[64]; +} enc_to_uni_stage2; + +typedef struct { + const enc_to_uni_stage2 *inner[4]; +} enc_to_uni; + +/* bits 7-8 bits (only single bytes encodings supported )*/ +#define ENT_ENC_TO_UNI_STAGE1(k) ((k & 0xC0) >> 6) +/* bits 1-6 */ +#define ENT_ENC_TO_UNI_STAGE2(k) ((k) & 0x3F) + + +CODE; + +echo sprintf($t, date("Y")); + +$encodings = array( + array( + "ident" => "iso88591", + "enumid" => 1, + "name" => "ISO-8859-1", + "file" => "mappings/8859-1.TXT", + ), + array( + "ident" => "iso88595", + "enumid" => 5, + "name" => "ISO-8859-5", + "file" => "mappings/8859-5.TXT", + ), + array( + "ident" => "iso885915", + "enumid" => 3, + "name" => "ISO-8859-15", + "file" => "mappings/8859-15.TXT", + ), + array( + "ident" => "win1252", + "enumid" => 2, + "enumident" => "cp1252", + "name" => "Windows-1252", + "file" => "mappings/CP1252.TXT", + ), + array( + "ident" => "win1251", + "enumid" => 4, + "enumident" => "cp1252", + "name" => "Windows-1251", + "file" => "mappings/CP1251.TXT", + ), + array( + "ident" => "koi8r", + "enumid" => 8, + "name" => "KOI8-R", + "file" => "mappings/KOI8-R.TXT", + ), + array( + "ident" => "cp866", + "enumid" => 6, + "name" => "CP-866", + "file" => "mappings/CP866.TXT", + ), + array( + "ident" => "macroman", + "enumid" => 7, + "name" => "MacRoman", + "file" => "mappings/ROMAN.TXT", + ), +); + +$prevStage2 = array(); + +foreach ($encodings as $e) { + echo +"/* {{{ Mappings *to* Unicode for {$e['name']} */\n\n"; + + /* process file */ + $map = array(); + $lines = explode("\n", file_get_contents($e{'file'})); + foreach ($lines as $l) { + if (preg_match("/^0x([0-9A-Z]{2})\t0x([0-9A-Z]{2,})/i", $l, $matches)) + $map[] = array($matches[1], $matches[2]); + } + + $mappy = array(); + foreach ($map as $v) { $mappy[hexdec($v[0])] = hexdec($v[1]); } + + $mstable = array("ident" => $e['ident']); + /* calculate two-stage tables */ + for ($i = 0; $i < 4; $i++) { + for ($j = 0; $j < 64; $j++) { + $cp = $i << 6 | $j; + $mstable[$i][$j] = isset($mappy[$cp]) ? $mappy[$cp] : NULL; + } + } + + echo +"/* {{{ Stage 2 tables for {$e['name']} */\n\n"; + + $s2tables_idents = array(); + for ($i = 0; $i < 4; $i++) { + if (($t = array_keys($prevStage2, $mstable[$i])) !== array()) { + $s2tables_idents[$i] = $encodings[$t[0]/5]["ident"]; + continue; + } + + $s2tables_idents[$i] = $e["ident"]; + + echo "static const enc_to_uni_stage2 enc_to_uni_s2_{$e['ident']}_". + sprintf("%02X", $i << 6)." = { {\n"; + for ($j = 0; $j < 64; $j++) { + if ($j == 0) echo "\t"; + elseif ($j % 6 == 0) echo "\n\t"; + else echo " "; + if ($mstable[$i][$j] !== NULL) + echo sprintf("0x%04X,", $mstable[$i][$j]); + else + echo "0xFFFF,"; /* special value; indicates no mapping */ + } + echo "\n} };\n\n"; + + $prevStage2[] = $mstable[$i]; + } + + echo +"/* end of stage 2 tables for {$e['name']} }}} */\n\n"; + + echo +"/* {{{ Stage 1 table for {$e['name']} */\n"; + + echo +"static const enc_to_uni enc_to_uni_{$e['ident']} = { { +\t&enc_to_uni_s2_{$s2tables_idents[0]}_00, +\t&enc_to_uni_s2_{$s2tables_idents[1]}_40, +\t&enc_to_uni_s2_{$s2tables_idents[2]}_80, +\t&enc_to_uni_s2_{$s2tables_idents[3]}_C0 } +}; +"; + + echo +"/* end of stage 1 table for {$e['name']} }}} */\n\n"; +} + +$maxencnum = max(array_map(function($e) { return $e['enumid']; }, $encodings)); +$a = range(0, $maxencnum); +foreach ($encodings as $e) { $a[$e['enumid']] = $e['ident']; } + + echo +"/* {{{ Index of tables for encoding conversion */ +static const enc_to_uni *const enc_to_uni_index[cs_numelems] = {\n"; + +foreach ($a as $k => $v) { + if (is_numeric($v)) + echo "\tNULL,\n"; + else + echo "\t&enc_to_uni_$v,\n"; +} + + echo +"}; +/* }}} */\n"; + +$t = <<<CODE + +/* Definitions for mappings *from* Unicode */ + +typedef struct { + unsigned short un_code_point; /* we don't need bigger */ + unsigned char cs_code; /* currently, we only have maps to single-byte encodings */ +} uni_to_enc; + + +CODE; + +echo $t; + +$encodings = array( + array( + "ident" => "iso885915", + "name" => "ISO-8859-15", + "file" => "mappings/8859-15.TXT", + "range" => array(0xA4, 0xBE), + ), + array( + "ident" => "win1252", + "name" => "Windows-1252", + "file" => "mappings/CP1252.TXT", + "range" => array(0x80, 0x9F), + ), + array( + "ident" => "win1251", + "name" => "Windows-1251", + "file" => "mappings/CP1251.TXT", + "range" => array(0x80, 0xFF), + ), + array( + "ident" => "koi8r", + "name" => "KOI8-R", + "file" => "mappings/KOI8-R.TXT", + "range" => array(0x80, 0xFF), + ), + array( + "ident" => "cp866", + "name" => "CP-866", + "file" => "mappings/CP866.TXT", + "range" => array(0x80, 0xFF), + ), + array( + "ident" => "macroman", + "name" => "MacRoman", + "file" => "mappings/ROMAN.TXT", + "range" => array(0x80, 0xFF), + ), +); + +foreach ($encodings as $e) { + echo +"/* {{{ Mappings *from* Unicode for {$e['name']} */\n"; + + /* process file */ + $map = array(); + $lines = explode("\n", file_get_contents($e{'file'})); + foreach ($lines as $l) { + if (preg_match("/^0x([0-9A-Z]{2})\t0x([0-9A-Z]{2,})\s+#\s*(.*)$/i", $l, $matches)) + $map[] = array($matches[1], $matches[2], rtrim($matches[3])); + } + + $mappy = array(); + foreach ($map as $v) { + if (hexdec($v[0]) >= $e['range'][0] && hexdec($v[0]) <= $e['range'][1]) + $mappy[hexdec($v[1])] = array(hexdec($v[0]), strtolower($v[2])); + } + ksort($mappy); + + echo +"static const uni_to_enc unimap_{$e['ident']}[] = {\n"; + + foreach ($mappy as $k => $v) { + echo "\t{ ", sprintf("0x%04X", $k), ", ", sprintf("0x%02X", $v[0]), " },\t/* ", + $v[1], " */\n"; + } + echo "};\n"; + + echo +"/* {{{ end of mappings *from* Unicode for {$e['name']} */\n\n"; +} + +$data = file_get_contents("ents_html5.txt"); +$pass2 = false; +$name = "HTML5"; +$ident = "html5"; +again: + +$t = <<<'CODE' +/* HTML 5 has many more named entities. + * Some of them map to two unicode code points, not one. + * We're going to use a three-stage table (with an extra one for the entities + * with two code points). */ + +#define ENT_STAGE1_INDEX(k) (((k) & 0xFFF000) >> 12) /* > 1D, we have no mapping */ +#define ENT_STAGE2_INDEX(k) (((k) & 0xFC0) >> 6) +#define ENT_STAGE3_INDEX(k) ((k) & 0x3F) +#define ENT_CODE_POINT_FROM_STAGES(i,j,k) (((i) << 12) | ((j) << 6) | (k)) + +/* Table should be organized with a leading row telling the size of + * the table and the default entity (maybe NULL) and the rest being + * normal rows ordered by code point so that we can do a binary search */ +typedef union { + struct { + unsigned size; /* number of remaining entries in the table */ + const char *default_entity; + unsigned short default_entity_len; + } leading_entry; + struct { + unsigned second_cp; /* second code point */ + const char *entity; + unsigned short entity_len; + } normal_entry; +} entity_multicodepoint_row; + +/* blocks of these should start at code points k where k % 0xFC0 == 0 */ +typedef struct { + char ambiguous; /* if 0 look into entity */ + union { + struct { + const char *entity; /* may be NULL */ + unsigned short entity_len; + } ent; + const entity_multicodepoint_row *multicodepoint_table; + } data; +} entity_stage3_row; + +/* Calculate k & 0x3F Use as offset */ +typedef const entity_stage3_row *entity_stage2_row; /* 64 elements */ + +/* Calculate k & 0xFC0 >> 6. Use as offset */ +typedef const entity_stage3_row *const *entity_stage1_row; /* 64 elements */ + +/* For stage 1, Calculate k & 0xFFF000 >> 3*4. + * If larger than 1D, we have no mapping. Otherwise lookup that index */ + +typedef struct { + const entity_stage1_row *ms_table; + /* for tables with only basic entities, this member is to be accessed + * directly for better performance: */ + const entity_stage3_row *table; +} entity_table_opt; + +/* Replaced "GT" > "gt" and "QUOT" > "quot" for consistency's sake. */ + + +CODE; + +if (!$pass2) + echo $t; + +$dp = array(); + +foreach (explode("\n", $data) as $l) { + if (preg_match('/^(#?[a-z0-9]+)\s+([a-f0-9]+) ([a-f0-9]+)/i', $l, $matches)) { + //echo sprintf("\t{\"%-21s 1, 0x%05d},\n", $matches[1].",", $matches[2]); + $dp[] = array($matches[1], $matches[2], $matches[3]); + } else if (preg_match('/^(#?[a-z0-9]+)\s+([a-f0-9]+)/i', $l, $matches)) { + $dp[] = array($matches[1], $matches[2]); + } +} + +$origdp = $dp; + +usort($dp, function($a, $b) { return hexdec($a[1])-hexdec($b[1]); }); + +$multicp_rows = array(); +foreach ($dp as $el) { + if (count($el) == 3) { + $multicp_rows[$el[1]] = array(); + } +} + +foreach ($dp as $el) { + if (key_exists($el[1], $multicp_rows)) { + if (count($el) == 3) + $multicp_rows[$el[1]][$el[2]] = $el[0]; + else + $multicp_rows[$el[1]]["default"] = $el[0]; + } +} + +if ($pass2 < 2) + echo "/* {{{ Start of $name multi-stage table for codepoint -> entity */", "\n\n"; +else + echo "/* {{{ Start of $name table for codepoint -> entity */", "\n\n"; + +if (empty($multicp_rows)) + goto skip_multicp; + +ksort($multicp_rows); +foreach ($multicp_rows as &$v) { ksort($v); } + +echo +"/* {{{ Start of double code point tables for $name */", "\n\n"; + +foreach ($multicp_rows as $k => $v) { + echo "static const entity_multicodepoint_row multi_cp_{$ident}_", + sprintf("%05s", $k), "[] = {", "\n"; + if (key_exists("default", $v)) { + if ($v['default'] == 'GT') /* hack to make > translate to > not GT; */ + $v['default'] = "gt"; + echo "\t{ {", sprintf("%02d", count($v) - 1), + ",\t\t", sprintf("\"%-21s", $v["default"].'",'), "\t", + sprintf("% 2d", strlen($v["default"])), '} },', "\n"; + } else { + echo "\t{ {", sprintf("%02d", count($v)), + ",\t\t", sprintf("%-22s", 'NULL'), ",\t0} },\n"; + } + unset($v["default"]); + foreach ($v as $l => $w) { + echo "\t{ {", sprintf("0x%05s", $l), ",\t", sprintf("\"%-21s", $w.'",'), "\t", + sprintf("% 2d", strlen($w)), '} },', "\n"; + } + echo "};\n"; +} +echo "\n/* End of double code point tables }}} */", "\n\n"; + +skip_multicp: + +if ($pass2 < 2) + echo "/* {{{ Stage 3 Tables for $name */", "\n\n"; + +$t = <<<CODE +static const entity_stage3_row empty_stage3_table[] = { + /* 64 elements */ + {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, + {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, + {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, + {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, + {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, + {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, + {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, + {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, + {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, + {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, + {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, + {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, + {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, + {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, + {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, + {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, {0, { {NULL, 0} } }, +}; + +CODE; + +if (!$pass2) + echo $t; + +$mstable = array(); +foreach ($dp as $el) { + $s1 = (hexdec($el[1]) & 0xFFF000) >> 12; + $s2 = (hexdec($el[1]) & 0xFC0) >> 6; + $s3 = hexdec($el[1]) & 0x3F; + if (key_exists($el[1], $multicp_rows)) { + $mstable[$s1][$s2][$s3] = ""; + } else { + $mstable[$s1][$s2][$s3] = $el[0]; + } +} + +for ($i = 0; $i < 0x1E; $i++) { + for ($k = 0; $k < 64; $k++) { + $any3 = false; + $col3 = array(); + for ($l = 0; $l < 64; $l++) { + if (isset($mstable[$i][$k][$l])) { + $any3 = true; + $col3[$l] = $mstable[$i][$k][$l]; + } else { + $col3[$l] = null; + } + } + if ($any3) { + echo "static const entity_stage3_row stage3_table_{$ident}_", + sprintf("%02X%03X", $i, $k << 6), "[] = {\n"; + foreach ($col3 as $y => $z) { + if ($y == 0) echo "\t"; + elseif ($y % 4 == 0) echo "\n\t"; + else echo " "; + if ($z === NULL) + echo "{0, { {NULL, 0} } },"; + elseif ($z === "QUOT") /* hack to translate " into "e;, not " */ + echo "{0, { {\"quot\", 4} } },"; + elseif ($z !== "") + echo "{0, { {\"$z\", ", strlen($z), "} } },"; + else + echo "{1, { {(void *)", sprintf("multi_cp_{$ident}_%05X", + ($i << 12) | ($k << 6) | $y ), "} } },"; + + } + echo "\n};\n\n"; + } + } +} + +if ($pass2 < 2) + echo "/* end of stage 3 Tables for $name }}} */", "\n\n"; + +if ($pass2 > 1) + goto hashtables; + +echo +"/* {{{ Stage 2 Tables for $name */", "\n\n"; + +$t = <<<CODE +static const entity_stage2_row empty_stage2_table[] = { + empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table, + empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table, + empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table, + empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table, + empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table, + empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table, + empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table, + empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table, + empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table, + empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table, + empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table, + empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table, + empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table, + empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table, + empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table, + empty_stage3_table, empty_stage3_table, empty_stage3_table, empty_stage3_table, +}; + +CODE; + +if (!$pass2) + echo $t; + +for ($i = 0; $i < 0x1E; $i++) { + $any = false; + for ($k = 0; $k < 64; $k++) { + if (isset($mstable[$i][$k])) + $any = true; + } + if ($any) { + echo "static const entity_stage2_row stage2_table_{$ident}_", + sprintf("%02X000", $i), "[] = {\n"; + for ($k = 0; $k < 64; $k++) { + if ($k == 0) echo "\t"; + elseif ($k % 4 == 0) echo "\n\t"; + else echo " "; + if (isset($mstable[$i][$k])) { + echo sprintf("stage3_table_{$ident}_%05X", ($i << 12) | ($k << 6)), ","; + } else { + echo "empty_stage3_table", ","; + } + } + echo "\n};\n\n"; + } +} + +echo +"/* end of stage 2 tables for $name }}} */", "\n\n"; + +echo "static const entity_stage1_row entity_ms_table_{$ident}[] = {\n"; +for ($i = 0; $i < 0x1E; $i++) { + if (isset($mstable[$i])) + echo "\t", sprintf("stage2_table_{$ident}_%02X000", $i), ",\n"; + else + echo "\tempty_stage2_table,\n"; +} +echo "};\n\n"; + +echo +"/* end of $name multi-stage table for codepoint -> entity }}} */\n\n"; + +/* commented-out; this enabled binary search, which turned out to be + * significantly slower than the hash tables for html 5 entities */ +//echo +//"/* {{{ HTML 5 tables for entity -> codepoint */", "\n\n"; + +//$t = <<<CODE +//typedef struct { +// const char *entity; +// unsigned short entity_len; +// unsigned int codepoint1; +// unsigned int codepoint2; +//} entity_cp_map; +// +//#define ENTITY_CP_MAP_CMP(l, lsize, r, rsize) \ +// ( ((lsize)==(rsize)) ? (memcmp((l), (r), (lsize))) : ((lsize)-(rsize)) ) +// +//static const entity_cp_map html5_ent_cp_map[] = { +// +//CODE; +//echo $t; +// +//$dp = $origdp; +//usort($dp, function($a, $b) { $d = strlen($a[0])-strlen($b[0]); +// return $d==0?strcmp($a[0], $b[0]):$d; }); +// +//$k = 0; +//foreach ($dp as $o) { +// if ($k == 0) echo "\t"; +// elseif ($k % 3 == 0) echo "\n\t"; +// else echo " "; +// if (isset($o[2])) +// echo sprintf('{"%s", %d, 0x%X, 0x%X},', $o[0], strlen($o[0]), +// hexdec($o[1]), hexdec($o[2])); +// else +// echo sprintf('{"%s", %d, 0x%X, 0},', $o[0], strlen($o[0]), +// hexdec($o[1])); +// +// if (isset($o[2])) { +// $entlen = strlen($o[0]) + 2; +// $utf8len = strlen( +// mb_convert_encoding("&#x{$o[1]};&#x{$o[2]};", "UTF-8", "HTML-ENTITIES")); +// if ($utf8len > $entlen*1.2) { +// die("violated assumption for traverse_for_entities"); +// } +// } +// +// $k++; +//} +//echo "\n};\n\n"; +// +//echo "static const size_t html5_ent_cp_map_size = $k;\n\n"; +// +//echo +//"/* end of HTML 5 tables for entity -> codepoint }}} */\n\n"; + +hashtables: + +echo +"/* {{{ $name hash table for entity -> codepoint */", "\n\n"; + +$t = <<<CODE +typedef struct { + const char *entity; + unsigned short entity_len; + unsigned int codepoint1; + unsigned int codepoint2; +} entity_cp_map; + +typedef const entity_cp_map *entity_ht_bucket; + +typedef struct { + unsigned num_elems; /* power of 2 */ + const entity_ht_bucket *buckets; /* .num_elems elements */ +} entity_ht; + +static const entity_cp_map ht_bucket_empty[] = { {NULL, 0, 0, 0} }; + +CODE; + +if (!$pass2) + echo $t; + +function hashfun($str) +{ + + $hash = 5381; + $nKeyLength = strlen($str); + $pos = 0; + + for (; $nKeyLength > 0; $nKeyLength--) { + $hash = (int)(((int)(((int)($hash << 5)) + $hash)) + ord($str[$pos++])) + & 0xFFFFFFFF; + } + return $hash; + +} + +$numelems = max(pow(2, ceil(log(1.5*count($origdp))/log(2))),16); +$mask = $numelems - 1; +$hashes = array(); +foreach ($origdp as $e) { + $hashes[hashfun($e[0]) & $mask][] = $e; + if (isset($e[2])) { + $entlen = strlen($e[0]) + 2; + $utf8len = strlen( + mb_convert_encoding("&#x{$e[1]};&#x{$e[2]};", "UTF-8", "HTML-ENTITIES")); + if ($utf8len > $entlen*1.2) { + die("violated assumption for traverse_for_entities"); + } + } +} + +for ($i = 0; $i < $numelems; $i++) { + if (empty($hashes[$i])) + continue; + echo "static const entity_cp_map ht_bucket_{$ident}_", sprintf("%03X", $i) ,"[] = {"; + foreach ($hashes[$i] as $h) { + if (isset($h[2])) { + echo sprintf(' {"%s", %d, 0x%05X, 0x%05X},', + $h[0], strlen($h[0]), hexdec($h[1]), hexdec($h[2])); + } else { + echo sprintf(' {"%s", %d, 0x%05X, 0},', + $h[0], strlen($h[0]), hexdec($h[1])); + } + } + echo " {NULL, 0, 0, 0} };\n"; +} +echo "\n"; + +echo +"static const entity_cp_map *const ht_buckets_{$ident}[] = {\n"; + +for ($i = 0; $i < $numelems; $i++) { + if ($i == 0) echo "\t"; + elseif ($i % 4 == 0) echo "\n\t"; + else echo " "; + if (empty($hashes[$i])) + echo "ht_bucket_empty,"; + else + echo "ht_bucket_{$ident}_", sprintf("%03X", $i), ","; +} +echo "\n};\n\n"; + +echo +"static const entity_ht ent_ht_{$ident} = { + ", sprintf("0x%X", $numelems), ", + ht_buckets_{$ident} +};\n\n"; + +echo +"/* end of $name hash table for entity -> codepoint }}} */\n\n"; + +if (!$pass2) { + $data = file_get_contents("ents_html401.txt"); + $pass2 = 1; + $name = "HTML 4.01"; + $ident = "html4"; + goto again; +} elseif ($pass2 == 1) { + $data = file_get_contents("ents_basic.txt"); + $pass2 = 2; + $name = "Basic entities (no apos)"; + $ident = "be_noapos"; + goto again; +} elseif ($pass2 == 2) { + $data = file_get_contents("ents_basic_apos.txt"); + $pass2 = 3; + $name = "Basic entities (with apos)"; + $ident = "be_apos"; + goto again; +} + +echo "#endif /* HTML_TABLES_H */\n"; diff --git a/ext/standard/html_tables/mappings/8859-1.TXT b/ext/standard/html_tables/mappings/8859-1.TXT new file mode 100644 index 0000000..473ecab --- /dev/null +++ b/ext/standard/html_tables/mappings/8859-1.TXT @@ -0,0 +1,303 @@ +# +# Name: ISO/IEC 8859-1:1998 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Ken Whistler <kenw@sybase.com> +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-1:1998 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-1 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-1 order. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# +# Updated versions of this file may be found in: +# <ftp://ftp.unicode.org/Public/MAPPINGS/> +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # <control> +0x81 0x0081 # <control> +0x82 0x0082 # <control> +0x83 0x0083 # <control> +0x84 0x0084 # <control> +0x85 0x0085 # <control> +0x86 0x0086 # <control> +0x87 0x0087 # <control> +0x88 0x0088 # <control> +0x89 0x0089 # <control> +0x8A 0x008A # <control> +0x8B 0x008B # <control> +0x8C 0x008C # <control> +0x8D 0x008D # <control> +0x8E 0x008E # <control> +0x8F 0x008F # <control> +0x90 0x0090 # <control> +0x91 0x0091 # <control> +0x92 0x0092 # <control> +0x93 0x0093 # <control> +0x94 0x0094 # <control> +0x95 0x0095 # <control> +0x96 0x0096 # <control> +0x97 0x0097 # <control> +0x98 0x0098 # <control> +0x99 0x0099 # <control> +0x9A 0x009A # <control> +0x9B 0x009B # <control> +0x9C 0x009C # <control> +0x9D 0x009D # <control> +0x9E 0x009E # <control> +0x9F 0x009F # <control> +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x00A1 # INVERTED EXCLAMATION MARK +0xA2 0x00A2 # CENT SIGN +0xA3 0x00A3 # POUND SIGN +0xA4 0x00A4 # CURRENCY SIGN +0xA5 0x00A5 # YEN SIGN +0xA6 0x00A6 # BROKEN BAR +0xA7 0x00A7 # SECTION SIGN +0xA8 0x00A8 # DIAERESIS +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x00AA # FEMININE ORDINAL INDICATOR +0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC # NOT SIGN +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x00AE # REGISTERED SIGN +0xAF 0x00AF # MACRON +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x00B2 # SUPERSCRIPT TWO +0xB3 0x00B3 # SUPERSCRIPT THREE +0xB4 0x00B4 # ACUTE ACCENT +0xB5 0x00B5 # MICRO SIGN +0xB6 0x00B6 # PILCROW SIGN +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x00B8 # CEDILLA +0xB9 0x00B9 # SUPERSCRIPT ONE +0xBA 0x00BA # MASCULINE ORDINAL INDICATOR +0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x00BC # VULGAR FRACTION ONE QUARTER +0xBD 0x00BD # VULGAR FRACTION ONE HALF +0xBE 0x00BE # VULGAR FRACTION THREE QUARTERS +0xBF 0x00BF # INVERTED QUESTION MARK +0xC0 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x00C6 # LATIN CAPITAL LETTER AE +0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xD0 0x00D0 # LATIN CAPITAL LETTER ETH (Icelandic) +0xD1 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE +0xD2 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x00D7 # MULTIPLICATION SIGN +0xD8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xD9 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE +0xDE 0x00DE # LATIN CAPITAL LETTER THORN (Icelandic) +0xDF 0x00DF # LATIN SMALL LETTER SHARP S (German) +0xE0 0x00E0 # LATIN SMALL LETTER A WITH GRAVE +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x00E3 # LATIN SMALL LETTER A WITH TILDE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x00E6 # LATIN SMALL LETTER AE +0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0xF0 0x00F0 # LATIN SMALL LETTER ETH (Icelandic) +0xF1 0x00F1 # LATIN SMALL LETTER N WITH TILDE +0xF2 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 # DIVISION SIGN +0xF8 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xF9 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x00FD # LATIN SMALL LETTER Y WITH ACUTE +0xFE 0x00FE # LATIN SMALL LETTER THORN (Icelandic) +0xFF 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS diff --git a/ext/standard/html_tables/mappings/8859-15.TXT b/ext/standard/html_tables/mappings/8859-15.TXT new file mode 100644 index 0000000..ab2f32f --- /dev/null +++ b/ext/standard/html_tables/mappings/8859-15.TXT @@ -0,0 +1,303 @@ +# +# Name: ISO/IEC 8859-15:1999 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> +# Ken Whistler <kenw@sybase.com> +# +# Copyright (c) 1998 - 1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-15:1999 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-15 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-15 order. +# +# Version history +# +# Updated versions of this file may be found in: +# <ftp://ftp.unicode.org/Public/MAPPINGS/> +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # <control> +0x81 0x0081 # <control> +0x82 0x0082 # <control> +0x83 0x0083 # <control> +0x84 0x0084 # <control> +0x85 0x0085 # <control> +0x86 0x0086 # <control> +0x87 0x0087 # <control> +0x88 0x0088 # <control> +0x89 0x0089 # <control> +0x8A 0x008A # <control> +0x8B 0x008B # <control> +0x8C 0x008C # <control> +0x8D 0x008D # <control> +0x8E 0x008E # <control> +0x8F 0x008F # <control> +0x90 0x0090 # <control> +0x91 0x0091 # <control> +0x92 0x0092 # <control> +0x93 0x0093 # <control> +0x94 0x0094 # <control> +0x95 0x0095 # <control> +0x96 0x0096 # <control> +0x97 0x0097 # <control> +0x98 0x0098 # <control> +0x99 0x0099 # <control> +0x9A 0x009A # <control> +0x9B 0x009B # <control> +0x9C 0x009C # <control> +0x9D 0x009D # <control> +0x9E 0x009E # <control> +0x9F 0x009F # <control> +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x00A1 # INVERTED EXCLAMATION MARK +0xA2 0x00A2 # CENT SIGN +0xA3 0x00A3 # POUND SIGN +0xA4 0x20AC # EURO SIGN +0xA5 0x00A5 # YEN SIGN +0xA6 0x0160 # LATIN CAPITAL LETTER S WITH CARON +0xA7 0x00A7 # SECTION SIGN +0xA8 0x0161 # LATIN SMALL LETTER S WITH CARON +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x00AA # FEMININE ORDINAL INDICATOR +0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC # NOT SIGN +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x00AE # REGISTERED SIGN +0xAF 0x00AF # MACRON +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x00B2 # SUPERSCRIPT TWO +0xB3 0x00B3 # SUPERSCRIPT THREE +0xB4 0x017D # LATIN CAPITAL LETTER Z WITH CARON +0xB5 0x00B5 # MICRO SIGN +0xB6 0x00B6 # PILCROW SIGN +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x017E # LATIN SMALL LETTER Z WITH CARON +0xB9 0x00B9 # SUPERSCRIPT ONE +0xBA 0x00BA # MASCULINE ORDINAL INDICATOR +0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x0152 # LATIN CAPITAL LIGATURE OE +0xBD 0x0153 # LATIN SMALL LIGATURE OE +0xBE 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS +0xBF 0x00BF # INVERTED QUESTION MARK +0xC0 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x00C6 # LATIN CAPITAL LETTER AE +0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xD0 0x00D0 # LATIN CAPITAL LETTER ETH +0xD1 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE +0xD2 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x00D7 # MULTIPLICATION SIGN +0xD8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xD9 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE +0xDE 0x00DE # LATIN CAPITAL LETTER THORN +0xDF 0x00DF # LATIN SMALL LETTER SHARP S +0xE0 0x00E0 # LATIN SMALL LETTER A WITH GRAVE +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x00E3 # LATIN SMALL LETTER A WITH TILDE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x00E6 # LATIN SMALL LETTER AE +0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0xF0 0x00F0 # LATIN SMALL LETTER ETH +0xF1 0x00F1 # LATIN SMALL LETTER N WITH TILDE +0xF2 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 # DIVISION SIGN +0xF8 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xF9 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x00FD # LATIN SMALL LETTER Y WITH ACUTE +0xFE 0x00FE # LATIN SMALL LETTER THORN +0xFF 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS + diff --git a/ext/standard/html_tables/mappings/8859-5.TXT b/ext/standard/html_tables/mappings/8859-5.TXT new file mode 100644 index 0000000..a7ed1ce --- /dev/null +++ b/ext/standard/html_tables/mappings/8859-5.TXT @@ -0,0 +1,303 @@ +# +# Name: ISO 8859-5:1999 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Ken Whistler <kenw@sybase.com> +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-5:1999 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-5 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-5 order. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# +# Updated versions of this file may be found in: +# <ftp://ftp.unicode.org/Public/MAPPINGS/> +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # <control> +0x81 0x0081 # <control> +0x82 0x0082 # <control> +0x83 0x0083 # <control> +0x84 0x0084 # <control> +0x85 0x0085 # <control> +0x86 0x0086 # <control> +0x87 0x0087 # <control> +0x88 0x0088 # <control> +0x89 0x0089 # <control> +0x8A 0x008A # <control> +0x8B 0x008B # <control> +0x8C 0x008C # <control> +0x8D 0x008D # <control> +0x8E 0x008E # <control> +0x8F 0x008F # <control> +0x90 0x0090 # <control> +0x91 0x0091 # <control> +0x92 0x0092 # <control> +0x93 0x0093 # <control> +0x94 0x0094 # <control> +0x95 0x0095 # <control> +0x96 0x0096 # <control> +0x97 0x0097 # <control> +0x98 0x0098 # <control> +0x99 0x0099 # <control> +0x9A 0x009A # <control> +0x9B 0x009B # <control> +0x9C 0x009C # <control> +0x9D 0x009D # <control> +0x9E 0x009E # <control> +0x9F 0x009F # <control> +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x0401 # CYRILLIC CAPITAL LETTER IO +0xA2 0x0402 # CYRILLIC CAPITAL LETTER DJE +0xA3 0x0403 # CYRILLIC CAPITAL LETTER GJE +0xA4 0x0404 # CYRILLIC CAPITAL LETTER UKRAINIAN IE +0xA5 0x0405 # CYRILLIC CAPITAL LETTER DZE +0xA6 0x0406 # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0xA7 0x0407 # CYRILLIC CAPITAL LETTER YI +0xA8 0x0408 # CYRILLIC CAPITAL LETTER JE +0xA9 0x0409 # CYRILLIC CAPITAL LETTER LJE +0xAA 0x040A # CYRILLIC CAPITAL LETTER NJE +0xAB 0x040B # CYRILLIC CAPITAL LETTER TSHE +0xAC 0x040C # CYRILLIC CAPITAL LETTER KJE +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x040E # CYRILLIC CAPITAL LETTER SHORT U +0xAF 0x040F # CYRILLIC CAPITAL LETTER DZHE +0xB0 0x0410 # CYRILLIC CAPITAL LETTER A +0xB1 0x0411 # CYRILLIC CAPITAL LETTER BE +0xB2 0x0412 # CYRILLIC CAPITAL LETTER VE +0xB3 0x0413 # CYRILLIC CAPITAL LETTER GHE +0xB4 0x0414 # CYRILLIC CAPITAL LETTER DE +0xB5 0x0415 # CYRILLIC CAPITAL LETTER IE +0xB6 0x0416 # CYRILLIC CAPITAL LETTER ZHE +0xB7 0x0417 # CYRILLIC CAPITAL LETTER ZE +0xB8 0x0418 # CYRILLIC CAPITAL LETTER I +0xB9 0x0419 # CYRILLIC CAPITAL LETTER SHORT I +0xBA 0x041A # CYRILLIC CAPITAL LETTER KA +0xBB 0x041B # CYRILLIC CAPITAL LETTER EL +0xBC 0x041C # CYRILLIC CAPITAL LETTER EM +0xBD 0x041D # CYRILLIC CAPITAL LETTER EN +0xBE 0x041E # CYRILLIC CAPITAL LETTER O +0xBF 0x041F # CYRILLIC CAPITAL LETTER PE +0xC0 0x0420 # CYRILLIC CAPITAL LETTER ER +0xC1 0x0421 # CYRILLIC CAPITAL LETTER ES +0xC2 0x0422 # CYRILLIC CAPITAL LETTER TE +0xC3 0x0423 # CYRILLIC CAPITAL LETTER U +0xC4 0x0424 # CYRILLIC CAPITAL LETTER EF +0xC5 0x0425 # CYRILLIC CAPITAL LETTER HA +0xC6 0x0426 # CYRILLIC CAPITAL LETTER TSE +0xC7 0x0427 # CYRILLIC CAPITAL LETTER CHE +0xC8 0x0428 # CYRILLIC CAPITAL LETTER SHA +0xC9 0x0429 # CYRILLIC CAPITAL LETTER SHCHA +0xCA 0x042A # CYRILLIC CAPITAL LETTER HARD SIGN +0xCB 0x042B # CYRILLIC CAPITAL LETTER YERU +0xCC 0x042C # CYRILLIC CAPITAL LETTER SOFT SIGN +0xCD 0x042D # CYRILLIC CAPITAL LETTER E +0xCE 0x042E # CYRILLIC CAPITAL LETTER YU +0xCF 0x042F # CYRILLIC CAPITAL LETTER YA +0xD0 0x0430 # CYRILLIC SMALL LETTER A +0xD1 0x0431 # CYRILLIC SMALL LETTER BE +0xD2 0x0432 # CYRILLIC SMALL LETTER VE +0xD3 0x0433 # CYRILLIC SMALL LETTER GHE +0xD4 0x0434 # CYRILLIC SMALL LETTER DE +0xD5 0x0435 # CYRILLIC SMALL LETTER IE +0xD6 0x0436 # CYRILLIC SMALL LETTER ZHE +0xD7 0x0437 # CYRILLIC SMALL LETTER ZE +0xD8 0x0438 # CYRILLIC SMALL LETTER I +0xD9 0x0439 # CYRILLIC SMALL LETTER SHORT I +0xDA 0x043A # CYRILLIC SMALL LETTER KA +0xDB 0x043B # CYRILLIC SMALL LETTER EL +0xDC 0x043C # CYRILLIC SMALL LETTER EM +0xDD 0x043D # CYRILLIC SMALL LETTER EN +0xDE 0x043E # CYRILLIC SMALL LETTER O +0xDF 0x043F # CYRILLIC SMALL LETTER PE +0xE0 0x0440 # CYRILLIC SMALL LETTER ER +0xE1 0x0441 # CYRILLIC SMALL LETTER ES +0xE2 0x0442 # CYRILLIC SMALL LETTER TE +0xE3 0x0443 # CYRILLIC SMALL LETTER U +0xE4 0x0444 # CYRILLIC SMALL LETTER EF +0xE5 0x0445 # CYRILLIC SMALL LETTER HA +0xE6 0x0446 # CYRILLIC SMALL LETTER TSE +0xE7 0x0447 # CYRILLIC SMALL LETTER CHE +0xE8 0x0448 # CYRILLIC SMALL LETTER SHA +0xE9 0x0449 # CYRILLIC SMALL LETTER SHCHA +0xEA 0x044A # CYRILLIC SMALL LETTER HARD SIGN +0xEB 0x044B # CYRILLIC SMALL LETTER YERU +0xEC 0x044C # CYRILLIC SMALL LETTER SOFT SIGN +0xED 0x044D # CYRILLIC SMALL LETTER E +0xEE 0x044E # CYRILLIC SMALL LETTER YU +0xEF 0x044F # CYRILLIC SMALL LETTER YA +0xF0 0x2116 # NUMERO SIGN +0xF1 0x0451 # CYRILLIC SMALL LETTER IO +0xF2 0x0452 # CYRILLIC SMALL LETTER DJE +0xF3 0x0453 # CYRILLIC SMALL LETTER GJE +0xF4 0x0454 # CYRILLIC SMALL LETTER UKRAINIAN IE +0xF5 0x0455 # CYRILLIC SMALL LETTER DZE +0xF6 0x0456 # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +0xF7 0x0457 # CYRILLIC SMALL LETTER YI +0xF8 0x0458 # CYRILLIC SMALL LETTER JE +0xF9 0x0459 # CYRILLIC SMALL LETTER LJE +0xFA 0x045A # CYRILLIC SMALL LETTER NJE +0xFB 0x045B # CYRILLIC SMALL LETTER TSHE +0xFC 0x045C # CYRILLIC SMALL LETTER KJE +0xFD 0x00A7 # SECTION SIGN +0xFE 0x045E # CYRILLIC SMALL LETTER SHORT U +0xFF 0x045F # CYRILLIC SMALL LETTER DZHE diff --git a/ext/standard/html_tables/mappings/CP1251.TXT b/ext/standard/html_tables/mappings/CP1251.TXT new file mode 100644 index 0000000..4d9b355 --- /dev/null +++ b/ext/standard/html_tables/mappings/CP1251.TXT @@ -0,0 +1,274 @@ +# +# Name: cp1251 to Unicode table +# Unicode version: 2.0 +# Table version: 2.01 +# Table format: Format A +# Date: 04/15/98 +# +# Contact: Shawn.Steele@microsoft.com +# +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp1251 code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp1251 order +# +0x00 0x0000 #NULL +0x01 0x0001 #START OF HEADING +0x02 0x0002 #START OF TEXT +0x03 0x0003 #END OF TEXT +0x04 0x0004 #END OF TRANSMISSION +0x05 0x0005 #ENQUIRY +0x06 0x0006 #ACKNOWLEDGE +0x07 0x0007 #BELL +0x08 0x0008 #BACKSPACE +0x09 0x0009 #HORIZONTAL TABULATION +0x0A 0x000A #LINE FEED +0x0B 0x000B #VERTICAL TABULATION +0x0C 0x000C #FORM FEED +0x0D 0x000D #CARRIAGE RETURN +0x0E 0x000E #SHIFT OUT +0x0F 0x000F #SHIFT IN +0x10 0x0010 #DATA LINK ESCAPE +0x11 0x0011 #DEVICE CONTROL ONE +0x12 0x0012 #DEVICE CONTROL TWO +0x13 0x0013 #DEVICE CONTROL THREE +0x14 0x0014 #DEVICE CONTROL FOUR +0x15 0x0015 #NEGATIVE ACKNOWLEDGE +0x16 0x0016 #SYNCHRONOUS IDLE +0x17 0x0017 #END OF TRANSMISSION BLOCK +0x18 0x0018 #CANCEL +0x19 0x0019 #END OF MEDIUM +0x1A 0x001A #SUBSTITUTE +0x1B 0x001B #ESCAPE +0x1C 0x001C #FILE SEPARATOR +0x1D 0x001D #GROUP SEPARATOR +0x1E 0x001E #RECORD SEPARATOR +0x1F 0x001F #UNIT SEPARATOR +0x20 0x0020 #SPACE +0x21 0x0021 #EXCLAMATION MARK +0x22 0x0022 #QUOTATION MARK +0x23 0x0023 #NUMBER SIGN +0x24 0x0024 #DOLLAR SIGN +0x25 0x0025 #PERCENT SIGN +0x26 0x0026 #AMPERSAND +0x27 0x0027 #APOSTROPHE +0x28 0x0028 #LEFT PARENTHESIS +0x29 0x0029 #RIGHT PARENTHESIS +0x2A 0x002A #ASTERISK +0x2B 0x002B #PLUS SIGN +0x2C 0x002C #COMMA +0x2D 0x002D #HYPHEN-MINUS +0x2E 0x002E #FULL STOP +0x2F 0x002F #SOLIDUS +0x30 0x0030 #DIGIT ZERO +0x31 0x0031 #DIGIT ONE +0x32 0x0032 #DIGIT TWO +0x33 0x0033 #DIGIT THREE +0x34 0x0034 #DIGIT FOUR +0x35 0x0035 #DIGIT FIVE +0x36 0x0036 #DIGIT SIX +0x37 0x0037 #DIGIT SEVEN +0x38 0x0038 #DIGIT EIGHT +0x39 0x0039 #DIGIT NINE +0x3A 0x003A #COLON +0x3B 0x003B #SEMICOLON +0x3C 0x003C #LESS-THAN SIGN +0x3D 0x003D #EQUALS SIGN +0x3E 0x003E #GREATER-THAN SIGN +0x3F 0x003F #QUESTION MARK +0x40 0x0040 #COMMERCIAL AT +0x41 0x0041 #LATIN CAPITAL LETTER A +0x42 0x0042 #LATIN CAPITAL LETTER B +0x43 0x0043 #LATIN CAPITAL LETTER C +0x44 0x0044 #LATIN CAPITAL LETTER D +0x45 0x0045 #LATIN CAPITAL LETTER E +0x46 0x0046 #LATIN CAPITAL LETTER F +0x47 0x0047 #LATIN CAPITAL LETTER G +0x48 0x0048 #LATIN CAPITAL LETTER H +0x49 0x0049 #LATIN CAPITAL LETTER I +0x4A 0x004A #LATIN CAPITAL LETTER J +0x4B 0x004B #LATIN CAPITAL LETTER K +0x4C 0x004C #LATIN CAPITAL LETTER L +0x4D 0x004D #LATIN CAPITAL LETTER M +0x4E 0x004E #LATIN CAPITAL LETTER N +0x4F 0x004F #LATIN CAPITAL LETTER O +0x50 0x0050 #LATIN CAPITAL LETTER P +0x51 0x0051 #LATIN CAPITAL LETTER Q +0x52 0x0052 #LATIN CAPITAL LETTER R +0x53 0x0053 #LATIN CAPITAL LETTER S +0x54 0x0054 #LATIN CAPITAL LETTER T +0x55 0x0055 #LATIN CAPITAL LETTER U +0x56 0x0056 #LATIN CAPITAL LETTER V +0x57 0x0057 #LATIN CAPITAL LETTER W +0x58 0x0058 #LATIN CAPITAL LETTER X +0x59 0x0059 #LATIN CAPITAL LETTER Y +0x5A 0x005A #LATIN CAPITAL LETTER Z +0x5B 0x005B #LEFT SQUARE BRACKET +0x5C 0x005C #REVERSE SOLIDUS +0x5D 0x005D #RIGHT SQUARE BRACKET +0x5E 0x005E #CIRCUMFLEX ACCENT +0x5F 0x005F #LOW LINE +0x60 0x0060 #GRAVE ACCENT +0x61 0x0061 #LATIN SMALL LETTER A +0x62 0x0062 #LATIN SMALL LETTER B +0x63 0x0063 #LATIN SMALL LETTER C +0x64 0x0064 #LATIN SMALL LETTER D +0x65 0x0065 #LATIN SMALL LETTER E +0x66 0x0066 #LATIN SMALL LETTER F +0x67 0x0067 #LATIN SMALL LETTER G +0x68 0x0068 #LATIN SMALL LETTER H +0x69 0x0069 #LATIN SMALL LETTER I +0x6A 0x006A #LATIN SMALL LETTER J +0x6B 0x006B #LATIN SMALL LETTER K +0x6C 0x006C #LATIN SMALL LETTER L +0x6D 0x006D #LATIN SMALL LETTER M +0x6E 0x006E #LATIN SMALL LETTER N +0x6F 0x006F #LATIN SMALL LETTER O +0x70 0x0070 #LATIN SMALL LETTER P +0x71 0x0071 #LATIN SMALL LETTER Q +0x72 0x0072 #LATIN SMALL LETTER R +0x73 0x0073 #LATIN SMALL LETTER S +0x74 0x0074 #LATIN SMALL LETTER T +0x75 0x0075 #LATIN SMALL LETTER U +0x76 0x0076 #LATIN SMALL LETTER V +0x77 0x0077 #LATIN SMALL LETTER W +0x78 0x0078 #LATIN SMALL LETTER X +0x79 0x0079 #LATIN SMALL LETTER Y +0x7A 0x007A #LATIN SMALL LETTER Z +0x7B 0x007B #LEFT CURLY BRACKET +0x7C 0x007C #VERTICAL LINE +0x7D 0x007D #RIGHT CURLY BRACKET +0x7E 0x007E #TILDE +0x7F 0x007F #DELETE +0x80 0x0402 #CYRILLIC CAPITAL LETTER DJE +0x81 0x0403 #CYRILLIC CAPITAL LETTER GJE +0x82 0x201A #SINGLE LOW-9 QUOTATION MARK +0x83 0x0453 #CYRILLIC SMALL LETTER GJE +0x84 0x201E #DOUBLE LOW-9 QUOTATION MARK +0x85 0x2026 #HORIZONTAL ELLIPSIS +0x86 0x2020 #DAGGER +0x87 0x2021 #DOUBLE DAGGER +0x88 0x20AC #EURO SIGN +0x89 0x2030 #PER MILLE SIGN +0x8A 0x0409 #CYRILLIC CAPITAL LETTER LJE +0x8B 0x2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0x8C 0x040A #CYRILLIC CAPITAL LETTER NJE +0x8D 0x040C #CYRILLIC CAPITAL LETTER KJE +0x8E 0x040B #CYRILLIC CAPITAL LETTER TSHE +0x8F 0x040F #CYRILLIC CAPITAL LETTER DZHE +0x90 0x0452 #CYRILLIC SMALL LETTER DJE +0x91 0x2018 #LEFT SINGLE QUOTATION MARK +0x92 0x2019 #RIGHT SINGLE QUOTATION MARK +0x93 0x201C #LEFT DOUBLE QUOTATION MARK +0x94 0x201D #RIGHT DOUBLE QUOTATION MARK +0x95 0x2022 #BULLET +0x96 0x2013 #EN DASH +0x97 0x2014 #EM DASH +0x98 #UNDEFINED +0x99 0x2122 #TRADE MARK SIGN +0x9A 0x0459 #CYRILLIC SMALL LETTER LJE +0x9B 0x203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0x9C 0x045A #CYRILLIC SMALL LETTER NJE +0x9D 0x045C #CYRILLIC SMALL LETTER KJE +0x9E 0x045B #CYRILLIC SMALL LETTER TSHE +0x9F 0x045F #CYRILLIC SMALL LETTER DZHE +0xA0 0x00A0 #NO-BREAK SPACE +0xA1 0x040E #CYRILLIC CAPITAL LETTER SHORT U +0xA2 0x045E #CYRILLIC SMALL LETTER SHORT U +0xA3 0x0408 #CYRILLIC CAPITAL LETTER JE +0xA4 0x00A4 #CURRENCY SIGN +0xA5 0x0490 #CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0xA6 0x00A6 #BROKEN BAR +0xA7 0x00A7 #SECTION SIGN +0xA8 0x0401 #CYRILLIC CAPITAL LETTER IO +0xA9 0x00A9 #COPYRIGHT SIGN +0xAA 0x0404 #CYRILLIC CAPITAL LETTER UKRAINIAN IE +0xAB 0x00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC #NOT SIGN +0xAD 0x00AD #SOFT HYPHEN +0xAE 0x00AE #REGISTERED SIGN +0xAF 0x0407 #CYRILLIC CAPITAL LETTER YI +0xB0 0x00B0 #DEGREE SIGN +0xB1 0x00B1 #PLUS-MINUS SIGN +0xB2 0x0406 #CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0xB3 0x0456 #CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +0xB4 0x0491 #CYRILLIC SMALL LETTER GHE WITH UPTURN +0xB5 0x00B5 #MICRO SIGN +0xB6 0x00B6 #PILCROW SIGN +0xB7 0x00B7 #MIDDLE DOT +0xB8 0x0451 #CYRILLIC SMALL LETTER IO +0xB9 0x2116 #NUMERO SIGN +0xBA 0x0454 #CYRILLIC SMALL LETTER UKRAINIAN IE +0xBB 0x00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x0458 #CYRILLIC SMALL LETTER JE +0xBD 0x0405 #CYRILLIC CAPITAL LETTER DZE +0xBE 0x0455 #CYRILLIC SMALL LETTER DZE +0xBF 0x0457 #CYRILLIC SMALL LETTER YI +0xC0 0x0410 #CYRILLIC CAPITAL LETTER A +0xC1 0x0411 #CYRILLIC CAPITAL LETTER BE +0xC2 0x0412 #CYRILLIC CAPITAL LETTER VE +0xC3 0x0413 #CYRILLIC CAPITAL LETTER GHE +0xC4 0x0414 #CYRILLIC CAPITAL LETTER DE +0xC5 0x0415 #CYRILLIC CAPITAL LETTER IE +0xC6 0x0416 #CYRILLIC CAPITAL LETTER ZHE +0xC7 0x0417 #CYRILLIC CAPITAL LETTER ZE +0xC8 0x0418 #CYRILLIC CAPITAL LETTER I +0xC9 0x0419 #CYRILLIC CAPITAL LETTER SHORT I +0xCA 0x041A #CYRILLIC CAPITAL LETTER KA +0xCB 0x041B #CYRILLIC CAPITAL LETTER EL +0xCC 0x041C #CYRILLIC CAPITAL LETTER EM +0xCD 0x041D #CYRILLIC CAPITAL LETTER EN +0xCE 0x041E #CYRILLIC CAPITAL LETTER O +0xCF 0x041F #CYRILLIC CAPITAL LETTER PE +0xD0 0x0420 #CYRILLIC CAPITAL LETTER ER +0xD1 0x0421 #CYRILLIC CAPITAL LETTER ES +0xD2 0x0422 #CYRILLIC CAPITAL LETTER TE +0xD3 0x0423 #CYRILLIC CAPITAL LETTER U +0xD4 0x0424 #CYRILLIC CAPITAL LETTER EF +0xD5 0x0425 #CYRILLIC CAPITAL LETTER HA +0xD6 0x0426 #CYRILLIC CAPITAL LETTER TSE +0xD7 0x0427 #CYRILLIC CAPITAL LETTER CHE +0xD8 0x0428 #CYRILLIC CAPITAL LETTER SHA +0xD9 0x0429 #CYRILLIC CAPITAL LETTER SHCHA +0xDA 0x042A #CYRILLIC CAPITAL LETTER HARD SIGN +0xDB 0x042B #CYRILLIC CAPITAL LETTER YERU +0xDC 0x042C #CYRILLIC CAPITAL LETTER SOFT SIGN +0xDD 0x042D #CYRILLIC CAPITAL LETTER E +0xDE 0x042E #CYRILLIC CAPITAL LETTER YU +0xDF 0x042F #CYRILLIC CAPITAL LETTER YA +0xE0 0x0430 #CYRILLIC SMALL LETTER A +0xE1 0x0431 #CYRILLIC SMALL LETTER BE +0xE2 0x0432 #CYRILLIC SMALL LETTER VE +0xE3 0x0433 #CYRILLIC SMALL LETTER GHE +0xE4 0x0434 #CYRILLIC SMALL LETTER DE +0xE5 0x0435 #CYRILLIC SMALL LETTER IE +0xE6 0x0436 #CYRILLIC SMALL LETTER ZHE +0xE7 0x0437 #CYRILLIC SMALL LETTER ZE +0xE8 0x0438 #CYRILLIC SMALL LETTER I +0xE9 0x0439 #CYRILLIC SMALL LETTER SHORT I +0xEA 0x043A #CYRILLIC SMALL LETTER KA +0xEB 0x043B #CYRILLIC SMALL LETTER EL +0xEC 0x043C #CYRILLIC SMALL LETTER EM +0xED 0x043D #CYRILLIC SMALL LETTER EN +0xEE 0x043E #CYRILLIC SMALL LETTER O +0xEF 0x043F #CYRILLIC SMALL LETTER PE +0xF0 0x0440 #CYRILLIC SMALL LETTER ER +0xF1 0x0441 #CYRILLIC SMALL LETTER ES +0xF2 0x0442 #CYRILLIC SMALL LETTER TE +0xF3 0x0443 #CYRILLIC SMALL LETTER U +0xF4 0x0444 #CYRILLIC SMALL LETTER EF +0xF5 0x0445 #CYRILLIC SMALL LETTER HA +0xF6 0x0446 #CYRILLIC SMALL LETTER TSE +0xF7 0x0447 #CYRILLIC SMALL LETTER CHE +0xF8 0x0448 #CYRILLIC SMALL LETTER SHA +0xF9 0x0449 #CYRILLIC SMALL LETTER SHCHA +0xFA 0x044A #CYRILLIC SMALL LETTER HARD SIGN +0xFB 0x044B #CYRILLIC SMALL LETTER YERU +0xFC 0x044C #CYRILLIC SMALL LETTER SOFT SIGN +0xFD 0x044D #CYRILLIC SMALL LETTER E +0xFE 0x044E #CYRILLIC SMALL LETTER YU +0xFF 0x044F #CYRILLIC SMALL LETTER YA diff --git a/ext/standard/html_tables/mappings/CP1252.TXT b/ext/standard/html_tables/mappings/CP1252.TXT new file mode 100644 index 0000000..8ff4b20 --- /dev/null +++ b/ext/standard/html_tables/mappings/CP1252.TXT @@ -0,0 +1,274 @@ +# +# Name: cp1252 to Unicode table +# Unicode version: 2.0 +# Table version: 2.01 +# Table format: Format A +# Date: 04/15/98 +# +# Contact: Shawn.Steele@microsoft.com +# +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp1252 code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp1252 order +# +0x00 0x0000 #NULL +0x01 0x0001 #START OF HEADING +0x02 0x0002 #START OF TEXT +0x03 0x0003 #END OF TEXT +0x04 0x0004 #END OF TRANSMISSION +0x05 0x0005 #ENQUIRY +0x06 0x0006 #ACKNOWLEDGE +0x07 0x0007 #BELL +0x08 0x0008 #BACKSPACE +0x09 0x0009 #HORIZONTAL TABULATION +0x0A 0x000A #LINE FEED +0x0B 0x000B #VERTICAL TABULATION +0x0C 0x000C #FORM FEED +0x0D 0x000D #CARRIAGE RETURN +0x0E 0x000E #SHIFT OUT +0x0F 0x000F #SHIFT IN +0x10 0x0010 #DATA LINK ESCAPE +0x11 0x0011 #DEVICE CONTROL ONE +0x12 0x0012 #DEVICE CONTROL TWO +0x13 0x0013 #DEVICE CONTROL THREE +0x14 0x0014 #DEVICE CONTROL FOUR +0x15 0x0015 #NEGATIVE ACKNOWLEDGE +0x16 0x0016 #SYNCHRONOUS IDLE +0x17 0x0017 #END OF TRANSMISSION BLOCK +0x18 0x0018 #CANCEL +0x19 0x0019 #END OF MEDIUM +0x1A 0x001A #SUBSTITUTE +0x1B 0x001B #ESCAPE +0x1C 0x001C #FILE SEPARATOR +0x1D 0x001D #GROUP SEPARATOR +0x1E 0x001E #RECORD SEPARATOR +0x1F 0x001F #UNIT SEPARATOR +0x20 0x0020 #SPACE +0x21 0x0021 #EXCLAMATION MARK +0x22 0x0022 #QUOTATION MARK +0x23 0x0023 #NUMBER SIGN +0x24 0x0024 #DOLLAR SIGN +0x25 0x0025 #PERCENT SIGN +0x26 0x0026 #AMPERSAND +0x27 0x0027 #APOSTROPHE +0x28 0x0028 #LEFT PARENTHESIS +0x29 0x0029 #RIGHT PARENTHESIS +0x2A 0x002A #ASTERISK +0x2B 0x002B #PLUS SIGN +0x2C 0x002C #COMMA +0x2D 0x002D #HYPHEN-MINUS +0x2E 0x002E #FULL STOP +0x2F 0x002F #SOLIDUS +0x30 0x0030 #DIGIT ZERO +0x31 0x0031 #DIGIT ONE +0x32 0x0032 #DIGIT TWO +0x33 0x0033 #DIGIT THREE +0x34 0x0034 #DIGIT FOUR +0x35 0x0035 #DIGIT FIVE +0x36 0x0036 #DIGIT SIX +0x37 0x0037 #DIGIT SEVEN +0x38 0x0038 #DIGIT EIGHT +0x39 0x0039 #DIGIT NINE +0x3A 0x003A #COLON +0x3B 0x003B #SEMICOLON +0x3C 0x003C #LESS-THAN SIGN +0x3D 0x003D #EQUALS SIGN +0x3E 0x003E #GREATER-THAN SIGN +0x3F 0x003F #QUESTION MARK +0x40 0x0040 #COMMERCIAL AT +0x41 0x0041 #LATIN CAPITAL LETTER A +0x42 0x0042 #LATIN CAPITAL LETTER B +0x43 0x0043 #LATIN CAPITAL LETTER C +0x44 0x0044 #LATIN CAPITAL LETTER D +0x45 0x0045 #LATIN CAPITAL LETTER E +0x46 0x0046 #LATIN CAPITAL LETTER F +0x47 0x0047 #LATIN CAPITAL LETTER G +0x48 0x0048 #LATIN CAPITAL LETTER H +0x49 0x0049 #LATIN CAPITAL LETTER I +0x4A 0x004A #LATIN CAPITAL LETTER J +0x4B 0x004B #LATIN CAPITAL LETTER K +0x4C 0x004C #LATIN CAPITAL LETTER L +0x4D 0x004D #LATIN CAPITAL LETTER M +0x4E 0x004E #LATIN CAPITAL LETTER N +0x4F 0x004F #LATIN CAPITAL LETTER O +0x50 0x0050 #LATIN CAPITAL LETTER P +0x51 0x0051 #LATIN CAPITAL LETTER Q +0x52 0x0052 #LATIN CAPITAL LETTER R +0x53 0x0053 #LATIN CAPITAL LETTER S +0x54 0x0054 #LATIN CAPITAL LETTER T +0x55 0x0055 #LATIN CAPITAL LETTER U +0x56 0x0056 #LATIN CAPITAL LETTER V +0x57 0x0057 #LATIN CAPITAL LETTER W +0x58 0x0058 #LATIN CAPITAL LETTER X +0x59 0x0059 #LATIN CAPITAL LETTER Y +0x5A 0x005A #LATIN CAPITAL LETTER Z +0x5B 0x005B #LEFT SQUARE BRACKET +0x5C 0x005C #REVERSE SOLIDUS +0x5D 0x005D #RIGHT SQUARE BRACKET +0x5E 0x005E #CIRCUMFLEX ACCENT +0x5F 0x005F #LOW LINE +0x60 0x0060 #GRAVE ACCENT +0x61 0x0061 #LATIN SMALL LETTER A +0x62 0x0062 #LATIN SMALL LETTER B +0x63 0x0063 #LATIN SMALL LETTER C +0x64 0x0064 #LATIN SMALL LETTER D +0x65 0x0065 #LATIN SMALL LETTER E +0x66 0x0066 #LATIN SMALL LETTER F +0x67 0x0067 #LATIN SMALL LETTER G +0x68 0x0068 #LATIN SMALL LETTER H +0x69 0x0069 #LATIN SMALL LETTER I +0x6A 0x006A #LATIN SMALL LETTER J +0x6B 0x006B #LATIN SMALL LETTER K +0x6C 0x006C #LATIN SMALL LETTER L +0x6D 0x006D #LATIN SMALL LETTER M +0x6E 0x006E #LATIN SMALL LETTER N +0x6F 0x006F #LATIN SMALL LETTER O +0x70 0x0070 #LATIN SMALL LETTER P +0x71 0x0071 #LATIN SMALL LETTER Q +0x72 0x0072 #LATIN SMALL LETTER R +0x73 0x0073 #LATIN SMALL LETTER S +0x74 0x0074 #LATIN SMALL LETTER T +0x75 0x0075 #LATIN SMALL LETTER U +0x76 0x0076 #LATIN SMALL LETTER V +0x77 0x0077 #LATIN SMALL LETTER W +0x78 0x0078 #LATIN SMALL LETTER X +0x79 0x0079 #LATIN SMALL LETTER Y +0x7A 0x007A #LATIN SMALL LETTER Z +0x7B 0x007B #LEFT CURLY BRACKET +0x7C 0x007C #VERTICAL LINE +0x7D 0x007D #RIGHT CURLY BRACKET +0x7E 0x007E #TILDE +0x7F 0x007F #DELETE +0x80 0x20AC #EURO SIGN +0x81 #UNDEFINED +0x82 0x201A #SINGLE LOW-9 QUOTATION MARK +0x83 0x0192 #LATIN SMALL LETTER F WITH HOOK +0x84 0x201E #DOUBLE LOW-9 QUOTATION MARK +0x85 0x2026 #HORIZONTAL ELLIPSIS +0x86 0x2020 #DAGGER +0x87 0x2021 #DOUBLE DAGGER +0x88 0x02C6 #MODIFIER LETTER CIRCUMFLEX ACCENT +0x89 0x2030 #PER MILLE SIGN +0x8A 0x0160 #LATIN CAPITAL LETTER S WITH CARON +0x8B 0x2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0x8C 0x0152 #LATIN CAPITAL LIGATURE OE +0x8D #UNDEFINED +0x8E 0x017D #LATIN CAPITAL LETTER Z WITH CARON +0x8F #UNDEFINED +0x90 #UNDEFINED +0x91 0x2018 #LEFT SINGLE QUOTATION MARK +0x92 0x2019 #RIGHT SINGLE QUOTATION MARK +0x93 0x201C #LEFT DOUBLE QUOTATION MARK +0x94 0x201D #RIGHT DOUBLE QUOTATION MARK +0x95 0x2022 #BULLET +0x96 0x2013 #EN DASH +0x97 0x2014 #EM DASH +0x98 0x02DC #SMALL TILDE +0x99 0x2122 #TRADE MARK SIGN +0x9A 0x0161 #LATIN SMALL LETTER S WITH CARON +0x9B 0x203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0x9C 0x0153 #LATIN SMALL LIGATURE OE +0x9D #UNDEFINED +0x9E 0x017E #LATIN SMALL LETTER Z WITH CARON +0x9F 0x0178 #LATIN CAPITAL LETTER Y WITH DIAERESIS +0xA0 0x00A0 #NO-BREAK SPACE +0xA1 0x00A1 #INVERTED EXCLAMATION MARK +0xA2 0x00A2 #CENT SIGN +0xA3 0x00A3 #POUND SIGN +0xA4 0x00A4 #CURRENCY SIGN +0xA5 0x00A5 #YEN SIGN +0xA6 0x00A6 #BROKEN BAR +0xA7 0x00A7 #SECTION SIGN +0xA8 0x00A8 #DIAERESIS +0xA9 0x00A9 #COPYRIGHT SIGN +0xAA 0x00AA #FEMININE ORDINAL INDICATOR +0xAB 0x00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC #NOT SIGN +0xAD 0x00AD #SOFT HYPHEN +0xAE 0x00AE #REGISTERED SIGN +0xAF 0x00AF #MACRON +0xB0 0x00B0 #DEGREE SIGN +0xB1 0x00B1 #PLUS-MINUS SIGN +0xB2 0x00B2 #SUPERSCRIPT TWO +0xB3 0x00B3 #SUPERSCRIPT THREE +0xB4 0x00B4 #ACUTE ACCENT +0xB5 0x00B5 #MICRO SIGN +0xB6 0x00B6 #PILCROW SIGN +0xB7 0x00B7 #MIDDLE DOT +0xB8 0x00B8 #CEDILLA +0xB9 0x00B9 #SUPERSCRIPT ONE +0xBA 0x00BA #MASCULINE ORDINAL INDICATOR +0xBB 0x00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x00BC #VULGAR FRACTION ONE QUARTER +0xBD 0x00BD #VULGAR FRACTION ONE HALF +0xBE 0x00BE #VULGAR FRACTION THREE QUARTERS +0xBF 0x00BF #INVERTED QUESTION MARK +0xC0 0x00C0 #LATIN CAPITAL LETTER A WITH GRAVE +0xC1 0x00C1 #LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x00C3 #LATIN CAPITAL LETTER A WITH TILDE +0xC4 0x00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 #LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x00C6 #LATIN CAPITAL LETTER AE +0xC7 0x00C7 #LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x00C8 #LATIN CAPITAL LETTER E WITH GRAVE +0xC9 0x00C9 #LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x00CA #LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xCB 0x00CB #LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x00CC #LATIN CAPITAL LETTER I WITH GRAVE +0xCD 0x00CD #LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE #LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x00CF #LATIN CAPITAL LETTER I WITH DIAERESIS +0xD0 0x00D0 #LATIN CAPITAL LETTER ETH +0xD1 0x00D1 #LATIN CAPITAL LETTER N WITH TILDE +0xD2 0x00D2 #LATIN CAPITAL LETTER O WITH GRAVE +0xD3 0x00D3 #LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x00D5 #LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x00D7 #MULTIPLICATION SIGN +0xD8 0x00D8 #LATIN CAPITAL LETTER O WITH STROKE +0xD9 0x00D9 #LATIN CAPITAL LETTER U WITH GRAVE +0xDA 0x00DA #LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB #LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC #LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x00DD #LATIN CAPITAL LETTER Y WITH ACUTE +0xDE 0x00DE #LATIN CAPITAL LETTER THORN +0xDF 0x00DF #LATIN SMALL LETTER SHARP S +0xE0 0x00E0 #LATIN SMALL LETTER A WITH GRAVE +0xE1 0x00E1 #LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x00E3 #LATIN SMALL LETTER A WITH TILDE +0xE4 0x00E4 #LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x00E5 #LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x00E6 #LATIN SMALL LETTER AE +0xE7 0x00E7 #LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x00E8 #LATIN SMALL LETTER E WITH GRAVE +0xE9 0x00E9 #LATIN SMALL LETTER E WITH ACUTE +0xEA 0x00EA #LATIN SMALL LETTER E WITH CIRCUMFLEX +0xEB 0x00EB #LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x00EC #LATIN SMALL LETTER I WITH GRAVE +0xED 0x00ED #LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF #LATIN SMALL LETTER I WITH DIAERESIS +0xF0 0x00F0 #LATIN SMALL LETTER ETH +0xF1 0x00F1 #LATIN SMALL LETTER N WITH TILDE +0xF2 0x00F2 #LATIN SMALL LETTER O WITH GRAVE +0xF3 0x00F3 #LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x00F5 #LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 #LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 #DIVISION SIGN +0xF8 0x00F8 #LATIN SMALL LETTER O WITH STROKE +0xF9 0x00F9 #LATIN SMALL LETTER U WITH GRAVE +0xFA 0x00FA #LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB #LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC #LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x00FD #LATIN SMALL LETTER Y WITH ACUTE +0xFE 0x00FE #LATIN SMALL LETTER THORN +0xFF 0x00FF #LATIN SMALL LETTER Y WITH DIAERESIS diff --git a/ext/standard/html_tables/mappings/CP866.TXT b/ext/standard/html_tables/mappings/CP866.TXT new file mode 100644 index 0000000..b0213a1 --- /dev/null +++ b/ext/standard/html_tables/mappings/CP866.TXT @@ -0,0 +1,275 @@ +# +# Name: cp866_DOSCyrillicRussian to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Contact: Shawn.Steele@microsoft.com +# +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp866_DOSCyrillicRussian code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp866_DOSCyrillicRussian order +# +0x00 0x0000 #NULL +0x01 0x0001 #START OF HEADING +0x02 0x0002 #START OF TEXT +0x03 0x0003 #END OF TEXT +0x04 0x0004 #END OF TRANSMISSION +0x05 0x0005 #ENQUIRY +0x06 0x0006 #ACKNOWLEDGE +0x07 0x0007 #BELL +0x08 0x0008 #BACKSPACE +0x09 0x0009 #HORIZONTAL TABULATION +0x0a 0x000a #LINE FEED +0x0b 0x000b #VERTICAL TABULATION +0x0c 0x000c #FORM FEED +0x0d 0x000d #CARRIAGE RETURN +0x0e 0x000e #SHIFT OUT +0x0f 0x000f #SHIFT IN +0x10 0x0010 #DATA LINK ESCAPE +0x11 0x0011 #DEVICE CONTROL ONE +0x12 0x0012 #DEVICE CONTROL TWO +0x13 0x0013 #DEVICE CONTROL THREE +0x14 0x0014 #DEVICE CONTROL FOUR +0x15 0x0015 #NEGATIVE ACKNOWLEDGE +0x16 0x0016 #SYNCHRONOUS IDLE +0x17 0x0017 #END OF TRANSMISSION BLOCK +0x18 0x0018 #CANCEL +0x19 0x0019 #END OF MEDIUM +0x1a 0x001a #SUBSTITUTE +0x1b 0x001b #ESCAPE +0x1c 0x001c #FILE SEPARATOR +0x1d 0x001d #GROUP SEPARATOR +0x1e 0x001e #RECORD SEPARATOR +0x1f 0x001f #UNIT SEPARATOR +0x20 0x0020 #SPACE +0x21 0x0021 #EXCLAMATION MARK +0x22 0x0022 #QUOTATION MARK +0x23 0x0023 #NUMBER SIGN +0x24 0x0024 #DOLLAR SIGN +0x25 0x0025 #PERCENT SIGN +0x26 0x0026 #AMPERSAND +0x27 0x0027 #APOSTROPHE +0x28 0x0028 #LEFT PARENTHESIS +0x29 0x0029 #RIGHT PARENTHESIS +0x2a 0x002a #ASTERISK +0x2b 0x002b #PLUS SIGN +0x2c 0x002c #COMMA +0x2d 0x002d #HYPHEN-MINUS +0x2e 0x002e #FULL STOP +0x2f 0x002f #SOLIDUS +0x30 0x0030 #DIGIT ZERO +0x31 0x0031 #DIGIT ONE +0x32 0x0032 #DIGIT TWO +0x33 0x0033 #DIGIT THREE +0x34 0x0034 #DIGIT FOUR +0x35 0x0035 #DIGIT FIVE +0x36 0x0036 #DIGIT SIX +0x37 0x0037 #DIGIT SEVEN +0x38 0x0038 #DIGIT EIGHT +0x39 0x0039 #DIGIT NINE +0x3a 0x003a #COLON +0x3b 0x003b #SEMICOLON +0x3c 0x003c #LESS-THAN SIGN +0x3d 0x003d #EQUALS SIGN +0x3e 0x003e #GREATER-THAN SIGN +0x3f 0x003f #QUESTION MARK +0x40 0x0040 #COMMERCIAL AT +0x41 0x0041 #LATIN CAPITAL LETTER A +0x42 0x0042 #LATIN CAPITAL LETTER B +0x43 0x0043 #LATIN CAPITAL LETTER C +0x44 0x0044 #LATIN CAPITAL LETTER D +0x45 0x0045 #LATIN CAPITAL LETTER E +0x46 0x0046 #LATIN CAPITAL LETTER F +0x47 0x0047 #LATIN CAPITAL LETTER G +0x48 0x0048 #LATIN CAPITAL LETTER H +0x49 0x0049 #LATIN CAPITAL LETTER I +0x4a 0x004a #LATIN CAPITAL LETTER J +0x4b 0x004b #LATIN CAPITAL LETTER K +0x4c 0x004c #LATIN CAPITAL LETTER L +0x4d 0x004d #LATIN CAPITAL LETTER M +0x4e 0x004e #LATIN CAPITAL LETTER N +0x4f 0x004f #LATIN CAPITAL LETTER O +0x50 0x0050 #LATIN CAPITAL LETTER P +0x51 0x0051 #LATIN CAPITAL LETTER Q +0x52 0x0052 #LATIN CAPITAL LETTER R +0x53 0x0053 #LATIN CAPITAL LETTER S +0x54 0x0054 #LATIN CAPITAL LETTER T +0x55 0x0055 #LATIN CAPITAL LETTER U +0x56 0x0056 #LATIN CAPITAL LETTER V +0x57 0x0057 #LATIN CAPITAL LETTER W +0x58 0x0058 #LATIN CAPITAL LETTER X +0x59 0x0059 #LATIN CAPITAL LETTER Y +0x5a 0x005a #LATIN CAPITAL LETTER Z +0x5b 0x005b #LEFT SQUARE BRACKET +0x5c 0x005c #REVERSE SOLIDUS +0x5d 0x005d #RIGHT SQUARE BRACKET +0x5e 0x005e #CIRCUMFLEX ACCENT +0x5f 0x005f #LOW LINE +0x60 0x0060 #GRAVE ACCENT +0x61 0x0061 #LATIN SMALL LETTER A +0x62 0x0062 #LATIN SMALL LETTER B +0x63 0x0063 #LATIN SMALL LETTER C +0x64 0x0064 #LATIN SMALL LETTER D +0x65 0x0065 #LATIN SMALL LETTER E +0x66 0x0066 #LATIN SMALL LETTER F +0x67 0x0067 #LATIN SMALL LETTER G +0x68 0x0068 #LATIN SMALL LETTER H +0x69 0x0069 #LATIN SMALL LETTER I +0x6a 0x006a #LATIN SMALL LETTER J +0x6b 0x006b #LATIN SMALL LETTER K +0x6c 0x006c #LATIN SMALL LETTER L +0x6d 0x006d #LATIN SMALL LETTER M +0x6e 0x006e #LATIN SMALL LETTER N +0x6f 0x006f #LATIN SMALL LETTER O +0x70 0x0070 #LATIN SMALL LETTER P +0x71 0x0071 #LATIN SMALL LETTER Q +0x72 0x0072 #LATIN SMALL LETTER R +0x73 0x0073 #LATIN SMALL LETTER S +0x74 0x0074 #LATIN SMALL LETTER T +0x75 0x0075 #LATIN SMALL LETTER U +0x76 0x0076 #LATIN SMALL LETTER V +0x77 0x0077 #LATIN SMALL LETTER W +0x78 0x0078 #LATIN SMALL LETTER X +0x79 0x0079 #LATIN SMALL LETTER Y +0x7a 0x007a #LATIN SMALL LETTER Z +0x7b 0x007b #LEFT CURLY BRACKET +0x7c 0x007c #VERTICAL LINE +0x7d 0x007d #RIGHT CURLY BRACKET +0x7e 0x007e #TILDE +0x7f 0x007f #DELETE +0x80 0x0410 #CYRILLIC CAPITAL LETTER A +0x81 0x0411 #CYRILLIC CAPITAL LETTER BE +0x82 0x0412 #CYRILLIC CAPITAL LETTER VE +0x83 0x0413 #CYRILLIC CAPITAL LETTER GHE +0x84 0x0414 #CYRILLIC CAPITAL LETTER DE +0x85 0x0415 #CYRILLIC CAPITAL LETTER IE +0x86 0x0416 #CYRILLIC CAPITAL LETTER ZHE +0x87 0x0417 #CYRILLIC CAPITAL LETTER ZE +0x88 0x0418 #CYRILLIC CAPITAL LETTER I +0x89 0x0419 #CYRILLIC CAPITAL LETTER SHORT I +0x8a 0x041a #CYRILLIC CAPITAL LETTER KA +0x8b 0x041b #CYRILLIC CAPITAL LETTER EL +0x8c 0x041c #CYRILLIC CAPITAL LETTER EM +0x8d 0x041d #CYRILLIC CAPITAL LETTER EN +0x8e 0x041e #CYRILLIC CAPITAL LETTER O +0x8f 0x041f #CYRILLIC CAPITAL LETTER PE +0x90 0x0420 #CYRILLIC CAPITAL LETTER ER +0x91 0x0421 #CYRILLIC CAPITAL LETTER ES +0x92 0x0422 #CYRILLIC CAPITAL LETTER TE +0x93 0x0423 #CYRILLIC CAPITAL LETTER U +0x94 0x0424 #CYRILLIC CAPITAL LETTER EF +0x95 0x0425 #CYRILLIC CAPITAL LETTER HA +0x96 0x0426 #CYRILLIC CAPITAL LETTER TSE +0x97 0x0427 #CYRILLIC CAPITAL LETTER CHE +0x98 0x0428 #CYRILLIC CAPITAL LETTER SHA +0x99 0x0429 #CYRILLIC CAPITAL LETTER SHCHA +0x9a 0x042a #CYRILLIC CAPITAL LETTER HARD SIGN +0x9b 0x042b #CYRILLIC CAPITAL LETTER YERU +0x9c 0x042c #CYRILLIC CAPITAL LETTER SOFT SIGN +0x9d 0x042d #CYRILLIC CAPITAL LETTER E +0x9e 0x042e #CYRILLIC CAPITAL LETTER YU +0x9f 0x042f #CYRILLIC CAPITAL LETTER YA +0xa0 0x0430 #CYRILLIC SMALL LETTER A +0xa1 0x0431 #CYRILLIC SMALL LETTER BE +0xa2 0x0432 #CYRILLIC SMALL LETTER VE +0xa3 0x0433 #CYRILLIC SMALL LETTER GHE +0xa4 0x0434 #CYRILLIC SMALL LETTER DE +0xa5 0x0435 #CYRILLIC SMALL LETTER IE +0xa6 0x0436 #CYRILLIC SMALL LETTER ZHE +0xa7 0x0437 #CYRILLIC SMALL LETTER ZE +0xa8 0x0438 #CYRILLIC SMALL LETTER I +0xa9 0x0439 #CYRILLIC SMALL LETTER SHORT I +0xaa 0x043a #CYRILLIC SMALL LETTER KA +0xab 0x043b #CYRILLIC SMALL LETTER EL +0xac 0x043c #CYRILLIC SMALL LETTER EM +0xad 0x043d #CYRILLIC SMALL LETTER EN +0xae 0x043e #CYRILLIC SMALL LETTER O +0xaf 0x043f #CYRILLIC SMALL LETTER PE +0xb0 0x2591 #LIGHT SHADE +0xb1 0x2592 #MEDIUM SHADE +0xb2 0x2593 #DARK SHADE +0xb3 0x2502 #BOX DRAWINGS LIGHT VERTICAL +0xb4 0x2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT +0xb5 0x2561 #BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0xb6 0x2562 #BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +0xb7 0x2556 #BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +0xb8 0x2555 #BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +0xb9 0x2563 #BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xba 0x2551 #BOX DRAWINGS DOUBLE VERTICAL +0xbb 0x2557 #BOX DRAWINGS DOUBLE DOWN AND LEFT +0xbc 0x255d #BOX DRAWINGS DOUBLE UP AND LEFT +0xbd 0x255c #BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +0xbe 0x255b #BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +0xbf 0x2510 #BOX DRAWINGS LIGHT DOWN AND LEFT +0xc0 0x2514 #BOX DRAWINGS LIGHT UP AND RIGHT +0xc1 0x2534 #BOX DRAWINGS LIGHT UP AND HORIZONTAL +0xc2 0x252c #BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0xc3 0x251c #BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0xc4 0x2500 #BOX DRAWINGS LIGHT HORIZONTAL +0xc5 0x253c #BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0xc6 0x255e #BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0xc7 0x255f #BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0xc8 0x255a #BOX DRAWINGS DOUBLE UP AND RIGHT +0xc9 0x2554 #BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xca 0x2569 #BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xcb 0x2566 #BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xcc 0x2560 #BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xcd 0x2550 #BOX DRAWINGS DOUBLE HORIZONTAL +0xce 0x256c #BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xcf 0x2567 #BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0xd0 0x2568 #BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0xd1 0x2564 #BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +0xd2 0x2565 #BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +0xd3 0x2559 #BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +0xd4 0x2558 #BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0xd5 0x2552 #BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +0xd6 0x2553 #BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +0xd7 0x256b #BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +0xd8 0x256a #BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +0xd9 0x2518 #BOX DRAWINGS LIGHT UP AND LEFT +0xda 0x250c #BOX DRAWINGS LIGHT DOWN AND RIGHT +0xdb 0x2588 #FULL BLOCK +0xdc 0x2584 #LOWER HALF BLOCK +0xdd 0x258c #LEFT HALF BLOCK +0xde 0x2590 #RIGHT HALF BLOCK +0xdf 0x2580 #UPPER HALF BLOCK +0xe0 0x0440 #CYRILLIC SMALL LETTER ER +0xe1 0x0441 #CYRILLIC SMALL LETTER ES +0xe2 0x0442 #CYRILLIC SMALL LETTER TE +0xe3 0x0443 #CYRILLIC SMALL LETTER U +0xe4 0x0444 #CYRILLIC SMALL LETTER EF +0xe5 0x0445 #CYRILLIC SMALL LETTER HA +0xe6 0x0446 #CYRILLIC SMALL LETTER TSE +0xe7 0x0447 #CYRILLIC SMALL LETTER CHE +0xe8 0x0448 #CYRILLIC SMALL LETTER SHA +0xe9 0x0449 #CYRILLIC SMALL LETTER SHCHA +0xea 0x044a #CYRILLIC SMALL LETTER HARD SIGN +0xeb 0x044b #CYRILLIC SMALL LETTER YERU +0xec 0x044c #CYRILLIC SMALL LETTER SOFT SIGN +0xed 0x044d #CYRILLIC SMALL LETTER E +0xee 0x044e #CYRILLIC SMALL LETTER YU +0xef 0x044f #CYRILLIC SMALL LETTER YA +0xf0 0x0401 #CYRILLIC CAPITAL LETTER IO +0xf1 0x0451 #CYRILLIC SMALL LETTER IO +0xf2 0x0404 #CYRILLIC CAPITAL LETTER UKRAINIAN IE +0xf3 0x0454 #CYRILLIC SMALL LETTER UKRAINIAN IE +0xf4 0x0407 #CYRILLIC CAPITAL LETTER YI +0xf5 0x0457 #CYRILLIC SMALL LETTER YI +0xf6 0x040e #CYRILLIC CAPITAL LETTER SHORT U +0xf7 0x045e #CYRILLIC SMALL LETTER SHORT U +0xf8 0x00b0 #DEGREE SIGN +0xf9 0x2219 #BULLET OPERATOR +0xfa 0x00b7 #MIDDLE DOT +0xfb 0x221a #SQUARE ROOT +0xfc 0x2116 #NUMERO SIGN +0xfd 0x00a4 #CURRENCY SIGN +0xfe 0x25a0 #BLACK SQUARE +0xff 0x00a0 #NO-BREAK SPACE + +
\ No newline at end of file diff --git a/ext/standard/html_tables/mappings/KOI8-R.TXT b/ext/standard/html_tables/mappings/KOI8-R.TXT new file mode 100644 index 0000000..5105610 --- /dev/null +++ b/ext/standard/html_tables/mappings/KOI8-R.TXT @@ -0,0 +1,302 @@ +# +# Name: KOI8-R (RFC1489) to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 18 August 1999 +# Authors: Helmut Richter <richter@lrz.de> +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# KOI8-R characters map into Unicode. The underlying document is the +# mapping described in RFC 1489. No statements are made as to whether +# this mapping is the same as the mapping defined as "Code Page 878" +# with some vendors. +# +# Format: Three tab-separated columns +# Column #1 is the KOI8-R code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in KOI8-R order. +# +# Version history +# 1.0 version: created. +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x2500 # BOX DRAWINGS LIGHT HORIZONTAL +0x81 0x2502 # BOX DRAWINGS LIGHT VERTICAL +0x82 0x250C # BOX DRAWINGS LIGHT DOWN AND RIGHT +0x83 0x2510 # BOX DRAWINGS LIGHT DOWN AND LEFT +0x84 0x2514 # BOX DRAWINGS LIGHT UP AND RIGHT +0x85 0x2518 # BOX DRAWINGS LIGHT UP AND LEFT +0x86 0x251C # BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0x87 0x2524 # BOX DRAWINGS LIGHT VERTICAL AND LEFT +0x88 0x252C # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0x89 0x2534 # BOX DRAWINGS LIGHT UP AND HORIZONTAL +0x8A 0x253C # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0x8B 0x2580 # UPPER HALF BLOCK +0x8C 0x2584 # LOWER HALF BLOCK +0x8D 0x2588 # FULL BLOCK +0x8E 0x258C # LEFT HALF BLOCK +0x8F 0x2590 # RIGHT HALF BLOCK +0x90 0x2591 # LIGHT SHADE +0x91 0x2592 # MEDIUM SHADE +0x92 0x2593 # DARK SHADE +0x93 0x2320 # TOP HALF INTEGRAL +0x94 0x25A0 # BLACK SQUARE +0x95 0x2219 # BULLET OPERATOR +0x96 0x221A # SQUARE ROOT +0x97 0x2248 # ALMOST EQUAL TO +0x98 0x2264 # LESS-THAN OR EQUAL TO +0x99 0x2265 # GREATER-THAN OR EQUAL TO +0x9A 0x00A0 # NO-BREAK SPACE +0x9B 0x2321 # BOTTOM HALF INTEGRAL +0x9C 0x00B0 # DEGREE SIGN +0x9D 0x00B2 # SUPERSCRIPT TWO +0x9E 0x00B7 # MIDDLE DOT +0x9F 0x00F7 # DIVISION SIGN +0xA0 0x2550 # BOX DRAWINGS DOUBLE HORIZONTAL +0xA1 0x2551 # BOX DRAWINGS DOUBLE VERTICAL +0xA2 0x2552 # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +0xA3 0x0451 # CYRILLIC SMALL LETTER IO +0xA4 0x2553 # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +0xA5 0x2554 # BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xA6 0x2555 # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +0xA7 0x2556 # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +0xA8 0x2557 # BOX DRAWINGS DOUBLE DOWN AND LEFT +0xA9 0x2558 # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0xAA 0x2559 # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +0xAB 0x255A # BOX DRAWINGS DOUBLE UP AND RIGHT +0xAC 0x255B # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +0xAD 0x255C # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +0xAE 0x255D # BOX DRAWINGS DOUBLE UP AND LEFT +0xAF 0x255E # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0xB0 0x255F # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0xB1 0x2560 # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xB2 0x2561 # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0xB3 0x0401 # CYRILLIC CAPITAL LETTER IO +0xB4 0x2562 # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +0xB5 0x2563 # BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xB6 0x2564 # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +0xB7 0x2565 # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +0xB8 0x2566 # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xB9 0x2567 # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0xBA 0x2568 # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0xBB 0x2569 # BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xBC 0x256A # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +0xBD 0x256B # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +0xBE 0x256C # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xBF 0x00A9 # COPYRIGHT SIGN +0xC0 0x044E # CYRILLIC SMALL LETTER YU +0xC1 0x0430 # CYRILLIC SMALL LETTER A +0xC2 0x0431 # CYRILLIC SMALL LETTER BE +0xC3 0x0446 # CYRILLIC SMALL LETTER TSE +0xC4 0x0434 # CYRILLIC SMALL LETTER DE +0xC5 0x0435 # CYRILLIC SMALL LETTER IE +0xC6 0x0444 # CYRILLIC SMALL LETTER EF +0xC7 0x0433 # CYRILLIC SMALL LETTER GHE +0xC8 0x0445 # CYRILLIC SMALL LETTER HA +0xC9 0x0438 # CYRILLIC SMALL LETTER I +0xCA 0x0439 # CYRILLIC SMALL LETTER SHORT I +0xCB 0x043A # CYRILLIC SMALL LETTER KA +0xCC 0x043B # CYRILLIC SMALL LETTER EL +0xCD 0x043C # CYRILLIC SMALL LETTER EM +0xCE 0x043D # CYRILLIC SMALL LETTER EN +0xCF 0x043E # CYRILLIC SMALL LETTER O +0xD0 0x043F # CYRILLIC SMALL LETTER PE +0xD1 0x044F # CYRILLIC SMALL LETTER YA +0xD2 0x0440 # CYRILLIC SMALL LETTER ER +0xD3 0x0441 # CYRILLIC SMALL LETTER ES +0xD4 0x0442 # CYRILLIC SMALL LETTER TE +0xD5 0x0443 # CYRILLIC SMALL LETTER U +0xD6 0x0436 # CYRILLIC SMALL LETTER ZHE +0xD7 0x0432 # CYRILLIC SMALL LETTER VE +0xD8 0x044C # CYRILLIC SMALL LETTER SOFT SIGN +0xD9 0x044B # CYRILLIC SMALL LETTER YERU +0xDA 0x0437 # CYRILLIC SMALL LETTER ZE +0xDB 0x0448 # CYRILLIC SMALL LETTER SHA +0xDC 0x044D # CYRILLIC SMALL LETTER E +0xDD 0x0449 # CYRILLIC SMALL LETTER SHCHA +0xDE 0x0447 # CYRILLIC SMALL LETTER CHE +0xDF 0x044A # CYRILLIC SMALL LETTER HARD SIGN +0xE0 0x042E # CYRILLIC CAPITAL LETTER YU +0xE1 0x0410 # CYRILLIC CAPITAL LETTER A +0xE2 0x0411 # CYRILLIC CAPITAL LETTER BE +0xE3 0x0426 # CYRILLIC CAPITAL LETTER TSE +0xE4 0x0414 # CYRILLIC CAPITAL LETTER DE +0xE5 0x0415 # CYRILLIC CAPITAL LETTER IE +0xE6 0x0424 # CYRILLIC CAPITAL LETTER EF +0xE7 0x0413 # CYRILLIC CAPITAL LETTER GHE +0xE8 0x0425 # CYRILLIC CAPITAL LETTER HA +0xE9 0x0418 # CYRILLIC CAPITAL LETTER I +0xEA 0x0419 # CYRILLIC CAPITAL LETTER SHORT I +0xEB 0x041A # CYRILLIC CAPITAL LETTER KA +0xEC 0x041B # CYRILLIC CAPITAL LETTER EL +0xED 0x041C # CYRILLIC CAPITAL LETTER EM +0xEE 0x041D # CYRILLIC CAPITAL LETTER EN +0xEF 0x041E # CYRILLIC CAPITAL LETTER O +0xF0 0x041F # CYRILLIC CAPITAL LETTER PE +0xF1 0x042F # CYRILLIC CAPITAL LETTER YA +0xF2 0x0420 # CYRILLIC CAPITAL LETTER ER +0xF3 0x0421 # CYRILLIC CAPITAL LETTER ES +0xF4 0x0422 # CYRILLIC CAPITAL LETTER TE +0xF5 0x0423 # CYRILLIC CAPITAL LETTER U +0xF6 0x0416 # CYRILLIC CAPITAL LETTER ZHE +0xF7 0x0412 # CYRILLIC CAPITAL LETTER VE +0xF8 0x042C # CYRILLIC CAPITAL LETTER SOFT SIGN +0xF9 0x042B # CYRILLIC CAPITAL LETTER YERU +0xFA 0x0417 # CYRILLIC CAPITAL LETTER ZE +0xFB 0x0428 # CYRILLIC CAPITAL LETTER SHA +0xFC 0x042D # CYRILLIC CAPITAL LETTER E +0xFD 0x0429 # CYRILLIC CAPITAL LETTER SHCHA +0xFE 0x0427 # CYRILLIC CAPITAL LETTER CHE +0xFF 0x042A # CYRILLIC CAPITAL LETTER HARD SIGN diff --git a/ext/standard/html_tables/mappings/ROMAN.TXT b/ext/standard/html_tables/mappings/ROMAN.TXT new file mode 100644 index 0000000..5b3b8b4 --- /dev/null +++ b/ext/standard/html_tables/mappings/ROMAN.TXT @@ -0,0 +1,370 @@ +#======================================================================= +# File name: ROMAN.TXT +# +# Contents: Map (external version) from Mac OS Roman +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1994-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# <c1.1> and Text Encoding Converter 2.0. +# b4,c1 2002-Dec-19 Update URLs, notes. Matches internal +# utom<b5>. +# b03 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b4>, ufrm<b3>, and Text +# Encoding Converter version 1.5. +# b02 1998-Aug-18 Encoding changed for Mac OS 8.5; change +# mapping of 0xDB from CURRENCY SIGN to +# EURO SIGN. Matches internal utom<b3>, +# ufrm<b3>. +# n08 1998-Feb-05 Minor update to header comments +# n06 1997-Dec-14 Add warning about future changes to 0xDB +# from CURRENCY SIGN to EURO SIGN. Clarify +# some header information +# n04 1997-Dec-01 Update to match internal utom<n3>, ufrm<n22>: +# Change standard mapping for 0xBD from U+2126 +# to its canonical decomposition, U+03A9. +# n03 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<n9>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Roman code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Roman code order. +# +# One of these mappings requires the use of a corporate character. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Roman character set uses the standard control characters at +# 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Roman: +# ---------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported directly in programming +# interfaces for QuickDraw Text, the Script Manager, and related +# Text Utilities. For other purposes it is supported via transcoding +# to and from Unicode. +# +# This character set is used for at least the following Mac OS +# localizations: U.S., British, Canadian French, French, Swiss +# French, German, Swiss German, Italian, Swiss Italian, Dutch, +# Swedish, Norwegian, Danish, Finnish, Spanish, Catalan, +# Portuguese, Brazilian, and the default International system. +# +# Variants of Mac OS Roman are used for Croatian, Icelandic, +# Turkish, Romanian, and other encodings. Separate mapping tables +# are available for these encodings. +# +# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was +# mapped to U+00A4. In Mac OS 8.5 and later versions, code point +# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard +# Apple fonts are updated for Mac OS 8.5 to reflect this. There is +# a "currency sign" variant of the Mac OS Roman encoding that still +# maps 0xDB to U+00A4; this can be used for older fonts. +# +# Before Mac OS 8.5, the ROM bitmap versions of the fonts Chicago, +# New York, Geneva, and Monaco did not implement the full Mac OS +# Roman character set; they only supported character codes up to +# 0xD8. The TrueType versions of these fonts have always implemented +# the full character set, as with the bitmap and TrueType versions +# of the other standard Roman fonts. +# +# In all Mac OS encodings, fonts such as Chicago which are used +# as "system" fonts (for menus, dialogs, etc.) have four glyphs +# at code points 0x11-0x14 for transient use by the Menu Manager. +# These glyphs are not intended as characters for use in normal +# text, and the associated code points are not generally +# interpreted as associated with these glyphs; they are usually +# interpreted (if at all) as the control codes DC1-DC4. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# The following corporate zone Unicode character is used in this +# mapping: +# +# 0xF8FF Apple logo +# +# NOTE: The graphic image associated with the Apple logo character +# is not authorized for use without permission of Apple, and +# unauthorized use might constitute trademark infringement. +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version n08 to version b02: +# +# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from +# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC). +# +# Changes from version n03 to version n04: +# +# - Change mapping of 0xBD from U+2126 to its canonical +# decomposition, U+03A9. +# +################## + +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +# +0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0x82 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0x83 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0x84 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE +0x85 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0x86 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0x87 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0x88 0x00E0 # LATIN SMALL LETTER A WITH GRAVE +0x89 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0x8A 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0x8B 0x00E3 # LATIN SMALL LETTER A WITH TILDE +0x8C 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0x8D 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0x8E 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0x8F 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0x90 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0x91 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0x92 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0x93 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0x94 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0x95 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0x96 0x00F1 # LATIN SMALL LETTER N WITH TILDE +0x97 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0x98 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0x99 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0x9A 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0x9B 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0x9C 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0x9D 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0x9E 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0x9F 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xA0 0x2020 # DAGGER +0xA1 0x00B0 # DEGREE SIGN +0xA2 0x00A2 # CENT SIGN +0xA3 0x00A3 # POUND SIGN +0xA4 0x00A7 # SECTION SIGN +0xA5 0x2022 # BULLET +0xA6 0x00B6 # PILCROW SIGN +0xA7 0x00DF # LATIN SMALL LETTER SHARP S +0xA8 0x00AE # REGISTERED SIGN +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x2122 # TRADE MARK SIGN +0xAB 0x00B4 # ACUTE ACCENT +0xAC 0x00A8 # DIAERESIS +0xAD 0x2260 # NOT EQUAL TO +0xAE 0x00C6 # LATIN CAPITAL LETTER AE +0xAF 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xB0 0x221E # INFINITY +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x2264 # LESS-THAN OR EQUAL TO +0xB3 0x2265 # GREATER-THAN OR EQUAL TO +0xB4 0x00A5 # YEN SIGN +0xB5 0x00B5 # MICRO SIGN +0xB6 0x2202 # PARTIAL DIFFERENTIAL +0xB7 0x2211 # N-ARY SUMMATION +0xB8 0x220F # N-ARY PRODUCT +0xB9 0x03C0 # GREEK SMALL LETTER PI +0xBA 0x222B # INTEGRAL +0xBB 0x00AA # FEMININE ORDINAL INDICATOR +0xBC 0x00BA # MASCULINE ORDINAL INDICATOR +0xBD 0x03A9 # GREEK CAPITAL LETTER OMEGA +0xBE 0x00E6 # LATIN SMALL LETTER AE +0xBF 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xC0 0x00BF # INVERTED QUESTION MARK +0xC1 0x00A1 # INVERTED EXCLAMATION MARK +0xC2 0x00AC # NOT SIGN +0xC3 0x221A # SQUARE ROOT +0xC4 0x0192 # LATIN SMALL LETTER F WITH HOOK +0xC5 0x2248 # ALMOST EQUAL TO +0xC6 0x2206 # INCREMENT +0xC7 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xC8 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xC9 0x2026 # HORIZONTAL ELLIPSIS +0xCA 0x00A0 # NO-BREAK SPACE +0xCB 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xCC 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xCD 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xCE 0x0152 # LATIN CAPITAL LIGATURE OE +0xCF 0x0153 # LATIN SMALL LIGATURE OE +0xD0 0x2013 # EN DASH +0xD1 0x2014 # EM DASH +0xD2 0x201C # LEFT DOUBLE QUOTATION MARK +0xD3 0x201D # RIGHT DOUBLE QUOTATION MARK +0xD4 0x2018 # LEFT SINGLE QUOTATION MARK +0xD5 0x2019 # RIGHT SINGLE QUOTATION MARK +0xD6 0x00F7 # DIVISION SIGN +0xD7 0x25CA # LOZENGE +0xD8 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS +0xD9 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS +0xDA 0x2044 # FRACTION SLASH +0xDB 0x20AC # EURO SIGN +0xDC 0x2039 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0xDD 0x203A # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0xDE 0xFB01 # LATIN SMALL LIGATURE FI +0xDF 0xFB02 # LATIN SMALL LIGATURE FL +0xE0 0x2021 # DOUBLE DAGGER +0xE1 0x00B7 # MIDDLE DOT +0xE2 0x201A # SINGLE LOW-9 QUOTATION MARK +0xE3 0x201E # DOUBLE LOW-9 QUOTATION MARK +0xE4 0x2030 # PER MILLE SIGN +0xE5 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xE6 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xE7 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xE8 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xE9 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xEA 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xEB 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xEC 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xED 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xEE 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xEF 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xF0 0xF8FF # Apple logo +0xF1 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xF2 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xF3 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xF4 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xF5 0x0131 # LATIN SMALL LETTER DOTLESS I +0xF6 0x02C6 # MODIFIER LETTER CIRCUMFLEX ACCENT +0xF7 0x02DC # SMALL TILDE +0xF8 0x00AF # MACRON +0xF9 0x02D8 # BREVE +0xFA 0x02D9 # DOT ABOVE +0xFB 0x02DA # RING ABOVE +0xFC 0x00B8 # CEDILLA +0xFD 0x02DD # DOUBLE ACUTE ACCENT +0xFE 0x02DB # OGONEK +0xFF 0x02C7 # CARON |