diff options
Diffstat (limited to 'pygments/lexers/webmisc.py')
-rw-r--r-- | pygments/lexers/webmisc.py | 41 |
1 files changed, 20 insertions, 21 deletions
diff --git a/pygments/lexers/webmisc.py b/pygments/lexers/webmisc.py index 84ba490c..dab36aa6 100644 --- a/pygments/lexers/webmisc.py +++ b/pygments/lexers/webmisc.py @@ -15,7 +15,6 @@ from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \ default, using from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Literal -from pygments.util import unirange from pygments.lexers.css import _indentation, _starts_block from pygments.lexers.html import HtmlLexer @@ -74,15 +73,15 @@ class XQueryLexer(ExtendedRegexLexer): # FIX UNICODE LATER # ncnamestartchar = ( - # ur"[A-Z]|_|[a-z]|[\u00C0-\u00D6]|[\u00D8-\u00F6]|[\u00F8-\u02FF]|" - # ur"[\u0370-\u037D]|[\u037F-\u1FFF]|[\u200C-\u200D]|[\u2070-\u218F]|" - # ur"[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]|" - # ur"[\u10000-\uEFFFF]" + # r"[A-Z]|_|[a-z]|[\u00C0-\u00D6]|[\u00D8-\u00F6]|[\u00F8-\u02FF]|" + # r"[\u0370-\u037D]|[\u037F-\u1FFF]|[\u200C-\u200D]|[\u2070-\u218F]|" + # r"[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]|" + # r"[\u10000-\uEFFFF]" # ) ncnamestartchar = r"(?:[A-Z]|_|[a-z])" # FIX UNICODE LATER - # ncnamechar = ncnamestartchar + (ur"|-|\.|[0-9]|\u00B7|[\u0300-\u036F]|" - # ur"[\u203F-\u2040]") + # ncnamechar = ncnamestartchar + (r"|-|\.|[0-9]|\u00B7|[\u0300-\u036F]|" + # r"[\u203F-\u2040]") ncnamechar = r"(?:" + ncnamestartchar + r"|-|\.|[0-9])" ncname = "(?:%s+%s*)" % (ncnamestartchar, ncnamechar) pitarget_namestartchar = r"(?:[A-KN-WYZ]|_|:|[a-kn-wyz])" @@ -99,14 +98,14 @@ class XQueryLexer(ExtendedRegexLexer): stringsingle = r"(?:'(?:" + entityref + r"|" + charref + r"|''|[^&'])*')" # FIX UNICODE LATER - # elementcontentchar = (ur'\t|\r|\n|[\u0020-\u0025]|[\u0028-\u003b]|' - # ur'[\u003d-\u007a]|\u007c|[\u007e-\u007F]') + # elementcontentchar = (r'\t|\r|\n|[\u0020-\u0025]|[\u0028-\u003b]|' + # r'[\u003d-\u007a]|\u007c|[\u007e-\u007F]') elementcontentchar = r'[A-Za-z]|\s|\d|[!"#$%()*+,\-./:;=?@\[\\\]^_\'`|~]' - # quotattrcontentchar = (ur'\t|\r|\n|[\u0020-\u0021]|[\u0023-\u0025]|' - # ur'[\u0027-\u003b]|[\u003d-\u007a]|\u007c|[\u007e-\u007F]') + # quotattrcontentchar = (r'\t|\r|\n|[\u0020-\u0021]|[\u0023-\u0025]|' + # r'[\u0027-\u003b]|[\u003d-\u007a]|\u007c|[\u007e-\u007F]') quotattrcontentchar = r'[A-Za-z]|\s|\d|[!#$%()*+,\-./:;=?@\[\\\]^_\'`|~]' - # aposattrcontentchar = (ur'\t|\r|\n|[\u0020-\u0025]|[\u0028-\u003b]|' - # ur'[\u003d-\u007a]|\u007c|[\u007e-\u007F]') + # aposattrcontentchar = (r'\t|\r|\n|[\u0020-\u0025]|[\u0028-\u003b]|' + # r'[\u003d-\u007a]|\u007c|[\u007e-\u007F]') aposattrcontentchar = r'[A-Za-z]|\s|\d|[!"#$%()*+,\-./:;=?@\[\\\]^_`|~]' # CHAR elements - fix the above elementcontentchar, quotattrcontentchar, @@ -518,8 +517,8 @@ class XQueryLexer(ExtendedRegexLexer): 'xml_comment': [ (r'(-->)', popstate_xmlcomment_callback), (r'[^-]{1,2}', Literal), - (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' + - unirange(0x10000, 0x10ffff), Literal), + (r'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|[\U00010000-\U0010FFFF]', + Literal), ], 'processing_instruction': [ (r'\s+', Text, 'processing_instruction_content'), @@ -528,13 +527,13 @@ class XQueryLexer(ExtendedRegexLexer): ], 'processing_instruction_content': [ (r'\?>', String.Doc, '#pop'), - (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' + - unirange(0x10000, 0x10ffff), Literal), + (r'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|[\U00010000-\U0010FFFF]', + Literal), ], 'cdata_section': [ (r']]>', String.Doc, '#pop'), - (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' + - unirange(0x10000, 0x10ffff), Literal), + (r'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|[\U00010000-\U0010FFFF]', + Literal), ], 'start_tag': [ include('whitespace'), @@ -603,8 +602,8 @@ class XQueryLexer(ExtendedRegexLexer): ], 'pragmacontents': [ (r'#\)', Punctuation, 'operator'), - (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' + - unirange(0x10000, 0x10ffff), Literal), + (r'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|[\U00010000-\U0010FFFF]', + Literal), (r'(\s+)', Text), ], 'occurrenceindicator': [ |