summary | refs | log | tree | commit | diff
path: root/pygments
diff options
context:
space:
mode:
Diffstat (limited to 'pygments')
-rw-r--r-- pygments/lexers/esoteric.py 15
-rw-r--r-- pygments/lexers/ezhil.py 2
-rw-r--r-- pygments/lexers/modula2.py 15
-rw-r--r-- pygments/lexers/perl.py 16
-rw-r--r-- pygments/lexers/theorem.py 4
-rw-r--r-- pygments/lexers/unicon.py 9
6 files changed, 51 insertions, 10 deletions
diff --git a/pygments/lexers/esoteric.py b/pygments/lexers/esoteric.py
index 0997ffe1..0fe89299 100644
--- a/pygments/lexers/esoteric.py
+++ b/pygments/lexers/esoteric.py
@@ -55,17 +55,26 @@ class BrainfuckLexer(RegexLexer):
and < > is brainfuck."""
plus_minus_count = 0
greater_less_count = 0
- for c in text:
+
+ range_to_check = max(256, len(text))
+
+ for c in text[:range_to_check]:
if c == '+' or c == '-':
plus_minus_count += 1
if c == '<' or c == '>':
greater_less_count += 1
- if plus_minus_count > (0.25 * len(text)):
+ if plus_minus_count > (0.25 * range_to_check):
return 1.0
- if greater_less_count > (0.25 * len(text)):
+ if greater_less_count > (0.25 * range_to_check):
return 1.0
+ result = 0
+ if '[-]' in text:
+ result += 0.5
+
+ return result
+
class BefungeLexer(RegexLexer):
"""
diff --git a/pygments/lexers/ezhil.py b/pygments/lexers/ezhil.py
index 109b607b..4af37f33 100644
--- a/pygments/lexers/ezhil.py
+++ b/pygments/lexers/ezhil.py
@@ -69,7 +69,7 @@ class EzhilLexer(RegexLexer):
decent amount of Tamil-characters, it's this language. This assumption
is obviously horribly off if someone uses string literals in tamil
in another language."""
- if len(re.findall('[\u0b80-\u0bff]')) > 10:
+ if len(re.findall(r'[\u0b80-\u0bff]', text)) > 10:
return 0.25
def __init__(self, **options):
diff --git a/pygments/lexers/modula2.py b/pygments/lexers/modula2.py
index c4b95b38..05144222 100644
--- a/pygments/lexers/modula2.py
+++ b/pygments/lexers/modula2.py
@@ -1563,9 +1563,20 @@ class Modula2Lexer(RegexLexer):
def analyse_text(text):
"""Not much we can go by. (* for comments is our best guess."""
result = 0
+
+ is_pascal_like = 0
if '(*' in text and '*)' in text:
- result += 0.01
+ is_pascal_like += 0.5
if ':=' in text:
- result += 0.01
+ is_pascal_like += 0.5
+
+ if is_pascal_like == 1:
+ # Procedure is in Modula2
+ if re.search(r'\bPROCEDURE\b', text):
+ result += 0.6
+
+ # FUNCTION is only valid in Pascal, but not in Modula2
+ if re.search(r'\bFUNCTION\b', text):
+ result = 0.0
return result
diff --git a/pygments/lexers/perl.py b/pygments/lexers/perl.py
index 741de3fd..95fb94e7 100644
--- a/pygments/lexers/perl.py
+++ b/pygments/lexers/perl.py
@@ -208,8 +208,18 @@ class PerlLexer(RegexLexer):
def analyse_text(text):
if shebang_matches(text, r'perl'):
return True
+
+ result = 0
+
if re.search(r'(?:my|our)\s+[$@%(]', text):
- return 0.9
+ result += 0.9
+
+ if ':=' in text:
+ # := is not valid Perl, but it appears in unicon, so we should
+ # become less confident if we think we found Perl with :=
+ result /= 2
+
+ return result
class Perl6Lexer(ExtendedRegexLexer):
@@ -711,6 +721,10 @@ class Perl6Lexer(ExtendedRegexLexer):
continue
break
+ if ':=' in text:
+ # Same logic as above for PerlLexer
+ rating /= 2
+
return rating
def __init__(self, **options):
diff --git a/pygments/lexers/theorem.py b/pygments/lexers/theorem.py
index c4c857d4..a4fa24de 100644
--- a/pygments/lexers/theorem.py
+++ b/pygments/lexers/theorem.py
@@ -154,8 +154,8 @@ class CoqLexer(RegexLexer):
}
def analyse_text(text):
- if text.startswith('(*'):
- return True
+ if 'qed' in text and 'tauto' in text:
+ return 1
class IsabelleLexer(RegexLexer):
diff --git a/pygments/lexers/unicon.py b/pygments/lexers/unicon.py
index 45b4c15a..95815907 100644
--- a/pygments/lexers/unicon.py
+++ b/pygments/lexers/unicon.py
@@ -387,7 +387,8 @@ class UcodeLexer(RegexLexer):
}
def analyse_text(text):
- """endsuspend and endrepeat are unique to this language."""
+ """endsuspend and endrepeat are unique to this language, and
+ \\self, /self doesn't seem to get used anywhere else either."""
result = 0
if 'endsuspend' in text:
@@ -402,4 +403,10 @@ class UcodeLexer(RegexLexer):
if 'procedure' in text and 'end' in text:
result += 0.01
+ # This seems quite unique to unicon -- doesn't appear in any other
+ # example source we have (A quick search reveals that \SELF appears in
+ # Perl/Raku code)
+ if r'\self' in text and r'/self' in text:
+ result += 0.5
+
return result