diff options
Diffstat (limited to 'numpy/f2py')
-rwxr-xr-x | numpy/f2py/crackfortran.py | 2 | ||||
-rw-r--r-- | numpy/f2py/tests/test_crackfortran.py | 49 |
2 files changed, 49 insertions, 2 deletions
diff --git a/numpy/f2py/crackfortran.py b/numpy/f2py/crackfortran.py index b831697d8..36a913047 100755 --- a/numpy/f2py/crackfortran.py +++ b/numpy/f2py/crackfortran.py @@ -935,7 +935,7 @@ typedefpattern = re.compile( r'(?:,(?P<attributes>[\w(),]+))?(::)?(?P<name>\b[a-z$_][\w$]*\b)' r'(?:\((?P<params>[\w,]*)\))?\Z', re.I) nameargspattern = re.compile( - r'\s*(?P<name>\b[\w$]+\b)\s*(@\(@\s*(?P<args>[\w\s,]*)\s*@\)@|)\s*((result(\s*@\(@\s*(?P<result>\b[\w$]+\b)\s*@\)@|))|(bind\s*@\(@\s*(?P<bind>.*)\s*@\)@))*\s*\Z', re.I) + r'\s*(?P<name>\b[\w$]+\b)\s*(@\(@\s*(?P<args>[\w\s,]*)\s*@\)@|)\s*((result(\s*@\(@\s*(?P<result>\b[\w$]+\b)\s*@\)@|))|(bind\s*@\(@\s*(?P<bind>(?:(?!@\)@).)*)\s*@\)@))*\s*\Z', re.I) operatorpattern = re.compile( r'\s*(?P<scheme>(operator|assignment))' r'@\(@\s*(?P<name>[^)]+)\s*@\)@\s*\Z', re.I) diff --git a/numpy/f2py/tests/test_crackfortran.py b/numpy/f2py/tests/test_crackfortran.py index 73ac4e276..23965087d 100644 --- a/numpy/f2py/tests/test_crackfortran.py +++ b/numpy/f2py/tests/test_crackfortran.py @@ -1,9 +1,10 @@ import importlib import codecs +import time import unicodedata import pytest import numpy as np -from numpy.f2py.crackfortran import markinnerspaces +from numpy.f2py.crackfortran import markinnerspaces, nameargspattern from . import util from numpy.f2py import crackfortran import textwrap @@ -276,3 +277,49 @@ class TestUnicodeComment(util.F2PyTest): ) def test_encoding_comment(self): self.module.foo(3) + +class TestNameArgsPatternBacktracking: + @pytest.mark.parametrize( + ['adversary'], + [ + ('@)@bind@(@',), + ('@)@bind @(@',), + ('@)@bind foo bar baz@(@',) + ] + ) + def test_nameargspattern_backtracking(self, adversary): + '''address ReDOS vulnerability: + https://github.com/numpy/numpy/issues/23338''' + last_median = 0. + trials_per_count = 128 + start_reps, end_reps = 15, 25 + times_median_doubled = 0 + for ii in range(start_reps, end_reps): + repeated_adversary = adversary * ii + times = [] + for _ in range(trials_per_count): + t0 = time.perf_counter() + mtch = nameargspattern.search(repeated_adversary) + times.append(time.perf_counter() - t0) + # We should use a measure of time that's resilient to outliers. + # Times jump around a lot due to the CPU's scheduler. + median = np.median(times) + assert not mtch + # if the adversary is capped with @)@, it becomes acceptable + # according to the old version of the regex. + # that should still be true. + good_version_of_adversary = repeated_adversary + '@)@' + assert nameargspattern.search(good_version_of_adversary) + if ii > start_reps: + # the hallmark of exponentially catastrophic backtracking + # is that runtime doubles for every added instance of + # the problematic pattern. + times_median_doubled += median > 2 * last_median + # also try to rule out non-exponential but still bad cases + # arbitrarily, we should set a hard limit of 10ms as too slow + assert median < trials_per_count * 0.01 + last_median = median + # we accept that maybe the median might double once, due to + # the CPU scheduler acting weird or whatever. More than that + # seems suspicious. + assert times_median_doubled < 2
\ No newline at end of file |