summaryrefslogtreecommitdiff
path: root/numpy/f2py
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/f2py')
-rwxr-xr-x numpy/f2py/crackfortran.py 2
-rw-r--r-- numpy/f2py/tests/test_crackfortran.py 49
2 files changed, 49 insertions, 2 deletions
diff --git a/numpy/f2py/crackfortran.py b/numpy/f2py/crackfortran.py
index b831697d8..36a913047 100755
--- a/numpy/f2py/crackfortran.py
+++ b/numpy/f2py/crackfortran.py
@@ -935,7 +935,7 @@ typedefpattern = re.compile(
r'(?:,(?P<attributes>[\w(),]+))?(::)?(?P<name>\b[a-z$_][\w$]*\b)'
r'(?:\((?P<params>[\w,]*)\))?\Z', re.I)
nameargspattern = re.compile(
- r'\s*(?P<name>\b[\w$]+\b)\s*(@\(@\s*(?P<args>[\w\s,]*)\s*@\)@|)\s*((result(\s*@\(@\s*(?P<result>\b[\w$]+\b)\s*@\)@|))|(bind\s*@\(@\s*(?P<bind>.*)\s*@\)@))*\s*\Z', re.I)
+ r'\s*(?P<name>\b[\w$]+\b)\s*(@\(@\s*(?P<args>[\w\s,]*)\s*@\)@|)\s*((result(\s*@\(@\s*(?P<result>\b[\w$]+\b)\s*@\)@|))|(bind\s*@\(@\s*(?P<bind>(?:(?!@\)@).)*)\s*@\)@))*\s*\Z', re.I)
operatorpattern = re.compile(
r'\s*(?P<scheme>(operator|assignment))'
r'@\(@\s*(?P<name>[^)]+)\s*@\)@\s*\Z', re.I)
diff --git a/numpy/f2py/tests/test_crackfortran.py b/numpy/f2py/tests/test_crackfortran.py
index 73ac4e276..23965087d 100644
--- a/numpy/f2py/tests/test_crackfortran.py
+++ b/numpy/f2py/tests/test_crackfortran.py
@@ -1,9 +1,10 @@
import importlib
import codecs
+import time
import unicodedata
import pytest
import numpy as np
-from numpy.f2py.crackfortran import markinnerspaces
+from numpy.f2py.crackfortran import markinnerspaces, nameargspattern
from . import util
from numpy.f2py import crackfortran
import textwrap
@@ -276,3 +277,49 @@ class TestUnicodeComment(util.F2PyTest):
)
def test_encoding_comment(self):
self.module.foo(3)
+
+class TestNameArgsPatternBacktracking:
+ @pytest.mark.parametrize(
+ ['adversary'],
+ [
+ ('@)@bind@(@',),
+ ('@)@bind @(@',),
+ ('@)@bind foo bar baz@(@',)
+ ]
+ )
+ def test_nameargspattern_backtracking(self, adversary):
+ '''address ReDOS vulnerability:
+ https://github.com/numpy/numpy/issues/23338'''
+ last_median = 0.
+ trials_per_count = 128
+ start_reps, end_reps = 15, 25
+ times_median_doubled = 0
+ for ii in range(start_reps, end_reps):
+ repeated_adversary = adversary * ii
+ times = []
+ for _ in range(trials_per_count):
+ t0 = time.perf_counter()
+ mtch = nameargspattern.search(repeated_adversary)
+ times.append(time.perf_counter() - t0)
+ # We should use a measure of time that's resilient to outliers.
+ # Times jump around a lot due to the CPU's scheduler.
+ median = np.median(times)
+ assert not mtch
+ # If the adversary is capped with @)@, it becomes a string that the
+ # old version of the regex accepted.
+ # The new version should still accept it.
+ good_version_of_adversary = repeated_adversary + '@)@'
+ assert nameargspattern.search(good_version_of_adversary)
+ if ii > start_reps:
+ # the hallmark of exponentially catastrophic backtracking
+ # is that runtime doubles for every added instance of
+ # the problematic pattern.
+ times_median_doubled += median > 2 * last_median
+ # Also rule out non-exponential but still bad cases with an arbitrary hard
+ # limit (nominally 10ms; the bound below is trials_per_count * 0.01 s).
+ assert median < trials_per_count * 0.01
+ last_median = median
+ # We tolerate the median doubling once, since the CPU scheduler can
+ # introduce timing noise. Doubling more than once
+ # seems suspicious.
+ assert times_median_doubled < 2
\ No newline at end of file