 .github/workflows/cygwin.yml                  | 14
 .github/workflows/docker.yml                  | 25
 .github/workflows/gitpod.yml                  |  8
 .github/workflows/wheels.yml                  | 40
 numpy/core/multiarray.pyi                     |  2
 numpy/lib/format.py                           | 59
 numpy/lib/npyio.py                            | 32
 numpy/lib/tests/test_format.py                | 47
 numpy/lib/utils.py                            |  6
 numpy/random/_common.pxd                      |  2
 numpy/random/_common.pyx                      |  6
 numpy/random/_generator.pyx                   |  8
 numpy/random/mtrand.pyx                       | 10
 numpy/random/tests/test_generator_mt19937.py  | 20
 numpy/random/tests/test_randomstate.py        | 19
 tools/gitpod/Dockerfile                       |  6
16 files changed, 217 insertions, 87 deletions
diff --git a/.github/workflows/cygwin.yml b/.github/workflows/cygwin.yml
index efac1ce03..18fd03c52 100644
--- a/.github/workflows/cygwin.yml
+++ b/.github/workflows/cygwin.yml
@@ -24,7 +24,7 @@ jobs:
     - uses: actions/checkout@v3
       with:
         submodules: recursive
-        fetch-depth: 3000
+        fetch-depth: 0
     - name: Install Cygwin
       uses: cygwin/cygwin-install-action@v2
       with:
@@ -39,22 +39,14 @@ jobs:
     - name: Set Windows PATH
       uses: egor-tensin/cleanup-path@v1
       with:
-        dirs: 'C:\tools\cygwin\lib\lapack;C:\tools\cygwin\bin'
-    - name: Remove OpenBLAS
-      # Added to work around OpenBLAS bugs on AVX-512
-      # Add libopenblas to the Cygwin install step when removing this step
-      # Should be possible after next Cygwin OpenBLAS update.
-      run: |
-        dash -c "/bin/rm -f /usr/bin/cygblas-0.dll"
+        dirs: 'C:\tools\cygwin\bin;C:\tools\cygwin\lib\lapack'
     - name: Verify that bash is Cygwin bash
       run: |
         command bash
         bash -c "uname -svrmo"
-    - name: Update with Cygwin git
-      # fetch-depth=0 above should make this short.
+    - name: Tell Cygwin's git about this repository.
       run: |
         dash -c "which git; /usr/bin/git config --system --add safe.directory /cygdrive/d/a/numpy/numpy"
-        dash -c "which git; /usr/bin/git fetch --all -p"
     - name: Verify python version
       # Make sure it's the Cygwin one, not a Windows one
       run: |
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index a220eb9ca..694483ed7 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -1,32 +1,35 @@
 name: Build Base Docker Image
 
 on:
-  push:
+  push:
     branches:
       - main
     paths:
-      - 'environment.yml'
+      - "environment.yml"
 
-jobs:
-  build:
-    name: Build base Docker image
+permissions:
+  contents: read  # to fetch code (actions/checkout)
+
+jobs:
+  build_docker:
+    name: Build base Docker image
     runs-on: ubuntu-latest
     environment: numpy-dev
     if: "github.repository_owner == 'numpy' && !contains(github.event.head_commit.message, '[ci skip]') && !contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[skip github]')"
     steps:
       - name: Clone repository
         uses: actions/checkout@v3
-      - name: Lint Docker
+      - name: Lint Docker
         uses: brpaz/hadolint-action@v1.2.1
-        with:
+        with:
           dockerfile: ./tools/gitpod/Dockerfile
       - name: Get refs
         shell: bash
         run: |
           export raw_branch=${GITHUB_REF#refs/heads/}
-          echo "::set-output name=branch::${raw_branch//\//-}"
-          echo "::set-output name=date::$(date +'%Y%m%d')"
-          echo "::set-output name=sha8::$(echo ${GITHUB_SHA} | cut -c1-8)"
+          echo "branch=${raw_branch//\//-}" >> $GITHUB_OUTPUT
+          echo "date=$(date +'%Y%m%d')" >> $GITHUB_OUTPUT
+          echo "sha8=$(echo ${GITHUB_SHA} | cut -c1-8)" >> $GITHUB_OUTPUT
         id: getrefs
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v1
@@ -52,6 +55,6 @@ jobs:
           cache-to: type=local,dest=/tmp/.buildx-cache
           tags: |
             numpy/numpy-dev:${{ steps.getrefs.outputs.date }}-${{ steps.getrefs.outputs.branch}}-${{ steps.getrefs.outputs.sha8 }}, numpy/numpy-dev:latest
-      - name: Image digest
+      - name: Image digest  # Return details of the image build: sha and shell
         run: echo ${{ steps.docker_build.outputs.digest }}
diff --git a/.github/workflows/gitpod.yml b/.github/workflows/gitpod.yml
index 1c33566a5..472bd0079 100644
--- a/.github/workflows/gitpod.yml
+++ b/.github/workflows/gitpod.yml
@@ -6,7 +6,7 @@ on:
       - main
 
 jobs:
-  build:
+  build_gitpod:
     name: Build Gitpod Docker image
     runs-on: ubuntu-latest
     environment: numpy-dev
@@ -24,9 +24,9 @@ jobs:
         shell: bash
         run: |
           export raw_branch=${GITHUB_REF#refs/heads/}
-          echo "::set-output name=branch::${raw_branch//\//-}"
-          echo "::set-output name=date::$(date +'%Y%m%d')"
-          echo "::set-output name=sha8::$(echo ${GITHUB_SHA} | cut -c1-8)"
+          echo "branch=${raw_branch//\//-}" >> $GITHUB_OUTPUT
+          echo "date=$(date +'%Y%m%d')" >> $GITHUB_OUTPUT
+          echo "sha8=$(echo ${GITHUB_SHA} | cut -c1-8)" >> $GITHUB_OUTPUT
         id: getrefs
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v1
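A note on the workflow changes above and in the wheels workflow below: GitHub deprecated the `::set-output` workflow command, and the replacement is appending `name=value` lines to the file named by the `GITHUB_OUTPUT` environment variable. A minimal sketch of the same mechanism from Python (the `set_output` helper is ours for illustration, not part of this commit):

    import os

    def set_output(name, value):
        # GitHub Actions reads step outputs from the file that the
        # GITHUB_OUTPUT environment variable points at, one "name=value"
        # pair per line.
        with open(os.environ["GITHUB_OUTPUT"], "a") as fh:
            fh.write(f"{name}={value}\n")

    set_output("sha8", os.environ.get("GITHUB_SHA", "")[:8])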
echo "::set-output name=date::$(date +'%Y%m%d')" - echo "::set-output name=sha8::$(echo ${GITHUB_SHA} | cut -c1-8)" + echo "branch=${raw_branch//\//-}" >> $GITHUB_OUTPUT + echo "date=$(date +'%Y%m%d')" >> $GITHUB_OUTPUT + echo "sha8=$(echo ${GITHUB_SHA} | cut -c1-8)" >> $GITHUB_OUTPUT id: getrefs - name: Set up Docker Buildx uses: docker/setup-buildx-action@v1 diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 4169a0b3f..e212f20a7 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -15,16 +15,16 @@ name: Wheel builder on: schedule: - # ┌───────────── minute (0 - 59) - # │ ┌───────────── hour (0 - 23) - # │ │ ┌───────────── day of the month (1 - 31) - # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) - # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) - # │ │ │ │ │ - - cron: "42 1 * * 4" + # ┌───────────── minute (0 - 59) + # │ ┌───────────── hour (0 - 23) + # │ │ ┌───────────── day of the month (1 - 31) + # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) + # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) + # │ │ │ │ │ + - cron: "42 1 * * 4" push: pull_request: - types: [labeled, opened, synchronize, reopened] + types: [labeled, opened, synchronize, reopened] workflow_dispatch: concurrency: @@ -49,7 +49,7 @@ jobs: run: | set -xe COMMIT_MSG=$(git log --no-merges -1 --oneline) - echo "::set-output name=message::$COMMIT_MSG" + echo "message=$COMMIT_MSG" >> $GITHUB_OUTPUT echo github.ref ${{ github.ref }} build_wheels: @@ -71,20 +71,20 @@ jobs: # Github Actions doesn't support pairing matrix values together, let's improvise # https://github.com/github/feedback/discussions/7835#discussioncomment-1769026 buildplat: - - [ubuntu-20.04, manylinux_x86_64] - - [macos-10.15, macosx_*] - - [windows-2019, win_amd64] - - [windows-2019, win32] + - [ubuntu-20.04, manylinux_x86_64] + - [macos-10.15, macosx_*] + - [windows-2019, win_amd64] + - [windows-2019, win32] # TODO: uncomment PyPy 3.9 builds once PyPy # re-releases a new minor version # NOTE: This needs a bump of cibuildwheel version, also, once that happens. 
python: ["cp38", "cp39", "cp310", "cp311", "pp38"] #, "pp39"] exclude: - # Don't build PyPy 32-bit windows - - buildplat: [windows-2019, win32] - python: "pp38" - - buildplat: [windows-2019, win32] - python: "pp39" + # Don't build PyPy 32-bit windows + - buildplat: [windows-2019, win32] + python: "pp38" + - buildplat: [windows-2019, win32] + python: "pp39" env: IS_32_BIT: ${{ matrix.buildplat[1] == 'win32' }} IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }} @@ -103,7 +103,7 @@ jobs: # Used to push the built wheels - uses: actions/setup-python@v3 with: - python-version: '3.x' + python-version: "3.x" - name: Configure mingw for 32-bit builds run: | @@ -169,7 +169,7 @@ jobs: - uses: actions/setup-python@v3 with: # Build sdist on lowest supported Python - python-version: '3.8' + python-version: "3.8" - name: Build sdist run: | python setup.py sdist diff --git a/numpy/core/multiarray.pyi b/numpy/core/multiarray.pyi index 1be582357..3822cec4f 100644 --- a/numpy/core/multiarray.pyi +++ b/numpy/core/multiarray.pyi @@ -971,7 +971,7 @@ _GetItemKeys = L[ ] _SetItemKeys = L[ "A", "ALIGNED", - "W", "WRITABLE", + "W", "WRITEABLE", "X", "WRITEBACKIFCOPY", ] diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 625768b62..19fec48ed 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -186,6 +186,10 @@ _header_size_info = { (3, 0): ('<I', 'utf8'), } +# Python's literal_eval is not actually safe for large inputs, since parsing +# may become slow or even cause interpreter crashes. +# This is an arbitrary, low limit which should make it safe in practice. +_MAX_HEADER_SIZE = 10000 def _check_version(version): if version not in [(1, 0), (2, 0), (3, 0), None]: @@ -465,7 +469,7 @@ def write_array_header_2_0(fp, d): """ _write_array_header(fp, d, (2, 0)) -def read_array_header_1_0(fp): +def read_array_header_1_0(fp, max_header_size=_MAX_HEADER_SIZE): """ Read an array header from a filelike object using the 1.0 file format version. @@ -487,6 +491,10 @@ def read_array_header_1_0(fp): contiguous before writing it out. dtype : dtype The dtype of the file's data. + max_header_size : int, optional + Maximum allowed size of the header. Large headers may not be safe + to load securely and thus require explicitly passing a larger value. + See :py:meth:`ast.literal_eval()` for details. Raises ------ @@ -494,9 +502,10 @@ def read_array_header_1_0(fp): If the data is invalid. """ - return _read_array_header(fp, version=(1, 0)) + return _read_array_header( + fp, version=(1, 0), max_header_size=max_header_size) -def read_array_header_2_0(fp): +def read_array_header_2_0(fp, max_header_size=_MAX_HEADER_SIZE): """ Read an array header from a filelike object using the 2.0 file format version. @@ -509,6 +518,10 @@ def read_array_header_2_0(fp): ---------- fp : filelike object A file object or something with a `.read()` method like a file. + max_header_size : int, optional + Maximum allowed size of the header. Large headers may not be safe + to load securely and thus require explicitly passing a larger value. + See :py:meth:`ast.literal_eval()` for details. Returns ------- @@ -527,7 +540,8 @@ def read_array_header_2_0(fp): If the data is invalid. 
""" - return _read_array_header(fp, version=(2, 0)) + return _read_array_header( + fp, version=(2, 0), max_header_size=max_header_size) def _filter_header(s): @@ -565,7 +579,7 @@ def _filter_header(s): return tokenize.untokenize(tokens) -def _read_array_header(fp, version): +def _read_array_header(fp, version, max_header_size=_MAX_HEADER_SIZE): """ see read_array_header_1_0 """ @@ -581,6 +595,14 @@ def _read_array_header(fp, version): header_length = struct.unpack(hlength_type, hlength_str)[0] header = _read_bytes(fp, header_length, "array header") header = header.decode(encoding) + if len(header) > max_header_size: + raise ValueError( + f"Header info length ({len(header)}) is large and may not be safe " + "to load securely.\n" + "To allow loading, adjust `max_header_size` or fully trust " + "the `.npy` file using `allow_pickle=True`.\n" + "For safety against large resource use or crashes, sandboxing " + "may be necessary.") # The header is a pretty-printed string representation of a literal # Python dictionary with trailing newlines padded to a ARRAY_ALIGN byte @@ -694,7 +716,8 @@ def write_array(fp, array, version=None, allow_pickle=True, pickle_kwargs=None): fp.write(chunk.tobytes('C')) -def read_array(fp, allow_pickle=False, pickle_kwargs=None): +def read_array(fp, allow_pickle=False, pickle_kwargs=None, *, + max_header_size=_MAX_HEADER_SIZE): """ Read an array from an NPY file. @@ -713,6 +736,12 @@ def read_array(fp, allow_pickle=False, pickle_kwargs=None): Additional keyword arguments to pass to pickle.load. These are only useful when loading object arrays saved on Python 2 when using Python 3. + max_header_size : int, optional + Maximum allowed size of the header. Large headers may not be safe + to load securely and thus require explicitly passing a larger value. + See :py:meth:`ast.literal_eval()` for details. + This option is ignored when `allow_pickle` is passed. In that case + the file is by definition trusted and the limit is unnecessary. Returns ------- @@ -726,9 +755,15 @@ def read_array(fp, allow_pickle=False, pickle_kwargs=None): an object array. """ + if allow_pickle: + # Effectively ignore max_header_size, since `allow_pickle` indicates + # that the input is fully trusted. + max_header_size = 2**64 + version = read_magic(fp) _check_version(version) - shape, fortran_order, dtype = _read_array_header(fp, version) + shape, fortran_order, dtype = _read_array_header( + fp, version, max_header_size=max_header_size) if len(shape) == 0: count = 1 else: @@ -788,7 +823,8 @@ def read_array(fp, allow_pickle=False, pickle_kwargs=None): def open_memmap(filename, mode='r+', dtype=None, shape=None, - fortran_order=False, version=None): + fortran_order=False, version=None, *, + max_header_size=_MAX_HEADER_SIZE): """ Open a .npy file as a memory-mapped array. @@ -819,6 +855,10 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None, If the mode is a "write" mode, then this is the version of the file format used to create the file. None means use the oldest supported version that is able to store the data. Default: None + max_header_size : int, optional + Maximum allowed size of the header. Large headers may not be safe + to load securely and thus require explicitly passing a larger value. + See :py:meth:`ast.literal_eval()` for details. 
diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py
index 581d067de..53d3bf1d3 100644
--- a/numpy/lib/tests/test_format.py
+++ b/numpy/lib/tests/test_format.py
@@ -459,6 +459,7 @@ def test_long_str():
     assert_array_equal(long_str_arr, long_str_arr2)
 
 
+@pytest.mark.slow
 def test_memmap_roundtrip(tmpdir):
     for i, arr in enumerate(basic_arrays + record_arrays):
         if arr.dtype.hasobject:
@@ -667,7 +668,7 @@ def test_version_2_0():
     assert_(len(header) % format.ARRAY_ALIGN == 0)
 
     f.seek(0)
-    n = format.read_array(f)
+    n = format.read_array(f, max_header_size=200000)
     assert_array_equal(d, n)
 
     # 1.0 requested but data cannot be saved this way
@@ -689,7 +690,7 @@ def test_version_2_0_memmap(tmpdir):
                             shape=d.shape, version=(2, 0))
     ma[...] = d
     ma.flush()
-    ma = format.open_memmap(tf1, mode='r')
+    ma = format.open_memmap(tf1, mode='r', max_header_size=200000)
     assert_array_equal(ma, d)
 
     with warnings.catch_warnings(record=True) as w:
@@ -700,9 +701,49 @@ def test_version_2_0_memmap(tmpdir):
     ma[...] = d
     ma.flush()
-    ma = format.open_memmap(tf2, mode='r')
+    ma = format.open_memmap(tf2, mode='r', max_header_size=200000)
+    assert_array_equal(ma, d)
 
 
+@pytest.mark.parametrize("mmap_mode", ["r", None])
+def test_huge_header(tmpdir, mmap_mode):
+    f = os.path.join(tmpdir, f'large_header.npy')
+    arr = np.array(1, dtype="i,"*10000+"i")
+
+    with pytest.warns(UserWarning, match=".*format 2.0"):
+        np.save(f, arr)
+
+    with pytest.raises(ValueError, match="Header.*large"):
+        np.load(f, mmap_mode=mmap_mode)
+
+    with pytest.raises(ValueError, match="Header.*large"):
+        np.load(f, mmap_mode=mmap_mode, max_header_size=20000)
+
+    res = np.load(f, mmap_mode=mmap_mode, allow_pickle=True)
+    assert_array_equal(res, arr)
+
+    res = np.load(f, mmap_mode=mmap_mode, max_header_size=180000)
+    assert_array_equal(res, arr)
+
+
+def test_huge_header_npz(tmpdir):
+    f = os.path.join(tmpdir, f'large_header.npz')
+    arr = np.array(1, dtype="i,"*10000+"i")
+
+    with pytest.warns(UserWarning, match=".*format 2.0"):
+        np.savez(f, arr=arr)
+
+    # Only getting the array from the file actually reads it
+    with pytest.raises(ValueError, match="Header.*large"):
+        np.load(f)["arr"]
+
+    with pytest.raises(ValueError, match="Header.*large"):
+        np.load(f, max_header_size=20000)["arr"]
+
+    res = np.load(f, allow_pickle=True)["arr"]
+    assert_array_equal(res, arr)
+
+    res = np.load(f, max_header_size=180000)["arr"]
+    assert_array_equal(res, arr)
+
 
 def test_write_version():
     f = BytesIO()
diff --git a/numpy/lib/utils.py b/numpy/lib/utils.py
index 9e4fe7ebb..831a78ec1 100644
--- a/numpy/lib/utils.py
+++ b/numpy/lib/utils.py
@@ -971,6 +971,12 @@ def safe_eval(source):
     Evaluate a string containing a Python literal expression without
     allowing the execution of arbitrary non-literal code.
 
+    .. warning::
+
+        This function is identical to :py:meth:`ast.literal_eval` and
+        has the same security implications.  It may not always be safe
+        to evaluate large input strings.
+
     Parameters
     ----------
     source : str
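Because `safe_eval` is a thin wrapper around `ast.literal_eval`, the warning added above applies to both equally: only Python literals are accepted, such as the dict literals used for `.npy` headers. A minimal illustration:

    import ast
    from numpy.lib.utils import safe_eval

    # A header-style dict literal; both parsers accept it identically.
    header = "{'descr': '<i8', 'fortran_order': False, 'shape': (3,)}"
    assert safe_eval(header) == ast.literal_eval(header)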
diff --git a/numpy/random/_common.pxd b/numpy/random/_common.pxd
index 3625634cd..3eaf39ddf 100644
--- a/numpy/random/_common.pxd
+++ b/numpy/random/_common.pxd
@@ -17,8 +17,8 @@ cdef enum ConstraintType:
     CONS_POSITIVE
     CONS_POSITIVE_NOT_NAN
     CONS_BOUNDED_0_1
-    CONS_BOUNDED_0_1_NOTNAN
     CONS_BOUNDED_GT_0_1
+    CONS_BOUNDED_LT_0_1
     CONS_GT_1
     CONS_GTE_1
     CONS_POISSON
diff --git a/numpy/random/_common.pyx b/numpy/random/_common.pyx
index 607034a38..7b6f69303 100644
--- a/numpy/random/_common.pyx
+++ b/numpy/random/_common.pyx
@@ -392,6 +392,9 @@ cdef int check_array_constraint(np.ndarray val, object name, constraint_type con
     elif cons == CONS_BOUNDED_GT_0_1:
         if not np.all(np.greater(val, 0)) or not np.all(np.less_equal(val, 1)):
             raise ValueError("{0} <= 0, {0} > 1 or {0} contains NaNs".format(name))
+    elif cons == CONS_BOUNDED_LT_0_1:
+        if not np.all(np.greater_equal(val, 0)) or not np.all(np.less(val, 1)):
+            raise ValueError("{0} < 0, {0} >= 1 or {0} contains NaNs".format(name))
     elif cons == CONS_GT_1:
         if not np.all(np.greater(val, 1)):
             raise ValueError("{0} <= 1 or {0} contains NaNs".format(name))
@@ -428,6 +431,9 @@ cdef int check_constraint(double val, object name, constraint_type cons) except
     elif cons == CONS_BOUNDED_GT_0_1:
         if not val >0 or not val <= 1:
             raise ValueError("{0} <= 0, {0} > 1 or {0} contains NaNs".format(name))
+    elif cons == CONS_BOUNDED_LT_0_1:
+        if not (val >= 0) or not (val < 1):
+            raise ValueError("{0} < 0, {0} >= 1 or {0} is NaN".format(name))
     elif cons == CONS_GT_1:
         if not (val > 1):
             raise ValueError("{0} <= 1 or {0} is NaN".format(name))
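In plain Python terms, the new `CONS_BOUNDED_LT_0_1` constraint accepts exactly the half-open interval [0, 1). A sketch equivalent to the array branch above; note that NaN fails both comparisons, so it is rejected without an explicit check:

    import numpy as np

    def check_bounded_lt_0_1(val, name="p"):
        # Mirrors CONS_BOUNDED_LT_0_1: every element must satisfy 0 <= val < 1.
        val = np.asarray(val)
        if not np.all(np.greater_equal(val, 0)) or not np.all(np.less(val, 1)):
            raise ValueError(f"{name} < 0, {name} >= 1 or {name} contains NaNs")

    check_bounded_lt_0_1([0.0, 0.5])   # passes
    # check_bounded_lt_0_1([np.nan])   # would raise ValueError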
diff --git a/numpy/random/_generator.pyx b/numpy/random/_generator.pyx
index 5218c6d0e..2c25b7191 100644
--- a/numpy/random/_generator.pyx
+++ b/numpy/random/_generator.pyx
@@ -25,7 +25,7 @@ from ._pcg64 import PCG64
 from numpy.random cimport bitgen_t
 from ._common cimport (POISSON_LAM_MAX, CONS_POSITIVE, CONS_NONE,
             CONS_NON_NEGATIVE, CONS_BOUNDED_0_1, CONS_BOUNDED_GT_0_1,
-            CONS_GT_1, CONS_POSITIVE_NOT_NAN, CONS_POISSON,
+            CONS_BOUNDED_LT_0_1, CONS_GT_1, CONS_POSITIVE_NOT_NAN, CONS_POISSON,
             double_fill, cont, kahan_sum, cont_broadcast_3, float_fill, cont_f,
             check_array_constraint, check_constraint, disc, discrete_broadcast_iii,
             validate_output_shape
@@ -3437,12 +3437,12 @@ cdef class Generator:
         Draw samples from a logarithmic series distribution.
 
         Samples are drawn from a log series distribution with specified
-        shape parameter, 0 < ``p`` < 1.
+        shape parameter, 0 <= ``p`` < 1.
 
         Parameters
         ----------
         p : float or array_like of floats
-            Shape parameter for the distribution.  Must be in the range (0, 1).
+            Shape parameter for the distribution.  Must be in the range [0, 1).
         size : int or tuple of ints, optional
             Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
             ``m * n * k`` samples are drawn.  If size is ``None`` (default),
@@ -3506,7 +3506,7 @@ cdef class Generator:
 
         """
         return disc(&random_logseries, &self._bitgen, size, self.lock, 1, 0,
-                    p, 'p', CONS_BOUNDED_0_1,
+                    p, 'p', CONS_BOUNDED_LT_0_1,
                     0.0, '', CONS_NONE,
                     0.0, '', CONS_NONE)
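The user-visible effect of the relaxed bound, sketched with the new `Generator` behaviour (per the tests below, `p == 0` is accepted and deterministically yields 1, while `p == 1`, previously allowed by `CONS_BOUNDED_0_1`, now raises):

    import numpy as np

    rng = np.random.default_rng()
    assert rng.logseries(0) == 1   # p == 0 is now in range

    try:
        rng.logseries(1.0)         # p == 1 is now rejected
    except ValueError as exc:
        print(exc)                 # "p < 0, p >= 1 or p is NaN"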
diff --git a/numpy/random/mtrand.pyx b/numpy/random/mtrand.pyx
index fcc1f27d2..ae40931d0 100644
--- a/numpy/random/mtrand.pyx
+++ b/numpy/random/mtrand.pyx
@@ -19,8 +19,8 @@ from ._bounded_integers cimport (_rand_bool, _rand_int32, _rand_int64,
 from ._mt19937 import MT19937 as _MT19937
 from numpy.random cimport bitgen_t
 from ._common cimport (POISSON_LAM_MAX, CONS_POSITIVE, CONS_NONE,
-            CONS_NON_NEGATIVE, CONS_BOUNDED_0_1, CONS_BOUNDED_GT_0_1, CONS_GTE_1,
-            CONS_GT_1, LEGACY_CONS_POISSON,
+            CONS_NON_NEGATIVE, CONS_BOUNDED_0_1, CONS_BOUNDED_GT_0_1,
+            CONS_BOUNDED_LT_0_1, CONS_GTE_1, CONS_GT_1, LEGACY_CONS_POISSON,
             double_fill, cont, kahan_sum, cont_broadcast_3,
             check_array_constraint, check_constraint, disc, discrete_broadcast_iii,
             validate_output_shape
@@ -3895,7 +3895,7 @@ cdef class RandomState:
         Draw samples from a logarithmic series distribution.
 
         Samples are drawn from a log series distribution with specified
-        shape parameter, 0 < ``p`` < 1.
+        shape parameter, 0 <= ``p`` < 1.
 
         .. note::
             New code should use the ``logseries`` method of a ``default_rng()``
@@ -3904,7 +3904,7 @@ cdef class RandomState:
         Parameters
         ----------
         p : float or array_like of floats
-            Shape parameter for the distribution.  Must be in the range (0, 1).
+            Shape parameter for the distribution.  Must be in the range [0, 1).
         size : int or tuple of ints, optional
             Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
             ``m * n * k`` samples are drawn.  If size is ``None`` (default),
@@ -3969,7 +3969,7 @@ cdef class RandomState:
 
         """
         out = disc(&legacy_logseries, &self._bitgen, size, self.lock, 1, 0,
-                   p, 'p', CONS_BOUNDED_0_1,
+                   p, 'p', CONS_BOUNDED_LT_0_1,
                    0.0, '', CONS_NONE,
                    0.0, '', CONS_NONE)
         # Match historical output type
diff --git a/numpy/random/tests/test_generator_mt19937.py b/numpy/random/tests/test_generator_mt19937.py
index b550cd508..73d915e02 100644
--- a/numpy/random/tests/test_generator_mt19937.py
+++ b/numpy/random/tests/test_generator_mt19937.py
@@ -1363,10 +1363,22 @@ class TestRandomDist:
                             [5, 1]])
         assert_array_equal(actual, desired)
 
-    def test_logseries_exceptions(self):
-        with np.errstate(invalid='ignore'):
-            assert_raises(ValueError, random.logseries, np.nan)
-            assert_raises(ValueError, random.logseries, [np.nan] * 10)
+    def test_logseries_zero(self):
+        random = Generator(MT19937(self.seed))
+        assert random.logseries(0) == 1
+
+    @pytest.mark.parametrize("value", [np.nextafter(0., -1), 1., np.nan, 5.])
+    def test_logseries_exceptions(self, value):
+        random = Generator(MT19937(self.seed))
+        with np.errstate(invalid="ignore"):
+            with pytest.raises(ValueError):
+                random.logseries(value)
+            with pytest.raises(ValueError):
+                # contiguous path:
+                random.logseries(np.array([value] * 10))
+            with pytest.raises(ValueError):
+                # non-contiguous path:
+                random.logseries(np.array([value] * 10)[::2])
 
     def test_multinomial(self):
         random = Generator(MT19937(self.seed))
diff --git a/numpy/random/tests/test_randomstate.py b/numpy/random/tests/test_randomstate.py
index 22b167224..c0e42ec1e 100644
--- a/numpy/random/tests/test_randomstate.py
+++ b/numpy/random/tests/test_randomstate.py
@@ -942,11 +942,20 @@ class TestRandomDist:
                             [3, 6]])
         assert_array_equal(actual, desired)
 
-    def test_logseries_exceptions(self):
-        with suppress_warnings() as sup:
-            sup.record(RuntimeWarning)
-            assert_raises(ValueError, random.logseries, np.nan)
-            assert_raises(ValueError, random.logseries, [np.nan] * 10)
+    def test_logseries_zero(self):
+        assert random.logseries(0) == 1
+
+    @pytest.mark.parametrize("value", [np.nextafter(0., -1), 1., np.nan, 5.])
+    def test_logseries_exceptions(self, value):
+        with np.errstate(invalid="ignore"):
+            with pytest.raises(ValueError):
+                random.logseries(value)
+            with pytest.raises(ValueError):
+                # contiguous path:
+                random.logseries(np.array([value] * 10))
+            with pytest.raises(ValueError):
+                # non-contiguous path:
+                random.logseries(np.array([value] * 10)[::2])
 
     def test_multinomial(self):
         random.seed(self.seed)
diff --git a/tools/gitpod/Dockerfile b/tools/gitpod/Dockerfile
index 592a5ee0a..dd5561750 100644
--- a/tools/gitpod/Dockerfile
+++ b/tools/gitpod/Dockerfile
@@ -53,10 +53,10 @@ RUN apt-get update && \
     texlive-latex-extra \
     vim && \
     # this needs to be done after installing dirmngr
-    apt-key adv --keyserver keyserver.ubuntu.com --recv-key C99B11DEB97541F0 && \
-    apt-add-repository https://cli.github.com/packages && \
+    apt-key adv --keyserver keyserver.ubuntu.com --recv-key 23F3D4EA75716059 && \
+    apt-add-repository https://cli.github.com/packages && \
     apt-get install -yq --no-install-recommends \
-    gh && \
+    gh && \
     locale-gen en_US.UTF-8 && \
    apt-get clean && \
    rm -rf /var/cache/apt/* &&\