1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
#include "numpy/random/distributions.h"
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
/*
* random_multivariate_hypergeometric_count
*
* Draw variates from the multivariate hypergeometric distribution--
* the "count" algorithm.
*
* Parameters
* ----------
* bitgen_t *bitgen_state
* Pointer to a `bitgen_t` instance.
* int64_t total
* The sum of the values in the array `colors`. (This is redundant
* information, but we know the caller has already computed it, so
* we might as well use it.)
* size_t num_colors
* The length of the `colors` array.
* int64_t *colors
* The array of colors (i.e. the number of each type in the collection
* from which the random variate is drawn).
* int64_t nsample
* The number of objects drawn without replacement for each variate.
* `nsample` must not exceed sum(colors). This condition is not checked;
* it is assumed that the caller has already validated the value.
* size_t num_variates
* The number of variates to be produced and put in the array
* pointed to by `variates`. One variate is a vector of length
* `num_colors`, so the array pointed to by `variates` must have length
* `num_variates * num_colors`.
* int64_t *variates
* The array that will hold the result. It must have length
* `num_variates * num_colors`.
* The array is not initialized in the function; it is expected that the
* array has been initialized with zeros when the function is called.
*
* Notes
* -----
* The "count" algorithm for drawing one variate is roughly equivalent to the
* following numpy code:
*
* choices = np.repeat(np.arange(len(colors)), colors)
* selection = np.random.choice(choices, nsample, replace=False)
* variate = np.bincount(selection, minlength=len(colors))
*
* This function uses a temporary array with length sum(colors).
*
* Assumptions on the arguments (not checked in the function):
* * colors[k] >= 0 for k in range(num_colors)
* * total = sum(colors)
* * 0 <= nsample <= total
* * the product total * sizeof(size_t) does not exceed SIZE_MAX
* * the product num_variates * num_colors does not overflow
*/
int random_multivariate_hypergeometric_count(bitgen_t *bitgen_state,
int64_t total,
size_t num_colors, int64_t *colors,
int64_t nsample,
size_t num_variates, int64_t *variates)
{
size_t *choices;
bool more_than_half;
if ((total == 0) || (nsample == 0) || (num_variates == 0)) {
// Nothing to do.
return 0;
}
choices = malloc(total * (sizeof *choices));
if (choices == NULL) {
return -1;
}
/*
* If colors contains, for example, [3 2 5], then choices
* will contain [0 0 0 1 1 2 2 2 2 2].
*/
for (size_t i = 0, k = 0; i < num_colors; ++i) {
for (int64_t j = 0; j < colors[i]; ++j) {
choices[k] = i;
++k;
}
}
more_than_half = nsample > (total / 2);
if (more_than_half) {
nsample = total - nsample;
}
for (size_t i = 0; i < num_variates * num_colors; i += num_colors) {
/*
* Fisher-Yates shuffle, but only loop through the first
* `nsample` entries of `choices`. After the loop,
* choices[:nsample] contains a random sample from the
* the full array.
*/
for (size_t j = 0; j < (size_t) nsample; ++j) {
size_t tmp, k;
// Note: nsample is not greater than total, so there is no danger
// of integer underflow in `(size_t) total - j - 1`.
k = j + (size_t) random_interval(bitgen_state,
(size_t) total - j - 1);
tmp = choices[k];
choices[k] = choices[j];
choices[j] = tmp;
}
/*
* Count the number of occurrences of each value in choices[:nsample].
* The result, stored in sample[i:i+num_colors], is the sample from
* the multivariate hypergeometric distribution.
*/
for (size_t j = 0; j < (size_t) nsample; ++j) {
variates[i + choices[j]] += 1;
}
if (more_than_half) {
for (size_t k = 0; k < num_colors; ++k) {
variates[i + k] = colors[k] - variates[i + k];
}
}
}
free(choices);
return 0;
}
|