diff options
Diffstat (limited to 'Lib/statistics.py')
-rw-r--r-- | Lib/statistics.py | 68 |
1 files changed, 38 insertions, 30 deletions
diff --git a/Lib/statistics.py b/Lib/statistics.py index b081b5a006..4f5c1c164a 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -105,7 +105,6 @@ import math from fractions import Fraction from decimal import Decimal from itertools import groupby -from bisect import bisect_left, bisect_right @@ -224,26 +223,56 @@ def _exact_ratio(x): # Optimise the common case of floats. We expect that the most often # used numeric type will be builtin floats, so try to make this as # fast as possible. - if type(x) is float or type(x) is Decimal: + if type(x) is float: return x.as_integer_ratio() try: # x may be an int, Fraction, or Integral ABC. return (x.numerator, x.denominator) except AttributeError: try: - # x may be a float or Decimal subclass. + # x may be a float subclass. return x.as_integer_ratio() except AttributeError: - # Just give up? - pass + try: + # x may be a Decimal. + return _decimal_to_ratio(x) + except AttributeError: + # Just give up? + pass except (OverflowError, ValueError): # float NAN or INF. - assert not _isfinite(x) + assert not math.isfinite(x) return (x, None) msg = "can't convert type '{}' to numerator/denominator" raise TypeError(msg.format(type(x).__name__)) +# FIXME This is faster than Fraction.from_decimal, but still too slow. +def _decimal_to_ratio(d): + """Convert Decimal d to exact integer ratio (numerator, denominator). + + >>> from decimal import Decimal + >>> _decimal_to_ratio(Decimal("2.6")) + (26, 10) + + """ + sign, digits, exp = d.as_tuple() + if exp in ('F', 'n', 'N'): # INF, NAN, sNAN + assert not d.is_finite() + return (d, None) + num = 0 + for digit in digits: + num = num*10 + digit + if exp < 0: + den = 10**-exp + else: + num *= 10**exp + den = 1 + if sign: + num = -num + return (num, den) + + def _convert(value, T): """Convert value to given numeric type T.""" if type(value) is T: @@ -276,21 +305,6 @@ def _counts(data): return table -def _find_lteq(a, x): - 'Locate the leftmost value exactly equal to x' - i = bisect_left(a, x) - if i != len(a) and a[i] == x: - return i - raise ValueError - - -def _find_rteq(a, l, x): - 'Locate the rightmost value exactly equal to x' - i = bisect_right(a, x, lo=l) - if i != (len(a)+1) and a[i-1] == x: - return i-1 - raise ValueError - # === Measures of central tendency (averages) === def mean(data): @@ -428,15 +442,9 @@ def median_grouped(data, interval=1): except TypeError: # Mixed type. For now we just coerce to float. L = float(x) - float(interval)/2 - - # Uses bisection search to search for x in data with log(n) time complexity - # Find the position of leftmost occurrence of x in data - l1 = _find_lteq(data, x) - # Find the position of rightmost occurrence of x in data[l1...len(data)] - # Assuming always l1 <= l2 - l2 = _find_rteq(data, l1, x) - cf = l1 - f = l2 - l1 + 1 + cf = data.index(x) # Number of values below the median interval. + # FIXME The following line could be more efficient for big lists. + f = data.count(x) # Number of data points in the median interval. return L + interval*(n/2 - cf)/f |