/*
 * NOTE(review): the include targets were unreadable in the reviewed copy of
 * this file; they are restored here from usage (localeconv(), FILE) --
 * confirm against version control.
 */
#include <locale.h>
#include <stdio.h>

/*
 * From the C99 standard, section 7.19.6: The exponent always contains at
 * least two digits, and only as many more digits as necessary to represent
 * the exponent.
 */
/* We force 3 digits on windows for python < 2.6 for compatibility reasons */
#if defined(MS_WIN32) && (PY_VERSION_HEX < 0x02060000)
#define MIN_EXPONENT_DIGITS 3
#else
#define MIN_EXPONENT_DIGITS 2
#endif

/*
 * Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
 * in length.
 *
 * - buffer:   NUL-terminated number string, modified in place.
 * - buf_size: total capacity of buffer, used to decide whether padding
 *             zeros fit.
 *
 * Only exponents of the form [eE][+-]digits are touched; a string without
 * such an exponent is left unchanged.
 */
static void
_ensure_minimum_exponent_length(char* buffer, size_t buf_size)
{
    char *p = strpbrk(buffer, "eE");
    if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
        char *start = p + 2;
        int exponent_digit_cnt = 0;
        int leading_zero_cnt = 0;
        int in_leading_zeros = 1;
        int significant_digit_cnt;

        /* Skip over the exponent marker and the sign. */
        p += 2;

        /* Find the end of the exponent, keeping track of leading zeros. */
        while (*p && isdigit(Py_CHARMASK(*p))) {
            if (in_leading_zeros && *p == '0') {
                ++leading_zero_cnt;
            }
            if (*p != '0') {
                in_leading_zeros = 0;
            }
            ++p;
            ++exponent_digit_cnt;
        }

        significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
        if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
            /*
             * If there are exactly MIN_EXPONENT_DIGITS digits, we're done,
             * regardless of what they contain
             */
        }
        else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
            int extra_zeros_cnt;

            /*
             * There are more than MIN_EXPONENT_DIGITS digits in the
             * exponent.  See if we can delete some of the leading zeros
             */
            if (significant_digit_cnt < MIN_EXPONENT_DIGITS) {
                significant_digit_cnt = MIN_EXPONENT_DIGITS;
            }
            extra_zeros_cnt = exponent_digit_cnt - significant_digit_cnt;

            /*
             * Delete extra_zeros_cnt worth of characters from the front of
             * the exponent
             */
            assert(extra_zeros_cnt >= 0);

            /*
             * Add one to significant_digit_cnt to copy the trailing 0 byte,
             * thus setting the length.  This relies on the exponent digits
             * being immediately followed by the terminating NUL.
             */
            memmove(start, start + extra_zeros_cnt, significant_digit_cnt + 1);
        }
        else {
            /*
             * If there are fewer than MIN_EXPONENT_DIGITS digits, add zeros
             * until there are enough, if there's enough room
             */
            int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
            if (start + zeros + exponent_digit_cnt + 1 < buffer + buf_size) {
                /* +1 moves the terminating NUL along with the digits */
                memmove(start + zeros, start, exponent_digit_cnt + 1);
                memset(start, '0', zeros);
            }
        }
    }
}

/*
 * Ensure that buffer has a decimal point in it.  The decimal point will not
 * be in the current locale, it will always be '.'
 *
 * - buffer:   NUL-terminated number string, modified in place.
 * - buf_size: total capacity of buffer.
 *
 * If the first non-digit character after the optional sign is not '.',
 * ".0" is inserted at that position; if it is a '.' with no digit after it,
 * "0" is inserted after the '.'.  When the insertion would not fit, the
 * buffer is left unchanged.
 */
static void
_ensure_decimal_point(char* buffer, size_t buf_size)
{
    int insert_count = 0;
    const char *chars_to_insert = NULL;

    /* search for the first non-digit character */
    char *p = buffer;
    if (*p == '-' || *p == '+') {
        /*
         * Skip leading sign, if present.  I think this could only ever be
         * '-', but it can't hurt to check for both.
         */
        ++p;
    }
    while (*p && isdigit(Py_CHARMASK(*p))) {
        ++p;
    }

    if (*p == '.') {
        if (isdigit(Py_CHARMASK(*(p+1)))) {
            /*
             * Nothing to do, we already have a decimal point and a digit
             * after it
             */
        }
        else {
            /*
             * We have a decimal point, but no following digit.  Insert a
             * zero after the decimal.
             */
            ++p;
            chars_to_insert = "0";
            insert_count = 1;
        }
    }
    else {
        chars_to_insert = ".0";
        insert_count = 2;
    }

    if (insert_count) {
        size_t buf_len = strlen(buffer);
        if (buf_len + insert_count + 1 >= buf_size) {
            /*
             * If there is not enough room in the buffer for the additional
             * text, just skip it.  It's not worth generating an error over.
             */
        }
        else {
            /* Shift the tail (including the terminating NUL) to make room */
            memmove(p + insert_count, p, (size_t)(buffer + buf_len - p) + 1);
            memcpy(p, chars_to_insert, insert_count);
        }
    }
}

/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120

/*
 * Given a string that may have a decimal point in the current locale, change
 * it back to a dot.  Since the string cannot get longer, no need for a
 * maximum buffer size parameter.
 *
 * Only a locale decimal point located directly after the optional sign and
 * the leading run of digits is replaced.
 */
static void
_change_decimal_from_locale_to_dot(char* buffer)
{
    struct lconv *locale_data = localeconv();
    const char *decimal_point = locale_data->decimal_point;

    if (decimal_point[0] != '.' || decimal_point[1] != 0) {
        size_t decimal_point_len = strlen(decimal_point);

        if (*buffer == '+' || *buffer == '-') {
            buffer++;
        }
        while (isdigit(Py_CHARMASK(*buffer))) {
            buffer++;
        }
        if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
            *buffer = '.';
            buffer++;
            if (decimal_point_len > 1) {
                /*
                 * buffer needs to get smaller: drop the remaining bytes of
                 * the multi-byte locale decimal point
                 */
                size_t rest_len = strlen(buffer + (decimal_point_len - 1));
                memmove(buffer, buffer + (decimal_point_len - 1), rest_len);
                buffer[rest_len] = 0;
            }
        }
    }
}

/*
 * Check that the format string is a valid one for NumPyOS_ascii_format*
 *
 * - format: NUL-terminated printf()-style format string.
 *
 * Returns 0 if the format is accepted and -1 otherwise.  A format is
 * accepted when it starts with '%', contains no single quote, lowercase 'l'
 * or further '%' after the first character, and ends with one of
 * 'e', 'E', 'f', 'F', 'g', 'G'.
 */
static int
_check_ascii_format(const char *format)
{
    char format_char;
    size_t format_len = strlen(format);

    /*
     * Check the leading '%' before looking at the last character: this
     * rejects the empty string, for which format_len - 1 would wrap around
     * and index far outside the string.
     */
    if (format[0] != '%') {
        return -1;
    }

    /* The last character in the format string must be the format char */
    format_char = format[format_len - 1];

    /*
     * Inherited check whose rationale is not recorded here: it ensures that
     * the format string after the first character doesn't have a single
     * quote, a lowercase l, or a percent.
     */
    if (strpbrk(format + 1, "'l%")) {
        return -1;
    }

    /*
     * Also curious about this function is that it accepts format strings
     * like "%xg", which are invalid for floats.  In general, the interface
     * to this function is not very good, but changing it is difficult
     * because it's a public API.
     */
    if (!(format_char == 'e' || format_char == 'E'
          || format_char == 'f' || format_char == 'F'
          || format_char == 'g' || format_char == 'G')) {
        return -1;
    }

    return 0;
}

/*
 * Fix the generated string: make sure the decimal is ., that exponent has a
 * minimal number of digits, and that it has a decimal + one digit after that
 * decimal if decimal argument != 0 (same effect as the 'Z' format in
 * PyOS_ascii_formatd).
 *
 * - buf:     NUL-terminated output of the formatting call, fixed in place.
 * - buflen:  total capacity of buf.
 * - decimal: if != 0, force a decimal point followed by at least one digit.
 *
 * Returns buf.
 */
static char*
_fix_ascii_format(char* buf, size_t buflen, int decimal)
{
    /*
     * Get the current locale, and find the decimal point string.
     * Convert that string back to a dot.
     */
    _change_decimal_from_locale_to_dot(buf);

    /*
     * If an exponent exists, ensure that the exponent is at least
     * MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
     * for the extra zeros.  Also, if there are more than
     * MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
     * back to MIN_EXPONENT_DIGITS
     */
    _ensure_minimum_exponent_length(buf, buflen);

    if (decimal != 0) {
        _ensure_decimal_point(buf, buflen);
    }

    return buf;
}

/*
 * NumPyOS_ascii_format*:
 *      - buffer: A buffer to place the resulting string in
 *      - buf_size: The length of the buffer.
 *      - format: The printf()-style format to use for converting.
 *      - value: The value to convert
 *      - decimal: if != 0, always has a decimal, and at least one digit after
 *      the decimal. This has the same effect as passing 'Z' in the original
 *      PyOS_ascii_formatd
 *
 * This is similar to PyOS_ascii_formatd in python > 2.6, except that it does
 * not handle 'n', and handles nan / inf.
 *
 * Converts a floating-point value to a string, using the '.' as decimal
 * point. To format the number you pass in a printf()-style format string.
 * Allowed conversion specifiers are 'e', 'E', 'f', 'F', 'g', 'G'.
 *
 * Non-finite values bypass the format string and are written as "nan",
 * "inf" or "-inf".
 *
 * Return value: The pointer to the buffer with the converted string, or NULL
 * if the format is rejected (finite values only) or the buffer is too small
 * for the nan / inf text.
 */
#define _ASCII_FORMAT(type, suffix, print_type)                         \
    static char*                                                        \
    NumPyOS_ascii_format ## suffix(char *buffer, size_t buf_size,       \
                                   const char *format,                  \
                                   type val, int decimal)               \
    {                                                                   \
        if (isfinite(val)) {                                            \
            if(_check_ascii_format(format)) {                           \
                return NULL;                                            \
            }                                                           \
            PyOS_snprintf(buffer, buf_size, format, (print_type)val);   \
            return _fix_ascii_format(buffer, buf_size, decimal);        \
        }                                                               \
        else if (isnan(val)){                                           \
            if (buf_size < 4) {                                         \
                return NULL;                                            \
            }                                                           \
            strcpy(buffer, "nan");                                      \
        }                                                               \
        else {                                                          \
            if (signbit(val)) {                                         \
                if (buf_size < 5) {                                     \
                    return NULL;                                        \
                }                                                       \
                strcpy(buffer, "-inf");                                 \
            }                                                           \
            else {                                                      \
                if (buf_size < 4) {                                     \
                    return NULL;                                        \
                }                                                       \
                strcpy(buffer, "inf");                                  \
            }                                                           \
        }                                                               \
        return buffer;                                                  \
    }

_ASCII_FORMAT(float, f, float)
_ASCII_FORMAT(double, d, double)
#ifndef FORCE_NO_LONG_DOUBLE_FORMATTING
_ASCII_FORMAT(long double, l, long double)
#else
_ASCII_FORMAT(long double, l, double)
#endif


static double NumPyOS_PINF;  /* Positive infinity */
static double NumPyOS_PZERO; /* +0 */
static double NumPyOS_NAN;   /* NaN */

/*
 * NumPyOS_init:
 *
 * initialize floating-point constants
 *
 * The constants are computed at run time: repeated multiplication until the
 * value stops changing yields NumPyOS_PINF, repeated division yields
 * NumPyOS_PZERO, and NumPyOS_NAN is PINF / PINF.  NumPyOS_ascii_strtod reads
 * NumPyOS_NAN and NumPyOS_PINF, so this must run before it is used.
 */
static void
NumPyOS_init(void)
{
    double mul = 1e100;
    double div = 1e10;
    double tmp, c;

    tmp = 0;
    c = mul;
    for (;;) {
        c *= mul;
        if (c == tmp) {
            break;
        }
        tmp = c;
    }
    NumPyOS_PINF = c;

    tmp = 0;
    c = div;
    for (;;) {
        c /= div;
        if (c == tmp) {
            break;
        }
        tmp = c;
    }
    NumPyOS_PZERO = c;

    NumPyOS_NAN = NumPyOS_PINF / NumPyOS_PINF;
}


/*
 * NumPyOS_ascii_isspace:
 *
 * Same as isspace under C locale
 */
static int
NumPyOS_ascii_isspace(char c)
{
    return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t'
                    || c == '\v';
}


/*
 * NumPyOS_ascii_isalpha:
 *
 * Same as isalpha under C locale
 */
static int
NumPyOS_ascii_isalpha(char c)
{
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}


/*
 * NumPyOS_ascii_isdigit:
 *
 * Same as isdigit under C locale
 */
static int
NumPyOS_ascii_isdigit(char c)
{
    return (c >= '0' && c <= '9');
}


/*
 * NumPyOS_ascii_isalnum:
 *
 * Same as isalnum under C locale
 */
static int
NumPyOS_ascii_isalnum(char c)
{
    return NumPyOS_ascii_isdigit(c) || NumPyOS_ascii_isalpha(c);
}


/*
 * NumPyOS_ascii_tolower:
 *
 * Same as tolower under C locale
 */
static char
NumPyOS_ascii_tolower(char c)
{
    if (c >= 'A' && c <= 'Z') {
        return c + ('a'-'A');
    }
    return c;
}


/*
 * NumPyOS_ascii_strncasecmp:
 *
 * Same as strncasecmp under C locale
 *
 * Compares at most len characters.  Returns 0 when the compared prefixes are
 * equal ignoring ASCII case, otherwise the difference of the first
 * mismatching characters.
 */
static int
NumPyOS_ascii_strncasecmp(const char* s1, const char* s2, size_t len)
{
    int diff;
    while (len > 0 && *s1 != '\0' && *s2 != '\0') {
        diff = ((int)NumPyOS_ascii_tolower(*s1)) -
               ((int)NumPyOS_ascii_tolower(*s2));
        if (diff != 0) {
            return diff;
        }
        ++s1;
        ++s2;
        --len;
    }
    /* One string ended before len characters were compared */
    if (len > 0) {
        return ((int)*s1) - ((int)*s2);
    }
    return 0;
}


/*
 * NumPyOS_ascii_strtod:
 *
 * Work around bugs in PyOS_ascii_strtod
 *
 * - s:      NUL-terminated input; leading ASCII whitespace is skipped.
 * - endptr: if not NULL, receives a pointer just past the parsed text.
 *
 * Returns the parsed value.  "nan" (optionally followed by a parenthesised
 * tag) and "inf" / "infinity" are recognised case-insensitively, with an
 * optional sign; the sign is applied to infinities only.
 */
static double
NumPyOS_ascii_strtod(const char *s, char** endptr)
{
    struct lconv *locale_data = localeconv();
    const char *decimal_point = locale_data->decimal_point;
    size_t decimal_point_len = strlen(decimal_point);

    char buffer[FLOAT_FORMATBUFLEN+1];
    const char *p;
    char *q;
    size_t n;
    double result;

    while (NumPyOS_ascii_isspace(*s)) {
        ++s;
    }

    /*
     * ##1
     *
     * Recognize POSIX inf/nan representations on all platforms.
     */
    p = s;
    result = 1.0;
    if (*p == '-') {
        result = -1.0;
        ++p;
    }
    else if (*p == '+') {
        ++p;
    }
    if (NumPyOS_ascii_strncasecmp(p, "nan", 3) == 0) {
        p += 3;
        if (*p == '(') {
            ++p;
            while (NumPyOS_ascii_isalnum(*p) || *p == '_') {
                ++p;
            }
            if (*p == ')') {
                ++p;
            }
        }
        if (endptr != NULL) {
            *endptr = (char*)p;
        }
        return NumPyOS_NAN;
    }
    else if (NumPyOS_ascii_strncasecmp(p, "inf", 3) == 0) {
        p += 3;
        if (NumPyOS_ascii_strncasecmp(p, "inity", 5) == 0) {
            p += 5;
        }
        if (endptr != NULL) {
            *endptr = (char*)p;
        }
        return result*NumPyOS_PINF;
    }
    /* End of ##1 */

    /*
     * ## 2
     *
     * At least Python versions <= 2.5.2 and <= 2.6.1
     *
     * Fails to do best-efforts parsing of strings of the form "1<DP>234"
     * where <DP> is the decimal point under the foreign locale.
     */
    if (decimal_point[0] != '.' || decimal_point[1] != 0) {
        p = s;
        if (*p == '+' || *p == '-') {
            ++p;
        }
        while (*p >= '0' && *p <= '9') {
            ++p;
        }
        if (strncmp(p, decimal_point, decimal_point_len) == 0) {
            /*
             * Parse only the integer part in front of the locale decimal
             * point, from a bounded local copy.
             */
            n = (size_t)(p - s);
            if (n > FLOAT_FORMATBUFLEN) {
                n = FLOAT_FORMATBUFLEN;
            }
            memcpy(buffer, s, n);
            buffer[n] = '\0';
            result = PyOS_ascii_strtod(buffer, &q);
            if (endptr != NULL) {
                /* Translate the end position back into the caller's string */
                *endptr = (char*)(s + (q - buffer));
            }
            return result;
        }
    }
    /* End of ##2 */

    return PyOS_ascii_strtod(s, endptr);
}


/*
 * NumPyOS_ascii_ftolf:
 *      * fp: FILE pointer
 *      * value: Place to store the value read
 *
 * Similar to PyOS_ascii_strtod, except that it reads input from a file.
 *
 * Similarly to fscanf, this function always consumes leading whitespace,
 * and any text that could be the leading part in valid input.
 *
 * Return value: similar to fscanf.
 *      * 0 if no number read,
 *      * 1 if a number read,
 *      * EOF if the very first read from fp hits end-of-file.
 */
static int
NumPyOS_ascii_ftolf(FILE *fp, double *value)
{
    char buffer[FLOAT_FORMATBUFLEN+1];
    char *endp;
    char *p;
    int c;
    int ok;

    /*
     * Pass on to PyOS_ascii_strtod the leftmost matching part in regexp
     *
     *   \s*[+-]? ( [0-9]*\.[0-9]+([eE][+-]?[0-9]+)
     *            | nan  (  \([:alphanum:_]*\) )?
     *            | inf(inity)?
     *            )
     *
     * case-insensitively.
     *
     * The "do { ... } while (0)" wrapping in macros ensures that they behave
     * properly eg. in "if ... else" structures.
     */

#define END_MATCH()                                                         \
        goto buffer_filled

#define NEXT_CHAR()                                                         \
        do {                                                                \
            if (c == EOF || endp >= buffer + FLOAT_FORMATBUFLEN)            \
                END_MATCH();                                                \
            *endp++ = (char)c;                                              \
            c = getc(fp);                                                   \
        } while (0)

#define MATCH_ALPHA_STRING_NOCASE(string)                                   \
        do {                                                                \
            for (p=(string); *p!='\0' && (c==*p || c+('a'-'A')==*p); ++p)   \
                NEXT_CHAR();                                                \
            if (*p != '\0') END_MATCH();                                    \
        } while (0)

#define MATCH_ONE_OR_NONE(condition)                                        \
        do { if (condition) NEXT_CHAR(); } while (0)

#define MATCH_ONE_OR_MORE(condition)                                        \
        do {                                                                \
            ok = 0;                                                         \
            while (condition) { NEXT_CHAR(); ok = 1; }                      \
            if (!ok) END_MATCH();                                           \
        } while (0)

#define MATCH_ZERO_OR_MORE(condition)                                       \
        while (condition) { NEXT_CHAR(); }

    /* 1. emulate fscanf EOF handling */
    c = getc(fp);
    if (c == EOF) {
        return EOF;
    }

    /* 2. consume leading whitespace unconditionally */
    while (NumPyOS_ascii_isspace(c)) {
        c = getc(fp);
    }

    /* 3. start reading matching input to buffer */
    endp = buffer;

    /* 4.1 sign (optional) */
    MATCH_ONE_OR_NONE(c == '+' || c == '-');

    /* 4.2 nan, inf, infinity; [case-insensitive] */
    if (c == 'n' || c == 'N') {
        NEXT_CHAR();
        MATCH_ALPHA_STRING_NOCASE("an");

        /* accept nan([:alphanum:_]*), similarly to strtod */
        if (c == '(') {
            NEXT_CHAR();
            MATCH_ZERO_OR_MORE(NumPyOS_ascii_isalnum(c) || c == '_');
            if (c == ')') {
                NEXT_CHAR();
            }
        }
        END_MATCH();
    }
    else if (c == 'i' || c == 'I') {
        NEXT_CHAR();
        MATCH_ALPHA_STRING_NOCASE("nfinity");
        END_MATCH();
    }

    /* 4.3 mantissa */
    MATCH_ZERO_OR_MORE(NumPyOS_ascii_isdigit(c));

    if (c == '.') {
        NEXT_CHAR();
        MATCH_ONE_OR_MORE(NumPyOS_ascii_isdigit(c));
    }

    /* 4.4 exponent */
    if (c == 'e' || c == 'E') {
        NEXT_CHAR();
        MATCH_ONE_OR_NONE(c == '+' || c == '-');
        MATCH_ONE_OR_MORE(NumPyOS_ascii_isdigit(c));
    }

    END_MATCH();

buffer_filled:

    /* Push back the first character that was not consumed by the match */
    ungetc(c, fp);
    *endp = '\0';

    /* 5. try to convert buffer. */
    *value = NumPyOS_ascii_strtod(buffer, &p);

    return (buffer == p) ? 0 : 1; /* if something was read */
}

#undef END_MATCH
#undef NEXT_CHAR
#undef MATCH_ALPHA_STRING_NOCASE
#undef MATCH_ONE_OR_NONE
#undef MATCH_ONE_OR_MORE
#undef MATCH_ZERO_OR_MORE