diff options
Diffstat (limited to 'Python')
-rw-r--r-- | Python/Python-ast.c | 167 | ||||
-rw-r--r-- | Python/ast.c | 1056 | ||||
-rw-r--r-- | Python/bltinmodule.c | 5 | ||||
-rw-r--r-- | Python/compile.c | 121 | ||||
-rw-r--r-- | Python/graminit.c | 106 | ||||
-rw-r--r-- | Python/pythonrun.c | 10 | ||||
-rw-r--r-- | Python/pytime.c | 254 | ||||
-rw-r--r-- | Python/random.c | 49 | ||||
-rw-r--r-- | Python/symtable.c | 8 |
9 files changed, 1547 insertions, 229 deletions
diff --git a/Python/Python-ast.c b/Python/Python-ast.c index 8a2dc7cc54..a2e9816486 100644 --- a/Python/Python-ast.c +++ b/Python/Python-ast.c @@ -285,6 +285,18 @@ _Py_IDENTIFIER(s); static char *Str_fields[]={ "s", }; +static PyTypeObject *FormattedValue_type; +_Py_IDENTIFIER(conversion); +_Py_IDENTIFIER(format_spec); +static char *FormattedValue_fields[]={ + "value", + "conversion", + "format_spec", +}; +static PyTypeObject *JoinedStr_type; +static char *JoinedStr_fields[]={ + "values", +}; static PyTypeObject *Bytes_type; static char *Bytes_fields[]={ "s", @@ -769,7 +781,7 @@ static int obj2ast_int(PyObject* obj, int* out, PyArena* arena) return 1; } - i = (int)PyLong_AsLong(obj); + i = _PyLong_AsInt(obj); if (i == -1 && PyErr_Occurred()) return 1; *out = i; @@ -917,6 +929,11 @@ static int init_types(void) if (!Num_type) return 0; Str_type = make_type("Str", expr_type, Str_fields, 1); if (!Str_type) return 0; + FormattedValue_type = make_type("FormattedValue", expr_type, + FormattedValue_fields, 3); + if (!FormattedValue_type) return 0; + JoinedStr_type = make_type("JoinedStr", expr_type, JoinedStr_fields, 1); + if (!JoinedStr_type) return 0; Bytes_type = make_type("Bytes", expr_type, Bytes_fields, 1); if (!Bytes_type) return 0; NameConstant_type = make_type("NameConstant", expr_type, @@ -2063,6 +2080,42 @@ Str(string s, int lineno, int col_offset, PyArena *arena) } expr_ty +FormattedValue(expr_ty value, int conversion, expr_ty format_spec, int lineno, + int col_offset, PyArena *arena) +{ + expr_ty p; + if (!value) { + PyErr_SetString(PyExc_ValueError, + "field value is required for FormattedValue"); + return NULL; + } + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); + if (!p) + return NULL; + p->kind = FormattedValue_kind; + p->v.FormattedValue.value = value; + p->v.FormattedValue.conversion = conversion; + p->v.FormattedValue.format_spec = format_spec; + p->lineno = lineno; + p->col_offset = col_offset; + return p; +} + +expr_ty +JoinedStr(asdl_seq * values, 
int lineno, int col_offset, PyArena *arena) +{ + expr_ty p; + p = (expr_ty)PyArena_Malloc(arena, sizeof(*p)); + if (!p) + return NULL; + p->kind = JoinedStr_kind; + p->v.JoinedStr.values = values; + p->lineno = lineno; + p->col_offset = col_offset; + return p; +} + +expr_ty Bytes(bytes s, int lineno, int col_offset, PyArena *arena) { expr_ty p; @@ -3161,6 +3214,34 @@ ast2obj_expr(void* _o) goto failed; Py_DECREF(value); break; + case FormattedValue_kind: + result = PyType_GenericNew(FormattedValue_type, NULL, NULL); + if (!result) goto failed; + value = ast2obj_expr(o->v.FormattedValue.value); + if (!value) goto failed; + if (_PyObject_SetAttrId(result, &PyId_value, value) == -1) + goto failed; + Py_DECREF(value); + value = ast2obj_int(o->v.FormattedValue.conversion); + if (!value) goto failed; + if (_PyObject_SetAttrId(result, &PyId_conversion, value) == -1) + goto failed; + Py_DECREF(value); + value = ast2obj_expr(o->v.FormattedValue.format_spec); + if (!value) goto failed; + if (_PyObject_SetAttrId(result, &PyId_format_spec, value) == -1) + goto failed; + Py_DECREF(value); + break; + case JoinedStr_kind: + result = PyType_GenericNew(JoinedStr_type, NULL, NULL); + if (!result) goto failed; + value = ast2obj_list(o->v.JoinedStr.values, ast2obj_expr); + if (!value) goto failed; + if (_PyObject_SetAttrId(result, &PyId_values, value) == -1) + goto failed; + Py_DECREF(value); + break; case Bytes_kind: result = PyType_GenericNew(Bytes_type, NULL, NULL); if (!result) goto failed; @@ -6022,6 +6103,86 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena) if (*out == NULL) goto failed; return 0; } + isinstance = PyObject_IsInstance(obj, (PyObject*)FormattedValue_type); + if (isinstance == -1) { + return 1; + } + if (isinstance) { + expr_ty value; + int conversion; + expr_ty format_spec; + + if (_PyObject_HasAttrId(obj, &PyId_value)) { + int res; + tmp = _PyObject_GetAttrId(obj, &PyId_value); + if (tmp == NULL) goto failed; + res = obj2ast_expr(tmp, &value, arena); + 
if (res != 0) goto failed; + Py_CLEAR(tmp); + } else { + PyErr_SetString(PyExc_TypeError, "required field \"value\" missing from FormattedValue"); + return 1; + } + if (exists_not_none(obj, &PyId_conversion)) { + int res; + tmp = _PyObject_GetAttrId(obj, &PyId_conversion); + if (tmp == NULL) goto failed; + res = obj2ast_int(tmp, &conversion, arena); + if (res != 0) goto failed; + Py_CLEAR(tmp); + } else { + conversion = 0; + } + if (exists_not_none(obj, &PyId_format_spec)) { + int res; + tmp = _PyObject_GetAttrId(obj, &PyId_format_spec); + if (tmp == NULL) goto failed; + res = obj2ast_expr(tmp, &format_spec, arena); + if (res != 0) goto failed; + Py_CLEAR(tmp); + } else { + format_spec = NULL; + } + *out = FormattedValue(value, conversion, format_spec, lineno, + col_offset, arena); + if (*out == NULL) goto failed; + return 0; + } + isinstance = PyObject_IsInstance(obj, (PyObject*)JoinedStr_type); + if (isinstance == -1) { + return 1; + } + if (isinstance) { + asdl_seq* values; + + if (_PyObject_HasAttrId(obj, &PyId_values)) { + int res; + Py_ssize_t len; + Py_ssize_t i; + tmp = _PyObject_GetAttrId(obj, &PyId_values); + if (tmp == NULL) goto failed; + if (!PyList_Check(tmp)) { + PyErr_Format(PyExc_TypeError, "JoinedStr field \"values\" must be a list, not a %.200s", tmp->ob_type->tp_name); + goto failed; + } + len = PyList_GET_SIZE(tmp); + values = _Py_asdl_seq_new(len, arena); + if (values == NULL) goto failed; + for (i = 0; i < len; i++) { + expr_ty value; + res = obj2ast_expr(PyList_GET_ITEM(tmp, i), &value, arena); + if (res != 0) goto failed; + asdl_seq_SET(values, i, value); + } + Py_CLEAR(tmp); + } else { + PyErr_SetString(PyExc_TypeError, "required field \"values\" missing from JoinedStr"); + return 1; + } + *out = JoinedStr(values, lineno, col_offset, arena); + if (*out == NULL) goto failed; + return 0; + } isinstance = PyObject_IsInstance(obj, (PyObject*)Bytes_type); if (isinstance == -1) { return 1; @@ -7319,6 +7480,10 @@ PyInit__ast(void) if 
(PyDict_SetItemString(d, "Call", (PyObject*)Call_type) < 0) return NULL; if (PyDict_SetItemString(d, "Num", (PyObject*)Num_type) < 0) return NULL; if (PyDict_SetItemString(d, "Str", (PyObject*)Str_type) < 0) return NULL; + if (PyDict_SetItemString(d, "FormattedValue", + (PyObject*)FormattedValue_type) < 0) return NULL; + if (PyDict_SetItemString(d, "JoinedStr", (PyObject*)JoinedStr_type) < 0) + return NULL; if (PyDict_SetItemString(d, "Bytes", (PyObject*)Bytes_type) < 0) return NULL; if (PyDict_SetItemString(d, "NameConstant", (PyObject*)NameConstant_type) < diff --git a/Python/ast.c b/Python/ast.c index b2f09b9c84..83e4f002be 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -257,6 +257,14 @@ validate_expr(expr_ty exp, expr_context_ty ctx) } return 1; } + case JoinedStr_kind: + return validate_exprs(exp->v.JoinedStr.values, Load, 0); + case FormattedValue_kind: + if (validate_expr(exp->v.FormattedValue.value, Load) == 0) + return 0; + if (exp->v.FormattedValue.format_spec) + return validate_expr(exp->v.FormattedValue.format_spec, Load); + return 1; case Bytes_kind: { PyObject *b = exp->v.Bytes.s; if (!PyBytes_CheckExact(b)) { @@ -513,7 +521,7 @@ PyAST_Validate(mod_ty mod) /* Data structure used internally */ struct compiling { char *c_encoding; /* source encoding */ - PyArena *c_arena; /* arena for allocating memeory */ + PyArena *c_arena; /* Arena for allocating memory. */ PyObject *c_filename; /* filename */ PyObject *c_normalize; /* Normalization function from unicodedata. */ PyObject *c_normalize_args; /* Normalization argument tuple. 
*/ @@ -535,9 +543,7 @@ static stmt_ty ast_for_for_stmt(struct compiling *, const node *, int); static expr_ty ast_for_call(struct compiling *, const node *, expr_ty); static PyObject *parsenumber(struct compiling *, const char *); -static PyObject *parsestr(struct compiling *, const node *n, int *bytesmode); -static PyObject *parsestrplus(struct compiling *, const node *n, - int *bytesmode); +static expr_ty parsestrplus(struct compiling *, const node *n); #define COMP_GENEXP 0 #define COMP_LISTCOMP 1 @@ -986,6 +992,8 @@ set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n) case Num_kind: case Str_kind: case Bytes_kind: + case JoinedStr_kind: + case FormattedValue_kind: expr_name = "literal"; break; case NameConstant_kind: @@ -1262,16 +1270,20 @@ ast_for_arguments(struct compiling *c, const node *n) and varargslist (lambda definition). parameters: '(' [typedargslist] ')' - typedargslist: ((tfpdef ['=' test] ',')* - ('*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] - | '**' tfpdef) - | tfpdef ['=' test] (',' tfpdef ['=' test])* [',']) + typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']) tfpdef: NAME [':' test] - varargslist: ((vfpdef ['=' test] ',')* - ('*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] - | '**' vfpdef) - | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) + varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [','] + ) vfpdef: NAME + */ int i, j, k, nposargs = 0, nkwonlyargs = 0; int nposdefaults = 0, found_default = 0; @@ -1373,7 +1385,8 @@ ast_for_arguments(struct compiling *c, const node *n) i += 2; /* the name and the comma */ break; 
case STAR: - if (i+1 >= NCH(n)) { + if (i+1 >= NCH(n) || + (i+2 == NCH(n) && TYPE(CHILD(n, i+1)) == COMMA)) { ast_error(c, CHILD(n, i), "named arguments must follow bare *"); return NULL; @@ -1996,7 +2009,6 @@ ast_for_atom(struct compiling *c, const node *n) | '...' | 'None' | 'True' | 'False' */ node *ch = CHILD(n, 0); - int bytesmode = 0; switch (TYPE(ch)) { case NAME: { @@ -2018,7 +2030,7 @@ ast_for_atom(struct compiling *c, const node *n) return Name(name, Load, LINENO(n), n->n_col_offset, c->c_arena); } case STRING: { - PyObject *str = parsestrplus(c, n, &bytesmode); + expr_ty str = parsestrplus(c, n); if (!str) { const char *errtype = NULL; if (PyErr_ExceptionMatches(PyExc_UnicodeError)) @@ -2035,6 +2047,7 @@ ast_for_atom(struct compiling *c, const node *n) PyOS_snprintf(buf, sizeof(buf), "(%s) %s", errtype, s); Py_DECREF(errstr); } else { + PyErr_Clear(); PyOS_snprintf(buf, sizeof(buf), "(%s) unknown error", errtype); } ast_error(c, n, buf); @@ -2044,14 +2057,7 @@ ast_for_atom(struct compiling *c, const node *n) } return NULL; } - if (PyArena_AddPyObject(c->c_arena, str) < 0) { - Py_DECREF(str); - return NULL; - } - if (bytesmode) - return Bytes(str, LINENO(n), n->n_col_offset, c->c_arena); - else - return Str(str, LINENO(n), n->n_col_offset, c->c_arena); + return str; } case NUMBER: { PyObject *pynum = parsenumber(c, STR(ch)); @@ -3932,7 +3938,7 @@ decode_utf8(struct compiling *c, const char **sPtr, const char *end) } static PyObject * -decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, const char *encoding) +decode_unicode(struct compiling *c, const char *s, size_t len, const char *encoding) { PyObject *v, *u; char *buf; @@ -3988,20 +3994,879 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons len = p - buf; s = buf; } - if (rawmode) - v = PyUnicode_DecodeRawUnicodeEscape(s, len, NULL); - else - v = PyUnicode_DecodeUnicodeEscape(s, len, NULL); + v = PyUnicode_DecodeUnicodeEscape(s, len, NULL); 
Py_XDECREF(u); return v; } -/* s is a Python string literal, including the bracketing quote characters, - * and r &/or b prefixes (if any), and embedded escape sequences (if any). - * parsestr parses it, and returns the decoded Python string object. - */ +/* Compile this expression in to an expr_ty. We know that we can + temporarily modify the character before the start of this string + (it's '{'), and we know we can temporarily modify the character + after this string (it is a '}'). Leverage this to create a + sub-string with enough room for us to add parens around the + expression. This is to allow strings with embedded newlines, for + example. */ +static expr_ty +fstring_compile_expr(PyObject *str, Py_ssize_t expr_start, + Py_ssize_t expr_end, PyArena *arena) +{ + PyCompilerFlags cf; + mod_ty mod; + char *utf_expr; + Py_ssize_t i; + Py_UCS4 end_ch = -1; + int all_whitespace; + PyObject *sub = NULL; + + /* We only decref sub if we allocated it with a PyUnicode_Substring. + decref_sub records that. */ + int decref_sub = 0; + + assert(str); + + assert(expr_start >= 0 && expr_start < PyUnicode_GET_LENGTH(str)); + assert(expr_end >= 0 && expr_end < PyUnicode_GET_LENGTH(str)); + assert(expr_end >= expr_start); + + /* There has to be at least one character on each side of the + expression inside this str. This will have been caught before + we're called. */ + assert(expr_start >= 1); + assert(expr_end <= PyUnicode_GET_LENGTH(str)-1); + + /* If the substring is all whitespace, it's an error. We need to + catch this here, and not when we call PyParser_ASTFromString, + because turning the expression '' in to '()' would go from + being invalid to valid. */ + /* Note that this code says an empty string is all + whitespace. That's important. There's a test for it: f'{}'. 
*/ + all_whitespace = 1; + for (i = expr_start; i < expr_end; i++) { + if (!Py_UNICODE_ISSPACE(PyUnicode_READ_CHAR(str, i))) { + all_whitespace = 0; + break; + } + } + if (all_whitespace) { + PyErr_SetString(PyExc_SyntaxError, "f-string: empty expression " + "not allowed"); + goto error; + } + + /* If the substring will be the entire source string, we can't use + PyUnicode_Substring, since it will return another reference to + our original string. Because we're modifying the string in + place, that's a no-no. So, detect that case and just use our + string directly. */ + + if (expr_start-1 == 0 && expr_end+1 == PyUnicode_GET_LENGTH(str)) { + /* If str is well formed, then the first and last chars must + be '{' and '}', respectively. But, if there's a syntax + error, for example f'{3!', then the last char won't be a + closing brace. So, remember the last character we read in + order for us to restore it. */ + end_ch = PyUnicode_ReadChar(str, expr_end-expr_start+1); + assert(end_ch != (Py_UCS4)-1); + + /* In all cases, however, start_ch must be '{'. */ + assert(PyUnicode_ReadChar(str, 0) == '{'); + + sub = str; + } else { + /* Create a substring object. It must be a new object, with + refcount==1, so that we can modify it. */ + sub = PyUnicode_Substring(str, expr_start-1, expr_end+1); + if (!sub) + goto error; + assert(sub != str); /* Make sure it's a new string. */ + decref_sub = 1; /* Remember to deallocate it on error. */ + } + + /* Put () around the expression. */ + if (PyUnicode_WriteChar(sub, 0, '(') < 0 || + PyUnicode_WriteChar(sub, expr_end-expr_start+1, ')') < 0) + goto error; + + /* No need to free the memory returned here: it's managed by the + string. 
*/ + utf_expr = PyUnicode_AsUTF8(sub); + if (!utf_expr) + goto error; + + cf.cf_flags = PyCF_ONLY_AST; + mod = PyParser_ASTFromString(utf_expr, "<fstring>", + Py_eval_input, &cf, arena); + if (!mod) + goto error; + + if (sub != str) + /* Clear instead of decref in case we ever modify this code to change + the error handling: this is safest because the XDECREF won't try + and decref it when it's NULL. */ + /* No need to restore the chars in sub, since we know it's getting + ready to get deleted (refcount must be 1, since we got a new string + in PyUnicode_Substring). */ + Py_CLEAR(sub); + else { + assert(!decref_sub); + assert(end_ch != (Py_UCS4)-1); + /* Restore str, which we earlier modified directly. */ + if (PyUnicode_WriteChar(str, 0, '{') < 0 || + PyUnicode_WriteChar(str, expr_end-expr_start+1, end_ch) < 0) + goto error; + } + return mod->v.Expression.body; + +error: + /* Only decref sub if it was the result of a call to SubString. */ + if (decref_sub) + Py_XDECREF(sub); + + if (end_ch != (Py_UCS4)-1) { + /* We only get here if we modified str. Make sure that's the + case: str will be equal to sub. */ + if (str == sub) { + /* Don't check the error, because we've already set the + error state (that's why we're in 'error', after + all). */ + PyUnicode_WriteChar(str, 0, '{'); + PyUnicode_WriteChar(str, expr_end-expr_start+1, end_ch); + } + } + return NULL; +} + +/* Return -1 on error. + + Return 0 if we reached the end of the literal. + + Return 1 if we haven't reached the end of the literal, but we want + the caller to process the literal up to this point. Used for + doubled braces. +*/ +static int +fstring_find_literal(PyObject *str, Py_ssize_t *ofs, PyObject **literal, + int recurse_lvl, struct compiling *c, const node *n) +{ + /* Get any literal string. It ends when we hit an un-doubled brace, or the + end of the string. 
*/ + + Py_ssize_t literal_start, literal_end; + int result = 0; + + enum PyUnicode_Kind kind = PyUnicode_KIND(str); + void *data = PyUnicode_DATA(str); + + assert(*literal == NULL); + + literal_start = *ofs; + for (; *ofs < PyUnicode_GET_LENGTH(str); *ofs += 1) { + Py_UCS4 ch = PyUnicode_READ(kind, data, *ofs); + if (ch == '{' || ch == '}') { + /* Check for doubled braces, but only at the top level. If + we checked at every level, then f'{0:{3}}' would fail + with the two closing braces. */ + if (recurse_lvl == 0) { + if (*ofs + 1 < PyUnicode_GET_LENGTH(str) && + PyUnicode_READ(kind, data, *ofs + 1) == ch) { + /* We're going to tell the caller that the literal ends + here, but that they should continue scanning. But also + skip over the second brace when we resume scanning. */ + literal_end = *ofs + 1; + *ofs += 2; + result = 1; + goto done; + } + + /* Where a single '{' is the start of a new expression, a + single '}' is not allowed. */ + if (ch == '}') { + ast_error(c, n, "f-string: single '}' is not allowed"); + return -1; + } + } + + /* We're either at a '{', which means we're starting another + expression; or a '}', which means we're at the end of this + f-string (for a nested format_spec). */ + break; + } + } + literal_end = *ofs; + + assert(*ofs == PyUnicode_GET_LENGTH(str) || + PyUnicode_READ(kind, data, *ofs) == '{' || + PyUnicode_READ(kind, data, *ofs) == '}'); +done: + if (literal_start != literal_end) { + *literal = PyUnicode_Substring(str, literal_start, literal_end); + if (!*literal) + return -1; + } + + return result; +} + +/* Forward declaration because parsing is recursive. */ +static expr_ty +fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, + struct compiling *c, const node *n); + +/* Parse the f-string str, starting at ofs. We know *ofs starts an + expression (so it must be a '{'). Returns the FormattedValue node, + which includes the expression, conversion character, and + format_spec expression. 
+ + Note that I don't do a perfect job here: I don't make sure that a + closing brace doesn't match an opening paren, for example. It + doesn't need to error on all invalid expressions, just correctly + find the end of all valid ones. Any errors inside the expression + will be caught when we parse it later. */ +static int +fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, + expr_ty *expression, struct compiling *c, const node *n) +{ + /* Return -1 on error, else 0. */ + + Py_ssize_t expr_start; + Py_ssize_t expr_end; + expr_ty simple_expression; + expr_ty format_spec = NULL; /* Optional format specifier. */ + Py_UCS4 conversion = -1; /* The conversion char. -1 if not specified. */ + + enum PyUnicode_Kind kind = PyUnicode_KIND(str); + void *data = PyUnicode_DATA(str); + + /* 0 if we're not in a string, else the quote char we're trying to + match (single or double quote). */ + Py_UCS4 quote_char = 0; + + /* If we're inside a string, 1=normal, 3=triple-quoted. */ + int string_type = 0; + + /* Keep track of nesting level for braces/parens/brackets in + expressions. */ + Py_ssize_t nested_depth = 0; + + /* Can only nest one level deep. */ + if (recurse_lvl >= 2) { + ast_error(c, n, "f-string: expressions nested too deeply"); + return -1; + } + + /* The first char must be a left brace, or we wouldn't have gotten + here. Skip over it. */ + assert(PyUnicode_READ(kind, data, *ofs) == '{'); + *ofs += 1; + + expr_start = *ofs; + for (; *ofs < PyUnicode_GET_LENGTH(str); *ofs += 1) { + Py_UCS4 ch; + + /* Loop invariants. */ + assert(nested_depth >= 0); + assert(*ofs >= expr_start); + if (quote_char) + assert(string_type == 1 || string_type == 3); + else + assert(string_type == 0); + + ch = PyUnicode_READ(kind, data, *ofs); + if (quote_char) { + /* We're inside a string. See if we're at the end. */ + /* This code needs to implement the same non-error logic + as tok_get from tokenizer.c, at the letter_quote + label. 
To actually share that code would be a + nightmare. But, it's unlikely to change and is small, + so duplicate it here. Note we don't need to catch all + of the errors, since they'll be caught when parsing the + expression. We just need to match the non-error + cases. Thus we can ignore \n in single-quoted strings, + for example. Or non-terminated strings. */ + if (ch == quote_char) { + /* Does this match the string_type (single or triple + quoted)? */ + if (string_type == 3) { + if (*ofs+2 < PyUnicode_GET_LENGTH(str) && + PyUnicode_READ(kind, data, *ofs+1) == ch && + PyUnicode_READ(kind, data, *ofs+2) == ch) { + /* We're at the end of a triple quoted string. */ + *ofs += 2; + string_type = 0; + quote_char = 0; + continue; + } + } else { + /* We're at the end of a normal string. */ + quote_char = 0; + string_type = 0; + continue; + } + } + /* We're inside a string, and not finished with the + string. If this is a backslash, skip the next char (it + might be an end quote that needs skipping). Otherwise, + just consume this character normally. */ + if (ch == '\\' && *ofs+1 < PyUnicode_GET_LENGTH(str)) { + /* Just skip the next char, whatever it is. */ + *ofs += 1; + } + } else if (ch == '\'' || ch == '"') { + /* Is this a triple quoted string? */ + if (*ofs+2 < PyUnicode_GET_LENGTH(str) && + PyUnicode_READ(kind, data, *ofs+1) == ch && + PyUnicode_READ(kind, data, *ofs+2) == ch) { + string_type = 3; + *ofs += 2; + } else { + /* Start of a normal string. */ + string_type = 1; + } + /* Start looking for the end of the string. */ + quote_char = ch; + } else if (ch == '[' || ch == '{' || ch == '(') { + nested_depth++; + } else if (nested_depth != 0 && + (ch == ']' || ch == '}' || ch == ')')) { + nested_depth--; + } else if (ch == '#') { + /* Error: can't include a comment character, inside parens + or not. */ + ast_error(c, n, "f-string cannot include '#'"); + return -1; + } else if (nested_depth == 0 && + (ch == '!' 
|| ch == ':' || ch == '}')) { + /* First, test for the special case of "!=". Since '=' is + not an allowed conversion character, nothing is lost in + this test. */ + if (ch == '!' && *ofs+1 < PyUnicode_GET_LENGTH(str) && + PyUnicode_READ(kind, data, *ofs+1) == '=') + /* This isn't a conversion character, just continue. */ + continue; + + /* Normal way out of this loop. */ + break; + } else { + /* Just consume this char and loop around. */ + } + } + expr_end = *ofs; + /* If we leave this loop in a string or with mismatched parens, we + don't care. We'll get a syntax error when compiling the + expression. But, we can produce a better error message, so + let's just do that.*/ + if (quote_char) { + ast_error(c, n, "f-string: unterminated string"); + return -1; + } + if (nested_depth) { + ast_error(c, n, "f-string: mismatched '(', '{', or '['"); + return -1; + } + + if (*ofs >= PyUnicode_GET_LENGTH(str)) + goto unexpected_end_of_string; + + /* Compile the expression as soon as possible, so we show errors + related to the expression before errors related to the + conversion or format_spec. */ + simple_expression = fstring_compile_expr(str, expr_start, expr_end, + c->c_arena); + if (!simple_expression) + return -1; + + /* Check for a conversion char, if present. */ + if (PyUnicode_READ(kind, data, *ofs) == '!') { + *ofs += 1; + if (*ofs >= PyUnicode_GET_LENGTH(str)) + goto unexpected_end_of_string; + + conversion = PyUnicode_READ(kind, data, *ofs); + *ofs += 1; + + /* Validate the conversion. */ + if (!(conversion == 's' || conversion == 'r' + || conversion == 'a')) { + ast_error(c, n, "f-string: invalid conversion character: " + "expected 's', 'r', or 'a'"); + return -1; + } + } + + /* Check for the format spec, if present. */ + if (*ofs >= PyUnicode_GET_LENGTH(str)) + goto unexpected_end_of_string; + if (PyUnicode_READ(kind, data, *ofs) == ':') { + *ofs += 1; + if (*ofs >= PyUnicode_GET_LENGTH(str)) + goto unexpected_end_of_string; + + /* Parse the format spec. 
*/ + format_spec = fstring_parse(str, ofs, recurse_lvl+1, c, n); + if (!format_spec) + return -1; + } + + if (*ofs >= PyUnicode_GET_LENGTH(str) || + PyUnicode_READ(kind, data, *ofs) != '}') + goto unexpected_end_of_string; + + /* We're at a right brace. Consume it. */ + assert(*ofs < PyUnicode_GET_LENGTH(str)); + assert(PyUnicode_READ(kind, data, *ofs) == '}'); + *ofs += 1; + + /* And now create the FormattedValue node that represents this entire + expression with the conversion and format spec. */ + *expression = FormattedValue(simple_expression, (int)conversion, + format_spec, LINENO(n), n->n_col_offset, + c->c_arena); + if (!*expression) + return -1; + + return 0; + +unexpected_end_of_string: + ast_error(c, n, "f-string: expecting '}'"); + return -1; +} + +/* Return -1 on error. + + Return 0 if we have a literal (possibly zero length) and an + expression (zero length if at the end of the string). + + Return 1 if we have a literal, but no expression, and we want the + caller to call us again. This is used to deal with doubled + braces. + + When called multiple times on the string 'a{{b{0}c', this function + will return: + + 1. the literal 'a{' with no expression, and a return value + of 1. Despite the fact that there's no expression, the return + value of 1 means we're not finished yet. + + 2. the literal 'b' and the expression '0', with a return value of + 0. The fact that there's an expression means we're not finished. + + 3. literal 'c' with no expression and a return value of 0. The + combination of the return value of 0 with no expression means + we're finished. +*/ +static int +fstring_find_literal_and_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, + PyObject **literal, expr_ty *expression, + struct compiling *c, const node *n) +{ + int result; + + assert(*literal == NULL && *expression == NULL); + + /* Get any literal string. 
*/ + result = fstring_find_literal(str, ofs, literal, recurse_lvl, c, n); + if (result < 0) + goto error; + + assert(result == 0 || result == 1); + + if (result == 1) + /* We have a literal, but don't look at the expression. */ + return 1; + + assert(*ofs <= PyUnicode_GET_LENGTH(str)); + + if (*ofs >= PyUnicode_GET_LENGTH(str) || + PyUnicode_READ_CHAR(str, *ofs) == '}') + /* We're at the end of the string or the end of a nested + f-string: no expression. The top-level error case where we + expect to be at the end of the string but we're at a '}' is + handled later. */ + return 0; + + /* We must now be at the start of an expression, on a '{'. */ + assert(*ofs < PyUnicode_GET_LENGTH(str) && + PyUnicode_READ_CHAR(str, *ofs) == '{'); + + if (fstring_find_expr(str, ofs, recurse_lvl, expression, c, n) < 0) + goto error; + + return 0; + +error: + Py_XDECREF(*literal); + *literal = NULL; + return -1; +} + +#define EXPRLIST_N_CACHED 64 + +typedef struct { + /* Incrementally build an array of expr_ty, to be used in an + asdl_seq. Cache some small but reasonably sized number of + expr_ty's, and then after that start dynamically allocating, + doubling the number allocated each time. Note that the f-string + f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one + Str for the literal 'a'. So you add expr_ty's about twice as + fast as you add expressions in an f-string. */ + + Py_ssize_t allocated; /* Number we've allocated. */ + Py_ssize_t size; /* Number we've used. */ + expr_ty *p; /* Pointer to the memory we're actually + using. Will point to 'data' until we + start dynamically allocating. */ + expr_ty data[EXPRLIST_N_CACHED]; +} ExprList; + +#ifdef NDEBUG +#define ExprList_check_invariants(l) +#else +static void +ExprList_check_invariants(ExprList *l) +{ + /* Check our invariants. Make sure this object is "live", and + hasn't been deallocated. 
*/ + assert(l->size >= 0); + assert(l->p != NULL); + if (l->size <= EXPRLIST_N_CACHED) + assert(l->data == l->p); +} +#endif + +static void +ExprList_Init(ExprList *l) +{ + l->allocated = EXPRLIST_N_CACHED; + l->size = 0; + + /* Until we start allocating dynamically, p points to data. */ + l->p = l->data; + + ExprList_check_invariants(l); +} + +static int +ExprList_Append(ExprList *l, expr_ty exp) +{ + ExprList_check_invariants(l); + if (l->size >= l->allocated) { + /* We need to alloc (or realloc) the memory. */ + Py_ssize_t new_size = l->allocated * 2; + + /* See if we've ever allocated anything dynamically. */ + if (l->p == l->data) { + Py_ssize_t i; + /* We're still using the cached data. Switch to + alloc-ing. */ + l->p = PyMem_RawMalloc(sizeof(expr_ty) * new_size); + if (!l->p) + return -1; + /* Copy the cached data into the new buffer. */ + for (i = 0; i < l->size; i++) + l->p[i] = l->data[i]; + } else { + /* Just realloc. */ + expr_ty *tmp = PyMem_RawRealloc(l->p, sizeof(expr_ty) * new_size); + if (!tmp) { + PyMem_RawFree(l->p); + l->p = NULL; + return -1; + } + l->p = tmp; + } + + l->allocated = new_size; + assert(l->allocated == 2 * l->size); + } + + l->p[l->size++] = exp; + + ExprList_check_invariants(l); + return 0; +} + +static void +ExprList_Dealloc(ExprList *l) +{ + ExprList_check_invariants(l); + + /* If there's been an error, or we've never dynamically allocated, + do nothing. */ + if (!l->p || l->p == l->data) { + /* Do nothing. */ + } else { + /* We have dynamically allocated. Free the memory. */ + PyMem_RawFree(l->p); + } + l->p = NULL; + l->size = -1; +} + +static asdl_seq * +ExprList_Finish(ExprList *l, PyArena *arena) +{ + asdl_seq *seq; + + ExprList_check_invariants(l); + + /* Allocate the asdl_seq and copy the expressions in to it. 
*/ + seq = _Py_asdl_seq_new(l->size, arena); + if (seq) { + Py_ssize_t i; + for (i = 0; i < l->size; i++) + asdl_seq_SET(seq, i, l->p[i]); + } + ExprList_Dealloc(l); + return seq; +} + +/* The FstringParser is designed to add a mix of strings and + f-strings, and concat them together as needed. Ultimately, it + generates an expr_ty. */ +typedef struct { + PyObject *last_str; + ExprList expr_list; +} FstringParser; + +#ifdef NDEBUG +#define FstringParser_check_invariants(state) +#else +static void +FstringParser_check_invariants(FstringParser *state) +{ + if (state->last_str) + assert(PyUnicode_CheckExact(state->last_str)); + ExprList_check_invariants(&state->expr_list); +} +#endif + +static void +FstringParser_Init(FstringParser *state) +{ + state->last_str = NULL; + ExprList_Init(&state->expr_list); + FstringParser_check_invariants(state); +} + +static void +FstringParser_Dealloc(FstringParser *state) +{ + FstringParser_check_invariants(state); + + Py_XDECREF(state->last_str); + ExprList_Dealloc(&state->expr_list); +} + +/* Make a Str node, but decref the PyUnicode object being added. */ +static expr_ty +make_str_node_and_del(PyObject **str, struct compiling *c, const node* n) +{ + PyObject *s = *str; + *str = NULL; + assert(PyUnicode_CheckExact(s)); + if (PyArena_AddPyObject(c->c_arena, s) < 0) { + Py_DECREF(s); + return NULL; + } + return Str(s, LINENO(n), n->n_col_offset, c->c_arena); +} + +/* Add a non-f-string (that is, a regular literal string). str is + decref'd. */ +static int +FstringParser_ConcatAndDel(FstringParser *state, PyObject *str) +{ + FstringParser_check_invariants(state); + + assert(PyUnicode_CheckExact(str)); + + if (PyUnicode_GET_LENGTH(str) == 0) { + Py_DECREF(str); + return 0; + } + + if (!state->last_str) { + /* We didn't have a string before, so just remember this one. */ + state->last_str = str; + } else { + /* Concatenate this with the previous string. 
*/ + PyObject *temp = PyUnicode_Concat(state->last_str, str); + Py_DECREF(state->last_str); + Py_DECREF(str); + state->last_str = temp; + if (!temp) + return -1; + } + FstringParser_check_invariants(state); + return 0; +} + +/* Parse an f-string. The f-string is in str, starting at ofs, with no 'f' + or quotes. str is not decref'd, since we don't know if it's used elsewhere. + And if we're only looking at a part of a string, then decref'ing is + definitely not the right thing to do! */ +static int +FstringParser_ConcatFstring(FstringParser *state, PyObject *str, + Py_ssize_t *ofs, int recurse_lvl, + struct compiling *c, const node *n) +{ + FstringParser_check_invariants(state); + + /* Parse the f-string. */ + while (1) { + PyObject *literal = NULL; + expr_ty expression = NULL; + + /* If there's a zero length literal in front of the + expression, literal will be NULL. If we're at the end of + the f-string, expression will be NULL (unless result == 1, + see below). */ + int result = fstring_find_literal_and_expr(str, ofs, recurse_lvl, + &literal, &expression, + c, n); + if (result < 0) + return -1; + + /* Add the literal, if any. */ + if (!literal) { + /* Do nothing. Just leave last_str alone (and possibly + NULL). */ + } else if (!state->last_str) { + state->last_str = literal; + literal = NULL; + } else { + /* We have a literal, concatenate it. */ + assert(PyUnicode_GET_LENGTH(literal) != 0); + if (FstringParser_ConcatAndDel(state, literal) < 0) + return -1; + literal = NULL; + } + assert(!state->last_str || + PyUnicode_GET_LENGTH(state->last_str) != 0); + + /* We've dealt with the literal now. It can't be leaked on further + errors. */ + assert(literal == NULL); + + /* See if we should just loop around to get the next literal + and expression, while ignoring the expression this + time. This is used for un-doubling braces, as an + optimization. */ + if (result == 1) + continue; + + if (!expression) + /* We're done with this f-string. 
*/ + break; + + /* We know we have an expression. Convert any existing string + to a Str node. */ + if (!state->last_str) { + /* Do nothing. No previous literal. */ + } else { + /* Convert the existing last_str literal to a Str node. */ + expr_ty str = make_str_node_and_del(&state->last_str, c, n); + if (!str || ExprList_Append(&state->expr_list, str) < 0) + return -1; + } + + if (ExprList_Append(&state->expr_list, expression) < 0) + return -1; + } + + assert(*ofs <= PyUnicode_GET_LENGTH(str)); + + /* If recurse_lvl is zero, then we must be at the end of the + string. Otherwise, we must be at a right brace. */ + + if (recurse_lvl == 0 && *ofs < PyUnicode_GET_LENGTH(str)) { + ast_error(c, n, "f-string: unexpected end of string"); + return -1; + } + if (recurse_lvl != 0 && PyUnicode_READ_CHAR(str, *ofs) != '}') { + ast_error(c, n, "f-string: expecting '}'"); + return -1; + } + + FstringParser_check_invariants(state); + return 0; +} + +/* Convert the partial state reflected in last_str and expr_list to an + expr_ty. The expr_ty can be a Str, or a JoinedStr. */ +static expr_ty +FstringParser_Finish(FstringParser *state, struct compiling *c, + const node *n) +{ + asdl_seq *seq; + + FstringParser_check_invariants(state); + + /* If we're just a constant string with no expressions, return + that. */ + if(state->expr_list.size == 0) { + if (!state->last_str) { + /* Create a zero length string. */ + state->last_str = PyUnicode_FromStringAndSize(NULL, 0); + if (!state->last_str) + goto error; + } + return make_str_node_and_del(&state->last_str, c, n); + } + + /* Create a Str node out of last_str, if needed. It will be the + last node in our expression list. */ + if (state->last_str) { + expr_ty str = make_str_node_and_del(&state->last_str, c, n); + if (!str || ExprList_Append(&state->expr_list, str) < 0) + goto error; + } + /* This has already been freed. 
*/ + assert(state->last_str == NULL); + + seq = ExprList_Finish(&state->expr_list, c->c_arena); + if (!seq) + goto error; + + /* If there's only one expression, return it. Otherwise, we need + to join them together. */ + if (seq->size == 1) + return seq->elements[0]; + + return JoinedStr(seq, LINENO(n), n->n_col_offset, c->c_arena); + +error: + FstringParser_Dealloc(state); + return NULL; +} + +/* Given an f-string (with no 'f' or quotes) that's in str starting at + ofs, parse it into an expr_ty. Return NULL on error. Does not + decref str. */ +static expr_ty +fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, + struct compiling *c, const node *n) +{ + FstringParser state; + + FstringParser_Init(&state); + if (FstringParser_ConcatFstring(&state, str, ofs, recurse_lvl, + c, n) < 0) { + FstringParser_Dealloc(&state); + return NULL; + } + + return FstringParser_Finish(&state, c, n); +} + +/* n is a Python string literal, including the bracketing quote + characters, and r, b, u, &/or f prefixes (if any), and embedded + escape sequences (if any). parsestr parses it, and returns the + decoded Python string object. If the string is an f-string, set + *fmode and return the unparsed string object. +*/ static PyObject * -parsestr(struct compiling *c, const node *n, int *bytesmode) +parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode) { size_t len; const char *s = STR(n); @@ -4021,15 +4886,24 @@ parsestr(struct compiling *c, const node *n, int *bytesmode) quote = *++s; rawmode = 1; } + else if (quote == 'f' || quote == 'F') { + quote = *++s; + *fmode = 1; + } else { break; } } } + if (*fmode && *bytesmode) { + PyErr_BadInternalCall(); + return NULL; + } if (quote != '\'' && quote != '\"') { PyErr_BadInternalCall(); return NULL; } + /* Skip the leading quote char. 
*/ s++; len = strlen(s); if (len > INT_MAX) { @@ -4038,19 +4912,24 @@ parsestr(struct compiling *c, const node *n, int *bytesmode) return NULL; } if (s[--len] != quote) { + /* Last quote char must match the first. */ PyErr_BadInternalCall(); return NULL; } if (len >= 4 && s[0] == quote && s[1] == quote) { + /* A triple quoted string. We've already skipped one quote at + the start and one at the end of the string. Now skip the + two at the start. */ s += 2; len -= 2; + /* And check that the last two match. */ if (s[--len] != quote || s[--len] != quote) { PyErr_BadInternalCall(); return NULL; } } if (!*bytesmode && !rawmode) { - return decode_unicode(c, s, len, rawmode, c->c_encoding); + return decode_unicode(c, s, len, c->c_encoding); } if (*bytesmode) { /* Disallow non-ascii characters (but not escapes) */ @@ -4082,51 +4961,84 @@ parsestr(struct compiling *c, const node *n, int *bytesmode) } } return PyBytes_DecodeEscape(s, len, NULL, 1, - need_encoding ? c->c_encoding : NULL); + need_encoding ? c->c_encoding : NULL); } -/* Build a Python string object out of a STRING+ atom. This takes care of - * compile-time literal catenation, calling parsestr() on each piece, and - * pasting the intermediate results together. - */ -static PyObject * -parsestrplus(struct compiling *c, const node *n, int *bytesmode) +/* Accepts a STRING+ atom, and produces an expr_ty node. Run through + each STRING atom, and process it as needed. For bytes, just + concatenate them together, and the result will be a Bytes node. For + normal strings and f-strings, concatenate them together. The result + will be a Str node if there were no f-strings; a FormattedValue + node if there's just an f-string (with no leading or trailing + literals), or a JoinedStr node if there are multiple f-strings or + any literals involved. 
*/ +static expr_ty +parsestrplus(struct compiling *c, const node *n) { - PyObject *v; + int bytesmode = 0; + PyObject *bytes_str = NULL; int i; - REQ(CHILD(n, 0), STRING); - v = parsestr(c, CHILD(n, 0), bytesmode); - if (v != NULL) { - /* String literal concatenation */ - for (i = 1; i < NCH(n); i++) { - PyObject *s; - int subbm = 0; - s = parsestr(c, CHILD(n, i), &subbm); - if (s == NULL) - goto onError; - if (*bytesmode != subbm) { - ast_error(c, n, "cannot mix bytes and nonbytes literals"); - Py_DECREF(s); - goto onError; - } - if (PyBytes_Check(v) && PyBytes_Check(s)) { - PyBytes_ConcatAndDel(&v, s); - if (v == NULL) - goto onError; - } - else { - PyObject *temp = PyUnicode_Concat(v, s); - Py_DECREF(s); - Py_DECREF(v); - v = temp; - if (v == NULL) - goto onError; + + FstringParser state; + FstringParser_Init(&state); + + for (i = 0; i < NCH(n); i++) { + int this_bytesmode = 0; + int this_fmode = 0; + PyObject *s; + + REQ(CHILD(n, i), STRING); + s = parsestr(c, CHILD(n, i), &this_bytesmode, &this_fmode); + if (!s) + goto error; + + /* Check that we're not mixing bytes with unicode. */ + if (i != 0 && bytesmode != this_bytesmode) { + ast_error(c, n, "cannot mix bytes and nonbytes literals"); + Py_DECREF(s); + goto error; + } + bytesmode = this_bytesmode; + + assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s)); + + if (bytesmode) { + /* For bytes, concat as we go. */ + if (i == 0) { + /* First time, just remember this value. */ + bytes_str = s; + } else { + PyBytes_ConcatAndDel(&bytes_str, s); + if (!bytes_str) + goto error; } + } else if (this_fmode) { + /* This is an f-string. Concatenate and decref it. */ + Py_ssize_t ofs = 0; + int result = FstringParser_ConcatFstring(&state, s, &ofs, 0, c, n); + Py_DECREF(s); + if (result < 0) + goto error; + } else { + /* This is a regular string. Concatenate it. 
*/ + if (FstringParser_ConcatAndDel(&state, s) < 0) + goto error; } } - return v; + if (bytesmode) { + /* Just return the bytes object and we're done. */ + if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0) + goto error; + return Bytes(bytes_str, LINENO(n), n->n_col_offset, c->c_arena); + } + + /* We're not a bytes string, bytes_str should never have been set. */ + assert(bytes_str == NULL); + + return FstringParser_Finish(&state, c, n); - onError: - Py_XDECREF(v); +error: + Py_XDECREF(bytes_str); + FstringParser_Dealloc(&state); return NULL; } diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 2f22209e9d..2038d2b26d 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -1158,13 +1158,14 @@ map_next(mapobject *lz) PyObject *result; Py_ssize_t numargs, i; - numargs = PyTuple_Size(lz->iters); + numargs = PyTuple_GET_SIZE(lz->iters); argtuple = PyTuple_New(numargs); if (argtuple == NULL) return NULL; for (i=0 ; i<numargs ; i++) { - val = PyIter_Next(PyTuple_GET_ITEM(lz->iters, i)); + PyObject *it = PyTuple_GET_ITEM(lz->iters, i); + val = Py_TYPE(it)->tp_iternext(it); if (val == NULL) { Py_DECREF(argtuple); return NULL; diff --git a/Python/compile.c b/Python/compile.c index 97bb12ee67..3a49ecec28 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -731,6 +731,7 @@ compiler_set_qualname(struct compiler *c) return 1; } + /* Allocate a new block and return a pointer to it. Returns NULL on error. */ @@ -3209,6 +3210,117 @@ compiler_call(struct compiler *c, expr_ty e) e->v.Call.keywords); } +static int +compiler_joined_str(struct compiler *c, expr_ty e) +{ + /* Concatenate parts of a string using ''.join(parts). There are + probably better ways of doing this. + + This is used for constructs like "'x=' f'{42}'", which have to + be evaluated at compile time. 
*/ + + static PyObject *empty_string; + static PyObject *join_string; + + if (!empty_string) { + empty_string = PyUnicode_FromString(""); + if (!empty_string) + return 0; + } + if (!join_string) { + join_string = PyUnicode_FromString("join"); + if (!join_string) + return 0; + } + + ADDOP_O(c, LOAD_CONST, empty_string, consts); + ADDOP_NAME(c, LOAD_ATTR, join_string, names); + VISIT_SEQ(c, expr, e->v.JoinedStr.values); + ADDOP_I(c, BUILD_LIST, asdl_seq_LEN(e->v.JoinedStr.values)); + ADDOP_I(c, CALL_FUNCTION, 1); + return 1; +} + +/* Note that this code uses the builtin functions format(), str(), + repr(), and ascii(). You can break this code, or make it do odd + things, by redefining those functions. */ +static int +compiler_formatted_value(struct compiler *c, expr_ty e) +{ + PyObject *conversion_name = NULL; + + static PyObject *format_string; + static PyObject *str_string; + static PyObject *repr_string; + static PyObject *ascii_string; + + if (!format_string) { + format_string = PyUnicode_InternFromString("format"); + if (!format_string) + return 0; + } + + if (!str_string) { + str_string = PyUnicode_InternFromString("str"); + if (!str_string) + return 0; + } + + if (!repr_string) { + repr_string = PyUnicode_InternFromString("repr"); + if (!repr_string) + return 0; + } + if (!ascii_string) { + ascii_string = PyUnicode_InternFromString("ascii"); + if (!ascii_string) + return 0; + } + + ADDOP_NAME(c, LOAD_GLOBAL, format_string, names); + + /* If needed, convert via str, repr, or ascii. */ + if (e->v.FormattedValue.conversion != -1) { + switch (e->v.FormattedValue.conversion) { + case 's': + conversion_name = str_string; + break; + case 'r': + conversion_name = repr_string; + break; + case 'a': + conversion_name = ascii_string; + break; + default: + PyErr_SetString(PyExc_SystemError, + "Unrecognized conversion character"); + return 0; + } + ADDOP_NAME(c, LOAD_GLOBAL, conversion_name, names); + } + + /* Evaluate the value. 
*/ + VISIT(c, expr, e->v.FormattedValue.value); + + /* If needed, convert via str, repr, or ascii. */ + if (conversion_name) { + /* Call the function we previously pushed. */ + ADDOP_I(c, CALL_FUNCTION, 1); + } + + /* If we have a format spec, use format(value, format_spec). Otherwise, + use the single argument form. */ + if (e->v.FormattedValue.format_spec) { + VISIT(c, expr, e->v.FormattedValue.format_spec); + ADDOP_I(c, CALL_FUNCTION, 2); + } else { + /* No format spec specified, call format(value). */ + ADDOP_I(c, CALL_FUNCTION, 1); + } + + return 1; +} + /* shared code between compiler_call and compiler_class */ static int compiler_call_helper(struct compiler *c, @@ -3628,9 +3740,9 @@ expr_constant(struct compiler *c, expr_ty e) BLOCK finally: if an exception was raised: - exc = copy of (exception, instance, traceback) + exc = copy of (exception, instance, traceback) else: - exc = (None, None, None) + exc = (None, None, None) if not (await exit(*exc)): raise */ @@ -3878,6 +3990,10 @@ compiler_visit_expr(struct compiler *c, expr_ty e) case Str_kind: ADDOP_O(c, LOAD_CONST, e->v.Str.s, consts); break; + case JoinedStr_kind: + return compiler_joined_str(c, e); + case FormattedValue_kind: + return compiler_formatted_value(c, e); case Bytes_kind: ADDOP_O(c, LOAD_CONST, e->v.Bytes.s, consts); break; @@ -4784,4 +4900,3 @@ PyAST_Compile(mod_ty mod, const char *filename, PyCompilerFlags *flags, { return PyAST_CompileEx(mod, filename, flags, -1, arena); } - diff --git a/Python/graminit.c b/Python/graminit.c index 8212b2a584..354dc121b0 100644 --- a/Python/graminit.c +++ b/Python/graminit.c @@ -204,11 +204,13 @@ static arc arcs_9_6[2] = { {32, 7}, {0, 6}, }; -static arc arcs_9_7[2] = { +static arc arcs_9_7[3] = { {30, 12}, {34, 3}, + {0, 7}, }; -static arc arcs_9_8[1] = { +static arc arcs_9_8[2] = { + {32, 13}, {0, 8}, }; static arc arcs_9_9[2] = { @@ -221,35 +223,39 @@ static arc arcs_9_10[3] = { {0, 10}, }; static arc arcs_9_11[3] = { - {30, 13}, - {32, 14}, + {30, 14}, 
+ {32, 15}, {0, 11}, }; static arc arcs_9_12[3] = { {32, 7}, - {31, 15}, + {31, 16}, {0, 12}, }; -static arc arcs_9_13[2] = { - {32, 14}, +static arc arcs_9_13[1] = { {0, 13}, }; static arc arcs_9_14[2] = { - {30, 16}, + {32, 15}, + {0, 14}, +}; +static arc arcs_9_15[3] = { + {30, 17}, {34, 3}, + {0, 15}, }; -static arc arcs_9_15[1] = { +static arc arcs_9_16[1] = { {26, 6}, }; -static arc arcs_9_16[3] = { - {32, 14}, - {31, 17}, - {0, 16}, +static arc arcs_9_17[3] = { + {32, 15}, + {31, 18}, + {0, 17}, }; -static arc arcs_9_17[1] = { - {26, 13}, +static arc arcs_9_18[1] = { + {26, 14}, }; -static state states_9[18] = { +static state states_9[19] = { {3, arcs_9_0}, {3, arcs_9_1}, {3, arcs_9_2}, @@ -257,17 +263,18 @@ static state states_9[18] = { {1, arcs_9_4}, {4, arcs_9_5}, {2, arcs_9_6}, - {2, arcs_9_7}, - {1, arcs_9_8}, + {3, arcs_9_7}, + {2, arcs_9_8}, {2, arcs_9_9}, {3, arcs_9_10}, {3, arcs_9_11}, {3, arcs_9_12}, - {2, arcs_9_13}, + {1, arcs_9_13}, {2, arcs_9_14}, - {1, arcs_9_15}, - {3, arcs_9_16}, - {1, arcs_9_17}, + {3, arcs_9_15}, + {1, arcs_9_16}, + {3, arcs_9_17}, + {1, arcs_9_18}, }; static arc arcs_10_0[1] = { {23, 1}, @@ -319,11 +326,13 @@ static arc arcs_11_6[2] = { {32, 7}, {0, 6}, }; -static arc arcs_11_7[2] = { +static arc arcs_11_7[3] = { {36, 12}, {34, 3}, + {0, 7}, }; -static arc arcs_11_8[1] = { +static arc arcs_11_8[2] = { + {32, 13}, {0, 8}, }; static arc arcs_11_9[2] = { @@ -336,35 +345,39 @@ static arc arcs_11_10[3] = { {0, 10}, }; static arc arcs_11_11[3] = { - {36, 13}, - {32, 14}, + {36, 14}, + {32, 15}, {0, 11}, }; static arc arcs_11_12[3] = { {32, 7}, - {31, 15}, + {31, 16}, {0, 12}, }; -static arc arcs_11_13[2] = { - {32, 14}, +static arc arcs_11_13[1] = { {0, 13}, }; static arc arcs_11_14[2] = { - {36, 16}, + {32, 15}, + {0, 14}, +}; +static arc arcs_11_15[3] = { + {36, 17}, {34, 3}, + {0, 15}, }; -static arc arcs_11_15[1] = { +static arc arcs_11_16[1] = { {26, 6}, }; -static arc arcs_11_16[3] = { - {32, 14}, - {31, 17}, - {0, 16}, 
+static arc arcs_11_17[3] = { + {32, 15}, + {31, 18}, + {0, 17}, }; -static arc arcs_11_17[1] = { - {26, 13}, +static arc arcs_11_18[1] = { + {26, 14}, }; -static state states_11[18] = { +static state states_11[19] = { {3, arcs_11_0}, {3, arcs_11_1}, {3, arcs_11_2}, @@ -372,17 +385,18 @@ static state states_11[18] = { {1, arcs_11_4}, {4, arcs_11_5}, {2, arcs_11_6}, - {2, arcs_11_7}, - {1, arcs_11_8}, + {3, arcs_11_7}, + {2, arcs_11_8}, {2, arcs_11_9}, {3, arcs_11_10}, {3, arcs_11_11}, {3, arcs_11_12}, - {2, arcs_11_13}, + {1, arcs_11_13}, {2, arcs_11_14}, - {1, arcs_11_15}, - {3, arcs_11_16}, - {1, arcs_11_17}, + {3, arcs_11_15}, + {1, arcs_11_16}, + {3, arcs_11_17}, + {1, arcs_11_18}, }; static arc arcs_12_0[1] = { {23, 1}, @@ -1879,11 +1893,11 @@ static dfa dfas[85] = { "\000\000\100\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {264, "parameters", 0, 4, states_8, "\000\040\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, - {265, "typedargslist", 0, 18, states_9, + {265, "typedargslist", 0, 19, states_9, "\000\000\200\000\006\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {266, "tfpdef", 0, 4, states_10, "\000\000\200\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, - {267, "varargslist", 0, 18, states_11, + {267, "varargslist", 0, 19, states_11, "\000\000\200\000\006\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, {268, "vfpdef", 0, 2, states_12, "\000\000\200\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"}, diff --git a/Python/pythonrun.c b/Python/pythonrun.c index ebedd123f3..1a5dab5f3a 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -431,7 +431,7 @@ static int parse_syntax_error(PyObject *err, PyObject **message, PyObject **filename, int *lineno, int *offset, PyObject **text) { - long hold; + int hold; PyObject *v; _Py_IDENTIFIER(msg); _Py_IDENTIFIER(filename); @@ -464,11 
+464,11 @@ parse_syntax_error(PyObject *err, PyObject **message, PyObject **filename, v = _PyObject_GetAttrId(err, &PyId_lineno); if (!v) goto finally; - hold = PyLong_AsLong(v); + hold = _PyLong_AsInt(v); Py_DECREF(v); if (hold < 0 && PyErr_Occurred()) goto finally; - *lineno = (int)hold; + *lineno = hold; v = _PyObject_GetAttrId(err, &PyId_offset); if (!v) @@ -477,11 +477,11 @@ parse_syntax_error(PyObject *err, PyObject **message, PyObject **filename, *offset = -1; Py_DECREF(v); } else { - hold = PyLong_AsLong(v); + hold = _PyLong_AsInt(v); Py_DECREF(v); if (hold < 0 && PyErr_Occurred()) goto finally; - *offset = (int)hold; + *offset = hold; } v = _PyObject_GetAttrId(err, &PyId_text); diff --git a/Python/pytime.c b/Python/pytime.c index 5a5cdd9c7a..9889a3b53b 100644 --- a/Python/pytime.c +++ b/Python/pytime.c @@ -7,6 +7,11 @@ #include <mach/mach_time.h> /* mach_absolute_time(), mach_timebase_info() */ #endif +#define _PyTime_check_mul_overflow(a, b) \ + (assert(b > 0), \ + (_PyTime_t)(a) < _PyTime_MIN / (_PyTime_t)(b) \ + || _PyTime_MAX / (_PyTime_t)(b) < (_PyTime_t)(a)) + /* To millisecond (10^-3) */ #define SEC_TO_MS 1000 @@ -60,50 +65,83 @@ _PyLong_FromTime_t(time_t t) #endif } +/* Round to nearest with ties going to nearest even integer + (_PyTime_ROUND_HALF_EVEN) */ +static double +_PyTime_RoundHalfEven(double x) +{ + double rounded = round(x); + if (fabs(x-rounded) == 0.5) + /* halfway case: round to even */ + rounded = 2.0*round(x/2.0); + return rounded; +} + +static double +_PyTime_Round(double x, _PyTime_round_t round) +{ + /* volatile avoids optimization changing how numbers are rounded */ + volatile double d; + + d = x; + if (round == _PyTime_ROUND_HALF_EVEN) + d = _PyTime_RoundHalfEven(d); + else if (round == _PyTime_ROUND_CEILING) + d = ceil(d); + else + d = floor(d); + return d; +} + static int -_PyTime_ObjectToDenominator(PyObject *obj, time_t *sec, long *numerator, +_PyTime_DoubleToDenominator(double d, time_t *sec, long *numerator, double 
denominator, _PyTime_round_t round) { - assert(denominator <= LONG_MAX); - if (PyFloat_Check(obj)) { - double d, intpart, err; - /* volatile avoids unsafe optimization on float enabled by gcc -O3 */ - volatile double floatpart; + double intpart, err; + /* volatile avoids optimization changing how numbers are rounded */ + volatile double floatpart; - d = PyFloat_AsDouble(obj); - floatpart = modf(d, &intpart); - if (floatpart < 0) { - floatpart = 1.0 + floatpart; - intpart -= 1.0; - } + floatpart = modf(d, &intpart); - floatpart *= denominator; - if (round == _PyTime_ROUND_CEILING) { - floatpart = ceil(floatpart); - if (floatpart >= denominator) { - floatpart = 0.0; - intpart += 1.0; - } - } - else { - floatpart = floor(floatpart); - } + floatpart *= denominator; + floatpart = _PyTime_Round(floatpart, round); + if (floatpart >= denominator) { + floatpart -= denominator; + intpart += 1.0; + } + else if (floatpart < 0) { + floatpart += denominator; + intpart -= 1.0; + } + assert(0.0 <= floatpart && floatpart < denominator); - *sec = (time_t)intpart; - err = intpart - (double)*sec; - if (err <= -1.0 || err >= 1.0) { - error_time_t_overflow(); - return -1; - } + *sec = (time_t)intpart; + *numerator = (long)floatpart; - *numerator = (long)floatpart; - return 0; + err = intpart - (double)*sec; + if (err <= -1.0 || err >= 1.0) { + error_time_t_overflow(); + return -1; + } + return 0; +} + +static int +_PyTime_ObjectToDenominator(PyObject *obj, time_t *sec, long *numerator, + double denominator, _PyTime_round_t round) +{ + assert(denominator <= (double)LONG_MAX); + + if (PyFloat_Check(obj)) { + double d = PyFloat_AsDouble(obj); + return _PyTime_DoubleToDenominator(d, sec, numerator, + denominator, round); } else { *sec = _PyLong_AsTime_t(obj); + *numerator = 0; if (*sec == (time_t)-1 && PyErr_Occurred()) return -1; - *numerator = 0; return 0; } } @@ -112,13 +150,12 @@ int _PyTime_ObjectToTime_t(PyObject *obj, time_t *sec, _PyTime_round_t round) { if (PyFloat_Check(obj)) { - 
double d, intpart, err; + double intpart, err; + /* volatile avoids optimization changing how numbers are rounded */ + volatile double d; d = PyFloat_AsDouble(obj); - if (round == _PyTime_ROUND_CEILING) - d = ceil(d); - else - d = floor(d); + d = _PyTime_Round(d, round); (void)modf(d, &intpart); *sec = (time_t)intpart; @@ -141,14 +178,20 @@ int _PyTime_ObjectToTimespec(PyObject *obj, time_t *sec, long *nsec, _PyTime_round_t round) { - return _PyTime_ObjectToDenominator(obj, sec, nsec, 1e9, round); + int res; + res = _PyTime_ObjectToDenominator(obj, sec, nsec, 1e9, round); + assert(0 <= *nsec && *nsec < SEC_TO_NS); + return res; } int _PyTime_ObjectToTimeval(PyObject *obj, time_t *sec, long *usec, _PyTime_round_t round) { - return _PyTime_ObjectToDenominator(obj, sec, usec, 1e6, round); + int res; + res = _PyTime_ObjectToDenominator(obj, sec, usec, 1e6, round); + assert(0 <= *usec && *usec < SEC_TO_US); + return res; } static void @@ -162,12 +205,13 @@ _PyTime_t _PyTime_FromSeconds(int seconds) { _PyTime_t t; + t = (_PyTime_t)seconds; /* ensure that integer overflow cannot happen, int type should have 32 bits, whereas _PyTime_t type has at least 64 bits (SEC_TO_MS takes 30 bits). 
*/ - assert((seconds >= 0 && seconds <= _PyTime_MAX / SEC_TO_NS) - || (seconds < 0 && seconds >= _PyTime_MIN / SEC_TO_NS)); - t = (_PyTime_t)seconds * SEC_TO_NS; + assert((t >= 0 && t <= _PyTime_MAX / SEC_TO_NS) + || (t < 0 && t >= _PyTime_MIN / SEC_TO_NS)); + t *= SEC_TO_NS; return t; } @@ -187,12 +231,15 @@ _PyTime_FromTimespec(_PyTime_t *tp, struct timespec *ts, int raise) _PyTime_t t; int res = 0; - t = (_PyTime_t)ts->tv_sec * SEC_TO_NS; - if (t / SEC_TO_NS != ts->tv_sec) { + assert(sizeof(ts->tv_sec) <= sizeof(_PyTime_t)); + t = (_PyTime_t)ts->tv_sec; + + if (_PyTime_check_mul_overflow(t, SEC_TO_NS)) { if (raise) _PyTime_overflow(); res = -1; } + t = t * SEC_TO_NS; t += ts->tv_nsec; @@ -206,12 +253,15 @@ _PyTime_FromTimeval(_PyTime_t *tp, struct timeval *tv, int raise) _PyTime_t t; int res = 0; - t = (_PyTime_t)tv->tv_sec * SEC_TO_NS; - if (t / SEC_TO_NS != tv->tv_sec) { + assert(sizeof(tv->tv_sec) <= sizeof(_PyTime_t)); + t = (_PyTime_t)tv->tv_sec; + + if (_PyTime_check_mul_overflow(t, SEC_TO_NS)) { if (raise) _PyTime_overflow(); res = -1; } + t = t * SEC_TO_NS; t += (_PyTime_t)tv->tv_usec * US_TO_NS; @@ -221,50 +271,59 @@ _PyTime_FromTimeval(_PyTime_t *tp, struct timeval *tv, int raise) #endif static int -_PyTime_FromObject(_PyTime_t *t, PyObject *obj, _PyTime_round_t round, - long to_nanoseconds) +_PyTime_FromFloatObject(_PyTime_t *t, double value, _PyTime_round_t round, + long unit_to_ns) { - if (PyFloat_Check(obj)) { - /* volatile avoids unsafe optimization on float enabled by gcc -O3 */ - volatile double d, err; + double err; + /* volatile avoids optimization changing how numbers are rounded */ + volatile double d; - /* convert to a number of nanoseconds */ - d = PyFloat_AsDouble(obj); - d *= to_nanoseconds; + /* convert to a number of nanoseconds */ + d = value; + d *= (double)unit_to_ns; + d = _PyTime_Round(d, round); - if (round == _PyTime_ROUND_CEILING) - d = ceil(d); - else - d = floor(d); + *t = (_PyTime_t)d; + err = d - (double)*t; + if (fabs(err) 
>= 1.0) { + _PyTime_overflow(); + return -1; + } + return 0; +} - *t = (_PyTime_t)d; - err = d - (double)*t; - if (fabs(err) >= 1.0) { - _PyTime_overflow(); - return -1; - } - return 0; +static int +_PyTime_FromObject(_PyTime_t *t, PyObject *obj, _PyTime_round_t round, + long unit_to_ns) +{ + if (PyFloat_Check(obj)) { + double d; + d = PyFloat_AsDouble(obj); + return _PyTime_FromFloatObject(t, d, round, unit_to_ns); } else { #ifdef HAVE_LONG_LONG PY_LONG_LONG sec; - sec = PyLong_AsLongLong(obj); assert(sizeof(PY_LONG_LONG) <= sizeof(_PyTime_t)); + + sec = PyLong_AsLongLong(obj); #else long sec; - sec = PyLong_AsLong(obj); assert(sizeof(PY_LONG_LONG) <= sizeof(_PyTime_t)); + + sec = PyLong_AsLong(obj); #endif if (sec == -1 && PyErr_Occurred()) { if (PyErr_ExceptionMatches(PyExc_OverflowError)) _PyTime_overflow(); return -1; } - *t = sec * to_nanoseconds; - if (*t / to_nanoseconds != sec) { + + if (_PyTime_check_mul_overflow(sec, unit_to_ns)) { _PyTime_overflow(); return -1; } + *t = sec * unit_to_ns; return 0; } } @@ -284,12 +343,21 @@ _PyTime_FromMillisecondsObject(_PyTime_t *t, PyObject *obj, _PyTime_round_t roun double _PyTime_AsSecondsDouble(_PyTime_t t) { - _PyTime_t sec, ns; - /* Divide using integers to avoid rounding issues on the integer part. - 1e-9 cannot be stored exactly in IEEE 64-bit. */ - sec = t / SEC_TO_NS; - ns = t % SEC_TO_NS; - return (double)sec + (double)ns * 1e-9; + /* volatile avoids optimization changing how numbers are rounded */ + volatile double d; + + if (t % SEC_TO_NS == 0) { + _PyTime_t secs; + /* Divide using integers to avoid rounding issues on the integer part. + 1e-9 cannot be stored exactly in IEEE 64-bit. 
*/ + secs = t / SEC_TO_NS; + d = (double)secs; + } + else { + d = (double)t; + d /= 1e9; + } + return d; } PyObject * @@ -309,7 +377,20 @@ _PyTime_Divide(const _PyTime_t t, const _PyTime_t k, const _PyTime_round_t round) { assert(k > 1); - if (round == _PyTime_ROUND_CEILING) { + if (round == _PyTime_ROUND_HALF_EVEN) { + _PyTime_t x, r, abs_r; + x = t / k; + r = t % k; + abs_r = Py_ABS(r); + if (abs_r > k / 2 || (abs_r == k / 2 && (Py_ABS(x) & 1))) { + if (t >= 0) + x++; + else + x--; + } + return x; + } + else if (round == _PyTime_ROUND_CEILING) { if (t >= 0) return (t + k - 1) / k; else @@ -424,6 +505,7 @@ _PyTime_AsTimevalTime_t(_PyTime_t t, time_t *p_secs, int *us, return 0; } + #if defined(HAVE_CLOCK_GETTIME) || defined(HAVE_KQUEUE) int _PyTime_AsTimespec(_PyTime_t t, struct timespec *ts) @@ -437,13 +519,13 @@ _PyTime_AsTimespec(_PyTime_t t, struct timespec *ts) secs -= 1; } ts->tv_sec = (time_t)secs; + assert(0 <= nsec && nsec < SEC_TO_NS); + ts->tv_nsec = nsec; + if ((_PyTime_t)ts->tv_sec != secs) { - _PyTime_overflow(); + error_time_t_overflow(); return -1; } - ts->tv_nsec = nsec; - - assert(0 <= ts->tv_nsec && ts->tv_nsec <= 999999999); return 0; } #endif @@ -557,19 +639,20 @@ _PyTime_GetSystemClockWithInfo(_PyTime_t *t, _Py_clock_info_t *info) return pygettimeofday_new(t, info, 1); } - static int pymonotonic(_PyTime_t *tp, _Py_clock_info_t *info, int raise) { #if defined(MS_WINDOWS) - ULONGLONG result; + ULONGLONG ticks; + _PyTime_t t; assert(info == NULL || raise); - result = GetTickCount64(); + ticks = GetTickCount64(); + assert(sizeof(ticks) <= sizeof(_PyTime_t)); + t = (_PyTime_t)ticks; - *tp = result * MS_TO_NS; - if (*tp / MS_TO_NS != result) { + if (_PyTime_check_mul_overflow(t, MS_TO_NS)) { if (raise) { _PyTime_overflow(); return -1; @@ -577,6 +660,7 @@ pymonotonic(_PyTime_t *tp, _Py_clock_info_t *info, int raise) /* Hello, time traveler! 
*/ assert(0); } + *tp = t * MS_TO_NS; if (info) { DWORD timeAdjustment, timeIncrement; diff --git a/Python/random.c b/Python/random.c index ea09e84a7b..8f3e6d6021 100644 --- a/Python/random.c +++ b/Python/random.c @@ -6,7 +6,9 @@ # ifdef HAVE_SYS_STAT_H # include <sys/stat.h> # endif -# ifdef HAVE_GETRANDOM_SYSCALL +# ifdef HAVE_GETRANDOM +# include <sys/random.h> +# elif defined(HAVE_GETRANDOM_SYSCALL) # include <sys/syscall.h> # endif #endif @@ -70,7 +72,9 @@ win32_urandom(unsigned char *buffer, Py_ssize_t size, int raise) return 0; } -#elif HAVE_GETENTROPY +#elif defined(HAVE_GETENTROPY) && !defined(sun) +#define PY_GETENTROPY + /* Fill buffer with size pseudo-random bytes generated by getentropy(). Return 0 on success, or raise an exception and return -1 on error. @@ -105,16 +109,19 @@ py_getentropy(unsigned char *buffer, Py_ssize_t size, int fatal) return 0; } -#else /* !HAVE_GETENTROPY */ +#else + +#if defined(HAVE_GETRANDOM) || defined(HAVE_GETRANDOM_SYSCALL) +#define PY_GETRANDOM -#ifdef HAVE_GETRANDOM_SYSCALL static int py_getrandom(void *buffer, Py_ssize_t size, int raise) { - /* is getrandom() supported by the running kernel? - * need Linux kernel 3.17 or later */ + /* Is getrandom() supported by the running kernel? + * Need Linux kernel 3.17 or newer, or Solaris 11.3 or newer */ static int getrandom_works = 1; - /* Use /dev/urandom, block if the kernel has no entropy */ + /* Use non-blocking /dev/urandom device. On Linux at boot, the getrandom() + * syscall blocks until /dev/urandom is initialized with enough entropy. 
*/ const int flags = 0; int n; @@ -124,7 +131,18 @@ py_getrandom(void *buffer, Py_ssize_t size, int raise) while (0 < size) { errno = 0; - /* Use syscall() because the libc doesn't expose getrandom() yet, see: +#ifdef HAVE_GETRANDOM + if (raise) { + Py_BEGIN_ALLOW_THREADS + n = getrandom(buffer, size, flags); + Py_END_ALLOW_THREADS + } + else { + n = getrandom(buffer, size, flags); + } +#else + /* On Linux, use the syscall() function because the GNU libc doesn't + * expose the Linux getrandom() syscall yet. See: * https://sourceware.org/bugzilla/show_bug.cgi?id=17252 */ if (raise) { Py_BEGIN_ALLOW_THREADS @@ -134,6 +152,7 @@ py_getrandom(void *buffer, Py_ssize_t size, int raise) else { n = syscall(SYS_getrandom, buffer, size, flags); } +#endif if (n < 0) { if (errno == ENOSYS) { @@ -182,7 +201,7 @@ dev_urandom_noraise(unsigned char *buffer, Py_ssize_t size) assert (0 < size); -#ifdef HAVE_GETRANDOM_SYSCALL +#ifdef PY_GETRANDOM if (py_getrandom(buffer, size, 0) == 1) return; /* getrandom() is not supported by the running kernel, fall back @@ -218,14 +237,14 @@ dev_urandom_python(char *buffer, Py_ssize_t size) int fd; Py_ssize_t n; struct _Py_stat_struct st; -#ifdef HAVE_GETRANDOM_SYSCALL +#ifdef PY_GETRANDOM int res; #endif if (size <= 0) return 0; -#ifdef HAVE_GETRANDOM_SYSCALL +#ifdef PY_GETRANDOM res = py_getrandom(buffer, size, 1); if (res < 0) return -1; @@ -304,7 +323,7 @@ dev_urandom_close(void) } } -#endif /* HAVE_GETENTROPY */ +#endif /* Fill buffer with pseudo-random bytes generated by a linear congruent generator (LCG): @@ -345,7 +364,7 @@ _PyOS_URandom(void *buffer, Py_ssize_t size) #ifdef MS_WINDOWS return win32_urandom((unsigned char *)buffer, size, 1); -#elif HAVE_GETENTROPY +#elif PY_GETENTROPY return py_getentropy(buffer, size, 0); #else return dev_urandom_python((char*)buffer, size); @@ -392,7 +411,7 @@ _PyRandom_Init(void) else { #ifdef MS_WINDOWS (void)win32_urandom(secret, secret_size, 0); -#elif HAVE_GETENTROPY +#elif PY_GETENTROPY 
(void)py_getentropy(secret, secret_size, 1); #else dev_urandom_noraise(secret, secret_size); @@ -408,7 +427,7 @@ _PyRandom_Fini(void) CryptReleaseContext(hCryptProv, 0); hCryptProv = 0; } -#elif HAVE_GETENTROPY +#elif PY_GETENTROPY /* nothing to clean */ #else dev_urandom_close(); diff --git a/Python/symtable.c b/Python/symtable.c index 64910d8a55..8431d514f6 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -1439,6 +1439,14 @@ symtable_visit_expr(struct symtable *st, expr_ty e) VISIT_SEQ(st, expr, e->v.Call.args); VISIT_SEQ_WITH_NULL(st, keyword, e->v.Call.keywords); break; + case FormattedValue_kind: + VISIT(st, expr, e->v.FormattedValue.value); + if (e->v.FormattedValue.format_spec) + VISIT(st, expr, e->v.FormattedValue.format_spec); + break; + case JoinedStr_kind: + VISIT_SEQ(st, expr, e->v.JoinedStr.values); + break; case Num_kind: case Str_kind: case Bytes_kind: |