diff options
Diffstat (limited to 'Parser/pegen/parse_string.c')
| -rw-r--r-- | Parser/pegen/parse_string.c | 38 |
1 files changed, 24 insertions, 14 deletions
diff --git a/Parser/pegen/parse_string.c b/Parser/pegen/parse_string.c index d96303dc18..ca4b733c15 100644 --- a/Parser/pegen/parse_string.c +++ b/Parser/pegen/parse_string.c @@ -12,7 +12,7 @@ // file (like "_PyPegen_raise_syntax_error"). static int -warn_invalid_escape_sequence(Parser *p, unsigned char first_invalid_escape_char) +warn_invalid_escape_sequence(Parser *p, unsigned char first_invalid_escape_char, Token *t) { PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c", first_invalid_escape_char); @@ -20,11 +20,16 @@ warn_invalid_escape_sequence(Parser *p, unsigned char first_invalid_escape_char) return -1; } if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, p->tok->filename, - p->tok->lineno, NULL, NULL) < 0) { + t->lineno, NULL, NULL) < 0) { if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) { /* Replace the DeprecationWarning exception with a SyntaxError to get a more accurate error report */ PyErr_Clear(); + + /* This is needed, in order for the SyntaxError to point to the token t, + since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the + error location, if p->known_err_token is not set. */ + p->known_err_token = t; RAISE_SYNTAX_ERROR("invalid escape sequence \\%c", first_invalid_escape_char); } Py_DECREF(msg); @@ -47,7 +52,7 @@ decode_utf8(const char **sPtr, const char *end) } static PyObject * -decode_unicode_with_escapes(Parser *parser, const char *s, size_t len) +decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) { PyObject *v, *u; char *buf; @@ -110,7 +115,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len) v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape); if (v != NULL && first_invalid_escape != NULL) { - if (warn_invalid_escape_sequence(parser, *first_invalid_escape) < 0) { + if (warn_invalid_escape_sequence(parser, *first_invalid_escape, t) < 0) { /* We have not decref u before because first_invalid_escape points inside u. */ Py_XDECREF(u); @@ -123,7 +128,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len) } static PyObject * -decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len) +decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t) { const char *first_invalid_escape; PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape); @@ -132,7 +137,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len) } if (first_invalid_escape != NULL) { - if (warn_invalid_escape_sequence(p, *first_invalid_escape) < 0) { + if (warn_invalid_escape_sequence(p, *first_invalid_escape, t) < 0) { Py_DECREF(result); return NULL; } @@ -146,9 +151,14 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len) If the string is an f-string, set *fstr and *fstrlen to the unparsed string object. Return 0 if no errors occurred. */ int -_PyPegen_parsestr(Parser *p, const char *s, int *bytesmode, int *rawmode, PyObject **result, - const char **fstr, Py_ssize_t *fstrlen) +_PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result, + const char **fstr, Py_ssize_t *fstrlen, Token *t) { + const char *s = PyBytes_AsString(t->bytes); + if (s == NULL) { + return -1; + } + size_t len; int quote = Py_CHARMASK(*s); int fmode = 0; @@ -245,7 +255,7 @@ _PyPegen_parsestr(Parser *p, const char *s, int *bytesmode, int *rawmode, PyObje *result = PyBytes_FromStringAndSize(s, len); } else { - *result = decode_bytes_with_escapes(p, s, len); + *result = decode_bytes_with_escapes(p, s, len, t); } } else { @@ -253,7 +263,7 @@ _PyPegen_parsestr(Parser *p, const char *s, int *bytesmode, int *rawmode, PyObje *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL); } else { - *result = decode_unicode_with_escapes(p, s, len); + *result = decode_unicode_with_escapes(p, s, len, t); } } return *result == NULL ? -1 : 0; @@ -637,7 +647,7 @@ exit: */ static int fstring_find_literal(Parser *p, const char **str, const char *end, int raw, - PyObject **literal, int recurse_lvl) + PyObject **literal, int recurse_lvl, Token *t) { /* Get any literal string. It ends when we hit an un-doubled left brace (which isn't part of a unicode name escape such as @@ -660,7 +670,7 @@ fstring_find_literal(Parser *p, const char **str, const char *end, int raw, } break; } - if (ch == '{' && warn_invalid_escape_sequence(p, ch) < 0) { + if (ch == '{' && warn_invalid_escape_sequence(p, ch, t) < 0) { return -1; } } @@ -704,7 +714,7 @@ done: NULL, NULL); else *literal = decode_unicode_with_escapes(p, literal_start, - s - literal_start); + s - literal_start, t); if (!*literal) return -1; } @@ -1041,7 +1051,7 @@ fstring_find_literal_and_expr(Parser *p, const char **str, const char *end, int assert(*literal == NULL && *expression == NULL); /* Get any literal string. */ - result = fstring_find_literal(p, str, end, raw, literal, recurse_lvl); + result = fstring_find_literal(p, str, end, raw, literal, recurse_lvl, t); if (result < 0) goto error; |
