summary | refs | log | tree | commit | diff
path: root/src/backend
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend')
-rw-r--r-- src/backend/parser/scan.l 107
1 files changed, 85 insertions, 22 deletions
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index a0635463bb..ef5c1a639f 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -4,13 +4,27 @@
* scan.l
* lexical scanner for PostgreSQL
*
- * XXX The rules in this file must be kept in sync with psql's lexer!!!
+ * NOTE NOTE NOTE:
+ *
+ * The rules in this file must be kept in sync with psql's lexer!!!
+ *
+ * The rules are designed so that the scanner never has to backtrack,
+ * in the sense that there is always a rule that can match the input
+ * consumed so far (the rule action may internally throw back some input
+ * with yyless(), however). As explained in the flex manual, this makes
+ * for a useful speed increase --- about a third faster than a plain -CF
+ * lexer, in simple testing. The extra complexity is mostly in the rules
+ * for handling float numbers and continued string literals. If you change
+ * the lexical rules, verify that you haven't broken the no-backtrack
+ * property by running flex with the "-b" option and checking that the
+ * resulting "lex.backup" file says that no backing up is needed.
+ *
*
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.121 2005/03/11 19:13:42 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.122 2005/05/26 01:24:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -138,6 +152,20 @@ special_whitespace ({space}+|{comment}{newline})
horiz_whitespace ({horiz_space}|{comment})
whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
+/*
+ * To ensure that {quotecontinue} can be scanned without having to back up
+ * if the full pattern isn't matched, we include trailing whitespace in
+ * {quotestop}. This matches all cases where {quotecontinue} fails to match,
+ * except for {quote} followed by whitespace and just one "-" (not two,
+ * which would start a {comment}). To cover that we have {quotefail}.
+ * The actions for {quotestop} and {quotefail} must throw back characters
+ * beyond the quote proper.
+ */
+quote '
+quotestop {quote}{whitespace}*
+quotecontinue {quote}{whitespace_with_newline}{quote}
+quotefail {quote}{whitespace}*"-"
+
/* Bit string
* It is tempting to scan the string for only those characters
* which are allowed. However, this leads to silently swallowed
@@ -148,16 +176,12 @@ whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
* validate the contents.
*/
xbstart [bB]{quote}
-xbstop {quote}
xbinside [^']*
-xbcat {quote}{whitespace_with_newline}{quote}
/* Hexadecimal number
*/
xhstart [xX]{quote}
-xhstop {quote}
xhinside [^']*
-xhcat {quote}{whitespace_with_newline}{quote}
/* National character
*/
@@ -165,26 +189,26 @@ xnstart [nN]{quote}
/* Extended quote
* xqdouble implements embedded quote
- * xqcat allows strings to cross input lines
*/
-quote '
xqstart {quote}
-xqstop {quote}
xqdouble {quote}{quote}
xqinside [^\\']+
xqescape [\\][^0-7]
xqoctesc [\\][0-7]{1,3}
-xqcat {quote}{whitespace_with_newline}{quote}
/* $foo$ style quotes ("dollar quoting")
* The quoted string starts with $foo$ where "foo" is an optional string
* in the form of an identifier, except that it may not contain "$",
* and extends to the first occurrence of an identical string.
* There is *no* processing of the quoted text.
+ *
+ * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
+ * fails to match its trailing "$".
*/
dolq_start [A-Za-z\200-\377_]
dolq_cont [A-Za-z\200-\377_0-9]
dolqdelim \$({dolq_start}{dolq_cont}*)?\$
+dolqfailed \${dolq_start}{dolq_cont}*
dolqinside [^$]+
/* Double quote
@@ -242,12 +266,17 @@ operator {op_chars}+
/* we no longer allow unary minus in numbers.
* instead we pass it separately to parser. there it gets
- * coerced via doNegate() -- Leon aug 20 1999
+ * coerced via doNegate() -- Leon aug 20 1999
+ *
+ * {realfail1} and {realfail2} are added to prevent the need for scanner
+ * backup when the {real} rule fails to match completely.
*/
integer {digit}+
decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-real ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
+real ({integer}|{decimal})[Ee][-+]?{digit}+
+realfail1 ({integer}|{decimal})[Ee]
+realfail2 ({integer}|{decimal})[Ee][-+]
param \${integer}
@@ -310,6 +339,10 @@ other .
/* ignore */
}
+<xc>\*+ {
+ /* ignore */
+ }
+
<xc><<EOF>> { yyerror("unterminated /* comment"); }
{xbstart} {
@@ -324,7 +357,9 @@ other .
startlit();
addlitchar('b');
}
-<xb>{xbstop} {
+<xb>{quotestop} |
+<xb>{quotefail} {
+ yyless(1);
BEGIN(INITIAL);
yylval.str = litbufdup();
return BCONST;
@@ -333,8 +368,8 @@ other .
<xb>{xbinside} {
addlit(yytext, yyleng);
}
-<xh>{xhcat} |
-<xb>{xbcat} {
+<xh>{quotecontinue} |
+<xb>{quotecontinue} {
/* ignore */
}
<xb><<EOF>> { yyerror("unterminated bit string literal"); }
@@ -351,7 +386,9 @@ other .
startlit();
addlitchar('x');
}
-<xh>{xhstop} {
+<xh>{quotestop} |
+<xh>{quotefail} {
+ yyless(1);
BEGIN(INITIAL);
yylval.str = litbufdup();
return XCONST;
@@ -365,13 +402,11 @@ other .
*/
const ScanKeyword *keyword;
- /* This had better be a keyword! */
+ yyless(1); /* eat only 'n' this time */
+ /* nchar had better be a keyword! */
keyword = ScanKeywordLookup("nchar");
Assert(keyword != NULL);
yylval.keyword = keyword->name;
- token_start = yytext;
- BEGIN(xq);
- startlit();
return keyword->value;
}
@@ -380,7 +415,9 @@ other .
BEGIN(xq);
startlit();
}
-<xq>{xqstop} {
+<xq>{quotestop} |
+<xq>{quotefail} {
+ yyless(1);
BEGIN(INITIAL);
yylval.str = litbufdup();
return SCONST;
@@ -398,7 +435,7 @@ other .
unsigned char c = strtoul(yytext+1, NULL, 8);
addlitchar(c);
}
-<xq>{xqcat} {
+<xq>{quotecontinue} {
/* ignore */
}
<xq>. {
@@ -413,6 +450,12 @@ other .
BEGIN(xdolq);
startlit();
}
+{dolqfailed} {
+ /* throw back all but the initial "$" */
+ yyless(1);
+ /* and treat it as {other} */
+ return yytext[0];
+ }
<xdolq>{dolqdelim} {
if (strcmp(yytext, dolqstart) == 0)
{
@@ -435,6 +478,9 @@ other .
<xdolq>{dolqinside} {
addlit(yytext, yyleng);
}
+<xdolq>{dolqfailed} {
+ addlit(yytext, yyleng);
+ }
<xdolq>. {
/* This is only needed for $ inside the quoted text */
addlitchar(yytext[0]);
@@ -576,6 +622,23 @@ other .
yylval.str = pstrdup(yytext);
return FCONST;
}
+{realfail1} {
+ /*
+ * throw back the [Ee], and treat as {decimal}. Note
+ * that it is possible the input is actually {integer},
+ * but since this case will almost certainly lead to a
+ * syntax error anyway, we don't bother to distinguish.
+ */
+ yyless(yyleng-1);
+ yylval.str = pstrdup(yytext);
+ return FCONST;
+ }
+{realfail2} {
+ /* throw back the [Ee][+-], and proceed as above */
+ yyless(yyleng-2);
+ yylval.str = pstrdup(yytext);
+ return FCONST;
+ }
{identifier} {