Reject trailing junk after numeric literals
author: Peter Eisentraut <[email protected]>
Wed, 16 Feb 2022 09:32:36 +0000 (10:32 +0100)
committer: Peter Eisentraut <[email protected]>
Wed, 16 Feb 2022 09:37:31 +0000 (10:37 +0100)
After this, the PostgreSQL lexers no longer accept numeric literals
with trailing non-digits, such as 123abc, which were previously scanned
as two tokens: 123 and abc.  That old behavior was undocumented and
surprising, and it might also interfere with some extended numeric
literal syntax being contemplated for the future.

Reviewed-by: John Naylor <[email protected]>
Discussion: https://www.postgresql.org/message-id/flat/b239564c-cad0-b23e-c57e-166d883cb97d@enterprisedb.com

src/backend/parser/scan.l
src/fe_utils/psqlscan.l
src/interfaces/ecpg/preproc/pgc.l
src/test/regress/expected/numerology.out
src/test/regress/sql/numerology.sql

index f555ac6e6d2e3bed730f6ae2334d56890f894651..882e081aae2eb5739e57a48bf4d0ff6660b6a4a3 100644 (file)
@@ -387,7 +387,7 @@ operator        {op_chars}+
  *
  * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
  *
- * {realfail1} and {realfail2} are added to prevent the need for scanner
+ * {realfail} is added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */
 digit          [0-9]
@@ -396,10 +396,14 @@ integer           {digit}+
 decimal            (({digit}*\.{digit}+)|({digit}+\.{digit}*))
 decimalfail        {digit}+\.\.
 real           ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1      ({integer}|{decimal})[Ee]
-realfail2      ({integer}|{decimal})[Ee][-+]
+realfail       ({integer}|{decimal})[Ee][-+]
+
+integer_junk   {integer}{ident_start}
+decimal_junk   {decimal}{ident_start}
+real_junk      {real}{ident_start}
 
 param          \${integer}
+param_junk     \${integer}{ident_start}
 
 other          .
 
@@ -974,6 +978,10 @@ other          .
                    yylval->ival = atol(yytext + 1);
                    return PARAM;
                }
+{param_junk}   {
+                   SET_YYLLOC();
+                   yyerror("trailing junk after parameter");
+               }
 
 {integer}      {
                    SET_YYLLOC();
@@ -995,20 +1003,21 @@ other            .
                    yylval->str = pstrdup(yytext);
                    return FCONST;
                }
-{realfail1}        {
-                   /*
-                    * throw back the [Ee], and figure out whether what
-                    * remains is an {integer} or {decimal}.
-                    */
-                   yyless(yyleng - 1);
+{realfail}     {
                    SET_YYLLOC();
-                   return process_integer_literal(yytext, yylval);
+                   yyerror("trailing junk after numeric literal");
                }
-{realfail2}        {
-                   /* throw back the [Ee][+-], and proceed as above */
-                   yyless(yyleng - 2);
+{integer_junk} {
                    SET_YYLLOC();
-                   return process_integer_literal(yytext, yylval);
+                   yyerror("trailing junk after numeric literal");
+               }
+{decimal_junk} {
+                   SET_YYLLOC();
+                   yyerror("trailing junk after numeric literal");
+               }
+{real_junk}        {
+                   SET_YYLLOC();
+                   yyerror("trailing junk after numeric literal");
                }
 
 
index 941ed065532b916ceb8a251d0706681b6bd3e939..ae531ec2407793135b85590ae27e0fb5d0040088 100644 (file)
@@ -325,7 +325,7 @@ operator        {op_chars}+
  *
  * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
  *
- * {realfail1} and {realfail2} are added to prevent the need for scanner
+ * {realfail} is added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */
 digit          [0-9]
@@ -334,10 +334,14 @@ integer           {digit}+
 decimal            (({digit}*\.{digit}+)|({digit}+\.{digit}*))
 decimalfail        {digit}+\.\.
 real           ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1      ({integer}|{decimal})[Ee]
-realfail2      ({integer}|{decimal})[Ee][-+]
+realfail       ({integer}|{decimal})[Ee][-+]
+
+integer_junk   {integer}{ident_start}
+decimal_junk   {decimal}{ident_start}
+real_junk      {real}{ident_start}
 
 param          \${integer}
+param_junk     \${integer}{ident_start}
 
 /* psql-specific: characters allowed in variable names */
 variable_char  [A-Za-z\200-\377_0-9]
@@ -839,6 +843,9 @@ other           .
 {param}            {
                    ECHO;
                }
+{param_junk}   {
+                   ECHO;
+               }
 
 {integer}      {
                    ECHO;
@@ -854,18 +861,16 @@ other         .
 {real}         {
                    ECHO;
                }
-{realfail1}        {
-                   /*
-                    * throw back the [Ee], and figure out whether what
-                    * remains is an {integer} or {decimal}.
-                    * (in psql, we don't actually care...)
-                    */
-                   yyless(yyleng - 1);
+{realfail}     {
                    ECHO;
                }
-{realfail2}        {
-                   /* throw back the [Ee][+-], and proceed as above */
-                   yyless(yyleng - 2);
+{integer_junk} {
+                   ECHO;
+               }
+{decimal_junk} {
+                   ECHO;
+               }
+{real_junk}        {
                    ECHO;
                }
 
index 9286a0355d6e9580ee36f8a546aa67299ad64114..2367b860f558f34d493bb28a0dbbec9fa0cc33d8 100644 (file)
@@ -353,7 +353,7 @@ operator        {op_chars}+
  *
  * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
  *
- * {realfail1} and {realfail2} are added to prevent the need for scanner
+ * {realfail} is added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */
 digit          [0-9]
@@ -362,10 +362,14 @@ integer           {digit}+
 decimal            (({digit}*\.{digit}+)|({digit}+\.{digit}*))
 decimalfail        {digit}+\.\.
 real           ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1      ({integer}|{decimal})[Ee]
-realfail2      ({integer}|{decimal})[Ee][-+]
+realfail       ({integer}|{decimal})[Ee][-+]
+
+integer_junk   {integer}{ident_start}
+decimal_junk   {decimal}{ident_start}
+real_junk      {real}{ident_start}
 
 param          \${integer}
+param_junk     \${integer}{ident_start}
 
 /* special characters for other dbms */
 /* we have to react differently in compat mode */
@@ -917,6 +921,9 @@ cppline         {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
                    base_yylval.ival = atol(yytext+1);
                    return PARAM;
                }
+{param_junk}   {
+                   mmfatal(PARSE_ERROR, "trailing junk after parameter");
+               }
 
 {ip}           {
                    base_yylval.str = mm_strdup(yytext);
@@ -941,22 +948,31 @@ cppline           {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
                    base_yylval.str = mm_strdup(yytext);
                    return FCONST;
                }
-{realfail1}        {
+{realfail}     {
                    /*
-                    * throw back the [Ee], and figure out whether what
+                    * throw back the [Ee][+-], and figure out whether what
                     * remains is an {integer} or {decimal}.
                     */
-                   yyless(yyleng - 1);
-                   return process_integer_literal(yytext, &base_yylval);
-               }
-{realfail2}        {
-                   /* throw back the [Ee][+-], and proceed as above */
                    yyless(yyleng - 2);
                    return process_integer_literal(yytext, &base_yylval);
                }
 } /* <C,SQL> */
 
 <SQL>{
+   /*
+    * Note that some trailing junk is valid in C (such as 100LL), so we
+    * contain this to SQL mode.
+    */
+{integer_junk} {
+                   mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
+               }
+{decimal_junk} {
+                   mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
+               }
+{real_junk}        {
+                   mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
+               }
+
 :{identifier}((("->"|\.){identifier})|(\[{array}\]))*  {
                    base_yylval.str = mm_strdup(yytext+1);
                    return CVARIABLE;
index 2ffc73e854875436a33ac42444398c8c617865d6..77d48434173bb6991d731c89a7a0e190feac2b1c 100644 (file)
@@ -6,64 +6,45 @@
 -- Trailing junk in numeric literals
 --
 SELECT 123abc;
- abc 
------
- 123
-(1 row)
-
+ERROR:  trailing junk after numeric literal at or near "123a"
+LINE 1: SELECT 123abc;
+               ^
 SELECT 0x0o;
- x0o 
------
-   0
-(1 row)
-
+ERROR:  trailing junk after numeric literal at or near "0x"
+LINE 1: SELECT 0x0o;
+               ^
 SELECT 1_2_3;
- _2_3 
-------
-    1
-(1 row)
-
+ERROR:  trailing junk after numeric literal at or near "1_"
+LINE 1: SELECT 1_2_3;
+               ^
 SELECT 0.a;
- a 
----
- 0
-(1 row)
-
+ERROR:  trailing junk after numeric literal at or near "0.a"
+LINE 1: SELECT 0.a;
+               ^
 SELECT 0.0a;
-  a  
------
- 0.0
-(1 row)
-
+ERROR:  trailing junk after numeric literal at or near "0.0a"
+LINE 1: SELECT 0.0a;
+               ^
 SELECT .0a;
-  a  
------
- 0.0
-(1 row)
-
+ERROR:  trailing junk after numeric literal at or near ".0a"
+LINE 1: SELECT .0a;
+               ^
 SELECT 0.0e1a;
- a 
----
- 0
-(1 row)
-
+ERROR:  trailing junk after numeric literal at or near "0.0e1a"
+LINE 1: SELECT 0.0e1a;
+               ^
 SELECT 0.0e;
-  e  
------
- 0.0
-(1 row)
-
+ERROR:  trailing junk after numeric literal at or near "0.0e"
+LINE 1: SELECT 0.0e;
+               ^
 SELECT 0.0e+a;
-ERROR:  syntax error at or near "+"
+ERROR:  trailing junk after numeric literal at or near "0.0e+"
 LINE 1: SELECT 0.0e+a;
-                   ^
+               ^
 PREPARE p1 AS SELECT $1a;
-EXECUTE p1(1);
- a 
----
- 1
-(1 row)
-
+ERROR:  trailing junk after parameter at or near "$1a"
+LINE 1: PREPARE p1 AS SELECT $1a;
+                             ^
 --
 -- Test implicit type conversions
 -- This fails for Postgres v6.1 (and earlier?)
index fb75f97832d0e24f5856d53d8589816d29e39731..be7d6dfe0c2676012205fc9ee6ff1d9daaec1257 100644 (file)
@@ -17,7 +17,6 @@ SELECT 0.0e1a;
 SELECT 0.0e;
 SELECT 0.0e+a;
 PREPARE p1 AS SELECT $1a;
-EXECUTE p1(1);
 
 --
 -- Test implicit type conversions