Reject trailing junk after numeric literals

author Peter Eisentraut <[email protected]>

Wed, 16 Feb 2022 09:32:36 +0000 (10:32 +0100)

committer Peter Eisentraut <[email protected]>

Wed, 16 Feb 2022 09:37:31 +0000 (10:37 +0100)
author Peter Eisentraut <[email protected]>
Wed, 16 Feb 2022 09:32:36 +0000 (10:32 +0100)
committer Peter Eisentraut <[email protected]>
Wed, 16 Feb 2022 09:37:31 +0000 (10:37 +0100)
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l

index f555ac6e6d2e3bed730f6ae2334d56890f894651..882e081aae2eb5739e57a48bf4d0ff6660b6a4a3 100644 (file)
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -387,7 +387,7 @@ operator        {op_chars}+
   *
   * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
   *
- * {realfail1} and {realfail2} are added to prevent the need for scanner
+ * {realfail} is added to prevent the need for scanner
   * backup when the {real} rule fails to match completely.
   */
  digit          [0-9]
@@ -396,10 +396,14 @@ integer           {digit}+
  decimal            (({digit}*\.{digit}+)|({digit}+\.{digit}*))
  decimalfail        {digit}+\.\.
  real           ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1      ({integer}|{decimal})[Ee]
-realfail2      ({integer}|{decimal})[Ee][-+]
+realfail       ({integer}|{decimal})[Ee][-+]
+
+integer_junk   {integer}{ident_start}
+decimal_junk   {decimal}{ident_start}
+real_junk      {real}{ident_start}
  
  param          \${integer}
+param_junk     \${integer}{ident_start}
  
  other          .
  
@@ -974,6 +978,10 @@ other          .
                     yylval->ival = atol(yytext + 1);
                     return PARAM;
                 }
+{param_junk}   {
+                   SET_YYLLOC();
+                   yyerror("trailing junk after parameter");
+               }
  
  {integer}      {
                     SET_YYLLOC();
@@ -995,20 +1003,21 @@ other            .
                     yylval->str = pstrdup(yytext);
                     return FCONST;
                 }
-{realfail1}        {
-                   /*
-                    * throw back the [Ee], and figure out whether what
-                    * remains is an {integer} or {decimal}.
-                    */
-                   yyless(yyleng - 1);
+{realfail}     {
                     SET_YYLLOC();
-                   return process_integer_literal(yytext, yylval);
+                   yyerror("trailing junk after numeric literal");
                 }
-{realfail2}        {
-                   /* throw back the [Ee][+-], and proceed as above */
-                   yyless(yyleng - 2);
+{integer_junk} {
                     SET_YYLLOC();
-                   return process_integer_literal(yytext, yylval);
+                   yyerror("trailing junk after numeric literal");
+               }
+{decimal_junk} {
+                   SET_YYLLOC();
+                   yyerror("trailing junk after numeric literal");
+               }
+{real_junk}        {
+                   SET_YYLLOC();
+                   yyerror("trailing junk after numeric literal");
                 }
  
  
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l

index 941ed065532b916ceb8a251d0706681b6bd3e939..ae531ec2407793135b85590ae27e0fb5d0040088 100644 (file)
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -325,7 +325,7 @@ operator        {op_chars}+
   *
   * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
   *
- * {realfail1} and {realfail2} are added to prevent the need for scanner
+ * {realfail} is added to prevent the need for scanner
   * backup when the {real} rule fails to match completely.
   */
  digit          [0-9]
@@ -334,10 +334,14 @@ integer           {digit}+
  decimal            (({digit}*\.{digit}+)|({digit}+\.{digit}*))
  decimalfail        {digit}+\.\.
  real           ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1      ({integer}|{decimal})[Ee]
-realfail2      ({integer}|{decimal})[Ee][-+]
+realfail       ({integer}|{decimal})[Ee][-+]
+
+integer_junk   {integer}{ident_start}
+decimal_junk   {decimal}{ident_start}
+real_junk      {real}{ident_start}
  
  param          \${integer}
+param_junk     \${integer}{ident_start}
  
  /* psql-specific: characters allowed in variable names */
  variable_char  [A-Za-z\200-\377_0-9]
@@ -839,6 +843,9 @@ other           .
  {param}            {
                     ECHO;
                 }
+{param_junk}   {
+                   ECHO;
+               }
  
  {integer}      {
                     ECHO;
@@ -854,18 +861,16 @@ other         .
  {real}         {
                     ECHO;
                 }
-{realfail1}        {
-                   /*
-                    * throw back the [Ee], and figure out whether what
-                    * remains is an {integer} or {decimal}.
-                    * (in psql, we don't actually care...)
-                    */
-                   yyless(yyleng - 1);
+{realfail}     {
                     ECHO;
                 }
-{realfail2}        {
-                   /* throw back the [Ee][+-], and proceed as above */
-                   yyless(yyleng - 2);
+{integer_junk} {
+                   ECHO;
+               }
+{decimal_junk} {
+                   ECHO;
+               }
+{real_junk}        {
                     ECHO;
                 }
  
diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l

index 9286a0355d6e9580ee36f8a546aa67299ad64114..2367b860f558f34d493bb28a0dbbec9fa0cc33d8 100644 (file)
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -353,7 +353,7 @@ operator        {op_chars}+
   *
   * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
   *
- * {realfail1} and {realfail2} are added to prevent the need for scanner
+ * {realfail} is added to prevent the need for scanner
   * backup when the {real} rule fails to match completely.
   */
  digit          [0-9]
@@ -362,10 +362,14 @@ integer           {digit}+
  decimal            (({digit}*\.{digit}+)|({digit}+\.{digit}*))
  decimalfail        {digit}+\.\.
  real           ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1      ({integer}|{decimal})[Ee]
-realfail2      ({integer}|{decimal})[Ee][-+]
+realfail       ({integer}|{decimal})[Ee][-+]
+
+integer_junk   {integer}{ident_start}
+decimal_junk   {decimal}{ident_start}
+real_junk      {real}{ident_start}
  
  param          \${integer}
+param_junk     \${integer}{ident_start}
  
  /* special characters for other dbms */
  /* we have to react differently in compat mode */
@@ -917,6 +921,9 @@ cppline         {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
                     base_yylval.ival = atol(yytext+1);
                     return PARAM;
                 }
+{param_junk}   {
+                   mmfatal(PARSE_ERROR, "trailing junk after parameter");
+               }
  
  {ip}           {
                     base_yylval.str = mm_strdup(yytext);
@@ -941,22 +948,31 @@ cppline           {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
                     base_yylval.str = mm_strdup(yytext);
                     return FCONST;
                 }
-{realfail1}        {
+{realfail}     {
                     /*
-                    * throw back the [Ee], and figure out whether what
+                    * throw back the [Ee][+-], and figure out whether what
                      * remains is an {integer} or {decimal}.
                      */
-                   yyless(yyleng - 1);
-                   return process_integer_literal(yytext, &base_yylval);
-               }
-{realfail2}        {
-                   /* throw back the [Ee][+-], and proceed as above */
                     yyless(yyleng - 2);
                     return process_integer_literal(yytext, &base_yylval);
                 }
  } /* <C,SQL> */
  
  <SQL>{
+   /*
+    * Note that some trailing junk is valid in C (such as 100LL), so we
+    * confine these rules to SQL mode.
+    */
+{integer_junk} {
+                   mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
+               }
+{decimal_junk} {
+                   mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
+               }
+{real_junk}        {
+                   mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
+               }
+
  :{identifier}((("->"|\.){identifier})|(\[{array}\]))*  {
                     base_yylval.str = mm_strdup(yytext+1);
                     return CVARIABLE;
diff --git a/src/test/regress/expected/numerology.out b/src/test/regress/expected/numerology.out

index 2ffc73e854875436a33ac42444398c8c617865d6..77d48434173bb6991d731c89a7a0e190feac2b1c 100644 (file)
--- a/src/test/regress/expected/numerology.out
+++ b/src/test/regress/expected/numerology.out
@@ -6,64 +6,45 @@
  -- Trailing junk in numeric literals
  --
  SELECT 123abc;
- abc 
------
- 123
-(1 row)
-
+ERROR:  trailing junk after numeric literal at or near "123a"
+LINE 1: SELECT 123abc;
+               ^
  SELECT 0x0o;
- x0o 
------
-   0
-(1 row)
-
+ERROR:  trailing junk after numeric literal at or near "0x"
+LINE 1: SELECT 0x0o;
+               ^
  SELECT 1_2_3;
- _2_3 
-------
-    1
-(1 row)
-
+ERROR:  trailing junk after numeric literal at or near "1_"
+LINE 1: SELECT 1_2_3;
+               ^
  SELECT 0.a;
- a 
----
- 0
-(1 row)
-
+ERROR:  trailing junk after numeric literal at or near "0.a"
+LINE 1: SELECT 0.a;
+               ^
  SELECT 0.0a;
-  a  
------
- 0.0
-(1 row)
-
+ERROR:  trailing junk after numeric literal at or near "0.0a"
+LINE 1: SELECT 0.0a;
+               ^
  SELECT .0a;
-  a  
------
- 0.0
-(1 row)
-
+ERROR:  trailing junk after numeric literal at or near ".0a"
+LINE 1: SELECT .0a;
+               ^
  SELECT 0.0e1a;
- a 
----
- 0
-(1 row)
-
+ERROR:  trailing junk after numeric literal at or near "0.0e1a"
+LINE 1: SELECT 0.0e1a;
+               ^
  SELECT 0.0e;
-  e  
------
- 0.0
-(1 row)
-
+ERROR:  trailing junk after numeric literal at or near "0.0e"
+LINE 1: SELECT 0.0e;
+               ^
  SELECT 0.0e+a;
-ERROR:  syntax error at or near "+"
+ERROR:  trailing junk after numeric literal at or near "0.0e+"
  LINE 1: SELECT 0.0e+a;
-                   ^
+               ^
  PREPARE p1 AS SELECT $1a;
-EXECUTE p1(1);
- a 
----
- 1
-(1 row)
-
+ERROR:  trailing junk after parameter at or near "$1a"
+LINE 1: PREPARE p1 AS SELECT $1a;
+                             ^
  --
  -- Test implicit type conversions
  -- This fails for Postgres v6.1 (and earlier?)
diff --git a/src/test/regress/sql/numerology.sql b/src/test/regress/sql/numerology.sql

index fb75f97832d0e24f5856d53d8589816d29e39731..be7d6dfe0c2676012205fc9ee6ff1d9daaec1257 100644 (file)
--- a/src/test/regress/sql/numerology.sql
+++ b/src/test/regress/sql/numerology.sql
@@ -17,7 +17,6 @@ SELECT 0.0e1a;
  SELECT 0.0e;
  SELECT 0.0e+a;
  PREPARE p1 AS SELECT $1a;
-EXECUTE p1(1);
  
  --
  -- Test implicit type conversions
author	Peter Eisentraut <[email protected]>
	Wed, 16 Feb 2022 09:32:36 +0000 (10:32 +0100)
committer	Peter Eisentraut <[email protected]>
	Wed, 16 Feb 2022 09:37:31 +0000 (10:37 +0100)
src/backend/parser/scan.l		patch | blob | blame | history
src/fe_utils/psqlscan.l		patch | blob | blame | history
src/interfaces/ecpg/preproc/pgc.l		patch | blob | blame | history
src/test/regress/expected/numerology.out		patch | blob | blame | history
src/test/regress/sql/numerology.sql		patch | blob | blame | history