Build de-escaped JSON strings in larger chunks during lexing

author John Naylor <[email protected]>
Fri, 1 Jul 2022 10:28:20 +0000 (17:28 +0700)

committer John Naylor <[email protected]>
Mon, 11 Jul 2022 04:11:36 +0000 (11:11 +0700)

diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c

index eeedc0645a0a85a8df2a690caaee3940c8f87858..694417bb388e731a5a3ba04f808e74542020c693 100644 (file)
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -686,15 +686,6 @@ json_lex_string(JsonLexContext *lex)
             lex->token_terminator = s;
             return JSON_INVALID_TOKEN;
         }
-       else if (*s == '"')
-           break;
-       else if ((unsigned char) *s < 32)
-       {
-           /* Per RFC4627, these characters MUST be escaped. */
-           /* Since *s isn't printable, exclude it from the context string */
-           lex->token_terminator = s;
-           return JSON_ESCAPING_REQUIRED;
-       }
         else if (*s == '\\')
         {
             /* OK, we have an escape character. */
@@ -849,22 +840,51 @@ json_lex_string(JsonLexContext *lex)
                 return JSON_ESCAPING_INVALID;
             }
         }
-       else if (lex->strval != NULL)
+       else
         {
+           char       *p;
+
             if (hi_surrogate != -1)
                 return JSON_UNICODE_LOW_SURROGATE;
  
-           appendStringInfoChar(lex->strval, *s);
-       }
-   }
+           /*
+            * Skip to the first byte that requires special handling, so we
+            * can batch calls to appendBinaryStringInfo.
+            */
+           for (p = s; p < end; p++)
+           {
+               if (*p == '\\' || *p == '"')
+                   break;
+               else if ((unsigned char) *p < 32)
+               {
+                   /* Per RFC4627, these characters MUST be escaped. */
+                   /*
+                    * Since *p isn't printable, exclude it from the context
+                    * string
+                    */
+                   lex->token_terminator = p;
+                   return JSON_ESCAPING_REQUIRED;
+               }
+           }
  
-   if (hi_surrogate != -1)
-       return JSON_UNICODE_LOW_SURROGATE;
+           if (lex->strval != NULL)
+               appendBinaryStringInfo(lex->strval, s, p - s);
  
-   /* Hooray, we found the end of the string! */
-   lex->prev_token_terminator = lex->token_terminator;
-   lex->token_terminator = s + 1;
-   return JSON_SUCCESS;
+           if (*p == '"')
+           {
+               /* Hooray, we found the end of the string! */
+               lex->prev_token_terminator = lex->token_terminator;
+               lex->token_terminator = p + 1;
+               return JSON_SUCCESS;
+           }
+
+           /*
+            * s will be incremented at the top of the loop, so set it to just
+            * behind our lookahead position
+            */
+           s = p - 1;
+       }
+   }
  }
  
  /*
author	John Naylor <[email protected]>
	Fri, 1 Jul 2022 10:28:20 +0000 (17:28 +0700)
committer	John Naylor <[email protected]>
	Mon, 11 Jul 2022 04:11:36 +0000 (11:11 +0700)