isspace() can be locale-sensitive depending on the platform, causing
hstore to treat as whitespace some bytes that it should not.  For
example, U+0105, which is encoded as 0xC4 0x85 in UTF-8, would have its
0x85 byte interpreted as whitespace and would be discarded from the
input given.
This problem is similar to the one addressed by 9ae2661, though it was
missed at the time that hstore can also manipulate non-ASCII inputs.
Replace the existing isspace() calls with scanner_isspace().
This problem has existed for a long time, so backpatch all the way down.
Author: Evan Jones
Discussion: https://postgr.es/m/CA+HWA9awUW0+RV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig@mail.gmail.com
Backpatch-through: 11
HEADERS = hstore.h
-REGRESS = hstore
+REGRESS = hstore hstore_utf8
ifdef USE_PGXS
PG_CONFIG = pg_config
--- /dev/null
+/*
+ * This test must be run in a database with UTF-8 encoding,
+ * because other encodings don't support all the characters used.
+ */
+SELECT getdatabaseencoding() <> 'UTF8'
+ AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+SET client_encoding = utf8;
+-- UTF-8 locale bug on macOS: isspace(0x85) returns true. \u0105 encodes
+-- as 0xc4 0x85 in UTF-8; the 0x85 was interpreted here as a whitespace.
+SELECT E'key\u0105=>value\u0105'::hstore;
+ hstore
+------------------
+ "keyą"=>"valueą"
+(1 row)
+
+SELECT 'keyą=>valueą'::hstore;
+ hstore
+------------------
+ "keyą"=>"valueą"
+(1 row)
+
+SELECT 'ą=>ą'::hstore;
+ hstore
+----------
+ "ą"=>"ą"
+(1 row)
+
+SELECT 'keyąfoo=>valueą'::hstore;
+ hstore
+---------------------
+ "keyąfoo"=>"valueą"
+(1 row)
+
--- /dev/null
+/*
+ * This test must be run in a database with UTF-8 encoding,
+ * because other encodings don't support all the characters used.
+ */
+SELECT getdatabaseencoding() <> 'UTF8'
+ AS skip_test \gset
+\if :skip_test
+\quit
#include "lib/stringinfo.h"
#include "libpq/pqformat.h"
#include "nodes/miscnodes.h"
+#include "parser/scansup.h"
#include "utils/builtins.h"
#include "utils/json.h"
#include "utils/jsonb.h"
{
st = GV_WAITESCIN;
}
- else if (!isspace((unsigned char) *(state->ptr)))
+ else if (!scanner_isspace((unsigned char) *(state->ptr)))
{
*(state->cur) = *(state->ptr);
state->cur++;
state->ptr--;
return true;
}
- else if (isspace((unsigned char) *(state->ptr)))
+ else if (scanner_isspace((unsigned char) *(state->ptr)))
{
return true;
}
{
PRSEOF;
}
- else if (!isspace((unsigned char) *(state->ptr)))
+ else if (!scanner_isspace((unsigned char) *(state->ptr)))
{
PRSSYNTAXERROR;
}
{
return true;
}
- else if (!isspace((unsigned char) *(state->ptr)))
+ else if (!scanner_isspace((unsigned char) *(state->ptr)))
{
PRSSYNTAXERROR;
}
'regress': {
'sql': [
'hstore',
+ 'hstore_utf8',
],
},
}
--- /dev/null
+/*
+ * This test must be run in a database with UTF-8 encoding,
+ * because other encodings don't support all the characters used.
+ */
+
+SELECT getdatabaseencoding() <> 'UTF8'
+ AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+
+SET client_encoding = utf8;
+
+-- UTF-8 locale bug on macOS: isspace(0x85) returns true. \u0105 encodes
+-- as 0xc4 0x85 in UTF-8; the 0x85 was interpreted here as a whitespace.
+SELECT E'key\u0105=>value\u0105'::hstore;
+SELECT 'keyą=>valueą'::hstore;
+SELECT 'ą=>ą'::hstore;
+SELECT 'keyąfoo=>valueą'::hstore;