diff --git a/contrib/btree_gin/expected/bytea.out b/contrib/btree_gin/expected/bytea.out index b0ed7a53450a..d4ad28787759 100644 --- a/contrib/btree_gin/expected/bytea.out +++ b/contrib/btree_gin/expected/bytea.out @@ -44,3 +44,95 @@ SELECT * FROM test_bytea WHERE i>'abc'::bytea ORDER BY i; xyz (2 rows) +-- Simple ASCII strings +SELECT encode(bytea(E'a'), 'hex'); -- 61 + encode +-------- + 61 +(1 row) + +SELECT encode(bytea(E'ab'), 'hex'); -- 6162 + encode +-------- + 6162 +(1 row) + +-- Octal escapes +SELECT encode(bytea(E'\\000'), 'hex'); -- 00 + encode +-------- + 00 +(1 row) + +SELECT encode(bytea(E'\\001'), 'hex'); -- 01 + encode +-------- + 01 +(1 row) + +SELECT encode(bytea(E'\\001\\002\\003'), 'hex'); -- 010203 + encode +-------- + 010203 +(1 row) + +-- Mixed literal and escapes +SELECT encode(bytea(E'a\\000b\\134c'), 'hex'); -- 6100625c63 + encode +------------ + 6100625c63 +(1 row) + +-- Backslash literal +SELECT encode(bytea(E'\\\\'), 'hex'); -- 5c + encode +-------- + 5c +(1 row) + +-- Empty input +SELECT encode(bytea(E''), 'hex'); -- (empty string) + encode +-------- + +(1 row) + +-- Hex format +SELECT encode(bytea(E'\\x6869'), 'escape'); -- hi + encode +-------- + hi +(1 row) + +-- ===== Invalid bytea input tests ===== +-- Invalid octal escapes (less than 3 digits or out of range) +SELECT bytea(E'\\77'); -- ERROR +ERROR: invalid input syntax for type bytea +LINE 1: SELECT bytea(E'\\77'); + ^ +SELECT bytea(E'\\4'); -- ERROR +ERROR: invalid input syntax for type bytea +LINE 1: SELECT bytea(E'\\4'); + ^ +SELECT bytea(E'\\08'); -- ERROR +ERROR: invalid input syntax for type bytea +LINE 1: SELECT bytea(E'\\08'); + ^ +SELECT bytea(E'\\999'); -- ERROR +ERROR: invalid input syntax for type bytea +LINE 1: SELECT bytea(E'\\999'); + ^ +-- Invalid hex format +SELECT bytea(E'\\x1'); -- ERROR +ERROR: invalid hexadecimal data: odd number of digits +LINE 1: SELECT bytea(E'\\x1'); + ^ +SELECT bytea(E'\\xZZ'); -- ERROR +ERROR: invalid hexadecimal digit: "Z" +LINE 1: SELECT bytea(E'\\xZZ'); + ^ +-- Incomplete escape sequence +SELECT bytea(E'abc\\'); -- ERROR +ERROR: invalid input syntax for type bytea +LINE 1: SELECT bytea(E'abc\\'); + ^ diff --git a/contrib/btree_gin/sql/bytea.sql b/contrib/btree_gin/sql/bytea.sql index 5f3eb11b1698..cb8ee8eb2aae 100644 --- a/contrib/btree_gin/sql/bytea.sql +++ b/contrib/btree_gin/sql/bytea.sql @@ -15,3 +15,40 @@ SELECT * FROM test_bytea WHERE i<='abc'::bytea ORDER BY i; SELECT * FROM test_bytea WHERE i='abc'::bytea ORDER BY i; SELECT * FROM test_bytea WHERE i>='abc'::bytea ORDER BY i; SELECT * FROM test_bytea WHERE i>'abc'::bytea ORDER BY i; + + +-- Simple ASCII strings +SELECT encode(bytea(E'a'), 'hex'); -- 61 +SELECT encode(bytea(E'ab'), 'hex'); -- 6162 + +-- Octal escapes +SELECT encode(bytea(E'\\000'), 'hex'); -- 00 +SELECT encode(bytea(E'\\001'), 'hex'); -- 01 +SELECT encode(bytea(E'\\001\\002\\003'), 'hex'); -- 010203 + +-- Mixed literal and escapes +SELECT encode(bytea(E'a\\000b\\134c'), 'hex'); -- 6100625c63 + +-- Backslash literal +SELECT encode(bytea(E'\\\\'), 'hex'); -- 5c + +-- Empty input +SELECT encode(bytea(E''), 'hex'); -- (empty string) + +-- Hex format +SELECT encode(bytea(E'\\x6869'), 'escape'); -- hi + +-- ===== Invalid bytea input tests ===== + +-- Invalid octal escapes (less than 3 digits or out of range) +SELECT bytea(E'\\77'); -- ERROR +SELECT bytea(E'\\4'); -- ERROR +SELECT bytea(E'\\08'); -- ERROR +SELECT bytea(E'\\999'); -- ERROR + +-- Invalid hex format +SELECT bytea(E'\\x1'); -- ERROR +SELECT bytea(E'\\xZZ'); -- ERROR + +-- Incomplete escape sequence +SELECT bytea(E'abc\\'); -- ERROR \ No newline at end of file diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 3e4d5568bde8..517965445feb 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -291,99 +291,75 @@ text_to_cstring_buffer(const text *src, char *dst, size_t dst_len) * ereport(ERROR, ...) if bad form. * * BUGS: - * The input is scanned twice. * The error checking of input is minimal. */ Datum byteain(PG_FUNCTION_ARGS) { - char *inputText = PG_GETARG_CSTRING(0); - Node *escontext = fcinfo->context; - char *tp; - char *rp; - int bc; - bytea *result; + char *inputText = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; - /* Recognize hex input */ + /* Hex format */ if (inputText[0] == '\\' && inputText[1] == 'x') { - size_t len = strlen(inputText); + size_t len; + int bc; + bytea *result; - bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */ + len = strlen(inputText); + bc = (len - 2) / 2 + VARHDRSZ; result = palloc(bc); - bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result), - escontext); - SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */ - + bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result), escontext); + SET_VARSIZE(result, bc + VARHDRSZ); PG_RETURN_BYTEA_P(result); } - /* Else, it's the traditional escaped style */ - for (bc = 0, tp = inputText; *tp != '\0'; bc++) + /* Escaped format */ { - if (tp[0] != '\\') - tp++; - else if ((tp[0] == '\\') && - (tp[1] >= '0' && tp[1] <= '3') && - (tp[2] >= '0' && tp[2] <= '7') && - (tp[3] >= '0' && tp[3] <= '7')) - tp += 4; - else if ((tp[0] == '\\') && - (tp[1] == '\\')) - tp += 2; - else - { - /* - * one backslash, not followed by another or ### valid octal - */ - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "bytea"))); - } - } - - bc += VARHDRSZ; + StringInfoData buf; + char *tp; + bytea *result; - result = (bytea *) palloc(bc); - SET_VARSIZE(result, bc); + initStringInfo(&buf); + tp = inputText; - tp = inputText; - rp = VARDATA(result); - while (*tp != '\0') - { - if (tp[0] != '\\') - *rp++ = *tp++; - else if ((tp[0] == '\\') && - (tp[1] >= '0' && tp[1] <= '3') && - (tp[2] >= '0' && tp[2] <= '7') && - (tp[3] >= '0' && tp[3] <= '7')) + while (*tp) { - bc = VAL(tp[1]); - bc <<= 3; - bc += VAL(tp[2]); - bc <<= 3; - *rp++ = bc + VAL(tp[3]); + if (*tp != '\\') + { + appendStringInfoChar(&buf, *tp++); + continue; + } - tp += 4; - } - else if ((tp[0] == '\\') && - (tp[1] == '\\')) - { - *rp++ = '\\'; - tp += 2; - } - else - { - /* - * We should never get here. The first pass should not allow it. - */ - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "bytea"))); + if (tp[1] == '\\') + { + appendStringInfoChar(&buf, '\\'); + tp += 2; + } + else if ((tp[1] >= '0' && tp[1] <= '3') && + (tp[2] >= '0' && tp[2] <= '7') && + (tp[3] >= '0' && tp[3] <= '7')) + { + int byte_val = VAL(tp[1]); + byte_val = (byte_val << 3) + VAL(tp[2]); + byte_val = (byte_val << 3) + VAL(tp[3]); + appendStringInfoChar(&buf, byte_val); + tp += 4; + } + else + { + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "bytea"))); + } } - } - PG_RETURN_BYTEA_P(result); + result = palloc(buf.len + VARHDRSZ); + SET_VARSIZE(result, buf.len + VARHDRSZ); + memcpy(VARDATA(result), buf.data, buf.len); + + PG_RETURN_BYTEA_P(result); + } } /*