Fix psql's copy of utf2ucs() to match the backend's copy exactly;
author Tom Lane <[email protected]>
Mon, 16 Aug 2010 00:06:54 +0000 (00:06 +0000)
committer Tom Lane <[email protected]>
Mon, 16 Aug 2010 00:06:54 +0000 (00:06 +0000)
in particular, propagate a fix in the test to see whether a UTF8 character has
length 4 bytes.  This is likely of little real-world consequence because
5-or-more-byte UTF8 sequences are not supported by Postgres nor seen anywhere
in the wild, but still we may as well get it right.  Problem found by Joseph
Adams.

Bug is aboriginal, so back-patch all the way.

src/bin/psql/mbprint.c

index 27ee3f840f3e46a22dffbba7bd191fcf31a3432d..4b4dc187347e99d032968b7f5691d28c46249d4e 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2000-2005, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/bin/psql/mbprint.c,v 1.16 2005/01/01 05:43:08 momjian Exp $
+ * $PostgreSQL: pgsql/src/bin/psql/mbprint.c,v 1.16.4.1 2010/08/16 00:06:54 tgl Exp $
  */
 
 #include "postgres_fe.h"
@@ -168,28 +168,20 @@ utf2ucs(const unsigned char *c)
        if ((*c & 0x80) == 0)
                return (pg_wchar) c[0];
        else if ((*c & 0xe0) == 0xc0)
-       {
                return (pg_wchar) (((c[0] & 0x1f) << 6) |
                                                   (c[1] & 0x3f));
-       }
        else if ((*c & 0xf0) == 0xe0)
-       {
                return (pg_wchar) (((c[0] & 0x0f) << 12) |
                                                   ((c[1] & 0x3f) << 6) |
                                                   (c[2] & 0x3f));
-       }
-       else if ((*c & 0xf0) == 0xf0)
-       {
+       else if ((*c & 0xf8) == 0xf0)
                return (pg_wchar) (((c[0] & 0x07) << 18) |
                                                   ((c[1] & 0x3f) << 12) |
                                                   ((c[2] & 0x3f) << 6) |
                                                   (c[3] & 0x3f));
-       }
        else
-       {
                /* that is an invalid code on purpose */
                return 0xffffffff;
-       }
 }
 
 /* mb_utf_wcwidth : calculate column length for the utf8 string pwcs