usual/wchar: wide-char utilities.

author Marko Kreen <[email protected]>

Mon, 31 Dec 2012 12:18:49 +0000 (14:18 +0200)

committer Marko Kreen <[email protected]>

Thu, 3 Jan 2013 22:02:19 +0000 (00:02 +0200)
author Marko Kreen <[email protected]>
Mon, 31 Dec 2012 12:18:49 +0000 (14:18 +0200)
committer Marko Kreen <[email protected]>
Thu, 3 Jan 2013 22:02:19 +0000 (00:02 +0200)
diff --git a/Makefile b/Makefile

index 36b9698ccf4d44f46b514280a529b503bb2df584..bb342cfbdc0b4e2129022d9e07b97c6282e6cb89 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -57,7 +57,8 @@ libusual_la_SOURCES = usual/config.h.in \
         usual/string.h usual/string.c \
         usual/strpool.h usual/strpool.c \
         usual/time.h usual/time.c \
-       usual/utf8.h usual/utf8.c
+       usual/utf8.h usual/utf8.c \
+       usual/wchar.h usual/wchar.c
  
  # we want to filter headers, so cannot use usual install method via _HEADERS
  USUAL_HEADERS = $(filter %.h,$(libusual_la_SOURCES) $(nodist_libusual_la_SOURCES))
diff --git a/usual/wchar.c b/usual/wchar.c

new file mode 100644 (file)

index 0000000..f0d2afc
--- /dev/null
+++ b/usual/wchar.c
@@ -0,0 +1,107 @@
+/*
+ * wchar utility functions.
+ *
+ * Copyright (c) 2012  Marko Kreen
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <usual/wchar.h>
+
+#include <string.h>
+
+/*
+ * Decode multibyte string into zero-terminated wchar_t string.
+ *
+ * str           - input bytes
+ * str_len       - length of input, negative means use strlen(str)
+ * wlen_p        - if not NULL, gets number of wchar_t decoded (terminator excluded)
+ * wbuf/wbuf_len - optional caller buffer, used when it has room for
+ *                 str_len + 1 elements, otherwise result is malloc()ed
+ * allow_invalid - if true, a byte that does not decode is stored as-is
+ *                 (as unsigned char value) instead of failing
+ *
+ * Returns wbuf or malloc()ed buffer; caller must free() a result that
+ * is not wbuf.  Returns NULL if malloc() fails, or with errno = EILSEQ
+ * if input does not decode and allow_invalid is false.
+ */
+wchar_t *mbstr_decode(const char *str, int str_len, int *wlen_p,
+                     wchar_t *wbuf, int wbuf_len, bool allow_invalid)
+{
+       mbstate_t ps;
+       size_t res;
+       wchar_t *dst, *w, *wend;
+       const char *s;
+       const char *str_end;
+       int wmax;
+
+       if (str_len < 0)
+               str_len = strlen(str);
+       str_end = str + str_len;
+
+       /*
+        * Each wchar_t consumes at least one input byte, so str_len
+        * elements plus zero-terminator is always enough.
+        */
+       wmax = str_len + 1;
+       if (wbuf != NULL && wmax <= wbuf_len) {
+               /* exact fit is fine too, no need to allocate */
+               dst = wbuf;
+       } else {
+               dst = malloc(sizeof(wchar_t) * wmax);
+               if (!dst)
+                       return NULL;
+       }
+
+       /* try full decode at once */
+       s = str;
+       memset(&ps, 0, sizeof(ps));
+       res = mbsnrtowcs(dst, &s, str_len, wmax, &ps);
+       if (res != (size_t)-1) {
+               if (s == NULL && res > 0) {
+                       /* stopped at zero byte inside input, terminator is already stored */
+                       if (wlen_p)
+                               *wlen_p = res;
+                       return dst;
+               }
+               if (s == str_end && mbsinit(&ps)) {
+                       /*
+                        * All input consumed and no partial character is
+                        * pending; the zero byte was not part of the input,
+                        * so the terminator must be added here.
+                        */
+                       dst[res] = 0;
+                       if (wlen_p)
+                               *wlen_p = res;
+                       return dst;
+               }
+       }
+
+       /* full decode failed, decode chars one-by-one */
+       s = str;
+       w = dst;
+       wend = dst + wmax - 1;
+       memset(&ps, 0, sizeof(ps));
+       while (s < str_end && w < wend) {
+               res = mbrtowc(w, s, str_end - s, &ps);
+               if (res != 0 && res != (size_t)-1 && res != (size_t)-2) {
+                       /* one complete character that used 'res' bytes */
+                       w++;
+                       s += res;
+               } else if (allow_invalid) {
+                       /*
+                        * Invalid or incomplete sequence (or zero byte):
+                        * forget partial state and pass the byte through.
+                        */
+                       memset(&ps, 0, sizeof(ps));
+                       *w++ = (unsigned char)*s++;
+               } else {
+                       goto fail;
+               }
+       }
+
+       if (s != str_end)
+               goto fail;
+
+       *w = 0;
+       if (wlen_p != NULL)
+               *wlen_p = w - dst;
+       return dst;
+
+fail:
+       /* never free caller-provided buffer */
+       if (dst != wbuf)
+               free(dst);
+       errno = EILSEQ;
+       return NULL;
+}
+
+/*
+ * Look up character class by name, where name is given as
+ * wchar_t string with explicit length (no zero-terminator needed).
+ *
+ * Returns (wctype_t)0 without calling wctype() if the name does not
+ * fit into local buffer or contains a char outside of 0x20..127.
+ * Otherwise returns whatever wctype() returns for the name.
+ */
+wctype_t wctype_wcsn(const wchar_t *name, unsigned int namelen)
+{
+       char ascii[10];
+       unsigned int pos = 0;
+
+       /* one byte must stay free for the zero-terminator */
+       if (namelen >= sizeof(ascii))
+               return (wctype_t)0;
+
+       while (pos < namelen) {
+               wchar_t wc = name[pos];
+
+               /* only plain ASCII can be narrowed into char buffer */
+               if (!(wc >= 0x20 && wc <= 127))
+                       return (wctype_t)0;
+               ascii[pos++] = wc;
+       }
+       ascii[pos] = 0;
+
+       return wctype(ascii);
+}
+
diff --git a/usual/wchar.h b/usual/wchar.h

new file mode 100644 (file)

index 0000000..1d06a84
--- /dev/null
+++ b/usual/wchar.h
@@ -0,0 +1,31 @@
+/*
+ * wchar.h - wchar_t utilities.
+ *
+ * Copyright (c) 2012  Marko Kreen
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _USUAL_WCHAR_H_
+#define _USUAL_WCHAR_H_
+
+#include <usual/base.h>
+
+#include <wchar.h>
+#include <wctype.h>
+
+/**
+ * Decode multibyte string into zero-terminated wchar_t string.
+ *
+ * @param str            Input bytes.
+ * @param str_len        Length of input; negative means strlen(str) is used.
+ * @param wlen_p         If not NULL, receives number of wchar_t in result,
+ *                       terminator excluded.
+ * @param wbuf           Optional caller buffer, may be NULL.
+ * @param wbuf_len       Size of wbuf in wchar_t elements.  wbuf is used only
+ *                       if it is large enough for the worst case
+ *                       (one wchar_t per input byte plus terminator).
+ * @param allow_invalid  If true, a byte that fails to decode is stored
+ *                       as-is (unsigned char value) instead of failing.
+ *
+ * @return wbuf or malloc()ed buffer - caller must free() the result if it
+ *         is not wbuf.  NULL if allocation fails, or with errno set to
+ *         EILSEQ if input fails to decode and allow_invalid is false.
+ */
+wchar_t *mbstr_decode(const char *str, int str_len, int *wlen_p, wchar_t *wbuf, int wbuf_len, bool allow_invalid);
+
+/**
+ * Same as wctype(), but class name is given as wchar_t string
+ * with explicit length.
+ *
+ * @param name     Class name, does not need to be zero-terminated.
+ * @param namelen  Number of wchar_t in name.
+ *
+ * @return Result of wctype() for the name, or (wctype_t)0 if name is
+ *         10 or more chars long or contains chars outside of 0x20..127.
+ */
+wctype_t wctype_wcsn(const wchar_t *name, unsigned int namelen);
+
+#endif
author	Marko Kreen <[email protected]>
	Mon, 31 Dec 2012 12:18:49 +0000 (14:18 +0200)
committer	Marko Kreen <[email protected]>
	Thu, 3 Jan 2013 22:02:19 +0000 (00:02 +0200)
Makefile		patch \| blob \| blame \| history
usual/wchar.c	[new file with mode: 0644]	patch \| blob
usual/wchar.h	[new file with mode: 0644]	patch \| blob