summary refs log tree commit diff
path: root/bin/ls/utf8.c
diff options
context:
space:
mode:
author Ingo Schwarze <schwarze@cvs.openbsd.org> 2015-12-01 18:36:14 +0000
committer Ingo Schwarze <schwarze@cvs.openbsd.org> 2015-12-01 18:36:14 +0000
commit 51dbee4f9cb8843e5e1afc77881fa3bd3eaa9ad9 (patch)
tree b54cc4f6dcf1a92eed8a0ede2c61d16a6fdbf1c7 /bin/ls/utf8.c
parent 4adcc5bb8d17c4de04b10799e1f19daa3e5a4564 (diff)
Support UTF-8: use wcwidth(3) for column adjustment and replace
non-printable Unicode codepoints and invalid bytes with ASCII question marks. No change for the SMALL version. Using ideas developed by tedu@, phessler@, bentley@ and feedback from many. OK yasuoka@ czarkoff@ sthen@.
Diffstat (limited to 'bin/ls/utf8.c')
-rw-r--r-- bin/ls/utf8.c 51
1 files changed, 51 insertions, 0 deletions
diff --git a/bin/ls/utf8.c b/bin/ls/utf8.c
new file mode 100644
index 00000000000..3825c531f16
--- /dev/null
+++ b/bin/ls/utf8.c
@@ -0,0 +1,51 @@
+/* $OpenBSD: utf8.c,v 1.1 2015/12/01 18:36:13 schwarze Exp $ */
+
+/*
+ * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef SMALL
+#include <stdio.h>
+#include <stdlib.h>
+#include <wchar.h>
+/* Return the display width of mbs; if print is nonzero, also write a sanitized copy to stdout. */
+int
+mbsprint(const char *mbs, int print)
+{
+ wchar_t wc;
+ int len; /* length in bytes of the current multibyte character */
+ int width; /* display width of a single Unicode char */
+ int total_width; /* display width of the whole string */
+
+ for (total_width = 0; *mbs != '\0'; mbs += len) {
+ if ((len = mbtowc(&wc, mbs, MB_CUR_MAX)) == -1) { /* undecodable bytes */
+ (void)mbtowc(NULL, NULL, MB_CUR_MAX); /* reset the conversion state */
+ if (print)
+ putchar('?');
+ total_width++; /* the '?' placeholder counts as one column */
+ len = 1; /* skip only the offending byte, then retry */
+ } else if ((width = wcwidth(wc)) == -1) { /* non-printable character */
+ if (print)
+ putchar('?');
+ total_width++;
+ } else {
+ if (print)
+ fwrite(mbs, 1, len, stdout); /* emit the original bytes unchanged */
+ total_width += width;
+ }
+ }
+ return total_width;
+}
+#endif