summary | refs | log | tree | commit | diff
path: root/bin/ls
diff options
context:
space:
mode:
author: Ingo Schwarze <schwarze@cvs.openbsd.org> 2015-12-01 18:36:14 +0000
committer: Ingo Schwarze <schwarze@cvs.openbsd.org> 2015-12-01 18:36:14 +0000
commit: 51dbee4f9cb8843e5e1afc77881fa3bd3eaa9ad9 (patch)
tree: b54cc4f6dcf1a92eed8a0ede2c61d16a6fdbf1c7 /bin/ls
parent: 4adcc5bb8d17c4de04b10799e1f19daa3e5a4564 (diff)
Support UTF-8: use wcwidth(3) for column adjustment and replace
non-printable Unicode codepoints and invalid bytes with ASCII question marks. No change for the SMALL version. Using ideas developed by tedu@, phessler@, bentley@ and feedback from many. OK yasuoka@ czarkoff@ sthen@.
Diffstat (limited to 'bin/ls')
-rw-r--r-- bin/ls/Makefile 4
-rw-r--r-- bin/ls/extern.h 4
-rw-r--r-- bin/ls/ls.1 10
-rw-r--r-- bin/ls/ls.c 12
-rw-r--r-- bin/ls/print.c 11
-rw-r--r-- bin/ls/utf8.c 51
-rw-r--r-- bin/ls/util.c 9
7 files changed, 85 insertions, 16 deletions
diff --git a/bin/ls/Makefile b/bin/ls/Makefile
index defd6071b95..026ce6d3da4 100644
--- a/bin/ls/Makefile
+++ b/bin/ls/Makefile
@@ -1,7 +1,7 @@
-# $OpenBSD: Makefile,v 1.7 2003/08/06 19:09:09 tedu Exp $
+# $OpenBSD: Makefile,v 1.8 2015/12/01 18:36:13 schwarze Exp $
PROG= ls
-SRCS= cmp.c ls.c main.c print.c util.c
+SRCS= cmp.c ls.c main.c print.c util.c utf8.c
DPADD= ${LIBUTIL}
LDADD= -lutil
diff --git a/bin/ls/extern.h b/bin/ls/extern.h
index 7f7807a612f..afe3fb47e68 100644
--- a/bin/ls/extern.h
+++ b/bin/ls/extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: extern.h,v 1.9 2003/06/02 23:32:08 millert Exp $ */
+/* $OpenBSD: extern.h,v 1.10 2015/12/01 18:36:13 schwarze Exp $ */
/* $NetBSD: extern.h,v 1.5 1995/03/21 09:06:24 cgd Exp $ */
/*-
@@ -45,7 +45,7 @@ int revstatcmp(const FTSENT *, const FTSENT *);
int sizecmp(const FTSENT *, const FTSENT *);
int revsizecmp(const FTSENT *, const FTSENT *);
-int putname(char *);
+int mbsprint(const char *, int);
void printcol(DISPLAY *);
void printacol(DISPLAY *);
void printlong(DISPLAY *);
diff --git a/bin/ls/ls.1 b/bin/ls/ls.1
index 9fe555dc3de..1daaca58f61 100644
--- a/bin/ls/ls.1
+++ b/bin/ls/ls.1
@@ -1,4 +1,4 @@
-.\" $OpenBSD: ls.1,v 1.72 2015/04/24 10:57:36 sobrado Exp $
+.\" $OpenBSD: ls.1,v 1.73 2015/12/01 18:36:13 schwarze Exp $
.\" $NetBSD: ls.1,v 1.14 1995/12/05 02:44:01 jtc Exp $
.\"
.\" Copyright (c) 1980, 1990, 1991, 1993, 1994
@@ -33,7 +33,7 @@
.\"
.\" @(#)ls.1 8.7 (Berkeley) 7/29/94
.\"
-.Dd $Mdocdate: April 24 2015 $
+.Dd $Mdocdate: December 1 2015 $
.Dt LS 1
.Os
.Sh NAME
@@ -440,6 +440,12 @@ If this variable contains a string representing a
decimal integer, it is used as the
column position width for displaying
multiple-text-column output.
+.It Ev LC_CTYPE
+If set to a string ending in
+.Qq .UTF-8 ,
+.Nm
+respects character display widths when columnating output.
+Otherwise, non-ASCII bytes are replaced by question marks.
.It Ev TZ
The time zone to use when displaying dates.
See
diff --git a/bin/ls/ls.c b/bin/ls/ls.c
index da93dd91e10..6341bfc6fa2 100644
--- a/bin/ls/ls.c
+++ b/bin/ls/ls.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ls.c,v 1.43 2015/10/09 01:37:06 deraadt Exp $ */
+/* $OpenBSD: ls.c,v 1.44 2015/12/01 18:36:13 schwarze Exp $ */
/* $NetBSD: ls.c,v 1.18 1996/07/09 09:16:29 mycroft Exp $ */
/*
@@ -48,6 +48,7 @@
#include <string.h>
#include <unistd.h>
#include <limits.h>
+#include <locale.h>
#include <util.h>
#include "ls.h"
@@ -103,6 +104,10 @@ ls_main(int argc, char *argv[])
int kflag = 0, width = 0;
char *p;
+#ifndef SMALL
+ setlocale(LC_CTYPE, "");
+#endif
+
/* Terminal defaults to -Cq, non-terminal defaults to -1. */
if (isatty(STDOUT_FILENO)) {
if ((p = getenv("COLUMNS")) != NULL)
@@ -428,6 +433,7 @@ display(FTSENT *p, FTSENT *list)
ino_t maxinode;
int bcfile, flen, glen, ulen, maxflags, maxgroup, maxuser;
int entries, needstats;
+ int width;
char *user, *group, buf[21]; /* 64 bits == 20 digits */
char nuser[12], ngroup[12];
char *flags = NULL;
@@ -474,8 +480,8 @@ display(FTSENT *p, FTSENT *list)
continue;
}
}
- if (cur->fts_namelen > maxlen)
- maxlen = cur->fts_namelen;
+ if ((width = mbsprint(cur->fts_name, 0)) > maxlen)
+ maxlen = width;
if (needstats) {
sp = cur->fts_statp;
if (sp->st_blocks > maxblock)
diff --git a/bin/ls/print.c b/bin/ls/print.c
index 6709c7b27e3..6af6db3366f 100644
--- a/bin/ls/print.c
+++ b/bin/ls/print.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: print.c,v 1.34 2015/03/15 00:41:27 millert Exp $ */
+/* $OpenBSD: print.c,v 1.35 2015/12/01 18:36:13 schwarze Exp $ */
/* $NetBSD: print.c,v 1.15 1996/12/11 03:25:39 thorpej Exp $ */
/*
@@ -122,7 +122,7 @@ printlong(DISPLAY *dp)
printtime(sp->st_ctime);
else
printtime(sp->st_mtime);
- (void)putname(p->fts_name);
+ (void)mbsprint(p->fts_name, 1);
if (f_type || (f_typedir && S_ISDIR(sp->st_mode)))
(void)printtype(sp->st_mode);
if (S_ISLNK(sp->st_mode))
@@ -231,7 +231,7 @@ printaname(FTSENT *p, u_long inodefield, u_long sizefield)
if (f_size)
chcnt += printf("%*qd ",
(int)sizefield, howmany(sp->st_blocks, blocksize));
- chcnt += putname(p->fts_name);
+ chcnt += mbsprint(p->fts_name, 1);
if (f_type || (f_typedir && S_ISDIR(sp->st_mode)))
chcnt += printtype(sp->st_mode);
return (chcnt);
@@ -310,7 +310,8 @@ printstream(DISPLAY *dp)
continue;
if (col > 0) {
(void)putchar(','), col++;
- if (col + 1 + extwidth + p->fts_namelen >= termwidth)
+ if (col + 1 + extwidth + mbsprint(p->fts_name, 0) >=
+ termwidth)
(void)putchar('\n'), col = 0;
else
(void)putchar(' '), col++;
@@ -361,7 +362,7 @@ printlink(FTSENT *p)
}
path[lnklen] = '\0';
(void)printf(" -> ");
- (void)putname(path);
+ (void)mbsprint(path, 1);
}
static void
diff --git a/bin/ls/utf8.c b/bin/ls/utf8.c
new file mode 100644
index 00000000000..3825c531f16
--- /dev/null
+++ b/bin/ls/utf8.c
@@ -0,0 +1,51 @@
+/* $OpenBSD: utf8.c,v 1.1 2015/12/01 18:36:13 schwarze Exp $ */
+
+/*
+ * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef SMALL
+#include <stdio.h>
+#include <stdlib.h>
+#include <wchar.h>
+
+int
+mbsprint(const char *mbs, int print)
+{
+ wchar_t wc;
+ int len; /* length in bytes of the current multibyte character */
+ int width; /* display width of a single Unicode char */
+ int total_width; /* display width of the whole string */
+
+ for (total_width = 0; *mbs != '\0'; mbs += len) { /* one multibyte character per iteration */
+ if ((len = mbtowc(&wc, mbs, MB_CUR_MAX)) == -1) { /* invalid or incomplete sequence */
+ (void)mbtowc(NULL, NULL, MB_CUR_MAX); /* reset the conversion state after the error */
+ if (print)
+ putchar('?');
+ total_width++;
+ len = 1; /* skip only the offending byte, then resume decoding */
+ } else if ((width = wcwidth(wc)) == -1) { /* decodable but non-printable: shown as one '?' */
+ if (print)
+ putchar('?');
+ total_width++;
+ } else {
+ if (print)
+ fwrite(mbs, 1, len, stdout); /* emit the original bytes unchanged */
+ total_width += width;
+ }
+ }
+ return total_width; /* computed the same way whether or not print is set */
+}
+#endif
diff --git a/bin/ls/util.c b/bin/ls/util.c
index 6ba1a7e2cee..d9a0552fe04 100644
--- a/bin/ls/util.c
+++ b/bin/ls/util.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: util.c,v 1.16 2013/11/21 15:54:45 deraadt Exp $ */
+/* $OpenBSD: util.c,v 1.17 2015/12/01 18:36:13 schwarze Exp $ */
/* $NetBSD: util.c,v 1.12 1995/09/07 06:43:02 jtc Exp $ */
/*
@@ -45,15 +45,20 @@
#include "ls.h"
#include "extern.h"
+#ifdef SMALL
int
-putname(char *name)
+mbsprint(const char *name, int print)
{
int len;
+ if (print == 0)
+ return strlen(name);
+
for (len = 0; *name; len++, name++)
putchar((!isprint((unsigned char)*name) && f_nonprint) ? '?' : *name);
return len;
}
+#endif
void
usage(void)