summaryrefslogtreecommitdiff
path: root/usr.bin/less/charset.c
diff options
context:
space:
mode:
authorTodd C. Miller <millert@cvs.openbsd.org>2003-04-13 18:26:27 +0000
committerTodd C. Miller <millert@cvs.openbsd.org>2003-04-13 18:26:27 +0000
commit5569f8a6b6c831077f77d24e2c23d2e2d41fb95f (patch)
treeb79fe763d81906631382fbdc80f3b42a44b6c668 /usr.bin/less/charset.c
parent6b9d6b99a4fdda2ee6d9a30dec452d8202fb2017 (diff)
Merge in less-381 w/ local changes and remove obsolete files.
Diffstat (limited to 'usr.bin/less/charset.c')
-rw-r--r--usr.bin/less/charset.c145
1 files changed, 96 insertions, 49 deletions
diff --git a/usr.bin/less/charset.c b/usr.bin/less/charset.c
index 979435e283b..8f4a9c31ad2 100644
--- a/usr.bin/less/charset.c
+++ b/usr.bin/less/charset.c
@@ -1,29 +1,11 @@
-/* $OpenBSD: charset.c,v 1.4 2003/03/13 09:09:32 deraadt Exp $ */
-
/*
- * Copyright (c) 1984,1985,1989,1994,1995 Mark Nudelman
- * All rights reserved.
+ * Copyright (C) 1984-2002 Mark Nudelman
*
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice in the documentation and/or other materials provided with
- * the distribution.
+ * You may distribute under the terms of either the GNU General Public
+ * License or the Less License, as specified in the README file.
*
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
- * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
- * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * For more information about less, or for information on how to
+ * contact the author, see the README file.
*/
@@ -38,20 +20,35 @@
#include <ctype.h>
#endif
+public int utf_mode = 0;
+
/*
* Predefined character sets,
* selected by the LESSCHARSET environment variable.
*/
struct charset {
char *name;
+ int *p_flag;
char *desc;
} charsets[] = {
- { "ascii", "8bcccbcc18b95.b" },
- { "latin1", "8bcccbcc18b95.33b." },
- { "dos", "8bcccbcc12bc5b95.b." },
- { "koi8-r", "8bcccbcc18b95.b128." },
- { "next", "8bcccbcc18b95.bb125.bb" },
- { NULL }
+ { "ascii", NULL, "8bcccbcc18b95.b" },
+ { "dos", NULL, "8bcccbcc12bc5b223.b" },
+ { "ebcdic", NULL, "5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b." },
+ { "IBM-1047", NULL, "4cbcbc3b9cbccbccbb4c6bcc5b3cbbc4bc4bccbc191.b" },
+ { "iso8859", NULL, "8bcccbcc18b95.33b." },
+ { "koi8-r", NULL, "8bcccbcc18b95.b128." },
+ { "next", NULL, "8bcccbcc18b95.bb125.bb" },
+ { "utf-8", &utf_mode, "8bcccbcc18b." },
+ { NULL, NULL, NULL }
+};
+
+struct cs_alias {
+ char *name;
+ char *oname;
+} cs_aliases[] = {
+ { "latin1", "iso8859" },
+ { "latin9", "iso8859" },
+ { NULL, NULL }
};
#define IS_BINARY_CHAR 01
@@ -80,9 +77,9 @@ public int binattr = AT_STANDOUT;
ichardef(s)
char *s;
{
- char *cp;
- int n;
- char v;
+ register char *cp;
+ register int n;
+ register char v;
n = 0;
v = 0;
@@ -135,18 +132,31 @@ ichardef(s)
*/
static int
icharset(name)
- char *name;
+ register char *name;
{
- struct charset *p;
+ register struct charset *p;
+ register struct cs_alias *a;
if (name == NULL || *name == '\0')
return (0);
+ /* First see if the name is an alias. */
+ for (a = cs_aliases; a->name != NULL; a++)
+ {
+ if (strcmp(name, a->name) == 0)
+ {
+ name = a->oname;
+ break;
+ }
+ }
+
for (p = charsets; p->name != NULL; p++)
{
if (strcmp(name, p->name) == 0)
{
ichardef(p->desc);
+ if (p->p_flag != NULL)
+ *(p->p_flag) = 1;
return (1);
}
}
@@ -154,6 +164,7 @@ icharset(name)
error("invalid charset name", NULL_PARG);
quit(QUIT_ERROR);
/*NOTREACHED*/
+ return (0);
}
#if HAVE_LOCALE
@@ -163,10 +174,10 @@ icharset(name)
static void
ilocale()
{
- int c;
+ register int c;
- setlocale(LC_CTYPE, "");
- for (c = 0; c < sizeof(chardef); c++)
+ setlocale(LC_ALL, "");
+ for (c = 0; c < (int) sizeof(chardef); c++)
{
if (isprint(c))
chardef[c] = 0;
@@ -211,36 +222,58 @@ setbinfmt(s)
public void
init_charset()
{
- char *s;
+ register char *s;
- s = getenv("LESSBINFMT");
+ s = lgetenv("LESSBINFMT");
setbinfmt(s);
/*
* See if environment variable LESSCHARSET is defined.
*/
- s = getenv("LESSCHARSET");
+ s = lgetenv("LESSCHARSET");
if (icharset(s))
return;
/*
* LESSCHARSET is not defined: try LESSCHARDEF.
*/
- s = getenv("LESSCHARDEF");
+ s = lgetenv("LESSCHARDEF");
if (s != NULL && *s != '\0')
{
ichardef(s);
return;
}
+
+#if HAVE_STRSTR
+ /*
+ * Check whether LC_ALL, LC_CTYPE or LANG look like UTF-8 is used.
+ */
+ if ((s = lgetenv("LC_ALL")) != NULL ||
+ (s = lgetenv("LC_CTYPE")) != NULL ||
+ (s = lgetenv("LANG")) != NULL)
+ {
+ if (strstr(s, "UTF-8") != NULL || strstr(s, "utf-8") != NULL)
+ if (icharset("utf-8"))
+ return;
+ }
+#endif
+
#if HAVE_LOCALE
/*
* Use setlocale.
*/
ilocale();
#else
+#if MSDOS_COMPILER
/*
- * Default to "ascii".
+ * Default to "dos".
*/
- (void) icharset("ascii");
+ (void) icharset("dos");
+#else
+ /*
+ * Default to "latin1".
+ */
+ (void) icharset("latin1");
+#endif
#endif
}
@@ -249,7 +282,7 @@ init_charset()
*/
public int
binary_char(c)
- int c;
+ unsigned char c;
{
c &= 0377;
return (chardef[c] & IS_BINARY_CHAR);
@@ -278,12 +311,26 @@ prchar(c)
c &= 0377;
if (!control_char(c))
- snprintf(buf, sizeof buf, "%c", c);
+ snprintf(buf, sizeof(buf), "%c", c);
else if (c == ESC)
- snprintf(buf, sizeof buf, "ESC");
- else if (c < 128 && !control_char(c ^ 0100))
- snprintf(buf, sizeof buf, "^%c", c ^ 0100);
+ snprintf(buf, sizeof(buf), "ESC");
+#if IS_EBCDIC_HOST
+ else if (!binary_char(c) && c < 64)
+ snprintf(buf, sizeof(buf), "^%c",
+ /*
+ * This array roughly inverts CONTROL() #defined in less.h,
+ * and should be kept in sync with CONTROL() and IBM-1047.
+ */
+ "@ABC.I.?...KLMNO"
+ "PQRS.JH.XY.."
+ "\\]^_"
+ "......W[.....EFG"
+ "..V....D....TU.Z"[c]);
+#else
+ else if (c < 128 && !control_char(c ^ 0100))
+ snprintf(buf, sizeof(buf), "^%c", c ^ 0100);
+#endif
else
- snprintf(buf, sizeof buf, binfmt, c);
+ snprintf(buf, sizeof(buf), binfmt, c);
return (buf);
}