diff options
author | Stefan Sperling <stsp@cvs.openbsd.org> | 2010-07-27 16:59:05 +0000 |
---|---|---|
committer | Stefan Sperling <stsp@cvs.openbsd.org> | 2010-07-27 16:59:05 +0000 |
commit | ca2246f7206f312d3507d148d8fca7b5028c4088 (patch) | |
tree | ce30da3359d783bea6fe127d1b15a6d9b6a52b33 /lib/libc | |
parent | 357ef0a59acd38d0252d97a185411096ea76c5dd (diff) | |
Replace the single-byte placeholders for the multi-byte/wide-character
conversion interfaces of libc (mbrtowc(3) and friends) with new
implementations that internally call an API based on NetBSD's citrus.
This allows us to support locales with multi-byte character encodings.
Provide two implementations of the citrus-based API: one based on the old
single-byte placeholders for use with our existing single-byte character
locales (C, ISO8859-*, KOI8, CP1251, etc.), and one that provides support
for UTF-8 encoded characters (code based on FreeBSD's implementation).
Install the en_US.UTF-8 ctype locale support file, and allow the UTF-8
ctype locale to be enabled via setlocale(3) (export LC_CTYPE='en_US.UTF-8').
A lot of programs, especially from ports, will now start using UTF-8 if the
UTF-8 locale is enabled. Use at your own risk, and please report any breakage.
Note that ncurses-based programs cannot display UTF-8 right now; this is being
worked on.
To prevent install media growth, add vfprintf(3) and mbrtowc(3) to libstubs.
The mbrtowc stub was copied unchanged from its old single-byte placeholder.
vfprintf.c doesn't need to be copied, just put in .PATH (hint by fgsch@).
Testing by myself, naddy, sthen, nicm, espie, armani, Dmitrij D. Czarkoff.
ok matthieu espie millert sthen nicm deraadt
Diffstat (limited to 'lib/libc')
24 files changed, 1259 insertions, 155 deletions
diff --git a/lib/libc/Makefile.inc b/lib/libc/Makefile.inc index 28622a6da8e..170a44eeb5b 100644 --- a/lib/libc/Makefile.inc +++ b/lib/libc/Makefile.inc @@ -1,4 +1,4 @@ -# $OpenBSD: Makefile.inc,v 1.14 2010/02/03 20:49:00 miod Exp $ +# $OpenBSD: Makefile.inc,v 1.15 2010/07/27 16:59:03 stsp Exp $ # # This file contains make rules used to build libc # @@ -30,6 +30,7 @@ AINC+= -nostdinc -idirafter ${DESTDIR}/usr/include .include "${LIBCSRCDIR}/db/Makefile.inc" .include "${LIBCSRCDIR}/dlfcn/Makefile.inc" +.include "${LIBCSRCDIR}/citrus/Makefile.inc" .include "${LIBCSRCDIR}/compat-43/Makefile.inc" .include "${LIBCSRCDIR}/gen/Makefile.inc" .include "${LIBCSRCDIR}/crypt/Makefile.inc" diff --git a/lib/libc/citrus/Makefile.inc b/lib/libc/citrus/Makefile.inc new file mode 100644 index 00000000000..b1466442dd9 --- /dev/null +++ b/lib/libc/citrus/Makefile.inc @@ -0,0 +1,7 @@ +# $OpenBSD: Makefile.inc,v 1.1 2010/07/27 16:59:03 stsp Exp $ + +# citrus sources +.PATH: ${LIBCSRCDIR}/citrus + +SRCS+= citrus_ctype.c citrus_none.c citrus_utf8.c +CFLAGS+=-I${.CURDIR} diff --git a/lib/libc/citrus/citrus_ctype.c b/lib/libc/citrus/citrus_ctype.c new file mode 100644 index 00000000000..c869519bff7 --- /dev/null +++ b/lib/libc/citrus/citrus_ctype.c @@ -0,0 +1,60 @@ +/* $OpenBSD: citrus_ctype.c,v 1.1 2010/07/27 16:59:03 stsp Exp $ */ +/* $NetBSD: citrus_ctype.c,v 1.5 2008/06/14 16:01:07 tnozaki Exp $ */ + +/*- + * Copyright (c)1999, 2000, 2001, 2002 Citrus Project, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#include <sys/types.h> +#include <errno.h> +#include <stdlib.h> +#include <wchar.h> +#include "citrus_ctype.h" +#include "citrus_none.h" +#include "citrus_utf8.h" + +struct _citrus_ctype_rec _citrus_ctype_none = { + &_citrus_none_ctype_ops, /* cc_ops */ +}; + +struct _citrus_ctype_rec _citrus_ctype_utf8 = { + &_citrus_utf8_ctype_ops, /* cc_ops */ +}; + +_citrus_ctype_open(struct _citrus_ctype_rec **rcc, char const *encname) +{ + if (!strcmp(encname, "NONE")) { + *rcc = &_citrus_ctype_none; + __mb_cur_max = 1; + return (0); + } else if (!strcmp(encname, "UTF8")) { + *rcc = &_citrus_ctype_utf8; + __mb_cur_max = _CITRUS_UTF8_MB_CUR_MAX; + return (0); + } + + return (-1); +} diff --git a/lib/libc/citrus/citrus_ctype.h b/lib/libc/citrus/citrus_ctype.h index 28356ab079f..5e69b3e8076 100644 --- a/lib/libc/citrus/citrus_ctype.h +++ b/lib/libc/citrus/citrus_ctype.h @@ -1,4 +1,4 @@ -/* $OpenBSD: citrus_ctype.h,v 1.1 2005/08/07 10:16:23 espie Exp $ */ +/* $OpenBSD: 
citrus_ctype.h,v 1.2 2010/07/27 16:59:03 stsp Exp $ */ /* $NetBSD: citrus_ctype.h,v 1.2 2003/03/05 20:18:15 tshiozak Exp $ */ /*- @@ -33,4 +33,6 @@ #include "citrus_ctype_local.h" +extern struct _citrus_ctype_rec _citrus_ctype_none; + #endif diff --git a/lib/libc/citrus/citrus_ctype_local.h b/lib/libc/citrus/citrus_ctype_local.h index e3bcd923be2..8ada0ef3d2c 100644 --- a/lib/libc/citrus/citrus_ctype_local.h +++ b/lib/libc/citrus/citrus_ctype_local.h @@ -1,6 +1,4 @@ -#ifndef _CITRUS_CTYPE_LOCAL_H_ -#define _CITRUS_CTYPE_LOCAL_H_ -/* $OpenBSD: citrus_ctype_local.h,v 1.1 2005/08/07 10:16:23 espie Exp $ */ +/* $OpenBSD: citrus_ctype_local.h,v 1.2 2010/07/27 16:59:03 stsp Exp $ */ /* $NetBSD: citrus_ctype_local.h,v 1.2 2003/03/05 20:18:15 tshiozak Exp $ */ /*- @@ -30,7 +28,57 @@ * */ -#define _CITRUS_DEFAULT_CTYPE_NAME "NONE" +#ifndef _CITRUS_CTYPE_LOCAL_H_ +#define _CITRUS_CTYPE_LOCAL_H_ + +#define _CITRUS_CTYPE_DECLS(_e_) \ +size_t _citrus_##_e_##_ctype_mbrtowc(wchar_t * __restrict, \ + const char * __restrict, size_t, \ + void * __restrict); \ +int _citrus_##_e_##_ctype_mbsinit(const void * __restrict); \ +size_t _citrus_##_e_##_ctype_mbsrtowcs(wchar_t * __restrict, \ + const char ** __restrict, \ + size_t, void * __restrict); \ +size_t _citrus_##_e_##_ctype_wcrtomb(char * __restrict, wchar_t, \ + void * __restrict); \ +size_t _citrus_##_e_##_ctype_wcsrtombs(char * __restrict, \ + const wchar_t ** __restrict, \ + size_t, void * __restrict); \ -#endif +#define _CITRUS_CTYPE_DEF_OPS(_e_) \ +struct _citrus_ctype_ops_rec _citrus_##_e_##_ctype_ops = { \ + /* co_mbrtowc */ &_citrus_##_e_##_ctype_mbrtowc, \ + /* co_mbsinit */ &_citrus_##_e_##_ctype_mbsinit, \ + /* co_mbsrtowcs */ &_citrus_##_e_##_ctype_mbsrtowcs, \ + /* co_wcrtomb */ &_citrus_##_e_##_ctype_wcrtomb, \ + /* co_wcsrtombs */ &_citrus_##_e_##_ctype_wcsrtombs, \ +} +typedef size_t (*_citrus_ctype_mbrtowc_t) + (wchar_t * __restrict, const char * __restrict, + size_t, void * __restrict); +typedef int 
(*_citrus_ctype_mbsinit_t) (const void * __restrict); +typedef size_t (*_citrus_ctype_mbsrtowcs_t) + (wchar_t * __restrict, const char ** __restrict, + size_t, void * __restrict); +typedef size_t (*_citrus_ctype_wcrtomb_t) + (char * __restrict, wchar_t, void * __restrict); +typedef size_t (*_citrus_ctype_wcsrtombs_t) + (char * __restrict, const wchar_t ** __restrict, + size_t, void * __restrict); + +struct _citrus_ctype_ops_rec { + _citrus_ctype_mbrtowc_t co_mbrtowc; + _citrus_ctype_mbsinit_t co_mbsinit; + _citrus_ctype_mbsrtowcs_t co_mbsrtowcs; + _citrus_ctype_wcrtomb_t co_wcrtomb; + _citrus_ctype_wcsrtombs_t co_wcsrtombs; +}; + +#define _CITRUS_DEFAULT_CTYPE_NAME "NONE" + +struct _citrus_ctype_rec { + struct _citrus_ctype_ops_rec *cc_ops; +}; + +#endif diff --git a/lib/libc/locale/multibyte_sb.c b/lib/libc/citrus/citrus_none.c index 4067b154b02..b7b039ee773 100644 --- a/lib/libc/locale/multibyte_sb.c +++ b/lib/libc/citrus/citrus_none.c @@ -1,8 +1,8 @@ -/* $OpenBSD: multibyte_sb.c,v 1.7 2008/08/23 07:51:35 djm Exp $ */ -/* $NetBSD: multibyte_sb.c,v 1.4 2003/08/07 16:43:04 agc Exp $ */ +/* $OpenBSD: citrus_none.c,v 1.1 2010/07/27 16:59:03 stsp Exp $ */ +/* $NetBSD: citrus_none.c,v 1.18 2008/06/14 16:01:07 tnozaki Exp $ */ -/* - * Copyright (c) 1991 The Regents of the University of California. +/*- + * Copyright (c)2002 Citrus Project, * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -13,14 +13,11 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
* - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) @@ -30,80 +27,58 @@ * SUCH DAMAGE. */ +#include <sys/cdefs.h> +#include <sys/types.h> + #include <errno.h> +#include <string.h> +#include <stdio.h> #include <stdlib.h> +#include <stddef.h> #include <wchar.h> -/* - * Stub multibyte character functions. - * This cheezy implementation is fixed to the native single-byte - * character set. 
- */ +#include "citrus_ctype.h" +#include "citrus_none.h" -/*ARGSUSED*/ -int -mbsinit(const mbstate_t *ps) -{ +_CITRUS_CTYPE_DEF_OPS(none); - return 1; -} - -/*ARGSUSED*/ size_t -mbrlen(const char *s, size_t n, mbstate_t *ps) -{ - - /* ps appears to be unused */ - - if (s == NULL || *s == '\0') - return 0; - if (n == 0) - return (size_t)-1; - return 1; -} - -int -mblen(const char *s, size_t n) -{ - - /* s may be NULL */ - - return mbrlen(s, n, NULL); -} - /*ARGSUSED*/ -size_t -wcrtomb(char *s, wchar_t wchar, mbstate_t *ps) +_citrus_none_ctype_mbrtowc(wchar_t * __restrict pwc, + const char * __restrict s, size_t n, + void * __restrict pspriv) { - + /* pwc may be NULL */ /* s may be NULL */ - /* ps appears to be unused */ + /* pspriv appears to be unused */ if (s == NULL) return 0; - - *s = (char) wchar; - return 1; + if (n == 0) + return (size_t)-2; + if (pwc) + *pwc = (wchar_t)(unsigned char)*s; + return (*s != '\0'); } int -wctomb(char *s, wchar_t wchar) +/*ARGSUSED*/ +_citrus_none_ctype_mbsinit(const void * __restrict pspriv) { - - /* s may be NULL */ - - return wcrtomb(s, wchar, NULL); + return (1); /* always initial state */ } -/*ARGSUSED*/ size_t -mbsrtowcs(wchar_t *pwcs, const char **s, size_t n, mbstate_t *ps) +/*ARGSUSED*/ +_citrus_none_ctype_mbsrtowcs(wchar_t * __restrict pwcs, + const char ** __restrict s, size_t n, + void * __restrict pspriv) { int count = 0; /* pwcs may be NULL */ /* s may be NULL */ - /* ps appears to be unused */ + /* pspriv appears to be unused */ if (!s || !*s) return 0; @@ -128,25 +103,31 @@ mbsrtowcs(wchar_t *pwcs, const char **s, size_t n, mbstate_t *ps) } size_t -mbstowcs(wchar_t *pwcs, const char *s, size_t n) +/*ARGSUSED*/ +_citrus_none_ctype_wcrtomb(char * __restrict s, + wchar_t wc, void * __restrict pspriv) { + /* s may be NULL */ + /* ps appears to be unused */ - /* pwcs may be NULL */ - if (pwcs == NULL) - return strlen(s); + if (s == NULL) + return 0; - return mbsrtowcs(pwcs, &s, n, NULL); + *s = (char) wc; + return 1; } 
-/*ARGSUSED*/ size_t -wcsrtombs(char *s, const wchar_t **pwcs, size_t n, mbstate_t *ps) +/*ARGSUSED*/ +_citrus_none_ctype_wcsrtombs(char * __restrict s, + const wchar_t ** __restrict pwcs, size_t n, + void * __restrict pspriv) { int count = 0; /* s may be NULL */ /* pwcs may be NULL */ - /* ps appears to be unused */ + /* pspriv appears to be unused */ if (pwcs == NULL || *pwcs == NULL) return (0); @@ -167,46 +148,3 @@ wcsrtombs(char *s, const wchar_t **pwcs, size_t n, mbstate_t *ps) return count; } - -size_t -wcstombs(char *s, const wchar_t *pwcs, size_t n) -{ - - /* s may be NULL */ - /* pwcs may be NULL */ - - return wcsrtombs(s, &pwcs, n, NULL); -} - -wint_t -btowc(int c) -{ - if (c == EOF || c & ~0xFF) - return WEOF; - return (wint_t)c; -} - -int -wctob(wint_t c) -{ - if (c == WEOF || c & ~0xFF) - return EOF; - return (int)c; -} - -int -wcscoll(const wchar_t *s1, const wchar_t *s2) -{ - while (*s1 == *s2++) - if (*s1++ == 0) - return (0); - return ((unsigned char)(*s1) - (unsigned char)(*--s2)); -} - -size_t -wcsxfrm(wchar_t *dest, const wchar_t *src, size_t n) -{ - if (n == 0) - return wcslen(src); - return wcslcpy(dest, src, n); -} diff --git a/lib/libc/citrus/citrus_none.h b/lib/libc/citrus/citrus_none.h new file mode 100644 index 00000000000..096e3794177 --- /dev/null +++ b/lib/libc/citrus/citrus_none.h @@ -0,0 +1,37 @@ +/* $OpenBSD: citrus_none.h,v 1.1 2010/07/27 16:59:03 stsp Exp $ */ +/* $NetBSD: citrus_none.h,v 1.3 2003/06/25 09:51:38 tshiozak Exp $ */ + +/*- + * Copyright (c)2002 Citrus Project, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _CITRUS_NONE_H_ +#define _CITRUS_NONE_H_ + +extern struct _citrus_ctype_ops_rec _citrus_none_ctype_ops; + +_CITRUS_CTYPE_DECLS(none); + +#endif diff --git a/lib/libc/citrus/citrus_utf8.c b/lib/libc/citrus/citrus_utf8.c new file mode 100644 index 00000000000..d28670c9eb7 --- /dev/null +++ b/lib/libc/citrus/citrus_utf8.c @@ -0,0 +1,416 @@ + +/* $OpenBSD: citrus_utf8.c,v 1.1 2010/07/27 16:59:03 stsp Exp $ */ + +/*- + * Copyright (c) 2002-2004 Tim J. Robbins + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#include <sys/errno.h> +#include <sys/param.h> +#include <sys/types.h> +#include <sys/limits.h> + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include <wchar.h> + +#include "citrus_ctype.h" +#include "citrus_utf8.h" + +_CITRUS_CTYPE_DEF_OPS(utf8); + +struct _utf8_state { + wchar_t ch; + int want; + wchar_t lbound; +}; + +size_t +/*ARGSUSED*/ +_citrus_utf8_ctype_mbrtowc(wchar_t * __restrict pwc, + const char * __restrict s, size_t n, + void * __restrict pspriv) +{ + struct _utf8_state *us; + int ch, i, mask, want; + wchar_t lbound, wch; + + us = (struct _utf8_state *)pspriv; + + if (us->want < 0 || us->want > _CITRUS_UTF8_MB_CUR_MAX) { + errno = EINVAL; + return ((size_t)-1); + } + + if (s == NULL) { + s = ""; + n = 1; + pwc = NULL; + } + + if (n == 0) { + /* Incomplete multibyte sequence */ + return ((size_t)-2); + } + + if (us->want == 0 && ((ch = (unsigned char)*s) & ~0x7f) == 0) { + /* Fast path 
for plain ASCII characters. */ + if (pwc != NULL) + *pwc = ch; + return (ch != '\0' ? 1 : 0); + } + + if (us->want == 0) { + /* + * Determine the number of octets that make up this character + * from the first octet, and a mask that extracts the + * interesting bits of the first octet. We already know + * the character is at least two bytes long. + * + * We also specify a lower bound for the character code to + * detect redundant, non-"shortest form" encodings. For + * example, the sequence C0 80 is _not_ a legal representation + * of the null character. This enforces a 1-to-1 mapping + * between character codes and their multibyte representations. + */ + ch = (unsigned char)*s; + if ((ch & 0x80) == 0) { + mask = 0x7f; + want = 1; + lbound = 0; + } else if ((ch & 0xe0) == 0xc0) { + mask = 0x1f; + want = 2; + lbound = 0x80; + } else if ((ch & 0xf0) == 0xe0) { + mask = 0x0f; + want = 3; + lbound = 0x800; + } else if ((ch & 0xf8) == 0xf0) { + mask = 0x07; + want = 4; + lbound = 0x10000; + } else { + /* + * Malformed input; input is not UTF-8. + * See RFC 3629. + */ + errno = EILSEQ; + return ((size_t)-1); + } + } else { + want = us->want; + lbound = us->lbound; + } + + /* + * Decode the octet sequence representing the character in chunks + * of 6 bits, most significant first. + */ + if (us->want == 0) + wch = (unsigned char)*s++ & mask; + else + wch = us->ch; + for (i = (us->want == 0) ? 1 : 0; i < MIN(want, n); i++) { + if ((*s & 0xc0) != 0x80) { + /* + * Malformed input; bad characters in the middle + * of a character. + */ + errno = EILSEQ; + return ((size_t)-1); + } + wch <<= 6; + wch |= *s++ & 0x3f; + } + if (i < want) { + /* Incomplete multibyte sequence. */ + us->want = want - i; + us->lbound = lbound; + us->ch = wch; + return ((size_t)-2); + } + if (wch < lbound) { + /* + * Malformed input; redundant encoding. + */ + errno = EILSEQ; + return ((size_t)-1); + } + if (pwc != NULL) + *pwc = wch; + us->want = 0; + return (wch == L'\0' ? 
0 : want); +} + +int +/*ARGSUSED*/ +_citrus_utf8_ctype_mbsinit(const void * __restrict pspriv) +{ + return (pspriv == NULL || + ((const struct _utf8_state *)pspriv)->want == 0); +} + +size_t +/*ARGSUSED*/ +_citrus_utf8_ctype_mbsrtowcs(wchar_t * __restrict pwcs, + const char ** __restrict s, size_t n, + void * __restrict pspriv) +{ + struct _utf8_state *us; + const char *src; + size_t nchr; + wchar_t wc; + size_t nb; + + us = (struct _utf8_state *)pspriv; + src = *s; + nchr = 0; + + if (pwcs == NULL) { + /* + * The fast path in the loop below is not safe if an ASCII + * character appears as anything but the first byte of a + * multibyte sequence. Check now to avoid doing it in the loop. + */ + if (us->want > 0 && (signed char)*src > 0) { + errno = EILSEQ; + return ((size_t)-1); + } + for (;;) { + if ((signed char)*src > 0) { + /* + * Fast path for plain ASCII characters + * excluding NUL. + */ + nb = 1; + } else { + nb = _citrus_utf8_ctype_mbrtowc(&wc, src, + _CITRUS_UTF8_MB_CUR_MAX, us); + if (nb == (size_t)-1) { + /* Invalid sequence. */ + return (nb); + } + if (nb == 0 || nb == (size_t)-2) { + return (nchr); + } + } + + src += nb; + nchr++; + } + /*NOTREACHED*/ + } + + /* + * The fast path in the loop below is not safe if an ASCII + * character appears as anything but the first byte of a + * multibyte sequence. Check now to avoid doing it in the loop. + */ + if (n > 0 && us->want > 0 && (signed char)*src > 0) { + errno = EILSEQ; + return ((size_t)-1); + } + while (n-- > 0) { + if ((signed char)*src > 0) { + /* + * Fast path for plain ASCII characters + * excluding NUL. 
+ */ + *pwcs = (wchar_t)*src; + nb = 1; + } else { + nb = _citrus_utf8_ctype_mbrtowc(pwcs, src, n, us); + if (nb == (size_t)-1) { + *s = src; + return (nb); + } + if (nb == (size_t)-2) { + *s = src; + return (nchr); + } + if (nb == 0) { + *s = NULL; + return (nchr); + } + } + src += nb; + nchr++; + pwcs++; + } + *s = src; + return (nchr); +} + +size_t +/*ARGSUSED*/ +_citrus_utf8_ctype_wcrtomb(char * __restrict s, + wchar_t wc, void * __restrict pspriv) +{ + struct _utf8_state *us; + unsigned char lead; + int i, len; + + us = (struct _utf8_state *)pspriv; + + if (us->want != 0) { + errno = EINVAL; + return ((size_t)-1); + } + + if (s == NULL) { + /* Reset to initial shift state (no-op) */ + return (1); + } + + if ((wc & ~0x7f) == 0) { + /* Fast path for plain ASCII characters. */ + *s = (char)wc; + return (1); + } + + /* + * Determine the number of octets needed to represent this character. + * We always output the shortest sequence possible. Also specify the + * first few bits of the first octet, which contains the information + * about the sequence length. + */ + if ((wc & ~0x7f) == 0) { + lead = 0; + len = 1; + } else if ((wc & ~0x7ff) == 0) { + lead = 0xc0; + len = 2; + } else if ((wc & ~0xffff) == 0) { + lead = 0xe0; + len = 3; + } else if ((wc & ~0x1fffff) == 0) { + lead = 0xf0; + len = 4; + } else { + errno = EILSEQ; + return ((size_t)-1); + } + + /* + * Output the octets representing the character in chunks + * of 6 bits, least significant last. The first octet is + * a special case because it contains the sequence length + * information. 
+ */ + for (i = len - 1; i > 0; i--) { + s[i] = (wc & 0x3f) | 0x80; + wc >>= 6; + } + *s = (wc & 0xff) | lead; + + return (len); +} + +size_t +/*ARGSUSED*/ +_citrus_utf8_ctype_wcsrtombs(char * __restrict s, + const wchar_t ** __restrict pwcs, size_t n, + void * __restrict pspriv) +{ + struct _utf8_state *us; + char buf[_CITRUS_UTF8_MB_CUR_MAX]; + const wchar_t *src; + size_t nbytes; + size_t nb; + + us = (struct _utf8_state *)pspriv; + + if (us->want != 0) { + errno = EINVAL; + return ((size_t)-1); + } + + src = *pwcs; + nbytes = 0; + + if (s == NULL) { + for (;;) { + if (0 <= *src && *src < 0x80) + /* Fast path for plain ASCII characters. */ + nb = 1; + else { + nb = _citrus_utf8_ctype_wcrtomb(buf, *src, us); + if (nb == (size_t)-1) { + /* Invalid character */ + return (nb); + } + } + if (*src == L'\0') { + return (nbytes + nb - 1); + } + src++; + nbytes += nb; + } + /*NOTREACHED*/ + } + + while (n > 0) { + if (0 <= *src && *src < 0x80) { + /* Fast path for plain ASCII characters. */ + nb = 1; + *s = *src; + } else if (n > (size_t)_CITRUS_UTF8_MB_CUR_MAX) { + /* Enough space to translate in-place. */ + nb = _citrus_utf8_ctype_wcrtomb(s, *src, us); + if (nb == (size_t)-1) { + *pwcs = src; + return (nb); + } + } else { + /* + * May not be enough space; use temp. buffer. + */ + nb = _citrus_utf8_ctype_wcrtomb(buf, *src, us); + if (nb == (size_t)-1) { + *pwcs = src; + return (nb); + } + if (nb > n) + /* MB sequence for character won't fit. 
*/ + break; + memcpy(s, buf, nb); + } + if (*src == L'\0') { + *pwcs = NULL; + return (nbytes + nb - 1); + } + src++; + s += nb; + n -= nb; + nbytes += nb; + } + *pwcs = src; + return (nbytes); +} diff --git a/lib/libc/citrus/citrus_utf8.h b/lib/libc/citrus/citrus_utf8.h new file mode 100644 index 00000000000..d7a7bd87794 --- /dev/null +++ b/lib/libc/citrus/citrus_utf8.h @@ -0,0 +1,39 @@ +/* $OpenBSD */ +/* $NetBSD: citrus_utf8.h,v 1.2 2003/06/25 09:51:49 tshiozak Exp $ */ + +/*- + * Copyright (c)2002 Citrus Project, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _CITRUS_UTF8_H_ +#define _CITRUS_UTF8_H_ + +extern struct _citrus_ctype_ops_rec _citrus_utf8_ctype_ops; + +#define _CITRUS_UTF8_MB_CUR_MAX 4 + +_CITRUS_CTYPE_DECLS(utf8); + +#endif diff --git a/lib/libc/locale/Makefile.inc b/lib/libc/locale/Makefile.inc index 8800b1433eb..987996b7269 100644 --- a/lib/libc/locale/Makefile.inc +++ b/lib/libc/locale/Makefile.inc @@ -1,15 +1,15 @@ -# $OpenBSD: Makefile.inc,v 1.15 2010/02/03 20:49:00 miod Exp $ +# $OpenBSD: Makefile.inc,v 1.16 2010/07/27 16:59:03 stsp Exp $ # locale sources .PATH: ${LIBCSRCDIR}/arch/${MACHINE_CPU}/locale ${LIBCSRCDIR}/locale -SRCS+= _def_messages.c _def_monetary.c _def_numeric.c _def_time.c \ - localeconv.c nl_langinfo.c setlocale.c iswctype.c mbrtowc_sb.c \ - multibyte_sb.c __mb_cur_max.c \ - wcstof.c wcstod.c wcstold.c \ - wcstol.c wcstoul.c wcstoll.c wcstoull.c wcstoimax.c wcstoumax.c \ +SRCS+= btowc.c _def_messages.c _def_monetary.c _def_numeric.c _def_time.c \ + localeconv.c nl_langinfo.c setlocale.c iswctype.c __mb_cur_max.c \ + mblen.c mbrlen.c mbstowcs.c mbtowc.c multibyte_citrus.c wcscoll.c \ + wcstombs.c wctob.c wctomb.c wcstof.c wcstod.c wcstold.c wcstol.c \ + wcstoul.c wcstoll.c wcstoull.c wcstoimax.c wcstoumax.c \ setrunelocale.c runeglue.c rune.c runetable.c ___runetype_mb.c \ - _wctrans.c + _wctrans.c wcsxfrm.c MAN+= nl_langinfo.3 setlocale.3 iswalnum.3 towlower.3 \ btowc.3 mblen.3 mbrlen.3 mbrtowc.3 mbsinit.3 mbsrtowcs.3 \ @@ -37,4 +37,4 @@ MLINKS+=setlocale.3 localeconv.3 \ wcstol.3 wcstoimax.3 \ wcstol.3 wcstoumax.3 -CFLAGS+=-I${.CURDIR} +CFLAGS+=-I${.CURDIR} -I${LIBCSRCDIR}/citrus diff --git a/lib/libc/locale/btowc.c b/lib/libc/locale/btowc.c new file mode 100644 index 00000000000..e1c6893bca9 --- /dev/null +++ b/lib/libc/locale/btowc.c @@ -0,0 +1,54 @@ +/* $OpenBSD: btowc.c,v 1.1 2010/07/27 16:59:03 stsp Exp $ */ + +/*- + * Copyright (c) 2002, 2003 Tim J. Robbins. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> + +#include <stdio.h> +#include <string.h> +#include <wchar.h> + +wint_t +btowc(int c) +{ + mbstate_t mbs; + char cc; + wchar_t wc; + + if (c == EOF) + return (WEOF); + /* + * We expect mbrtowc() to return 0 or 1, hence the check for n > 1 + * which detects error return values as well as "impossible" byte + * counts. 
+ */ + memset(&mbs, 0, sizeof(mbs)); + cc = (char)c; + if (mbrtowc(&wc, &cc, 1, &mbs) > 1) + return (WEOF); + return (wc); +} diff --git a/lib/libc/locale/mblen.c b/lib/libc/locale/mblen.c new file mode 100644 index 00000000000..56d718e69f4 --- /dev/null +++ b/lib/libc/locale/mblen.c @@ -0,0 +1,50 @@ +/* $OpenBSD: mblen.c,v 1.1 2010/07/27 16:59:04 stsp Exp $ */ + +/*- + * Copyright (c) 2002-2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> + +#include <stdlib.h> +#include <string.h> +#include <wchar.h> + +int +mblen(const char *s, size_t n) +{ + static mbstate_t mbs; + size_t rval; + + if (s == NULL) { + /* No support for state dependent encodings. 
*/ + memset(&mbs, 0, sizeof(mbs)); + return (0); + } + rval = mbrtowc(NULL, s, n, &mbs); + if (rval == (size_t)-1 || rval == (size_t)-2) + return (-1); + return ((int)rval); +} diff --git a/lib/libc/locale/mbrlen.c b/lib/libc/locale/mbrlen.c new file mode 100644 index 00000000000..97c48d2d6ad --- /dev/null +++ b/lib/libc/locale/mbrlen.c @@ -0,0 +1,41 @@ +/* $OpenBSD: mbrlen.c,v 1.1 2010/07/27 16:59:04 stsp Exp $ */ + +/*- + * Copyright (c) 2002-2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> + +#include <wchar.h> + +size_t +mbrlen(const char * __restrict s, size_t n, mbstate_t * __restrict ps) +{ + static mbstate_t mbs; + + if (ps == NULL) + ps = &mbs; + return (mbrtowc(NULL, s, n, ps)); +} diff --git a/lib/libc/locale/mbstowcs.c b/lib/libc/locale/mbstowcs.c new file mode 100644 index 00000000000..7ea97e4b614 --- /dev/null +++ b/lib/libc/locale/mbstowcs.c @@ -0,0 +1,45 @@ +/* $OpenBSD: mbstowcs.c,v 1.1 2010/07/27 16:59:04 stsp Exp $ */ + +/*- + * Copyright (c) 2002-2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> + +#include <limits.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> + +size_t +mbstowcs(wchar_t * __restrict pwcs, const char * __restrict s, size_t n) +{ + mbstate_t mbs; + const char *sp; + + memset(&mbs, 0, sizeof(mbs)); + sp = s; + return (mbsrtowcs(pwcs, &sp, n, &mbs)); +} diff --git a/lib/libc/locale/mbtowc.c b/lib/libc/locale/mbtowc.c new file mode 100644 index 00000000000..e0dcff0b915 --- /dev/null +++ b/lib/libc/locale/mbtowc.c @@ -0,0 +1,51 @@ +/* $OpenBSD: mbtowc.c,v 1.1 2010/07/27 16:59:04 stsp Exp $ */ + +/*- + * Copyright (c) 2002-2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> + +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include <errno.h> + +int +mbtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n) +{ + static mbstate_t mbs; + size_t rval; + + if (s == NULL) { + /* No support for state dependent encodings. */ + memset(&mbs, 0, sizeof(mbs)); + return (0); + } + rval = mbrtowc(pwc, s, n, &mbs); + if (rval == (size_t)-1 || rval == (size_t)-2) + return (-1); + return ((int)rval); +} diff --git a/lib/libc/locale/multibyte.h b/lib/libc/locale/multibyte.h new file mode 100644 index 00000000000..9a69ada2751 --- /dev/null +++ b/lib/libc/locale/multibyte.h @@ -0,0 +1,50 @@ +/* $OpenBSD: multibyte.h,v 1.1 2010/07/27 16:59:04 stsp Exp $ */ +/* $NetBSD: multibyte.h,v 1.5 2009/01/11 02:46:28 christos Exp $ */ + +/*- + * Copyright (c)2002 Citrus Project, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _MULTIBYTE_H_ +#define _MULTIBYTE_H_ + +typedef struct _RuneStatePriv { + _RuneLocale *__runelocale; + char __private __attribute__((__aligned__)); +} _RuneStatePriv; + +typedef union _RuneState { + mbstate_t __pad; + struct _RuneStatePriv __priv; +#define rs_runelocale __priv.__runelocale +#define rs_private __priv.__private +} _RuneState; +#define _RUNE_STATE_PRIVSIZE (sizeof(mbstate_t)-offsetof(_RuneStatePriv, __private)) + +#define _ps_to_runestate(ps) ((_RuneState *)(void *)(ps)) +#define _ps_to_runelocale(ps) (_ps_to_runestate(ps)->rs_runelocale) +#define _ps_to_private(ps) ((void *)&_ps_to_runestate(ps)->rs_private) + +#endif /*_MULTIBYTE_H_*/ diff --git a/lib/libc/locale/multibyte_citrus.c b/lib/libc/locale/multibyte_citrus.c new file mode 100644 index 00000000000..9993e67fe15 --- /dev/null +++ b/lib/libc/locale/multibyte_citrus.c @@ -0,0 +1,101 @@ +/* $OpenBSD: multibyte_citrus.c,v 1.1 2010/07/27 16:59:04 stsp Exp $ */ +/* $NetBSD: multibyte_amd1.c,v 1.7 2009/01/11 02:46:28 christos Exp $ */ + +/*- + * Copyright (c)2002, 2008 Citrus Project, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#include <sys/types.h> +#include <errno.h> +#include <wchar.h> + +#include "citrus_ctype.h" +#include "rune.h" +#include "multibyte.h" + +int +mbsinit(const mbstate_t *ps) +{ + struct _citrus_ctype_rec *cc; + _RuneLocale *rl; + + if (ps == NULL) + return 1; + + rl = _ps_to_runelocale(ps); + if (rl == NULL) + rl = _CurrentRuneLocale; + cc = rl->rl_citrus_ctype; + return (*cc->cc_ops->co_mbsinit)(ps); +} + +size_t +mbrtowc(wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) +{ + static mbstate_t mbs; + struct _citrus_ctype_rec *cc; + + if (ps == NULL) + ps = &mbs; + cc = _CurrentRuneLocale->rl_citrus_ctype; + return (*cc->cc_ops->co_mbrtowc)(pwc, s, n, _ps_to_private(ps)); +} + +size_t +mbsrtowcs(wchar_t *pwcs, const char **s, size_t n, mbstate_t *ps) +{ + static mbstate_t mbs; + struct _citrus_ctype_rec *cc; + + if (ps == NULL) + ps = &mbs; + cc = _CurrentRuneLocale->rl_citrus_ctype; + return (*cc->cc_ops->co_mbsrtowcs)(pwcs, s, n, _ps_to_private(ps)); +} 
+ +size_t +wcrtomb(char *s, wchar_t wc, mbstate_t *ps) +{ + static mbstate_t mbs; + struct _citrus_ctype_rec *cc; + + if (ps == NULL) + ps = &mbs; + cc = _CurrentRuneLocale->rl_citrus_ctype; + return (*cc->cc_ops->co_wcrtomb)(s, wc, _ps_to_private(ps)); +} + +size_t +wcsrtombs(char *s, const wchar_t **ppwcs, size_t n, mbstate_t *ps) +{ + static mbstate_t mbs; + struct _citrus_ctype_rec *cc; + + if (ps == NULL) + ps = &mbs; + cc = _CurrentRuneLocale->rl_citrus_ctype; + return (*cc->cc_ops->co_wcsrtombs)(s, ppwcs, n, _ps_to_private(ps)); +} diff --git a/lib/libc/locale/runetable.c b/lib/libc/locale/runetable.c index 3324d1baa22..07dce4ba2b0 100644 --- a/lib/libc/locale/runetable.c +++ b/lib/libc/locale/runetable.c @@ -43,6 +43,8 @@ #include <wchar.h> #include "rune.h" #include "rune_local.h" +#include "citrus_ctype.h" +#include "citrus_none.h" #include <stdlib.h> _RuneLocale _DefaultRuneLocale = { @@ -378,7 +380,7 @@ _RuneLocale _DefaultRuneLocale = { { 0, NULL }, NULL, 0, "646", - 0, + &_citrus_ctype_none, { { NULL, NULL, NULL }, { NULL, NULL, NULL }, diff --git a/lib/libc/locale/setrunelocale.c b/lib/libc/locale/setrunelocale.c index bbe4cfb0f6e..8001b0087df 100644 --- a/lib/libc/locale/setrunelocale.c +++ b/lib/libc/locale/setrunelocale.c @@ -1,4 +1,4 @@ -/* $OpenBSD: setrunelocale.c,v 1.4 2008/06/26 05:42:05 ray Exp $ */ +/* $OpenBSD: setrunelocale.c,v 1.5 2010/07/27 16:59:04 stsp Exp $ */ /* $NetBSD: setrunelocale.c,v 1.14 2003/08/07 16:43:07 agc Exp $ */ /*- @@ -151,10 +151,11 @@ found: rl->rl_citrus_ctype = NULL; - if (strcmp(rl->rl_encoding, _CITRUS_DEFAULT_CTYPE_NAME) != 0) { + if (_citrus_ctype_open(&rl->rl_citrus_ctype, rl->rl_encoding)) { _NukeRune(rl); return EINVAL; } + /* register it */ lt = malloc(sizeof(struct localetable)); if (lt == NULL) { diff --git a/lib/libc/locale/wcscoll.c b/lib/libc/locale/wcscoll.c new file mode 100644 index 00000000000..6d476690c54 --- /dev/null +++ b/lib/libc/locale/wcscoll.c @@ -0,0 +1,43 @@ +/* $OpenBSD: wcscoll.c,v 1.1 
2010/07/27 16:59:04 stsp Exp $ */ +/* $NetBSD: wcscoll.c,v 1.1 2003/03/02 22:18:16 tshiozak Exp $ */ + +/*- + * Copyright (c)2003 Citrus Project, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> + +#include <assert.h> +#include <wchar.h> + +/* + * Compare strings with using collating information. + */ +int +wcscoll(const wchar_t *s1, const wchar_t *s2) +{ + /* XXX: LC_COLLATE should be implemented. 
*/ + return (wcscmp(s1, s2)); +} diff --git a/lib/libc/locale/wcstombs.c b/lib/libc/locale/wcstombs.c new file mode 100644 index 00000000000..024e3afe874 --- /dev/null +++ b/lib/libc/locale/wcstombs.c @@ -0,0 +1,45 @@ +/* $OpenBSD: wcstombs.c,v 1.1 2010/07/27 16:59:04 stsp Exp $ */ + +/*- + * Copyright (c) 2002-2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> + +#include <limits.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> + +size_t +wcstombs(char * __restrict s, const wchar_t * __restrict pwcs, size_t n) +{ + mbstate_t mbs; + const wchar_t *pwcsp; + + memset(&mbs, 0, sizeof(mbs)); + pwcsp = pwcs; + return (wcsrtombs(s, &pwcsp, n, &mbs)); +} diff --git a/lib/libc/locale/mbrtowc_sb.c b/lib/libc/locale/wcsxfrm.c index e47e06ae019..d2e9ff3fdfd 100644 --- a/lib/libc/locale/mbrtowc_sb.c +++ b/lib/libc/locale/wcsxfrm.c @@ -1,4 +1,5 @@ -/* $OpenBSD: mbrtowc_sb.c,v 1.4 2005/11/27 20:03:06 cloder Exp $ */ +/* $OpenBSD: wcsxfrm.c,v 1.1 2010/07/27 16:59:04 stsp Exp $ */ +/* $OpenBSD: wcsxfrm.c,v 1.1 2010/07/27 16:59:04 stsp Exp $ */ /* $NetBSD: multibyte_sb.c,v 1.4 2003/08/07 16:43:04 agc Exp $ */ /* @@ -30,35 +31,13 @@ * SUCH DAMAGE. */ -#include <errno.h> -#include <stdlib.h> +#include <sys/cdefs.h> #include <wchar.h> -/*ARGSUSED*/ -size_t -mbrtowc(wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) +size_t +wcsxfrm(wchar_t *dest, const wchar_t *src, size_t n) { - - /* pwc may be NULL */ - /* s may be NULL */ - /* ps appears to be unused */ - - if (s == NULL) - return 0; if (n == 0) - return (size_t)-1; - if (pwc) - *pwc = (wchar_t)(unsigned char)*s; - return (*s != '\0'); -} - -int -mbtowc(wchar_t *pwc, const char *s, size_t n) -{ - - /* pwc may be NULL */ - /* s may be NULL */ - - return mbrtowc(pwc, s, n, NULL); + return wcslen(src); + return wcslcpy(dest, src, n); } - diff --git a/lib/libc/locale/wctob.c b/lib/libc/locale/wctob.c new file mode 100644 index 00000000000..cc5a07c1484 --- /dev/null +++ b/lib/libc/locale/wctob.c @@ -0,0 +1,45 @@ +/* $OpenBSD: wctob.c,v 1.1 2010/07/27 16:59:04 stsp Exp $ */ +/*- + * Copyright (c) 2002-2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> + +#include <limits.h> +#include <stdio.h> +#include <string.h> +#include <wchar.h> + +int +wctob(wint_t c) +{ + mbstate_t mbs; + char buf[MB_LEN_MAX]; + + memset(&mbs, 0, sizeof(mbs)); + if (c == WEOF || wcrtomb(buf, c, &mbs) != 1) + return (EOF); + return ((unsigned char)*buf); +} diff --git a/lib/libc/locale/wctomb.c b/lib/libc/locale/wctomb.c new file mode 100644 index 00000000000..9cd1ee4d362 --- /dev/null +++ b/lib/libc/locale/wctomb.c @@ -0,0 +1,49 @@ +/* $OpenBSD: wctomb.c,v 1.1 2010/07/27 16:59:04 stsp Exp $ */ + +/*- + * Copyright (c) 2002-2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> + +#include <stdlib.h> +#include <string.h> +#include <wchar.h> + +int +wctomb(char *s, wchar_t wchar) +{ + static mbstate_t mbs; + size_t rval; + + if (s == NULL) { + /* No support for state dependent encodings. */ + memset(&mbs, 0, sizeof(mbs)); + return (0); + } + if ((rval = wcrtomb(s, wchar, &mbs)) == (size_t)-1) + return (-1); + return ((int)rval); +} |