diff options
author | Stefan Sperling <stsp@cvs.openbsd.org> | 2012-01-18 14:01:39 +0000 |
---|---|---|
committer | Stefan Sperling <stsp@cvs.openbsd.org> | 2012-01-18 14:01:39 +0000 |
commit | 65492c94f0b4a4ef230c79d753844d967d093c16 (patch) | |
tree | a26899a2496837f64049925b8be7ca0d676faa48 /lib/libc/stdio | |
parent | 8b26bf89ccbe3141f9bfc2d16068ae00b9eb9bba (diff) |
Add support for C99 %ls, %lc, and %l[ format directives to scanf(3).
Based on code from FreeBSD.
Tested by myself (full release/install cycle on i386 and sparc64) and ajacoutot.
Man page help from jmc.
Diffstat (limited to 'lib/libc/stdio')
-rw-r--r-- | lib/libc/stdio/Makefile.inc | 4 |
-rw-r--r-- | lib/libc/stdio/scanf.3 | 45 |
-rw-r--r-- | lib/libc/stdio/vfscanf.c | 170 |
3 files changed, 210 insertions, 9 deletions
diff --git a/lib/libc/stdio/Makefile.inc b/lib/libc/stdio/Makefile.inc index b73bd4b877f..14e391690b8 100644 --- a/lib/libc/stdio/Makefile.inc +++ b/lib/libc/stdio/Makefile.inc @@ -1,9 +1,9 @@ -# $OpenBSD: Makefile.inc,v 1.20 2011/11/08 18:30:42 guenther Exp $ +# $OpenBSD: Makefile.inc,v 1.21 2012/01/18 14:01:38 stsp Exp $ # stdio sources .PATH: ${LIBCSRCDIR}/stdio -CFLAGS+=-DFLOATING_POINT -DPRINTF_WIDE_CHAR +CFLAGS+=-DFLOATING_POINT -DPRINTF_WIDE_CHAR -DSCANF_WIDE_CHAR SRCS+= asprintf.c clrerr.c fclose.c fdopen.c feof.c ferror.c fflush.c fgetc.c \ fgetln.c fgetpos.c fgets.c fileno.c findfp.c flags.c fopen.c \ diff --git a/lib/libc/stdio/scanf.3 b/lib/libc/stdio/scanf.3 index ac12278a354..57b3c6d17a9 100644 --- a/lib/libc/stdio/scanf.3 +++ b/lib/libc/stdio/scanf.3 @@ -1,4 +1,4 @@ -.\" $OpenBSD: scanf.3,v 1.19 2011/11/02 22:29:07 schwarze Exp $ +.\" $OpenBSD: scanf.3,v 1.20 2012/01/18 14:01:38 stsp Exp $ .\" .\" Copyright (c) 1990, 1991, 1993 .\" The Regents of the University of California. All rights reserved. @@ -31,7 +31,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd $Mdocdate: November 2 2011 $ +.Dd $Mdocdate: January 18 2012 $ .Dt SCANF 3 .Os .Sh NAME @@ -159,7 +159,24 @@ or that the conversion will be one of and the next pointer is a pointer to .Vt double (rather than -.Vt float ) . +.Vt float ) , +or that the conversion will be one of +.Cm sc[ . +.Pp +If the conversion is one of +.Cm sc[ +the expected conversion input is a multibyte character sequence. +Each multibyte character in the sequence is converted with a call to the +.Fn mbrtowc +function. +The field width specifies the maximum amount of bytes read from the +multibyte character sequence and passed to +.Fn mbrtowc +for conversion. +The next pointer is a pointer to a +.Vt wchar_t +wide-character buffer large enough to accept the converted input sequence +including the terminating NUL wide character which will be added automatically. 
.It Cm ll No (ell ell) Indicates that the conversion will be one of .Cm dioux @@ -291,7 +308,12 @@ Equivalent to Matches a sequence of non-whitespace characters; the next pointer must be a pointer to .Vt char , -and the provided array must be large enough to accept and store +or to +.Vt wchar_t +if the +.Vt l +length modifier is present. +The provided array must be large enough to accept and store the whole sequence and the terminating NUL character. The input string stops at whitespace or at the maximum field width, whichever occurs first. @@ -303,7 +325,12 @@ Matches a sequence of characters consuming the number of bytes specified by the field width (defaults to 1 if unspecified); the next pointer must be a pointer to .Vt char , -and there must be enough room for all the characters +or to +.Vt wchar_t +if the +.Vt l +length modifier is present. +There must be enough room for all the characters (no terminating NUL is added). The usual skip of leading whitespace is suppressed. To skip whitespace first, use an explicit space in the format. @@ -312,7 +339,12 @@ Matches a nonempty sequence of characters from the specified set of accepted characters; the next pointer must be a pointer to .Vt char , -and there must be enough room for all the characters in the string, +or to +.Vt wchar_t +if the +.Vt l +length modifier is present. +There must be enough room for all the characters in the string, plus a terminating NUL character. The usual skip of leading whitespace is suppressed. .Pp @@ -397,6 +429,7 @@ If an error or end-of-file occurs after conversion has begun, the number of conversions which were successfully completed is returned. 
.Sh SEE ALSO .Xr getc 3 , +.Xr mbrtowc 3 , .Xr printf 3 , .Xr strtod 3 , .Xr strtol 3 , diff --git a/lib/libc/stdio/vfscanf.c b/lib/libc/stdio/vfscanf.c index 42c9dfa15b6..10aac0002e4 100644 --- a/lib/libc/stdio/vfscanf.c +++ b/lib/libc/stdio/vfscanf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfscanf.c,v 1.28 2011/11/08 18:30:42 guenther Exp $ */ +/* $OpenBSD: vfscanf.c,v 1.29 2012/01/18 14:01:38 stsp Exp $ */ /*- * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. @@ -37,6 +37,7 @@ #include <stddef.h> #include <stdio.h> #include <stdlib.h> +#include <string.h> #include "local.h" #ifdef FLOATING_POINT @@ -108,6 +109,11 @@ __svfscanf(FILE *fp, const char *fmt0, __va_list ap) int base; /* base argument to strtoimax/strtouimax */ char ccltab[256]; /* character class table for %[...] */ char buf[BUF]; /* buffer for numeric conversions */ +#ifdef SCANF_WIDE_CHAR + wchar_t *wcp; /* handy wide character pointer */ + size_t nconv; /* length of multibyte sequence converted */ + mbstate_t mbs; +#endif /* `basefix' is used to avoid `if' tests in the integer scanner */ static short basefix[17] = @@ -331,6 +337,48 @@ literal: /* scan arbitrary characters (sets NOSKIP) */ if (width == 0) width = 1; +#ifdef SCANF_WIDE_CHAR + if (flags & LONG) { + if ((flags & SUPPRESS) == 0) + wcp = va_arg(ap, wchar_t *); + else + wcp = NULL; + n = 0; + while (width != 0) { + if (n == MB_CUR_MAX) { + fp->_flags |= __SERR; + goto input_failure; + } + buf[n++] = *fp->_p; + fp->_p++; + fp->_r--; + bzero(&mbs, sizeof(mbs)); + nconv = mbrtowc(wcp, buf, n, &mbs); + if (nconv == (size_t)-1) { + fp->_flags |= __SERR; + goto input_failure; + } + if (nconv == 0 && !(flags & SUPPRESS)) + *wcp = L'\0'; + if (nconv != (size_t)-2) { + nread += n; + width--; + if (!(flags & SUPPRESS)) + wcp++; + n = 0; + } + if (fp->_r <= 0 && __srefill(fp)) { + if (n != 0) { + fp->_flags |= __SERR; + goto input_failure; + } + break; + } + } + if (!(flags & SUPPRESS)) + nassigned++; + } else 
+#endif /* SCANF_WIDE_CHAR */ if (flags & SUPPRESS) { size_t sum = 0; for (;;) { @@ -366,6 +414,72 @@ literal: /* scan a (nonempty) character class (sets NOSKIP) */ if (width == 0) width = (size_t)~0; /* `infinity' */ +#ifdef SCANF_WIDE_CHAR + /* take only those things in the class */ + if (flags & LONG) { + wchar_t twc; + int nchars; + + if ((flags & SUPPRESS) == 0) + wcp = va_arg(ap, wchar_t *); + else + wcp = &twc; + n = 0; + nchars = 0; + while (width != 0) { + if (n == MB_CUR_MAX) { + fp->_flags |= __SERR; + goto input_failure; + } + buf[n++] = *fp->_p; + fp->_p++; + fp->_r--; + bzero(&mbs, sizeof(mbs)); + nconv = mbrtowc(wcp, buf, n, &mbs); + if (nconv == (size_t)-1) { + fp->_flags |= __SERR; + goto input_failure; + } + if (nconv == 0) + *wcp = L'\0'; + if (nconv != (size_t)-2) { + if (wctob(*wcp) != EOF && + !ccltab[wctob(*wcp)]) { + while (n != 0) { + n--; + ungetc(buf[n], + fp); + } + break; + } + nread += n; + width--; + if (!(flags & SUPPRESS)) + wcp++; + nchars++; + n = 0; + } + if (fp->_r <= 0 && __srefill(fp)) { + if (n != 0) { + fp->_flags |= __SERR; + goto input_failure; + } + break; + } + } + if (n != 0) { + fp->_flags |= __SERR; + goto input_failure; + } + n = nchars; + if (n == 0) + goto match_failure; + if (!(flags & SUPPRESS)) { + *wcp = L'\0'; + nassigned++; + } + } else +#endif /* SCANF_WIDE_CHAR */ /* take only those things in the class */ if (flags & SUPPRESS) { n = 0; @@ -407,6 +521,60 @@ literal: /* like CCL, but zero-length string OK, & no NOSKIP */ if (width == 0) width = (size_t)~0; +#ifdef SCANF_WIDE_CHAR + if (flags & LONG) { + wchar_t twc; + + if ((flags & SUPPRESS) == 0) + wcp = va_arg(ap, wchar_t *); + else + wcp = &twc; + n = 0; + while (!isspace(*fp->_p) && width != 0) { + if (n == MB_CUR_MAX) { + fp->_flags |= __SERR; + goto input_failure; + } + buf[n++] = *fp->_p; + fp->_p++; + fp->_r--; + bzero(&mbs, sizeof(mbs)); + nconv = mbrtowc(wcp, buf, n, &mbs); + if (nconv == (size_t)-1) { + fp->_flags |= __SERR; + goto input_failure; 
+ } + if (nconv == 0) + *wcp = L'\0'; + if (nconv != (size_t)-2) { + if (iswspace(*wcp)) { + while (n != 0) { + n--; + ungetc(buf[n], + fp); + } + break; + } + nread += n; + width--; + if (!(flags & SUPPRESS)) + wcp++; + n = 0; + } + if (fp->_r <= 0 && __srefill(fp)) { + if (n != 0) { + fp->_flags |= __SERR; + goto input_failure; + } + break; + } + } + if (!(flags & SUPPRESS)) { + *wcp = L'\0'; + nassigned++; + } + } else +#endif /* SCANF_WIDE_CHAR */ if (flags & SUPPRESS) { n = 0; while (!isspace(*fp->_p)) { |