1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
|
/* $OpenBSD: mbrtoc16.c,v 1.1 2023/08/20 15:02:51 schwarze Exp $ */
/*
* Copyright (c) 2022 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <stdint.h>
#include <uchar.h>
#include <wchar.h>
/*
 * View of the conversion state that mbrtoc16() lays over the mbstate_t
 * object it is working on.  The very same mbstate_t object is also
 * handed to mbrtowc(3) by mbrtoc16().
 *
 * Keep this structure compatible with
 * struct _utf8_state in the file citrus/citrus_utf8.c.
 * In particular, only use values for the "want" field
 * that do not collide with values used by the function
 * _citrus_utf8_ctype_mbrtowc().
 */
struct _utf16_state {
/*
 * mbrtoc16() saves the complete decoded code point here when it hands
 * out a high surrogate, and resets the field to 0 after handing out
 * the matching low surrogate.
 */
wchar_t ch;
/*
 * mbrtoc16() sets this to -3 while a low surrogate is still pending
 * and resets it to 0 once that low surrogate has been delivered.
 */
int want;
};
/*
 * Convert the next multibyte character of s (at most n bytes) into
 * UTF-16 code units, delivering one code unit per call through pc16.
 *
 * pc16  where to store the code unit; may be NULL to discard it
 * s     multibyte input; NULL behaves like the call
 *       mbrtoc16(NULL, "", 1, ps)
 * n     maximum number of bytes of s to inspect
 * ps    conversion state; NULL selects a state private to this function
 *
 * Returns whatever mbrtowc(3) returns for the input, except that
 * (size_t)-3 is returned, without consuming any input, when the code
 * unit stored is the low surrogate left pending by the previous call.
 */
size_t
mbrtoc16(char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
{
	static mbstate_t	 fallback_state;
	mbstate_t		*state;
	struct _utf16_state	*pending;
	char16_t		*out;
	wchar_t			 codepoint;
	size_t			 consumed;

	/*
	 * A caller that passes no state gets one owned by this function.
	 * The fallback object inside mbrtowc(3) is deliberately not
	 * shared: a program may interleave mbrtowc(3) and mbrtoc16(3)
	 * on different strings, and neither may disturb the other.
	 */
	state = ps != NULL ? ps : &fallback_state;
	pending = (struct _utf16_state *)state;
	out = pc16;

	/*
	 * NULL input is normalized before the pending-surrogate check,
	 * so that a left-over low surrogate is never written to the
	 * object that pc16 points to.
	 */
	if (s == NULL) {
		s = "";
		n = 1;
		out = NULL;
	}

	/*
	 * The previous call delivered a high surrogate: deliver the low
	 * surrogate now, clear the saved state, and leave the input
	 * untouched until the next call.
	 */
	if (pending->want == -3) {
		if (out != NULL)
			*out = 0xdc00 + (pending->ch & 0x3ff);
		pending->ch = 0;
		pending->want = 0;
		return (size_t)-3;
	}

	/*
	 * Let mbrtowc(3) do the decoding.  Every one of its outcomes can
	 * occur here: finishing a character begun by an earlier call,
	 * a NUL character, a complete valid character, an incomplete
	 * character to be continued later, or a decoding error.
	 */
	consumed = mbrtowc(&codepoint, s, n, state);

	/* Incomplete input or an error: nothing to store, pass it on. */
	if (consumed >= (size_t)-2)
		return consumed;

	/* A code point that fits into a single UTF-16 code unit. */
	if (codepoint <= UINT16_MAX) {
		if (out != NULL)
			*out = codepoint;
		return consumed;
	}

	/*
	 * A code point that needs a surrogate pair.  Store the high
	 * surrogate; 0xd7c0 is 0xd800 - (0x10000 >> 10), which folds the
	 * subtraction of 0x10000 into the base value.
	 */
	if (out != NULL)
		*out = 0xd7c0 + (codepoint >> 10);

	/* Remember the code point so the next call can finish the pair. */
	pending->ch = codepoint;
	pending->want = -3;
	return consumed;
}
|