summaryrefslogtreecommitdiff
path: root/lib/libc/locale/mbrtoc16.c
blob: 40b4de0a4e50cbf33429bdbb66140212cad60d67 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/*	$OpenBSD: mbrtoc16.c,v 1.1 2023/08/20 15:02:51 schwarze Exp $ */
/*
 * Copyright (c) 2022 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <stdint.h>
#include <uchar.h>
#include <wchar.h>

/*
 * Keep this structure compatible with
 * struct _utf8_state in the file citrus/citrus_utf8.c.
 * In particular, only use values for the "want" field
 * that do not collide with values used by the function
 * _citrus_utf8_ctype_mbrtowc().
 */
struct _utf16_state {
	wchar_t	ch;	/* code point whose low surrogate is still owed */
	int	want;	/* UTF16_WANT_LOW while a low surrogate is owed */
};

/*
 * Marker stored in the "want" field between the call that delivers
 * a high surrogate and the call that delivers the matching low one.
 */
enum { UTF16_WANT_LOW = -3 };

/*
 * Convert one multibyte character from s (at most n bytes) into
 * UTF-16 and store one 16-bit code unit in *pc16 unless pc16 is NULL.
 *
 * Code points that fit into 16 bits are stored directly.  Larger
 * code points take two calls: the first one stores the high surrogate
 * and returns whatever mbrtowc(3) returned, the second one stores the
 * low surrogate, inspects no input, and returns (size_t)-3.
 *
 * In all other cases, the return value is the one from mbrtowc(3),
 * including (size_t)-1 and (size_t)-2, in which cases nothing is
 * stored in *pc16.
 *
 * If ps is NULL, a state object private to this function is used.
 * If s is NULL, the call behaves like mbrtoc16(NULL, "", 1, ps).
 */
size_t
mbrtoc16(char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
{
	static mbstate_t	 private_state;
	struct _utf16_state	*pending;
	size_t			 consumed;
	wchar_t			 codepoint;

	/*
	 * The fallback state is deliberately not shared with mbrtowc(3):
	 * a program may interleave mbrtowc(3) and mbrtoc16(3) calls on
	 * unrelated strings, and neither must disturb the other.
	 */
	if (ps == NULL)
		ps = &private_state;

	pending = (struct _utf16_state *)ps;

	/*
	 * Normalize the NULL input case before looking at the state,
	 * such that a low surrogate that is still owed is dropped
	 * instead of being written through the caller's pc16.
	 */
	if (s == NULL) {
		pc16 = NULL;
		s = "";
		n = 1;
	}

	/*
	 * A high surrogate went out last time: deliver the low one,
	 * clear the state, and leave the input alone for now.
	 */
	if (pending->want == UTF16_WANT_LOW) {
		if (pc16 != NULL)
			*pc16 = 0xdc00 + (pending->ch & 0x3ff);
		pending->ch = 0;
		pending->want = 0;
		return (size_t)-3;
	}

	/*
	 * Let mbrtowc(3) do the decoding.  That covers resuming an
	 * incomplete character from an earlier call, a NUL character,
	 * a complete valid character, a character that remains
	 * incomplete, and a decoding error.
	 */
	consumed = mbrtowc(&codepoint, s, n, ps);

	/* Incomplete or invalid input: there is nothing to store. */
	if (consumed >= (size_t)-2)
		return consumed;

	/* A basic multilingual plane code point is its own code unit. */
	if (codepoint <= UINT16_MAX) {
		if (pc16 != NULL)
			*pc16 = codepoint;
		return consumed;
	}

	/*
	 * Supplementary plane: store the high surrogate now,
	 * where 0xd7c0 is 0xd800 - (0x10000 >> 10),
	 * and remember that the low surrogate is still owed.
	 */
	if (pc16 != NULL)
		*pc16 = 0xd7c0 + (codepoint >> 10);
	pending->ch = codepoint;
	pending->want = UTF16_WANT_LOW;
	return consumed;
}