src - OpenBSD base system

diff options


context:
space:
mode:

author	Nicholas Marriott <nicm@cvs.openbsd.org>	2020-06-06 12:38:33 +0000
committer	Nicholas Marriott <nicm@cvs.openbsd.org>	2020-06-06 12:38:33 +0000
commit	c6cdcb016ad9428ba2424642b9a363d65412d313 (patch)
tree	440d24316e72a7ccbfd473243cbc5b3e4a1c1b03 /usr.bin/tmux
parent	3c1f6c149eb30183af360ef8e2f4ff5f53da970f (diff)

Use bitshifts instead of a union for encoding UTF-8 into 32 bits, which

is more friendly to GCC3. Reported by and ok aoyama@.

Diffstat (limited to 'usr.bin/tmux')

-rw-r--r--

usr.bin/tmux/utf8.c

1 files changed, 34 insertions, 59 deletions

diff --git a/usr.bin/tmux/utf8.c b/usr.bin/tmux/utf8.c
index 11bbbbdf7f5..2a4588e6e92 100644
--- a/usr.bin/tmux/utf8.c
+++ b/usr.bin/tmux/utf8.c

@@ -1,4 +1,4 @@

-/* $OpenBSD: utf8.c,v 1.52 2020/06/02 20:10:23 nicm Exp $ */

+/* $OpenBSD: utf8.c,v 1.53 2020/06/06 12:38:32 nicm Exp $ */

@@ -52,32 +52,11 @@ static struct utf8_item *utf8_list;

static u_int utf8_list_size;

static u_int utf8_list_used;

-union utf8_map {

- utf8_char uc;

- struct {

- u_char flags;

- u_char data[3];

- };

-} __packed;

+#define UTF8_GET_SIZE(uc) (((uc) >> 24) & 0x1f)

+#define UTF8_GET_WIDTH(flags) (((uc) >> 29) - 1)

-#define UTF8_GET_SIZE(flags) ((flags) & 0x1f)

-#define UTF8_GET_WIDTH(flags) (((flags) >> 5) - 1)

-#define UTF8_SET_SIZE(size) (size)

-#define UTF8_SET_WIDTH(width) ((width + 1) << 5)

-static const union utf8_map utf8_space0 = {

- .flags = UTF8_SET_WIDTH(0)|UTF8_SET_SIZE(0),

- .data = ""

-};

-static const union utf8_map utf8_space1 = {

- .flags = UTF8_SET_WIDTH(1)|UTF8_SET_SIZE(1),

- .data = " "

-};

-static const union utf8_map utf8_space2 = {

- .flags = UTF8_SET_WIDTH(2)|UTF8_SET_SIZE(2),

- .data = " "

-};

+#define UTF8_SET_SIZE(size) (((utf8_char)(size)) << 24)

+#define UTF8_SET_WIDTH(width) ((((utf8_char)(width)) + 1) << 29)

/* Get a UTF-8 item by offset. */

static struct utf8_item *

@@ -139,34 +118,31 @@ utf8_put_item(const char *data, size_t size, u_int *offset)

enum utf8_state

utf8_from_data(const struct utf8_data *ud, utf8_char *uc)

{

- union utf8_map m = { .uc = 0 };

- u_int offset;

+ u_int offset;

if (ud->width > 2)

fatalx("invalid UTF-8 width");

if (ud->size > UTF8_SIZE)

goto fail;

- m.flags = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width);

- if (ud->size <= 3)

- memcpy(m.data, ud->data, ud->size);

- else {

- if (utf8_put_item(ud->data, ud->size, &offset) != 0)

- goto fail;

- m.data[0] = (offset & 0xff);

- m.data[1] = (offset >> 8) & 0xff;

- m.data[2] = (offset >> 16);

- }

- *uc = htonl(m.uc);

+ if (ud->size <= 3) {

+ offset = (((utf8_char)ud->data[2] << 16)|

+ ((utf8_char)ud->data[1] << 8)|

+ ((utf8_char)ud->data[0]));

+ } else if (utf8_put_item(ud->data, ud->size, &offset) != 0)

+ goto fail;

+ *uc = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width)|offset;

+ log_debug("%s: (%d %d %.*s) -> %08x", __func__, ud->width, ud->size,

+ (int)ud->size, ud->data, *uc);

return (UTF8_DONE);

fail:

if (ud->width == 0)

- *uc = htonl(utf8_space0.uc);

+ *uc = UTF8_SET_SIZE(0)|UTF8_SET_WIDTH(0);

else if (ud->width == 1)

- *uc = htonl(utf8_space1.uc);

+ *uc = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|0x20;

else

- *uc = htonl(utf8_space2.uc);

+ *uc = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|0x2020;

return (UTF8_ERROR);

}

@@ -174,37 +150,36 @@ fail:

void

utf8_to_data(utf8_char uc, struct utf8_data *ud)

{

- union utf8_map m = { .uc = ntohl(uc) };

struct utf8_item *ui;

u_int offset;

memset(ud, 0, sizeof *ud);

- ud->size = ud->have = UTF8_GET_SIZE(m.flags);

- ud->width = UTF8_GET_WIDTH(m.flags);

+ ud->size = ud->have = UTF8_GET_SIZE(uc);

+ ud->width = UTF8_GET_WIDTH(uc);

if (ud->size <= 3) {

- memcpy(ud->data, m.data, ud->size);

- return;

+ ud->data[2] = (uc >> 16);

+ ud->data[1] = ((uc >> 8) & 0xff);

+ ud->data[0] = (uc & 0xff);

+ } else {

+ offset = (uc & 0xffffff);

+ if (offset >= utf8_list_used)

+ memset(ud->data, ' ', ud->size);

+ else {

+ ui = &utf8_list[offset];

+ memcpy(ud->data, ui->data, ud->size);

+ }

}

- offset = ((u_int)m.data[2] << 16)|((u_int)m.data[1] << 8)|m.data[0];

- if (offset >= utf8_list_used)

- memset(ud->data, ' ', ud->size);

- else {

- ui = &utf8_list[offset];

- memcpy(ud->data, ui->data, ud->size);

- }

+ log_debug("%s: %08x -> (%d %d %.*s)", __func__, uc, ud->width, ud->size,

+ (int)ud->size, ud->data);

}

/* Get UTF-8 character from a single ASCII character. */

u_int

utf8_build_one(u_char ch)

{

- union utf8_map m;

- m.flags = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1);

- m.data[0] = ch;

- return (htonl(m.uc));

+ return (UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|ch);

}

/* Set a single character. */