summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Marriott <nicm@cvs.openbsd.org>2020-05-25 09:32:11 +0000
committerNicholas Marriott <nicm@cvs.openbsd.org>2020-05-25 09:32:11 +0000
commit2c743ea363c5f913f013bb5b9402bcbea962c0a3 (patch)
tree2d06af7ce9247a7e3aef3415d56a8b2a6ab36574
parentdc20892ded4cc025bb0b07b6685e4fc927d967e6 (diff)
Instead of storing all UTF-8 characters in the extended cell, which means
that 14 bytes are wasted for each character in the BMP, only store characters of three bytes or less in the cell itself and store others (outside the BMP or with combining characters) in a separate global tree. This can reduce grid memory use for heavy Unicode users by around 30%.
-rw-r--r--usr.bin/tmux/grid.c53
-rw-r--r--usr.bin/tmux/tmux.h33
-rw-r--r--usr.bin/tmux/utf8.c176
-rw-r--r--usr.bin/tmux/window-copy.c30
4 files changed, 254 insertions, 38 deletions
diff --git a/usr.bin/tmux/grid.c b/usr.bin/tmux/grid.c
index 3c36d359603..d2cc6a2efc3 100644
--- a/usr.bin/tmux/grid.c
+++ b/usr.bin/tmux/grid.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: grid.c,v 1.109 2020/05/16 16:22:01 nicm Exp $ */
+/* $OpenBSD: grid.c,v 1.110 2020/05/25 09:32:10 nicm Exp $ */
/*
* Copyright (c) 2008 Nicholas Marriott <nicholas.marriott@gmail.com>
@@ -100,11 +100,11 @@ grid_get_extended_cell(struct grid_line *gl, struct grid_cell_entry *gce,
}
/* Set cell as extended. */
-static struct grid_cell *
+static struct grid_extd_entry *
grid_extended_cell(struct grid_line *gl, struct grid_cell_entry *gce,
const struct grid_cell *gc)
{
- struct grid_cell *gcp;
+ struct grid_extd_entry *gee;
int flags = (gc->flags & ~GRID_FLAG_CLEARED);
if (~gce->flags & GRID_FLAG_EXTENDED)
@@ -113,10 +113,14 @@ grid_extended_cell(struct grid_line *gl, struct grid_cell_entry *gce,
fatalx("offset too big");
gl->flags |= GRID_LINE_EXTENDED;
- gcp = &gl->extddata[gce->offset];
- memcpy(gcp, gc, sizeof *gcp);
- gcp->flags = flags;
- return (gcp);
+ gee = &gl->extddata[gce->offset];
+ gee->data = utf8_map_big(&gc->data);
+ gee->attr = gc->attr;
+ gee->flags = flags;
+ gee->fg = gc->fg;
+ gee->bg = gc->bg;
+ gee->us = gc->us;
+ return (gee);
}
/* Free up unused extended cells. */
@@ -124,9 +128,9 @@ static void
grid_compact_line(struct grid_line *gl)
{
int new_extdsize = 0;
- struct grid_cell *new_extddata;
+ struct grid_extd_entry *new_extddata;
struct grid_cell_entry *gce;
- struct grid_cell *gc;
+ struct grid_extd_entry *gee;
u_int px, idx;
if (gl->extdsize == 0)
@@ -150,8 +154,8 @@ grid_compact_line(struct grid_line *gl)
for (px = 0; px < gl->cellsize; px++) {
gce = &gl->celldata[px];
if (gce->flags & GRID_FLAG_EXTENDED) {
- gc = &gl->extddata[gce->offset];
- memcpy(&new_extddata[idx], gc, sizeof *gc);
+ gee = &gl->extddata[gce->offset];
+ memcpy(&new_extddata[idx], gee, sizeof *gee);
gce->offset = idx++;
}
}
@@ -181,17 +185,14 @@ grid_clear_cell(struct grid *gd, u_int px, u_int py, u_int bg)
{
struct grid_line *gl = &gd->linedata[py];
struct grid_cell_entry *gce = &gl->celldata[px];
- struct grid_cell *gc;
+ struct grid_extd_entry *gee;
memcpy(gce, &grid_cleared_entry, sizeof *gce);
if (bg != 8) {
if (bg & COLOUR_FLAG_RGB) {
grid_get_extended_cell(gl, gce, gce->flags);
- gl->flags |= GRID_LINE_EXTENDED;
-
- gc = &gl->extddata[gce->offset];
- memcpy(gc, &grid_cleared_cell, sizeof *gc);
- gc->bg = bg;
+ gee = grid_extended_cell(gl, gce, &grid_cleared_cell);
+ gee->bg = bg;
} else {
if (bg & COLOUR_FLAG_256)
gce->flags |= GRID_FLAG_BG256;
@@ -483,12 +484,20 @@ static void
grid_get_cell1(struct grid_line *gl, u_int px, struct grid_cell *gc)
{
struct grid_cell_entry *gce = &gl->celldata[px];
+ struct grid_extd_entry *gee;
if (gce->flags & GRID_FLAG_EXTENDED) {
if (gce->offset >= gl->extdsize)
memcpy(gc, &grid_default_cell, sizeof *gc);
- else
- memcpy(gc, &gl->extddata[gce->offset], sizeof *gc);
+ else {
+ gee = &gl->extddata[gce->offset];
+ gc->flags = gee->flags;
+ gc->attr = gee->attr;
+ gc->fg = gee->fg;
+ gc->bg = gee->bg;
+ gc->us = gee->us;
+ utf8_get_big(gee->data, &gc->data);
+ }
return;
}
@@ -545,7 +554,7 @@ grid_set_cells(struct grid *gd, u_int px, u_int py, const struct grid_cell *gc,
{
struct grid_line *gl;
struct grid_cell_entry *gce;
- struct grid_cell *gcp;
+ struct grid_extd_entry *gee;
u_int i;
if (grid_check_y(gd, __func__, py) != 0)
@@ -560,8 +569,8 @@ grid_set_cells(struct grid *gd, u_int px, u_int py, const struct grid_cell *gc,
for (i = 0; i < slen; i++) {
gce = &gl->celldata[px + i];
if (grid_need_extended_cell(gce, gc)) {
- gcp = grid_extended_cell(gl, gce, gc);
- utf8_set(&gcp->data, s[i]);
+ gee = grid_extended_cell(gl, gce, gc);
+ gee->data = utf8_set_big(s[i], 1);
} else
grid_store_cell(gce, gc, s[i]);
}
diff --git a/usr.bin/tmux/tmux.h b/usr.bin/tmux/tmux.h
index 73ccf5234c7..eca2e1c85dc 100644
--- a/usr.bin/tmux/tmux.h
+++ b/usr.bin/tmux/tmux.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: tmux.h,v 1.1052 2020/05/24 09:40:17 nicm Exp $ */
+/* $OpenBSD: tmux.h,v 1.1053 2020/05/25 09:32:10 nicm Exp $ */
/*
* Copyright (c) 2007 Nicholas Marriott <nicholas.marriott@gmail.com>
@@ -597,11 +597,11 @@ struct msg_write_close {
#define MOTION_MOUSE_MODES (MODE_MOUSE_BUTTON|MODE_MOUSE_ALL)
/*
- * A single UTF-8 character. UTF8_SIZE must be big enough to hold
- * combining characters as well, currently at most five (of three
- * bytes) are supported.
-*/
-#define UTF8_SIZE 18
+ * A single UTF-8 character. UTF8_SIZE must be big enough to hold combining
+ * characters as well. It can't be more than 32 bytes without changes to how
+ * big characters are stored.
+ */
+#define UTF8_SIZE 21
struct utf8_data {
u_char data[UTF8_SIZE];
@@ -609,7 +609,7 @@ struct utf8_data {
u_char size;
u_char width; /* 0xff if invalid */
-} __packed;
+};
enum utf8_state {
UTF8_MORE,
UTF8_DONE,
@@ -663,13 +663,25 @@ enum utf8_state {
/* Grid cell data. */
struct grid_cell {
- struct utf8_data data; /* 21 bytes */
+ struct utf8_data data;
+ u_short attr;
+ u_char flags;
+ int fg;
+ int bg;
+ int us;
+};
+
+/* Grid extended cell entry. */
+struct grid_extd_entry {
+ uint32_t data;
u_short attr;
u_char flags;
int fg;
int bg;
int us;
} __packed;
+
+/* Grid cell entry. */
struct grid_cell_entry {
u_char flags;
union {
@@ -690,7 +702,7 @@ struct grid_line {
struct grid_cell_entry *celldata;
u_int extdsize;
- struct grid_cell *extddata;
+ struct grid_extd_entry *extddata;
int flags;
} __packed;
@@ -2877,6 +2889,9 @@ u_int session_group_attached_count(struct session_group *);
void session_renumber_windows(struct session *);
/* utf8.c */
+uint32_t utf8_set_big(char, u_int);
+uint32_t utf8_map_big(const struct utf8_data *);
+void utf8_get_big(uint32_t, struct utf8_data *);
void utf8_set(struct utf8_data *, u_char);
void utf8_copy(struct utf8_data *, const struct utf8_data *);
enum utf8_state utf8_open(struct utf8_data *, u_char);
diff --git a/usr.bin/tmux/utf8.c b/usr.bin/tmux/utf8.c
index fe1808c769d..95981b8b689 100644
--- a/usr.bin/tmux/utf8.c
+++ b/usr.bin/tmux/utf8.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: utf8.c,v 1.44 2019/11/25 15:04:15 nicm Exp $ */
+/* $OpenBSD: utf8.c,v 1.45 2020/05/25 09:32:10 nicm Exp $ */
/*
* Copyright (c) 2008 Nicholas Marriott <nicholas.marriott@gmail.com>
@@ -29,6 +29,180 @@
static int utf8_width(wchar_t);
+struct utf8_big_item { /* one UTF-8 character too long to store inline in a utf8_big_map */
+ u_int index; /* position of this item in utf8_big_list */
+ RB_ENTRY(utf8_big_item) entry; /* linkage for utf8_big_tree */
+
+ char data[UTF8_SIZE]; /* raw bytes; only the first size bytes are meaningful */
+ u_char size; /* number of bytes used in data */
+};
+RB_HEAD(utf8_big_tree, utf8_big_item);
+
+static int
+utf8_big_cmp(struct utf8_big_item *bi1, struct utf8_big_item *bi2) /* tree order: by size, then by bytes */
+{
+ if (bi1->size < bi2->size) /* shorter items sort first */
+ return (-1);
+ if (bi1->size > bi2->size)
+ return (1);
+ return (memcmp(bi1->data, bi2->data, bi1->size)); /* sizes are equal here, so either size bounds the compare */
+}
+RB_GENERATE_STATIC(utf8_big_tree, utf8_big_item, entry, utf8_big_cmp);
+static struct utf8_big_tree utf8_big_tree = RB_INITIALIZER(utf8_big_tree); /* items keyed by size, then bytes; nodes live inside utf8_big_list */
+
+static struct utf8_big_item *utf8_big_list; /* items by index; grown with xreallocarray */
+static u_int utf8_big_list_size; /* slots allocated in utf8_big_list */
+static u_int utf8_big_list_used; /* slots in use; also the next index handed out */
+
+union utf8_big_map { /* a character packed into 32 bits: one flags byte plus three data bytes */
+ uint32_t value; /* the whole packed form, as stored in an extended cell's data member */
+ struct {
+ u_char flags;
+#define UTF8_BIG_SIZE 0x1f /* mask for the size in bytes */
+#define UTF8_BIG_WIDTH2 0x20 /* set when the width is 2; otherwise the width is 1 */
+
+ u_char data[3]; /* the bytes themselves if size <= 3, else the list index, low byte first */
+ };
+} __packed;
+
+static const union utf8_big_map utf8_big_space1 = { /* width-1 fallback: size 1, a space */
+ .flags = 1,
+ .data = " "
+};
+static const union utf8_big_map utf8_big_space2 = { /* width-2 fallback: size 2. NOTE(review): the literal shows one space in this copy; whitespace may have been collapsed - confirm it is two */
+ .flags = UTF8_BIG_WIDTH2|2,
+ .data = " "
+};
+
+/*
+ * Look up a big item by its data (not by index): returns the item in
+ * utf8_big_tree whose size and bytes match, or NULL if there is none.
+ */
+static struct utf8_big_item *
+utf8_get_big_item(const char *data, size_t size)
+{
+ struct utf8_big_item bi;
+
+ memcpy(bi.data, data, size); /* NOTE(review): size is not checked against sizeof bi.data here; callers presumably guarantee it - confirm */
+ bi.size = size;
+
+ return (RB_FIND(utf8_big_tree, &utf8_big_tree, &bi)); /* the comparator reads only data and size, so bi.index may stay unset */
+}
+
+/*
+ * Add a big item, or find the existing one with the same data. On success
+ * the item's position in utf8_big_list is stored in *index and 0 is
+ * returned; -1 is returned if the list has reached its 0xffffff entry limit
+ * (an index must fit in the three data bytes of union utf8_big_map).
+ */
+static int
+utf8_put_big_item(const char *data, size_t size, u_int *index)
+{
+	struct utf8_big_item	*bi;
+	u_int			 i;
+
+	bi = utf8_get_big_item(data, size);
+	if (bi != NULL) {
+		*index = bi->index;
+		log_debug("%s: have %.*s at %u", __func__, (int)size, data,
+		    *index);
+		return (0);
+	}
+
+	if (utf8_big_list_used == utf8_big_list_size) {
+		if (utf8_big_list_size == 0xffffff)
+			return (-1);
+		if (utf8_big_list_size == 0)
+			utf8_big_list_size = 256;
+		else if (utf8_big_list_size > 0x7fffff)
+			utf8_big_list_size = 0xffffff;
+		else
+			utf8_big_list_size *= 2;
+		utf8_big_list = xreallocarray(utf8_big_list, utf8_big_list_size,
+		    sizeof *utf8_big_list);
+
+		/*
+		 * The tree nodes are embedded in the list elements, so growing
+		 * the list may move them and leave the root and every
+		 * parent/child link pointing into freed memory. Rebuild the
+		 * tree from the items at their new addresses; RB_INSERT
+		 * rewrites each element's links without reading the old ones.
+		 */
+		RB_INIT(&utf8_big_tree);
+		for (i = 0; i < utf8_big_list_used; i++) {
+			RB_INSERT(utf8_big_tree, &utf8_big_tree,
+			    &utf8_big_list[i]);
+		}
+	}
+	*index = utf8_big_list_used++;
+
+	bi = &utf8_big_list[*index];
+	bi->index = *index;
+	memcpy(bi->data, data, size);
+	bi->size = size;
+	RB_INSERT(utf8_big_tree, &utf8_big_tree, bi);
+
+	log_debug("%s: added %.*s at %u", __func__, (int)size, data, *index);
+	return (0);
+}
+
+/*
+ * Pack a UTF-8 character into a 32-bit value: a flags byte (size and the
+ * width-2 bit) plus three data bytes. Characters of up to three bytes are
+ * stored inline; longer ones are put in the global list and the data bytes
+ * hold the list index. If the width is neither 1 nor 2 a single space is
+ * returned; if the character cannot be stored, the space fallback of the
+ * same width is returned.
+ */
+uint32_t
+utf8_map_big(const struct utf8_data *ud)
+{
+ union utf8_big_map m = { .value = 0 };
+ u_int o;
+ const char *data = ud->data;
+ size_t size = ud->size;
+
+ if (ud->width != 1 && ud->width != 2)
+ return (utf8_big_space1.value);
+
+ if (size > UTF8_BIG_SIZE) /* the size must fit in the low five flag bits */
+ goto fail;
+ if (size == 1)
+ return (utf8_set_big(data[0], 1));
+
+ m.flags = size;
+ if (ud->width == 2)
+ m.flags |= UTF8_BIG_WIDTH2;
+
+ if (size <= 3) { /* short enough to keep the bytes in the value itself */
+ memcpy(&m.data, data, size);
+ return (m.value);
+ }
+
+ if (utf8_put_big_item(data, size, &o) != 0) /* nonzero means no index could be assigned */
+ goto fail;
+ m.data[0] = (o & 0xff); /* the index is stored low byte first */
+ m.data[1] = (o >> 8) & 0xff;
+ m.data[2] = (o >> 16);
+ return (m.value);
+
+fail:
+ if (ud->width == 1)
+ return (utf8_big_space1.value);
+ return (utf8_big_space2.value);
+}
+
+/*
+ * Unpack a 32-bit value in the union utf8_big_map layout into ud. The size
+ * and width come from the flags byte; the bytes come either from the value
+ * itself (size of three or less) or from the list entry at the stored
+ * index. An index beyond the used part of the list yields spaces.
+ */
+void
+utf8_get_big(uint32_t v, struct utf8_data *ud)
+{
+ union utf8_big_map m = { .value = v };
+ struct utf8_big_item *bi;
+ u_int o;
+
+ memset(ud, 0, sizeof *ud);
+ ud->size = ud->have = (m.flags & UTF8_BIG_SIZE); /* NOTE(review): may be up to 0x1f; nothing here checks it against sizeof ud->data - confirm v is always well formed */
+ if (m.flags & UTF8_BIG_WIDTH2)
+ ud->width = 2;
+ else
+ ud->width = 1;
+
+ if (ud->size <= 3) { /* bytes were stored inline */
+ memcpy(ud->data, m.data, ud->size);
+ return;
+ }
+
+ o = ((uint32_t)m.data[2] << 16)|((uint32_t)m.data[1] << 8)|m.data[0]; /* reassemble the low-byte-first index */
+ if (o >= utf8_big_list_used)
+ memset(ud->data, ' ', ud->size);
+ else {
+ bi = &utf8_big_list[o];
+ memcpy(ud->data, bi->data, ud->size);
+ }
+}
+
+/*
+ * Build the packed value for a single-byte character: size 1, c in the
+ * first data byte, and the width-2 flag set when width is 2.
+ */
+uint32_t
+utf8_set_big(char c, u_int width)
+{
+ union utf8_big_map m = { .flags = 1, .data[0] = c };
+
+ if (width == 2)
+ m.flags |= UTF8_BIG_WIDTH2;
+ return (m.value);
+}
+
/* Set a single character. */
void
utf8_set(struct utf8_data *ud, u_char ch)
diff --git a/usr.bin/tmux/window-copy.c b/usr.bin/tmux/window-copy.c
index 3fa799f42be..a2226b33cd5 100644
--- a/usr.bin/tmux/window-copy.c
+++ b/usr.bin/tmux/window-copy.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: window-copy.c,v 1.288 2020/05/16 16:10:28 nicm Exp $ */
+/* $OpenBSD: window-copy.c,v 1.289 2020/05/25 09:32:10 nicm Exp $ */
/*
* Copyright (c) 2007 Nicholas Marriott <nicholas.marriott@gmail.com>
@@ -2551,23 +2551,33 @@ window_copy_search_rl_regex(struct grid *gd, u_int *ppx, u_int *psx, u_int py,
}
static const char *
-window_copy_cellstring(const struct grid_line *gl, u_int px, size_t *size)
+window_copy_cellstring(const struct grid_line *gl, u_int px, size_t *size,
+ int *allocated)
{
+ static struct utf8_data ud;
struct grid_cell_entry *gce;
+ char *copy;
if (px >= gl->cellsize) {
*size = 1;
+ *allocated = 0;
return (" ");
}
gce = &gl->celldata[px];
if (~gce->flags & GRID_FLAG_EXTENDED) {
*size = 1;
+ *allocated = 0;
return (&gce->data.data);
}
- *size = gl->extddata[gce->offset].data.size;
- return (gl->extddata[gce->offset].data.data);
+ utf8_get_big(gl->extddata[gce->offset].data, &ud);
+ *size = ud.size;
+ *allocated = 1;
+
+ copy = xmalloc(ud.size);
+ memcpy(copy, ud.data, ud.size);
+ return (copy);
}
/* Find last match in given range. */
@@ -2630,6 +2640,7 @@ window_copy_stringify(struct grid *gd, u_int py, u_int first, u_int last,
const struct grid_line *gl;
const char *d;
size_t bufsize = 1024, dlen;
+ int allocated;
while (bufsize < newsize)
bufsize *= 2;
@@ -2638,7 +2649,7 @@ window_copy_stringify(struct grid *gd, u_int py, u_int first, u_int last,
gl = grid_peek_line(gd, py);
bx = *size - 1;
for (ax = first; ax < last; ax++) {
- d = window_copy_cellstring(gl, ax, &dlen);
+ d = window_copy_cellstring(gl, ax, &dlen, &allocated);
newsize += dlen;
while (bufsize < newsize) {
bufsize *= 2;
@@ -2650,6 +2661,8 @@ window_copy_stringify(struct grid *gd, u_int py, u_int first, u_int last,
memcpy(buf + bx, d, dlen);
bx += dlen;
}
+ if (allocated)
+ free((void *)d);
}
buf[newsize - 1] = '\0';
@@ -2670,6 +2683,7 @@ window_copy_cstrtocellpos(struct grid *gd, u_int ncells, u_int *ppx, u_int *ppy,
struct {
const char *d;
size_t dlen;
+ int allocated;
} *cells;
/* Populate the array of cell data. */
@@ -2680,7 +2694,7 @@ window_copy_cstrtocellpos(struct grid *gd, u_int ncells, u_int *ppx, u_int *ppy,
gl = grid_peek_line(gd, pywrap);
while (cell < ncells) {
cells[cell].d = window_copy_cellstring(gl, px,
- &cells[cell].dlen);
+ &cells[cell].dlen, &cells[cell].allocated);
cell++;
px++;
if (px == gd->sx) {
@@ -2738,6 +2752,10 @@ window_copy_cstrtocellpos(struct grid *gd, u_int ncells, u_int *ppx, u_int *ppy,
*ppy = pywrap;
/* Free cell data. */
+ for (cell = 0; cell < ncells; cell++) {
+ if (cells[cell].allocated)
+ free((void *)cells[cell].d);
+ }
free(cells);
}