src - OpenBSD base system

diff options


context:
space:
mode:

author	Ingo Schwarze <schwarze@cvs.openbsd.org>	2014-01-02 22:19:39 +0000
committer	Ingo Schwarze <schwarze@cvs.openbsd.org>	2014-01-02 22:19:39 +0000
commit	c32a7debc18b96679ed870f943798c6ac701f37e (patch)
tree	c9955d3b4ad993a9d1b44a1d69a6f0ee1c4562ee /usr.bin/mandoc
parent	069eef6c19ed0653044eed84de3fdf8d894d4f46 (diff)

Do not put UTF-8-encoded strings into the database by default; use ASCII instead.

Just as in mandoc(1), provide a -Tutf8 option for people who want UTF-8 output.

Diffstat (limited to 'usr.bin/mandoc')

-rw-r--r--

usr.bin/mandoc/mandocdb.c

1 files changed, 35 insertions, 17 deletions

diff --git a/usr.bin/mandoc/mandocdb.c b/usr.bin/mandoc/mandocdb.c
index 237b23e49ff..190fcc71619 100644
--- a/usr.bin/mandoc/mandocdb.c
+++ b/usr.bin/mandoc/mandocdb.c

@@ -1,7 +1,7 @@

-/* $Id: mandocdb.c,v 1.51 2014/01/02 20:24:35 schwarze Exp $ */

+/* $Id: mandocdb.c,v 1.52 2014/01/02 22:19:38 schwarze Exp $ */

* Permission to use, copy, modify, and distribute this software for any

* purpose with or without fee is hereby granted, provided that the above

@@ -168,6 +168,7 @@ static int use_all; /* use all found files */

static int nodb; /* no database changes */

static int verb; /* print what we're doing */

static int warnings; /* warn about crap */

+static int write_utf8; /* write UTF-8 output; else ASCII */

static int exitcode; /* to be returned by main */

static enum op op; /* operational mode */

static char basedir[PATH_MAX]; /* current base directory */

@@ -343,7 +344,7 @@ mandocdb(int argc, char *argv[])

path_arg = NULL;

op = OP_DEFAULT;

- while (-1 != (ch = getopt(argc, argv, "aC:d:ntu:vW")))

+ while (-1 != (ch = getopt(argc, argv, "aC:d:nT:tu:vW")))

switch (ch) {

case ('a'):

use_all = 1;

@@ -361,6 +362,14 @@ mandocdb(int argc, char *argv[])

case ('n'):

nodb = 1;

break;

+ case ('T'):

+ if (strcmp(optarg, "utf8")) {

+ fprintf(stderr, "-T%s: Unsupported "

+ "output format\n", optarg);

+ goto usage;

+ }

+ write_utf8 = 1;

+ break;

case ('t'):

CHECKOP(op, ch);

dup2(STDOUT_FILENO, STDERR_FILENO);

@@ -482,9 +491,9 @@ out:

ohash_delete(&mlinks);

return(exitcode);

usage:

- fprintf(stderr, "usage: %s [-anvW] [-C file]\n"

- " %s [-anvW] dir ...\n"

- " %s [-nvW] -d dir [file ...]\n"

+ fprintf(stderr, "usage: %s [-anvW] [-C file] [-Tutf8]\n"

+ " %s [-anvW] [-Tutf8] dir ...\n"

+ " %s [-nvW] [-Tutf8] -d dir [file ...]\n"

" %s [-nvW] -u dir [file ...]\n"

" %s -t file ...\n",

progname, progname, progname,

@@ -1714,31 +1723,40 @@ utf8key(struct mchars *mc, struct str *key)

* Parse the escape sequence and see if it's a

* predefined character or special character.

esc = mandoc_escape

((const char **)&val, &seq, &len);

if (ESCAPE_ERROR == esc)

break;

if (ESCAPE_SPECIAL != esc)

continue;

- if (0 == (u = mchars_spec2cp(mc, seq, len)))

- continue;

- * If we have a Unicode codepoint, try to convert that

- * to a UTF-8 byte string.

+ * Render the special character

+ * as either UTF-8 or ASCII.

- cpp = utfbuf;

- if (0 == (sz = utf8(u, utfbuf)))

- continue;

+ if (write_utf8) {

+ if (0 == (u = mchars_spec2cp(mc, seq, len)))

+ continue;

+ cpp = utfbuf;

+ if (0 == (sz = utf8(u, utfbuf)))

+ continue;

+ sz = strlen(cpp);

+ } else {

+ cpp = mchars_spec2str(mc, seq, len, &sz);

+ if (NULL == cpp)

+ continue;

+ if (ASCII_NBRSP == *cpp) {

+ cpp = " ";

+ sz = 1;

+ }

/* Copy the rendered glyph into the stream. */

- sz = strlen(cpp);

bsz += sz;

buf = mandoc_realloc(buf, bsz);

memcpy(&buf[pos], cpp, sz);

pos += sz;

}