diff options
author | Ingo Schwarze <schwarze@cvs.openbsd.org> | 2011-09-17 15:29:20 +0000 |
---|---|---|
committer | Ingo Schwarze <schwarze@cvs.openbsd.org> | 2011-09-17 15:29:20 +0000 |
commit | 0dfa92616ec7892ce1d1e19ba6849357b0c550e7 (patch) | |
tree | 4a632a04faa79a21e170051514bc742f3110f5fd /usr.bin/sed | |
parent | b7f880ec6f58461e81df2c30565b3d27cf217081 (diff) |
Rewrite the main loop of the "sed s/..." command, to fix multiple
issues regarding the replacement of zero-length strings.
This commit brings back rev. 1.16, but without the regression that
forced the backout: No NUL bytes will be output now, not even when
the input file lacks a trailing newline character and there is a
zero-length match at the end.
OK otto@ deraadt@;
and naddy@ (who originally found the regression) checked that
the regression is indeed fixed.
Diffstat (limited to 'usr.bin/sed')
-rw-r--r-- | usr.bin/sed/process.c | 93 |
1 files changed, 44 insertions, 49 deletions
```diff
diff --git a/usr.bin/sed/process.c b/usr.bin/sed/process.c
index 148ccaf9493..bd8d8696740 100644
--- a/usr.bin/sed/process.c
+++ b/usr.bin/sed/process.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: process.c,v 1.17 2011/07/26 08:47:07 schwarze Exp $	*/
+/*	$OpenBSD: process.c,v 1.18 2011/09/17 15:29:19 schwarze Exp $	*/
 
 /*-
  * Copyright (c) 1992 Diomidis Spinellis.
@@ -312,7 +312,7 @@ substitute(struct s_command *cp)
 {
 	SPACE tspace;
 	regex_t *re;
-	size_t re_off, slen;
+	regoff_t slen;
 	int n, lastempty;
 	char *s;
 
@@ -333,60 +333,55 @@ substitute(struct s_command *cp)
 	n = cp->u.s->n;
 	lastempty = 1;
 
-	switch (n) {
-	case 0:					/* Global */
-		do {
-			if (lastempty || match[0].rm_so != match[0].rm_eo) {
-				/* Locate start of replaced string. */
-				re_off = match[0].rm_so;
-				/* Copy leading retained string. */
-				cspace(&SS, s, re_off, APPEND);
-				/* Add in regular expression. */
-				regsub(&SS, s, cp->u.s->new);
-			}
+	do {
+		/* Copy the leading retained string. */
+		if (n <= 1 && match[0].rm_so)
+			cspace(&SS, s, match[0].rm_so, APPEND);
 
-			/* Move past this match. */
-			if (match[0].rm_so != match[0].rm_eo) {
-				s += match[0].rm_eo;
-				slen -= match[0].rm_eo;
-				lastempty = 0;
+		/* Skip zero-length matches right after other matches. */
+		if (lastempty || match[0].rm_so ||
+		    match[0].rm_so != match[0].rm_eo) {
+			if (n <= 1) {
+				/* Want this match: append replacement. */
+				regsub(&SS, s, cp->u.s->new);
+				if (n == 1)
+					n = -1;
 			} else {
-				if (match[0].rm_so == 0)
-					cspace(&SS, s, match[0].rm_so + 1,
-					    APPEND);
-				else
-					cspace(&SS, s + match[0].rm_so, 1,
-					    APPEND);
-				s += match[0].rm_so + 1;
-				slen -= match[0].rm_so + 1;
-				lastempty = 1;
+				/* Want a later match: append original. */
+				if (match[0].rm_eo)
+					cspace(&SS, s, match[0].rm_eo, APPEND);
+				n--;
 			}
-		} while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
-		/* Copy trailing retained string. */
-		if (slen > 0)
-			cspace(&SS, s, slen, APPEND);
-		break;
-	default:				/* Nth occurrence */
-		while (--n) {
-			s += match[0].rm_eo;
-			slen -= match[0].rm_eo;
-			if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
-				return (0);
 		}
-		/* FALLTHROUGH */
-	case 1:					/* 1st occurrence */
-		/* Locate start of replaced string. */
-		re_off = match[0].rm_so + (s - ps);
-		/* Copy leading retained string. */
-		cspace(&SS, ps, re_off, APPEND);
-		/* Add in regular expression. */
-		regsub(&SS, s, cp->u.s->new);
-		/* Copy trailing retained string. */
+
+		/* Move past this match. */
 		s += match[0].rm_eo;
 		slen -= match[0].rm_eo;
+
+		/*
+		 * After a zero-length match, advance one byte,
+		 * and at the end of the line, terminate.
+		 */
+		if (match[0].rm_so == match[0].rm_eo) {
+			if (*s == '\0' || *s == '\n')
+				slen = -1;
+			else
+				slen--;
+			if (*s != '\0')
+				cspace(&SS, s++, 1, APPEND);
+			lastempty = 1;
+		} else
+			lastempty = 0;
+
+	} while (n >= 0 && slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
+
+	/* Did not find the requested number of matches. */
+	if (n > 1)
+		return (0);
+
+	/* Copy the trailing retained string. */
+	if (slen > 0)
 		cspace(&SS, s, slen, APPEND);
-		break;
-	}
 
 	/*
 	 * Swap the substitute space and the pattern space, and make sure
```