diff options
author | Theo Buehler <tb@cvs.openbsd.org> | 2020-12-30 08:59:18 +0000 |
---|---|---|
committer | Theo Buehler <tb@cvs.openbsd.org> | 2020-12-30 08:59:18 +0000 |
commit | b18aadc29809b27bbd884d2515a338d080ba8168 (patch) | |
tree | 26df4f1e85f7be54b3e07573b59df23279aeb9de /lib/libc/regex | |
parent | 0a06cb68fce6a8ef263ea09c1ef7c868f8bba311 (diff) |
regcomp.c uses the "start + count < end" idiom to check that there are
"count" bytes available in the array of char that "start" and "end" both
point into.
This is fine, unless "start + count" goes more than one element past the
end of the array. In this case, a pedantic interpretation of the C standard
makes computing such a pointer, and hence comparing it against "end",
undefined, and optimizers from hell will happily remove as much code as
possible because of this.
An example of this occurs in regcomp.c's bothcases(), which defines
bracket[3], sets "next" to "bracket" and "end" to "bracket + 2". Then it
invokes p_bracket(), which starts with "if (p->next + 5 < p->end)"...
Because bothcases() and p_bracket() are static functions in regcomp.c,
there is a real risk of miscompilation if aggressive inlining happens.
The following diff rewrites the "start + count < end" constructs into
"end - start > count". Assuming "start" and "end" always point into the
same array (such as "bracket[3]" above), "end - start" is well-defined
and can be compared without trouble.
As a bonus, MORE2() implies MORE(), so SEETWO() can be simplified
a bit.
from miod, ok millert
Diffstat (limited to 'lib/libc/regex')
-rw-r--r-- | lib/libc/regex/regcomp.c | 28 |
1 files changed, 15 insertions, 13 deletions
diff --git a/lib/libc/regex/regcomp.c b/lib/libc/regex/regcomp.c
index fb8b5687060..d7f06dd8bed 100644
--- a/lib/libc/regex/regcomp.c
+++ b/lib/libc/regex/regcomp.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: regcomp.c,v 1.37 2020/12/30 08:54:42 tb Exp $ */
+/* $OpenBSD: regcomp.c,v 1.38 2020/12/30 08:59:17 tb Exp $ */
 /*-
  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
  * Copyright (c) 1992, 1993, 1994
@@ -110,10 +110,10 @@ static char nuls[10]; /* place to point scanner in event of error */
  */
 #define PEEK() (*p->next)
 #define PEEK2() (*(p->next+1))
-#define MORE() (p->next < p->end)
-#define MORE2() (p->next+1 < p->end)
+#define MORE() (p->end - p->next > 0)
+#define MORE2() (p->end - p->next > 1)
 #define SEE(c) (MORE() && PEEK() == (c))
-#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
+#define SEETWO(a, b) (MORE2() && PEEK() == (a) && PEEK2() == (b))
 #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
 #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
 #define NEXT() (p->next++)
@@ -620,15 +620,17 @@ p_bracket(struct parse *p)
 	int invert = 0;
 
 	/* Dept of Truly Sickening Special-Case Kludges */
-	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
-		EMIT(OBOW, 0);
-		NEXTn(6);
-		return;
-	}
-	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
-		EMIT(OEOW, 0);
-		NEXTn(6);
-		return;
+	if (p->end - p->next > 5) {
+		if (strncmp(p->next, "[:<:]]", 6) == 0) {
+			EMIT(OBOW, 0);
+			NEXTn(6);
+			return;
+		}
+		if (strncmp(p->next, "[:>:]]", 6) == 0) {
+			EMIT(OEOW, 0);
+			NEXTn(6);
+			return;
+		}
 	}
 
 	if ((cs = allocset(p)) == NULL) {