diff options
author | Theo de Raadt <deraadt@cvs.openbsd.org> | 1995-10-18 08:53:40 +0000 |
---|---|---|
committer | Theo de Raadt <deraadt@cvs.openbsd.org> | 1995-10-18 08:53:40 +0000 |
commit | d6583bb2a13f329cf0332ef2570eb8bb8fc0e39c (patch) | |
tree | ece253b876159b39c620e62b6c9b1174642e070e /usr.bin/vi/common/search.c |
initial import of NetBSD tree
Diffstat (limited to 'usr.bin/vi/common/search.c')
-rw-r--r-- | usr.bin/vi/common/search.c | 833 |
1 files changed, 833 insertions, 0 deletions
diff --git a/usr.bin/vi/common/search.c b/usr.bin/vi/common/search.c new file mode 100644 index 00000000000..f7e96ebda71 --- /dev/null +++ b/usr.bin/vi/common/search.c @@ -0,0 +1,833 @@ +/*- + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)search.c 8.48 (Berkeley) 8/17/94"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/queue.h> +#include <sys/time.h> + +#include <bitstring.h> +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <termios.h> +#include <unistd.h> + +#include "compat.h" +#include <db.h> +#include <regex.h> + +#include "vi.h" + +static int check_delta __P((SCR *, EXF *, long, recno_t)); +static int ctag_conv __P((SCR *, char **, int *)); +static int get_delta __P((SCR *, char **, long *, u_int *)); +static int resetup __P((SCR *, regex_t **, enum direction, + char *, char **, long *, u_int *)); + +/* + * resetup -- + * Set up a search for a regular expression. + */ +static int +resetup(sp, rep, dir, ptrn, epp, deltap, flagp) + SCR *sp; + regex_t **rep; + enum direction dir; + char *ptrn, **epp; + long *deltap; + u_int *flagp; +{ + u_int flags; + int delim, eval, re_flags, replaced; + char *p, *t; + + /* Set return information the default. */ + *deltap = 0; + + /* + * Use saved pattern if no pattern supplied, or if only a delimiter + * character is supplied. Only the pattern was saved, historic vi + * did not reuse any delta supplied. + */ + flags = *flagp; + if (ptrn == NULL) + goto prev; + if (ptrn[1] == '\0') { + if (epp != NULL) + *epp = ptrn + 1; + goto prev; + } + if (ptrn[0] == ptrn[1] && ptrn[2] == '\0') { + if (epp != NULL) + *epp = ptrn + 2; +prev: if (!F_ISSET(sp, S_SRE_SET)) { + msgq(sp, M_ERR, "No previous search pattern"); + return (1); + } + *rep = &sp->sre; + + /* Empty patterns set the direction. */ + if (LF_ISSET(SEARCH_SET)) { + F_SET(sp, S_SRE_SET); + sp->searchdir = dir; + sp->sre = **rep; + } + return (0); + } + + re_flags = 0; /* Set RE flags. */ + if (O_ISSET(sp, O_EXTENDED)) + re_flags |= REG_EXTENDED; + if (O_ISSET(sp, O_IGNORECASE)) + re_flags |= REG_ICASE; + + replaced = 0; + if (LF_ISSET(SEARCH_PARSE)) { /* Parse the string. */ + /* Set delimiter. */ + delim = *ptrn++; + + /* Find terminating delimiter, handling escaped delimiters. */ + for (p = t = ptrn;;) { + if (p[0] == '\0' || p[0] == delim) { + if (p[0] == delim) + ++p; + *t = '\0'; + break; + } + if (p[1] == delim && p[0] == '\\') + ++p; + *t++ = *p++; + } + + /* + * If characters after the terminating delimiter, it may + * be an error, or may be an offset. In either case, we + * return the end of the string, whatever it may be. + */ + if (*p) { + if (get_delta(sp, &p, deltap, flagp)) + return (1); + if (*p && LF_ISSET(SEARCH_TERM)) { + msgq(sp, M_ERR, + "Characters after search string and/or delta"); + return (1); + } + } + if (epp != NULL) + *epp = p; + + /* Check for "/ " or other such silliness. */ + if (*ptrn == '\0') + goto prev; + + if (re_conv(sp, &ptrn, &replaced)) + return (1); + } else if (LF_ISSET(SEARCH_TAG)) { + if (ctag_conv(sp, &ptrn, &replaced)) + return (1); + re_flags &= ~(REG_EXTENDED | REG_ICASE); + } + + /* Compile the RE. */ + if (eval = regcomp(*rep, ptrn, re_flags)) + re_error(sp, eval, *rep); + else if (LF_ISSET(SEARCH_SET)) { + F_SET(sp, S_SRE_SET); + sp->searchdir = dir; + sp->sre = **rep; + } + + /* Free up any extra memory. */ + if (replaced) + FREE_SPACE(sp, ptrn, 0); + return (eval); +} + +/* + * ctag_conv -- + * Convert a tags search path into something that the POSIX + * 1003.2 RE functions can handle. + */ +static int +ctag_conv(sp, ptrnp, replacedp) + SCR *sp; + char **ptrnp; + int *replacedp; +{ + size_t blen, len; + int lastdollar; + char *bp, *p, *t; + + *replacedp = 0; + + len = strlen(p = *ptrnp); + + /* Max memory usage is 2 times the length of the string. */ + GET_SPACE_RET(sp, bp, blen, len * 2); + + t = bp; + + /* The last character is a '/' or '?', we just strip it. */ + if (p[len - 1] == '/' || p[len - 1] == '?') + p[len - 1] = '\0'; + + /* The next-to-last character is a '$', and it's magic. */ + if (p[len - 2] == '$') { + lastdollar = 1; + p[len - 2] = '\0'; + } else + lastdollar = 0; + + /* The first character is a '/' or '?', we just strip it. */ + if (p[0] == '/' || p[0] == '?') + ++p; + + /* The second character is a '^', and it's magic. */ + if (p[0] == '^') + *t++ = *p++; + + /* + * Escape every other magic character we can find, stripping the + * backslashes ctags inserts to escape the search delimiter + * characters. + */ + while (p[0]) { + /* Ctags escapes the search delimiter characters. */ + if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) + ++p; + else if (strchr("^.[]$*", p[0])) + *t++ = '\\'; + *t++ = *p++; + } + if (lastdollar) + *t++ = '$'; + *t++ = '\0'; + + *ptrnp = bp; + *replacedp = 1; + return (0); +} + +#define EMPTYMSG "File empty; nothing to search" +#define EOFMSG "Reached end-of-file without finding the pattern" +#define NOTFOUND "Pattern not found" +#define SOFMSG "Reached top-of-file without finding the pattern" +#define WRAPMSG "Search wrapped" + +int +f_search(sp, ep, fm, rm, ptrn, eptrn, flagp) + SCR *sp; + EXF *ep; + MARK *fm, *rm; + char *ptrn, **eptrn; + u_int *flagp; +{ + regmatch_t match[1]; + regex_t *re, lre; + recno_t lno; + size_t coff, len; + long delta; + u_int flags; + int btear, eval, rval, wrapped; + char *l; + + if (file_lline(sp, ep, &lno)) + return (1); + flags = *flagp; + if (lno == 0) { + if (LF_ISSET(SEARCH_MSG)) + msgq(sp, M_INFO, EMPTYMSG); + return (1); + } + + re = &lre; + if (resetup(sp, &re, FORWARD, ptrn, eptrn, &delta, flagp)) + return (1); + + /* + * Start searching immediately after the cursor. If at the end of the + * line, start searching on the next line. This is incompatible (read + * bug fix) with the historic vi -- searches for the '$' pattern never + * moved forward, and "-t foo" didn't work if "foo" was the first thing + * in the file. + */ + if (LF_ISSET(SEARCH_FILE)) { + lno = 1; + coff = 0; + } else { + if ((l = file_gline(sp, ep, fm->lno, &len)) == NULL) { + GETLINE_ERR(sp, fm->lno); + return (1); + } + if (fm->cno + 1 >= len) { + if (fm->lno == lno) { + if (!O_ISSET(sp, O_WRAPSCAN)) { + if (LF_ISSET(SEARCH_MSG)) + msgq(sp, M_INFO, EOFMSG); + return (1); + } + lno = 1; + } else + lno = fm->lno + 1; + coff = 0; + } else { + lno = fm->lno; + coff = fm->cno + 1; + } + } + + /* Turn on busy message. */ + btear = F_ISSET(sp, S_EXSILENT) ? 0 : !busy_on(sp, "Searching..."); + + for (rval = 1, wrapped = 0;; ++lno, coff = 0) { + if (INTERRUPTED(sp)) { + msgq(sp, M_INFO, "Interrupted."); + break; + } + if (wrapped && lno > fm->lno || + (l = file_gline(sp, ep, lno, &len)) == NULL) { + if (wrapped) { + if (LF_ISSET(SEARCH_MSG)) + msgq(sp, M_INFO, NOTFOUND); + break; + } + if (!O_ISSET(sp, O_WRAPSCAN)) { + if (LF_ISSET(SEARCH_MSG)) + msgq(sp, M_INFO, EOFMSG); + break; + } + lno = 0; + wrapped = 1; + continue; + } + + /* If already at EOL, just keep going. */ + if (len && coff == len) + continue; + + /* Set the termination. */ + match[0].rm_so = coff; + match[0].rm_eo = len; + +#if defined(DEBUG) && 0 + TRACE(sp, "F search: %lu from %u to %u\n", + lno, coff, len ? len - 1 : len); +#endif + /* Search the line. */ + eval = regexec(re, l, 1, match, + (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND); + if (eval == REG_NOMATCH) + continue; + if (eval != 0) { + re_error(sp, eval, re); + break; + } + + /* Warn if wrapped. */ + if (wrapped && O_ISSET(sp, O_WARN) && LF_ISSET(SEARCH_MSG)) + msgq(sp, M_VINFO, WRAPMSG); + + /* + * If an offset, see if it's legal. It's possible to match + * past the end of the line with $, so check for that case. + */ + if (delta) { + if (check_delta(sp, ep, delta, lno)) + break; + rm->lno = delta + lno; + rm->cno = 0; + } else { +#if defined(DEBUG) && 0 + TRACE(sp, "found: %qu to %qu\n", + match[0].rm_so, match[0].rm_eo); +#endif + rm->lno = lno; + rm->cno = match[0].rm_so; + + /* + * If a change command, it's possible to move beyond + * the end of a line. Historic vi generally got this + * wrong (try "c?$<cr>"). Not all that sure this gets + * it right, there are lots of strange cases. + */ + if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len) + rm->cno = len ? len - 1 : 0; + } + rval = 0; + break; + } + + /* Turn off busy message, interrupts. */ + if (btear) + busy_off(sp); + return (rval); +} + +int +b_search(sp, ep, fm, rm, ptrn, eptrn, flagp) + SCR *sp; + EXF *ep; + MARK *fm, *rm; + char *ptrn, **eptrn; + u_int *flagp; +{ + regmatch_t match[1]; + regex_t *re, lre; + recno_t lno; + size_t coff, len, last; + long delta; + u_int flags; + int btear, eval, rval, wrapped; + char *l; + + if (file_lline(sp, ep, &lno)) + return (1); + flags = *flagp; + if (lno == 0) { + if (LF_ISSET(SEARCH_MSG)) + msgq(sp, M_INFO, EMPTYMSG); + return (1); + } + + re = &lre; + if (resetup(sp, &re, BACKWARD, ptrn, eptrn, &delta, flagp)) + return (1); + + /* If in the first column, start searching on the previous line. */ + if (fm->cno == 0) { + if (fm->lno == 1) { + if (!O_ISSET(sp, O_WRAPSCAN)) { + if (LF_ISSET(SEARCH_MSG)) + msgq(sp, M_INFO, SOFMSG); + return (1); + } + } else + lno = fm->lno - 1; + } else + lno = fm->lno; + + /* Turn on busy message. */ + btear = F_ISSET(sp, S_EXSILENT) ? 0 : !busy_on(sp, "Searching..."); + + for (rval = 1, wrapped = 0, coff = fm->cno;; --lno, coff = 0) { + if (INTERRUPTED(sp)) { + msgq(sp, M_INFO, "Interrupted."); + break; + } + if (wrapped && lno < fm->lno || lno == 0) { + if (wrapped) { + if (LF_ISSET(SEARCH_MSG)) + msgq(sp, M_INFO, NOTFOUND); + break; + } + if (!O_ISSET(sp, O_WRAPSCAN)) { + if (LF_ISSET(SEARCH_MSG)) + msgq(sp, M_INFO, SOFMSG); + break; + } + if (file_lline(sp, ep, &lno)) + goto err; + if (lno == 0) { + if (LF_ISSET(SEARCH_MSG)) + msgq(sp, M_INFO, EMPTYMSG); + break; + } + ++lno; + wrapped = 1; + continue; + } + + if ((l = file_gline(sp, ep, lno, &len)) == NULL) + goto err; + + /* Set the termination. */ + match[0].rm_so = 0; + match[0].rm_eo = len; + +#if defined(DEBUG) && 0 + TRACE(sp, "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo); +#endif + /* Search the line. */ + eval = regexec(re, l, 1, match, + (match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND); + if (eval == REG_NOMATCH) + continue; + if (eval != 0) { + re_error(sp, eval, re); + break; + } + + /* Check for a match starting past the cursor. */ + if (coff != 0 && match[0].rm_so >= coff) + continue; + + /* Warn if wrapped. */ + if (wrapped && O_ISSET(sp, O_WARN) && LF_ISSET(SEARCH_MSG)) + msgq(sp, M_VINFO, WRAPMSG); + + if (delta) { + if (check_delta(sp, ep, delta, lno)) + break; + rm->lno = delta + lno; + rm->cno = 0; + } else { +#if defined(DEBUG) && 0 + TRACE(sp, "found: %qu to %qu\n", + match[0].rm_so, match[0].rm_eo); +#endif + /* + * We now have the first match on the line. Step + * through the line character by character until we + * find the last acceptable match. This is painful, + * we need a better interface to regex to make this + * work. + */ + for (;;) { + last = match[0].rm_so++; + if (match[0].rm_so >= len) + break; + match[0].rm_eo = len; + eval = regexec(re, l, 1, match, + (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | + REG_STARTEND); + if (eval == REG_NOMATCH) + break; + if (eval != 0) { + re_error(sp, eval, re); + goto err; + } + if (coff && match[0].rm_so >= coff) + break; + } + rm->lno = lno; + + /* See comment in f_search(). */ + if (!LF_ISSET(SEARCH_EOL) && last >= len) + rm->cno = len ? len - 1 : 0; + else + rm->cno = last; + } + rval = 0; + break; + } + + /* Turn off busy message, interrupts. */ +err: if (btear) + busy_off(sp); + return (rval); +} + +/* + * re_conv -- + * Convert vi's regular expressions into something that the + * the POSIX 1003.2 RE functions can handle. + * + * There are three conversions we make to make vi's RE's (specifically + * the global, search, and substitute patterns) work with POSIX RE's. + * + * 1: If O_MAGIC is not set, strip backslashes from the magic character + * set (.[]*~) that have them, and add them to the ones that don't. + * 2: If O_MAGIC is not set, the string "\~" is replaced with the text + * from the last substitute command's replacement string. If O_MAGIC + * is set, it's the string "~". + * 3: The pattern \<ptrn\> does "word" searches, convert it to use the + * new RE escapes. + */ +int +re_conv(sp, ptrnp, replacedp) + SCR *sp; + char **ptrnp; + int *replacedp; +{ + size_t blen, needlen; + int magic; + char *bp, *p, *t; + + /* + * First pass through, we figure out how much space we'll need. + * We do it in two passes, on the grounds that most of the time + * the user is doing a search and won't have magic characters. + * That way we can skip the malloc and memmove's. + */ + for (p = *ptrnp, magic = 0, needlen = 0; *p != '\0'; ++p) + switch (*p) { + case '\\': + switch (*++p) { + case '<': + magic = 1; + needlen += sizeof(RE_WSTART); + break; + case '>': + magic = 1; + needlen += sizeof(RE_WSTOP); + break; + case '~': + if (!O_ISSET(sp, O_MAGIC)) { + magic = 1; + needlen += sp->repl_len; + } + break; + case '.': + case '[': + case ']': + case '*': + if (!O_ISSET(sp, O_MAGIC)) { + magic = 1; + needlen += 1; + } + break; + default: + needlen += 2; + } + break; + case '~': + if (O_ISSET(sp, O_MAGIC)) { + magic = 1; + needlen += sp->repl_len; + } + break; + case '.': + case '[': + case ']': + case '*': + if (!O_ISSET(sp, O_MAGIC)) { + magic = 1; + needlen += 2; + } + break; + default: + needlen += 1; + break; + } + + if (!magic) { + *replacedp = 0; + return (0); + } + + /* + * Get enough memory to hold the final pattern. + * + * XXX + * It's nul-terminated, for now. + */ + GET_SPACE_RET(sp, bp, blen, needlen + 1); + + for (p = *ptrnp, t = bp; *p != '\0'; ++p) + switch (*p) { + case '\\': + switch (*++p) { + case '<': + memmove(t, RE_WSTART, sizeof(RE_WSTART) - 1); + t += sizeof(RE_WSTART) - 1; + break; + case '>': + memmove(t, RE_WSTOP, sizeof(RE_WSTOP) - 1); + t += sizeof(RE_WSTOP) - 1; + break; + case '~': + if (O_ISSET(sp, O_MAGIC)) + *t++ = '~'; + else { + memmove(t, sp->repl, sp->repl_len); + t += sp->repl_len; + } + break; + case '.': + case '[': + case ']': + case '*': + if (O_ISSET(sp, O_MAGIC)) + *t++ = '\\'; + *t++ = *p; + break; + default: + *t++ = '\\'; + *t++ = *p; + } + break; + case '~': + if (O_ISSET(sp, O_MAGIC)) { + memmove(t, sp->repl, sp->repl_len); + t += sp->repl_len; + } else + *t++ = '~'; + break; + case '.': + case '[': + case ']': + case '*': + if (!O_ISSET(sp, O_MAGIC)) + *t++ = '\\'; + *t++ = *p; + break; + default: + *t++ = *p; + break; + } + *t = '\0'; + + *ptrnp = bp; + *replacedp = 1; + return (0); +} + +/* + * get_delta -- + * Get a line delta. The trickiness is that the delta can be pretty + * complicated, i.e. "+3-2+3++- ++" is allowed. + * + * !!! + * In historic vi, if you had a delta on a search pattern which was used as + * a motion command, the command became a line mode command regardless of the + * cursor positions. A fairly common trick is to use a delta of "+0" to make + * the command a line mode command. This is the only place that knows about + * delta's, so we set the return flag information here. + */ +static int +get_delta(sp, dp, valp, flagp) + SCR *sp; + char **dp; + long *valp; + u_int *flagp; +{ + char *p; + long val, tval; + + for (tval = 0, p = *dp; *p != '\0'; *flagp |= SEARCH_DELTA) { + if (isblank(*p)) { + ++p; + continue; + } + if (*p == '+' || *p == '-') { + if (!isdigit(*(p + 1))) { + if (*p == '+') { + if (tval == LONG_MAX) + goto overflow; + ++tval; + } else { + if (tval == LONG_MIN) + goto underflow; + --tval; + } + ++p; + continue; + } + } else + if (!isdigit(*p)) + break; + + errno = 0; + val = strtol(p, &p, 10); + if (errno == ERANGE) { + if (val == LONG_MAX) +overflow: msgq(sp, M_ERR, "Delta value overflow"); + else if (val == LONG_MIN) +underflow: msgq(sp, M_ERR, "Delta value underflow"); + else + msgq(sp, M_SYSERR, NULL); + return (1); + } + if (val >= 0) { + if (LONG_MAX - val < tval) + goto overflow; + } else + if (-(LONG_MIN - tval) > val) + goto underflow; + tval += val; + } + *dp = p; + *valp = tval; + return (0); +} + +/* + * check_delta -- + * Check a line delta to see if it's legal. + */ +static int +check_delta(sp, ep, delta, lno) + SCR *sp; + EXF *ep; + long delta; + recno_t lno; +{ + /* A delta can overflow a record number. */ + if (delta < 0) { + if (lno < LONG_MAX && delta >= (long)lno) { + msgq(sp, M_ERR, "Search offset before line 1"); + return (1); + } + } else { + if (ULONG_MAX - lno < delta) { + msgq(sp, M_ERR, "Delta value overflow"); + return (1); + } + if (file_gline(sp, ep, lno + delta, NULL) == NULL) { + msgq(sp, M_ERR, "Search offset past end-of-file"); + return (1); + } + } + return (0); +} + +/* + * re_error -- + * Report a regular expression error. + */ +void +re_error(sp, errcode, preg) + SCR *sp; + int errcode; + regex_t *preg; +{ + size_t s; + char *oe; + + s = regerror(errcode, preg, "", 0); + if ((oe = malloc(s)) == NULL) + msgq(sp, M_SYSERR, NULL); + else { + (void)regerror(errcode, preg, oe, s); + msgq(sp, M_ERR, "RE error: %s", oe); + } + free(oe); +} |