diff options
author | Theo de Raadt <deraadt@cvs.openbsd.org> | 2000-02-25 19:08:53 +0000 |
---|---|---|
committer | Theo de Raadt <deraadt@cvs.openbsd.org> | 2000-02-25 19:08:53 +0000 |
commit | 998d769a0cf8bef7d4ca0d26945c151a23b542ec (patch) | |
tree | fe53a083eaa06a2bf7631453e18a161a86ad9d62 /usr.bin/mg/re_search.c | |
parent | b0226ecd4460819556afd27fd575d64421fd0f68 (diff) |
initial import of mg2a
Diffstat (limited to 'usr.bin/mg/re_search.c')
-rw-r--r-- | usr.bin/mg/re_search.c | 706 |
1 files changed, 706 insertions, 0 deletions
diff --git a/usr.bin/mg/re_search.c b/usr.bin/mg/re_search.c new file mode 100644 index 00000000000..010cb6024e5 --- /dev/null +++ b/usr.bin/mg/re_search.c @@ -0,0 +1,706 @@ +/* + * regular expression search commands for + * MicroGnuEmacs + * + * This file contains functions to implement several of gnuemacs' + * regular expression functions for MicroGnuEmacs. Several of + * the routines below are just minor rearrangements of the MicroGnuEmacs + * non-regular expression search functions. Hence some of them date back + * in essential structure to the original MicroEMACS; others are modifications + * of Rich Ellison's code. I, Peter Newton, wrote about half from scratch. + * + * Although I have nothing to do with the GNU project, these functions + * require the GNU project's regular expression package (files regex.c and + * regex.h). Hence, this file comes under the same copyright notice + * as the GNU project's code. As far as I know, the rest of MicroGnuEmacs + * need not since it may be used independently of any GNU project code. In + * any case, I certainly do not warrant either the correctness or utility + * of this code. The GNU project copyright notice follows. Don't you + * wish they would make it a bit shorter! + */ + +/* +GNU Emacs copying permission notice Copyright (C) 1985 Richard M. Stallman + Verbatim copies of this document, including its copyright notice, + may be distributed by anyone in any manner. + Distribution with modifications is not permitted. + +GNU Emacs is distributed in the hope that it will be useful, +but without any warranty. No author or distributor +accepts responsibility to anyone for the consequences of using it +or for whether it serves any particular purpose or works at all, +unless he says so in writing. + +Everyone is granted permission to copy, modify and redistribute +GNU Emacs under the following conditions: + + Permission is granted to anyone to make or distribute verbatim copies + of GNU Emacs source code as received, in any medium, provided that all + copyright notices and permission and nonwarranty notices are preserved, + and that the distributor grants the recipient permission + for further redistribution as permitted by this document, + and gives him and points out to him an exact copy of this document + to inform him of his rights. + + Permission is granted to distribute modified versions + of GNU Emacs source code, or of portions of it, + under the above conditions, provided also that all + changed files carry prominent notices stating who last changed them + and that all the GNU-Emacs-derived material, including everything + packaged together with it and not independently usable, is + distributed under the conditions stated in this document. + + Permission is granted to distribute GNU Emacs in + compiled or executable form under the same conditions applying + for source code, provided that either + A. it is accompanied by the corresponding machine-readable + source code, or + B. it is accompanied by a written offer, with no time limit, + to give anyone a machine-readable copy of the corresponding + source code in return for reimbursement of the cost of distribution. + This written offer must permit verbatim duplication by anyone. + C. it is distributed by someone who received only the + executable form, and is accompanied by a copy of the + written offer of source code which he received along with it. + +In other words, you are welcome to use, share and improve GNU Emacs +You are forbidden to forbid anyone else to use, share and improve +what you give them. Help stamp out software-hoarding! +*/ + +#ifdef REGEX +#include "def.h" +#include "macro.h" + +#define SRCH_BEGIN (0) /* Search sub-codes. */ +#define SRCH_FORW (-1) +#define SRCH_BACK (-2) +#define SRCH_NOPR (-3) +#define SRCH_ACCM (-4) +#define SRCH_MARK (-5) + +char re_pat[NPAT]; /* Regex pattern */ +int re_srch_lastdir = SRCH_NOPR; /* Last search flags. */ +int casefoldsearch = TRUE; /* Does search ignore case ? */ + +/* Indexed by a character, gives the upper case equivalent of the character */ + +static char upcase[0400] = + { 000, 001, 002, 003, 004, 005, 006, 007, + 010, 011, 012, 013, 014, 015, 016, 017, + 020, 021, 022, 023, 024, 025, 026, 027, + 030, 031, 032, 033, 034, 035, 036, 037, + 040, 041, 042, 043, 044, 045, 046, 047, + 050, 051, 052, 053, 054, 055, 056, 057, + 060, 061, 062, 063, 064, 065, 066, 067, + 070, 071, 072, 073, 074, 075, 076, 077, + 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107, + 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, + 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, + 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137, + 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107, + 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, + 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, + 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177, + 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207, + 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217, + 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227, + 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237, + 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247, + 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257, + 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267, + 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277, + 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307, + 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317, + 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327, + 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, + 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347, + 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357, + 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367, + 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377 + }; + +/* + * Search forward. + * Get a search string from the user, and search for it, + * starting at ".". If found, "." gets moved to just after the + * matched characters, and display does all the hard stuff. + * If not found, it just prints a message. + */ +/*ARGSUSED*/ +re_forwsearch(f, n) { + register int s; + + if ((s=re_readpattern("RE Search")) != TRUE) + return (s); + if (re_forwsrch() == FALSE) { + ewprintf("Search failed: \"%s\"", re_pat); + return (FALSE); + } + re_srch_lastdir = SRCH_FORW; + return (TRUE); +} + +/* + * Reverse search. + * Get a search string from the user, and search, starting at "." + * and proceeding toward the front of the buffer. If found "." is left + * pointing at the first character of the pattern [the last character that + * was matched]. + */ +/*ARGSUSED*/ +re_backsearch(f, n) { + register int s; + + if ((s=re_readpattern("RE Search backward")) != TRUE) + return (s); + if (re_backsrch() == FALSE) { + ewprintf("Search failed: \"%s\"", re_pat); + return (FALSE); + } + re_srch_lastdir = SRCH_BACK; + return (TRUE); +} + + + +/* + * Search again, using the same search string + * and direction as the last search command. The direction + * has been saved in "srch_lastdir", so you know which way + * to go. + */ +/*ARGSUSED*/ +/* This code has problems-- some incompatibility(?) with + extend.c causes match to fail when it should not. + */ +re_searchagain(f, n) { + + if (re_srch_lastdir == SRCH_NOPR) { + ewprintf("No last search"); + return (FALSE); + } + + if (re_srch_lastdir == SRCH_FORW) { + if (re_forwsrch() == FALSE) { + ewprintf("Search failed: \"%s\"", re_pat); + return (FALSE); + } + return (TRUE); + } + if (re_srch_lastdir == SRCH_BACK) { + if (re_backsrch() == FALSE) { + ewprintf("Search failed: \"%s\"", re_pat); + return (FALSE); + } + return (TRUE); + } +} + + +#include "regex.h" +#define BYTEWIDTH 8 + +/* Compiled regex goes here-- changed only when new pattern read */ +static struct re_pattern_buffer re_buff; +static char fastmap[(1 << BYTEWIDTH)]; + +/* regs holds boundaries of matched text */ +static struct re_registers regs; + +/* + * Re-Query Replace. + * Replace strings selectively. Does a search and replace operation. + */ +/*ARGSUSED*/ +re_queryrepl(f, n) { + register int s; + register int rcnt = 0; /* Replacements made so far */ + register int plen; /* length of found string */ + char news[NPAT]; /* replacement string */ + + /* Casefold check */ + if (!casefoldsearch) f = TRUE; + + if ((s=re_readpattern("RE Query replace")) != TRUE) + return (s); + if ((s=ereply("Query replace %s with: ",news, NPAT, re_pat)) == ABORT) + return (s); + if (s == FALSE) + news[0] = '\0'; + ewprintf("Query replacing %s with %s:", re_pat, news); + + /* + * Search forward repeatedly, checking each time whether to insert + * or not. The "!" case makes the check always true, so it gets put + * into a tighter loop for efficiency. + */ + + while (re_forwsrch() == TRUE) { + retry: + update(); + switch (getkey(FALSE)) { + case ' ': + plen = regs.end[0] - regs.start[0]; + if (re_doreplace((RSIZE) plen, news, f) == FALSE) + return (FALSE); + rcnt++; + break; + + case '.': + plen = regs.end[0] - regs.start[0]; + if (re_doreplace((RSIZE) plen, news, f) == FALSE) + return (FALSE); + rcnt++; + goto stopsearch; + + case CCHR('G'): /* ^G */ + (VOID) ctrlg(FFRAND, 0); + case CCHR('['): /* ESC */ + case '`': + goto stopsearch; + + case '!': + do { + plen = regs.end[0] - regs.start[0]; + if (re_doreplace((RSIZE) plen, news, f) == FALSE) + return (FALSE); + rcnt++; + } while (re_forwsrch() == TRUE); + goto stopsearch; + + case CCHR('?'): /* To not replace */ + break; + + default: +ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit"); + goto retry; + } + } +stopsearch: + curwp->w_flag |= WFHARD; + update(); + if (!inmacro) { + if (rcnt == 0) + ewprintf("(No replacements done)"); + else if (rcnt == 1) + ewprintf("(1 replacement done)"); + else + ewprintf("(%d replacements done)", rcnt); + } + return TRUE; +} + + + +/* Routine re_doreplace calls lreplace to make replacements needed by + * re_query replace. Its reason for existence is to deal with \1, + * \2. etc. + */ + +/* Maximum length of replacement string */ +#define REPLEN 256 + +re_doreplace(plen, st, f) + register RSIZE plen; /* length to remove */ + char *st; /* replacement string */ + int f; /* case hack disable */ +{ + int s; + int num, k; + register int j; + int more, state; + LINE *clp; + char repstr[REPLEN]; + + clp = curwp->w_dotp; + more = TRUE; + j = 0; + state = 0; + + /* The following FSA parses the replacement string */ + while (more) { + switch (state) { + + case 0: if (*st == '\\') { + st++; + state = 1; + } + else if (*st == '\0') + more = FALSE; + else { + repstr[j] = *st; + j++; if (j >= REPLEN) return(FALSE); + st++; + } + break; + case 1: if (*st >= '0' && *st <= '9') { + num = *st - '0'; + st++; + state = 2; + } + else if (*st == '\0') + more = FALSE; + else { + repstr[j] = *st; + j++; if (j >= REPLEN) return(FALSE); + st++; + state = 0; + } + break; + case 2: if (*st >= '0' && *st <= '9') { + num = 10*num + *st - '0'; + st++; + } + else { + if (num >= RE_NREGS) return(FALSE); + k = regs.end[num] - regs.start[num]; + if (j+k >= REPLEN) return(FALSE); + bcopy(&(clp->l_text[regs.start[num]]), &repstr[j], k); + j += k; + if (*st == '\0') + more = FALSE; + if (*st == '\\') { + st++; + state = 1; + } + else { + repstr[j] = *st; + j++; if (j >= REPLEN) return(FALSE); + st++; + state = 0; + } + } + break; + } /* end case */ + } /* end while */ + + repstr[j] = '\0'; + + s = lreplace(plen, repstr, f); + + return(s); +} + + + +/* + * This routine does the real work of a + * forward search. The pattern is sitting in the external + * variable "pat". If found, dot is updated, the window system + * is notified of the change, and TRUE is returned. If the + * string isn't found, FALSE is returned. + */ +re_forwsrch() { + + register LINE *clp; + register int tbo; + int ntries; + int i, plen; + + clp = curwp->w_dotp; + tbo = curwp->w_doto; + + if (tbo == clp->l_used) + /* Don't start matching off end of line-- must + * move to beginning of next line, unless at end + */ + if (clp != curbp->b_linep) { + clp = lforw(clp); + tbo = 0; + } + + + /* Note this loop does not process the last line, but this editor + always makes the last line empty so this is good. + */ + + while (clp != (curbp->b_linep)) { + + ntries = llength(clp) - tbo; + i = re_search (&re_buff, ltext(clp), llength(clp), tbo, ntries, ®s); + + if (i == -1) { + clp = lforw(clp); + tbo = 0; + } + else { + curwp->w_doto = regs.end[0]; + curwp->w_dotp = clp; + curwp->w_flag |= WFMOVE; + return (TRUE); + } + + } + + return(FALSE); + +} + + +/* + * This routine does the real work of a + * backward search. The pattern is sitting in the external + * variable "re_pat". If found, dot is updated, the window system + * is notified of the change, and TRUE is returned. If the + * string isn't found, FALSE is returned. + */ +re_backsrch() { + + register LINE *clp; + register int tbo; + int ntries; + int i, startpos; +char m[1]; + + clp = curwp->w_dotp; + tbo = curwp->w_doto; + + /* Start search one position to the left of dot */ + tbo = tbo - 1; + if (tbo < 0) { + /* must move up one line */ + clp = lback(clp); + tbo = llength(clp); + } + + /* Note this loop does not process the last line, but this editor + always makes the last line empty so this is good. + */ + + while (clp != (curbp->b_linep)) { + + ntries = tbo; + i = re_search (&re_buff, ltext(clp), llength(clp), tbo, -ntries, ®s); + + if (i == -1) { + clp = lback(clp); + tbo = llength(clp); + } + else { + curwp->w_doto = regs.start[0]; + curwp->w_dotp = clp; + curwp->w_flag |= WFMOVE; + return (TRUE); + } + + } + + return(FALSE); + +} + + +/* + * Read a pattern. + * Stash it in the external variable "re_pat". The "pat" is + * not updated if the user types in an empty line. If the user typed + * an empty line, and there is no old pattern, it is an error. + * Display the old pattern, in the style of Jeff Lomicka. There is + * some do-it-yourself control expansion. + */ +re_readpattern(prompt) char *prompt; { + register int s; + char tpat[NPAT]; + char *message; + + if (re_pat[0] == '\0') s = ereply("%s: ", tpat, NPAT, prompt); + else s = ereply("%s: (default %s) ", tpat, NPAT, prompt, re_pat); + + if (s == TRUE) { + /* New pattern given */ + (VOID) strcpy(re_pat, tpat); + re_buff.allocated = 40; + re_buff.buffer = (char *) malloc (re_buff.allocated); + re_buff.fastmap = fastmap; + if (casefoldsearch) + re_buff.translate = upcase; + else + re_buff.translate = '\0'; + message = re_compile_pattern (re_pat, strlen(re_pat), &re_buff); + if (message != '\0') { + ewprintf("Regex Error: %s", message); + re_pat[0] = '\0'; + return(FALSE); + } + re_compile_fastmap (&re_buff); + } + else if (s==FALSE && re_pat[0]!='\0') + /* Just using old pattern */ + s = TRUE; + return (s); +} + + + +/* Cause case to not matter in searches. This is the default. If + * called with argument cause case to matter. + */ +setcasefold(f, n) { + + if (f & FFARG) { + casefoldsearch = FALSE; + ewprintf("Case-fold-search unset"); + } + else { + casefoldsearch = TRUE; + ewprintf("Case-fold-search set"); + } + + /* Invalidate the regular expression pattern since I'm too lazy + * to recompile it. + */ + + re_pat[0] = '\0'; + + return(TRUE); + +} /* end setcasefold */ + + +/* Delete all lines after dot that contain a string matching regex + */ +delmatchlines(f, n) { + int s; + + if ((s=re_readpattern("Flush lines (containing match for regexp)")) != TRUE) + return (s); + + s = killmatches(TRUE); + + return(s); +} + + + +/* Delete all lines after dot that don't contain a string matching regex + */ +delnonmatchlines(f, n) { + int s; + + + if ((s=re_readpattern("Keep lines (containing match for regexp)")) != TRUE) + return (s); + + s = killmatches(FALSE); + + return(s); +} + + + +/* This function does the work of deleting matching lines */ +killmatches(cond) + int cond; +{ + int s, i; + int count = 0; + LINE *clp; + + clp = curwp->w_dotp; + if (curwp->w_doto == llength(clp)) + /* Consider dot on next line */ + clp = lforw(clp); + + while (clp != (curbp->b_linep)) { + + /* see if line matches */ + i = re_search (&re_buff, ltext(clp), llength(clp), 0, llength(clp), + ®s); + /* Delete line when appropriate */ + if ((cond == FALSE && i == -1) || (cond == TRUE && i != -1)) { + curwp->w_doto = 0; + curwp->w_dotp = clp; + count++; + s = ldelete(llength(clp)+1, KNONE); + clp = curwp->w_dotp; + curwp->w_flag |= WFMOVE; + if (s == FALSE) return(FALSE); + } + else + clp = lforw(clp); + } + + ewprintf("%d line(s) deleted", count); + if (count > 0) curwp->w_flag |= WFMOVE; + + return(TRUE); +} + + +petersfunc(f, n) { + + int s; + LINE *clp; + char c; + + curwp->w_doto = 0; + s = ldelete(llength(curwp->w_dotp)+1, KNONE); + curwp->w_flag |= WFMOVE; + return(s); + +} + + +/* Count lines matching regex + */ +cntmatchlines(f, n) { + int s; + + if ((s=re_readpattern("Count lines (matching regexp)")) != TRUE) + return (s); + + s = countmatches(TRUE); + + return(s); +} + + + +/* Count lines that fail to match regex + */ +cntnonmatchlines(f, n) { + int s; + + + if ((s=re_readpattern("Count lines (not matching regexp)")) != TRUE) + return (s); + + s = countmatches(FALSE); + + return(s); +} + + + +/* This function does the work of counting matching lines */ +countmatches(cond) + int cond; +{ + int s, i; + int count = 0; + LINE *clp; + + clp = curwp->w_dotp; + if (curwp->w_doto == llength(clp)) + /* Consider dot on next line */ + clp = lforw(clp); + + while (clp != (curbp->b_linep)) { + + /* see if line matches */ + i = re_search (&re_buff, ltext(clp), llength(clp), 0, llength(clp), + ®s); + /* Count line when appropriate */ + if ((cond == FALSE && i == -1) || (cond == TRUE && i != -1)) count++; + clp = lforw(clp); + } + + if (cond) + ewprintf("Number of lines matching: %d", count); + else + ewprintf("Number of lines not matching: %d", count); + + return(TRUE); +} +#endif |