summaryrefslogtreecommitdiff
path: root/usr.bin/mg/re_search.c
diff options
context:
space:
mode:
authorTheo de Raadt <deraadt@cvs.openbsd.org>2000-02-25 19:08:53 +0000
committerTheo de Raadt <deraadt@cvs.openbsd.org>2000-02-25 19:08:53 +0000
commit998d769a0cf8bef7d4ca0d26945c151a23b542ec (patch)
treefe53a083eaa06a2bf7631453e18a161a86ad9d62 /usr.bin/mg/re_search.c
parentb0226ecd4460819556afd27fd575d64421fd0f68 (diff)
initial import of mg2a
Diffstat (limited to 'usr.bin/mg/re_search.c')
-rw-r--r--usr.bin/mg/re_search.c706
1 files changed, 706 insertions, 0 deletions
diff --git a/usr.bin/mg/re_search.c b/usr.bin/mg/re_search.c
new file mode 100644
index 00000000000..010cb6024e5
--- /dev/null
+++ b/usr.bin/mg/re_search.c
@@ -0,0 +1,706 @@
+/*
+ * regular expression search commands for
+ * MicroGnuEmacs
+ *
+ * This file contains functions to implement several of gnuemacs'
+ * regular expression functions for MicroGnuEmacs. Several of
+ * the routines below are just minor rearrangements of the MicroGnuEmacs
+ * non-regular expression search functions. Hence some of them date back
+ * in essential structure to the original MicroEMACS; others are modifications
+ * of Rich Ellison's code. I, Peter Newton, wrote about half from scratch.
+ *
+ * Although I have nothing to do with the GNU project, these functions
+ * require the GNU project's regular expression package (files regex.c and
+ * regex.h). Hence, this file comes under the same copyright notice
+ * as the GNU project's code. As far as I know, the rest of MicroGnuEmacs
+ * need not since it may be used independently of any GNU project code. In
+ * any case, I certainly do not warrant either the correctness or utility
+ * of this code. The GNU project copyright notice follows. Don't you
+ * wish they would make it a bit shorter!
+ */
+
+/*
+GNU Emacs copying permission notice Copyright (C) 1985 Richard M. Stallman
+ Verbatim copies of this document, including its copyright notice,
+ may be distributed by anyone in any manner.
+ Distribution with modifications is not permitted.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but without any warranty. No author or distributor
+accepts responsibility to anyone for the consequences of using it
+or for whether it serves any particular purpose or works at all,
+unless he says so in writing.
+
+Everyone is granted permission to copy, modify and redistribute
+GNU Emacs under the following conditions:
+
+ Permission is granted to anyone to make or distribute verbatim copies
+ of GNU Emacs source code as received, in any medium, provided that all
+ copyright notices and permission and nonwarranty notices are preserved,
+ and that the distributor grants the recipient permission
+ for further redistribution as permitted by this document,
+ and gives him and points out to him an exact copy of this document
+ to inform him of his rights.
+
+ Permission is granted to distribute modified versions
+ of GNU Emacs source code, or of portions of it,
+ under the above conditions, provided also that all
+ changed files carry prominent notices stating who last changed them
+ and that all the GNU-Emacs-derived material, including everything
+ packaged together with it and not independently usable, is
+ distributed under the conditions stated in this document.
+
+ Permission is granted to distribute GNU Emacs in
+ compiled or executable form under the same conditions applying
+ for source code, provided that either
+ A. it is accompanied by the corresponding machine-readable
+ source code, or
+ B. it is accompanied by a written offer, with no time limit,
+ to give anyone a machine-readable copy of the corresponding
+ source code in return for reimbursement of the cost of distribution.
+ This written offer must permit verbatim duplication by anyone.
+ C. it is distributed by someone who received only the
+ executable form, and is accompanied by a copy of the
+ written offer of source code which he received along with it.
+
+In other words, you are welcome to use, share and improve GNU Emacs
+You are forbidden to forbid anyone else to use, share and improve
+what you give them. Help stamp out software-hoarding!
+*/
+
+#ifdef REGEX
+#include "def.h"
+#include "macro.h"
+
+/*
+ * Search sub-codes.  In the code visible in this file only SRCH_NOPR,
+ * SRCH_FORW and SRCH_BACK are referenced, as values of re_srch_lastdir.
+ */
+#define SRCH_BEGIN (0) /* Search sub-codes. */
+#define SRCH_FORW (-1)
+#define SRCH_BACK (-2)
+#define SRCH_NOPR (-3)
+#define SRCH_ACCM (-4)
+#define SRCH_MARK (-5)
+
+/*
+ * re_pat holds the text of the current pattern; re_readpattern fills it
+ * and an empty string means "no pattern".  re_srch_lastdir records the
+ * direction of the last successful interactive search (SRCH_NOPR until
+ * one succeeds).  casefoldsearch selects whether re_readpattern installs
+ * the upcase translate table when compiling a pattern.
+ */
+char re_pat[NPAT]; /* Regex pattern */
+int re_srch_lastdir = SRCH_NOPR; /* Last search flags. */
+int casefoldsearch = TRUE; /* Does search ignore case ? */
+
+/* Indexed by a character, gives the upper case equivalent of the character */
+/*
+ * The table is the identity mapping for all 0400 (256) codes except
+ * 0141-0172 ('a'-'z' in ASCII), which map to 0101-0132 ('A'-'Z').
+ * re_readpattern installs it as re_buff.translate when casefoldsearch
+ * is set.
+ */
+
+static char upcase[0400] =
+ { 000, 001, 002, 003, 004, 005, 006, 007,
+ 010, 011, 012, 013, 014, 015, 016, 017,
+ 020, 021, 022, 023, 024, 025, 026, 027,
+ 030, 031, 032, 033, 034, 035, 036, 037,
+ 040, 041, 042, 043, 044, 045, 046, 047,
+ 050, 051, 052, 053, 054, 055, 056, 057,
+ 060, 061, 062, 063, 064, 065, 066, 067,
+ 070, 071, 072, 073, 074, 075, 076, 077,
+ 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
+ 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
+ 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
+ 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
+ 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
+ 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
+ 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
+ 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
+ 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
+ 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
+ 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
+ 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
+ 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
+ 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
+ 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
+ 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
+ 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
+ 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
+ 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
+ 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
+ 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
+ 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
+ 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
+ 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
+ };
+
+/*
+ * Interactive forward regular-expression search.
+ * Prompts for a pattern through re_readpattern (prompt "RE Search")
+ * and then runs re_forwsrch from ".".  On a match re_forwsrch has
+ * already moved "." to the end of the matched text; the direction is
+ * remembered in re_srch_lastdir so re_searchagain can repeat it.
+ * On failure a "Search failed" message is shown and the remembered
+ * direction is left alone.
+ *
+ * Returns TRUE on a match, FALSE when nothing matched, or whatever
+ * non-TRUE status re_readpattern produced.
+ */
+/*ARGSUSED*/
+re_forwsearch(f, n) {
+	register int status;
+
+	status = re_readpattern("RE Search");
+	if (status != TRUE)
+		return (status);
+
+	if (re_forwsrch() != FALSE) {
+		re_srch_lastdir = SRCH_FORW;
+		return (TRUE);
+	}
+
+	ewprintf("Search failed: \"%s\"", re_pat);
+	return (FALSE);
+}
+
+/*
+ * Interactive backward regular-expression search.
+ * Prompts for a pattern through re_readpattern (prompt
+ * "RE Search backward") and then runs re_backsrch, which looks toward
+ * the front of the buffer starting just left of ".".  On a match
+ * re_backsrch has already put "." on the first character of the
+ * matched text, and the direction is remembered in re_srch_lastdir.
+ * On failure a "Search failed" message is shown.
+ *
+ * Returns TRUE on a match, FALSE when nothing matched, or whatever
+ * non-TRUE status re_readpattern produced.
+ */
+/*ARGSUSED*/
+re_backsearch(f, n) {
+	register int status;
+
+	status = re_readpattern("RE Search backward");
+	if (status != TRUE)
+		return (status);
+
+	if (re_backsrch() != FALSE) {
+		re_srch_lastdir = SRCH_BACK;
+		return (TRUE);
+	}
+
+	ewprintf("Search failed: \"%s\"", re_pat);
+	return (FALSE);
+}
+
+
+
+/*
+ * Search again, using the same compiled pattern and the same
+ * direction as the last successful re_forwsearch/re_backsearch.
+ * The direction has been saved in "re_srch_lastdir".
+ *
+ * Returns TRUE when a match is found.  Returns FALSE when the search
+ * fails (after printing "Search failed") or when no direction has
+ * been recorded yet (after printing "No last search").  Every path
+ * returns a value; the function used to fall off its end when
+ * re_srch_lastdir held anything other than the three expected codes.
+ */
+/*ARGSUSED*/
+/* This code has problems-- some incompatibility(?) with
+   extend.c causes match to fail when it should not.
+ */
+re_searchagain(f, n) {
+	register int found;
+
+	switch (re_srch_lastdir) {
+	case SRCH_FORW:
+		found = re_forwsrch();
+		break;
+
+	case SRCH_BACK:
+		found = re_backsrch();
+		break;
+
+	default:
+		/* SRCH_NOPR, or any value that is not a search direction */
+		ewprintf("No last search");
+		return (FALSE);
+	}
+
+	if (found == FALSE) {
+		ewprintf("Search failed: \"%s\"", re_pat);
+		return (FALSE);
+	}
+	return (TRUE);
+}
+
+
+#include "regex.h"
+#define BYTEWIDTH 8
+
+/* Compiled regex goes here-- changed only when new pattern read */
+/*
+ * re_readpattern fills re_buff through re_compile_pattern and points
+ * re_buff.fastmap at the fastmap array below (one entry per possible
+ * byte value, 1 << BYTEWIDTH in all).
+ */
+static struct re_pattern_buffer re_buff;
+static char fastmap[(1 << BYTEWIDTH)];
+
+/* regs holds boundaries of matched text */
+/*
+ * Every re_search call in this file stores into regs; regs.start[0] and
+ * regs.end[0] are then read as offsets of the whole match within the
+ * searched line, and re_doreplace reads the higher-numbered entries
+ * for \N references.
+ */
+static struct re_registers regs;
+
+/*
+ * Regular-expression query replace.
+ * Reads a pattern (re_readpattern) and a replacement string, then
+ * walks forward through the matches with re_forwsrch, asking at each
+ * one what to do:
+ *
+ *	' '		replace this match and go on to the next
+ *	'.'		replace this match and stop
+ *	^G		call ctrlg() and stop
+ *	ESC or '`'	stop
+ *	'!'		replace this and every remaining match unasked
+ *	CCHR('?')	leave this match alone and go on (the help
+ *			line calls this key <DEL>)
+ *	anything else	show the help line and ask again
+ *
+ * "f" is forced to TRUE when case folding is off and is handed to
+ * re_doreplace as its case-hack-disable flag.  An empty reply for the
+ * replacement text means "replace with nothing".
+ *
+ * Returns the non-TRUE status of re_readpattern, ABORT if the
+ * replacement prompt was aborted, FALSE if a replacement failed, and
+ * TRUE otherwise.  Unless running inside a macro, a count of the
+ * replacements made is reported at the end.
+ */
+/*ARGSUSED*/
+re_queryrepl(f, n) {
+	register int status;
+	register int replaced = 0;	/* replacements made so far */
+	register int matchlen;		/* length of the matched text */
+	register int key;		/* user's answer for this match */
+	char news[NPAT];		/* replacement string */
+
+	/* Casefold check */
+	if (casefoldsearch == 0)
+		f = TRUE;
+
+	status = re_readpattern("RE Query replace");
+	if (status != TRUE)
+		return (status);
+
+	status = ereply("Query replace %s with: ",news, NPAT, re_pat);
+	if (status == ABORT)
+		return (status);
+	if (status == FALSE)
+		news[0] = '\0';
+	ewprintf("Query replacing %s with %s:", re_pat, news);
+
+	while (re_forwsrch() == TRUE) {
+		/* keep asking until the answer is one we understand */
+		for (;;) {
+			update();
+			key = getkey(FALSE);
+
+			if (key == ' ' || key == '.') {
+				matchlen = regs.end[0] - regs.start[0];
+				if (re_doreplace((RSIZE) matchlen, news, f) == FALSE)
+					return (FALSE);
+				replaced++;
+				if (key == '.')
+					goto stopsearch;
+				break;
+			}
+
+			if (key == CCHR('G')) {		/* ^G */
+				(VOID) ctrlg(FFRAND, 0);
+				goto stopsearch;
+			}
+
+			if (key == CCHR('[') || key == '`')	/* ESC */
+				goto stopsearch;
+
+			if (key == '!') {
+				/* no more questions: replace all the rest */
+				do {
+					matchlen = regs.end[0] - regs.start[0];
+					if (re_doreplace((RSIZE) matchlen, news, f) == FALSE)
+						return (FALSE);
+					replaced++;
+				} while (re_forwsrch() == TRUE);
+				goto stopsearch;
+			}
+
+			if (key == CCHR('?'))		/* To not replace */
+				break;
+
+ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
+		}
+	}
+
+stopsearch:
+	curwp->w_flag |= WFHARD;
+	update();
+	if (!inmacro) {
+		switch (replaced) {
+		case 0:
+			ewprintf("(No replacements done)");
+			break;
+		case 1:
+			ewprintf("(1 replacement done)");
+			break;
+		default:
+			ewprintf("(%d replacements done)", replaced);
+			break;
+		}
+	}
+	return TRUE;
+}
+
+
+
+/* Routine re_doreplace calls lreplace to make replacements needed by
+ * re_query replace.  Its reason for existence is to deal with \1,
+ * \2. etc.
+ *
+ * The replacement string "st" is expanded into a local buffer:
+ *   - "\" followed by digits is replaced by the text of that numbered
+ *     register of the last match (taken from the line holding dot,
+ *     using the offsets in regs);
+ *   - "\" followed by any other character yields that character;
+ *   - a trailing lone "\" is dropped;
+ *   - everything else is copied as is.
+ * The expansion is then handed to lreplace together with "plen" and "f".
+ *
+ * Returns FALSE if the expansion does not fit in REPLEN bytes or if a
+ * register number is not below RE_NREGS; otherwise returns whatever
+ * lreplace returns.
+ */
+
+/* Maximum length of replacement string */
+#define REPLEN 256
+
+re_doreplace(plen, st, f)
+	register RSIZE	plen;		/* length to remove */
+	char		*st;		/* replacement string */
+	int		f;		/* case hack disable */
+{
+	int	s;
+	int	num, k;
+	register int j;
+	int	more, state;
+	LINE	*clp;
+	char	repstr[REPLEN];
+
+	clp = curwp->w_dotp;
+	more = TRUE;
+	j = 0;
+	num = 0;
+	state = 0;
+
+	/*
+	 * The following FSA parses the replacement string:
+	 *   state 0 - ordinary text
+	 *   state 1 - just saw a backslash
+	 *   state 2 - collecting the digits of a register number
+	 */
+	while (more) {
+		switch (state) {
+
+		case 0:
+			if (*st == '\\') {
+				st++;
+				state = 1;
+			}
+			else if (*st == '\0')
+				more = FALSE;
+			else {
+				repstr[j] = *st;
+				j++; if (j >= REPLEN) return(FALSE);
+				st++;
+			}
+			break;
+
+		case 1:
+			if (*st >= '0' && *st <= '9') {
+				num = *st - '0';
+				st++;
+				state = 2;
+			}
+			else if (*st == '\0')
+				more = FALSE;
+			else {
+				repstr[j] = *st;
+				j++; if (j >= REPLEN) return(FALSE);
+				st++;
+				state = 0;
+			}
+			break;
+
+		case 2:
+			if (*st >= '0' && *st <= '9') {
+				num = 10*num + *st - '0';
+				/*
+				 * Reject as soon as the number is out of
+				 * range, so that a long run of digits can
+				 * never overflow num.
+				 */
+				if (num >= RE_NREGS) return(FALSE);
+				st++;
+			}
+			else {
+				if (num >= RE_NREGS) return(FALSE);
+				k = regs.end[num] - regs.start[num];
+				/*
+				 * A register that took no part in the match
+				 * is expected to carry negative offsets (GNU
+				 * regex convention); treat it as empty
+				 * instead of forming an address before the
+				 * start of the line.
+				 */
+				if (regs.start[num] < 0 || k < 0)
+					k = 0;
+				if (j+k >= REPLEN) return(FALSE);
+				if (k > 0)
+					bcopy(&(clp->l_text[regs.start[num]]), &repstr[j], k);
+				j += k;
+				/*
+				 * Exactly one of the three cases applies.
+				 * Without the "else" the terminating NUL
+				 * was also copied as a literal, advancing
+				 * both j and st past the end.
+				 */
+				if (*st == '\0')
+					more = FALSE;
+				else if (*st == '\\') {
+					st++;
+					state = 1;
+				}
+				else {
+					repstr[j] = *st;
+					j++; if (j >= REPLEN) return(FALSE);
+					st++;
+					state = 0;
+				}
+			}
+			break;
+		} /* end case */
+	} /* end while */
+
+	repstr[j] = '\0';
+
+	s = lreplace(plen, repstr, f);
+
+	return(s);
+}
+
+
+
+/*
+ * Worker for every forward search in this file.
+ * Runs the pattern already compiled into re_buff against the buffer,
+ * one line at a time, beginning at dot.  On a match, dot is moved to
+ * the end of the matched text (regs.end[0]) on the matching line, the
+ * window is flagged WFMOVE and TRUE is returned.  If the walk reaches
+ * curbp->b_linep without a match, FALSE is returned and dot is left
+ * where it was.
+ */
+re_forwsrch() {
+
+	register LINE	*lp;
+	register int	off;
+	int	range;
+
+	lp = curwp->w_dotp;
+	off = curwp->w_doto;
+
+	/*
+	 * Don't start matching off the end of a line: begin at the
+	 * start of the next one instead, unless already at the end.
+	 */
+	if (off == lp->l_used && lp != curbp->b_linep) {
+		lp = lforw(lp);
+		off = 0;
+	}
+
+	/*
+	 * The line curbp->b_linep itself is never searched; the original
+	 * author notes that the editor keeps that last line empty.
+	 */
+	for (; lp != (curbp->b_linep); lp = lforw(lp), off = 0) {
+
+		range = llength(lp) - off;
+		if (re_search (&re_buff, ltext(lp), llength(lp), off, range, &regs) == -1)
+			continue;	/* nothing on this line, try the next */
+
+		curwp->w_dotp = lp;
+		curwp->w_doto = regs.end[0];
+		curwp->w_flag |= WFMOVE;
+		return (TRUE);
+	}
+
+	return(FALSE);
+
+}
+
+
+/*
+ * Worker for every backward search in this file.
+ * Runs the pattern already compiled into re_buff against the buffer,
+ * one line at a time, working toward the front.  The first attempt
+ * starts one position to the left of dot; each earlier line is tried
+ * from its end.  On a match, dot is moved to the start of the matched
+ * text (regs.start[0]) on the matching line, the window is flagged
+ * WFMOVE and TRUE is returned.  If the walk reaches curbp->b_linep
+ * without a match, FALSE is returned and dot is left where it was.
+ */
+re_backsrch() {
+
+	register LINE	*lp;
+	register int	off;
+
+	lp = curwp->w_dotp;
+
+	/* Start search one position to the left of dot */
+	off = curwp->w_doto - 1;
+	if (off < 0) {
+		/* dot was at column 0: move up to the end of the line above */
+		lp = lback(lp);
+		off = llength(lp);
+	}
+
+	/*
+	 * The line curbp->b_linep itself is never searched; the original
+	 * author notes that the editor keeps that last line empty.
+	 * A negative range makes re_search try positions off, off-1, ...
+	 */
+	for (; lp != (curbp->b_linep); lp = lback(lp), off = llength(lp)) {
+
+		if (re_search (&re_buff, ltext(lp), llength(lp), off, -off, &regs) == -1)
+			continue;	/* nothing on this line, try the one above */
+
+		curwp->w_dotp = lp;
+		curwp->w_doto = regs.start[0];
+		curwp->w_flag |= WFMOVE;
+		return (TRUE);
+	}
+
+	return(FALSE);
+
+}
+
+
+/*
+ * Read a pattern.
+ * Prompt with "prompt" (showing the old pattern as the default when
+ * there is one) and stash the reply in the external variable "re_pat".
+ * A new pattern is compiled into re_buff, using the upcase translate
+ * table when casefoldsearch is set, and its fastmap is rebuilt.
+ * "re_pat" is not updated if the user types in an empty line; in that
+ * case the previously compiled pattern is reused.  If the user typed
+ * an empty line and there is no old pattern, it is an error.
+ *
+ * The compile buffer is allocated once and then reused for every
+ * later pattern (re_compile_pattern grows it as needed) instead of
+ * leaking a fresh allocation each time a pattern is read.
+ *
+ * Returns TRUE when a usable compiled pattern is in place, FALSE when
+ * there is none (empty reply without an old pattern, allocation
+ * failure, or a compile error, which is reported and clears re_pat),
+ * or any other status ereply returned.
+ */
+re_readpattern(prompt) char *prompt; {
+	register int s;
+	char tpat[NPAT];
+	char *message;
+
+	if (re_pat[0] == '\0') s = ereply("%s: ", tpat, NPAT, prompt);
+	else s = ereply("%s: (default %s) ", tpat, NPAT, prompt, re_pat);
+
+	if (s == TRUE) {
+		/* New pattern given */
+		(VOID) strcpy(re_pat, tpat);
+		if (re_buff.buffer == (char *) 0) {
+			/* first pattern ever: give the compiler a buffer */
+			re_buff.allocated = 40;
+			re_buff.buffer = (char *) malloc (re_buff.allocated);
+			if (re_buff.buffer == (char *) 0) {
+				re_buff.allocated = 0;
+				ewprintf("Regex Error: out of memory");
+				/* nothing was compiled for this text */
+				re_pat[0] = '\0';
+				return(FALSE);
+			}
+		}
+		re_buff.fastmap = fastmap;
+		if (casefoldsearch)
+			re_buff.translate = upcase;
+		else
+			re_buff.translate = (char *) 0;
+		message = re_compile_pattern (re_pat, strlen(re_pat), &re_buff);
+		if (message != (char *) 0) {
+			ewprintf("Regex Error: %s", message);
+			re_pat[0] = '\0';
+			return(FALSE);
+		}
+		re_compile_fastmap (&re_buff);
+	}
+	else if (s==FALSE && re_pat[0]!='\0')
+		/* Just using old pattern */
+		s = TRUE;
+	return (s);
+}
+
+
+
+/* Select whether searches ignore case.  Called without an argument
+ * (FFARG clear in "f") case folding is turned on, which is also the
+ * default; called with an argument it is turned off.  Either way the
+ * stored pattern text is discarded, because the compiled pattern was
+ * built for the old setting and is not recompiled here.
+ * Always returns TRUE.
+ */
+setcasefold(f, n) {
+
+	if ((f & FFARG) == 0) {
+		casefoldsearch = TRUE;
+		ewprintf("Case-fold-search set");
+	}
+	else {
+		casefoldsearch = FALSE;
+		ewprintf("Case-fold-search unset");
+	}
+
+	/* Forget the old pattern so the next search has to read one. */
+	re_pat[0] = '\0';
+
+	return(TRUE);
+
+} /* end setcasefold */
+
+
+/* Delete all lines after dot that contain a string matching regex.
+ * Reads the pattern with re_readpattern and hands the work to
+ * killmatches(TRUE).  Returns re_readpattern's status when no pattern
+ * was obtained, otherwise the result of killmatches.
+ */
+delmatchlines(f, n) {
+	int status;
+
+	status = re_readpattern("Flush lines (containing match for regexp)");
+	if (status == TRUE)
+		status = killmatches(TRUE);
+
+	return(status);
+}
+
+
+
+/* Delete all lines after dot that don't contain a string matching regex.
+ * Reads the pattern with re_readpattern and hands the work to
+ * killmatches(FALSE).  Returns re_readpattern's status when no pattern
+ * was obtained, otherwise the result of killmatches.
+ */
+delnonmatchlines(f, n) {
+	int status;
+
+	status = re_readpattern("Keep lines (containing match for regexp)");
+	if (status == TRUE)
+		status = killmatches(FALSE);
+
+	return(status);
+}
+
+
+
+/* This function does the work of deleting matching lines.
+ *
+ * Starting at the line holding dot (or at the following line when dot
+ * sits at the end of its line) and continuing until curbp->b_linep is
+ * reached, each line is tested against the pattern compiled in re_buff.
+ * With cond == TRUE the lines that match are deleted; with
+ * cond == FALSE the lines that do not match are deleted.  A line is
+ * deleted together with its newline by moving dot to its start and
+ * calling ldelete.
+ *
+ * Returns FALSE as soon as an ldelete fails; otherwise reports the
+ * number of deleted lines and returns TRUE.
+ */
+killmatches(cond)
+	int cond;
+{
+	int s, i;
+	int count = 0;
+	LINE *clp;
+
+	clp = curwp->w_dotp;
+	/*
+	 * Consider dot on next line.  As in re_forwsrch, do not step
+	 * past curbp->b_linep: from there lforw presumably leads back to
+	 * the top of the buffer, and lines before dot would be examined.
+	 */
+	if (curwp->w_doto == llength(clp) && clp != curbp->b_linep)
+		clp = lforw(clp);
+
+	while (clp != (curbp->b_linep)) {
+
+		/* see if line matches */
+		i = re_search (&re_buff, ltext(clp), llength(clp), 0, llength(clp),
+			       &regs);
+		/* Delete line when appropriate */
+		if ((cond == FALSE && i == -1) || (cond == TRUE && i != -1)) {
+			curwp->w_doto = 0;
+			curwp->w_dotp = clp;
+			count++;
+			s = ldelete(llength(clp)+1, KNONE);
+			/* dot now sits on the line that followed the deleted one */
+			clp = curwp->w_dotp;
+			curwp->w_flag |= WFMOVE;
+			if (s == FALSE) return(FALSE);
+		}
+		else
+			clp = lforw(clp);
+	}
+
+	ewprintf("%d line(s) deleted", count);
+	if (count > 0) curwp->w_flag |= WFMOVE;
+
+	return(TRUE);
+}
+
+
+/* Delete the whole line holding dot, newline included: dot is moved
+ * to column 0 and ldelete is asked to remove the line's length plus
+ * one.  Flags the window WFMOVE and returns ldelete's result.
+ */
+petersfunc(f, n) {
+
+	int status;
+
+	curwp->w_doto = 0;
+	status = ldelete(llength(curwp->w_dotp)+1, KNONE);
+	curwp->w_flag |= WFMOVE;
+
+	return(status);
+
+}
+
+
+/* Count lines matching regex.
+ * Reads the pattern with re_readpattern and hands the work to
+ * countmatches(TRUE).  Returns re_readpattern's status when no pattern
+ * was obtained, otherwise the result of countmatches.
+ */
+cntmatchlines(f, n) {
+	int status;
+
+	status = re_readpattern("Count lines (matching regexp)");
+	if (status == TRUE)
+		status = countmatches(TRUE);
+
+	return(status);
+}
+
+
+
+/* Count lines that fail to match regex.
+ * Reads the pattern with re_readpattern and hands the work to
+ * countmatches(FALSE).  Returns re_readpattern's status when no
+ * pattern was obtained, otherwise the result of countmatches.
+ */
+cntnonmatchlines(f, n) {
+	int status;
+
+	status = re_readpattern("Count lines (not matching regexp)");
+	if (status == TRUE)
+		status = countmatches(FALSE);
+
+	return(status);
+}
+
+
+
+/* This function does the work of counting matching lines.
+ *
+ * Starting at the line holding dot (or at the following line when dot
+ * sits at the end of its line) and continuing until curbp->b_linep is
+ * reached, each line is tested against the pattern compiled in re_buff.
+ * With cond == TRUE the lines that match are counted; with
+ * cond == FALSE the lines that do not match are counted.  The total
+ * is reported with ewprintf.  Dot is not moved.  Always returns TRUE.
+ */
+countmatches(cond)
+	int cond;
+{
+	int i;
+	int count = 0;
+	LINE *clp;
+
+	clp = curwp->w_dotp;
+	/*
+	 * Consider dot on next line.  As in re_forwsrch, do not step
+	 * past curbp->b_linep: from there lforw presumably leads back to
+	 * the top of the buffer, and lines before dot would be counted.
+	 */
+	if (curwp->w_doto == llength(clp) && clp != curbp->b_linep)
+		clp = lforw(clp);
+
+	while (clp != (curbp->b_linep)) {
+
+		/* see if line matches */
+		i = re_search (&re_buff, ltext(clp), llength(clp), 0, llength(clp),
+			       &regs);
+		/* Count line when appropriate */
+		if ((cond == FALSE && i == -1) || (cond == TRUE && i != -1)) count++;
+		clp = lforw(clp);
+	}
+
+	if (cond)
+		ewprintf("Number of lines matching: %d", count);
+	else
+		ewprintf("Number of lines not matching: %d", count);
+
+	return(TRUE);
+}
+#endif