summaryrefslogtreecommitdiff
path: root/usr.bin/vim/regsub.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr.bin/vim/regsub.c')
-rw-r--r--usr.bin/vim/regsub.c346
1 files changed, 346 insertions, 0 deletions
diff --git a/usr.bin/vim/regsub.c b/usr.bin/vim/regsub.c
new file mode 100644
index 00000000000..c1b1e74179f
--- /dev/null
+++ b/usr.bin/vim/regsub.c
@@ -0,0 +1,346 @@
+/* $OpenBSD: regsub.c,v 1.1 1996/09/07 21:40:25 downsj Exp $ */
+/* vi:set ts=4 sw=4:
+ * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
+ *
+ * This is NOT the original regular expression code as written by
+ * Henry Spencer. This code has been modified specifically for use
+ * with the VIM editor, and should not be used apart from compiling
+ * VIM. If you want a good regular expression library, get the
+ * original code. The copyright notice that follows is from the
+ * original.
+ *
+ * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
+ *
+ * vim_regsub
+ *
+ * Copyright (c) 1986 by University of Toronto.
+ * Written by Henry Spencer. Not derived from licensed software.
+ *
+ * Permission is granted to anyone to use this software for any
+ * purpose on any computer system, and to redistribute it freely,
+ * subject to the following restrictions:
+ *
+ * 1. The author is not responsible for the consequences of use of
+ * this software, no matter how awful, even if they arise
+ * from defects in it.
+ *
+ * 2. The origin of this software must not be misrepresented, either
+ * by explicit claim or by omission.
+ *
+ * 3. Altered versions must be plainly marked as such, and must not
+ * be misrepresented as being the original software.
+ *
+ * $Log: regsub.c,v $
+ * Revision 1.1 1996/09/07 21:40:25 downsj
+ * Initial revision
+ *
+ * Revision 1.2 88/04/28 08:11:25 tony
+ * First modification of the regexp library. Added an external variable
+ * 'reg_ic' which can be set to indicate that case should be ignored.
+ * Added a new parameter to vim_regexec() to indicate that the given string
+ * comes from the beginning of a line and is thus eligible to match
+ * 'beginning-of-line'.
+ *
+ * Revisions by Olaf 'Rhialto' Seibert, rhialto@mbfys.kun.nl:
+ * Changes for vi: (the semantics of several things were rather different)
+ * - Added lexical analyzer, because in vi magicness of characters
+ * is rather difficult, and may change over time.
+ * - Added support for \< \> \1-\9 and ~
+ * - Left some magic stuff in, but only backslashed: \| \+
+ * - * and \+ still work after \) even though they shouldn't.
+ */
+
+#include "vim.h"
+#include "globals.h"
+#include "proto.h"
+
+#ifndef __ARGS
+# define __ARGS(a) a
+#endif
+
+#include <stdio.h>
+#include "regexp.h"
+
+#ifdef LATTICE
+# include <sys/types.h> /* for size_t */
+#endif
+
+/*
+ * UCHARAT(p): fetch the character that p points at as an int.
+ * Without CHARBITS the byte is read through a char_u pointer; when CHARBITS
+ * is defined the fetched value is masked with CHARBITS instead.
+ */
+#ifndef CHARBITS
+#define UCHARAT(p) ((int)*(char_u *)(p))
+#else
+#define UCHARAT(p) ((int)*(p)&CHARBITS)
+#endif
+
+extern char_u *reg_prev_sub;
+
+ /* This stuff below really confuses cc on an SGI -- webb */
+#ifdef __sgi
+# undef __ARGS
+# define __ARGS(x) ()
+#endif
+
+ /*
+ * We should define fptr as a pointer to a function returning a pointer to
+ * a function returning a pointer to a function ...
+ * This is impossible, so we declare a pointer to a function returning a
+ * pointer to a function returning void. This should work for all compilers.
+ *
+ * A handler of this type stores one case-converted character and returns
+ * the handler that vim_regsub() will use for the next character (NULL means
+ * "stop converting"). Because of the truncated type, return values are cast
+ * back to fptr at every use.
+ */
+typedef void (*(*fptr) __ARGS((char_u *, int)))();
+
+/* Case-conversion handlers selected by \u, \U, \l and \L in vim_regsub(). */
+static fptr do_upper __ARGS((char_u *, int));
+static fptr do_Upper __ARGS((char_u *, int));
+static fptr do_lower __ARGS((char_u *, int));
+static fptr do_Lower __ARGS((char_u *, int));
+
+/*
+ * do_upper - handler for "\u": convert a single character.
+ *
+ * Stores TO_UPPER(c) at *d and returns NULL. vim_regsub() installs the
+ * return value as the handler for the next character, so the conversion
+ * is applied to one character only.
+ */
+    static fptr
+do_upper(d, c)
+    char_u  *d;
+    int     c;
+{
+    fptr    next = (fptr)NULL;      /* one-shot: nothing follows */
+
+    d[0] = TO_UPPER(c);
+    return next;
+}
+
+/*
+ * do_Upper - handler for "\U": convert this and the following characters.
+ *
+ * Stores TO_UPPER(c) at *d and returns itself. vim_regsub() installs the
+ * return value as the handler for the next character, so the conversion
+ * stays active until another handler (or NULL) replaces it.
+ */
+    static fptr
+do_Upper(d, c)
+    char_u  *d;
+    int     c;
+{
+    fptr    next = (fptr)do_Upper;  /* keep converting */
+
+    d[0] = TO_UPPER(c);
+    return next;
+}
+
+/*
+ * do_lower - handler for "\l": convert a single character.
+ *
+ * Stores TO_LOWER(c) at *d and returns NULL. vim_regsub() installs the
+ * return value as the handler for the next character, so the conversion
+ * is applied to one character only.
+ */
+    static fptr
+do_lower(d, c)
+    char_u  *d;
+    int     c;
+{
+    fptr    next = (fptr)NULL;      /* one-shot: nothing follows */
+
+    d[0] = TO_LOWER(c);
+    return next;
+}
+
+/*
+ * do_Lower - handler for "\L": convert this and the following characters.
+ *
+ * Stores TO_LOWER(c) at *d and returns itself. vim_regsub() installs the
+ * return value as the handler for the next character, so the conversion
+ * stays active until another handler (or NULL) replaces it.
+ */
+    static fptr
+do_Lower(d, c)
+    char_u  *d;
+    int     c;
+{
+    fptr    next = (fptr)do_Lower;  /* keep converting */
+
+    d[0] = TO_LOWER(c);
+    return next;
+}
+
+/*
+ * regtilde: replace tildes in the pattern by the old pattern
+ *
+ * Short explanation of the tilde: it stands for the previous replacement
+ * pattern. If that previous pattern also contains a ~ we should go back
+ * a step further... but we insert the previous pattern into the current one
+ * and remember that.
+ * This still does not handle the case where "magic" changes. TODO?
+ *
+ * New solution: The tildes are parsed once before the first call to
+ * vim_regsub(). In the old solution (tilde handled in regsub()) it was
+ * possible to get an endless loop.
+ *
+ * source   the substitute string; it is edited in place when a tilde has to
+ *          be removed because there is no previous substitute string.
+ * magic    when non-zero a bare '~' is the tilde, otherwise "\~" is.
+ *
+ * Returns "source" when no expansion took place, otherwise a newly
+ * allocated string. In the latter case the very same buffer is stored in
+ * reg_prev_sub (and released by the next call), so the result and
+ * reg_prev_sub share one allocation. When nothing was expanded,
+ * reg_prev_sub is set to strsave(source).
+ *
+ * Every expansion works on the current working string, so several tildes
+ * in one substitute string are handled. After an expansion or removal the
+ * scan resumes exactly at the first character that was not looked at yet;
+ * the inserted text itself is not scanned again.
+ */
+    char_u *
+regtilde(source, magic)
+    char_u  *source;
+    int     magic;
+{
+    char_u  *newsub = source;       /* working string: source or allocated */
+    char_u  *tmpsub;
+    char_u  *p;
+    char_u  *s;
+    int     len;
+    int     prevlen;
+    int     skip;
+
+    p = newsub;
+    while (*p != '\0')
+    {
+        if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
+        {
+            skip = magic ? 1 : 2;   /* length of "~" or "\~" */
+            if (reg_prev_sub)
+            {
+                /*
+                 * Needed: len(working) - skip + len(previous) + 1, which
+                 * never exceeds len(working) + len(previous).
+                 */
+                prevlen = STRLEN(reg_prev_sub);
+                tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
+                if (tmpsub == NULL)
+                {
+                    p += skip;      /* out of memory: keep the tilde as is */
+                    continue;
+                }
+                /* copy prefix */
+                len = (int)(p - newsub);    /* not including ~ */
+                STRNCPY(tmpsub, newsub, len);
+                /* interpret tilde */
+                STRCPY(tmpsub + len, reg_prev_sub);
+                /* copy postfix, leaving out the tilde (and its backslash) */
+                STRCPY(tmpsub + len + prevlen, p + skip);
+
+                if (newsub != source)       /* drop the previous expansion */
+                    vim_free(newsub);
+                newsub = tmpsub;
+                /* resume at the first character of the postfix */
+                p = newsub + len + prevlen;
+            }
+            else
+            {
+                /*
+                 * No previous substitute string: remove the tilde.  The
+                 * regions overlap, so shift the tail down by hand instead
+                 * of using STRCPY.  p stays put: it now points at the first
+                 * character that has not been examined yet.
+                 */
+                for (s = p; (*s = s[skip]) != '\0'; ++s)
+                    ;
+            }
+        }
+        else
+        {
+            if (*p == '\\' && p[1])         /* skip escaped characters */
+                ++p;
+            ++p;
+        }
+    }
+
+    vim_free(reg_prev_sub);
+    if (newsub != source)
+        reg_prev_sub = newsub;              /* remember the expanded string */
+    else
+        reg_prev_sub = strsave(source);
+    return newsub;
+}
+
+/*
+ - vim_regsub - perform substitutions after a regexp match
+ *
+ * prog     compiled pattern whose startp[]/endp[] delimit the sub-matches
+ * source   substitute string to interpret
+ * dest     output buffer; must be non-NULL even when copy is FALSE
+ * copy     if TRUE really copy into dest, otherwise dest is not written to
+ *          and only the size is computed
+ * magic    if TRUE a bare '&' stands for the whole match, otherwise "\&"
+ *
+ * Items recognized in source:
+ *   & or \&        whole match (sub-match 0), depending on magic
+ *   \0 .. \9       sub-match with that number
+ *   \u \l          upper/lower-case the next character only
+ *   \U \L          upper/lower-case until \e, \E or another case item
+ *   \r \n \t \b    CR, NL, TAB, CTRL-H
+ *   \x             any other escaped character stands for itself
+ * A CR inside an inserted sub-match gets a CTRL-V in front of it.
+ *
+ * Returns the size of the replacement, including terminating \0.
+ * Returns 0 when an argument is NULL or prog does not start with MAGIC.
+ * When a NUL is found inside a sub-match while copying, an error is given
+ * and the size so far is returned without terminating dest.
+ */
+    int
+vim_regsub(prog, source, dest, copy, magic)
+    regexp  *prog;
+    char_u  *source;
+    char_u  *dest;
+    int     copy;
+    int     magic;
+{
+    char_u  *in;                    /* read position in source */
+    char_u  *out;                   /* write position in dest */
+    char_u  *m;                     /* walks over a sub-match */
+    int     ch;
+    int     esc;
+    int     sub;                    /* sub-match number, -1 for plain char */
+    fptr    casefn = (fptr)NULL;    /* active case-conversion handler */
+
+    if (prog == NULL || source == NULL || dest == NULL)
+    {
+        emsg(e_null);
+        return 0;
+    }
+    if (UCHARAT(prog->program) != MAGIC)
+    {
+        emsg(e_re_corr);
+        return 0;
+    }
+
+    in = source;
+    out = dest;
+    for (;;)
+    {
+        ch = *in++;
+        if (ch == '\0')
+            break;
+
+        /* First decide what kind of item this is. */
+        sub = -1;
+        if (ch == '\\' && *in != NUL)
+        {
+            if (!magic && *in == '&')
+            {
+                sub = 0;
+                ++in;
+            }
+            else if (*in >= '0' && *in <= '9')
+            {
+                sub = *in - '0';
+                ++in;
+            }
+            else
+            {
+                /* Case items only switch the handler, they emit nothing. */
+                switch (*in)
+                {
+                    case 'u':   casefn = (fptr)do_upper;
+                                ++in;
+                                continue;
+                    case 'U':   casefn = (fptr)do_Upper;
+                                ++in;
+                                continue;
+                    case 'l':   casefn = (fptr)do_lower;
+                                ++in;
+                                continue;
+                    case 'L':   casefn = (fptr)do_Lower;
+                                ++in;
+                                continue;
+                    case 'e':
+                    case 'E':   casefn = (fptr)NULL;
+                                ++in;
+                                continue;
+                    default:    break;  /* handled as a plain character */
+                }
+            }
+        }
+        else if (magic && ch == '&')
+            sub = 0;
+
+        if (sub < 0)            /* Ordinary character. */
+        {
+            if (ch == '\\' && *in != NUL)
+            {
+                /* Check for abbreviations -- webb */
+                /* \e is not an abbreviation for ESC: it ends \U and \L. */
+                esc = *in++;
+                if (esc == 'r')
+                    ch = CR;
+                else if (esc == 'n')
+                    ch = NL;
+                else if (esc == 't')
+                    ch = TAB;
+                else if (esc == 'b')
+                    ch = Ctrl('H');
+                else            /* Normal character, not abbreviation */
+                    ch = esc;
+            }
+            if (copy)
+            {
+                if (casefn != (fptr)NULL)       /* change case */
+                    casefn = (fptr)(casefn(out, ch));
+                            /* Turbo C complains without the typecast */
+                else                            /* just copy */
+                    *out = ch;
+            }
+            ++out;
+            continue;
+        }
+
+        /* Insert sub-match "sub", if it took part in the match. */
+        if (prog->startp[sub] == NULL || prog->endp[sub] == NULL)
+            continue;
+        for (m = prog->startp[sub]; m < prog->endp[sub]; ++m)
+        {
+            if (copy && *m == '\0')     /* we hit NUL. */
+            {
+                emsg(e_re_damg);
+                /* give up; dest is deliberately left unterminated */
+                return (int)((out - dest) + 1);
+            }
+            if (*m != CR)
+            {
+                if (copy)
+                {
+                    if (casefn != (fptr)NULL)   /* change case */
+                        casefn = (fptr)(casefn(out, *m));
+                            /* Turbo C complains without the typecast */
+                    else                        /* just copy */
+                        *out = *m;
+                }
+                ++out;
+            }
+            else
+            {
+                /*
+                 * Insert a CTRL-V in front of a CR, otherwise
+                 * it will be replaced by a line break.
+                 */
+                if (copy)
+                {
+                    out[0] = Ctrl('V');
+                    out[1] = CR;
+                }
+                out += 2;
+            }
+        }
+    }
+    if (copy)
+        *out = '\0';
+
+    return (int)((out - dest) + 1);
+}