/* $OpenBSD: regsub.c,v 1.1 1996/09/07 21:40:25 downsj Exp $ */ /* vi:set ts=4 sw=4: * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE * * This is NOT the original regular expression code as written by * Henry Spencer. This code has been modified specifically for use * with the VIM editor, and should not be used apart from compiling * VIM. If you want a good regular expression library, get the * original code. The copyright notice that follows is from the * original. * * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE * * vim_regsub * * Copyright (c) 1986 by University of Toronto. * Written by Henry Spencer. Not derived from licensed software. * * Permission is granted to anyone to use this software for any * purpose on any computer system, and to redistribute it freely, * subject to the following restrictions: * * 1. The author is not responsible for the consequences of use of * this software, no matter how awful, even if they arise * from defects in it. * * 2. The origin of this software must not be misrepresented, either * by explicit claim or by omission. * * 3. Altered versions must be plainly marked as such, and must not * be misrepresented as being the original software. * * $Log: regsub.c,v $ * Revision 1.1 1996/09/07 21:40:25 downsj * Initial revision * * Revision 1.2 88/04/28 08:11:25 tony * First modification of the regexp library. Added an external variable * 'reg_ic' which can be set to indicate that case should be ignored. * Added a new parameter to vim_regexec() to indicate that the given string * comes from the beginning of a line and is thus eligible to match * 'beginning-of-line'. * * Revisions by Olaf 'Rhialto' Seibert, rhialto@mbfys.kun.nl: * Changes for vi: (the semantics of several things were rather different) * - Added lexical analyzer, because in vi magicness of characters * is rather difficult, and may change over time. 
* - Added support for \< \> \1-\9 and ~ * - Left some magic stuff in, but only backslashed: \| \+ * - * and \+ still work after \) even though they shouldn't. */ #include "vim.h" #include "globals.h" #include "proto.h" #ifndef __ARGS # define __ARGS(a) a #endif #include #include "regexp.h" #ifdef LATTICE # include /* for size_t */ #endif #ifndef CHARBITS #define UCHARAT(p) ((int)*(char_u *)(p)) #else #define UCHARAT(p) ((int)*(p)&CHARBITS) #endif extern char_u *reg_prev_sub; /* This stuff below really confuses cc on an SGI -- webb */ #ifdef __sgi # undef __ARGS # define __ARGS(x) () #endif /* * We should define ftpr as a pointer to a function returning a pointer to * a function returning a pointer to a function ... * This is impossible, so we declare a pointer to a function returning a * pointer to a function returning void. This should work for all compilers. */ typedef void (*(*fptr) __ARGS((char_u *, int)))(); static fptr do_upper __ARGS((char_u *, int)); static fptr do_Upper __ARGS((char_u *, int)); static fptr do_lower __ARGS((char_u *, int)); static fptr do_Lower __ARGS((char_u *, int)); static fptr do_upper(d, c) char_u *d; int c; { *d = TO_UPPER(c); return (fptr)NULL; } static fptr do_Upper(d, c) char_u *d; int c; { *d = TO_UPPER(c); return (fptr)do_Upper; } static fptr do_lower(d, c) char_u *d; int c; { *d = TO_LOWER(c); return (fptr)NULL; } static fptr do_Lower(d, c) char_u *d; int c; { *d = TO_LOWER(c); return (fptr)do_Lower; } /* * regtilde: replace tildes in the pattern by the old pattern * * Short explanation of the tilde: it stands for the previous replacement * pattern. If that previous pattern also contains a ~ we should go back * a step further... but we insert the previous pattern into the current one * and remember that. * This still does not handle the case where "magic" changes. TODO? * * New solution: The tilde's are parsed once before the first call to * vim_regsub(). 
In the old solution (tilde handled in regsub()) is was * possible to get an endless loop. */ char_u * regtilde(source, magic) char_u *source; int magic; { char_u *newsub = NULL; char_u *tmpsub; char_u *p; int len; int prevlen; for (p = source; *p; ++p) { if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic)) { if (reg_prev_sub) { /* length = len(current) - 1 + len(previous) + 1 */ prevlen = STRLEN(reg_prev_sub); tmpsub = alloc((unsigned)(STRLEN(source) + prevlen)); if (tmpsub) { /* copy prefix */ len = (int)(p - source); /* not including ~ */ STRNCPY(tmpsub, source, len); /* interpretate tilde */ STRCPY(tmpsub + len, reg_prev_sub); /* copy postfix */ if (!magic) ++p; /* back off \ */ STRCAT(tmpsub + len, p + 1); vim_free(newsub); newsub = tmpsub; p = newsub + len + prevlen; } } else if (magic) STRCPY(p, p + 1); /* remove '~' */ else STRCPY(p, p + 2); /* remove '\~' */ } else if (*p == '\\' && p[1]) /* skip escaped characters */ ++p; } vim_free(reg_prev_sub); if (newsub) { source = newsub; reg_prev_sub = newsub; } else reg_prev_sub = strsave(source); return source; } /* - vim_regsub - perform substitutions after a regexp match * * If copy is TRUE really copy into dest, otherwise dest is not written to. * * Returns the size of the replacement, including terminating \0. 
*/

/*
 * Arguments:
 *   prog    compiled pattern of the match; prog->startp[n] / prog->endp[n]
 *           delimit the text of (sub)match n.
 *   source  replacement string.
 *   dest    output buffer.  Must be non-NULL even when "copy" is FALSE.
 *   copy    when FALSE only the size is computed.
 *   magic   when TRUE "&" inserts the whole match, otherwise "\&" does.
 *
 * Items recognized in "source":
 *   & or \&      whole match (see "magic")
 *   \0 .. \9     (sub)match with that number
 *   \u \l        convert the next character to upper / lower case
 *   \U \L        convert following characters to upper / lower case
 *   \e \E        end case conversion
 *   \r \n \t \b  CR, NL, TAB, CTRL-H
 *   \x           any other character x, taken literally
 *
 * A CR inside inserted match text is preceded by CTRL-V.
 *
 * Returns 0 after reporting e_null (a NULL argument) or e_re_corr (program
 * does not start with MAGIC).  If a NUL is found inside a match while
 * copying, e_re_damg is reported and the result ends at that point; "dest"
 * is still NUL-terminated.
 */
    int
vim_regsub(prog, source, dest, copy, magic)
    regexp     *prog;
    char_u     *source;
    char_u     *dest;
    int         copy;
    int         magic;
{
    register char_u *src;
    register char_u *dst;
    register char_u *s;
    register int    c;
    register int    no;         /* (sub)match number, -1 for plain text */
    fptr            func = (fptr)NULL;  /* active case converter, if any */

    if (prog == NULL || source == NULL || dest == NULL)
    {
        emsg(e_null);
        return 0;
    }
    if (UCHARAT(prog->program) != MAGIC)
    {
        emsg(e_re_corr);
        return 0;
    }
    src = source;
    dst = dest;

    while ((c = *src++) != '\0')
    {
        no = -1;
        if (c == '&' && magic)
            no = 0;
        else if (c == '\\' && *src != NUL)
        {
            if (*src == '&' && !magic)
            {
                ++src;
                no = 0;
            }
            else if ('0' <= *src && *src <= '9')
            {
                no = *src++ - '0';
            }
            else if (vim_strchr((char_u *)"uUlLeE", *src))
            {
                /* Case conversion items produce no output themselves. */
                switch (*src++)
                {
                case 'u':   func = (fptr)do_upper;
                            continue;
                case 'U':   func = (fptr)do_Upper;
                            continue;
                case 'l':   func = (fptr)do_lower;
                            continue;
                case 'L':   func = (fptr)do_Lower;
                            continue;
                case 'e':
                case 'E':   func = (fptr)NULL;
                            continue;
                }
            }
        }

        if (no < 0)         /* Ordinary character. */
        {
            if (c == '\\' && *src != NUL)
            {
                /* Check for abbreviations -- webb */
                switch (*src)
                {
                case 'r':   c = CR;         break;
                case 'n':   c = NL;         break;
                case 't':   c = TAB;        break;
                /* Oh no!  \e already has meaning in subst pat :-( */
                /* case 'e':    c = ESC;        break; */
                case 'b':   c = Ctrl('H');  break;
                default:
                    /* Normal character, not abbreviation */
                    c = *src;
                    break;
                }
                src++;
            }
            if (copy)
            {
                if (func == (fptr)NULL)     /* just copy */
                    *dst = c;
                else                        /* change case */
                    func = (fptr)(func(dst, c));
                            /* Turbo C complains without the typecast */
            }
            dst++;
        }
        else if (prog->startp[no] != NULL && prog->endp[no] != NULL)
        {
            for (s = prog->startp[no]; s < prog->endp[no]; ++s)
            {
                if (copy && *s == '\0')     /* we hit NUL. */
                {
                    emsg(e_re_damg);
                    goto exit;
                }
                /*
                 * Insert a CTRL-V in front of a CR, otherwise
                 * it will be replaced by a line break.
                 */
                if (*s == CR)
                {
                    if (copy)
                    {
                        dst[0] = Ctrl('V');
                        dst[1] = CR;
                    }
                    dst += 2;
                }
                else
                {
                    if (copy)
                    {
                        if (func == (fptr)NULL)     /* just copy */
                            *dst = *s;
                        else                        /* change case */
                            func = (fptr)(func(dst, *s));
                                /* Turbo C complains without the typecast */
                    }
                    ++dst;
                }
            }
        }
    }

exit:
    /*
     * The damaged-match path jumps here as well, so that "dest" is always
     * terminated and the returned size, which counts the terminator, is
     * accurate.
     */
    if (copy)
        *dst = '\0';

    return (int)((dst - dest) + 1);
}