diff options
author | Todd C. Miller <millert@cvs.openbsd.org> | 2008-09-29 17:19:07 +0000 |
---|---|---|
committer | Todd C. Miller <millert@cvs.openbsd.org> | 2008-09-29 17:19:07 +0000 |
commit | bfa863c83bfd01d72a748decaaf2676b800c60ff (patch) | |
tree | 02491e683928ba1c318d3ebc1200feb7c8bb78b4 /gnu/usr.bin/perl/ext/re | |
parent | 37a0443eb842cf3d2c9de0e5647b0094a0dcf0bd (diff) |
import perl 5.10.0 from CPAN
Diffstat (limited to 'gnu/usr.bin/perl/ext/re')
-rw-r--r-- | gnu/usr.bin/perl/ext/re/re.pm | 486 | ||||
-rw-r--r-- | gnu/usr.bin/perl/ext/re/re.xs | 205 | ||||
-rw-r--r-- | gnu/usr.bin/perl/ext/re/re_comp.h | 7 | ||||
-rw-r--r-- | gnu/usr.bin/perl/ext/re/re_top.h | 42 | ||||
-rw-r--r-- | gnu/usr.bin/perl/ext/re/t/lexical_debug.pl | 29 | ||||
-rw-r--r-- | gnu/usr.bin/perl/ext/re/t/lexical_debug.t | 37 | ||||
-rw-r--r-- | gnu/usr.bin/perl/ext/re/t/qr.t | 15 | ||||
-rw-r--r-- | gnu/usr.bin/perl/ext/re/t/re.t | 10 | ||||
-rw-r--r-- | gnu/usr.bin/perl/ext/re/t/re_funcs.t | 64 | ||||
-rw-r--r-- | gnu/usr.bin/perl/ext/re/t/regop.pl | 20 | ||||
-rw-r--r-- | gnu/usr.bin/perl/ext/re/t/regop.t | 261 |
11 files changed, 1068 insertions, 108 deletions
diff --git a/gnu/usr.bin/perl/ext/re/re.pm b/gnu/usr.bin/perl/ext/re/re.pm index 6e9d1218ef0..0cf5376e866 100644 --- a/gnu/usr.bin/perl/ext/re/re.pm +++ b/gnu/usr.bin/perl/ext/re/re.pm @@ -1,6 +1,174 @@ package re; -our $VERSION = 0.05; +# pragma for controlling the regex engine +use strict; +use warnings; + +our $VERSION = "0.08"; +our @ISA = qw(Exporter); +our @EXPORT_OK = qw(is_regexp regexp_pattern regmust + regname regnames regnames_count); +our %EXPORT_OK = map { $_ => 1 } @EXPORT_OK; + +# *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING *** +# +# If you modify these values see comment below! + +my %bitmask = ( + taint => 0x00100000, # HINT_RE_TAINT + eval => 0x00200000, # HINT_RE_EVAL +); + +# - File::Basename contains a literal for 'taint' as a fallback. If +# taint is changed here, File::Basename must be updated as well. +# +# - ExtUtils::ParseXS uses a hardcoded +# BEGIN { $^H |= 0x00200000 } +# in it to allow re.xs to be built. So if 'eval' is changed here then +# ExtUtils::ParseXS must be changed as well. +# +# *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING *** + +sub setcolor { + eval { # Ignore errors + require Term::Cap; + + my $terminal = Tgetent Term::Cap ({OSPEED => 9600}); # Avoid warning. + my $props = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue'; + my @props = split /,/, $props; + my $colors = join "\t", map {$terminal->Tputs($_,1)} @props; + + $colors =~ s/\0//g; + $ENV{PERL_RE_COLORS} = $colors; + }; + if ($@) { + $ENV{PERL_RE_COLORS} ||= qq'\t\t> <\t> <\t\t'; + } + +} + +my %flags = ( + COMPILE => 0x0000FF, + PARSE => 0x000001, + OPTIMISE => 0x000002, + TRIEC => 0x000004, + DUMP => 0x000008, + FLAGS => 0x000010, + + EXECUTE => 0x00FF00, + INTUIT => 0x000100, + MATCH => 0x000200, + TRIEE => 0x000400, + + EXTRA => 0xFF0000, + TRIEM => 0x010000, + OFFSETS => 0x020000, + OFFSETSDBG => 0x040000, + STATE => 0x080000, + OPTIMISEM => 0x100000, + STACK => 0x280000, + BUFFERS => 0x400000, +); +$flags{ALL} = -1 & ~($flags{OFFSETS}|$flags{OFFSETSDBG}|$flags{BUFFERS}); +$flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE}; +$flags{Extra} = $flags{EXECUTE} | $flags{COMPILE}; +$flags{More} = $flags{MORE} = $flags{All} | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE}; +$flags{State} = $flags{DUMP} | $flags{EXECUTE} | $flags{STATE}; +$flags{TRIE} = $flags{DUMP} | $flags{EXECUTE} | $flags{TRIEC}; + +my $installed; +my $installed_error; + +sub _do_install { + if ( ! defined($installed) ) { + require XSLoader; + $installed = eval { XSLoader::load('re', $VERSION) } || 0; + $installed_error = $@; + } +} + +sub _load_unload { + my ($on)= @_; + if ($on) { + _do_install(); + if ( ! $installed ) { + die "'re' not installed!? ($installed_error)"; + } else { + # We call install() every time, as if we didn't, we wouldn't + # "see" any changes to the color environment var since + # the last time it was called. + + # install() returns an integer, which if casted properly + # in C resolves to a structure containing the regex + # hooks. Setting it to a random integer will guarantee + # segfaults. + $^H{regcomp} = install(); + } + } else { + delete $^H{regcomp}; + } +} + +sub bits { + my $on = shift; + my $bits = 0; + unless (@_) { + require Carp; + Carp::carp("Useless use of \"re\" pragma"); + } + foreach my $idx (0..$#_){ + my $s=$_[$idx]; + if ($s eq 'Debug' or $s eq 'Debugcolor') { + setcolor() if $s =~/color/i; + ${^RE_DEBUG_FLAGS} = 0 unless defined ${^RE_DEBUG_FLAGS}; + for my $idx ($idx+1..$#_) { + if ($flags{$_[$idx]}) { + if ($on) { + ${^RE_DEBUG_FLAGS} |= $flags{$_[$idx]}; + } else { + ${^RE_DEBUG_FLAGS} &= ~ $flags{$_[$idx]}; + } + } else { + require Carp; + Carp::carp("Unknown \"re\" Debug flag '$_[$idx]', possible flags: ", + join(", ",sort keys %flags ) ); + } + } + _load_unload($on ? 1 : ${^RE_DEBUG_FLAGS}); + last; + } elsif ($s eq 'debug' or $s eq 'debugcolor') { + setcolor() if $s =~/color/i; + _load_unload($on); + last; + } elsif (exists $bitmask{$s}) { + $bits |= $bitmask{$s}; + } elsif ($EXPORT_OK{$s}) { + _do_install(); + require Exporter; + re->export_to_level(2, 're', $s); + } else { + require Carp; + Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ", + join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask), + ")"); + } + } + $bits; +} + +sub import { + shift; + $^H |= bits(1, @_); +} + +sub unimport { + shift; + $^H &= ~ bits(0, @_); +} + +1; + +__END__ =head1 NAME @@ -23,23 +191,38 @@ re - Perl pragma to alter regular expression behaviour /foo${pat}bar/; # disallowed (with or without -T switch) } - use re 'debug'; # NOT lexically scoped (as others are) - /^(.*)$/s; # output debugging info during - # compile and run time + use re 'debug'; # output debugging info during + /^(.*)$/s; # compile and run time + use re 'debugcolor'; # same as 'debug', but with colored output ... + use re qw(Debug All); # Finer tuned debugging options. + use re qw(Debug More); + no re qw(Debug ALL); # Turn of all re debugging in this scope + + use re qw(is_regexp regexp_pattern); # import utility functions + my ($pat,$mods)=regexp_pattern(qr/foo/i); + if (is_regexp($obj)) { + print "Got regexp: ", + scalar regexp_pattern($obj); # just as perl would stringify it + } # but no hassle with blessed re's. + (We use $^X in these examples because it's tainted by default.) =head1 DESCRIPTION +=head2 'taint' mode + When C<use re 'taint'> is in effect, and a tainted string is the target of a regex, the regex memories (or values returned by the m// operator in list context) are tainted. This feature is useful when regex operations on tainted data aren't meant to extract safe substrings, but to perform other transformations. +=head2 'eval' mode + When C<use re 'eval'> is in effect, a regex is allowed to contain C<(?{ ... })> zero-width assertions even if regular expression contains variable interpolation. That is normally disallowed, since it is a @@ -56,6 +239,8 @@ interpolation. Thus: I<is> allowed if $pat is a precompiled regular expression, even if $pat contains C<(?{ ... })> assertions. +=head2 'debug' mode + When C<use re 'debug'> is in effect, perl emits debugging messages when compiling and using regular expressions. The output is the same as that obtained by running a C<-DDEBUGGING>-enabled perl interpreter with the @@ -67,68 +252,253 @@ comma-separated list of C<termcap> properties to use for highlighting strings on/off, pre-point part on/off. See L<perldebug/"Debugging regular expressions"> for additional info. -The directive C<use re 'debug'> is I<not lexically scoped>, as the -other directives are. It has both compile-time and run-time effects. +As of 5.9.5 the directive C<use re 'debug'> and its equivalents are +lexically scoped, as the other directives are. However they have both +compile-time and run-time effects. See L<perlmodlib/Pragmatic Modules>. -=cut +=head2 'Debug' mode -# N.B. File::Basename contains a literal for 'taint' as a fallback. If -# taint is changed here, File::Basename must be updated as well. -my %bitmask = ( -taint => 0x00100000, # HINT_RE_TAINT -eval => 0x00200000, # HINT_RE_EVAL -); +Similarly C<use re 'Debug'> produces debugging output, the difference +being that it allows the fine tuning of what debugging output will be +emitted. Options are divided into three groups, those related to +compilation, those related to execution and those related to special +purposes. The options are as follows: -sub setcolor { - eval { # Ignore errors - require Term::Cap; +=over 4 - my $terminal = Tgetent Term::Cap ({OSPEED => 9600}); # Avoid warning. - my $props = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue'; - my @props = split /,/, $props; - my $colors = join "\t", map {$terminal->Tputs($_,1)} @props; +=item Compile related options - $colors =~ s/\0//g; - $ENV{PERL_RE_COLORS} = $colors; - }; -} +=over 4 -sub bits { - my $on = shift; - my $bits = 0; - unless (@_) { - require Carp; - Carp::carp("Useless use of \"re\" pragma"); - } - foreach my $s (@_){ - if ($s eq 'debug' or $s eq 'debugcolor') { - setcolor() if $s eq 'debugcolor'; - require XSLoader; - XSLoader::load('re'); - install() if $on; - uninstall() unless $on; - next; - } - if (exists $bitmask{$s}) { - $bits |= $bitmask{$s}; - } else { - require Carp; - Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: @{[join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask)]})"); - } - } - $bits; -} +=item COMPILE -sub import { - shift; - $^H |= bits(1, @_); -} +Turns on all compile related debug options. -sub unimport { - shift; - $^H &= ~ bits(0, @_); -} +=item PARSE -1; +Turns on debug output related to the process of parsing the pattern. + +=item OPTIMISE + +Enables output related to the optimisation phase of compilation. + +=item TRIEC + +Detailed info about trie compilation. + +=item DUMP + +Dump the final program out after it is compiled and optimised. + +=back + +=item Execute related options + +=over 4 + +=item EXECUTE + +Turns on all execute related debug options. + +=item MATCH + +Turns on debugging of the main matching loop. + +=item TRIEE + +Extra debugging of how tries execute. + +=item INTUIT + +Enable debugging of start point optimisations. + +=back + +=item Extra debugging options + +=over 4 + +=item EXTRA + +Turns on all "extra" debugging options. + +=item BUFFERS + +Enable debugging the capture buffer storage during match. Warning, +this can potentially produce extremely large output. + +=item TRIEM + +Enable enhanced TRIE debugging. Enhances both TRIEE +and TRIEC. + +=item STATE + +Enable debugging of states in the engine. + +=item STACK + +Enable debugging of the recursion stack in the engine. Enabling +or disabling this option automatically does the same for debugging +states as well. This output from this can be quite large. + +=item OPTIMISEM + +Enable enhanced optimisation debugging and start point optimisations. +Probably not useful except when debugging the regex engine itself. + +=item OFFSETS + +Dump offset information. This can be used to see how regops correlate +to the pattern. Output format is + + NODENUM:POSITION[LENGTH] + +Where 1 is the position of the first char in the string. Note that position +can be 0, or larger than the actual length of the pattern, likewise length +can be zero. + +=item OFFSETSDBG + +Enable debugging of offsets information. This emits copious +amounts of trace information and doesn't mesh well with other +debug options. + +Almost definitely only useful to people hacking +on the offsets part of the debug engine. + +=back + +=item Other useful flags + +These are useful shortcuts to save on the typing. + +=over 4 + +=item ALL + +Enable all options at once except OFFSETS, OFFSETSDBG and BUFFERS + +=item All + +Enable DUMP and all execute options. Equivalent to: + + use re 'debug'; + +=item MORE + +=item More + +Enable TRIEM and all execute compile and execute options. + +=back + +=back + +As of 5.9.5 the directive C<use re 'debug'> and its equivalents are +lexically scoped, as the other directives are. However they have both +compile-time and run-time effects. + +=head2 Exportable Functions + +As of perl 5.9.5 're' debug contains a number of utility functions that +may be optionally exported into the caller's namespace. They are listed +below. + +=over 4 + +=item is_regexp($ref) + +Returns true if the argument is a compiled regular expression as returned +by C<qr//>, false if it is not. + +This function will not be confused by overloading or blessing. In +internals terms, this extracts the regexp pointer out of the +PERL_MAGIC_qr structure so it it cannot be fooled. + +=item regexp_pattern($ref) + +If the argument is a compiled regular expression as returned by C<qr//>, +then this function returns the pattern. + +In list context it returns a two element list, the first element +containing the pattern and the second containing the modifiers used when +the pattern was compiled. + + my ($pat, $mods) = regexp_pattern($ref); + +In scalar context it returns the same as perl would when strigifying a raw +C<qr//> with the same pattern inside. If the argument is not a compiled +reference then this routine returns false but defined in scalar context, +and the empty list in list context. Thus the following + + if (regexp_pattern($ref) eq '(?i-xsm:foo)') + +will be warning free regardless of what $ref actually is. + +Like C<is_regexp> this function will not be confused by overloading +or blessing of the object. + +=item regmust($ref) + +If the argument is a compiled regular expression as returned by C<qr//>, +then this function returns what the optimiser consiers to be the longest +anchored fixed string and longest floating fixed string in the pattern. + +A I<fixed string> is defined as being a substring that must appear for the +pattern to match. An I<anchored fixed string> is a fixed string that must +appear at a particular offset from the beginning of the match. A I<floating +fixed string> is defined as a fixed string that can appear at any point in +a range of positions relative to the start of the match. For example, + + my $qr = qr/here .* there/x; + my ($anchored, $floating) = regmust($qr); + print "anchored:'$anchored'\nfloating:'$floating'\n"; + +results in + + anchored:'here' + floating:'there' + +Because the C<here> is before the C<.*> in the pattern, its position +can be determined exactly. That's not true, however, for the C<there>; +it could appear at any point after where the anchored string appeared. +Perl uses both for its optimisations, prefering the longer, or, if they are +equal, the floating. + +B<NOTE:> This may not necessarily be the definitive longest anchored and +floating string. This will be what the optimiser of the Perl that you +are using thinks is the longest. If you believe that the result is wrong +please report it via the L<perlbug> utility. + +=item regname($name,$all) + +Returns the contents of a named buffer of the last successful match. If +$all is true, then returns an array ref containing one entry per buffer, +otherwise returns the first defined buffer. + +=item regnames($all) + +Returns a list of all of the named buffers defined in the last successful +match. If $all is true, then it returns all names defined, if not it returns +only names which were involved in the match. + +=item regnames_count() + +Returns the number of distinct names defined in the pattern used +for the last successful match. + +B<Note:> this result is always the actual number of distinct +named buffers defined, it may not actually match that which is +returned by C<regnames()> and related routines when those routines +have not been called with the $all parameter set. + +=back + +=head1 SEE ALSO + +L<perlmodlib/Pragmatic Modules>. + +=cut diff --git a/gnu/usr.bin/perl/ext/re/re.xs b/gnu/usr.bin/perl/ext/re/re.xs index 11239d7e59d..b4d3e34c4a1 100644 --- a/gnu/usr.bin/perl/ext/re/re.xs +++ b/gnu/usr.bin/perl/ext/re/re.xs @@ -6,73 +6,182 @@ #include "EXTERN.h" #include "perl.h" #include "XSUB.h" +#include "re_comp.h" + START_EXTERN_C -extern regexp* my_regcomp (pTHX_ char* exp, char* xend, PMOP* pm); -extern I32 my_regexec (pTHX_ regexp* prog, char* stringarg, char* strend, +extern REGEXP* my_re_compile (pTHX_ const SV * const pattern, const U32 pm_flags); +extern I32 my_regexec (pTHX_ REGEXP * const prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, void* data, U32 flags); -extern void my_regfree (pTHX_ struct regexp* r); -extern char* my_re_intuit_start (pTHX_ regexp *prog, SV *sv, char *strpos, - char *strend, U32 flags, + +extern char* my_re_intuit_start (pTHX_ REGEXP * const prog, SV *sv, char *strpos, + char *strend, const U32 flags, struct re_scream_pos_data_s *data); -extern SV* my_re_intuit_string (pTHX_ regexp *prog); +extern SV* my_re_intuit_string (pTHX_ REGEXP * const prog); -END_EXTERN_C +extern void my_regfree (pTHX_ REGEXP * const r); -#define MY_CXT_KEY "re::_guts" XS_VERSION +extern void my_reg_numbered_buff_fetch(pTHX_ REGEXP * const rx, const I32 paren, + SV * const usesv); +extern void my_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren, + SV const * const value); +extern I32 my_reg_numbered_buff_length(pTHX_ REGEXP * const rx, + const SV * const sv, const I32 paren); -typedef struct { - int x_oldflag; /* debug flag */ -} my_cxt_t; +extern SV* my_reg_named_buff(pTHX_ REGEXP * const, SV * const, SV * const, + const U32); +extern SV* my_reg_named_buff_iter(pTHX_ REGEXP * const rx, + const SV * const lastkey, const U32 flags); -START_MY_CXT +extern SV* my_reg_qr_package(pTHX_ REGEXP * const rx); +#if defined(USE_ITHREADS) +extern void* my_regdupe (pTHX_ REGEXP * const r, CLONE_PARAMS *param); +#endif -#define oldflag (MY_CXT.x_oldflag) +EXTERN_C const struct regexp_engine my_reg_engine; -static void -uninstall(pTHX) -{ - dMY_CXT; - PL_regexecp = Perl_regexec_flags; - PL_regcompp = Perl_pregcomp; - PL_regint_start = Perl_re_intuit_start; - PL_regint_string = Perl_re_intuit_string; - PL_regfree = Perl_pregfree; - - if (!oldflag) - PL_debug &= ~DEBUG_r_FLAG; -} +END_EXTERN_C -static void -install(pTHX) -{ - dMY_CXT; - PL_colorset = 0; /* Allow reinspection of ENV. */ - PL_regexecp = &my_regexec; - PL_regcompp = &my_regcomp; - PL_regint_start = &my_re_intuit_start; - PL_regint_string = &my_re_intuit_string; - PL_regfree = &my_regfree; - oldflag = PL_debug & DEBUG_r_FLAG; - PL_debug |= DEBUG_r_FLAG; -} +const struct regexp_engine my_reg_engine = { + my_re_compile, + my_regexec, + my_re_intuit_start, + my_re_intuit_string, + my_regfree, + my_reg_numbered_buff_fetch, + my_reg_numbered_buff_store, + my_reg_numbered_buff_length, + my_reg_named_buff, + my_reg_named_buff_iter, + my_reg_qr_package, +#if defined(USE_ITHREADS) + my_regdupe +#endif +}; MODULE = re PACKAGE = re -BOOT: +void +install() + PPCODE: + PL_colorset = 0; /* Allow reinspection of ENV. */ + /* PL_debug |= DEBUG_r_FLAG; */ + XPUSHs(sv_2mortal(newSViv(PTR2IV(&my_reg_engine)))); + + +void +regexp_pattern(sv) + SV * sv +PROTOTYPE: $ +PREINIT: + REGEXP *re; +PPCODE: { - MY_CXT_INIT; + /* + Checks if a reference is a regex or not. If the parameter is + not a ref, or is not the result of a qr// then returns false + in scalar context and an empty list in list context. + Otherwise in list context it returns the pattern and the + modifiers, in scalar context it returns the pattern just as it + would if the qr// was stringified normally, regardless as + to the class of the variable and any strigification overloads + on the object. + */ + + if ((re = SvRX(sv))) /* assign deliberate */ + { + /* Housten, we have a regex! */ + SV *pattern; + STRLEN patlen = 0; + STRLEN left = 0; + char reflags[6]; + + if ( GIMME_V == G_ARRAY ) { + /* + we are in list context so stringify + the modifiers that apply. We ignore "negative + modifiers" in this scenario. + */ + + char *fptr = INT_PAT_MODS; + char ch; + U16 match_flags = (U16)((re->extflags & PMf_COMPILETIME) >> 12); + + while((ch = *fptr++)) { + if(match_flags & 1) { + reflags[left++] = ch; + } + match_flags >>= 1; + } + + pattern = sv_2mortal(newSVpvn(re->precomp,re->prelen)); + if (re->extflags & RXf_UTF8) SvUTF8_on(pattern); + + /* return the pattern and the modifiers */ + XPUSHs(pattern); + XPUSHs(sv_2mortal(newSVpvn(reflags,left))); + XSRETURN(2); + } else { + /* Scalar, so use the string that Perl would return */ + /* return the pattern in (?msix:..) format */ + pattern = sv_2mortal(newSVpvn(re->wrapped,re->wraplen)); + if (re->extflags & RXf_UTF8) + SvUTF8_on(pattern); + XPUSHs(pattern); + XSRETURN(1); + } + } else { + /* It ain't a regexp folks */ + if ( GIMME_V == G_ARRAY ) { + /* return the empty list */ + XSRETURN_UNDEF; + } else { + /* Because of the (?:..) wrapping involved in a + stringified pattern it is impossible to get a + result for a real regexp that would evaluate to + false. Therefore we can return PL_sv_no to signify + that the object is not a regex, this means that one + can say + + if (regex($might_be_a_regex) eq '(?:foo)') { } + + and not worry about undefined values. + */ + XSRETURN_NO; + } + } + /* NOT-REACHED */ } void -install() - CODE: - install(aTHX); +regmust(sv) + SV * sv +PROTOTYPE: $ +PREINIT: + REGEXP *re; +PPCODE: +{ + if ((re = SvRX(sv))) /* assign deliberate */ + { + SV *an = &PL_sv_no; + SV *fl = &PL_sv_no; + if (re->anchored_substr) { + an = newSVsv(re->anchored_substr); + } else if (re->anchored_utf8) { + an = newSVsv(re->anchored_utf8); + } + if (re->float_substr) { + fl = newSVsv(re->float_substr); + } else if (re->float_utf8) { + fl = newSVsv(re->float_utf8); + } + XPUSHs(an); + XPUSHs(fl); + XSRETURN(2); + } + XSRETURN_UNDEF; +} -void -uninstall() - CODE: - uninstall(aTHX); diff --git a/gnu/usr.bin/perl/ext/re/re_comp.h b/gnu/usr.bin/perl/ext/re/re_comp.h new file mode 100644 index 00000000000..ba3aae851eb --- /dev/null +++ b/gnu/usr.bin/perl/ext/re/re_comp.h @@ -0,0 +1,7 @@ +/* For blead, this file needs to do nothing other than pull in the regular + regcomp.h. For the 5.8.x re module it has to do more. + But doing it this way keeps regcomp.c and regexec.c clean. +*/ + +#include "regcomp.h" + diff --git a/gnu/usr.bin/perl/ext/re/re_top.h b/gnu/usr.bin/perl/ext/re/re_top.h new file mode 100644 index 00000000000..6eb02e33684 --- /dev/null +++ b/gnu/usr.bin/perl/ext/re/re_top.h @@ -0,0 +1,42 @@ +/* need to replace pregcomp et al, so enable that */ +#ifndef PERL_IN_XSUB_RE +# define PERL_IN_XSUB_RE +#endif +/* need access to debugger hooks */ +#if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING) +# define DEBUGGING +#endif + +/* We *really* need to overwrite these symbols: */ +#define Perl_regexec_flags my_regexec +#define Perl_regdump my_regdump +#define Perl_regprop my_regprop +#define Perl_re_intuit_start my_re_intuit_start +#define Perl_re_compile my_re_compile +#define Perl_regfree_internal my_regfree +#define Perl_re_intuit_string my_re_intuit_string +#define Perl_regdupe_internal my_regdupe +#define Perl_reg_numbered_buff_fetch my_reg_numbered_buff_fetch +#define Perl_reg_numbered_buff_store my_reg_numbered_buff_store +#define Perl_reg_numbered_buff_length my_reg_numbered_buff_length +#define Perl_reg_named_buff my_reg_named_buff +#define Perl_reg_named_buff_iter my_reg_named_buff_iter +#define Perl_reg_named_buff_fetch my_reg_named_buff_fetch +#define Perl_reg_named_buff_exists my_reg_named_buff_exists +#define Perl_reg_named_buff_firstkey my_reg_named_buff_firstkey +#define Perl_reg_named_buff_nextkey my_reg_named_buff_nextkey +#define Perl_reg_named_buff_scalar my_reg_named_buff_scalar +#define Perl_reg_named_buff_all my_reg_named_buff_all +#define Perl_reg_qr_package my_reg_qr_package + +#define PERL_NO_GET_CONTEXT + +/* + * Local variables: + * c-indentation-style: bsd + * c-basic-offset: 4 + * indent-tabs-mode: t + * End: + * + * ex: set ts=8 sts=4 sw=4 noet: + */ diff --git a/gnu/usr.bin/perl/ext/re/t/lexical_debug.pl b/gnu/usr.bin/perl/ext/re/t/lexical_debug.pl new file mode 100644 index 00000000000..3ec7455ba1a --- /dev/null +++ b/gnu/usr.bin/perl/ext/re/t/lexical_debug.pl @@ -0,0 +1,29 @@ +use re 'debug'; + +$_ = 'foo bar baz bop fip fop'; + +/foo/ and $count++; + +{ + no re 'debug'; + /bar/ and $count++; + { + use re 'debug'; + /baz/ and $count++; + } + /bop/ and $count++; +} + +/fip/ and $count++; + +no re 'debug'; + +/fop/ and $count++; + +use re 'debug'; +my $var='zoo|liz|zap'; +/($var)/ or $count++; + +print "Count=$count\n"; + + diff --git a/gnu/usr.bin/perl/ext/re/t/lexical_debug.t b/gnu/usr.bin/perl/ext/re/t/lexical_debug.t new file mode 100644 index 00000000000..3c3f7ba316e --- /dev/null +++ b/gnu/usr.bin/perl/ext/re/t/lexical_debug.t @@ -0,0 +1,37 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require Config; + if (($Config::Config{'extensions'} !~ /\bre\b/) ){ + print "1..0 # Skip -- Perl configured without re module\n"; + exit 0; + } +} + +use strict; + +# must use a BEGIN or the prototypes wont be respected meaning + # tests could pass that shouldn't +BEGIN { require "./test.pl"; } +my $out = runperl(progfile => "../ext/re/t/lexical_debug.pl", stderr => 1 ); + +print "1..10\n"; + +# Each pattern will produce an EXACT node with a specific string in +# it, so we will look for that. We can't just look for the string +# alone as the string being matched against contains all of them. + +ok( $out =~ /EXACT <foo>/, "Expect 'foo'" ); +ok( $out !~ /EXACT <bar>/, "No 'bar'" ); +ok( $out =~ /EXACT <baz>/, "Expect 'baz'" ); +ok( $out !~ /EXACT <bop>/, "No 'bop'" ); +ok( $out =~ /EXACT <fip>/, "Expect 'fip'" ); +ok( $out !~ /EXACT <fop>/, "No 'baz'" ); +ok( $out =~ /<liz>/, "Got 'liz'" ); # in a TRIE so no EXACT +ok( $out =~ /<zoo>/, "Got 'zoo'" ); # in a TRIE so no EXACT +ok( $out =~ /<zap>/, "Got 'zap'" ); # in a TRIE so no EXACT +ok( $out =~ /Count=7\n/, "Count is 7") + or diag($out); + diff --git a/gnu/usr.bin/perl/ext/re/t/qr.t b/gnu/usr.bin/perl/ext/re/t/qr.t new file mode 100644 index 00000000000..9a59a046bdd --- /dev/null +++ b/gnu/usr.bin/perl/ext/re/t/qr.t @@ -0,0 +1,15 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require Config; + if (($Config::Config{'extensions'} !~ /\bre\b/) ){ + print "1..0 # Skip -- Perl configured without re module\n"; + exit 0; + } +} + +use Test::More tests => 1; +use re 'Debug'; +isa_ok( qr//, "Regexp" ); diff --git a/gnu/usr.bin/perl/ext/re/t/re.t b/gnu/usr.bin/perl/ext/re/t/re.t index 2a1923ea79a..204092f028b 100644 --- a/gnu/usr.bin/perl/ext/re/t/re.t +++ b/gnu/usr.bin/perl/ext/re/t/re.t @@ -31,8 +31,8 @@ my $warn; local $SIG{__WARN__} = sub { $warn = shift; }; -eval { re::bits(1) }; -like( $warn, qr/Useless use/, 'bits() should warn with no args' ); +#eval { re::bits(1) }; +#like( $warn, qr/Useless use/, 'bits() should warn with no args' ); delete $ENV{PERL_RE_COLORS}; re::bits(0, 'debug'); @@ -58,6 +58,12 @@ re->unimport('taint'); ok( !( $^H & 0x00100000 ), 'unimport should clear bits in $^H when requested' ); re->unimport('eval'); ok( !( $^H & 0x00200000 ), '... and again' ); +my $reg=qr/(foo|bar|baz|blah)/; +close STDERR; +eval"use re Debug=>'ALL'"; +my $ok='foo'=~/$reg/; +eval"no re Debug=>'ALL'"; +ok( $ok, 'No segv!' ); package Term::Cap; diff --git a/gnu/usr.bin/perl/ext/re/t/re_funcs.t b/gnu/usr.bin/perl/ext/re/t/re_funcs.t new file mode 100644 index 00000000000..97f795eac93 --- /dev/null +++ b/gnu/usr.bin/perl/ext/re/t/re_funcs.t @@ -0,0 +1,64 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require Config; + if (($Config::Config{'extensions'} !~ /\bre\b/) ){ + print "1..0 # Skip -- Perl configured without re module\n"; + exit 0; + } +} + +use strict; + +use Test::More; # test count at bottom of file +use re qw(is_regexp regexp_pattern regmust + regname regnames regnames_count); +{ + my $qr=qr/foo/pi; + ok(is_regexp($qr),'is_regexp($qr)'); + ok(!is_regexp(''),'is_regexp("")'); + is((regexp_pattern($qr))[0],'foo','regexp_pattern[0]'); + is((regexp_pattern($qr))[1],'ip','regexp_pattern[1]'); + is(regexp_pattern($qr),'(?pi-xsm:foo)','scalar regexp_pattern'); + ok(!regexp_pattern(''),'!regexp_pattern("")'); +} +{ + my $qr=qr/here .* there/x; + my ($anchored,$floating)=regmust($qr); + is($anchored,'here',"Regmust anchored - qr//"); + is($floating,'there',"Regmust floating - qr//"); + my $foo='blah'; + ($anchored,$floating)=regmust($foo); + is($anchored,undef,"Regmust anchored - non ref"); + is($floating,undef,"Regmust anchored - non ref"); + my $bar=['blah']; + ($anchored,$floating)=regmust($foo); + is($anchored,undef,"Regmust anchored - ref"); + is($floating,undef,"Regmust anchored - ref"); +} + +if ('1234'=~/(?:(?<A>\d)|(?<C>!))(?<B>\d)(?<A>\d)(?<B>\d)/){ + my @names = sort +regnames(); + is("@names","A B","regnames"); + my @names = sort +regnames(0); + is("@names","A B","regnames"); + my $names = regnames(); + is($names, "B", "regnames in scalar context"); + @names = sort +regnames(1); + is("@names","A B C","regnames"); + is(join("", @{regname("A",1)}),"13"); + is(join("", @{regname("B",1)}),"24"); + { + if ('foobar'=~/(?<foo>foo)(?<bar>bar)/) { + is(regnames_count(),2); + } else { + ok(0); ok(0); + } + } + is(regnames_count(),3); +} +# New tests above this line, don't forget to update the test count below! +use Test::More tests => 20; +# No tests here! diff --git a/gnu/usr.bin/perl/ext/re/t/regop.pl b/gnu/usr.bin/perl/ext/re/t/regop.pl new file mode 100644 index 00000000000..89693352208 --- /dev/null +++ b/gnu/usr.bin/perl/ext/re/t/regop.pl @@ -0,0 +1,20 @@ +use re Debug=>qw(DUMP EXECUTE OFFSETS TRIEC); +my @tests=( + XY => 'X(A|[B]Q||C|D)Y' , + foobar => '[f][o][o][b][a][r]', + x => '.[XY].', + 'ABCD' => '(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)', + 'D:\\dev/perl/ver/28321_/perl.exe'=> + '/(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\.WSF|\\.WSH|\\.pyo|\\.pyc|\\.pyw|\\.py)$/i', + 'q'=>'[q]', +); +while (@tests) { + my ($str,$pat)=splice @tests,0,2; + warn "\n"; + $pat="/$pat/" if substr($pat,0,1) ne '/'; + # string eval to get the free regex message in the right place. + eval qq[ + warn "$str"=~$pat ? "%MATCHED%" : "%FAILED%","\n"; + ]; + die $@ if $@; +} diff --git a/gnu/usr.bin/perl/ext/re/t/regop.t b/gnu/usr.bin/perl/ext/re/t/regop.t new file mode 100644 index 00000000000..7fe7b204627 --- /dev/null +++ b/gnu/usr.bin/perl/ext/re/t/regop.t @@ -0,0 +1,261 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require Config; + if (($Config::Config{'extensions'} !~ /\bre\b/) ){ + print "1..0 # Skip -- Perl configured without re module\n"; + exit 0; + } +} + +use strict; +BEGIN { require "./test.pl"; } +our $NUM_SECTS; +chomp(my @strs= grep { !/^\s*\#/ } <DATA>); +my $out = runperl(progfile => "../ext/re/t/regop.pl", stderr => 1 ); +# VMS currently embeds linefeeds in the output. +$out =~ s/\cJ//g if $^O = 'VMS'; +my @tests = grep { /\S/ } split /(?=Compiling REx)/, $out; +# on debug builds we get an EXECUTING... message in there at the top +shift @tests + if $tests[0] =~ /EXECUTING.../; + +plan( @tests + 2 + ( @strs - grep { !$_ or /^---/ } @strs )); + +is( scalar @tests, $NUM_SECTS, + "Expecting output for $NUM_SECTS patterns" ); +ok( defined $out, 'regop.pl returned something defined' ); + +$out ||= ""; +my $test= 1; +foreach my $testout ( @tests ) { + my ( $pattern )= $testout=~/Compiling REx "([^"]+)"/; + ok( $pattern, "Pattern for test " . ($test++) ); + my $diaged; + while (@strs) { + local $_= shift @strs; + last if !$_ + or /^---/; + next if /^\s*#/; + s/^\s+//; + s/\s+$//; + ok( $testout=~/\Q$_\E/, "$_: /$pattern/" ) + or do { + !$diaged++ and diag("$_: /$pattern/\n$testout"); + }; + } +} + +# The format below is simple. Each line is an exact +# string that must be found in the output. +# Lines starting the # are comments. +# Lines starting with --- are seperators indicating +# that the tests for this result set are finished. +# If you add a test make sure you update $NUM_SECTS +# the commented output is just for legacy/debugging purposes +BEGIN{ $NUM_SECTS= 6 } + +__END__ +#Compiling REx "X(A|[B]Q||C|D)Y" +#size 34 +#first at 1 +# 1: EXACT <X>(3) +# 3: OPEN1(5) +# 5: TRIE-EXACT(21) +# [Words:5 Chars:5 Unique:5 States:6 Start-Class:A-D] +# <A> +# <BQ> +# <> +# <C> +# <D> +# 21: CLOSE1(23) +# 23: EXACT <Y>(25) +# 25: END(0) +#anchored "X" at 0 floating "Y" at 1..3 (checking floating) minlen 2 +#Guessing start of match, REx "X(A|[B]Q||C|D)Y" against "XY"... +#Found floating substr "Y" at offset 1... +#Found anchored substr "X" at offset 0... +#Guessed: match at offset 0 +#Matching REx "X(A|[B]Q||C|D)Y" against "XY" +# Setting an EVAL scope, savestack=140 +# 0 <> <XY> | 1: EXACT <X> +# 1 <X> <Y> | 3: OPEN1 +# 1 <X> <Y> | 5: TRIE-EXACT +# matched empty string... +# 1 <X> <Y> | 21: CLOSE1 +# 1 <X> <Y> | 23: EXACT <Y> +# 2 <XY> <> | 25: END +#Match successful! +#%MATCHED% +#Freeing REx: "X(A|[B]Q||C|D)Y" +Compiling REx "X(A|[B]Q||C|D)Y" +[A-D] +TRIE-EXACT +<BQ> +matched empty string +Match successful! +Found floating substr "Y" at offset 1... +Found anchored substr "X" at offset 0... +Guessed: match at offset 0 +checking floating +minlen 2 +S:1/6 +W:5 +L:0/2 +C:5/5 +%MATCHED% +--- +#Compiling REx "[f][o][o][b][a][r]" +#size 67 +#first at 1 +# 1: EXACT <foobar>(13) +# 13: END(0) +#anchored "foobar" at 0 (checking anchored isall) minlen 6 +#Guessing start of match, REx "[f][o][o][b][a][r]" against "foobar"... +#Found anchored substr "foobar" at offset 0... +#Guessed: match at offset 0 +#Freeing REx: "[f][o][o][b][a][r]" +foobar +checking anchored isall +minlen 6 +anchored "foobar" at 0 +Guessed: match at offset 0 +Compiling REx "[f][o][o][b][a][r]" +Freeing REx: "[f][o][o][b][a][r]" +%MATCHED% +--- +#Compiling REx ".[XY]." +#size 14 +#first at 1 +# 1: REG_ANY(2) +# 2: ANYOF[XY](13) +# 13: REG_ANY(14) +# 14: END(0) +#minlen 3 +#%FAILED% +#Freeing REx: ".[XY]." +%FAILED% +minlen 3 +--- +# Compiling REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)" +# Got 164 bytes for offset annotations. +# TRIE(NATIVE): W:6 C:24 Uq:7 Min:4 Max:4 +# Char : Match Base Ofs A B C P G E D +# State|--------------------------------------------------- +# # 1| @ 7 + 0[ 2 . . . . . .] +# # 2| @ 7 + 1[ . 3 . . . . .] +# # 3| @ 7 + 2[ . . 4 . . . .] +# # 4| @ A + 0[ 9 8 0 5 6 7 A] +# # 5| W 1 @ 0 +# # 6| W 2 @ 0 +# # 7| W 3 @ 0 +# # 8| W 4 @ 0 +# # 9| W 5 @ 0 +# # A| W 6 @ 0 +# Final program: +# 1: EXACT <ABC>(3) +# 3: TRIEC-EXACT<S:4/10 W:6 L:1/1 C:24/7>[A-EGP](20) +# <P> +# <G> +# <E> +# <B> +# <A> +# <D> +# 20: END(0) +# anchored "ABC" at 0 (checking anchored) minlen 4 +# Offsets: [20] +# 1:4[3] 3:4[15] 19:32[0] 20:34[0] +# Guessing start of match in sv for REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)" against "ABCD" +# Found anchored substr "ABC" at offset 0... +# Guessed: match at offset 0 +# Matching REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)" against "ABCD" +# 0 <> <ABCD> | 1:EXACT <ABC>(3) +# 3 <ABC> <D> | 3:TRIEC-EXACT<S:4/10 W:6 L:1/1 C:24/7>[A-EGP](20) +# 3 <ABC> <D> | State: 4 Accepted: 0 Charid: 7 CP: 44 After State: a +# 4 <ABCD> <> | State: a Accepted: 1 Charid: 6 CP: 0 After State: 0 +# got 1 possible matches +# only one match left: #6 <D> +# 4 <ABCD> <> | 20:END(0) +# Match successful! +# %MATCHED% +# Freeing REx: "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)" +%MATCHED% +EXACT <ABC> +TRIEC-EXACT +[A-EGP] +only one match left: #6 <D> +S:4/10 +W:6 +L:1/1 +C:24/7 +minlen 4 +(checking anchored) +anchored "ABC" at 0 +--- +#Compiling REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|\.pyo|\.pyc|\.pyw|\.py)$" +#size 48 nodes first at 3 +#first at 3 +#rarest char +# at 0 +# 1: OPEN1(3) +# 3: EXACTF <.>(5) +# 5: TRIE-EXACTF(45) +# [Start:2 Words:14 Chars:54 Unique:18 States:29 Minlen:2 Maxlen:3 Start-Class:BCEJPVWbcejpvw] +# <.COM> +# ... yada yada ... (dmq) +# <.py> +# 45: CLOSE1(47) +# 47: EOL(48) +# 48: END(0) +#floating ""$ at 3..4 (checking floating) stclass "EXACTF <.>" minlen 3 +#Offsets: [48] +# 1:1[1] 3:2[1] 5:2[81] 45:83[1] 47:84[1] 48:85[0] +#Guessing start of match, REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|..." against "D:dev/perl/ver/28321_/perl.exe"... +#Found floating substr ""$ at offset 30... +#Starting position does not contradict /^/m... +#Does not contradict STCLASS... +#Guessed: match at offset 26 +#Matching REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|\.pyo|\.pyc|\.pyw|\.py)$..." against ".exe" +#Matching stclass "EXACTF <.>" against ".exe" +# Setting an EVAL scope, savestack=140 +# 26 <21_/perl> <.exe> | 1: OPEN1 +# 26 <21_/perl> <.exe> | 3: EXACTF <.> +# 27 <21_/perl.> <exe> | 5: TRIE-EXACTF +# only one match : #2 <.EXE> +# 30 <21_/perl.exe> <> | 45: CLOSE1 +# 30 <21_/perl.exe> <> | 47: EOL +# 30 <21_/perl.exe> <> | 48: END +#Match successful! +#POP STATE(1) +#%MATCHED% +#Freeing REx: "(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\."...... +%MATCHED% +floating ""$ at 3..4 (checking floating) +1:1[1] 3:2[1] 5:2[64] 45:83[1] 47:84[1] 48:85[0] +stclass EXACTF <.> minlen 3 +Found floating substr ""$ at offset 30... +Does not contradict STCLASS... +Guessed: match at offset 26 +Matching stclass EXACTF <.> against ".exe" +--- +#Compiling REx "[q]" +#size 12 nodes Got 100 bytes for offset annotations. +#first at 1 +#Final program: +# 1: EXACT <q>(3) +# 3: END(0) +#anchored "q" at 0 (checking anchored isall) minlen 1 +#Offsets: [12] +# 1:1[3] 3:4[0] +#Guessing start of match, REx "[q]" against "q"... +#Found anchored substr "q" at offset 0... +#Guessed: match at offset 0 +#%MATCHED% +#Freeing REx: "[q]" +Got 100 bytes for offset annotations. +Offsets: [12] +1:1[3] 3:4[0] +%MATCHED% +Freeing REx: "[q]"
\ No newline at end of file |