summaryrefslogtreecommitdiff
path: root/gnu/usr.bin/perl/ext/re
diff options
context:
space:
mode:
authorTodd C. Miller <millert@cvs.openbsd.org>2008-09-29 17:19:07 +0000
committerTodd C. Miller <millert@cvs.openbsd.org>2008-09-29 17:19:07 +0000
commitbfa863c83bfd01d72a748decaaf2676b800c60ff (patch)
tree02491e683928ba1c318d3ebc1200feb7c8bb78b4 /gnu/usr.bin/perl/ext/re
parent37a0443eb842cf3d2c9de0e5647b0094a0dcf0bd (diff)
import perl 5.10.0 from CPAN
Diffstat (limited to 'gnu/usr.bin/perl/ext/re')
-rw-r--r--gnu/usr.bin/perl/ext/re/re.pm486
-rw-r--r--gnu/usr.bin/perl/ext/re/re.xs205
-rw-r--r--gnu/usr.bin/perl/ext/re/re_comp.h7
-rw-r--r--gnu/usr.bin/perl/ext/re/re_top.h42
-rw-r--r--gnu/usr.bin/perl/ext/re/t/lexical_debug.pl29
-rw-r--r--gnu/usr.bin/perl/ext/re/t/lexical_debug.t37
-rw-r--r--gnu/usr.bin/perl/ext/re/t/qr.t15
-rw-r--r--gnu/usr.bin/perl/ext/re/t/re.t10
-rw-r--r--gnu/usr.bin/perl/ext/re/t/re_funcs.t64
-rw-r--r--gnu/usr.bin/perl/ext/re/t/regop.pl20
-rw-r--r--gnu/usr.bin/perl/ext/re/t/regop.t261
11 files changed, 1068 insertions, 108 deletions
diff --git a/gnu/usr.bin/perl/ext/re/re.pm b/gnu/usr.bin/perl/ext/re/re.pm
index 6e9d1218ef0..0cf5376e866 100644
--- a/gnu/usr.bin/perl/ext/re/re.pm
+++ b/gnu/usr.bin/perl/ext/re/re.pm
@@ -1,6 +1,174 @@
package re;
-our $VERSION = 0.05;
+# pragma for controlling the regex engine
+use strict;
+use warnings;
+
+our $VERSION = "0.08";
+our @ISA = qw(Exporter);
+our @EXPORT_OK = qw(is_regexp regexp_pattern regmust
+ regname regnames regnames_count);
+our %EXPORT_OK = map { $_ => 1 } @EXPORT_OK;
+
+# *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING ***
+#
+# If you modify these values see comment below!
+
+my %bitmask = (
+ taint => 0x00100000, # HINT_RE_TAINT
+ eval => 0x00200000, # HINT_RE_EVAL
+);
+
+# - File::Basename contains a literal for 'taint' as a fallback. If
+# taint is changed here, File::Basename must be updated as well.
+#
+# - ExtUtils::ParseXS uses a hardcoded
+# BEGIN { $^H |= 0x00200000 }
+# in it to allow re.xs to be built. So if 'eval' is changed here then
+# ExtUtils::ParseXS must be changed as well.
+#
+# *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING ***
+
+sub setcolor {
+ eval { # Ignore errors
+ require Term::Cap;
+
+ my $terminal = Tgetent Term::Cap ({OSPEED => 9600}); # Avoid warning.
+ my $props = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue';
+ my @props = split /,/, $props;
+ my $colors = join "\t", map {$terminal->Tputs($_,1)} @props;
+
+ $colors =~ s/\0//g;
+ $ENV{PERL_RE_COLORS} = $colors;
+ };
+ if ($@) {
+ $ENV{PERL_RE_COLORS} ||= qq'\t\t> <\t> <\t\t';
+ }
+
+}
+
+my %flags = (
+ COMPILE => 0x0000FF,
+ PARSE => 0x000001,
+ OPTIMISE => 0x000002,
+ TRIEC => 0x000004,
+ DUMP => 0x000008,
+ FLAGS => 0x000010,
+
+ EXECUTE => 0x00FF00,
+ INTUIT => 0x000100,
+ MATCH => 0x000200,
+ TRIEE => 0x000400,
+
+ EXTRA => 0xFF0000,
+ TRIEM => 0x010000,
+ OFFSETS => 0x020000,
+ OFFSETSDBG => 0x040000,
+ STATE => 0x080000,
+ OPTIMISEM => 0x100000,
+ STACK => 0x280000,
+ BUFFERS => 0x400000,
+);
+$flags{ALL} = -1 & ~($flags{OFFSETS}|$flags{OFFSETSDBG}|$flags{BUFFERS});
+$flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE};
+$flags{Extra} = $flags{EXECUTE} | $flags{COMPILE};
+$flags{More} = $flags{MORE} = $flags{All} | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE};
+$flags{State} = $flags{DUMP} | $flags{EXECUTE} | $flags{STATE};
+$flags{TRIE} = $flags{DUMP} | $flags{EXECUTE} | $flags{TRIEC};
+
+my $installed;
+my $installed_error;
+
+sub _do_install {
+ if ( ! defined($installed) ) {
+ require XSLoader;
+ $installed = eval { XSLoader::load('re', $VERSION) } || 0;
+ $installed_error = $@;
+ }
+}
+
+sub _load_unload {
+ my ($on)= @_;
+ if ($on) {
+ _do_install();
+ if ( ! $installed ) {
+ die "'re' not installed!? ($installed_error)";
+ } else {
+ # We call install() every time, as if we didn't, we wouldn't
+ # "see" any changes to the color environment var since
+ # the last time it was called.
+
+ # install() returns an integer, which if casted properly
+ # in C resolves to a structure containing the regex
+ # hooks. Setting it to a random integer will guarantee
+ # segfaults.
+ $^H{regcomp} = install();
+ }
+ } else {
+ delete $^H{regcomp};
+ }
+}
+
+sub bits {
+ my $on = shift;
+ my $bits = 0;
+ unless (@_) {
+ require Carp;
+ Carp::carp("Useless use of \"re\" pragma");
+ }
+ foreach my $idx (0..$#_){
+ my $s=$_[$idx];
+ if ($s eq 'Debug' or $s eq 'Debugcolor') {
+ setcolor() if $s =~/color/i;
+ ${^RE_DEBUG_FLAGS} = 0 unless defined ${^RE_DEBUG_FLAGS};
+ for my $idx ($idx+1..$#_) {
+ if ($flags{$_[$idx]}) {
+ if ($on) {
+ ${^RE_DEBUG_FLAGS} |= $flags{$_[$idx]};
+ } else {
+ ${^RE_DEBUG_FLAGS} &= ~ $flags{$_[$idx]};
+ }
+ } else {
+ require Carp;
+ Carp::carp("Unknown \"re\" Debug flag '$_[$idx]', possible flags: ",
+ join(", ",sort keys %flags ) );
+ }
+ }
+ _load_unload($on ? 1 : ${^RE_DEBUG_FLAGS});
+ last;
+ } elsif ($s eq 'debug' or $s eq 'debugcolor') {
+ setcolor() if $s =~/color/i;
+ _load_unload($on);
+ last;
+ } elsif (exists $bitmask{$s}) {
+ $bits |= $bitmask{$s};
+ } elsif ($EXPORT_OK{$s}) {
+ _do_install();
+ require Exporter;
+ re->export_to_level(2, 're', $s);
+ } else {
+ require Carp;
+ Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ",
+ join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask),
+ ")");
+ }
+ }
+ $bits;
+}
+
+sub import {
+ shift;
+ $^H |= bits(1, @_);
+}
+
+sub unimport {
+ shift;
+ $^H &= ~ bits(0, @_);
+}
+
+1;
+
+__END__
=head1 NAME
@@ -23,23 +191,38 @@ re - Perl pragma to alter regular expression behaviour
/foo${pat}bar/; # disallowed (with or without -T switch)
}
- use re 'debug'; # NOT lexically scoped (as others are)
- /^(.*)$/s; # output debugging info during
- # compile and run time
+ use re 'debug'; # output debugging info during
+ /^(.*)$/s; # compile and run time
+
use re 'debugcolor'; # same as 'debug', but with colored output
...
+ use re qw(Debug All); # Finer tuned debugging options.
+ use re qw(Debug More);
+ no re qw(Debug ALL); # Turn of all re debugging in this scope
+
+ use re qw(is_regexp regexp_pattern); # import utility functions
+ my ($pat,$mods)=regexp_pattern(qr/foo/i);
+ if (is_regexp($obj)) {
+ print "Got regexp: ",
+ scalar regexp_pattern($obj); # just as perl would stringify it
+ } # but no hassle with blessed re's.
+
(We use $^X in these examples because it's tainted by default.)
=head1 DESCRIPTION
+=head2 'taint' mode
+
When C<use re 'taint'> is in effect, and a tainted string is the target
of a regex, the regex memories (or values returned by the m// operator
in list context) are tainted. This feature is useful when regex operations
on tainted data aren't meant to extract safe substrings, but to perform
other transformations.
+=head2 'eval' mode
+
When C<use re 'eval'> is in effect, a regex is allowed to contain
C<(?{ ... })> zero-width assertions even if regular expression contains
variable interpolation. That is normally disallowed, since it is a
@@ -56,6 +239,8 @@ interpolation. Thus:
I<is> allowed if $pat is a precompiled regular expression, even
if $pat contains C<(?{ ... })> assertions.
+=head2 'debug' mode
+
When C<use re 'debug'> is in effect, perl emits debugging messages when
compiling and using regular expressions. The output is the same as that
obtained by running a C<-DDEBUGGING>-enabled perl interpreter with the
@@ -67,68 +252,253 @@ comma-separated list of C<termcap> properties to use for highlighting
strings on/off, pre-point part on/off.
See L<perldebug/"Debugging regular expressions"> for additional info.
-The directive C<use re 'debug'> is I<not lexically scoped>, as the
-other directives are. It has both compile-time and run-time effects.
+As of 5.9.5 the directive C<use re 'debug'> and its equivalents are
+lexically scoped, as the other directives are. However they have both
+compile-time and run-time effects.
See L<perlmodlib/Pragmatic Modules>.
-=cut
+=head2 'Debug' mode
-# N.B. File::Basename contains a literal for 'taint' as a fallback. If
-# taint is changed here, File::Basename must be updated as well.
-my %bitmask = (
-taint => 0x00100000, # HINT_RE_TAINT
-eval => 0x00200000, # HINT_RE_EVAL
-);
+Similarly C<use re 'Debug'> produces debugging output, the difference
+being that it allows the fine tuning of what debugging output will be
+emitted. Options are divided into three groups, those related to
+compilation, those related to execution and those related to special
+purposes. The options are as follows:
-sub setcolor {
- eval { # Ignore errors
- require Term::Cap;
+=over 4
- my $terminal = Tgetent Term::Cap ({OSPEED => 9600}); # Avoid warning.
- my $props = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue';
- my @props = split /,/, $props;
- my $colors = join "\t", map {$terminal->Tputs($_,1)} @props;
+=item Compile related options
- $colors =~ s/\0//g;
- $ENV{PERL_RE_COLORS} = $colors;
- };
-}
+=over 4
-sub bits {
- my $on = shift;
- my $bits = 0;
- unless (@_) {
- require Carp;
- Carp::carp("Useless use of \"re\" pragma");
- }
- foreach my $s (@_){
- if ($s eq 'debug' or $s eq 'debugcolor') {
- setcolor() if $s eq 'debugcolor';
- require XSLoader;
- XSLoader::load('re');
- install() if $on;
- uninstall() unless $on;
- next;
- }
- if (exists $bitmask{$s}) {
- $bits |= $bitmask{$s};
- } else {
- require Carp;
- Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: @{[join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask)]})");
- }
- }
- $bits;
-}
+=item COMPILE
-sub import {
- shift;
- $^H |= bits(1, @_);
-}
+Turns on all compile related debug options.
-sub unimport {
- shift;
- $^H &= ~ bits(0, @_);
-}
+=item PARSE
-1;
+Turns on debug output related to the process of parsing the pattern.
+
+=item OPTIMISE
+
+Enables output related to the optimisation phase of compilation.
+
+=item TRIEC
+
+Detailed info about trie compilation.
+
+=item DUMP
+
+Dump the final program out after it is compiled and optimised.
+
+=back
+
+=item Execute related options
+
+=over 4
+
+=item EXECUTE
+
+Turns on all execute related debug options.
+
+=item MATCH
+
+Turns on debugging of the main matching loop.
+
+=item TRIEE
+
+Extra debugging of how tries execute.
+
+=item INTUIT
+
+Enable debugging of start point optimisations.
+
+=back
+
+=item Extra debugging options
+
+=over 4
+
+=item EXTRA
+
+Turns on all "extra" debugging options.
+
+=item BUFFERS
+
+Enable debugging the capture buffer storage during match. Warning,
+this can potentially produce extremely large output.
+
+=item TRIEM
+
+Enable enhanced TRIE debugging. Enhances both TRIEE
+and TRIEC.
+
+=item STATE
+
+Enable debugging of states in the engine.
+
+=item STACK
+
+Enable debugging of the recursion stack in the engine. Enabling
+or disabling this option automatically does the same for debugging
+states as well. This output from this can be quite large.
+
+=item OPTIMISEM
+
+Enable enhanced optimisation debugging and start point optimisations.
+Probably not useful except when debugging the regex engine itself.
+
+=item OFFSETS
+
+Dump offset information. This can be used to see how regops correlate
+to the pattern. Output format is
+
+ NODENUM:POSITION[LENGTH]
+
+Where 1 is the position of the first char in the string. Note that position
+can be 0, or larger than the actual length of the pattern, likewise length
+can be zero.
+
+=item OFFSETSDBG
+
+Enable debugging of offsets information. This emits copious
+amounts of trace information and doesn't mesh well with other
+debug options.
+
+Almost definitely only useful to people hacking
+on the offsets part of the debug engine.
+
+=back
+
+=item Other useful flags
+
+These are useful shortcuts to save on the typing.
+
+=over 4
+
+=item ALL
+
+Enable all options at once except OFFSETS, OFFSETSDBG and BUFFERS
+
+=item All
+
+Enable DUMP and all execute options. Equivalent to:
+
+ use re 'debug';
+
+=item MORE
+
+=item More
+
+Enable TRIEM and all execute compile and execute options.
+
+=back
+
+=back
+
+As of 5.9.5 the directive C<use re 'debug'> and its equivalents are
+lexically scoped, as the other directives are. However they have both
+compile-time and run-time effects.
+
+=head2 Exportable Functions
+
+As of perl 5.9.5 're' debug contains a number of utility functions that
+may be optionally exported into the caller's namespace. They are listed
+below.
+
+=over 4
+
+=item is_regexp($ref)
+
+Returns true if the argument is a compiled regular expression as returned
+by C<qr//>, false if it is not.
+
+This function will not be confused by overloading or blessing. In
+internals terms, this extracts the regexp pointer out of the
+PERL_MAGIC_qr structure so it it cannot be fooled.
+
+=item regexp_pattern($ref)
+
+If the argument is a compiled regular expression as returned by C<qr//>,
+then this function returns the pattern.
+
+In list context it returns a two element list, the first element
+containing the pattern and the second containing the modifiers used when
+the pattern was compiled.
+
+ my ($pat, $mods) = regexp_pattern($ref);
+
+In scalar context it returns the same as perl would when strigifying a raw
+C<qr//> with the same pattern inside. If the argument is not a compiled
+reference then this routine returns false but defined in scalar context,
+and the empty list in list context. Thus the following
+
+ if (regexp_pattern($ref) eq '(?i-xsm:foo)')
+
+will be warning free regardless of what $ref actually is.
+
+Like C<is_regexp> this function will not be confused by overloading
+or blessing of the object.
+
+=item regmust($ref)
+
+If the argument is a compiled regular expression as returned by C<qr//>,
+then this function returns what the optimiser consiers to be the longest
+anchored fixed string and longest floating fixed string in the pattern.
+
+A I<fixed string> is defined as being a substring that must appear for the
+pattern to match. An I<anchored fixed string> is a fixed string that must
+appear at a particular offset from the beginning of the match. A I<floating
+fixed string> is defined as a fixed string that can appear at any point in
+a range of positions relative to the start of the match. For example,
+
+ my $qr = qr/here .* there/x;
+ my ($anchored, $floating) = regmust($qr);
+ print "anchored:'$anchored'\nfloating:'$floating'\n";
+
+results in
+
+ anchored:'here'
+ floating:'there'
+
+Because the C<here> is before the C<.*> in the pattern, its position
+can be determined exactly. That's not true, however, for the C<there>;
+it could appear at any point after where the anchored string appeared.
+Perl uses both for its optimisations, prefering the longer, or, if they are
+equal, the floating.
+
+B<NOTE:> This may not necessarily be the definitive longest anchored and
+floating string. This will be what the optimiser of the Perl that you
+are using thinks is the longest. If you believe that the result is wrong
+please report it via the L<perlbug> utility.
+
+=item regname($name,$all)
+
+Returns the contents of a named buffer of the last successful match. If
+$all is true, then returns an array ref containing one entry per buffer,
+otherwise returns the first defined buffer.
+
+=item regnames($all)
+
+Returns a list of all of the named buffers defined in the last successful
+match. If $all is true, then it returns all names defined, if not it returns
+only names which were involved in the match.
+
+=item regnames_count()
+
+Returns the number of distinct names defined in the pattern used
+for the last successful match.
+
+B<Note:> this result is always the actual number of distinct
+named buffers defined, it may not actually match that which is
+returned by C<regnames()> and related routines when those routines
+have not been called with the $all parameter set.
+
+=back
+
+=head1 SEE ALSO
+
+L<perlmodlib/Pragmatic Modules>.
+
+=cut
diff --git a/gnu/usr.bin/perl/ext/re/re.xs b/gnu/usr.bin/perl/ext/re/re.xs
index 11239d7e59d..b4d3e34c4a1 100644
--- a/gnu/usr.bin/perl/ext/re/re.xs
+++ b/gnu/usr.bin/perl/ext/re/re.xs
@@ -6,73 +6,182 @@
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
+#include "re_comp.h"
+
START_EXTERN_C
-extern regexp* my_regcomp (pTHX_ char* exp, char* xend, PMOP* pm);
-extern I32 my_regexec (pTHX_ regexp* prog, char* stringarg, char* strend,
+extern REGEXP* my_re_compile (pTHX_ const SV * const pattern, const U32 pm_flags);
+extern I32 my_regexec (pTHX_ REGEXP * const prog, char* stringarg, char* strend,
char* strbeg, I32 minend, SV* screamer,
void* data, U32 flags);
-extern void my_regfree (pTHX_ struct regexp* r);
-extern char* my_re_intuit_start (pTHX_ regexp *prog, SV *sv, char *strpos,
- char *strend, U32 flags,
+
+extern char* my_re_intuit_start (pTHX_ REGEXP * const prog, SV *sv, char *strpos,
+ char *strend, const U32 flags,
struct re_scream_pos_data_s *data);
-extern SV* my_re_intuit_string (pTHX_ regexp *prog);
+extern SV* my_re_intuit_string (pTHX_ REGEXP * const prog);
-END_EXTERN_C
+extern void my_regfree (pTHX_ REGEXP * const r);
-#define MY_CXT_KEY "re::_guts" XS_VERSION
+extern void my_reg_numbered_buff_fetch(pTHX_ REGEXP * const rx, const I32 paren,
+ SV * const usesv);
+extern void my_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren,
+ SV const * const value);
+extern I32 my_reg_numbered_buff_length(pTHX_ REGEXP * const rx,
+ const SV * const sv, const I32 paren);
-typedef struct {
- int x_oldflag; /* debug flag */
-} my_cxt_t;
+extern SV* my_reg_named_buff(pTHX_ REGEXP * const, SV * const, SV * const,
+ const U32);
+extern SV* my_reg_named_buff_iter(pTHX_ REGEXP * const rx,
+ const SV * const lastkey, const U32 flags);
-START_MY_CXT
+extern SV* my_reg_qr_package(pTHX_ REGEXP * const rx);
+#if defined(USE_ITHREADS)
+extern void* my_regdupe (pTHX_ REGEXP * const r, CLONE_PARAMS *param);
+#endif
-#define oldflag (MY_CXT.x_oldflag)
+EXTERN_C const struct regexp_engine my_reg_engine;
-static void
-uninstall(pTHX)
-{
- dMY_CXT;
- PL_regexecp = Perl_regexec_flags;
- PL_regcompp = Perl_pregcomp;
- PL_regint_start = Perl_re_intuit_start;
- PL_regint_string = Perl_re_intuit_string;
- PL_regfree = Perl_pregfree;
-
- if (!oldflag)
- PL_debug &= ~DEBUG_r_FLAG;
-}
+END_EXTERN_C
-static void
-install(pTHX)
-{
- dMY_CXT;
- PL_colorset = 0; /* Allow reinspection of ENV. */
- PL_regexecp = &my_regexec;
- PL_regcompp = &my_regcomp;
- PL_regint_start = &my_re_intuit_start;
- PL_regint_string = &my_re_intuit_string;
- PL_regfree = &my_regfree;
- oldflag = PL_debug & DEBUG_r_FLAG;
- PL_debug |= DEBUG_r_FLAG;
-}
+const struct regexp_engine my_reg_engine = {
+ my_re_compile,
+ my_regexec,
+ my_re_intuit_start,
+ my_re_intuit_string,
+ my_regfree,
+ my_reg_numbered_buff_fetch,
+ my_reg_numbered_buff_store,
+ my_reg_numbered_buff_length,
+ my_reg_named_buff,
+ my_reg_named_buff_iter,
+ my_reg_qr_package,
+#if defined(USE_ITHREADS)
+ my_regdupe
+#endif
+};
MODULE = re PACKAGE = re
-BOOT:
+void
+install()
+ PPCODE:
+ PL_colorset = 0; /* Allow reinspection of ENV. */
+ /* PL_debug |= DEBUG_r_FLAG; */
+ XPUSHs(sv_2mortal(newSViv(PTR2IV(&my_reg_engine))));
+
+
+void
+regexp_pattern(sv)
+ SV * sv
+PROTOTYPE: $
+PREINIT:
+ REGEXP *re;
+PPCODE:
{
- MY_CXT_INIT;
+ /*
+ Checks if a reference is a regex or not. If the parameter is
+ not a ref, or is not the result of a qr// then returns false
+ in scalar context and an empty list in list context.
+ Otherwise in list context it returns the pattern and the
+ modifiers, in scalar context it returns the pattern just as it
+ would if the qr// was stringified normally, regardless as
+ to the class of the variable and any strigification overloads
+ on the object.
+ */
+
+ if ((re = SvRX(sv))) /* assign deliberate */
+ {
+ /* Housten, we have a regex! */
+ SV *pattern;
+ STRLEN patlen = 0;
+ STRLEN left = 0;
+ char reflags[6];
+
+ if ( GIMME_V == G_ARRAY ) {
+ /*
+ we are in list context so stringify
+ the modifiers that apply. We ignore "negative
+ modifiers" in this scenario.
+ */
+
+ char *fptr = INT_PAT_MODS;
+ char ch;
+ U16 match_flags = (U16)((re->extflags & PMf_COMPILETIME) >> 12);
+
+ while((ch = *fptr++)) {
+ if(match_flags & 1) {
+ reflags[left++] = ch;
+ }
+ match_flags >>= 1;
+ }
+
+ pattern = sv_2mortal(newSVpvn(re->precomp,re->prelen));
+ if (re->extflags & RXf_UTF8) SvUTF8_on(pattern);
+
+ /* return the pattern and the modifiers */
+ XPUSHs(pattern);
+ XPUSHs(sv_2mortal(newSVpvn(reflags,left)));
+ XSRETURN(2);
+ } else {
+ /* Scalar, so use the string that Perl would return */
+ /* return the pattern in (?msix:..) format */
+ pattern = sv_2mortal(newSVpvn(re->wrapped,re->wraplen));
+ if (re->extflags & RXf_UTF8)
+ SvUTF8_on(pattern);
+ XPUSHs(pattern);
+ XSRETURN(1);
+ }
+ } else {
+ /* It ain't a regexp folks */
+ if ( GIMME_V == G_ARRAY ) {
+ /* return the empty list */
+ XSRETURN_UNDEF;
+ } else {
+ /* Because of the (?:..) wrapping involved in a
+ stringified pattern it is impossible to get a
+ result for a real regexp that would evaluate to
+ false. Therefore we can return PL_sv_no to signify
+ that the object is not a regex, this means that one
+ can say
+
+ if (regex($might_be_a_regex) eq '(?:foo)') { }
+
+ and not worry about undefined values.
+ */
+ XSRETURN_NO;
+ }
+ }
+ /* NOT-REACHED */
}
void
-install()
- CODE:
- install(aTHX);
+regmust(sv)
+ SV * sv
+PROTOTYPE: $
+PREINIT:
+ REGEXP *re;
+PPCODE:
+{
+ if ((re = SvRX(sv))) /* assign deliberate */
+ {
+ SV *an = &PL_sv_no;
+ SV *fl = &PL_sv_no;
+ if (re->anchored_substr) {
+ an = newSVsv(re->anchored_substr);
+ } else if (re->anchored_utf8) {
+ an = newSVsv(re->anchored_utf8);
+ }
+ if (re->float_substr) {
+ fl = newSVsv(re->float_substr);
+ } else if (re->float_utf8) {
+ fl = newSVsv(re->float_utf8);
+ }
+ XPUSHs(an);
+ XPUSHs(fl);
+ XSRETURN(2);
+ }
+ XSRETURN_UNDEF;
+}
-void
-uninstall()
- CODE:
- uninstall(aTHX);
diff --git a/gnu/usr.bin/perl/ext/re/re_comp.h b/gnu/usr.bin/perl/ext/re/re_comp.h
new file mode 100644
index 00000000000..ba3aae851eb
--- /dev/null
+++ b/gnu/usr.bin/perl/ext/re/re_comp.h
@@ -0,0 +1,7 @@
+/* For blead, this file needs to do nothing other than pull in the regular
+ regcomp.h. For the 5.8.x re module it has to do more.
+ But doing it this way keeps regcomp.c and regexec.c clean.
+*/
+
+#include "regcomp.h"
+
diff --git a/gnu/usr.bin/perl/ext/re/re_top.h b/gnu/usr.bin/perl/ext/re/re_top.h
new file mode 100644
index 00000000000..6eb02e33684
--- /dev/null
+++ b/gnu/usr.bin/perl/ext/re/re_top.h
@@ -0,0 +1,42 @@
+/* need to replace pregcomp et al, so enable that */
+#ifndef PERL_IN_XSUB_RE
+# define PERL_IN_XSUB_RE
+#endif
+/* need access to debugger hooks */
+#if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
+# define DEBUGGING
+#endif
+
+/* We *really* need to overwrite these symbols: */
+#define Perl_regexec_flags my_regexec
+#define Perl_regdump my_regdump
+#define Perl_regprop my_regprop
+#define Perl_re_intuit_start my_re_intuit_start
+#define Perl_re_compile my_re_compile
+#define Perl_regfree_internal my_regfree
+#define Perl_re_intuit_string my_re_intuit_string
+#define Perl_regdupe_internal my_regdupe
+#define Perl_reg_numbered_buff_fetch my_reg_numbered_buff_fetch
+#define Perl_reg_numbered_buff_store my_reg_numbered_buff_store
+#define Perl_reg_numbered_buff_length my_reg_numbered_buff_length
+#define Perl_reg_named_buff my_reg_named_buff
+#define Perl_reg_named_buff_iter my_reg_named_buff_iter
+#define Perl_reg_named_buff_fetch my_reg_named_buff_fetch
+#define Perl_reg_named_buff_exists my_reg_named_buff_exists
+#define Perl_reg_named_buff_firstkey my_reg_named_buff_firstkey
+#define Perl_reg_named_buff_nextkey my_reg_named_buff_nextkey
+#define Perl_reg_named_buff_scalar my_reg_named_buff_scalar
+#define Perl_reg_named_buff_all my_reg_named_buff_all
+#define Perl_reg_qr_package my_reg_qr_package
+
+#define PERL_NO_GET_CONTEXT
+
+/*
+ * Local variables:
+ * c-indentation-style: bsd
+ * c-basic-offset: 4
+ * indent-tabs-mode: t
+ * End:
+ *
+ * ex: set ts=8 sts=4 sw=4 noet:
+ */
diff --git a/gnu/usr.bin/perl/ext/re/t/lexical_debug.pl b/gnu/usr.bin/perl/ext/re/t/lexical_debug.pl
new file mode 100644
index 00000000000..3ec7455ba1a
--- /dev/null
+++ b/gnu/usr.bin/perl/ext/re/t/lexical_debug.pl
@@ -0,0 +1,29 @@
+use re 'debug';
+
+$_ = 'foo bar baz bop fip fop';
+
+/foo/ and $count++;
+
+{
+ no re 'debug';
+ /bar/ and $count++;
+ {
+ use re 'debug';
+ /baz/ and $count++;
+ }
+ /bop/ and $count++;
+}
+
+/fip/ and $count++;
+
+no re 'debug';
+
+/fop/ and $count++;
+
+use re 'debug';
+my $var='zoo|liz|zap';
+/($var)/ or $count++;
+
+print "Count=$count\n";
+
+
diff --git a/gnu/usr.bin/perl/ext/re/t/lexical_debug.t b/gnu/usr.bin/perl/ext/re/t/lexical_debug.t
new file mode 100644
index 00000000000..3c3f7ba316e
--- /dev/null
+++ b/gnu/usr.bin/perl/ext/re/t/lexical_debug.t
@@ -0,0 +1,37 @@
+#!./perl
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib';
+ require Config;
+ if (($Config::Config{'extensions'} !~ /\bre\b/) ){
+ print "1..0 # Skip -- Perl configured without re module\n";
+ exit 0;
+ }
+}
+
+use strict;
+
+# must use a BEGIN or the prototypes wont be respected meaning
+ # tests could pass that shouldn't
+BEGIN { require "./test.pl"; }
+my $out = runperl(progfile => "../ext/re/t/lexical_debug.pl", stderr => 1 );
+
+print "1..10\n";
+
+# Each pattern will produce an EXACT node with a specific string in
+# it, so we will look for that. We can't just look for the string
+# alone as the string being matched against contains all of them.
+
+ok( $out =~ /EXACT <foo>/, "Expect 'foo'" );
+ok( $out !~ /EXACT <bar>/, "No 'bar'" );
+ok( $out =~ /EXACT <baz>/, "Expect 'baz'" );
+ok( $out !~ /EXACT <bop>/, "No 'bop'" );
+ok( $out =~ /EXACT <fip>/, "Expect 'fip'" );
+ok( $out !~ /EXACT <fop>/, "No 'baz'" );
+ok( $out =~ /<liz>/, "Got 'liz'" ); # in a TRIE so no EXACT
+ok( $out =~ /<zoo>/, "Got 'zoo'" ); # in a TRIE so no EXACT
+ok( $out =~ /<zap>/, "Got 'zap'" ); # in a TRIE so no EXACT
+ok( $out =~ /Count=7\n/, "Count is 7")
+ or diag($out);
+
diff --git a/gnu/usr.bin/perl/ext/re/t/qr.t b/gnu/usr.bin/perl/ext/re/t/qr.t
new file mode 100644
index 00000000000..9a59a046bdd
--- /dev/null
+++ b/gnu/usr.bin/perl/ext/re/t/qr.t
@@ -0,0 +1,15 @@
+#!./perl
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib';
+ require Config;
+ if (($Config::Config{'extensions'} !~ /\bre\b/) ){
+ print "1..0 # Skip -- Perl configured without re module\n";
+ exit 0;
+ }
+}
+
+use Test::More tests => 1;
+use re 'Debug';
+isa_ok( qr//, "Regexp" );
diff --git a/gnu/usr.bin/perl/ext/re/t/re.t b/gnu/usr.bin/perl/ext/re/t/re.t
index 2a1923ea79a..204092f028b 100644
--- a/gnu/usr.bin/perl/ext/re/t/re.t
+++ b/gnu/usr.bin/perl/ext/re/t/re.t
@@ -31,8 +31,8 @@ my $warn;
local $SIG{__WARN__} = sub {
$warn = shift;
};
-eval { re::bits(1) };
-like( $warn, qr/Useless use/, 'bits() should warn with no args' );
+#eval { re::bits(1) };
+#like( $warn, qr/Useless use/, 'bits() should warn with no args' );
delete $ENV{PERL_RE_COLORS};
re::bits(0, 'debug');
@@ -58,6 +58,12 @@ re->unimport('taint');
ok( !( $^H & 0x00100000 ), 'unimport should clear bits in $^H when requested' );
re->unimport('eval');
ok( !( $^H & 0x00200000 ), '... and again' );
+my $reg=qr/(foo|bar|baz|blah)/;
+close STDERR;
+eval"use re Debug=>'ALL'";
+my $ok='foo'=~/$reg/;
+eval"no re Debug=>'ALL'";
+ok( $ok, 'No segv!' );
package Term::Cap;
diff --git a/gnu/usr.bin/perl/ext/re/t/re_funcs.t b/gnu/usr.bin/perl/ext/re/t/re_funcs.t
new file mode 100644
index 00000000000..97f795eac93
--- /dev/null
+++ b/gnu/usr.bin/perl/ext/re/t/re_funcs.t
@@ -0,0 +1,64 @@
+#!./perl
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib';
+ require Config;
+ if (($Config::Config{'extensions'} !~ /\bre\b/) ){
+ print "1..0 # Skip -- Perl configured without re module\n";
+ exit 0;
+ }
+}
+
+use strict;
+
+use Test::More; # test count at bottom of file
+use re qw(is_regexp regexp_pattern regmust
+ regname regnames regnames_count);
+{
+ my $qr=qr/foo/pi;
+ ok(is_regexp($qr),'is_regexp($qr)');
+ ok(!is_regexp(''),'is_regexp("")');
+ is((regexp_pattern($qr))[0],'foo','regexp_pattern[0]');
+ is((regexp_pattern($qr))[1],'ip','regexp_pattern[1]');
+ is(regexp_pattern($qr),'(?pi-xsm:foo)','scalar regexp_pattern');
+ ok(!regexp_pattern(''),'!regexp_pattern("")');
+}
+{
+ my $qr=qr/here .* there/x;
+ my ($anchored,$floating)=regmust($qr);
+ is($anchored,'here',"Regmust anchored - qr//");
+ is($floating,'there',"Regmust floating - qr//");
+ my $foo='blah';
+ ($anchored,$floating)=regmust($foo);
+ is($anchored,undef,"Regmust anchored - non ref");
+ is($floating,undef,"Regmust anchored - non ref");
+ my $bar=['blah'];
+ ($anchored,$floating)=regmust($foo);
+ is($anchored,undef,"Regmust anchored - ref");
+ is($floating,undef,"Regmust anchored - ref");
+}
+
+if ('1234'=~/(?:(?<A>\d)|(?<C>!))(?<B>\d)(?<A>\d)(?<B>\d)/){
+ my @names = sort +regnames();
+ is("@names","A B","regnames");
+ my @names = sort +regnames(0);
+ is("@names","A B","regnames");
+ my $names = regnames();
+ is($names, "B", "regnames in scalar context");
+ @names = sort +regnames(1);
+ is("@names","A B C","regnames");
+ is(join("", @{regname("A",1)}),"13");
+ is(join("", @{regname("B",1)}),"24");
+ {
+ if ('foobar'=~/(?<foo>foo)(?<bar>bar)/) {
+ is(regnames_count(),2);
+ } else {
+ ok(0); ok(0);
+ }
+ }
+ is(regnames_count(),3);
+}
+# New tests above this line, don't forget to update the test count below!
+use Test::More tests => 20;
+# No tests here!
diff --git a/gnu/usr.bin/perl/ext/re/t/regop.pl b/gnu/usr.bin/perl/ext/re/t/regop.pl
new file mode 100644
index 00000000000..89693352208
--- /dev/null
+++ b/gnu/usr.bin/perl/ext/re/t/regop.pl
@@ -0,0 +1,20 @@
+use re Debug=>qw(DUMP EXECUTE OFFSETS TRIEC);
+my @tests=(
+ XY => 'X(A|[B]Q||C|D)Y' ,
+ foobar => '[f][o][o][b][a][r]',
+ x => '.[XY].',
+ 'ABCD' => '(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)',
+ 'D:\\dev/perl/ver/28321_/perl.exe'=>
+ '/(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\.WSF|\\.WSH|\\.pyo|\\.pyc|\\.pyw|\\.py)$/i',
+ 'q'=>'[q]',
+);
+while (@tests) {
+ my ($str,$pat)=splice @tests,0,2;
+ warn "\n";
+ $pat="/$pat/" if substr($pat,0,1) ne '/';
+ # string eval to get the free regex message in the right place.
+ eval qq[
+ warn "$str"=~$pat ? "%MATCHED%" : "%FAILED%","\n";
+ ];
+ die $@ if $@;
+}
diff --git a/gnu/usr.bin/perl/ext/re/t/regop.t b/gnu/usr.bin/perl/ext/re/t/regop.t
new file mode 100644
index 00000000000..7fe7b204627
--- /dev/null
+++ b/gnu/usr.bin/perl/ext/re/t/regop.t
@@ -0,0 +1,261 @@
+#!./perl
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib';
+ require Config;
+ if (($Config::Config{'extensions'} !~ /\bre\b/) ){
+ print "1..0 # Skip -- Perl configured without re module\n";
+ exit 0;
+ }
+}
+
+use strict;
+BEGIN { require "./test.pl"; }
+our $NUM_SECTS;
+chomp(my @strs= grep { !/^\s*\#/ } <DATA>);
+my $out = runperl(progfile => "../ext/re/t/regop.pl", stderr => 1 );
+# VMS currently embeds linefeeds in the output.
+$out =~ s/\cJ//g if $^O = 'VMS';
+my @tests = grep { /\S/ } split /(?=Compiling REx)/, $out;
+# on debug builds we get an EXECUTING... message in there at the top
+shift @tests
+ if $tests[0] =~ /EXECUTING.../;
+
+plan( @tests + 2 + ( @strs - grep { !$_ or /^---/ } @strs ));
+
+is( scalar @tests, $NUM_SECTS,
+ "Expecting output for $NUM_SECTS patterns" );
+ok( defined $out, 'regop.pl returned something defined' );
+
+$out ||= "";
+my $test= 1;
+foreach my $testout ( @tests ) {
+ my ( $pattern )= $testout=~/Compiling REx "([^"]+)"/;
+ ok( $pattern, "Pattern for test " . ($test++) );
+ my $diaged;
+ while (@strs) {
+ local $_= shift @strs;
+ last if !$_
+ or /^---/;
+ next if /^\s*#/;
+ s/^\s+//;
+ s/\s+$//;
+ ok( $testout=~/\Q$_\E/, "$_: /$pattern/" )
+ or do {
+ !$diaged++ and diag("$_: /$pattern/\n$testout");
+ };
+ }
+}
+
+# The format below is simple. Each line is an exact
+# string that must be found in the output.
+# Lines starting the # are comments.
+# Lines starting with --- are seperators indicating
+# that the tests for this result set are finished.
+# If you add a test make sure you update $NUM_SECTS
+# the commented output is just for legacy/debugging purposes
+BEGIN{ $NUM_SECTS= 6 }
+
+__END__
+#Compiling REx "X(A|[B]Q||C|D)Y"
+#size 34
+#first at 1
+# 1: EXACT <X>(3)
+# 3: OPEN1(5)
+# 5: TRIE-EXACT(21)
+# [Words:5 Chars:5 Unique:5 States:6 Start-Class:A-D]
+# <A>
+# <BQ>
+# <>
+# <C>
+# <D>
+# 21: CLOSE1(23)
+# 23: EXACT <Y>(25)
+# 25: END(0)
+#anchored "X" at 0 floating "Y" at 1..3 (checking floating) minlen 2
+#Guessing start of match, REx "X(A|[B]Q||C|D)Y" against "XY"...
+#Found floating substr "Y" at offset 1...
+#Found anchored substr "X" at offset 0...
+#Guessed: match at offset 0
+#Matching REx "X(A|[B]Q||C|D)Y" against "XY"
+# Setting an EVAL scope, savestack=140
+# 0 <> <XY> | 1: EXACT <X>
+# 1 <X> <Y> | 3: OPEN1
+# 1 <X> <Y> | 5: TRIE-EXACT
+# matched empty string...
+# 1 <X> <Y> | 21: CLOSE1
+# 1 <X> <Y> | 23: EXACT <Y>
+# 2 <XY> <> | 25: END
+#Match successful!
+#%MATCHED%
+#Freeing REx: "X(A|[B]Q||C|D)Y"
+Compiling REx "X(A|[B]Q||C|D)Y"
+[A-D]
+TRIE-EXACT
+<BQ>
+matched empty string
+Match successful!
+Found floating substr "Y" at offset 1...
+Found anchored substr "X" at offset 0...
+Guessed: match at offset 0
+checking floating
+minlen 2
+S:1/6
+W:5
+L:0/2
+C:5/5
+%MATCHED%
+---
+#Compiling REx "[f][o][o][b][a][r]"
+#size 67
+#first at 1
+# 1: EXACT <foobar>(13)
+# 13: END(0)
+#anchored "foobar" at 0 (checking anchored isall) minlen 6
+#Guessing start of match, REx "[f][o][o][b][a][r]" against "foobar"...
+#Found anchored substr "foobar" at offset 0...
+#Guessed: match at offset 0
+#Freeing REx: "[f][o][o][b][a][r]"
+foobar
+checking anchored isall
+minlen 6
+anchored "foobar" at 0
+Guessed: match at offset 0
+Compiling REx "[f][o][o][b][a][r]"
+Freeing REx: "[f][o][o][b][a][r]"
+%MATCHED%
+---
+#Compiling REx ".[XY]."
+#size 14
+#first at 1
+# 1: REG_ANY(2)
+# 2: ANYOF[XY](13)
+# 13: REG_ANY(14)
+# 14: END(0)
+#minlen 3
+#%FAILED%
+#Freeing REx: ".[XY]."
+%FAILED%
+minlen 3
+---
+# Compiling REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)"
+# Got 164 bytes for offset annotations.
+# TRIE(NATIVE): W:6 C:24 Uq:7 Min:4 Max:4
+# Char : Match Base Ofs A B C P G E D
+# State|---------------------------------------------------
+# # 1| @ 7 + 0[ 2 . . . . . .]
+# # 2| @ 7 + 1[ . 3 . . . . .]
+# # 3| @ 7 + 2[ . . 4 . . . .]
+# # 4| @ A + 0[ 9 8 0 5 6 7 A]
+# # 5| W 1 @ 0
+# # 6| W 2 @ 0
+# # 7| W 3 @ 0
+# # 8| W 4 @ 0
+# # 9| W 5 @ 0
+# # A| W 6 @ 0
+# Final program:
+# 1: EXACT <ABC>(3)
+# 3: TRIEC-EXACT<S:4/10 W:6 L:1/1 C:24/7>[A-EGP](20)
+# <P>
+# <G>
+# <E>
+# <B>
+# <A>
+# <D>
+# 20: END(0)
+# anchored "ABC" at 0 (checking anchored) minlen 4
+# Offsets: [20]
+# 1:4[3] 3:4[15] 19:32[0] 20:34[0]
+# Guessing start of match in sv for REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)" against "ABCD"
+# Found anchored substr "ABC" at offset 0...
+# Guessed: match at offset 0
+# Matching REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)" against "ABCD"
+# 0 <> <ABCD> | 1:EXACT <ABC>(3)
+# 3 <ABC> <D> | 3:TRIEC-EXACT<S:4/10 W:6 L:1/1 C:24/7>[A-EGP](20)
+# 3 <ABC> <D> | State: 4 Accepted: 0 Charid: 7 CP: 44 After State: a
+# 4 <ABCD> <> | State: a Accepted: 1 Charid: 6 CP: 0 After State: 0
+# got 1 possible matches
+# only one match left: #6 <D>
+# 4 <ABCD> <> | 20:END(0)
+# Match successful!
+# %MATCHED%
+# Freeing REx: "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)"
+%MATCHED%
+EXACT <ABC>
+TRIEC-EXACT
+[A-EGP]
+only one match left: #6 <D>
+S:4/10
+W:6
+L:1/1
+C:24/7
+minlen 4
+(checking anchored)
+anchored "ABC" at 0
+---
+#Compiling REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|\.pyo|\.pyc|\.pyw|\.py)$"
+#size 48 nodes first at 3
+#first at 3
+#rarest char
+# at 0
+# 1: OPEN1(3)
+# 3: EXACTF <.>(5)
+# 5: TRIE-EXACTF(45)
+# [Start:2 Words:14 Chars:54 Unique:18 States:29 Minlen:2 Maxlen:3 Start-Class:BCEJPVWbcejpvw]
+# <.COM>
+# ... yada yada ... (dmq)
+# <.py>
+# 45: CLOSE1(47)
+# 47: EOL(48)
+# 48: END(0)
+#floating ""$ at 3..4 (checking floating) stclass "EXACTF <.>" minlen 3
+#Offsets: [48]
+# 1:1[1] 3:2[1] 5:2[81] 45:83[1] 47:84[1] 48:85[0]
+#Guessing start of match, REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|..." against "D:dev/perl/ver/28321_/perl.exe"...
+#Found floating substr ""$ at offset 30...
+#Starting position does not contradict /^/m...
+#Does not contradict STCLASS...
+#Guessed: match at offset 26
+#Matching REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|\.pyo|\.pyc|\.pyw|\.py)$..." against ".exe"
+#Matching stclass "EXACTF <.>" against ".exe"
+# Setting an EVAL scope, savestack=140
+# 26 <21_/perl> <.exe> | 1: OPEN1
+# 26 <21_/perl> <.exe> | 3: EXACTF <.>
+# 27 <21_/perl.> <exe> | 5: TRIE-EXACTF
+# only one match : #2 <.EXE>
+# 30 <21_/perl.exe> <> | 45: CLOSE1
+# 30 <21_/perl.exe> <> | 47: EOL
+# 30 <21_/perl.exe> <> | 48: END
+#Match successful!
+#POP STATE(1)
+#%MATCHED%
+#Freeing REx: "(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\."......
+%MATCHED%
+floating ""$ at 3..4 (checking floating)
+1:1[1] 3:2[1] 5:2[64] 45:83[1] 47:84[1] 48:85[0]
+stclass EXACTF <.> minlen 3
+Found floating substr ""$ at offset 30...
+Does not contradict STCLASS...
+Guessed: match at offset 26
+Matching stclass EXACTF <.> against ".exe"
+---
+#Compiling REx "[q]"
+#size 12 nodes Got 100 bytes for offset annotations.
+#first at 1
+#Final program:
+# 1: EXACT <q>(3)
+# 3: END(0)
+#anchored "q" at 0 (checking anchored isall) minlen 1
+#Offsets: [12]
+# 1:1[3] 3:4[0]
+#Guessing start of match, REx "[q]" against "q"...
+#Found anchored substr "q" at offset 0...
+#Guessed: match at offset 0
+#%MATCHED%
+#Freeing REx: "[q]"
+Got 100 bytes for offset annotations.
+Offsets: [12]
+1:1[3] 3:4[0]
+%MATCHED%
+Freeing REx: "[q]" \ No newline at end of file