src - OpenBSD base system

diff options


context:
space:
mode:

author	Todd C. Miller <millert@cvs.openbsd.org>	2008-09-29 17:19:07 +0000
committer	Todd C. Miller <millert@cvs.openbsd.org>	2008-09-29 17:19:07 +0000
commit	bfa863c83bfd01d72a748decaaf2676b800c60ff (patch)
tree	02491e683928ba1c318d3ebc1200feb7c8bb78b4 /gnu/usr.bin/perl/ext/re
parent	37a0443eb842cf3d2c9de0e5647b0094a0dcf0bd (diff)

import perl 5.10.0 from CPAN

Diffstat (limited to 'gnu/usr.bin/perl/ext/re')

-rw-r--r--

gnu/usr.bin/perl/ext/re/re.pm

486

-rw-r--r--

gnu/usr.bin/perl/ext/re/re.xs

205

-rw-r--r--

gnu/usr.bin/perl/ext/re/re_comp.h

-rw-r--r--

gnu/usr.bin/perl/ext/re/re_top.h

-rw-r--r--

gnu/usr.bin/perl/ext/re/t/lexical_debug.pl

-rw-r--r--

gnu/usr.bin/perl/ext/re/t/lexical_debug.t

-rw-r--r--

gnu/usr.bin/perl/ext/re/t/qr.t

-rw-r--r--

gnu/usr.bin/perl/ext/re/t/re.t

-rw-r--r--

gnu/usr.bin/perl/ext/re/t/re_funcs.t

-rw-r--r--

gnu/usr.bin/perl/ext/re/t/regop.pl

-rw-r--r--

gnu/usr.bin/perl/ext/re/t/regop.t

261

11 files changed, 1068 insertions, 108 deletions

diff --git a/gnu/usr.bin/perl/ext/re/re.pm b/gnu/usr.bin/perl/ext/re/re.pm
index 6e9d1218ef0..0cf5376e866 100644
--- a/gnu/usr.bin/perl/ext/re/re.pm
+++ b/gnu/usr.bin/perl/ext/re/re.pm

@@ -1,6 +1,174 @@

package re;

-our $VERSION = 0.05;

+# pragma for controlling the regex engine

+use strict;

+use warnings;

+our $VERSION = "0.08";

+our @ISA = qw(Exporter);

+our @EXPORT_OK = qw(is_regexp regexp_pattern regmust

+ regname regnames regnames_count);

+our %EXPORT_OK = map { $_ => 1 } @EXPORT_OK;

+# *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING ***

+# If you modify these values see comment below!

+my %bitmask = (

+ taint => 0x00100000, # HINT_RE_TAINT

+ eval => 0x00200000, # HINT_RE_EVAL

+);

+# - File::Basename contains a literal for 'taint' as a fallback. If

+# taint is changed here, File::Basename must be updated as well.

+# - ExtUtils::ParseXS uses a hardcoded

+# BEGIN { $^H |= 0x00200000 }

+# in it to allow re.xs to be built. So if 'eval' is changed here then

+# ExtUtils::ParseXS must be changed as well.

+# *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING ***

+sub setcolor {

+ eval { # Ignore errors

+ require Term::Cap;

+ my $terminal = Tgetent Term::Cap ({OSPEED => 9600}); # Avoid warning.

+ my $props = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue';

+ my @props = split /,/, $props;

+ my $colors = join "\t", map {$terminal->Tputs($_,1)} @props;

+ $colors =~ s/\0//g;

+ $ENV{PERL_RE_COLORS} = $colors;

+ };

+ if ($@) {

+ $ENV{PERL_RE_COLORS} ||= qq'\t\t> <\t> <\t\t';

+ }

+my %flags = (

+ COMPILE => 0x0000FF,

+ PARSE => 0x000001,

+ OPTIMISE => 0x000002,

+ TRIEC => 0x000004,

+ DUMP => 0x000008,

+ FLAGS => 0x000010,

+ EXECUTE => 0x00FF00,

+ INTUIT => 0x000100,

+ MATCH => 0x000200,

+ TRIEE => 0x000400,

+ EXTRA => 0xFF0000,

+ TRIEM => 0x010000,

+ OFFSETS => 0x020000,

+ OFFSETSDBG => 0x040000,

+ STATE => 0x080000,

+ OPTIMISEM => 0x100000,

+ STACK => 0x280000,

+ BUFFERS => 0x400000,

+);

+$flags{ALL} = -1 & ~($flags{OFFSETS}|$flags{OFFSETSDBG}|$flags{BUFFERS});

+$flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE};

+$flags{Extra} = $flags{EXECUTE} | $flags{COMPILE};

+$flags{More} = $flags{MORE} = $flags{All} | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE};

+$flags{State} = $flags{DUMP} | $flags{EXECUTE} | $flags{STATE};

+$flags{TRIE} = $flags{DUMP} | $flags{EXECUTE} | $flags{TRIEC};

+my $installed;

+my $installed_error;

+sub _do_install {

+ if ( ! defined($installed) ) {

+ require XSLoader;

+ $installed = eval { XSLoader::load('re', $VERSION) } || 0;

+ $installed_error = $@;

+ }

+sub _load_unload {

+ my ($on)= @_;

+ if ($on) {

+ _do_install();

+ if ( ! $installed ) {

+ die "'re' not installed!? ($installed_error)";

+ } else {

+ # We call install() every time, as if we didn't, we wouldn't

+ # "see" any changes to the color environment var since

+ # the last time it was called.

+ # install() returns an integer, which if casted properly

+ # in C resolves to a structure containing the regex

+ # hooks. Setting it to a random integer will guarantee

+ # segfaults.

+ $^H{regcomp} = install();

+ }

+ } else {

+ delete $^H{regcomp};

+ }

+sub bits {

+ my $on = shift;

+ my $bits = 0;

+ unless (@_) {

+ require Carp;

+ Carp::carp("Useless use of \"re\" pragma");

+ }

+ foreach my $idx (0..$#_){

+ my $s=$_[$idx];

+ if ($s eq 'Debug' or $s eq 'Debugcolor') {

+ setcolor() if $s =~/color/i;

+ ${^RE_DEBUG_FLAGS} = 0 unless defined ${^RE_DEBUG_FLAGS};

+ for my $idx ($idx+1..$#_) {

+ if ($flags{$_[$idx]}) {

+ if ($on) {

+ ${^RE_DEBUG_FLAGS} |= $flags{$_[$idx]};

+ } else {

+ ${^RE_DEBUG_FLAGS} &= ~ $flags{$_[$idx]};

+ }

+ } else {

+ require Carp;

+ Carp::carp("Unknown \"re\" Debug flag '$_[$idx]', possible flags: ",

+ join(", ",sort keys %flags ) );

+ }

+ _load_unload($on ? 1 : ${^RE_DEBUG_FLAGS});

+ last;

+ } elsif ($s eq 'debug' or $s eq 'debugcolor') {

+ setcolor() if $s =~/color/i;

+ _load_unload($on);

+ last;

+ } elsif (exists $bitmask{$s}) {

+ $bits |= $bitmask{$s};

+ } elsif ($EXPORT_OK{$s}) {

+ _do_install();

+ require Exporter;

+ re->export_to_level(2, 're', $s);

+ } else {

+ require Carp;

+ Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ",

+ join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask),

+ ")");

+ }

+ $bits;

+sub import {

+ shift;

+ $^H |= bits(1, @_);

+sub unimport {

+ shift;

+ $^H &= ~ bits(0, @_);

+1;

+__END__

=head1 NAME

@@ -23,23 +191,38 @@ re - Perl pragma to alter regular expression behaviour

/foo${pat}bar/; # disallowed (with or without -T switch)

}

- use re 'debug'; # NOT lexically scoped (as others are)

- /^(.*)$/s; # output debugging info during

- # compile and run time

+ use re 'debug'; # output debugging info during

+ /^(.*)$/s; # compile and run time

use re 'debugcolor'; # same as 'debug', but with colored output

...

+ use re qw(Debug All); # Finer tuned debugging options.

+ use re qw(Debug More);

+ no re qw(Debug ALL); # Turn of all re debugging in this scope

+ use re qw(is_regexp regexp_pattern); # import utility functions

+ my ($pat,$mods)=regexp_pattern(qr/foo/i);

+ if (is_regexp($obj)) {

+ print "Got regexp: ",

+ scalar regexp_pattern($obj); # just as perl would stringify it

+ } # but no hassle with blessed re's.

(We use $^X in these examples because it's tainted by default.)

=head1 DESCRIPTION

+=head2 'taint' mode

When C<use re 'taint'> is in effect, and a tainted string is the target

of a regex, the regex memories (or values returned by the m// operator

in list context) are tainted. This feature is useful when regex operations

on tainted data aren't meant to extract safe substrings, but to perform

other transformations.

+=head2 'eval' mode

When C<use re 'eval'> is in effect, a regex is allowed to contain

C<(?{ ... })> zero-width assertions even if regular expression contains

variable interpolation. That is normally disallowed, since it is a

@@ -56,6 +239,8 @@ interpolation. Thus:

I<is> allowed if $pat is a precompiled regular expression, even

if $pat contains C<(?{ ... })> assertions.

+=head2 'debug' mode

When C<use re 'debug'> is in effect, perl emits debugging messages when

compiling and using regular expressions. The output is the same as that

obtained by running a C<-DDEBUGGING>-enabled perl interpreter with the

@@ -67,68 +252,253 @@ comma-separated list of C<termcap> properties to use for highlighting

strings on/off, pre-point part on/off.

See L<perldebug/"Debugging regular expressions"> for additional info.

-The directive C<use re 'debug'> is I<not lexically scoped>, as the

-other directives are. It has both compile-time and run-time effects.

+As of 5.9.5 the directive C<use re 'debug'> and its equivalents are

+lexically scoped, as the other directives are. However they have both

+compile-time and run-time effects.

See L<perlmodlib/Pragmatic Modules>.

-=cut

+=head2 'Debug' mode

-# N.B. File::Basename contains a literal for 'taint' as a fallback. If

-# taint is changed here, File::Basename must be updated as well.

-my %bitmask = (

-taint => 0x00100000, # HINT_RE_TAINT

-eval => 0x00200000, # HINT_RE_EVAL

-);

+Similarly C<use re 'Debug'> produces debugging output, the difference

+being that it allows the fine tuning of what debugging output will be

+emitted. Options are divided into three groups, those related to

+compilation, those related to execution and those related to special

+purposes. The options are as follows:

-sub setcolor {

- eval { # Ignore errors

- require Term::Cap;

+=over 4

- my $terminal = Tgetent Term::Cap ({OSPEED => 9600}); # Avoid warning.

- my $props = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue';

- my @props = split /,/, $props;

- my $colors = join "\t", map {$terminal->Tputs($_,1)} @props;

+=item Compile related options

- $colors =~ s/\0//g;

- $ENV{PERL_RE_COLORS} = $colors;

- };

+=over 4

-sub bits {

- my $on = shift;

- my $bits = 0;

- unless (@_) {

- require Carp;

- Carp::carp("Useless use of \"re\" pragma");

- }

- foreach my $s (@_){

- if ($s eq 'debug' or $s eq 'debugcolor') {

- setcolor() if $s eq 'debugcolor';

- require XSLoader;

- XSLoader::load('re');

- install() if $on;

- uninstall() unless $on;

- next;

- }

- if (exists $bitmask{$s}) {

- $bits |= $bitmask{$s};

- } else {

- require Carp;

- Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: @{[join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask)]})");

- }

- $bits;

+=item COMPILE

-sub import {

- shift;

- $^H |= bits(1, @_);

+Turns on all compile related debug options.

-sub unimport {

- shift;

- $^H &= ~ bits(0, @_);

+=item PARSE

-1;

+Turns on debug output related to the process of parsing the pattern.

+=item OPTIMISE

+Enables output related to the optimisation phase of compilation.

+=item TRIEC

+Detailed info about trie compilation.

+=item DUMP

+Dump the final program out after it is compiled and optimised.

+=back

+=item Execute related options

+=over 4

+=item EXECUTE

+Turns on all execute related debug options.

+=item MATCH

+Turns on debugging of the main matching loop.

+=item TRIEE

+Extra debugging of how tries execute.

+=item INTUIT

+Enable debugging of start point optimisations.

+=back

+=item Extra debugging options

+=over 4

+=item EXTRA

+Turns on all "extra" debugging options.

+=item BUFFERS

+Enable debugging the capture buffer storage during match. Warning,

+this can potentially produce extremely large output.

+=item TRIEM

+Enable enhanced TRIE debugging. Enhances both TRIEE

+and TRIEC.

+=item STATE

+Enable debugging of states in the engine.

+=item STACK

+Enable debugging of the recursion stack in the engine. Enabling

+or disabling this option automatically does the same for debugging

+states as well. This output from this can be quite large.

+=item OPTIMISEM

+Enable enhanced optimisation debugging and start point optimisations.

+Probably not useful except when debugging the regex engine itself.

+=item OFFSETS

+Dump offset information. This can be used to see how regops correlate

+to the pattern. Output format is

+ NODENUM:POSITION[LENGTH]

+Where 1 is the position of the first char in the string. Note that position

+can be 0, or larger than the actual length of the pattern, likewise length

+can be zero.

+=item OFFSETSDBG

+Enable debugging of offsets information. This emits copious

+amounts of trace information and doesn't mesh well with other

+debug options.

+Almost definitely only useful to people hacking

+on the offsets part of the debug engine.

+=back

+=item Other useful flags

+These are useful shortcuts to save on the typing.

+=over 4

+=item ALL

+Enable all options at once except OFFSETS, OFFSETSDBG and BUFFERS

+=item All

+Enable DUMP and all execute options. Equivalent to:

+ use re 'debug';

+=item MORE

+=item More

+Enable TRIEM and all execute compile and execute options.

+=back

+As of 5.9.5 the directive C<use re 'debug'> and its equivalents are

+lexically scoped, as the other directives are. However they have both

+compile-time and run-time effects.

+=head2 Exportable Functions

+As of perl 5.9.5 're' debug contains a number of utility functions that

+may be optionally exported into the caller's namespace. They are listed

+below.

+=over 4

+=item is_regexp($ref)

+Returns true if the argument is a compiled regular expression as returned

+by C<qr//>, false if it is not.

+This function will not be confused by overloading or blessing. In

+internals terms, this extracts the regexp pointer out of the

+PERL_MAGIC_qr structure so it it cannot be fooled.

+=item regexp_pattern($ref)

+If the argument is a compiled regular expression as returned by C<qr//>,

+then this function returns the pattern.

+In list context it returns a two element list, the first element

+containing the pattern and the second containing the modifiers used when

+the pattern was compiled.

+ my ($pat, $mods) = regexp_pattern($ref);

+In scalar context it returns the same as perl would when strigifying a raw

+C<qr//> with the same pattern inside. If the argument is not a compiled

+reference then this routine returns false but defined in scalar context,

+and the empty list in list context. Thus the following

+ if (regexp_pattern($ref) eq '(?i-xsm:foo)')

+will be warning free regardless of what $ref actually is.

+Like C<is_regexp> this function will not be confused by overloading

+or blessing of the object.

+=item regmust($ref)

+If the argument is a compiled regular expression as returned by C<qr//>,

+then this function returns what the optimiser consiers to be the longest

+anchored fixed string and longest floating fixed string in the pattern.

+A I<fixed string> is defined as being a substring that must appear for the

+pattern to match. An I<anchored fixed string> is a fixed string that must

+appear at a particular offset from the beginning of the match. A I<floating

+fixed string> is defined as a fixed string that can appear at any point in

+a range of positions relative to the start of the match. For example,

+ my $qr = qr/here .* there/x;

+ my ($anchored, $floating) = regmust($qr);

+ print "anchored:'$anchored'\nfloating:'$floating'\n";

+results in

+ anchored:'here'

+ floating:'there'

+Because the C<here> is before the C<.*> in the pattern, its position

+can be determined exactly. That's not true, however, for the C<there>;

+it could appear at any point after where the anchored string appeared.

+Perl uses both for its optimisations, prefering the longer, or, if they are

+equal, the floating.

+B<NOTE:> This may not necessarily be the definitive longest anchored and

+floating string. This will be what the optimiser of the Perl that you

+are using thinks is the longest. If you believe that the result is wrong

+please report it via the L<perlbug> utility.

+=item regname($name,$all)

+Returns the contents of a named buffer of the last successful match. If

+$all is true, then returns an array ref containing one entry per buffer,

+otherwise returns the first defined buffer.

+=item regnames($all)

+Returns a list of all of the named buffers defined in the last successful

+match. If $all is true, then it returns all names defined, if not it returns

+only names which were involved in the match.

+=item regnames_count()

+Returns the number of distinct names defined in the pattern used

+for the last successful match.

+B<Note:> this result is always the actual number of distinct

+named buffers defined, it may not actually match that which is

+returned by C<regnames()> and related routines when those routines

+have not been called with the $all parameter set.

+=back

+=head1 SEE ALSO

+L<perlmodlib/Pragmatic Modules>.

+=cut

diff --git a/gnu/usr.bin/perl/ext/re/re.xs b/gnu/usr.bin/perl/ext/re/re.xs
index 11239d7e59d..b4d3e34c4a1 100644
--- a/gnu/usr.bin/perl/ext/re/re.xs
+++ b/gnu/usr.bin/perl/ext/re/re.xs

@@ -6,73 +6,182 @@

#include "EXTERN.h"

#include "perl.h"

#include "XSUB.h"

+#include "re_comp.h"

START_EXTERN_C

-extern regexp* my_regcomp (pTHX_ char* exp, char* xend, PMOP* pm);

-extern I32 my_regexec (pTHX_ regexp* prog, char* stringarg, char* strend,

+extern REGEXP* my_re_compile (pTHX_ const SV * const pattern, const U32 pm_flags);

+extern I32 my_regexec (pTHX_ REGEXP * const prog, char* stringarg, char* strend,

char* strbeg, I32 minend, SV* screamer,

void* data, U32 flags);

-extern void my_regfree (pTHX_ struct regexp* r);

-extern char* my_re_intuit_start (pTHX_ regexp *prog, SV *sv, char *strpos,

- char *strend, U32 flags,

+extern char* my_re_intuit_start (pTHX_ REGEXP * const prog, SV *sv, char *strpos,

+ char *strend, const U32 flags,

struct re_scream_pos_data_s *data);

-extern SV* my_re_intuit_string (pTHX_ regexp *prog);

+extern SV* my_re_intuit_string (pTHX_ REGEXP * const prog);

-END_EXTERN_C

+extern void my_regfree (pTHX_ REGEXP * const r);

-#define MY_CXT_KEY "re::_guts" XS_VERSION

+extern void my_reg_numbered_buff_fetch(pTHX_ REGEXP * const rx, const I32 paren,

+ SV * const usesv);

+extern void my_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren,

+ SV const * const value);

+extern I32 my_reg_numbered_buff_length(pTHX_ REGEXP * const rx,

+ const SV * const sv, const I32 paren);

-typedef struct {

- int x_oldflag; /* debug flag */

-} my_cxt_t;

+extern SV* my_reg_named_buff(pTHX_ REGEXP * const, SV * const, SV * const,

+ const U32);

+extern SV* my_reg_named_buff_iter(pTHX_ REGEXP * const rx,

+ const SV * const lastkey, const U32 flags);

-START_MY_CXT

+extern SV* my_reg_qr_package(pTHX_ REGEXP * const rx);

+#if defined(USE_ITHREADS)

+extern void* my_regdupe (pTHX_ REGEXP * const r, CLONE_PARAMS *param);

+#endif

-#define oldflag (MY_CXT.x_oldflag)

+EXTERN_C const struct regexp_engine my_reg_engine;

-static void

-uninstall(pTHX)

- dMY_CXT;

- PL_regexecp = Perl_regexec_flags;

- PL_regcompp = Perl_pregcomp;

- PL_regint_start = Perl_re_intuit_start;

- PL_regint_string = Perl_re_intuit_string;

- PL_regfree = Perl_pregfree;

- if (!oldflag)

- PL_debug &= ~DEBUG_r_FLAG;

+END_EXTERN_C

-static void

-install(pTHX)

- dMY_CXT;

- PL_colorset = 0; /* Allow reinspection of ENV. */

- PL_regexecp = &my_regexec;

- PL_regcompp = &my_regcomp;

- PL_regint_start = &my_re_intuit_start;

- PL_regint_string = &my_re_intuit_string;

- PL_regfree = &my_regfree;

- oldflag = PL_debug & DEBUG_r_FLAG;

- PL_debug |= DEBUG_r_FLAG;

+const struct regexp_engine my_reg_engine = {

+ my_re_compile,

+ my_regexec,

+ my_re_intuit_start,

+ my_re_intuit_string,

+ my_regfree,

+ my_reg_numbered_buff_fetch,

+ my_reg_numbered_buff_store,

+ my_reg_numbered_buff_length,

+ my_reg_named_buff,

+ my_reg_named_buff_iter,

+ my_reg_qr_package,

+#if defined(USE_ITHREADS)

+ my_regdupe

+#endif

+};

MODULE = re PACKAGE = re

-BOOT:

+void

+install()

+ PPCODE:

+ PL_colorset = 0; /* Allow reinspection of ENV. */

+ /* PL_debug |= DEBUG_r_FLAG; */

+ XPUSHs(sv_2mortal(newSViv(PTR2IV(&my_reg_engine))));

+void

+regexp_pattern(sv)

+ SV * sv

+PROTOTYPE: $

+PREINIT:

+ REGEXP *re;

+PPCODE:

{

- MY_CXT_INIT;

+ /*

+ Checks if a reference is a regex or not. If the parameter is

+ not a ref, or is not the result of a qr// then returns false

+ in scalar context and an empty list in list context.

+ Otherwise in list context it returns the pattern and the

+ modifiers, in scalar context it returns the pattern just as it

+ would if the qr// was stringified normally, regardless as

+ to the class of the variable and any strigification overloads

+ on the object.

+ */

+ if ((re = SvRX(sv))) /* assign deliberate */

+ {

+ /* Housten, we have a regex! */

+ SV *pattern;

+ STRLEN patlen = 0;

+ STRLEN left = 0;

+ char reflags[6];

+ if ( GIMME_V == G_ARRAY ) {

+ /*

+ we are in list context so stringify

+ the modifiers that apply. We ignore "negative

+ modifiers" in this scenario.

+ */

+ char *fptr = INT_PAT_MODS;

+ char ch;

+ U16 match_flags = (U16)((re->extflags & PMf_COMPILETIME) >> 12);

+ while((ch = *fptr++)) {

+ if(match_flags & 1) {

+ reflags[left++] = ch;

+ }

+ match_flags >>= 1;

+ }

+ pattern = sv_2mortal(newSVpvn(re->precomp,re->prelen));

+ if (re->extflags & RXf_UTF8) SvUTF8_on(pattern);

+ /* return the pattern and the modifiers */

+ XPUSHs(pattern);

+ XPUSHs(sv_2mortal(newSVpvn(reflags,left)));

+ XSRETURN(2);

+ } else {

+ /* Scalar, so use the string that Perl would return */

+ /* return the pattern in (?msix:..) format */

+ pattern = sv_2mortal(newSVpvn(re->wrapped,re->wraplen));

+ if (re->extflags & RXf_UTF8)

+ SvUTF8_on(pattern);

+ XPUSHs(pattern);

+ XSRETURN(1);

+ }

+ } else {

+ /* It ain't a regexp folks */

+ if ( GIMME_V == G_ARRAY ) {

+ /* return the empty list */

+ XSRETURN_UNDEF;

+ } else {

+ /* Because of the (?:..) wrapping involved in a

+ stringified pattern it is impossible to get a

+ result for a real regexp that would evaluate to

+ false. Therefore we can return PL_sv_no to signify

+ that the object is not a regex, this means that one

+ can say

+ if (regex($might_be_a_regex) eq '(?:foo)') { }

+ and not worry about undefined values.

+ */

+ XSRETURN_NO;

+ }

+ /* NOT-REACHED */

}

void

-install()

- CODE:

- install(aTHX);

+regmust(sv)

+ SV * sv

+PROTOTYPE: $

+PREINIT:

+ REGEXP *re;

+PPCODE:

+ if ((re = SvRX(sv))) /* assign deliberate */

+ {

+ SV *an = &PL_sv_no;

+ SV *fl = &PL_sv_no;

+ if (re->anchored_substr) {

+ an = newSVsv(re->anchored_substr);

+ } else if (re->anchored_utf8) {

+ an = newSVsv(re->anchored_utf8);

+ }

+ if (re->float_substr) {

+ fl = newSVsv(re->float_substr);

+ } else if (re->float_utf8) {

+ fl = newSVsv(re->float_utf8);

+ }

+ XPUSHs(an);

+ XPUSHs(fl);

+ XSRETURN(2);

+ }

+ XSRETURN_UNDEF;

-void

-uninstall()

- CODE:

- uninstall(aTHX);

diff --git a/gnu/usr.bin/perl/ext/re/re_comp.h b/gnu/usr.bin/perl/ext/re/re_comp.h
new file mode 100644
index 00000000000..ba3aae851eb
--- /dev/null
+++ b/gnu/usr.bin/perl/ext/re/re_comp.h

@@ -0,0 +1,7 @@

+/* For blead, this file needs to do nothing other than pull in the regular

+ regcomp.h. For the 5.8.x re module it has to do more.

+ But doing it this way keeps regcomp.c and regexec.c clean.

+*/

+#include "regcomp.h"

diff --git a/gnu/usr.bin/perl/ext/re/re_top.h b/gnu/usr.bin/perl/ext/re/re_top.h
new file mode 100644
index 00000000000..6eb02e33684
--- /dev/null
+++ b/gnu/usr.bin/perl/ext/re/re_top.h

@@ -0,0 +1,42 @@

+/* need to replace pregcomp et al, so enable that */

+#ifndef PERL_IN_XSUB_RE

+# define PERL_IN_XSUB_RE

+#endif

+/* need access to debugger hooks */

+#if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)

+# define DEBUGGING

+#endif

+/* We *really* need to overwrite these symbols: */

+#define Perl_regexec_flags my_regexec

+#define Perl_regdump my_regdump

+#define Perl_regprop my_regprop

+#define Perl_re_intuit_start my_re_intuit_start

+#define Perl_re_compile my_re_compile

+#define Perl_regfree_internal my_regfree

+#define Perl_re_intuit_string my_re_intuit_string

+#define Perl_regdupe_internal my_regdupe

+#define Perl_reg_numbered_buff_fetch my_reg_numbered_buff_fetch

+#define Perl_reg_numbered_buff_store my_reg_numbered_buff_store

+#define Perl_reg_numbered_buff_length my_reg_numbered_buff_length

+#define Perl_reg_named_buff my_reg_named_buff

+#define Perl_reg_named_buff_iter my_reg_named_buff_iter

+#define Perl_reg_named_buff_fetch my_reg_named_buff_fetch

+#define Perl_reg_named_buff_exists my_reg_named_buff_exists

+#define Perl_reg_named_buff_firstkey my_reg_named_buff_firstkey

+#define Perl_reg_named_buff_nextkey my_reg_named_buff_nextkey

+#define Perl_reg_named_buff_scalar my_reg_named_buff_scalar

+#define Perl_reg_named_buff_all my_reg_named_buff_all

+#define Perl_reg_qr_package my_reg_qr_package

+#define PERL_NO_GET_CONTEXT

+/*

+ * Local variables:

+ * c-indentation-style: bsd

+ * c-basic-offset: 4

+ * indent-tabs-mode: t

+ * End:

+ *

+ * ex: set ts=8 sts=4 sw=4 noet:

+ */

diff --git a/gnu/usr.bin/perl/ext/re/t/lexical_debug.pl b/gnu/usr.bin/perl/ext/re/t/lexical_debug.pl
new file mode 100644
index 00000000000..3ec7455ba1a
--- /dev/null
+++ b/gnu/usr.bin/perl/ext/re/t/lexical_debug.pl

@@ -0,0 +1,29 @@

+use re 'debug';

+$_ = 'foo bar baz bop fip fop';

+/foo/ and $count++;

+ no re 'debug';

+ /bar/ and $count++;

+ {

+ use re 'debug';

+ /baz/ and $count++;

+ }

+ /bop/ and $count++;

+/fip/ and $count++;

+no re 'debug';

+/fop/ and $count++;

+use re 'debug';

+my $var='zoo|liz|zap';

+/($var)/ or $count++;

+print "Count=$count\n";

diff --git a/gnu/usr.bin/perl/ext/re/t/lexical_debug.t b/gnu/usr.bin/perl/ext/re/t/lexical_debug.t
new file mode 100644
index 00000000000..3c3f7ba316e
--- /dev/null
+++ b/gnu/usr.bin/perl/ext/re/t/lexical_debug.t

@@ -0,0 +1,37 @@

+#!./perl

+BEGIN {

+ chdir 't' if -d 't';

+ @INC = '../lib';

+ require Config;

+ if (($Config::Config{'extensions'} !~ /\bre\b/) ){

+ print "1..0 # Skip -- Perl configured without re module\n";

+ exit 0;

+ }

+use strict;

+# must use a BEGIN or the prototypes wont be respected meaning

+ # tests could pass that shouldn't

+BEGIN { require "./test.pl"; }

+my $out = runperl(progfile => "../ext/re/t/lexical_debug.pl", stderr => 1 );

+print "1..10\n";

+# Each pattern will produce an EXACT node with a specific string in

+# it, so we will look for that. We can't just look for the string

+# alone as the string being matched against contains all of them.

+ok( $out =~ /EXACT <foo>/, "Expect 'foo'" );

+ok( $out !~ /EXACT <bar>/, "No 'bar'" );

+ok( $out =~ /EXACT <baz>/, "Expect 'baz'" );

+ok( $out !~ /EXACT <bop>/, "No 'bop'" );

+ok( $out =~ /EXACT <fip>/, "Expect 'fip'" );

+ok( $out !~ /EXACT <fop>/, "No 'baz'" );

+ok( $out =~ /<liz>/, "Got 'liz'" ); # in a TRIE so no EXACT

+ok( $out =~ /<zoo>/, "Got 'zoo'" ); # in a TRIE so no EXACT

+ok( $out =~ /<zap>/, "Got 'zap'" ); # in a TRIE so no EXACT

+ok( $out =~ /Count=7\n/, "Count is 7")

+ or diag($out);

diff --git a/gnu/usr.bin/perl/ext/re/t/qr.t b/gnu/usr.bin/perl/ext/re/t/qr.t
new file mode 100644
index 00000000000..9a59a046bdd
--- /dev/null
+++ b/gnu/usr.bin/perl/ext/re/t/qr.t

@@ -0,0 +1,15 @@

+#!./perl

+BEGIN {

+ chdir 't' if -d 't';

+ @INC = '../lib';

+ require Config;

+ if (($Config::Config{'extensions'} !~ /\bre\b/) ){

+ print "1..0 # Skip -- Perl configured without re module\n";

+ exit 0;

+ }

+use Test::More tests => 1;

+use re 'Debug';

+isa_ok( qr//, "Regexp" );

diff --git a/gnu/usr.bin/perl/ext/re/t/re.t b/gnu/usr.bin/perl/ext/re/t/re.t
index 2a1923ea79a..204092f028b 100644
--- a/gnu/usr.bin/perl/ext/re/t/re.t
+++ b/gnu/usr.bin/perl/ext/re/t/re.t

@@ -31,8 +31,8 @@ my $warn;

local $SIG{__WARN__} = sub {

$warn = shift;

};

-eval { re::bits(1) };

-like( $warn, qr/Useless use/, 'bits() should warn with no args' );

+#eval { re::bits(1) };

+#like( $warn, qr/Useless use/, 'bits() should warn with no args' );

delete $ENV{PERL_RE_COLORS};

re::bits(0, 'debug');

@@ -58,6 +58,12 @@ re->unimport('taint');

ok( !( $^H & 0x00100000 ), 'unimport should clear bits in $^H when requested' );

re->unimport('eval');

ok( !( $^H & 0x00200000 ), '... and again' );

+my $reg=qr/(foo|bar|baz|blah)/;

+close STDERR;

+eval"use re Debug=>'ALL'";

+my $ok='foo'=~/$reg/;

+eval"no re Debug=>'ALL'";

+ok( $ok, 'No segv!' );

package Term::Cap;

diff --git a/gnu/usr.bin/perl/ext/re/t/re_funcs.t b/gnu/usr.bin/perl/ext/re/t/re_funcs.t
new file mode 100644
index 00000000000..97f795eac93
--- /dev/null
+++ b/gnu/usr.bin/perl/ext/re/t/re_funcs.t

@@ -0,0 +1,64 @@

+#!./perl

+BEGIN {

+ chdir 't' if -d 't';

+ @INC = '../lib';

+ require Config;

+ if (($Config::Config{'extensions'} !~ /\bre\b/) ){

+ print "1..0 # Skip -- Perl configured without re module\n";

+ exit 0;

+ }

+use strict;

+use Test::More; # test count at bottom of file

+use re qw(is_regexp regexp_pattern regmust

+ regname regnames regnames_count);

+ my $qr=qr/foo/pi;

+ ok(is_regexp($qr),'is_regexp($qr)');

+ ok(!is_regexp(''),'is_regexp("")');

+ is((regexp_pattern($qr))[0],'foo','regexp_pattern[0]');

+ is((regexp_pattern($qr))[1],'ip','regexp_pattern[1]');

+ is(regexp_pattern($qr),'(?pi-xsm:foo)','scalar regexp_pattern');

+ ok(!regexp_pattern(''),'!regexp_pattern("")');

+ my $qr=qr/here .* there/x;

+ my ($anchored,$floating)=regmust($qr);

+ is($anchored,'here',"Regmust anchored - qr//");

+ is($floating,'there',"Regmust floating - qr//");

+ my $foo='blah';

+ ($anchored,$floating)=regmust($foo);

+ is($anchored,undef,"Regmust anchored - non ref");

+ is($floating,undef,"Regmust anchored - non ref");

+ my $bar=['blah'];

+ ($anchored,$floating)=regmust($foo);

+ is($anchored,undef,"Regmust anchored - ref");

+ is($floating,undef,"Regmust anchored - ref");

+if ('1234'=~/(?:(?<A>\d)|(?<C>!))(?<B>\d)(?<A>\d)(?<B>\d)/){

+ my @names = sort +regnames();

+ is("@names","A B","regnames");

+ my @names = sort +regnames(0);

+ is("@names","A B","regnames");

+ my $names = regnames();

+ is($names, "B", "regnames in scalar context");

+ @names = sort +regnames(1);

+ is("@names","A B C","regnames");

+ is(join("", @{regname("A",1)}),"13");

+ is(join("", @{regname("B",1)}),"24");

+ {

+ if ('foobar'=~/(?<foo>foo)(?<bar>bar)/) {

+ is(regnames_count(),2);

+ } else {

+ ok(0); ok(0);

+ }

+ is(regnames_count(),3);

+# New tests above this line, don't forget to update the test count below!

+use Test::More tests => 20;

+# No tests here!

diff --git a/gnu/usr.bin/perl/ext/re/t/regop.pl b/gnu/usr.bin/perl/ext/re/t/regop.pl
new file mode 100644
index 00000000000..89693352208
--- /dev/null
+++ b/gnu/usr.bin/perl/ext/re/t/regop.pl

@@ -0,0 +1,20 @@

+use re Debug=>qw(DUMP EXECUTE OFFSETS TRIEC);

+my @tests=(

+ XY => 'X(A|[B]Q||C|D)Y' ,

+ foobar => '[f][o][o][b][a][r]',

+ x => '.[XY].',

+ 'D:\\dev/perl/ver/28321_/perl.exe'=>

+ '/(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\.WSF|\\.WSH|\\.pyo|\\.pyc|\\.pyw|\\.py)$/i',

+ 'q'=>'[q]',

+);

+while (@tests) {

+ my ($str,$pat)=splice @tests,0,2;

+ warn "\n";

+ $pat="/$pat/" if substr($pat,0,1) ne '/';

+ # string eval to get the free regex message in the right place.

+ eval qq[

+ warn "$str"=~$pat ? "%MATCHED%" : "%FAILED%","\n";

+ ];

+ die $@ if $@;

diff --git a/gnu/usr.bin/perl/ext/re/t/regop.t b/gnu/usr.bin/perl/ext/re/t/regop.t
new file mode 100644
index 00000000000..7fe7b204627
--- /dev/null
+++ b/gnu/usr.bin/perl/ext/re/t/regop.t

@@ -0,0 +1,261 @@

+#!./perl

+BEGIN {

+ chdir 't' if -d 't';

+ @INC = '../lib';

+ require Config;

+ if (($Config::Config{'extensions'} !~ /\bre\b/) ){

+ print "1..0 # Skip -- Perl configured without re module\n";

+ exit 0;

+ }

+use strict;

+BEGIN { require "./test.pl"; }

+our $NUM_SECTS;

+chomp(my @strs= grep { !/^\s*\#/ } <DATA>);

+my $out = runperl(progfile => "../ext/re/t/regop.pl", stderr => 1 );

+# VMS currently embeds linefeeds in the output.

+$out =~ s/\cJ//g if $^O = 'VMS';

+my @tests = grep { /\S/ } split /(?=Compiling REx)/, $out;

+# on debug builds we get an EXECUTING... message in there at the top

+shift @tests

+ if $tests[0] =~ /EXECUTING.../;

+plan( @tests + 2 + ( @strs - grep { !$_ or /^---/ } @strs ));

+is( scalar @tests, $NUM_SECTS,

+ "Expecting output for $NUM_SECTS patterns" );

+ok( defined $out, 'regop.pl returned something defined' );

+$out ||= "";

+my $test= 1;

+foreach my $testout ( @tests ) {

+ my ( $pattern )= $testout=~/Compiling REx "([^"]+)"/;

+ ok( $pattern, "Pattern for test " . ($test++) );

+ my $diaged;

+ while (@strs) {

+ local $_= shift @strs;

+ last if !$_

+ or /^---/;

+ next if /^\s*#/;

+ s/^\s+//;

+ s/\s+$//;

+ ok( $testout=~/\Q$_\E/, "$_: /$pattern/" )

+ or do {

+ !$diaged++ and diag("$_: /$pattern/\n$testout");

+ };

+ }

+# The format below is simple. Each line is an exact

+# string that must be found in the output.

+# Lines starting the # are comments.

+# Lines starting with --- are seperators indicating

+# that the tests for this result set are finished.

+# If you add a test make sure you update $NUM_SECTS

+# the commented output is just for legacy/debugging purposes

+BEGIN{ $NUM_SECTS= 6 }

+__END__

+#Compiling REx "X(A|[B]Q||C|D)Y"

+#size 34

+#first at 1

+# 1: EXACT <X>(3)

+# 3: OPEN1(5)

+# 5: TRIE-EXACT(21)

+# [Words:5 Chars:5 Unique:5 States:6 Start-Class:A-D]

+# <A>

+# <BQ>

+# <>

+# <C>

+# <D>

+# 21: CLOSE1(23)

+# 23: EXACT <Y>(25)

+# 25: END(0)

+#anchored "X" at 0 floating "Y" at 1..3 (checking floating) minlen 2

+#Guessing start of match, REx "X(A|[B]Q||C|D)Y" against "XY"...

+#Found floating substr "Y" at offset 1...

+#Found anchored substr "X" at offset 0...

+#Guessed: match at offset 0

+#Matching REx "X(A|[B]Q||C|D)Y" against "XY"

+# Setting an EVAL scope, savestack=140

+# 0 <> <XY> | 1: EXACT <X>

+# 1 <X> <Y> | 3: OPEN1

+# 1 <X> <Y> | 5: TRIE-EXACT

+# matched empty string...

+# 1 <X> <Y> | 21: CLOSE1

+# 1 <X> <Y> | 23: EXACT <Y>

+# 2 <XY> <> | 25: END

+#Match successful!

+#%MATCHED%

+#Freeing REx: "X(A|[B]Q||C|D)Y"

+Compiling REx "X(A|[B]Q||C|D)Y"

+[A-D]

+TRIE-EXACT

+<BQ>

+matched empty string

+Match successful!

+Found floating substr "Y" at offset 1...

+Found anchored substr "X" at offset 0...

+Guessed: match at offset 0

+checking floating

+minlen 2

+S:1/6

+W:5

+L:0/2

+C:5/5

+%MATCHED%

+---

+#Compiling REx "[f][o][o][b][a][r]"

+#size 67

+#first at 1

+# 1: EXACT <foobar>(13)

+# 13: END(0)

+#anchored "foobar" at 0 (checking anchored isall) minlen 6

+#Guessing start of match, REx "[f][o][o][b][a][r]" against "foobar"...

+#Found anchored substr "foobar" at offset 0...

+#Guessed: match at offset 0

+#Freeing REx: "[f][o][o][b][a][r]"

+foobar

+checking anchored isall

+minlen 6

+anchored "foobar" at 0

+Guessed: match at offset 0

+Compiling REx "[f][o][o][b][a][r]"

+Freeing REx: "[f][o][o][b][a][r]"

+%MATCHED%

+---

+#Compiling REx ".[XY]."

+#size 14

+#first at 1

+# 1: REG_ANY(2)

+# 2: ANYOF[XY](13)

+# 13: REG_ANY(14)

+# 14: END(0)

+#minlen 3

+#%FAILED%

+#Freeing REx: ".[XY]."

+%FAILED%

+minlen 3

+---

+# Got 164 bytes for offset annotations.

+# TRIE(NATIVE): W:6 C:24 Uq:7 Min:4 Max:4

+# Char : Match Base Ofs A B C P G E D

+# State|---------------------------------------------------

+# # 1| @ 7 + 0[ 2 . . . . . .]

+# # 2| @ 7 + 1[ . 3 . . . . .]

+# # 3| @ 7 + 2[ . . 4 . . . .]

+# # 4| @ A + 0[ 9 8 0 5 6 7 A]

+# # 5| W 1 @ 0

+# # 6| W 2 @ 0

+# # 7| W 3 @ 0

+# # 8| W 4 @ 0

+# # 9| W 5 @ 0

+# # A| W 6 @ 0

+# Final program:

+# 1: EXACT <ABC>(3)

+# 3: TRIEC-EXACT<S:4/10 W:6 L:1/1 C:24/7>[A-EGP](20)

+# <P>

+# <G>

+# <E>

+# <B>

+# <A>

+# <D>

+# 20: END(0)

+# anchored "ABC" at 0 (checking anchored) minlen 4

+# Offsets: [20]

+# 1:4[3] 3:4[15] 19:32[0] 20:34[0]

+# Found anchored substr "ABC" at offset 0...

+# Guessed: match at offset 0

+# 0 <> <ABCD> | 1:EXACT <ABC>(3)

+# 3 <ABC> <D> | 3:TRIEC-EXACT<S:4/10 W:6 L:1/1 C:24/7>[A-EGP](20)

+# 3 <ABC> <D> | State: 4 Accepted: 0 Charid: 7 CP: 44 After State: a

+# 4 <ABCD> <> | State: a Accepted: 1 Charid: 6 CP: 0 After State: 0

+# got 1 possible matches

+# only one match left: #6 <D>

+# 4 <ABCD> <> | 20:END(0)

+# Match successful!

+# %MATCHED%

+%MATCHED%

+EXACT <ABC>

+TRIEC-EXACT

+[A-EGP]

+only one match left: #6 <D>

+S:4/10

+W:6

+L:1/1

+C:24/7

+minlen 4

+(checking anchored)

+anchored "ABC" at 0

+---

+#Compiling REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|\.pyo|\.pyc|\.pyw|\.py)$"

+#size 48 nodes first at 3

+#first at 3

+#rarest char

+# at 0

+# 1: OPEN1(3)

+# 3: EXACTF <.>(5)

+# 5: TRIE-EXACTF(45)

+# [Start:2 Words:14 Chars:54 Unique:18 States:29 Minlen:2 Maxlen:3 Start-Class:BCEJPVWbcejpvw]

+# <.COM>

+# ... yada yada ... (dmq)

+# <.py>

+# 45: CLOSE1(47)

+# 47: EOL(48)

+# 48: END(0)

+#floating ""$ at 3..4 (checking floating) stclass "EXACTF <.>" minlen 3

+#Offsets: [48]

+# 1:1[1] 3:2[1] 5:2[81] 45:83[1] 47:84[1] 48:85[0]

+#Guessing start of match, REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|..." against "D:dev/perl/ver/28321_/perl.exe"...

+#Found floating substr ""$ at offset 30...

+#Starting position does not contradict /^/m...

+#Does not contradict STCLASS...

+#Guessed: match at offset 26

+#Matching REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|\.pyo|\.pyc|\.pyw|\.py)$..." against ".exe"

+#Matching stclass "EXACTF <.>" against ".exe"

+# Setting an EVAL scope, savestack=140

+# 26 <21_/perl> <.exe> | 1: OPEN1

+# 26 <21_/perl> <.exe> | 3: EXACTF <.>

+# 27 <21_/perl.> <exe> | 5: TRIE-EXACTF

+# only one match : #2 <.EXE>

+# 30 <21_/perl.exe> <> | 45: CLOSE1

+# 30 <21_/perl.exe> <> | 47: EOL

+# 30 <21_/perl.exe> <> | 48: END

+#Match successful!

+#POP STATE(1)

+#%MATCHED%

+#Freeing REx: "(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\."......

+%MATCHED%

+floating ""$ at 3..4 (checking floating)

+1:1[1] 3:2[1] 5:2[64] 45:83[1] 47:84[1] 48:85[0]

+stclass EXACTF <.> minlen 3

+Found floating substr ""$ at offset 30...

+Does not contradict STCLASS...

+Guessed: match at offset 26

+Matching stclass EXACTF <.> against ".exe"

+---

+#Compiling REx "[q]"

+#size 12 nodes Got 100 bytes for offset annotations.

+#first at 1

+#Final program:

+# 1: EXACT <q>(3)

+# 3: END(0)

+#anchored "q" at 0 (checking anchored isall) minlen 1

+#Offsets: [12]

+# 1:1[3] 3:4[0]

+#Guessing start of match, REx "[q]" against "q"...

+#Found anchored substr "q" at offset 0...

+#Guessed: match at offset 0

+#%MATCHED%

+#Freeing REx: "[q]"

+Got 100 bytes for offset annotations.

+Offsets: [12]

+1:1[3] 3:4[0]

+%MATCHED%

+Freeing REx: "[q]" \ No newline at end of file