diff options
Diffstat (limited to 'gnu/usr.bin/perl/t')
86 files changed, 1763 insertions, 1684 deletions
diff --git a/gnu/usr.bin/perl/t/TEST b/gnu/usr.bin/perl/t/TEST index 6b3504002f6..a9c844f605b 100644 --- a/gnu/usr.bin/perl/t/TEST +++ b/gnu/usr.bin/perl/t/TEST @@ -49,7 +49,6 @@ my %abs = ( '../cpan/File-Fetch' => 1, '../cpan/IPC-Cmd' => 1, '../cpan/IPC-SysV' => 1, - '../cpan/Locale-Codes' => 1, '../cpan/Module-Load' => 1, '../cpan/Module-Load-Conditional' => 1, '../cpan/Pod-Simple' => 1, @@ -61,8 +60,7 @@ my %abs = ( '../dist/Tie-File' => 1, ); -my %temp_no_core = - ('../cpan/B-Debug' => 1, +my %temp_no_core = ( '../cpan/Compress-Raw-Bzip2' => 1, '../cpan/Compress-Raw-Zlib' => 1, '../cpan/Devel-PPPort' => 1, @@ -82,15 +80,9 @@ my %temp_no_core = # Ideally this # list will eventually be empty my %temp_needs_dot = map { $_ => 1 } qw( - ../cpan/ExtUtils-Install ../cpan/Filter-Util-Call ../cpan/libnet - ../cpan/Locale-Codes - ../cpan/Math-BigInt - ../cpan/Math-BigRat - ../cpan/Test-Harness ../cpan/Test-Simple - ../cpan/version ); @@ -990,6 +982,11 @@ sub _cleanup_valgrind { unlink _find_files('cachegrind.out.\d+$', qw ( ../t ../cpan ../ext ../dist/ )); } + elsif ($$toolnm eq 'valgrind') { + # Remove empty, hence non-error, output files + unlink grep { -z } _find_files('valgrind-current', + qw ( ../t ../cpan ../ext ../dist/ )); + } } # Generate regexps of known bad filenames / skips from Porting/deparse-skips.txt diff --git a/gnu/usr.bin/perl/t/charset_tools.pl b/gnu/usr.bin/perl/t/charset_tools.pl index 6e88a375319..877cead0fa8 100644 --- a/gnu/usr.bin/perl/t/charset_tools.pl +++ b/gnu/usr.bin/perl/t/charset_tools.pl @@ -164,6 +164,12 @@ for (my $i = 0; $i < 256; $i++) { $native_to_i8[$i8_to_native[$i]] = $i; } +# Use these to convert to/from UTF-8 bytes. I8 is the encoding that +# corresponds to UTF-8 with start bytes, continuation bytes, and invariant +# bytes. UTF-EBCDIC is derived from this by a mapping which causes things +# like the start byte C5 to map to something else, as C5 is actually an 'E' in +# EBCDIC so can't be a real start byte, as it must be an invariant; and it +# maps 0x45 (an ASCII 'E') to C5. *I8_to_native = ($::IS_ASCII) ? sub { return shift } : sub { return join "", map { chr $i8_to_native[ord $_] } diff --git a/gnu/usr.bin/perl/t/harness b/gnu/usr.bin/perl/t/harness index 5ae2702ec16..caa2a318b8a 100644 --- a/gnu/usr.bin/perl/t/harness +++ b/gnu/usr.bin/perl/t/harness @@ -160,18 +160,66 @@ if (@ARGV) { my %dir; my %total_time; + my %serials; + my %all_dirs; + # Preprocess the list of tests for (@last) { if ($^O eq 'MSWin32') { s,\\,/,g; # canonicalize path }; - # Treat every file matching lib/*.t as a "directory" - m! \A ( \.\. / (?: lib | ext/XS-APItest/t ) + + # Keep a list of the distinct directory names, and another list of + # those which contain a file whose name begins with a 0 + if ( m! \A \.\. / + ( .*? ) # $1 is the directory path name + / + ( [^/]* \.t ) # $2 is the .t name + \z !x) + { + my $path = $1; + + $all_dirs{$path} = 1; + $serials{$path} = 1 if $2 =~ / \A 0 /x; + } + } + + # We assume that the reason a test file's name begins with a 0 is to + # order its execution among the tests in its directory. Hence, a + # directory containing such files should be tested in serial order. + # + # Add exceptions to the above rule + for (qw(ext/Pod-Html/t cpan/IO-Zlib/t)) { + $serials{$_} = 1; + } + + my @nonexistent_serials = grep { not exists $all_dirs{$_} } keys %serials; + if (@nonexistent_serials) { + die "These directories to be run serially are incorrectly" + . " specified:\n" . join "\n", @nonexistent_serials; + } + + # Remove the serial testing directories from the list of all + # directories. The remaining ones are testable in parallel. Make the + # parallel list a scalar with names separated by '|' so that below + # they will be added to a regular expression. + my $non_serials = join "|", grep { not exists $serials{$_} } keys %all_dirs; + undef %all_dirs; + undef %serials; + + for (@last) { + # Treat every file in each non-serial directory as its own + # "directory", so that it can be executed in parallel + m! \A ( \.\. / (?: $non_serials ) / [^/]+ \.t \z | .* [/] ) !x or die "'$_'"; push @{$dir{$1}}, $_; + + # This file contributes time to the total needed for the directory + # as a whole $total_time{$1} += $times{$_} || 0; } + #print STDERR __LINE__, join "\n", sort { $total_time{$b} <=> $total_time{$a} } keys %dir, " "; push @tests, @last; diff --git a/gnu/usr.bin/perl/t/io/crlf.t b/gnu/usr.bin/perl/t/io/crlf.t index 7fb4c1e4a82..d0275af9a0f 100644 --- a/gnu/usr.bin/perl/t/io/crlf.t +++ b/gnu/usr.bin/perl/t/io/crlf.t @@ -18,7 +18,7 @@ my $crcr = uni_to_native("\x0d\x0d"); my $ungetc_count = 8200; # Somewhat over the likely buffer size { - plan(tests => 16 + 2 * $ungetc_count); + plan(tests => 21 + 2 * $ungetc_count); ok(open(FOO,">:crlf",$file)); ok(print FOO 'a'.((('a' x 14).qq{\n}) x 2000) || close(FOO)); ok(open(FOO,"<:crlf",$file)); @@ -87,6 +87,21 @@ my $ungetc_count = 8200; # Somewhat over the likely buffer size unlike($foo, qr/$crcr/); } } + + { + # check binmode removes :utf8 + # 133604 - on Win32 :crlf is the base buffer layer, so + # binmode doesn't remove it, but the binmode handler didn't + # remove :utf8 either + ok(open(my $fh, ">", $file), "open a file"); + ok(binmode($fh, ":utf8"), "add :utf8"); + ok((() = grep($_ eq "utf8", PerlIO::get_layers($fh))), + "check :utf8 set"); + ok(binmode($fh), "remove :utf8"); + ok(!(() = grep($_ eq "utf8", PerlIO::get_layers($fh))), + "check :utf8 removed"); + close $fh; + } } sub count_chars { diff --git a/gnu/usr.bin/perl/t/io/defout.t b/gnu/usr.bin/perl/t/io/defout.t index 27ba83b8bd2..aa40666162c 100644 --- a/gnu/usr.bin/perl/t/io/defout.t +++ b/gnu/usr.bin/perl/t/io/defout.t @@ -12,6 +12,8 @@ BEGIN { set_up_inc('../lib'); } +$|=0; # test.pl makes it 1, and that conflicts with the below. + plan tests => 16; diff --git a/gnu/usr.bin/perl/t/io/eintr.t b/gnu/usr.bin/perl/t/io/eintr.t index 1ec80a3e157..26a4636b7b2 100644 --- a/gnu/usr.bin/perl/t/io/eintr.t +++ b/gnu/usr.bin/perl/t/io/eintr.t @@ -59,6 +59,8 @@ if ($^O eq 'VMS' || $^O eq 'MSWin32' || $^O eq 'cygwin' || $^O =~ /freebsd/ || $ exit 0; } + + my ($in, $out, $st, $sigst, $buf); plan(tests => 10); @@ -96,60 +98,65 @@ alarm(0); ok(!$st, 'read/die: read status'); ok(close($in), 'read/die: close status'); -# This used to be 1_000_000, but on Linux/ppc64 (POWER7) this kept -# consistently failing. At exactly 0x100000 it started passing -# again. Now we're asking the kernel what the pipe buffer is, and if -# that fails, hoping this number is bigger than any pipe buffer. -my $surely_this_arbitrary_number_is_fine = (eval { - use Fcntl qw(F_GETPIPE_SZ); - fcntl($out, F_GETPIPE_SZ, 0); -} || 0xfffff) + 1; - -# close during print - -fresh_io; -$SIG{ALRM} = sub { $sigst = close($out) ? "ok" : "nok" }; -$buf = "a" x $surely_this_arbitrary_number_is_fine . "\n"; -select $out; $| = 1; select STDOUT; -alarm(1); -$st = print $out $buf; -alarm(0); -is($sigst, 'nok', 'print/close: sig handler close status'); -ok(!$st, 'print/close: print status'); -ok(!close($out), 'print/close: close status'); - -# die during print - -fresh_io; -$SIG{ALRM} = sub { die }; -$buf = "a" x $surely_this_arbitrary_number_is_fine . "\n"; -select $out; $| = 1; select STDOUT; -alarm(1); -$st = eval { print $out $buf }; -alarm(0); -ok(!$st, 'print/die: print status'); -# the close will hang since there's data to flush, so use alarm -alarm(1); -ok(!eval {close($out)}, 'print/die: close status'); -alarm(0); - -# close during close - -# Apparently there's nothing in standard Linux that can cause an -# EINTR in close(2); but run the code below just in case it does on some -# platform, just to see if it segfaults. -fresh_io; -$SIG{ALRM} = sub { $sigst = close($in) ? "ok" : "nok" }; -alarm(1); -close $in; -alarm(0); - -# die during close - -fresh_io; -$SIG{ALRM} = sub { die }; -alarm(1); -eval { close $in }; -alarm(0); +SKIP: { + skip "Tests hang on older versions of Darwin", 5 + if $^O eq 'darwin' && $osmajmin < 16; + + # This used to be 1_000_000, but on Linux/ppc64 (POWER7) this kept + # consistently failing. At exactly 0x100000 it started passing + # again. Now we're asking the kernel what the pipe buffer is, and if + # that fails, hoping this number is bigger than any pipe buffer. + my $surely_this_arbitrary_number_is_fine = (eval { + use Fcntl qw(F_GETPIPE_SZ); + fcntl($out, F_GETPIPE_SZ, 0); + } || 0xfffff) + 1; + + # close during print + + fresh_io; + $SIG{ALRM} = sub { $sigst = close($out) ? "ok" : "nok" }; + $buf = "a" x $surely_this_arbitrary_number_is_fine . "\n"; + select $out; $| = 1; select STDOUT; + alarm(1); + $st = print $out $buf; + alarm(0); + is($sigst, 'nok', 'print/close: sig handler close status'); + ok(!$st, 'print/close: print status'); + ok(!close($out), 'print/close: close status'); + + # die during print + + fresh_io; + $SIG{ALRM} = sub { die }; + $buf = "a" x $surely_this_arbitrary_number_is_fine . "\n"; + select $out; $| = 1; select STDOUT; + alarm(1); + $st = eval { print $out $buf }; + alarm(0); + ok(!$st, 'print/die: print status'); + # the close will hang since there's data to flush, so use alarm + alarm(1); + ok(!eval {close($out)}, 'print/die: close status'); + alarm(0); + + # close during close + + # Apparently there's nothing in standard Linux that can cause an + # EINTR in close(2); but run the code below just in case it does on some + # platform, just to see if it segfaults. + fresh_io; + $SIG{ALRM} = sub { $sigst = close($in) ? "ok" : "nok" }; + alarm(1); + close $in; + alarm(0); + + # die during close + + fresh_io; + $SIG{ALRM} = sub { die }; + alarm(1); + eval { close $in }; + alarm(0); +} # vim: ts=4 sts=4 sw=4: diff --git a/gnu/usr.bin/perl/t/io/fs.t b/gnu/usr.bin/perl/t/io/fs.t index f35b907d5dd..90bcc316171 100644 --- a/gnu/usr.bin/perl/t/io/fs.t +++ b/gnu/usr.bin/perl/t/io/fs.t @@ -120,15 +120,11 @@ SKIP: { SKIP: { skip "hard links not that hard in $^O", 1 if $^O eq 'amigaos'; - skip "no mode checks", 1 if $skip_mode_checks; + skip "no mode checks", 1 if $skip_mode_checks; -# if ($^O eq 'cygwin') { # new files on cygwin get rwx instead of rw- -# is($mode & 0777, 0777, "mode of triply-linked file"); -# } else { - is(sprintf("0%o", $mode & 0777), - sprintf("0%o", $a_mode & 0777), - "mode of triply-linked file"); -# } + is(sprintf("0%o", $mode & 0777), + sprintf("0%o", $a_mode & 0777), + "mode of triply-linked file"); } } @@ -197,7 +193,7 @@ SKIP: { } is(chmod($newmode, "a"), 1, "fchmod"); $mode = (stat $fh)[2]; - SKIP: { + SKIP: { skip "no mode checks", 1 if $skip_mode_checks; is($mode & 0777, $newmode, "perm restored"); } @@ -247,88 +243,33 @@ is($ino, undef, "ino of renamed file a should be undef"); $delta = $accurate_timestamps ? 1 : 2; # Granularity of time on the filesystem chmod 0777, 'b'; -$foo = (utime 500000000,500000000 + $delta,'b'); +$ut = 500000000; + +note("basic check of atime and mtime"); +$foo = (utime $ut,$ut + $delta,'b'); is($foo, 1, "utime"); -check_utime_result(); +check_utime_result($ut, $accurate_timestamps, $delta); utime undef, undef, 'b'; ($atime,$mtime) = (stat 'b')[8,9]; -print "# utime undef, undef --> $atime, $mtime\n"; -isnt($atime, 500000000, 'atime'); -isnt($mtime, 500000000 + $delta, 'mtime'); +note("# utime undef, undef --> $atime, $mtime"); +isnt($atime, $ut, 'atime: utime called with two undefs'); +isnt($mtime, $ut + $delta, 'mtime: utime called with two undefs'); SKIP: { skip "no futimes", 6 unless ($Config{d_futimes} || "") eq "define"; + note("check futimes"); open(my $fh, "<", 'b'); - $foo = (utime 500000000,500000000 + $delta, $fh); + $foo = (utime $ut,$ut + $delta, $fh); is($foo, 1, "futime"); - check_utime_result(); + check_utime_result($ut, $accurate_timestamps, $delta); # [perl #122703] close $fh; - ok(!utime(500000000,500000000 + $delta, $fh), + ok(!utime($ut,$ut + $delta, $fh), "utime fails on a closed file handle"); isnt($!+0, 0, "and errno was set"); } - -sub check_utime_result { - ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime, - $blksize,$blocks) = stat('b'); - - SKIP: { - skip "bogus inode num", 1 if ($^O eq 'MSWin32') || ($^O eq 'NetWare'); - - ok($ino, 'non-zero inode num'); - } - - SKIP: { - skip "filesystem atime/mtime granularity too low", 2 - unless $accurate_timestamps; - - if ($^O eq 'vos') { - skip ("# TODO - hit VOS bug posix-2055 - access time does not follow POSIX rules for an open file.", 2); - } - - print "# atime - $atime mtime - $mtime delta - $delta\n"; - if($atime == 500000000 && $mtime == 500000000 + $delta) { - pass('atime'); - pass('mtime'); - } - else { - if ($^O =~ /\blinux\b/i) { - print "# Maybe stat() cannot get the correct atime, ". - "as happens via NFS on linux?\n"; - $foo = (utime 400000000,500000000 + 2*$delta,'b'); - my ($new_atime, $new_mtime) = (stat('b'))[8,9]; - print "# newatime - $new_atime nemtime - $new_mtime\n"; - if ($new_atime == $atime && $new_mtime - $mtime == $delta) { - pass("atime - accounted for possible NFS/glibc2.2 bug on linux"); - pass("mtime - accounted for possible NFS/glibc2.2 bug on linux"); - } - else { - fail("atime - $atime/$new_atime $mtime/$new_mtime"); - fail("mtime - $atime/$new_atime $mtime/$new_mtime"); - } - } - elsif ($^O eq 'VMS') { - # why is this 1 second off? - is( $atime, 500000001, 'atime' ); - is( $mtime, 500000000 + $delta, 'mtime' ); - } - elsif ($^O eq 'haiku') { - SKIP: { - skip "atime not updated", 1; - } - is($mtime, 500000001, 'mtime'); - } - else { - fail("atime"); - fail("mtime"); - } - } - } -} - SKIP: { skip "has futimes", 1 if ($Config{d_futimes} || "") eq "define"; open(my $fh, "<", "b") || die; @@ -532,3 +473,62 @@ SKIP: { # need to remove $tmpdir if rename() in test 28 failed! END { rmdir $tmpdir1; rmdir $tmpdir; } + +sub check_utime_result { + ($ut, $accurate_timestamps, $delta) = @_; + ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime, + $blksize,$blocks) = stat('b'); + + SKIP: { + skip "bogus inode num", 1 if ($^O eq 'MSWin32') || ($^O eq 'NetWare'); + ok($ino, 'non-zero inode num'); + } + + SKIP: { + skip "filesystem atime/mtime granularity too low", 2 + unless $accurate_timestamps; + + if ($^O eq 'vos') { + skip ("# TODO - hit VOS bug posix-2055 - access time does not follow POSIX rules for an open file.", 2); + } + + note("# atime - $atime mtime - $mtime delta - $delta"); + if($atime == $ut && $mtime == $ut + $delta) { + pass('atime: granularity test'); + pass('mtime: granularity test'); + } + else { + # Operating systems whose filesystems may be mounted with the noatime option + # RT 132663 + my %noatime_oses = map { $_ => 1 } ( qw| haiku netbsd | ); + if ($^O =~ /\blinux\b/i) { + note("# Maybe stat() cannot get the correct atime, ". + "as happens via NFS on linux?"); + $foo = (utime 400000000,$ut + 2*$delta,'b'); + my ($new_atime, $new_mtime) = (stat('b'))[8,9]; + note("# newatime - $new_atime nemtime - $new_mtime"); + if ($new_atime == $atime && $new_mtime - $mtime == $delta) { + pass("atime - accounted for possible NFS/glibc2.2 bug on linux"); + pass("mtime - accounted for possible NFS/glibc2.2 bug on linux"); + } + else { + fail("atime - $atime/$new_atime $mtime/$new_mtime"); + fail("mtime - $atime/$new_atime $mtime/$new_mtime"); + } + } + elsif ($^O eq 'VMS') { + # why is this 1 second off? + is( $atime, $ut + 1, 'atime: VMS' ); + is( $mtime, $ut + $delta, 'mtime: VMS' ); + } + elsif ($noatime_oses{$^O}) { + pass("atime not updated"); + is($mtime, 500000001, 'mtime'); + } + else { + fail("atime: default case"); + fail("mtime: default case"); + } + } # END failed atime mtime 'else' block + } # END granularity SKIP block +} diff --git a/gnu/usr.bin/perl/t/io/inplace.t b/gnu/usr.bin/perl/t/io/inplace.t index 0403cd92503..586363b67e7 100644 --- a/gnu/usr.bin/perl/t/io/inplace.t +++ b/gnu/usr.bin/perl/t/io/inplace.t @@ -34,64 +34,56 @@ is ( runperl( prog => 'print<>;', args => \@tfiles_bak ), "foo\nfoo\nfoo\n", "backup file contents stay the same" ); -SKIP: +our @ifiles = ( tempfile(), tempfile(), tempfile() ); + { - # based on code, dosish systems can't do no-backup inplace - # edits - $^O =~ /^(MSWin32|cygwin|uwin|dos|os2)$/ - and skip("Can't inplace edit without backups on $^O", 4); - - our @ifiles = ( tempfile(), tempfile(), tempfile() ); - - { - for my $file (@ifiles) { - runperl( prog => 'print qq(bar\n);', - args => [ '>', $file ] ); - } - - local $^I = ''; + for my $file (@ifiles) { + runperl( prog => 'print qq(bar\n);', + args => [ '>', $file ] ); + } + + local $^I = ''; local @ARGV = @ifiles; - - while (<>) { - print "foo$_"; - } - - is(scalar(@ARGV), 0, "consumed ARGV"); - -# runperl may quote its arguments, so don't expect to be able -# to reuse things you send it. - - my @my_ifiles = @ifiles; - is( runperl( prog => 'print<>;', args => \@my_ifiles ), - "foobar\nfoobar\nfoobar\n", - "normal inplace edit"); + + while (<>) { + print "foo$_"; } - - # test * equivalence RT #70802 - { - for my $file (@ifiles) { - runperl( prog => 'print qq(bar\n);', - args => [ '>', $file ] ); - } - - local $^I = '*'; - local @ARGV = @ifiles; - - while (<>) { - print "foo$_"; - } - - is(scalar(@ARGV), 0, "consumed ARGV"); - - my @my_ifiles = @ifiles; - is( runperl( prog => 'print<>;', args => \@my_ifiles ), - "foobar\nfoobar\nfoobar\n", - "normal inplace edit"); + + is(scalar(@ARGV), 0, "consumed ARGV"); + + # runperl may quote its arguments, so don't expect to be able + # to reuse things you send it. + + my @my_ifiles = @ifiles; + is( runperl( prog => 'print<>;', args => \@my_ifiles ), + "foobar\nfoobar\nfoobar\n", + "normal inplace edit"); +} + +# test * equivalence RT #70802 +{ + for my $file (@ifiles) { + runperl( prog => 'print qq(bar\n);', + args => [ '>', $file ] ); + } + + local $^I = '*'; + local @ARGV = @ifiles; + + while (<>) { + print "foo$_"; } - - END { unlink_all(@ifiles); } + + is(scalar(@ARGV), 0, "consumed ARGV"); + + my @my_ifiles = @ifiles; + is( runperl( prog => 'print<>;', args => \@my_ifiles ), + "foobar\nfoobar\nfoobar\n", + "normal inplace edit"); } +END { unlink_all(@ifiles); } + { my @tests = ( # opts, code, result, name, $TODO diff --git a/gnu/usr.bin/perl/t/io/perlio.t b/gnu/usr.bin/perl/t/io/perlio.t index 187cd273e7a..d9ecc9824e0 100755 --- a/gnu/usr.bin/perl/t/io/perlio.t +++ b/gnu/usr.bin/perl/t/io/perlio.t @@ -97,12 +97,17 @@ ok(close($utffh)); # hardcoded default temp path my $perlio_tmp_file_glob = '/tmp/PerlIO_??????'; + my $filename; - ok( open(my $x,"+<",undef), 'TMPDIR honored by magic temp file via 3 arg open with undef - works if TMPDIR points to a non-existent dir'); + SKIP: { + skip("No /tmp on this platform to fall back to absent TMPDIR",2) + unless (-e '/tmp'); + ok( open(my $x,"+<",undef), 'TMPDIR honored by magic temp file via 3 arg open with undef - works if TMPDIR points to a non-existent dir'); - my $filename = find_filename($x, $perlio_tmp_file_glob); - is($filename, undef, "No tmp files leaked"); - unlink_all $filename if defined $filename; + $filename = find_filename($x, $perlio_tmp_file_glob); + is($filename, undef, "No tmp files leaked"); + unlink_all $filename if defined $filename; + } mkdir $ENV{TMPDIR}; ok(open(my $x,"+<",undef), 'TMPDIR honored by magic temp file via 3 arg open with undef - works if TMPDIR points to an existent dir'); @@ -234,7 +239,7 @@ require Symbol; # doesn't matter whether it exists or not EOP qr/\ARecursive call to Perl_load_module in PerlIO_find_layer at/s, {stderr => 1}, - 'Mutal recursion between Perl_load_module and PerlIO_find_layer croaks'); + 'Mutual recursion between Perl_load_module and PerlIO_find_layer croaks'); } { diff --git a/gnu/usr.bin/perl/t/io/socket.t b/gnu/usr.bin/perl/t/io/socket.t index 952ff097423..2dce1a7d08c 100644 --- a/gnu/usr.bin/perl/t/io/socket.t +++ b/gnu/usr.bin/perl/t/io/socket.t @@ -44,6 +44,18 @@ my $fork = $Config{d_fork} || $Config{d_pseudofork}; ok(close($sock), "close the socket"); } +SKIP: +{ + $udp + or skip "No udp", 1; + # [perl #133853] failed socket creation didn't set error + # for bad parameters on Win32 + $! = 0; + socket(my $sock, PF_INET, SOCK_STREAM, $udp) + and skip "managed to make a UDP stream socket", 1; + ok(0+$!, "error set on failed socket()"); +} + SKIP: { # test it all in TCP $local or skip("No localhost", 3); @@ -135,6 +147,96 @@ SKIP: { } } +SKIP: { + # test recv/send handling with :utf8 + # this doesn't appear to have been tested previously, this is + # separate to avoid interfering with the data expected above + $local or skip("No localhost", 1); + $fork or skip("No fork", 1); + + note "recv/send :utf8 tests"; + ok(socket(my $serv, PF_INET, SOCK_STREAM, $tcp), "make a tcp socket (recv/send :utf8 handling)"); + my $bind_at = pack_sockaddr_in(0, $local); + ok(bind($serv, $bind_at), "bind works") + or skip("Couldn't bind to localhost", 1); + my $bind_name = getsockname($serv); + ok($bind_name, "getsockname() on bound socket"); + my ($bind_port) = unpack_sockaddr_in($bind_name); + + print "# port $bind_port\n"; + + SKIP: + { + ok(listen($serv, 5), "listen() works") + or diag "listen error: $!"; + + my $pid = fork; + my $send_data = "test\x80\xFF" x 50_000; + if ($pid) { + # parent + ok(socket(my $accept, PF_INET, SOCK_STREAM, $tcp), + "make accept tcp socket"); + ok(my $addr = accept($accept, $serv), "accept() works") + or diag "accept error: $!"; + binmode $accept, ':raw:utf8'; + ok(!eval { send($accept, "ABC", 0); 1 }, + "should die on send to :utf8 socket"); + binmode $accept; + # check bytes will be sent + utf8::upgrade($send_data); + my $sent_total = 0; + while ($sent_total < length $send_data) { + my $sent = send($accept, substr($send_data, $sent_total), 0); + defined $sent or last; + $sent_total += $sent; + } + my $shutdown = shutdown($accept, 1); + + # wait for the remote to close so data isn't lost in + # transit on a certain broken implementation + <$accept>; + # child tests are printed once we hit eof + curr_test(curr_test()+6); + waitpid($pid, 0); + + ok($shutdown, "shutdown() works"); + } + elsif (defined $pid) { + curr_test(curr_test()+3); + #sleep 1; + # child + ok_child(close($serv), "close server socket in child"); + ok_child(socket(my $child, PF_INET, SOCK_STREAM, $tcp), + "make child tcp socket"); + + ok_child(connect($child, $bind_name), "connect() works") + or diag "connect error: $!"; + binmode $child, ':raw:utf8'; + my $buf; + + ok_child(!eval { recv($child, $buf, 1000, 0); 1 }, + "recv on :utf8 should die"); + is_child($buf, "", "buf shouldn't contain anything"); + binmode $child; + my $recv_peer = recv($child, $buf, 1000, 0); + while(defined recv($child, my $tmp, 1000, 0)) { + last if length $tmp == 0; + $buf .= $tmp; + } + is_child($buf, $send_data, "check we received the data"); + close($child); + end_child(); + + exit(0); + } + else { + # failed to fork + diag "fork() failed $!"; + skip("fork() failed", 2); + } + } +} + SKIP: { eval { require Errno; defined &Errno::EMFILE } diff --git a/gnu/usr.bin/perl/t/io/tell.t b/gnu/usr.bin/perl/t/io/tell.t index 7a046ca6b32..ba106f0cc7f 100644 --- a/gnu/usr.bin/perl/t/io/tell.t +++ b/gnu/usr.bin/perl/t/io/tell.t @@ -6,7 +6,7 @@ BEGIN { set_up_inc('../lib'); } -plan(35); +plan(36); $TST = 'TST'; @@ -187,3 +187,10 @@ seek $fh,0,0; is(tell, 0, "argless tell after seek \$coercible..."); seek *$fh,0,0; is(tell, 0, "argless tell after seek *\$coercible..."); + +{ + # [perl #133721] + fresh_perl_is(<<'EOI', 'ok', {}, 'eof with no ${^LAST_FH}'); +print "ok" if eof; +EOI +} diff --git a/gnu/usr.bin/perl/t/io/utf8.t b/gnu/usr.bin/perl/t/io/utf8.t index 2b700595c8d..0bc8a5c2bf7 100644 --- a/gnu/usr.bin/perl/t/io/utf8.t +++ b/gnu/usr.bin/perl/t/io/utf8.t @@ -10,7 +10,7 @@ skip_all_without_perlio(); no utf8; # needed for use utf8 not griping about the raw octets -plan(tests => 63); +plan(tests => 62); $| = 1; @@ -312,16 +312,14 @@ is($failed, undef); { # [perl #23428] Somethings rotten in unicode semantics open F, ">$a_file"; - binmode F, ":utf8"; - no warnings qw(deprecated); - syswrite(F, $a = chr(0x100)); + binmode F; + $a = "A"; + utf8::upgrade($a); + syswrite(F, $a); close F; - is( ord($a), 0x100, '23428 syswrite should not downgrade scalar' ); - like( $a, qr/^\w+/, '23428 syswrite should not downgrade scalar' ); + ok(utf8::is_utf8($a), '23428 syswrite should not downgrade scalar' ); } -# sysread() and syswrite() tested in lib/open.t since Fcntl is used - { # <FH> on a :utf8 stream should complain immediately with -w # if it finds bad UTF-8 (:encoding(utf8) works this way) diff --git a/gnu/usr.bin/perl/t/lib/croak/op b/gnu/usr.bin/perl/t/lib/croak/op index c11803e3064..5f314788602 100644 --- a/gnu/usr.bin/perl/t/lib/croak/op +++ b/gnu/usr.bin/perl/t/lib/croak/op @@ -217,3 +217,37 @@ Execution of - aborted due to compilation errors. EXPECT Type of arg 1 to each must be hash or array (not constant item) at - line 1, near "FRED ;" Execution of - aborted due to compilation errors. +######## +# NAME better messages for array-ops on non-arrays +push %a, 1; +pop %a; +shift %a; +unshift %a, 1; +push *a, 1; +pop *a; +shift *a; +unshift *a, 1; +EXPECT +Type of arg 1 to push must be array (not hash dereference) at - line 1, near "1;" +Type of arg 1 to pop must be array (not hash dereference) at - line 2, near "%a;" +Type of arg 1 to shift must be array (not hash dereference) at - line 3, near "%a;" +Type of arg 1 to unshift must be array (not hash dereference) at - line 4, near "1;" +Type of arg 1 to push must be array (not ref-to-glob cast) at - line 5, near "1;" +Type of arg 1 to pop must be array (not ref-to-glob cast) at - line 6, near "*a;" +Type of arg 1 to shift must be array (not ref-to-glob cast) at - line 7, near "*a;" +Type of arg 1 to unshift must be array (not ref-to-glob cast) at - line 8, near "1;" +Execution of - aborted due to compilation errors. +######## +# NAME better messages for array-ops on non-arrays (part 2) +# to check PADHV without hitting the reported error limit +my %a; +push %a, 1; +pop %a; +shift %a; +unshift %a, 1; +EXPECT +Type of arg 1 to push must be array (not private hash) at - line 3, near "1;" +Type of arg 1 to pop must be array (not private hash) at - line 4, near "%a;" +Type of arg 1 to shift must be array (not private hash) at - line 5, near "%a;" +Type of arg 1 to unshift must be array (not private hash) at - line 6, near "1;" +Execution of - aborted due to compilation errors. diff --git a/gnu/usr.bin/perl/t/lib/croak/pp_sys b/gnu/usr.bin/perl/t/lib/croak/pp_sys index 8b7dc9d53df..cf9e4ef0ed8 100644 --- a/gnu/usr.bin/perl/t/lib/croak/pp_sys +++ b/gnu/usr.bin/perl/t/lib/croak/pp_sys @@ -73,3 +73,29 @@ open my $foo, "../harness"; opendir $foo, "."; EXPECT Cannot open $foo as a dirhandle: it is already open as a filehandle at - line 5. +######## +# NAME sysread() disallowed on :utf8 +open my $fh, "<:raw", "../harness" or die "# $!"; +my $buf; +sysread $fh, $buf, 10; +binmode $fh, ':utf8'; +sysread $fh, $buf, 10; +EXPECT +sysread() isn't allowed on :utf8 handles at - line 5. +######## +# NAME syswrite() disallowed on :utf8 +my $file = "syswwarn.tmp"; +open my $fh, ">:raw", $file or die "# $!"; +syswrite $fh, 'ABC'; +binmode $fh, ':utf8'; +syswrite $fh, 'ABC'; +close $fh; +END { unlink $file; } +EXPECT +syswrite() isn't allowed on :utf8 handles at - line 5. +######## +# NAME readline() didn't scalar() its argument +# this would assert rather than failing on the method call +E{0;readline@0} +EXPECT +Can't call method "E" without a package or object reference at - line 2. diff --git a/gnu/usr.bin/perl/t/lib/croak/toke b/gnu/usr.bin/perl/t/lib/croak/toke index a3852900e56..21851229fe1 100644 --- a/gnu/usr.bin/perl/t/lib/croak/toke +++ b/gnu/usr.bin/perl/t/lib/croak/toke @@ -481,6 +481,19 @@ Bareword found where operator expected at - line 2, near "2p0" syntax error at - line 2, near "2p0" Execution of - aborted due to compilation errors. ######## +# NAME dump() must be written as CORE::dump() as of Perl 5.30 +BEGIN { $^C = 1; } +dump; +CORE::dump; +EXPECT +dump() must be written as CORE::dump() as of Perl 5.30 at - line 2. +######## +# NAME check Prototype not terminated includes line number (133524) +sub t1 {} +sub t2 (} +EXPECT +Prototype not terminated at - line 2. +######## # NAME [perl #132158] format with syntax errors format= @ diff --git a/gnu/usr.bin/perl/t/lib/feature/bundle b/gnu/usr.bin/perl/t/lib/feature/bundle index 5eacaff41ba..d12c7912a3f 100644 --- a/gnu/usr.bin/perl/t/lib/feature/bundle +++ b/gnu/usr.bin/perl/t/lib/feature/bundle @@ -83,40 +83,24 @@ custom sub # SKIP ? not defined DynaLoader::boot_DynaLoader no feature; use feature ":default"; +$[ = 0; $[ = 1; -print qw[a b c][2], "\n"; -use feature ":5.16"; # should not disable anything; no feature ':all' does that -print qw[a b c][2], "\n"; -no feature ':all'; -print qw[a b c][2], "\n"; -use feature ":5.16"; -print qw[a b c][2], "\n"; -EXPECT -Use of assignment to $[ is deprecated, and will be fatal in Perl 5.30 at - line 4. -b -b -c -c +EXPECT +Assigning non-zero to $[ is no longer possible at - line 5. ######## # "no feature" use feature ':5.16'; # turns array_base off -no feature; # resets to :default, thus turns array_base on +no feature; # resets to :default, thus would turn array_base on, if it still existed +$[ = 0; $[ = 1; -print qw[a b c][2], "\n"; EXPECT -Use of assignment to $[ is deprecated, and will be fatal in Perl 5.30 at - line 4. -b +Assigning non-zero to $[ is no longer possible at - line 5. ######## # "no feature 'all" -$[ = 1; -print qw[a b c][2], "\n"; no feature ':all'; # turns array_base (and everything else) off $[ = 1; -print qw[a b c][2], "\n"; EXPECT -Use of assignment to $[ is deprecated, and will be fatal in Perl 5.30 at - line 2. -Assigning non-zero to $[ is no longer possible at - line 5. -b +Assigning non-zero to $[ is no longer possible at - line 3. ######## # NAME $^H accidentally enabling all features eval 'BEGIN { $^H |= 0x1c020000 } $_ = evalbytes 12345'; diff --git a/gnu/usr.bin/perl/t/lib/feature/implicit b/gnu/usr.bin/perl/t/lib/feature/implicit index 79f1bf8888a..a6c47ef72ab 100644 --- a/gnu/usr.bin/perl/t/lib/feature/implicit +++ b/gnu/usr.bin/perl/t/lib/feature/implicit @@ -73,38 +73,6 @@ yes evalbytes sub say sub ######## -# No $[ under 5.15 -# SKIP ? not defined DynaLoader::boot_DynaLoader -use v5.14; -no warnings 'deprecated'; -$[ = 1; -print qw[a b c][2], "\n"; -use v5.15; -print qw[a b c][2], "\n"; -EXPECT -b -c -######## -# $[ under < 5.10 -# SKIP ? not defined DynaLoader::boot_DynaLoader -use feature 'say'; # make sure it is loaded and modifies %^H; we are test- -use v5.8.8; # ing to make sure it does not disable $[ -no warnings 'deprecated'; -$[ = 1; -print qw[a b c][2], "\n"; -EXPECT -b -######## -# $[ under < 5.10 after use v5.15 -# SKIP ? not defined DynaLoader::boot_DynaLoader -use v5.15; -use v5.8.8; -no warnings 'deprecated'; -$[ = 1; -print qw[a b c][2], "\n"; -EXPECT -b -######## # Implicit unicode_string feature use v5.14; my $sharp_s = chr utf8::unicode_to_native(0xdf); diff --git a/gnu/usr.bin/perl/t/lib/h2ph.pht b/gnu/usr.bin/perl/t/lib/h2ph.pht index cda8d21051c..f068d6dae46 100644 --- a/gnu/usr.bin/perl/t/lib/h2ph.pht +++ b/gnu/usr.bin/perl/t/lib/h2ph.pht @@ -90,6 +90,10 @@ unless(defined(&_H2PH_H_)) { } eval("sub flim () { 0; }") unless defined(&flim); eval("sub flam () { 1; }") unless defined(&flam); + eval 'sub blli_in_use { + my($blli) = @_; + eval q({ ($blli->{l2_proto}) || ($blli->{l3_proto}); }); + }' unless defined(&blli_in_use); eval 'sub multiline () {"multilinestring";}' unless defined(&multiline); } 1; diff --git a/gnu/usr.bin/perl/t/lib/warnings/7fatal b/gnu/usr.bin/perl/t/lib/warnings/7fatal index 40c649f249a..2056d01e515 100644 --- a/gnu/usr.bin/perl/t/lib/warnings/7fatal +++ b/gnu/usr.bin/perl/t/lib/warnings/7fatal @@ -278,6 +278,7 @@ EXPECT Reversed += operator at - line 8. ######## # TODO ? !$Config{usethreads} && $::UTF8 && ($ENV{PERL_DESTRUCT_LEVEL} || 0) > 1 ? "Parser leaks OPs, which leak shared hash keys" : '' +# SKIP ? $Config{ccflags} =~ /sanitize/ use warnings 'void' ; @@ -297,6 +298,7 @@ Useless use of time in void context at - line 4. Useless use of length in void context at - line 9. ######## # TODO ? !$Config{usethreads} && $::UTF8 && ($ENV{PERL_DESTRUCT_LEVEL} || 0) > 1 ? "Parser leaks OPs, which leak shared hash keys" : '' +# SKIP ? $Config{ccflags} =~ /sanitize/ use warnings ; diff --git a/gnu/usr.bin/perl/t/lib/warnings/op b/gnu/usr.bin/perl/t/lib/warnings/op index 54e2e3de20e..85297836d2a 100644 --- a/gnu/usr.bin/perl/t/lib/warnings/op +++ b/gnu/usr.bin/perl/t/lib/warnings/op @@ -1675,13 +1675,6 @@ Useless localization of match position at - line 49. Useless localization of vec at - line 50. ######## # op.c -my $x1 if 0; -my @x2 if 0; -my %x3 if 0; -my ($x4) if 0; -my ($x5,@x6, %x7) if 0; -0 && my $z1; -0 && my (%z2); # these shouldn't warn our $x if 0; our $x unless 0; @@ -1690,29 +1683,6 @@ if (my $w2) { $a=1 } if ($a && (my $w3 = 1)) {$a = 2} EXPECT -Deprecated use of my() in false conditional. This will be a fatal error in Perl 5.30 at - line 2. -Deprecated use of my() in false conditional. This will be a fatal error in Perl 5.30 at - line 3. -Deprecated use of my() in false conditional. This will be a fatal error in Perl 5.30 at - line 4. -Deprecated use of my() in false conditional. This will be a fatal error in Perl 5.30 at - line 5. -Deprecated use of my() in false conditional. This will be a fatal error in Perl 5.30 at - line 6. -Deprecated use of my() in false conditional. This will be a fatal error in Perl 5.30 at - line 7. -Deprecated use of my() in false conditional. This will be a fatal error in Perl 5.30 at - line 8. -######## -# op.c -$[ = 1; -($[) = 1; -use warnings 'deprecated'; -$[ = 2; -($[) = 2; -$[ = 0; -no warnings 'deprecated'; -$[ = 3; -($[) = 3; -EXPECT -Use of assignment to $[ is deprecated, and will be fatal in Perl 5.30 at - line 2. -Use of assignment to $[ is deprecated, and will be fatal in Perl 5.30 at - line 3. -Use of assignment to $[ is deprecated, and will be fatal in Perl 5.30 at - line 5. -Use of assignment to $[ is deprecated, and will be fatal in Perl 5.30 at - line 6. ######## # op.c use warnings 'void'; diff --git a/gnu/usr.bin/perl/t/lib/warnings/pp_sys b/gnu/usr.bin/perl/t/lib/warnings/pp_sys index 90d3cc790d6..5f6b83d2f63 100644 --- a/gnu/usr.bin/perl/t/lib/warnings/pp_sys +++ b/gnu/usr.bin/perl/t/lib/warnings/pp_sys @@ -890,30 +890,6 @@ sleep(-1); EXPECT sleep() with negative argument at - line 2. ######## -# NAME sysread() deprecated on :utf8 -open my $fh, "<:raw", "../harness" or die "# $!"; -my $buf; -sysread $fh, $buf, 10; -binmode $fh, ':utf8'; -sysread $fh, $buf, 10; -no warnings 'deprecated'; -sysread $fh, $buf, 10; -EXPECT -sysread() is deprecated on :utf8 handles. This will be a fatal error in Perl 5.30 at - line 5. -######## -# NAME syswrite() deprecated on :utf8 -my $file = "syswwarn.tmp"; -open my $fh, ">:raw", $file or die "# $!"; -syswrite $fh, 'ABC'; -binmode $fh, ':utf8'; -syswrite $fh, 'ABC'; -no warnings 'deprecated'; -syswrite $fh, 'ABC'; -close $fh; -unlink $file; -EXPECT -syswrite() is deprecated on :utf8 handles. This will be a fatal error in Perl 5.30 at - line 5. -######## # NAME stat on name with \0 use warnings; my @x = stat("./\0-"); diff --git a/gnu/usr.bin/perl/t/lib/warnings/regcomp b/gnu/usr.bin/perl/t/lib/warnings/regcomp index 516de419116..eb827b67270 100644 --- a/gnu/usr.bin/perl/t/lib/warnings/regcomp +++ b/gnu/usr.bin/perl/t/lib/warnings/regcomp @@ -56,24 +56,30 @@ Unmatched [ in regex; marked by <-- HERE in m/abc[ <-- HERE fi[.00./ at - line ######## # NAME perl qr/(?[[[:word]]])/ XXX Why is 'syntax' lc? # OPTION fatal +use warnings; +no warnings 'experimental::regex_sets'; qr/(?[[[:word]]])/; EXPECT -Assuming NOT a POSIX class since there is no terminating ':' in regex; marked by <-- HERE in m/(?[[[:word <-- HERE ]]])/ at - line 2. -Unexpected ']' with no following ')' in (?[... in regex; marked by <-- HERE in m/(?[[[:word]] <-- HERE ])/ at - line 2. +Assuming NOT a POSIX class since there is no terminating ':' in regex; marked by <-- HERE in m/(?[[[:word <-- HERE ]]])/ at - line 4. +Unexpected ']' with no following ')' in (?[... in regex; marked by <-- HERE in m/(?[[[:word]] <-- HERE ])/ at - line 4. ######## # NAME qr/(?[ [[:digit: ])/ # OPTION fatal +use warnings; +no warnings 'experimental::regex_sets'; qr/(?[[[:digit: ])/; EXPECT -Assuming NOT a POSIX class since no blanks are allowed in one in regex; marked by <-- HERE in m/(?[[[:digit: ] <-- HERE )/ at - line 2. -syntax error in (?[...]) in regex; marked by <-- HERE in m/(?[[[:digit: ]) <-- HERE / at - line 2. +Assuming NOT a POSIX class since no blanks are allowed in one in regex; marked by <-- HERE in m/(?[[[:digit: ] <-- HERE )/ at - line 4. +syntax error in (?[...]) in regex; marked by <-- HERE in m/(?[[[:digit: ]) <-- HERE / at - line 4. ######## # NAME qr/(?[ [:digit: ])/ # OPTION fatal +use warnings; +no warnings 'experimental::regex_sets'; qr/(?[[:digit: ])/ EXPECT -Assuming NOT a POSIX class since no blanks are allowed in one in regex; marked by <-- HERE in m/(?[[:digit: ] <-- HERE )/ at - line 2. -syntax error in (?[...]) in regex; marked by <-- HERE in m/(?[[:digit: ]) <-- HERE / at - line 2. +Assuming NOT a POSIX class since no blanks are allowed in one in regex; marked by <-- HERE in m/(?[[:digit: ] <-- HERE )/ at - line 4. +syntax error in (?[...]) in regex; marked by <-- HERE in m/(?[[:digit: ]) <-- HERE / at - line 4. ######## # NAME [perl #126141] # OPTION fatal diff --git a/gnu/usr.bin/perl/t/lib/warnings/toke b/gnu/usr.bin/perl/t/lib/warnings/toke index ffa6307c619..83641e51066 100644 --- a/gnu/usr.bin/perl/t/lib/warnings/toke +++ b/gnu/usr.bin/perl/t/lib/warnings/toke @@ -109,8 +109,6 @@ toke.c AOK $a = 0037777777777 ; $a = 0047777777777 ; - dump() better written as CORE::dump() - Use of /c modifier is meaningless without /g Use of /c modifier is meaningless in s/// @@ -1030,6 +1028,7 @@ Operator or semicolon missing before *foo at - line 8. Ambiguous use of * resolved as operator * at - line 8. Operator or semicolon missing before *foo at - line 10. Ambiguous use of * resolved as operator * at - line 10. +$* is no longer supported as of Perl 5.30 at - line 14. ######## # toke.c $^W = 0 ; @@ -1181,40 +1180,6 @@ Integer overflow in hexadecimal number at - line 8. Integer overflow in octal number at - line 11. ######## # toke.c -BEGIN { $^C = 1; } -dump; -CORE::dump; -EXPECT -dump() better written as CORE::dump(). dump() will no longer be available in Perl 5.30 at - line 3. -- syntax OK -######## -# toke.c -BEGIN { $^C = 1; } -no warnings 'deprecated'; -dump; -CORE::dump; -EXPECT -- syntax OK -######## -# toke.c -BEGIN { $^C = 1; } -no warnings 'deprecated'; -use warnings 'misc'; -dump; -CORE::dump; -EXPECT -dump() better written as CORE::dump(). dump() will no longer be available in Perl 5.30 at - line 5. -- syntax OK -######## -# toke.c -use warnings 'misc'; -use subs qw/dump/; -sub dump { print "no warning for overridden dump\n"; } -dump; -EXPECT -no warning for overridden dump -######## -# toke.c use warnings 'ambiguous'; "@mjd_previously_unused_array"; no warnings 'ambiguous'; @@ -1407,7 +1372,7 @@ my $a = "\o{}"; EXPECT Missing braces on \o{} at - line 3, within string Missing right brace on \o{ at - line 4, within string -Number with no digits at - line 5, within string +Empty \o{} at - line 5, within string BEGIN not safe after errors--compilation aborted at - line 6. ######## # toke.c @@ -1676,7 +1641,9 @@ BEGIN{ use utf8; my $a = qr ̂foobar̂; EXPECT -Use of unassigned code point or non-standalone grapheme for a delimiter will be a fatal error starting in Perl 5.30 at - line 8. +Use of unassigned code point or non-standalone grapheme for a delimiter is not allowed at - line 8, near "= " +Use of unassigned code point or non-standalone grapheme for a delimiter is not allowed at - line 8, near "= " +Execution of - aborted due to compilation errors. ######## # NAME [perl #130567] Assertion failure BEGIN { @@ -1708,3 +1675,19 @@ Execution of - aborted due to compilation errors. use utf8; qw∘foo ∞ ♥ bar∘ EXPECT +######## +# NAME [perl #134064] +BEGIN { + if (ord('A') == 193) { + print "SKIPPED\n# test is ASCII-specific, but could be extended to EBCDIC"; + exit 0; + } +} +use utf8; +$foo="m'\302'"; +eval $foo ; +print "The eval did not crash the program\n" +EXPECT +OPTION regex +Malformed UTF-8 character: .*non-continuation.* +The eval did not crash the program diff --git a/gnu/usr.bin/perl/t/lib/warnings/utf8 b/gnu/usr.bin/perl/t/lib/warnings/utf8 index a9a6388d31e..49fa4e404f4 100644 --- a/gnu/usr.bin/perl/t/lib/warnings/utf8 +++ b/gnu/usr.bin/perl/t/lib/warnings/utf8 @@ -782,4 +782,5 @@ use warnings 'utf8'; for(uc 0..t){0~~pack"UXc",exp} EXPECT OPTIONS regex -Malformed UTF-8 character: \\x([[:xdigit:]]{2})\\x([[:xdigit:]]{2}) \(unexpected non-continuation byte 0x\2, immediately after start byte 0x\1; need 2 bytes, got 1\) in smart match at - line 9. +Malformed UTF-8 character: \\x([[:xdigit:]]{2})\\x([[:xdigit:]]{2}) \(unexpected non-continuation byte 0x\2, immediately after start byte 0x\1; need 2 bytes, got 1\) in pack at - line 9. +Malformed UTF-8 character \(fatal\) at - line 9. diff --git a/gnu/usr.bin/perl/t/loc_tools.pl b/gnu/usr.bin/perl/t/loc_tools.pl index 7afb7bacf6c..c76e29388a7 100644 --- a/gnu/usr.bin/perl/t/loc_tools.pl +++ b/gnu/usr.bin/perl/t/loc_tools.pl @@ -109,21 +109,14 @@ sub _trylocale ($$$$) { # For use only by other functions in this file! # systems return if $locale =~ / ^ pig $ /ix; - # As of 6.3, this platform's locale handling is basically broken. khw - # filed a bug report (no ticket number was returned), and it is supposedly - # going to change in a future release, so the statements here below sunset - # for any larger version, at which point this may start failing and have - # to be revisited. - # - # Given a legal individual category, basically whatever you set the locale - # to, the return from setlocale() indicates that it has taken effect, even - # if it hasn't. However, the return from querying LC_ALL won't reflect - # this. - if ($Config{osname} =~ /openbsd/i && $locale !~ / ^ (?: C | POSIX ) $/ix) { - my ($major, $minor) = $Config{osvers} =~ / ^ ( \d+ ) \. ( \d+ ) /ax; - return if ! defined $major || ! defined $minor - || $major < 6 || ($major == 6 && $minor <= 3); - } + # Certain platforms have a crippled locale system in which setlocale + # returns success for just about any possible locale name, but if anything + # actually happens as a result of the call, it is that the underlying + # locale is set to a system default, likely C or C.UTF-8. We can't test + # such systems fully, but we shouldn't disable the user from using + # locales, as it may work out for them (or not). + return if defined $Config{d_setlocale_accepts_any_locale_name} + && $locale !~ / ^ (?: C | POSIX | C\.UTF-8 ) $/ix; $categories = [ $categories ] unless ref $categories; @@ -214,11 +207,20 @@ sub locales_enabled(;$) { # denoting a single category (either name or number). No conversion into # a number is done in this case. - return 0 unless $Config{d_setlocale} - # I (khw) cargo-culted the '?' in the pattern on the - # next line. - && $Config{ccflags} !~ /\bD?NO_LOCALE\b/ - && $has_locale_h; + # khw cargo-culted the '?' in the pattern on the next line. + return 0 if $Config{ccflags} =~ /\bD?NO_LOCALE\b/; + + # If we can't load the POSIX XS module, we can't have locales even if they + # normally would be available + return 0 if ! defined &DynaLoader::boot_DynaLoader; + + if (! $Config{d_setlocale}) { + return 0 if $Config{ccflags} =~ /\bD?NO_POSIX_2008_LOCALE\b/; + return 0 unless $Config{d_newlocale}; + return 0 unless $Config{d_uselocale}; + return 0 unless $Config{d_duplocale}; + return 0 unless $Config{d_freelocale}; + } # Done with the global possibilities. Now check if any passed in category # is disabled. @@ -337,6 +339,15 @@ sub find_locales ($;$) { my @Locale; _trylocale("C", $categories, \@Locale, $allow_incompatible); _trylocale("POSIX", $categories, \@Locale, $allow_incompatible); + + if ($Config{d_has_C_UTF8} eq 'true') { + _trylocale("C.UTF-8", $categories, \@Locale, $allow_incompatible); + } + + # There's no point in looking at anything more if we know that setlocale + # will return success on any garbage or non-garbage name. + return sort @Locale if defined $Config{d_setlocale_accepts_any_locale_name}; + foreach (1..16) { _trylocale("ISO8859-$_", $categories, \@Locale, $allow_incompatible); _trylocale("iso8859$_", $categories, \@Locale, $allow_incompatible); @@ -501,8 +512,8 @@ sub is_locale_utf8 ($) { # Return a boolean as to if core Perl thinks the input return $ret; } -sub find_utf8_ctype_locale (;$) { # Return the name of a locale that core Perl - # thinks is a UTF-8 LC_CTYPE locale. +sub find_utf8_ctype_locales (;$) { # Return the names of the locales that core + # Perl thinks are UTF-8 LC_CTYPE locales. # Optional parameter is a reference to a # list of locales to try; if omitted, this # tries all locales it can find on the @@ -510,6 +521,7 @@ sub find_utf8_ctype_locale (;$) { # Return the name of a locale that core Perl return unless locales_enabled('LC_CTYPE'); my $locales_ref = shift; + my @return; if (! defined $locales_ref) { @@ -518,12 +530,66 @@ sub find_utf8_ctype_locale (;$) { # Return the name of a locale that core Perl } foreach my $locale (@$locales_ref) { - return $locale if is_locale_utf8($locale); + push @return, $locale if is_locale_utf8($locale); + } + + return @return; +} + + +sub find_utf8_ctype_locale (;$) { # Return the name of a locale that core Perl + # thinks is a UTF-8 LC_CTYPE non-turkic + # locale. + # Optional parameter is a reference to a + # list of locales to try; if omitted, this + # tries all locales it can find on the + # platform + my $try_locales_ref = shift; + + my @utf8_locales = find_utf8_ctype_locales($try_locales_ref); + my @turkic_locales = find_utf8_turkic_locales($try_locales_ref); + + my %seen_turkic; + + # Create undef elements in the hash for turkic locales + @seen_turkic{@turkic_locales} = (); + + foreach my $locale (@utf8_locales) { + return $locale unless exists $seen_turkic{$locale}; } return; } +sub find_utf8_turkic_locales (;$) { + + # Return the name of all the locales that core Perl thinks are UTF-8 + # Turkic LC_CTYPE. Optional parameter is a reference to a list of locales + # to try; if omitted, this tries all locales it can find on the platform + + my @return; + + return unless locales_enabled('LC_CTYPE'); + + my $save_locale = setlocale(&POSIX::LC_CTYPE()); + foreach my $locale (find_utf8_ctype_locales(shift)) { + use locale; + setlocale(&POSIX::LC_CTYPE(), $locale); + push @return, $locale if uc('i') eq "\x{130}"; + } + setlocale(&POSIX::LC_CTYPE(), $save_locale); + + return @return; +} + +sub find_utf8_turkic_locale (;$) { + my @turkics = find_utf8_turkic_locales(shift); + + return unless @turkics; + return $turkics[0] +} + + # returns full path to the directory containing the current source # file, inspired by mauke's Dir::Self sub _source_location { diff --git a/gnu/usr.bin/perl/t/op/array_base.t b/gnu/usr.bin/perl/t/op/array_base.t deleted file mode 100755 index a30236d955f..00000000000 --- a/gnu/usr.bin/perl/t/op/array_base.t +++ /dev/null @@ -1,41 +0,0 @@ -#!perl -w -use strict; - -BEGIN { - chdir 't' if -d 't'; - require './test.pl'; - - plan (tests => my $tests = 11); - - # Run these at BEGIN time, before arybase loads - use v5.15; - is(eval('$[ = 1; 123'), undef); - like($@, qr/\AAssigning non-zero to \$\[ is no longer possible/); - - if (is_miniperl()) { - # skip the rest - SKIP: { skip ("no arybase.xs on miniperl", $tests-2) } - exit; - } -} - -no warnings 'deprecated'; - -is(eval('$['), 0); -is(eval('$[ = 0; 123'), 123); -is(eval('$[ = 1; 123'), 123); -$[ = 1; -ok $INC{'arybase.pm'}; - -use v5.15; -is(eval('$[ = 1; 123'), undef); -like($@, qr/\AAssigning non-zero to \$\[ is no longer possible/); -is $[, 0, '$[ is 0 under 5.16'; -$_ = "hello"; -/l/g; -my $pos = \pos; -is $$pos, 3; -$$pos = 1; -is $$pos, 1; - -1; diff --git a/gnu/usr.bin/perl/t/op/coresubs.t b/gnu/usr.bin/perl/t/op/coresubs.t index 62210b576d9..2ee63ef5fc5 100644 --- a/gnu/usr.bin/perl/t/op/coresubs.t +++ b/gnu/usr.bin/perl/t/op/coresubs.t @@ -37,7 +37,7 @@ my %args_for = ( splice =>)[0,1,2,1,3,1,4,1,5,1], ); my %desc = ( - pos => 'match position', + #pos => 'match position', ); use File::Spec::Functions; diff --git a/gnu/usr.bin/perl/t/op/dump.t b/gnu/usr.bin/perl/t/op/dump.t index 2edba2035cf..397c5b55ee9 100644 --- a/gnu/usr.bin/perl/t/op/dump.t +++ b/gnu/usr.bin/perl/t/op/dump.t @@ -45,14 +45,14 @@ plan(2); # Depending on how perl is built, there may be extraneous stuff on stderr # such as "Aborted", which isn't caught by the '2>&1' that -# fresh_perl_like() does. So execute each dump() in a sub-process. +# fresh_perl_like() does. So execute each CORE::dump() in a sub-process. # # In detail: # fresh_perl_like() ends up doing a `` which invokes a shell with 2 args: # # "sh", "-c", "perl /tmp/foo 2>&1" # -# When the perl process coredumps after calling dump(), the parent +# When the perl process coredumps after calling CORE::dump(), the parent # sh sees that the exit of the child flags a coredump and so prints # something like the following to stderr: # @@ -80,13 +80,12 @@ if ($pid) { else { # child print qq(A); - dump; + CORE::dump; print qq(B); } PROG -fresh_perl_like(<<'PROG', qr/A(?!B\z)/, {}, "dump with label quits"); -BEGIN {$SIG {__WARN__} = sub {1;}} +fresh_perl_like(<<'PROG', qr/A(?!B\z)/, {}, "CORE::dump with label quits"); BEGIN {$SIG {__WARN__} = sub {1;}} ++$|; my $pid = fork; die "fork: $!\n" unless defined $pid; @@ -96,7 +95,7 @@ if ($pid) { } else { print qq(A); - dump foo; + CORE::dump foo; foo: print qq(B); } diff --git a/gnu/usr.bin/perl/t/op/exec.t b/gnu/usr.bin/perl/t/op/exec.t index bda8dd56d73..c676a972d14 100644 --- a/gnu/usr.bin/perl/t/op/exec.t +++ b/gnu/usr.bin/perl/t/op/exec.t @@ -30,7 +30,7 @@ use vmsish qw(hushed); $| = 1; # flush stdout -$ENV{LC_ALL} = 'C'; # Forge English error messages. +$ENV{LC_ALL} = 'C'; # Force English error messages. $ENV{LANGUAGE} = 'C'; # Ditto in GNU. my $Is_VMS = $^O eq 'VMS'; diff --git a/gnu/usr.bin/perl/t/op/getppid.t b/gnu/usr.bin/perl/t/op/getppid.t index ebe97313da1..0a5dbcef1aa 100755 --- a/gnu/usr.bin/perl/t/op/getppid.t +++ b/gnu/usr.bin/perl/t/op/getppid.t @@ -100,7 +100,7 @@ sub fork_and_retrieve { } } else { # Fudge it by waiting a bit more: - sleep 3; + sleep 2; } my $ppid2 = getppid(); print $w "$how,$ppid1,$ppid2\n"; @@ -117,16 +117,3 @@ SKIP: { } isnt ($first, $$, "And that new parent isn't this process"); -# Orphaned Docker or Linux containers do not necessarily attach to PID 1. They might attach to 0 instead. -sub is_linux_container { - - if ($^O eq 'linux' && open my $fh, '<', '/proc/1/cgroup') { - while(<$fh>) { - if (m{^\d+:pids:(.*)} && $1 ne '/init.scope') { - return 1; - } - } - } - - return 0; -} diff --git a/gnu/usr.bin/perl/t/op/gmagic.t b/gnu/usr.bin/perl/t/op/gmagic.t index 210e8e5cc92..0c859550a7a 100644 --- a/gnu/usr.bin/perl/t/op/gmagic.t +++ b/gnu/usr.bin/perl/t/op/gmagic.t @@ -62,6 +62,7 @@ chomp $c; expected_tie_calls(tied $c, 1, 2, 'chomping a ref'); { + no warnings 'once'; # main::foo my $outfile = tempfile(); open my $h, ">$outfile" or die "$0 cannot close $outfile: $!"; binmode $h; @@ -76,15 +77,6 @@ expected_tie_calls(tied $c, 1, 2, 'chomping a ref'); expected_tie_calls(tied $c, 2, 2, 'calling sysread with tied buf'); close $h or die "$0 cannot close $outfile: $!"; - # Do this again, with a utf8 handle - $c = *foo; # 1 write - open $h, "<:utf8", $outfile; - no warnings 'deprecated'; - sysread $h, $c, 3, 7; # 1 read; 1 write - is $c, "*main::bar", 'what sysread wrote'; # 1 read - expected_tie_calls(tied $c, 2, 2, 'calling sysread with tied buf'); - close $h or die "$0 cannot close $outfile: $!"; - unlink_all $outfile; } diff --git a/gnu/usr.bin/perl/t/op/groups.t b/gnu/usr.bin/perl/t/op/groups.t index e50c50a8c16..7e064cc21fe 100644 --- a/gnu/usr.bin/perl/t/op/groups.t +++ b/gnu/usr.bin/perl/t/op/groups.t @@ -51,7 +51,7 @@ sub Test { my %basegroup = basegroups( $pwgid, $pwgnam ); my @extracted_supplementary_groups = remove_basegroup( \ %basegroup, \ @extracted_groups ); - plan 2; + plan 3; # Test: The supplementary groups in $( should match the @@ -121,6 +121,26 @@ sub Test { $gid_count->{0} //= 0; ok 0 == $pwgid || $gid_count->{0} < 2, "groupstype should be type short, not long"; + SKIP: { + # try to add a group as supplementary group + my $root_uid = 0; + skip "uid!=0", 1 if $< != $root_uid and $> != $root_uid; + my @groups = split ' ', $); + my @sup_group; + setgrent; + while(my @ent = getgrent) { + next if grep { $_ == $ent[2] } @groups; + @sup_group = @ent; + last; + } + endgrent; + skip "No group found we could add as a supplementary group", 1 + if (!@sup_group); + $) = "$) @sup_group[2]"; + my $ok = grep { $_ == $sup_group[2] } split ' ', $); + ok $ok, "Group `$sup_group[0]' added as supplementary group"; + } + return; } diff --git a/gnu/usr.bin/perl/t/op/heredoc.t b/gnu/usr.bin/perl/t/op/heredoc.t index 7b11852e050..0a7bb06b0ff 100644 --- a/gnu/usr.bin/perl/t/op/heredoc.t +++ b/gnu/usr.bin/perl/t/op/heredoc.t @@ -109,7 +109,7 @@ HEREDOC # [perl #125540] this asserted or crashed fresh_perl_like( - q(map d$#<<<<""), + q(map d<<<<""), qr/Can't find string terminator "" anywhere before EOF at - line 1\./, {}, "Don't assert parsing a here-doc if we hit EOF early" @@ -221,7 +221,7 @@ HEREDOC push @tests, [ "print <<~' EOF ';\n $string\n EOF\nEOF \n EOF \n EOF \n", " $string\n EOF\nEOF \n EOF \n", - "intented here-doc matches final delimiter correctly" + "indented here-doc matches final delimiter correctly" ]; for my $test (@tests) { diff --git a/gnu/usr.bin/perl/t/op/lc.t b/gnu/usr.bin/perl/t/op/lc.t index 2ce65ac73c9..69975b43fd9 100644 --- a/gnu/usr.bin/perl/t/op/lc.t +++ b/gnu/usr.bin/perl/t/op/lc.t @@ -1,4 +1,5 @@ #!./perl +use strict; # This file is intentionally encoded in latin-1. # @@ -16,7 +17,7 @@ BEGIN { use feature qw( fc ); -plan tests => 139 + 4 * 256; +plan tests => 139 + 2 * (4 * 256) + 15; is(lc(undef), "", "lc(undef) is ''"); is(lcfirst(undef), "", "lcfirst(undef) is ''"); @@ -164,9 +165,10 @@ is(uc("\x{1C5}") , "\x{1C4}", "U+01C5 uc is U+01C4"); is(uc("\x{1C6}") , "\x{1C4}", "U+01C6 uc is U+01C4, too"); # #18107: A host of bugs involving [ul]c{,first}. AMS 20021106 -$a = "\x{3c3}foo.bar"; # \x{3c3} == GREEK SMALL LETTER SIGMA. -$b = "\x{3a3}FOO.BAR"; # \x{3a3} == GREEK CAPITAL LETTER SIGMA. +my $a = "\x{3c3}foo.bar"; # \x{3c3} == GREEK SMALL LETTER SIGMA. +my $b = "\x{3a3}FOO.BAR"; # \x{3a3} == GREEK CAPITAL LETTER SIGMA. +my $c; ($c = $b) =~ s/(\w+)/lc($1)/ge; is($c , $a, "Using s///e to change case."); @@ -310,6 +312,7 @@ constantfolding # In-place lc/uc should not corrupt string buffers when given a non-utf8- # flagged thingy that stringifies to utf8 +my %h; $h{k} = bless[], "\x{3b0}\x{3b0}\x{3b0}bcde"; # U+03B0 grows with uc() # using delete marks it as TEMP, so uc-in-place is permitted like uc delete $h{k}, qr "^(?:\x{3a5}\x{308}\x{301}){3}BCDE=ARRAY\(.*\)", @@ -341,11 +344,15 @@ SKIP: { is($x, "A", "first { fc }"); } +my $non_turkic_locale = find_utf8_ctype_locale(); +my $turkic_locale = find_utf8_turkic_locale(); -my $utf8_locale = find_utf8_ctype_locale(); +foreach my $turkic (0 .. 1) { + my $type = ($turkic) ? "turkic" : "non-turkic"; + my $locale = ($turkic) ? $turkic_locale : $non_turkic_locale; -SKIP: { - skip 'Can\'t find a UTF-8 locale', 4*256 unless defined $utf8_locale; + SKIP: { + skip "Can't find a $type UTF-8 locale", 4*256 unless defined $locale; use feature qw( unicode_strings ); @@ -364,13 +371,70 @@ SKIP: { push @unicode_ucfirst, ucfirst(chr $i); } + if ($turkic) { + $unicode_lc[ord 'I'] = chr 0x131; + $unicode_lcfirst[ord 'I'] = chr 0x131; + $unicode_uc[ord 'i'] = chr 0x130; + $unicode_ucfirst[ord 'i'] = chr 0x130; + } + use locale; - setlocale(LC_CTYPE, $utf8_locale); + setlocale(&POSIX::LC_CTYPE, $locale); for my $i (0 .. 255) { - is(lc(chr $i), $unicode_lc[$i], "In a UTF-8 locale, lc(chr $i) is the same as official Unicode"); - is(uc(chr $i), $unicode_uc[$i], "In a UTF-8 locale, uc(chr $i) is the same as official Unicode"); - is(lcfirst(chr $i), $unicode_lcfirst[$i], "In a UTF-8 locale, lcfirst(chr $i) is the same as official Unicode"); - is(ucfirst(chr $i), $unicode_ucfirst[$i], "In a UTF-8 locale, ucfirst(chr $i) is the same as official Unicode"); + is(lc(chr $i), $unicode_lc[$i], "In a $type UTF-8 locale, lc(chr $i) is the same as official Unicode"); + is(uc(chr $i), $unicode_uc[$i], "In a $type UTF-8 locale, uc(chr $i) is the same as official Unicode"); + is(lcfirst(chr $i), $unicode_lcfirst[$i], "In a $type UTF-8 locale, lcfirst(chr $i) is the same as official Unicode"); + is(ucfirst(chr $i), $unicode_ucfirst[$i], "In a $type UTF-8 locale, ucfirst(chr $i) is the same as official Unicode"); } + } +} + +SKIP: { + skip "Can't find a turkic UTF-8 locale", 15 unless defined $turkic_locale; + + # These are designed to stress the calculation of space needed for the + # strings. $filler contains a variety of characters that have special + # handling in the casing functions, and some regular chars as well. + my $filler_length = 10000; + my $filler = uni_to_native("\x{df}\x{b5}\x{e0}\x{c1}\x{b6}\x{ff}") x $filler_length; + + # These are the correct answers to what should happen when the given + # casing function is called on $filler; + my $filler_lc = uni_to_native("\x{df}\x{b5}\x{e0}\x{e1}\x{b6}\x{ff}") x $filler_length; + my $filler_fc = ("ss" . uni_to_native("\x{b5}\x{e0}\x{e1}\x{b6}\x{ff}")) x $filler_length; + my $filler_uc = ("SS" . uni_to_native("\x{39c}\x{c0}\x{c1}\x{b6}\x{178}")) x $filler_length; + + use locale; + setlocale(&POSIX::LC_CTYPE, $turkic_locale); + + is (lc "IIIIIII$filler", "\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}$filler_lc", + "lc non-UTF-8, in Turkic locale, beginning with a bunch of I's"); + is (lc "${filler}IIIIIII$filler", "$filler_lc\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}$filler_lc", + "lc non-UTF-8, in Turkic locale, a bunch of I's, but not at the beginning"); + is (lc "${filler}I\x{307}$filler", "${filler_lc}i$filler_lc", + "lc in Turkic locale with DOT ABOVE immediately following I"); + is (lc "${filler}I\x{307}IIIIII$filler", "${filler_lc}i\x{131}\x{131}\x{131}\x{131}\x{131}\x{131}$filler_lc", + "lc in Turkic locale with DOT ABOVE immediately following I, then other I's "); + is (lc "${filler}I\x{316}\x{307}$filler", "${filler_lc}i\x{316}$filler_lc", + "lc in Turkic locale with DOT ABOVE after non-ABOVE"); + is (lc "${filler}I\x{307}\x{300}$filler", "${filler_lc}i\x{300}$filler_lc", + "lc in Turkic locale with DOT ABOVE followed by ABOVE"); + is (lc "${filler}I\x{300}\x{307}$filler", "$filler_lc\x{131}\x{300}\x{307}$filler_lc", + "lc in Turkic locale with with other ABOVE before DOT ABOVE"); + is (lcfirst "IIIIIII$filler", "\x{131}IIIIII$filler", + "lcfirst in Turkic locale, only first I changed"); + is (lcfirst "I\x{307}$filler", "i$filler", + "lcfirst in Turkic locale with DOT ABOVE immediately following I"); + is (lcfirst "I\x{307}IIIIII$filler", "iIIIIII$filler", + "lcfirst in Turkic locale with DOT ABOVE immediately following I, then" + . " other I's "); + is (lcfirst "I\x{316}\x{307}IIIIII$filler", "i\x{316}IIIIII$filler", + "lcfirst in Turkic locale with DOT ABOVE after non-ABOVE"); + is (lcfirst "I\x{307}\x{300}IIIIII$filler", "i\x{300}IIIIII$filler", + "lcfirst in Turkic locale with DOT ABOVE followed by ABOVE"); + is (lcfirst "I\x{300}\x{307}IIIIII$filler", "\x{131}\x{300}\x{307}IIIIII$filler", + "lcfirst in Turkic locale with with other ABOVE before DOT ABOVE"); + is (uc "${filler}i$filler", "$filler_uc\x{130}$filler_uc", "long string uc in Turkic locale"); + is (ucfirst "ii$filler", "\x{130}i$filler", "long string ucfirst in Turkic locale; only first char changes"); } diff --git a/gnu/usr.bin/perl/t/op/lex.t b/gnu/usr.bin/perl/t/op/lex.t index 90be5195243..e78fad2c42f 100755 --- a/gnu/usr.bin/perl/t/op/lex.t +++ b/gnu/usr.bin/perl/t/op/lex.t @@ -276,10 +276,7 @@ SKIP: fresh_perl_is( "stat\tt\$#0", <<'EOM', -$# is no longer supported. Its use will be fatal in Perl 5.30 at - line 1. -Number found where operator expected at - line 1, near "$#0" - (Missing operator before 0?) -Can't call method "t" on an undefined value at - line 1. +$# is no longer supported as of Perl 5.30 at - line 1. EOM {}, "[perl #129273] heap use after free or overflow" diff --git a/gnu/usr.bin/perl/t/op/local.t b/gnu/usr.bin/perl/t/op/local.t index df1413a8a0d..32142e3d07d 100644 --- a/gnu/usr.bin/perl/t/op/local.t +++ b/gnu/usr.bin/perl/t/op/local.t @@ -609,8 +609,8 @@ while (/(o.+?),/gc) { sub STORE { die "write \$_ forbidden" } tie $_, __PACKAGE__; my @tests = ( - "Nesting" => sub { print '#'; for (1..3) { print } - print "\n" }, 1, + "Nesting" => sub { my $x = '#'; for (1..3) { $x .= $_ } + print "$x\n" }, 1, "Reading" => sub { print }, 0, "Matching" => sub { $x = /badness/ }, 0, "Concat" => sub { $_ .= "a" }, 0, diff --git a/gnu/usr.bin/perl/t/op/lvref.t b/gnu/usr.bin/perl/t/op/lvref.t index 28adc6ad239..3d5e952fb0d 100644 --- a/gnu/usr.bin/perl/t/op/lvref.t +++ b/gnu/usr.bin/perl/t/op/lvref.t @@ -4,7 +4,7 @@ BEGIN { set_up_inc("../lib"); } -plan 156; +plan 164; eval '\$x = \$y'; like $@, qr/^Experimental aliasing via reference not enabled/, @@ -603,3 +603,39 @@ pass("RT #123821"); eval q{sub{\@0[0]=0};}; pass("RT #128252"); } + +# RT #133538 slices were inadvertently always localising + +{ + use feature 'refaliasing'; + no warnings 'experimental'; + + my @src = (100,200,300); + + my @a = (1,2,3); + my %h = qw(one 10 two 20 three 30); + + { + use feature 'declared_refs'; + local \(@a[0,1,2]) = \(@src); + local \(@h{qw(one two three)}) = \(@src); + $src[0]++; + is("@a", "101 200 300", "rt #133538 \@a aliased"); + is("$h{one} $h{two} $h{three}", "101 200 300", "rt #133538 %h aliased"); + } + is("@a", "1 2 3", "rt #133538 \@a restored"); + is("$h{one} $h{two} $h{three}", "10 20 30", "rt #133538 %h restored"); + + { + \(@a[0,1,2]) = \(@src); + \(@h{qw(one two three)}) = \(@src); + $src[0]++; + is("@a", "102 200 300", "rt #133538 \@a aliased try 2"); + is("$h{one} $h{two} $h{three}", "102 200 300", + "rt #133538 %h aliased try 2"); + } + $src[2]++; + is("@a", "102 200 301", "rt #133538 \@a still aliased"); + is("$h{one} $h{two} $h{three}", "102 200 301", "rt #133538 %h still aliased"); + +} diff --git a/gnu/usr.bin/perl/t/op/magic.t b/gnu/usr.bin/perl/t/op/magic.t index 02ced156d5f..27c1d43da01 100644 --- a/gnu/usr.bin/perl/t/op/magic.t +++ b/gnu/usr.bin/perl/t/op/magic.t @@ -5,7 +5,7 @@ BEGIN { chdir 't' if -d 't'; require './test.pl'; set_up_inc( '../lib' ); - plan (tests => 196); # some tests are run in BEGIN block + plan (tests => 195); # some tests are run in BEGIN block } # Test that defined() returns true for magic variables created on the fly, @@ -613,9 +613,9 @@ SKIP: { } SKIP: { - skip_if_miniperl("No XS in miniperl", 3); + skip_if_miniperl("No XS in miniperl", 2); - for ( [qw( %- Tie::Hash::NamedCapture )], [qw( $[ arybase )], + for ( [qw( %- Tie::Hash::NamedCapture )], [qw( %! Errno )] ) { my ($var, $mod) = @$_; my $modfile = $mod =~ s|::|/|gr . ".pm"; diff --git a/gnu/usr.bin/perl/t/op/multideref.t b/gnu/usr.bin/perl/t/op/multideref.t index 20ba1ca614f..23487ca283b 100644 --- a/gnu/usr.bin/perl/t/op/multideref.t +++ b/gnu/usr.bin/perl/t/op/multideref.t @@ -18,7 +18,7 @@ BEGIN { use warnings; use strict; -plan 63; +plan 65; # check that strict refs hint is handled @@ -233,3 +233,14 @@ sub defer {} is $x[qw(rt131627)->$*], 11, 'RT #131627: $a[qw(var)->$*]'; } +# this used to leak - run the code for ASan to spot any problems +{ + package Foo; + our %FIELDS = (); + my Foo $f; + eval q{ my $x = $f->{c}; }; + ::pass("S_maybe_multideref() shouldn't leak on croak"); +} + +fresh_perl_is('0for%{scalar local$0[0]}', '', {}, + "RT #134045 assertion on the OP_SCALAR"); diff --git a/gnu/usr.bin/perl/t/op/my.t b/gnu/usr.bin/perl/t/op/my.t index 35211068d78..5ac382cfe71 100644 --- a/gnu/usr.bin/perl/t/op/my.t +++ b/gnu/usr.bin/perl/t/op/my.t @@ -154,5 +154,21 @@ is( $@, '', "eval of my() passes"); eval 'my($a,$b),$x,my($c,$d)'; pass("RT #126844"); +# RT # 133543 +my @false_conditionals = ( + 'my $x1 if 0;', + 'my @x2 if 0;', + 'my %x3 if 0;', + 'my ($x4) if 0;', + 'my ($x5,@x6, %x7) if 0;', + '0 && my $z1;', + '0 && my (%z2);', +); +for (my $i=0; $i<=$#false_conditionals; $i++) { + eval $false_conditionals[$i]; + like( $@, qr/^This use of my\(\) in false conditional is no longer allowed/, + "RT #133543: my() in false conditional: $false_conditionals[$i]"); +} + #Variable number of tests due to the way the while/for loops are tested now done_testing(); diff --git a/gnu/usr.bin/perl/t/op/pack.t b/gnu/usr.bin/perl/t/op/pack.t index bb9f865091a..4543cde3f9b 100644 --- a/gnu/usr.bin/perl/t/op/pack.t +++ b/gnu/usr.bin/perl/t/op/pack.t @@ -955,15 +955,11 @@ is("@{[unpack('U*', pack('U*', 100, 200))]}", "100 200"); is("@{[pack('C0U*', map { utf8::native_to_unicode($_) } 64, 202)]}", pack("C*", 64, @bytes202)); - # does unpack U0U on byte data warn? - { - use warnings qw(NONFATAL all);; - - my $bad = pack("U0C", 202); - local $SIG{__WARN__} = sub { $@ = "@_" }; - my @null = unpack('U0U', $bad); - like($@, qr/^Malformed UTF-8 character: /); - } + # does unpack U0U on byte data fail? + fresh_perl_like('my $bad = pack("U0C", 202); my @null = unpack("U0U", $bad);', + qr/^Malformed UTF-8 character: /, + {}, + "pack doesn't return malformed UTF-8"); } { diff --git a/gnu/usr.bin/perl/t/op/postfixderef.t b/gnu/usr.bin/perl/t/op/postfixderef.t index c2983cf6dda..4125fc2fc7e 100644 --- a/gnu/usr.bin/perl/t/op/postfixderef.t +++ b/gnu/usr.bin/perl/t/op/postfixderef.t @@ -16,7 +16,7 @@ BEGIN { use strict qw(refs subs); -plan(130); +plan(128); { no strict 'refs'; @@ -326,13 +326,6 @@ is "@foo", "1 2 3 4 5 6 7 8 9", 'lvalue ->$#*'; $_ = "foo"; @foo = 7..9; %foo = qw( foo oof ); -{ - no warnings 'deprecated'; - $* = 42; - is "$_->$*", 'foo->42', '->$* interpolation without feature'; - $# = 43; - is "$_->$#*", 'foo->43*', '->$#* interpolation without feature'; -} is "$_->@*", 'foo->@*', '->@* does not interpolate without feature'; is "$_->@[0]", 'foo->@[0]', '->@[ does not interpolate without feature'; is "$_->@{foo}", "foo->7 8 9", '->@{ does not interpolate without feature'; diff --git a/gnu/usr.bin/perl/t/op/qr.t b/gnu/usr.bin/perl/t/op/qr.t index 32b9e3b23bf..e03a465430b 100644 --- a/gnu/usr.bin/perl/t/op/qr.t +++ b/gnu/usr.bin/perl/t/op/qr.t @@ -7,7 +7,7 @@ BEGIN { require './test.pl'; } -plan(tests => 34); +plan(tests => 37); sub r { return qr/Good/; @@ -135,3 +135,35 @@ sub { }; } pass("PVLV-as-REGEXP double-free of PVX"); + +# a non-cow SVPV leaked it's string buffer when a REGEXP was assigned to +# it. Give valgrind/ASan something to work on +{ + my $s = substr("ab",0,1); # generate a non-COW string + my $r1 = qr/x/; + $s = $$r1; # make sure "a" isn't leaked + pass("REGEXP leak"); + + my $dest = 0; + sub Foo99::DESTROY { $dest++ } + + # ditto but make sure we don't leak a reference + { + my $ref = bless [], "Foo99"; + my $r2 = qr/x/; + $ref = $$r2; + } + is($dest, 1, "REGEXP RV leak"); + + # and worse, assigning a REGEXP to an PVLV that had a string value + # caused an assert failure. Same code, but using $_[0] which is an + # lvalue, rather than $s. + + my %h; + sub { + $_[0] = substr("ab",0,1); # generate a non-COW string + my $r = qr/x/; + $_[0] = $$r; # make sure "a" isn't leaked + }->($h{foo}); # passes PVLV to sub + is($h{foo}, "(?^:x)", "REGEXP PVLV leak"); +} diff --git a/gnu/usr.bin/perl/t/op/readline.t b/gnu/usr.bin/perl/t/op/readline.t index c2727fe829a..ba4efa71a4f 100644 --- a/gnu/usr.bin/perl/t/op/readline.t +++ b/gnu/usr.bin/perl/t/op/readline.t @@ -215,9 +215,8 @@ SKIP: { my $line = 'ascii'; my ( $in, $out ); pipe $in, $out; - binmode $out, ':utf8'; + binmode $out; binmode $in, ':utf8'; - no warnings qw(deprecated); syswrite $out, "...\n"; $line .= readline $in; @@ -228,10 +227,11 @@ SKIP: { my $line = "\x{2080} utf8";; my ( $in, $out ); pipe $in, $out; - binmode $out, ':utf8'; + binmode $out; binmode $in, ':utf8'; - no warnings qw(deprecated); - syswrite $out, "\x{2080}...\n"; + my $outdata = "\x{2080}...\n"; + utf8::encode($outdata); + syswrite $out, $outdata; $line .= readline $in; is( $line, "\x{2080} utf8\x{2080}...\n", 'appending from utf to utf8' ); diff --git a/gnu/usr.bin/perl/t/op/sprintf2.t b/gnu/usr.bin/perl/t/op/sprintf2.t index 3f4c126c68f..569bd8053dc 100755 --- a/gnu/usr.bin/perl/t/op/sprintf2.t +++ b/gnu/usr.bin/perl/t/op/sprintf2.t @@ -818,6 +818,14 @@ SKIP: { local $::TODO = "denorm literals treated as zero" if $f == 0.0 && $t->[2] ne '0x0p+0'; + # Versions of Visual C++ earlier than 2015 (VC14, cl.exe version 19.x) + # fail three tests here - see perl #133982. + local $::TODO = "Visual C++ has problems prior to VC14" + if $^O eq 'MSWin32' and $Config{cc} eq 'cl' and + $Config{ccversion} =~ /^(\d+)/ and $1 < 19 and + (($t->[0] eq '3e-322' and ($t->[1] eq '%a' or $t->[1] eq '%.4a')) or + $t->[0] eq '7e-322'); + my $s = sprintf($t->[1], $f); is($s, $t->[2], "subnormal @$t got $s"); } @@ -830,6 +838,9 @@ SKIP: { # [rt.perl.org #128889] is(sprintf("%.*a", -1, 1.03125), "0x1.08p+0", "[rt.perl.org #128889]"); + # [rt.perl.org #134008] + is(sprintf("%.*a", -99999, 1.03125), "0x1.08p+0", "[rt.perl.org #134008]"); + # [rt.perl.org #128890] is(sprintf("%a", 0x1.18p+0), "0x1.18p+0"); is(sprintf("%.1a", 0x1.08p+0), "0x1.0p+0"); diff --git a/gnu/usr.bin/perl/t/op/stat.t b/gnu/usr.bin/perl/t/op/stat.t index 905eb85fd4d..e0ce03b12a2 100644 --- a/gnu/usr.bin/perl/t/op/stat.t +++ b/gnu/usr.bin/perl/t/op/stat.t @@ -51,6 +51,8 @@ my $Is_Dosish = $Is_Dos || $Is_OS2 || $Is_MSWin32 || $Is_NetWare; my $ufs_no_ctime = ($Is_Dfly || $Is_Darwin) && (() = `df -t ufs . 2>/dev/null`) == 2; +my $Is_linux_container = is_linux_container(); + if ($Is_Cygwin && !is_miniperl) { require Win32; Win32->import; @@ -357,6 +359,7 @@ SKIP: { # can be set to skip the tests that need a tty. SKIP: { skip "These tests require a TTY", 4 if $ENV{PERL_SKIP_TTY_TEST}; + skip "Skipping TTY tests on linux containers", 4 if $Is_linux_container; my $TTY = "/dev/tty"; diff --git a/gnu/usr.bin/perl/t/op/svleak.t b/gnu/usr.bin/perl/t/op/svleak.t index 3283c95cbf6..bfa6747a028 100644 --- a/gnu/usr.bin/perl/t/op/svleak.t +++ b/gnu/usr.bin/perl/t/op/svleak.t @@ -15,7 +15,7 @@ BEGIN { use Config; -plan tests => 149; +plan tests => 150; # run some code N times. If the number of SVs at the end of loop N is # greater than (N-1)*delta at the end of loop 1, we've got a leak @@ -625,3 +625,23 @@ SKIP: { sub Regex_Key_Leak { my ($r)= keys %rh; "foo"=~$r; } leak 2, 0, \&Regex_Key_Leak,"RT #132892 - regex patterns should not leak"; } + +{ + # perl #133660 + fresh_perl_is(<<'PERL', "ok", {}, "check goto core sub doesn't leak"); +# done this way to avoid overloads for all of svleak.t +use B; +BEGIN { + *CORE::GLOBAL::open = sub (*;$@) { + goto \&CORE::open; + }; +} + +my $refcount; +{ + open(my $fh, '<', 'TEST'); + my $sv = B::svref_2object($fh); + print $sv->REFCNT == 1 ? "ok" : "not ok"; +} +PERL +} diff --git a/gnu/usr.bin/perl/t/op/sysio.t b/gnu/usr.bin/perl/t/op/sysio.t index ebcf821d372..c6d9bd89176 100644 --- a/gnu/usr.bin/perl/t/op/sysio.t +++ b/gnu/usr.bin/perl/t/op/sysio.t @@ -6,7 +6,7 @@ BEGIN { set_up_inc('../lib'); } -plan tests => 48; +plan tests => 45; open(I, 'op/sysio.t') || die "sysio.t: cannot find myself: $!"; binmode I; @@ -221,32 +221,6 @@ close(I); unlink_all $outfile; -# Check that utf8 IO doesn't upgrade the scalar -{ - no warnings 'deprecated'; - open(I, ">$outfile") || die "sysio.t: cannot write $outfile: $!"; - # Will skip harmlessly on stdioperl - eval {binmode STDOUT, ":utf8"}; - die $@ if $@ and $@ !~ /^IO layers \(like ':utf8'\) unavailable/; - - # y diaresis is \w when UTF8 - $a = chr 255; - - unlike($a, qr/\w/); - - syswrite I, $a; - - # Should not be upgraded as a side effect of syswrite. - unlike($a, qr/\w/); - - # This should work - eval {syswrite I, 2;}; - is($@, ''); - - close(I); -} -unlink_all $outfile; - chdir('..'); 1; diff --git a/gnu/usr.bin/perl/t/op/taint.t b/gnu/usr.bin/perl/t/op/taint.t index 9edaa55b032..4c76de34ea9 100644 --- a/gnu/usr.bin/perl/t/op/taint.t +++ b/gnu/usr.bin/perl/t/op/taint.t @@ -17,7 +17,7 @@ BEGIN { use strict; use Config; -plan tests => 1041; +plan tests => 1043; $| = 1; @@ -2378,8 +2378,28 @@ end ok("A" =~ /\p{$prop}/, "user-defined property: non-tainted case"); $prop = "IsA$TAINT"; eval { "A" =~ /\p{$prop}/}; - like($@, qr/Insecure user-defined property \\p\{main::IsA\}/, + like($@, qr/Insecure user-defined property "IsA" in regex/, "user-defined property: tainted case"); + +} + +{ + SKIP: { + skip "Environment tainting tests skipped", 1 + if $Is_MSWin32 || $Is_NetWare || $Is_VMS || $Is_Dos; + + local $ENV{XX} = '\p{IsB}'; # Making it an environment variable taints it + + fresh_perl_like(<<'EOF', + BEGIN { $re = qr/$ENV{XX}/; } + + sub IsB { "42" }; + "B" =~ $re +EOF + qr/Insecure user-defined property \\p\{main::IsB\}/, + { switches => [ "-T" ] }, + "user-defined property; defn not known until runtime, tainted case"); + } } { @@ -2873,6 +2893,28 @@ is_tainted("$ovtaint", "overload preserves taint"); ok(!!($s =~ s/a/x/g), "RT #132385"); } +# RT #134409 +# When the last substitution added both taint and utf8, adding taint +# magic to the result also triggered a byte-to-utf8 recalulation of the +# existing pos() magic, which had not yet been reset, resulting in a panic +# about pos() being off the end of the string. +{ + my $utf8_taint = substr($^X,0,0); + utf8::upgrade($utf8_taint); + + my %map = ( + 'UTF8' => "$utf8_taint", + 'PLAIN' => '', + ); + + + my $v = "PLAIN UTF8"; + my $c = eval { $v =~ s/(\w+)/$map{$1}/g; }; + is($c, 2, "RT #134409") + or diag("\$@ = [$@]"); +} + + # This may bomb out with the alarm signal so keep it last SKIP: { skip "No alarm()" unless $Config{d_alarm}; diff --git a/gnu/usr.bin/perl/t/op/tie.t b/gnu/usr.bin/perl/t/op/tie.t index a2d771a009e..bfcafce87ad 100644 --- a/gnu/usr.bin/perl/t/op/tie.t +++ b/gnu/usr.bin/perl/t/op/tie.t @@ -1585,3 +1585,40 @@ print "[$x][$f][$n][$s]\n"; EXPECT [3][1][3][0] [0][2][3][0] +######## +# dying while doing a SAVEt_DELETE dureing scope exit leaked a copy of the +# key. Give ASan something to play with +sub TIEHASH { bless({}, $_[0]) } +sub EXISTS { 0 } +sub DELETE { die; } +sub DESTROY { print "destroy\n"; } + +eval { + my %h; + tie %h, "main"; + local $h{foo}; + print "leaving\n"; +}; +print "left\n"; +EXPECT +leaving +destroy +left +######## +# ditto for SAVEt_DELETE with an array +sub TIEARRAY { bless({}, $_[0]) } +sub EXISTS { 0 } +sub DELETE { die; } +sub DESTROY { print "destroy\n"; } + +eval { + my @a; + tie @a, "main"; + delete local $a[0]; + print "leaving\n"; +}; +print "left\n"; +EXPECT +leaving +destroy +left diff --git a/gnu/usr.bin/perl/t/op/tr.t b/gnu/usr.bin/perl/t/op/tr.t index 0f74936fdb0..47d603d4fdf 100644 --- a/gnu/usr.bin/perl/t/op/tr.t +++ b/gnu/usr.bin/perl/t/op/tr.t @@ -13,7 +13,7 @@ BEGIN { use utf8; -plan tests => 300; +plan tests => 301; # Test this first before we extend the stack with other operations. # This caused an asan failure due to a bad write past the end of the stack. @@ -1137,6 +1137,12 @@ for ("", nullrocow) { [\x{E5CD}-\x{E5DF}\x{EA80}-\x{EAFA}\x{EB0E}-\x{EB8E}\x{EAFB}-\x{EB0D}\x{E5B5}-\x{E5CC}]; is $x, "\x{E5CE}", '[perl #130656]'; + +} + +{ + fresh_perl_like('y/\x{a00}0-\N{}//', qr/Unknown charname/, { }, + 'RT #133880 illegal \N{}'); } 1; diff --git a/gnu/usr.bin/perl/t/op/write.t b/gnu/usr.bin/perl/t/op/write.t index 18d363cbd69..2869f1702b3 100644 --- a/gnu/usr.bin/perl/t/op/write.t +++ b/gnu/usr.bin/perl/t/op/write.t @@ -6,6 +6,8 @@ BEGIN { set_up_inc('../lib'); } +$| = 0; # test.pl now sets it on, which causes problems here. + use strict; # Amazed that this hackery can be made strict ... use Tie::Scalar; diff --git a/gnu/usr.bin/perl/t/opbasic/arith.t b/gnu/usr.bin/perl/t/opbasic/arith.t index 625f4c0e16f..96b243b2c5d 100644 --- a/gnu/usr.bin/perl/t/opbasic/arith.t +++ b/gnu/usr.bin/perl/t/opbasic/arith.t @@ -13,28 +13,30 @@ BEGIN { print "1..186\n"; sub try ($$$) { - print +($_[1] ? "ok" : "not ok"), " $_[0] - $_[2]\n"; + print +($_[1] ? "ok" : "not ok") . " $_[0] - $_[2]\n"; } sub tryeq ($$$$) { + my $status; if ($_[1] == $_[2]) { - print "ok $_[0]"; + $status = "ok $_[0]"; } else { - print "not ok $_[0] # $_[1] != $_[2]"; + $status = "not ok $_[0] # $_[1] != $_[2]"; } - print " - $_[3]\n"; + print "$status - $_[3]\n"; } sub tryeq_sloppy ($$$$) { + my $status; if ($_[1] == $_[2]) { - print "ok $_[0]"; + $status = "ok $_[0]"; } else { my $error = abs (($_[1] - $_[2]) / $_[1]); if ($error < 1e-9) { - print "ok $_[0] # $_[1] is close to $_[2], \$^O eq $^O"; + $status = "ok $_[0] # $_[1] is close to $_[2], \$^O eq $^O"; } else { - print "not ok $_[0] # $_[1] != $_[2]"; + $status = "not ok $_[0] # $_[1] != $_[2]"; } } - print " - $_[3]\n"; + print "$status - $_[3]\n"; } my $T = 1; diff --git a/gnu/usr.bin/perl/t/opbasic/concat.t b/gnu/usr.bin/perl/t/opbasic/concat.t index 9ce9722f5c8..4b73b22c1c1 100644 --- a/gnu/usr.bin/perl/t/opbasic/concat.t +++ b/gnu/usr.bin/perl/t/opbasic/concat.t @@ -39,7 +39,7 @@ sub is { return $ok; } -print "1..253\n"; +print "1..254\n"; ($a, $b, $c) = qw(foo bar); @@ -853,3 +853,11 @@ package RT132595 { my $res = $a.$t.$a.$t; ::is($res, "b1c1b1c2", "RT #132595"); } + +# RT #133441 +# multiconcat wasn't seeing a mutator as a mutator +{ + my ($a, $b) = qw(a b); + ($a = 'A'.$b) .= 'c'; + is($a, "Abc", "RT #133441"); +} diff --git a/gnu/usr.bin/perl/t/perf/benchmarks.t b/gnu/usr.bin/perl/t/perf/benchmarks.t index 57dbcf87937..9babda3b01f 100644 --- a/gnu/usr.bin/perl/t/perf/benchmarks.t +++ b/gnu/usr.bin/perl/t/perf/benchmarks.t @@ -15,8 +15,13 @@ use strict; my $file = 'perf/benchmarks'; my $benchmark_array = do $file; -die $@ if $@; -die "$! while trying to read '$file'" if $!; +unless ($benchmark_array) { + die "Error while parsing '$file': $@\n" if $@; + die "Error while trying to read '$file': $!" + unless defined $benchmark_array; + die "Unknown error running '$file'\n"; +} + die "'$file' did not return an array ref\n" unless ref $benchmark_array eq 'ARRAY'; diff --git a/gnu/usr.bin/perl/t/porting/bench.t b/gnu/usr.bin/perl/t/porting/bench.t index 7c137c1ded6..9d2ab87690f 100644 --- a/gnu/usr.bin/perl/t/porting/bench.t +++ b/gnu/usr.bin/perl/t/porting/bench.t @@ -77,9 +77,7 @@ my %format_qrs; "(" . "\\s*-?\\d+\\." . "\\d" x $l - ."|\\s{" - . ($l + 1) - . ",}-)" + ."|\\s*-)" }ge; # convert run of space chars into ' +' or ' *' diff --git a/gnu/usr.bin/perl/t/porting/customized.dat b/gnu/usr.bin/perl/t/porting/customized.dat index ced03a3b074..dd41c3d4537 100644 --- a/gnu/usr.bin/perl/t/porting/customized.dat +++ b/gnu/usr.bin/perl/t/porting/customized.dat @@ -1,10 +1,7 @@ -Digest::MD5 cpan/Digest-MD5/t/files.t f8fe234035918d3b7324eba05f73e7e903a45ca0 # Regenerate this file using: # cd t # ./perl -I../lib porting/customized.t --regen -Devel::PPPort dist/Devel-PPPort/parts/embed.fnc e030719d9c6921810554a8e2d398543348b4878c Digest cpan/Digest/Digest.pm 43f7f544cb11842b2f55c73e28930da50774e081 -Encode cpan/Encode/t/truncated_utf8.t 36e6780c20f1d22ed7c97b1a388a63629617451f Locale::Maketext::Simple cpan/Locale-Maketext-Simple/lib/Locale/Maketext/Simple.pm 57ed38905791a17c150210cd6f42ead22a7707b6 Math::Complex cpan/Math-Complex/lib/Math/Complex.pm 198ea6c6c584f5ea79a0fd7e9d411d0878f3b2af Math::Complex cpan/Math-Complex/t/Complex.t 4f307ed6fc59f1e5fb0e6b11103fc631b6bdb335 @@ -12,23 +9,21 @@ Math::Complex cpan/Math-Complex/t/Trig.t 2682526e23a161d54732c2a66393fe4a234d186 Memoize cpan/Memoize/Memoize.pm 902092ff91cdec9c7b4bd06202eb179e1ce26ca2 NEXT cpan/NEXT/lib/NEXT.pm 2c83d03ee361816e53ccb931137d314ab878d19f NEXT cpan/NEXT/t/next.t 66fd60eb0f75b6f3eea95d1dee745f9a7a348146 +Net::Ping dist/Net-Ping/lib/Net/Ping.pm e2e7053673ead1eff8f3ca8ecdd9b838598c1d8c Net::Ping dist/Net-Ping/t/000_load.t deff5dc2ca54dae28cb19d3631427db127279ac2 -Net::Ping dist/Net-Ping/t/001_new.t 90c9d63509b3efc8941449fbd1ca8b807fa42040 -Net::Ping dist/Net-Ping/t/500_ping_icmp.t a003daa5eaf215e58234786bb1fbfbebf669bf44 +Net::Ping dist/Net-Ping/t/500_ping_icmp.t 3eeb60181c01b85f876bd6658644548fdf2e24d4 +Net::Ping dist/Net-Ping/t/501_ping_icmpv6.t 54373de5858f8fb7e078e4998a4b3b8dbca91783 Pod::Checker cpan/Pod-Checker/t/pod/contains_bad_pod.xr 73538fd80dfe6e19ad561fe034009b44460208f6 Pod::Checker cpan/Pod-Checker/t/pod/selfcheck.t 8ce3cfd38e4b9bcf5bc7fe7f2a14195e49aed7d8 Pod::Checker cpan/Pod-Checker/t/pod/testcmp.pl a0cd5c8eca775c7753f4464eee96fa916e3d8a16 Pod::Checker cpan/Pod-Checker/t/pod/testpchk.pl b2072c7f4379fd050e15424175d7cac5facf5b3b -Pod::Perldoc cpan/Pod-Perldoc/lib/Pod/Perldoc.pm d97aa26b722e6e3120b19ee0d7cf9af04dfdfb7f +Pod::Perldoc cpan/Pod-Perldoc/lib/Pod/Perldoc.pm 582be34c077c9ff44d99914724a0cc2140bcd48c autodie cpan/autodie/lib/autodie/exception.pm b99e4e35a9ed36de94d54437888822ced4936207 autodie cpan/autodie/lib/autodie/hints.pm e1998fec61fb4e82fe46585bd82c73200be6f262 autodie cpan/autodie/t/exceptions.t ad315a208f875e06b0964012ce8d65daa438c036 autodie cpan/autodie/t/lib/Hints_pod_examples.pm 6944c218e9754b3613c8d0c90a5ae8aceccb5c99 autodie cpan/autodie/t/mkdir.t 9e70d2282a3cc7d76a78bf8144fccba20fb37dac -experimental cpan/experimental/t/basic.t a073ea03ccc98dec496569f3648ab01a5fe1c7a0 -version vutil.c 45ff345c3d8424ba63e130a223848f5b336bd87b -perlfaq cpan/perlfaq/lib/perlfaq5.pod bcc1b6af3b6dff3973643acf8d5e741463374123 -perlfaq cpan/perlfaq/lib/perlfaq8.pod bffbc0c8fa828aead24e0891a5e789369a8e0743 -podlators pod/perlpodstyle.pod c6500c9950b46e8228d4adbc09a3ee2ef23de2d0 -version cpan/version/lib/version.pm a61f969d55dd73ae2d7a604f2c9bbef1ea82b820 -version vxs.inc f26c23f0279fb64c77ad814af906c04930cff81c +autodie cpan/autodie/t/recv.t 63bea2daa330e44b67714527ddf701c1bf3a6954 +experimental cpan/experimental/t/basic.t cb9da8dd05b854375809872a05dd32637508d5da +version cpan/version/lib/version.pm 7ef9219d1d5f1d71f08a79f3b0577df138b21b12 +version vutil.c 105543ef1824fbc2429a7d5ebd19189c8081fede diff --git a/gnu/usr.bin/perl/t/porting/diag.t b/gnu/usr.bin/perl/t/porting/diag.t index 22cde90a9cf..b892dfa066f 100755 --- a/gnu/usr.bin/perl/t/porting/diag.t +++ b/gnu/usr.bin/perl/t/porting/diag.t @@ -76,7 +76,20 @@ my $category_re = qr/ [a-z0-9_:]+?/; # Note: requires an initial space my $severity_re = qr/ . (?: \| . )* /x; # A severity is a single char, but can # be of the form 'S|P|W' my @same_descr; +my $depth = 0; while (<$diagfh>) { + if (m/^=over/) { + $depth++; + next; + } + if (m/^=back/) { + $depth--; + next; + } + + # Stuff deeper than main level is ignored + next if $depth != 1; + if (m/^=item (.*)/) { $cur_entry = $1; @@ -141,6 +154,11 @@ while (<$diagfh>) { } } +if ($depth != 0) { + diag ("Unbalance =over/=back. Fix before proceeding; over - back = " . $depth); + exit(1); +} + foreach my $cur_entry ( keys %entries) { next if $entries{$cur_entry}{todo}; # If in this file, won't have a severity if (! exists $entries{$cur_entry}{severity} diff --git a/gnu/usr.bin/perl/t/porting/dual-life.t b/gnu/usr.bin/perl/t/porting/dual-life.t index fdfc9b692d4..4ad2256ef29 100644 --- a/gnu/usr.bin/perl/t/porting/dual-life.t +++ b/gnu/usr.bin/perl/t/porting/dual-life.t @@ -24,12 +24,6 @@ use File::Spec::Functions; # Exceptions that are found in dual-life bin dirs but aren't # installed by default; some occur only during testing: my $not_installed = qr{^(?: - \.\./cpan/Archive-Tar/bin/ptar.* - | - \.\./cpan/JSON-PP/bin/json_pp - | - \.\./cpan/IO-Compress/bin/zipdetails - | \.\./cpan/Encode/bin/u(?:cm(?:2table|lint|sort)|nidump) | \.\./cpan/Module-(?:Metadata|Build) diff --git a/gnu/usr.bin/perl/t/porting/known_pod_issues.dat b/gnu/usr.bin/perl/t/porting/known_pod_issues.dat index e89d5c02a13..a89edf79a48 100644 --- a/gnu/usr.bin/perl/t/porting/known_pod_issues.dat +++ b/gnu/usr.bin/perl/t/porting/known_pod_issues.dat @@ -22,7 +22,9 @@ Apache::MP3 Apache::SmallProf Archive::Extract Array::Base +arybase atan2(3) +atoi(3) Attribute::Constant autobox B::Generate @@ -160,6 +162,9 @@ Lingua::ZH::Romanize::Pinyin List::Gather listen(2) local::lib +Locale::Codes +Locale::Country +Locale::Language localeconv(3) lockf(3) Log::Message @@ -283,6 +288,7 @@ strftime(3) strictures String::Base String::Scanf +strtol(3) Switch tar(1) Template::Declare @@ -336,7 +342,7 @@ dist/devel-ppport/parts/inc/ppphdoc Unknown directive: =dontwarn 1 dist/devel-ppport/parts/inc/ppphdoc Unknown directive: =implementation 1 dist/devel-ppport/parts/inc/ppphdoc Unknown directive: =provides 1 dist/exporter/lib/exporter.pm Verbatim line length including indents exceeds 79 by 2 -dist/net-ping/lib/net/ping.pm Apparent broken link 1 +dist/net-ping/lib/net/ping.pm Apparent broken link 2 ext/amiga-exec/exec.pm Verbatim line length including indents exceeds 79 by 1 ext/dynaloader/dynaloader.pm Verbatim line length including indents exceeds 79 by 1 ext/hash-util/lib/hash/util.pm Verbatim line length including indents exceeds 79 by 2 @@ -374,7 +380,7 @@ pod/perltie.pod Verbatim line length including indents exceeds 79 by 3 pod/perltru64.pod Verbatim line length including indents exceeds 79 by 1 pod/perlwin32.pod Verbatim line length including indents exceeds 79 by 7 porting/epigraphs.pod Verbatim line length including indents exceeds 79 by -1 -porting/release_managers_guide.pod Verbatim line length including indents exceeds 79 by 1 +porting/release_managers_guide.pod Verbatim line length including indents exceeds 79 by 2 porting/todo.pod ? Should you be using F<...> or maybe L<...> instead of 1 lib/benchmark.pm Verbatim line length including indents exceeds 79 by 2 lib/config.pod ? Should you be using L<...> instead of -1 diff --git a/gnu/usr.bin/perl/t/porting/libperl.t b/gnu/usr.bin/perl/t/porting/libperl.t index 1536fda9440..f5fb53d2c32 100644 --- a/gnu/usr.bin/perl/t/porting/libperl.t +++ b/gnu/usr.bin/perl/t/porting/libperl.t @@ -250,7 +250,7 @@ sub nm_parse_darwin { # String literals can live in different sections # depending on the compiler and os release, assumedly # also linker flags. - if (/^\(__TEXT,__(?:const|cstring|literal\d+)\) (?:non-)?external _?(\w+)(\.\w+)?$/) { + if (/^\(__TEXT,__(?:const|(?:asan_)?cstring|literal\d+)\) (?:non-)?external _?(\w+)(\.\w+)?$/) { my ($symbol, $suffix) = ($1, $2); # Ignore function-local constants like # _Perl_av_extend_guts.oom_array_extend @@ -330,9 +330,11 @@ ok($symbols{data}{const}{PL_no_mem}{'globals.o'}, "has PL_no_mem"); my $GS = $Config{ccflags} =~ /-DPERL_GLOBAL_STRUCT\b/ ? 1 : 0; my $GSP = $Config{ccflags} =~ /-DPERL_GLOBAL_STRUCT_PRIVATE/ ? 1 : 0; +my $nocommon = $Config{ccflags} =~ /-fno-common/ ? 1 : 0; print "# GS = $GS\n"; print "# GSP = $GSP\n"; +print "# nocommon = $nocommon\n"; my %data_symbols; @@ -382,6 +384,11 @@ if ($GSP) { ok(!exists $data_symbols{PL_hash_seed}, "has no PL_hash_seed"); ok(!exists $data_symbols{PL_ppaddr}, "has no PL_ppaddr"); + if ($nocommon) { + $symbols{data}{common} = $symbols{data}{bss}; + delete $symbols{data}{bss}; + } + ok(! exists $symbols{data}{bss}, "has no data bss symbols") or do { my $bad = "BSS entries (there are supposed to be none):\n"; diff --git a/gnu/usr.bin/perl/t/porting/manifest.t b/gnu/usr.bin/perl/t/porting/manifest.t index 7adb06198de..7bed600de5a 100755 --- a/gnu/usr.bin/perl/t/porting/manifest.t +++ b/gnu/usr.bin/perl/t/porting/manifest.t @@ -60,11 +60,11 @@ while (<$m>) { next; } elsif ($separator !~ tr/ //c) { # It's all spaces - fail("Spaces in entry for $file"); + fail("Spaces in entry for $file in MANIFEST at line $."); } elsif ($separator =~ tr/\t//) { - fail("Mixed tabs and spaces in entry for $file"); + fail("Mixed tabs and spaces in entry for $file in MANIFEST at line $."); } else { - fail("Odd whitespace in entry for $file"); + fail("Odd whitespace in entry for $file in MANIFEST at line $."); } } @@ -86,7 +86,10 @@ SKIP: { SKIP: { find_git_or_skip(6); my %seen; # De-dup ls-files output (can appear more than once) - chomp(my @repo= grep { !/\.gitignore$/ && !$seen{$_}++ } `git ls-files`); + chomp(my @repo= grep { + !m{\.gitignore$} && + !$seen{$_}++ + } `git ls-files`); skip("git ls-files didnt work",3) if !@repo; is( 0+@repo, 0+@files, "git ls-files gives the same number of files as MANIFEST lists"); diff --git a/gnu/usr.bin/perl/t/porting/regen.t b/gnu/usr.bin/perl/t/porting/regen.t index 762e7407840..b4d438f5fb2 100644 --- a/gnu/usr.bin/perl/t/porting/regen.t +++ b/gnu/usr.bin/perl/t/porting/regen.t @@ -31,7 +31,7 @@ my $tests = 27; # I can't see a clean way to calculate this automatically. my %skip = ("regen_perly.pl" => [qw(perly.act perly.h perly.tab)], "regen/keywords.pl" => [qw(keywords.c keywords.h)], "regen/uconfig_h.h" => [qw(uconfig.h)], - "regen/mk_invlists.pl" => [qw(charclass_invlists.h)], + "regen/mk_invlists.pl" => [qw(charclass_invlists.h uni_keywords.h)], "regen/regcharclass.pl" => [qw(regcharclass.h)], ); diff --git a/gnu/usr.bin/perl/t/re/fold_grind.t b/gnu/usr.bin/perl/t/re/fold_grind.t deleted file mode 100644 index 0665517d61c..00000000000 --- a/gnu/usr.bin/perl/t/re/fold_grind.t +++ /dev/null @@ -1,953 +0,0 @@ -# Grind out a lot of combinatoric tests for folding. - -binmode STDOUT, ":utf8"; - -BEGIN { - chdir 't' if -d 't'; - require './test.pl'; - set_up_inc('../lib'); - require Config; import Config; - skip_all_if_miniperl("no dynamic loading on miniperl, no Encode nor POSIX"); - if ($^O eq 'dec_osf') { - skip_all("$^O cannot handle this test"); - } - my $time_out_factor = $ENV{PERL_TEST_TIME_OUT_FACTOR} || 1; - $time_out_factor = 1 if $time_out_factor < 1; - - watchdog(5 * 60 * $time_out_factor); - require './loc_tools.pl'; -} - -use charnames ":full"; - -my $DEBUG = 0; # Outputs extra information for debugging this .t - -use strict; -use warnings; -no warnings 'locale'; # Plenty of these would otherwise get generated -use Encode; -use POSIX; - -# Special-cased characters in the .c's that we want to make sure get tested. -my %be_sure_to_test = ( - chr utf8::unicode_to_native(0xDF) => 1, # LATIN_SMALL_LETTER_SHARP_S - "\x{1E9E}" => 1, # LATIN_CAPITAL_LETTER_SHARP_S - "\x{390}" => 1, # GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA_AND_TONOS - "\x{3B0}" => 1, # GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS - "\x{1FD3}" => 1, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA - "\x{1FE3}" => 1, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA - ); - - -# Tests both unicode and not, so make sure not implicitly testing unicode -no feature 'unicode_strings'; - -# Case-insensitive matching is a large and complicated issue. Perl does not -# implement it fully, properly. For example, it doesn't include normalization -# as part of the equation. To test every conceivable combination is clearly -# impossible; these tests are mostly drawn from visual inspection of the code -# and experience, trying to exercise all areas. - -# There are three basic ranges of characters that Perl may treat differently: -# 1) Invariants under utf8 which on ASCII-ish machines are ASCII, and are -# referred to here as ASCII. On EBCDIC machines, the non-ASCII invariants -# are all controls that fold to themselves. -my $ASCII = 1; - -# 2) Other characters that fit into a byte but are different in utf8 than not; -# here referred to, taking some liberties, as Latin1. -my $Latin1 = 2; - -# 3) Characters that won't fit in a byte; here referred to as Unicode -my $Unicode = 3; - -# Within these basic groups are equivalence classes that testing any character -# in is likely to lead to the same results as any other character. This is -# used to cut down the number of tests needed, unless PERL_RUN_SLOW_TESTS is -# set. -my $skip_apparently_redundant = ! $ENV{PERL_RUN_SLOW_TESTS}; - -# Additionally parts of this test run a lot of subtests, outputting the -# resulting TAP can be expensive so the tests are summarised internally. The -# PERL_DEBUG_FULL_TEST environment variable can be set to produce the full -# output for debugging purposes. - -sub range_type { - my $ord = ord shift; - - return $ASCII if utf8::native_to_unicode($ord) < 128; - return $Latin1 if $ord < 256; - return $Unicode; -} - -sub numerically { - return $a <=> $b -} - -my $list_all_tests = $ENV{PERL_DEBUG_FULL_TEST} || $DEBUG; -$| = 1 if $list_all_tests; - -# Significant time is saved by not outputting each test but grouping the -# output into subtests -my $okays; # Number of ok's in current subtest -my $this_iteration; # Number of possible tests in current subtest -my $count = 0; # Number of subtests = number of total tests - -sub run_test($$$$) { - my ($test, $todo, $do_we_output_locale_name, $debug) = @_; - - $debug = "" unless $DEBUG; - my $res = eval $test; - - if ($do_we_output_locale_name) { - $do_we_output_locale_name = 'setlocale(LC_CTYPE, "' - . POSIX::setlocale(&POSIX::LC_CTYPE) - . '"); '; - } - if (!$res || $list_all_tests) { - # Failed or debug; output the result - $count++; - ok($res, "$do_we_output_locale_name$test; $debug"); - } else { - # Just count the test as passed - $okays++; - } - $this_iteration++; -} - -my %has_test_by_participants; # Makes sure has tests for each range and each - # number of characters that fold to the same - # thing -my %has_test_by_byte_count; # Makes sure has tests for each combination of - # n bytes folds to m bytes - -my %tests; # The set of tests. -# Each key is a code point that folds to something else. -# Each value is a list of things that the key folds to. If the 'thing' is a -# single code point, it is that ordinal. If it is a multi-char fold, it is an -# ordered list of the code points in that fold. Here's an example for 'S': -# '83' => [ 115, 383 ] -# -# And one for a multi-char fold: \xDF -# 223 => [ -# [ # 'ss' -# 83, -# 83 -# ], -# [ # 'SS' -# 115, -# 115 -# ], -# [ # LATIN SMALL LETTER LONG S -# 383, -# 383 -# ], -# 7838 # LATIN_CAPITAL_LETTER_SHARP_S -# ], - -my %folds; # keys are code points that fold; - # values are each a list of code points the key folds to -my %inverse_folds; # keys are strings of the folded-to; - # values are lists of characters that fold to them - -sub add_test($@) { - my ($to, @from) = @_; - - # Called to cause the input to be tested by adding to %tests. @from is - # the list of characters that fold to the string $to. @from should be - # sorted so the lowest code point is first.... - # The input is in string form; %tests uses code points, so have to - # convert. - - my $to_chars = length $to; - my @test_to; # List of tests for $to - - if ($to_chars == 1) { - @test_to = ord $to; - } - else { - push @test_to, [ map { ord $_ } split "", $to ]; - - # For multi-char folds, we also test that things that can fold to each - # individual character in the fold also work. If we were testing - # comprehensively, we would try every combination of upper and lower - # case in the fold, but it will have to suffice to avoid running - # forever to make sure that each thing that folds to these is tested - # at least once. Because of complement matching ([^...]), we need to - # do both the folded, and the folded-from. - # We first look at each character in the multi-char fold, and save how - # many characters fold to it; and also the maximum number of such - # folds - my @folds_to_count; # 0th char in fold is index 0 ... - my $max_folds_to = 0; - - for (my $i = 0; $i < $to_chars; $i++) { - my $to_char = substr($to, $i, 1); - if (exists $inverse_folds{$to_char}) { - $folds_to_count[$i] = scalar @{$inverse_folds{$to_char}}; - $max_folds_to = $folds_to_count[$i] if $max_folds_to < $folds_to_count[$i]; - } - else { - $folds_to_count[$i] = 0; - } - } - - # We will need to generate as many tests as the maximum number of - # folds, so that each fold will have at least one test. - # For example, consider character X which folds to the three character - # string 'xyz'. If 2 things fold to x (X and x), 4 to y (Y, Y' - # (Y-prime), Y'' (Y-prime-prime), and y), and 1 thing to z (itself), 4 - # tests will be generated: - # xyz - # XYz - # xY'z - # xY''z - for (my $i = 0; $i < $max_folds_to; $i++) { - my @this_test_to; # Assemble a single test - - # For each character in the multi-char fold ... - for (my $j = 0; $j < $to_chars; $j++) { - my $this_char = substr($to, $j, 1); - - # Use its corresponding inverse fold, if available. - if ($i < $folds_to_count[$j]) { - push @this_test_to, ord $inverse_folds{$this_char}[$i]; - } - else { # Or else itself. - push @this_test_to, ord $this_char; - } - } - - # Add this test to the list - push @test_to, [ @this_test_to ]; - } - - # Here, have assembled all the tests for the multi-char fold. Sort so - # lowest code points are first for consistency and aesthetics in - # output. We know there are at least two characters in the fold, but - # I haven't bothered to worry about sorting on an optional third - # character if the first two are identical. - @test_to = sort { ($a->[0] == $b->[0]) - ? $a->[1] <=> $b->[1] - : $a->[0] <=> $b->[0] - } @test_to; - } - - - # This test is from n bytes to m bytes. Record that so won't try to add - # another test that does the same. - use bytes; - my $to_bytes = length $to; - foreach my $from_map (@from) { - $has_test_by_byte_count{length $from_map}{$to_bytes} = $to; - } - no bytes; - - my $ord_smallest_from = ord shift @from; - if (exists $tests{$ord_smallest_from}) { - die "There are already tests for $ord_smallest_from" - }; - - # Add in the fold tests, - push @{$tests{$ord_smallest_from}}, @test_to; - - # Then any remaining froms in the equivalence class. - push @{$tests{$ord_smallest_from}}, map { ord $_ } @from; -} - -# Get the Unicode rules and construct inverse mappings from them - -use Unicode::UCD; -my $file="../lib/unicore/CaseFolding.txt"; - -# Use the Unicode data file if we are on an ASCII platform (which its data is -# for), and it is in the modern format (starting in Unicode 3.1.0) and it is -# available. This avoids being affected by potential bugs introduced by other -# layers of Perl -if ($::IS_ASCII - && pack("C*", split /\./, Unicode::UCD::UnicodeVersion()) ge v3.1.0 - && open my $fh, "<", $file) -{ - while (<$fh>) { - chomp; - - # Lines look like (though without the initial '#') - #0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE - - # Get rid of comments, ignore blank or comment-only lines - my $line = $_ =~ s/ (?: \s* \# .* )? $ //rx; - next unless length $line; - my ($hex_from, $fold_type, @hex_folded) = split /[\s;]+/, $line; - - next if $fold_type =~ / ^ [IT] $/x; # Perl doesn't do Turkish folding - next if $fold_type eq 'S'; # If Unicode's tables are correct, the F - # should be a superset of S - - my $from = hex $hex_from; - my @to = map { hex $_ } @hex_folded; - @{$folds{$from}} = @to; - my $folded_str = pack ("U0U*", @to); - push @{$inverse_folds{$folded_str}}, chr $from; - } -} -else { # Here, can't use the .txt file: read the Unicode rules file and - # construct inverse mappings from it - - my ($invlist_ref, $invmap_ref, undef, $default) - = Unicode::UCD::prop_invmap('Case_Folding'); - for my $i (0 .. @$invlist_ref - 1 - 1) { - next if $invmap_ref->[$i] == $default; - - # Make into an array if not so already, so can treat uniformly below - $invmap_ref->[$i] = [ $invmap_ref->[$i] ] if ! ref $invmap_ref->[$i]; - - # Each subsequent element of the range requires adjustment of +1 from - # the previous element - my $adjust = -1; - for my $j ($invlist_ref->[$i] .. $invlist_ref->[$i+1] -1) { - $adjust++; - my @to = map { $_ + $adjust } @{$invmap_ref->[$i]}; - push @{$folds{$j}}, @to; - my $folded_str = join "", map { chr } @to; - utf8::upgrade($folded_str); - #note (sprintf "%d: %04X: %s", __LINE__, $j, join " ", - # map { sprintf "%04X", $_ + $adjust } @{$invmap_ref->[$i]}); - push @{$inverse_folds{$folded_str}}, chr $j; - } - } -} - -# Analyze the data and generate tests to get adequate test coverage. We sort -# things so that smallest code points are done first. -TO: -foreach my $to (sort { (length $a == length $b) - ? $a cmp $b - : length $a <=> length $b - } keys %inverse_folds) -{ - - # Within each fold, sort so that the smallest code points are done first - @{$inverse_folds{$to}} = sort { $a cmp $b } @{$inverse_folds{$to}}; - my @from = @{$inverse_folds{$to}}; - - # Just add it to the tests if doing complete coverage - if (! $skip_apparently_redundant) { - add_test($to, @from); - next TO; - } - - my $to_chars = length $to; - my $to_range_type = range_type(substr($to, 0, 1)); - - # If this is required to be tested, do so. We check for these first, as - # they will take up slots of byte-to-byte combinations that we otherwise - # would have to have other tests to get. - foreach my $from_map (@from) { - if (exists $be_sure_to_test{$from_map}) { - add_test($to, @from); - next TO; - } - } - - # If the fold contains heterogeneous range types, is suspect and should be - # tested. - if ($to_chars > 1) { - foreach my $char (split "", $to) { - if (range_type($char) != $to_range_type) { - add_test($to, @from); - next TO; - } - } - } - - # If the mapping crosses range types, is suspect and should be tested - foreach my $from_map (@from) { - if (range_type($from_map) != $to_range_type) { - add_test($to, @from); - next TO; - } - } - - # Here, all components of the mapping are in the same range type. For - # single character folds, we test one case in each range type that has 2 - # particpants, 3 particpants, etc. - if ($to_chars == 1) { - if (! exists $has_test_by_participants{scalar @from}{$to_range_type}) { - add_test($to, @from); - $has_test_by_participants{scalar @from}{$to_range_type} = $to; - next TO; - } - } - - # We also test all combinations of mappings from m to n bytes. This is - # because the regex optimizer cares. (Don't bother worrying about that - # Latin1 chars will occupy a different number of bytes under utf8, as - # there are plenty of other cases that catch these byte numbers.) - use bytes; - my $to_bytes = length $to; - foreach my $from_map (@from) { - if (! exists $has_test_by_byte_count{length $from_map}{$to_bytes}) { - add_test($to, @from); - next TO; - } - } -} - -# For each range type, test additionally a character that folds to itself -add_test(":", ":"); -add_test(chr utf8::unicode_to_native(0xF7), chr utf8::unicode_to_native(0xF7)); -add_test(chr 0x2C7, chr 0x2C7); - -# To cut down on the number of tests -my $has_tested_aa_above_latin1; -my $has_tested_latin1_aa; -my $has_tested_ascii_aa; -my $has_tested_l_above_latin1; -my $has_tested_above_latin1_l; -my $has_tested_ascii_l; -my $has_tested_above_latin1_d; -my $has_tested_ascii_d; -my $has_tested_non_latin1_d; -my $has_tested_above_latin1_a; -my $has_tested_ascii_a; -my $has_tested_non_latin1_a; - -# For use by pairs() in generating combinations -sub prefix { - my $p = shift; - map [ $p, $_ ], @_ -} - -# Returns all ordered combinations of pairs of elements from the input array. -# It doesn't return pairs like (a, a), (b, b). Change the slice to an array -# to do that. This was just to have fewer tests. -sub pairs (@) { - #print STDERR __LINE__, ": ", join(" XXX ", map { sprintf "%04X", $_ } @_), "\n"; - map { prefix $_[$_], @_[0..$_-1, $_+1..$#_] } 0..$#_ -} - -my $utf8_locale; - -my @charsets = qw(d u a aa); -if (locales_enabled('LC_CTYPE')) { - my $current_locale = POSIX::setlocale( &POSIX::LC_CTYPE, "C") // ""; - if ($current_locale eq 'C') { - use locale; - - # Some implementations don't have the 128-255 range characters all - # mean nothing under the C locale (an example being VMS). This is - # legal, but since we don't know what the right answers should be, - # skip the locale tests in that situation. - for my $i (128 .. 255) { - my $char = chr(utf8::unicode_to_native($i)); - goto skip_C_locale_tests if uc($char) ne $char || lc($char) ne $char; - } - push @charsets, 'l'; - - skip_C_locale_tests: - - # Look for utf8 locale. We use the pseudo-modifier 'L' to indicate - # that we really want /l, but change to a UTF-8 locale. - $utf8_locale = find_utf8_ctype_locale(); - push @charsets, 'L' if defined $utf8_locale; - } -} - -# Finally ready to do the tests -foreach my $test (sort { numerically } keys %tests) { - - my $previous_target; - my $previous_pattern; - my @pairs = pairs(sort numerically $test, @{$tests{$test}}); - - # Each fold can be viewed as a closure of all the characters that - # participate in it. Look at each possible pairing from a closure, with the - # first member of the pair the target string to match against, and the - # second member forming the pattern. Thus each fold member gets tested as - # the string, and the pattern with every other member in the opposite role. - while (my $pair = shift @pairs) { - my ($target, $pattern) = @$pair; - - # When testing a char that doesn't fold, we can get the same - # permutation twice; so skip all but the first. - next if $previous_target - && $previous_target == $target - && $previous_pattern == $pattern; - ($previous_target, $previous_pattern) = ($target, $pattern); - - # Each side may be either a single char or a string. Extract each into an - # array (perhaps of length 1) - my @target, my @pattern; - @target = (ref $target) ? @$target : $target; - @pattern = (ref $pattern) ? @$pattern : $pattern; - - # We are testing just folds to/from a single character. If our pairs - # happens to generate multi/multi, skip. - next if @target > 1 && @pattern > 1; - - # Get in hex form. - my @x_target = map { sprintf "\\x{%04X}", $_ } @target; - my @x_pattern = map { sprintf "\\x{%04X}", $_ } @pattern; - - my $target_above_latin1 = grep { $_ > 255 } @target; - my $pattern_above_latin1 = grep { $_ > 255 } @pattern; - my $target_has_ascii = grep { utf8::native_to_unicode($_) < 128 } @target; - my $pattern_has_ascii = grep { utf8::native_to_unicode($_) < 128 } @pattern; - my $target_only_ascii = ! grep { utf8::native_to_unicode($_) > 127 } @target; - my $pattern_only_ascii = ! grep { utf8::native_to_unicode($_) > 127 } @pattern; - my $target_has_latin1 = grep { $_ < 256 } @target; - my $target_has_upper_latin1 - = grep { $_ < 256 && utf8::native_to_unicode($_) > 127 } @target; - my $pattern_has_upper_latin1 - = grep { $_ < 256 && utf8::native_to_unicode($_) > 127 } @pattern; - my $pattern_has_latin1 = grep { $_ < 256 } @pattern; - my $is_self = @target == 1 && @pattern == 1 && $target[0] == $pattern[0]; - - # We don't test multi-char folding into other multi-chars. We are testing - # a code point that folds to or from other characters. Find the single - # code point for diagnostic purposes. (If both are single, choose the - # target string) - my $ord = @target == 1 ? $target[0] : $pattern[0]; - my $progress = sprintf "%04X: \"%s\" and /%s/", - $test, - join("", @x_target), - join("", @x_pattern); - #note $progress; - - # Now grind out tests, using various combinations. - foreach my $charset (@charsets) { - my $charset_mod = lc $charset; - my $current_locale = ""; - if ($charset_mod eq 'l') { - $current_locale = POSIX::setlocale(&POSIX::LC_CTYPE, - ($charset eq 'L') - ? $utf8_locale - : 'C'); - $current_locale = 'C locale' if $current_locale eq 'C'; - } - $okays = 0; - $this_iteration = 0; - - # To cut down somewhat on the enormous quantity of tests this currently - # runs, skip some for some of the character sets whose results aren't - # likely to differ from others. But run all tests on the code points - # that don't fold, plus one other set in each range group. - if (! $is_self) { - - # /aa should only affect things with folds in the ASCII range. But, try - # it on one set in the other ranges just to make sure it doesn't break - # them. - if ($charset eq 'aa') { - - # It may be that this $pair of code points to test are both - # non-ascii, but if either of them actually fold to ascii, that is - # suspect and should be tested. So for /aa, use whether their folds - # are ascii or not - my $target_has_ascii = $target_has_ascii; - my $pattern_has_ascii = $pattern_has_ascii; - if (! $target_has_ascii) { - foreach my $cp (@target) { - if (exists $folds{$cp} - && grep { utf8::native_to_unicode($_) < 128 } @{$folds{$cp}} ) - { - $target_has_ascii = 1; - last; - } - } - } - if (! $pattern_has_ascii) { - foreach my $cp (@pattern) { - if (exists $folds{$cp} - && grep { utf8::native_to_unicode($_) < 128 } @{$folds{$cp}} ) - { - $pattern_has_ascii = 1; - last; - } - } - } - - if (! $target_has_ascii && ! $pattern_has_ascii) { - if ($target_above_latin1 || $pattern_above_latin1) { - next if defined $has_tested_aa_above_latin1 - && $has_tested_aa_above_latin1 != $test; - $has_tested_aa_above_latin1 = $test; - } - next if defined $has_tested_latin1_aa - && $has_tested_latin1_aa != $test; - $has_tested_latin1_aa = $test; - } - elsif ($target_only_ascii && $pattern_only_ascii) { - - # And, except for one set just to make sure, skip tests - # where both elements in the pair are ASCII. If one works for - # aa, the others are likely too. This skips tests where the - # fold is from non-ASCII to ASCII, but this part of the test - # is just about the ASCII components. - next if defined $has_tested_ascii_l - && $has_tested_ascii_l != $test; - $has_tested_ascii_l = $test; - } - } - elsif ($charset eq 'l') { - - # For l, don't need to test beyond one set those things that are - # all above latin1, because unlikely to have different successes - # than /u. But, for the same reason as described in the /aa above, - # it is suspect and should be tested, if either of the folds are to - # latin1. - my $target_has_latin1 = $target_has_latin1; - my $pattern_has_latin1 = $pattern_has_latin1; - if (! $target_has_latin1) { - foreach my $cp (@target) { - if (exists $folds{$cp} - && grep { $_ < 256 } @{$folds{$cp}} ) - { - $target_has_latin1 = 1; - last; - } - } - } - if (! $pattern_has_latin1) { - foreach my $cp (@pattern) { - if (exists $folds{$cp} - && grep { $_ < 256 } @{$folds{$cp}} ) - { - $pattern_has_latin1 = 1; - last; - } - } - } - if (! $target_has_latin1 && ! $pattern_has_latin1) { - next if defined $has_tested_above_latin1_l - && $has_tested_above_latin1_l != $test; - $has_tested_above_latin1_l = $test; - } - elsif ($target_only_ascii && $pattern_only_ascii) { - - # And, except for one set just to make sure, skip tests - # where both elements in the pair are ASCII. This is - # essentially the same reasoning as above for /aa. - next if defined $has_tested_ascii_l - && $has_tested_ascii_l != $test; - $has_tested_ascii_l = $test; - } - } - elsif ($charset eq 'd') { - # Similarly for d. Beyond one test (besides self) each, we don't - # test pairs that are both ascii; or both above latin1, or are - # combinations of ascii and above latin1. - if (! $target_has_upper_latin1 && ! $pattern_has_upper_latin1) { - if ($target_has_ascii && $pattern_has_ascii) { - next if defined $has_tested_ascii_d - && $has_tested_ascii_d != $test; - $has_tested_ascii_d = $test - } - elsif (! $target_has_latin1 && ! $pattern_has_latin1) { - next if defined $has_tested_above_latin1_d - && $has_tested_above_latin1_d != $test; - $has_tested_above_latin1_d = $test; - } - else { - next if defined $has_tested_non_latin1_d - && $has_tested_non_latin1_d != $test; - $has_tested_non_latin1_d = $test; - } - } - } - elsif ($charset eq 'a') { - # Similarly for a. This should match identically to /u, so wasn't - # tested at all until a bug was found that was thereby missed. - # As a compromise, beyond one test (besides self) each, we don't - # test pairs that are both ascii; or both above latin1, or are - # combinations of ascii and above latin1. - if (! $target_has_upper_latin1 && ! $pattern_has_upper_latin1) { - if ($target_has_ascii && $pattern_has_ascii) { - next if defined $has_tested_ascii_a - && $has_tested_ascii_a != $test; - $has_tested_ascii_a = $test - } - elsif (! $target_has_latin1 && ! $pattern_has_latin1) { - next if defined $has_tested_above_latin1_a - && $has_tested_above_latin1_a != $test; - $has_tested_above_latin1_a = $test; - } - else { - next if defined $has_tested_non_latin1_a - && $has_tested_non_latin1_a != $test; - $has_tested_non_latin1_a = $test; - } - } - } - } - - foreach my $utf8_target (0, 1) { # Both utf8 and not, for - # code points < 256 - my $upgrade_target = ""; - - # These must already be in utf8 because the string to match has - # something above latin1. So impossible to test if to not to be in - # utf8; and otherwise, no upgrade is needed. - next if $target_above_latin1 && ! $utf8_target; - $upgrade_target = ' utf8::upgrade($c);' if ! $target_above_latin1 && $utf8_target; - - foreach my $utf8_pattern (0, 1) { - next if $pattern_above_latin1 && ! $utf8_pattern; - - # Our testing of 'l' uses the POSIX locale, which is ASCII-only - my $uni_semantics = $charset ne 'l' && ($utf8_target || $charset eq 'u' || $charset eq 'L' || ($charset eq 'd' && $utf8_pattern) || $charset =~ /a/); - my $upgrade_pattern = ""; - $upgrade_pattern = ' utf8::upgrade($p);' if ! $pattern_above_latin1 && $utf8_pattern; - - my $lhs = join "", @x_target; - my $lhs_str = eval qq{"$lhs"}; fail($@) if $@; - my @rhs = @x_pattern; - my $rhs = join "", @rhs; - my $should_fail = (! $uni_semantics && $ord < 256 && ! $is_self && utf8::native_to_unicode($ord) >= 128) - || ($charset eq 'aa' && $target_has_ascii != $pattern_has_ascii) - || ($charset eq 'l' && $target_has_latin1 != $pattern_has_latin1); - - # Do simple tests of referencing capture buffers, named and - # numbered. - my $op = '=~'; - $op = '!~' if $should_fail; - - my $todo = 0; # No longer any todo's - my $eval = "my \$c = \"$lhs$rhs\"; my \$p = qr/(?$charset_mod:^($rhs)\\1\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p"; - run_test($eval, $todo, ($charset_mod eq 'l'), ""); - - $eval = "my \$c = \"$lhs$rhs\"; my \$p = qr/(?$charset_mod:^(?<grind>$rhs)\\k<grind>\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p"; - run_test($eval, $todo, ($charset_mod eq 'l'), ""); - - if ($lhs ne $rhs) { - $eval = "my \$c = \"$rhs$lhs\"; my \$p = qr/(?$charset_mod:^($rhs)\\1\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p"; - run_test($eval, "", ($charset_mod eq 'l'), ""); - - $eval = "my \$c = \"$rhs$lhs\"; my \$p = qr/(?$charset_mod:^(?<grind>$rhs)\\k<grind>\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p"; - run_test($eval, "", ($charset_mod eq 'l'), ""); - } - - # See if works on what could be a simple trie. - my $alternate; - { - # Keep the alternate | branch the same length as the tested one so - # that it's length doesn't influence things - my $evaled = eval "\"$rhs\""; # Convert e.g. \x{foo} into its - # chr equivalent - use bytes; - $alternate = 'q' x length $evaled; - } - $eval = "my \$c = \"$lhs\"; my \$p = qr/$rhs|$alternate/i$charset_mod;$upgrade_target$upgrade_pattern \$c $op \$p"; - run_test($eval, "", ($charset_mod eq 'l'), ""); - - # Check that works when the folded character follows something that - # is quantified. This test knows the regex code internals to the - # extent that it knows this is a potential problem, and that there - # are three different types of quantifiers generated: 1) The thing - # being quantified matches a single character; 2) it matches more - # than one character, but is fixed width; 3) it can match a variable - # number of characters. (It doesn't know that case 3 shouldn't - # matter, since it doesn't do anything special for the character - # following the quantifier; nor that some of the different - # quantifiers execute the same underlying code, as these tests are - # quick, and this insulates these tests from changes in the - # implementation.) - for my $quantifier ('?', '??', '*', '*?', '+', '+?', '{1,2}', '{1,2}?') { - $eval = "my \$c = \"_$lhs\"; my \$p = qr/(?$charset_mod:.$quantifier$rhs)/i;$upgrade_target$upgrade_pattern \$c $op \$p"; - run_test($eval, "", ($charset_mod eq 'l'), ""); - $eval = "my \$c = \"__$lhs\"; my \$p = qr/(?$charset_mod:(?:..)$quantifier$rhs)/i;$upgrade_target$upgrade_pattern \$c $op \$p"; - run_test($eval, "", ($charset_mod eq 'l'), ""); - $eval = "my \$c = \"__$lhs\"; my \$p = qr/(?$charset_mod:(?:.|\\R)$quantifier$rhs)/i;$upgrade_target$upgrade_pattern \$c $op \$p"; - run_test($eval, "", ($charset_mod eq 'l'), ""); - } - - foreach my $bracketed (0, 1) { # Put rhs in [...], or not - next if $bracketed && @pattern != 1; # bracketed makes these - # or's instead of a sequence - foreach my $optimize_bracketed (0, 1) { - next if $optimize_bracketed && ! $bracketed; - foreach my $inverted (0,1) { - next if $inverted && ! $bracketed; # inversion only valid - # in [^...] - next if $inverted && @target != 1; # [perl #89750] multi-char - # not valid in [^...] - - # In some cases, add an extra character that doesn't fold, and - # looks ok in the output. - my $extra_char = "_"; - foreach my $prepend ("", $extra_char) { - foreach my $append ("", $extra_char) { - - # Assemble the rhs. Put each character in a separate - # bracketed if using charclasses. This creates a stress on - # the code to span a match across multiple elements - my $rhs = ""; - foreach my $rhs_char (@rhs) { - $rhs .= '[' if $bracketed; - $rhs .= '^' if $inverted; - $rhs .= $rhs_char; - - # Add a character to the class, so class doesn't get - # optimized out, unless we are testing that optimization - $rhs .= '_' if $optimize_bracketed; - $rhs .= ']' if $bracketed; - } - - # Add one of: no capturing parens - # a single set - # a nested set - # Use quantifiers and extra variable width matches inside - # them to keep some optimizations from happening - foreach my $parend (0, 1, 2) { - my $interior = (! $parend) - ? $rhs - : ($parend == 1) - ? "(${rhs},?)" - : "((${rhs})+,?)"; - foreach my $quantifier ("", '?', '*', '+', '{1,3}') { - - # Perhaps should be TODOs, as are unimplemented, but - # maybe will never be implemented - next if @pattern != 1 && $quantifier; - - # A ? or * quantifier normally causes the thing to be - # able to match a null string - my $quantifier_can_match_null = $quantifier eq '?' - || $quantifier eq '*'; - - # But since we only quantify the last character in a - # multiple fold, the other characters will have width, - # except if we are quantifying the whole rhs - my $can_match_null = $quantifier_can_match_null - && (@rhs == 1 || $parend); - - foreach my $l_anchor ("", '^') { # '\A' didn't change - # result) - foreach my $r_anchor ("", '$') { # '\Z', '\z' didn't - # change result) - # The folded part can match the null string if it - # isn't required to have width, and there's not - # something on one or both sides that force it to. - my $both_sides = ($l_anchor && $r_anchor) - || ($l_anchor && $append) - || ($r_anchor && $prepend) - || ($prepend && $append); - my $must_match = ! $can_match_null || $both_sides; - # for performance, but doing this missed many failures - #next unless $must_match; - my $quantified = "(?$charset_mod:$l_anchor$prepend$interior${quantifier}$append$r_anchor)"; - my $op; - if ($must_match && $should_fail) { - $op = 0; - } else { - $op = 1; - } - $op = ! $op if $must_match && $inverted; - - if ($inverted && @target > 1) { - # When doing an inverted match against a - # multi-char target, and there is not something on - # the left to anchor the match, if it shouldn't - # succeed, skip, as what will happen (when working - # correctly) is that it will match the first - # position correctly, and then be inverted to not - # match; then it will go to the second position - # where it won't match, but get inverted to match, - # and hence succeeding. - next if ! ($l_anchor || $prepend) && ! $op; - - # Can't ever match for latin1 code points non-uni - # semantics that have a inverted multi-char fold - # when there is something on both sides and the - # quantifier isn't such as to span the required - # width, which is 2 or 3. - $op = 0 if $ord < 255 - && ! $uni_semantics - && $both_sides - && ( ! $quantifier || $quantifier eq '?') - && $parend < 2; - - # Similarly can't ever match when inverting a - # multi-char fold for /aa and the quantifier - # isn't sufficient to allow it to span to both - # sides. - $op = 0 if $target_has_ascii - && $charset eq 'aa' - && $both_sides - && ( ! $quantifier || $quantifier eq '?') - && $parend < 2; - - # Or for /l - $op = 0 if $target_has_latin1 && $charset eq 'l' - && $both_sides - && ( ! $quantifier || $quantifier eq '?') - && $parend < 2; - } - - - my $desc = ""; - if ($charset_mod eq 'l') { - $desc .= 'setlocale(LC_CTYPE, "' - . POSIX::setlocale(&POSIX::LC_CTYPE) - . '"); ' - } - $desc .= "my \$c = \"$prepend$lhs$append\"; " - . "my \$p = qr/$quantified/i;" - . "$upgrade_target$upgrade_pattern " - . "\$c " . ($op ? "=~" : "!~") . " \$p; "; - if ($DEBUG) { - $desc .= ( - "; uni_semantics=$uni_semantics, " - . "should_fail=$should_fail, " - . "bracketed=$bracketed, " - . "prepend=$prepend, " - . "append=$append, " - . "parend=$parend, " - . "quantifier=$quantifier, " - . "l_anchor=$l_anchor, " - . "r_anchor=$r_anchor; " - . "pattern_above_latin1=$pattern_above_latin1; " - . "utf8_pattern=$utf8_pattern" - ); - } - - my $c = "$prepend$lhs_str$append"; - my $p = qr/$quantified/i; - utf8::upgrade($c) if length($upgrade_target); - utf8::upgrade($p) if length($upgrade_pattern); - my $res = $op ? ($c =~ $p): ($c !~ $p); - - if (!$res || $list_all_tests) { - # Failed or debug; output the result - $count++; - ok($res, "test $count - $desc"); - } else { - # Just count the test as passed - $okays++; - } - $this_iteration++; - } - } - } - } - } - } - } - } - } - } - } - unless($list_all_tests) { - $count++; - is $okays, $this_iteration, "$okays subtests ok for" - . " /$charset_mod" - . (($charset_mod eq 'l') ? " ($current_locale)" : "") - . ', target="' . join("", @x_target) . '",' - . ' pat="' . join("", @x_pattern) . '"'; - } - } - } -} - -plan($count); - -1 diff --git a/gnu/usr.bin/perl/t/re/pat.t b/gnu/usr.bin/perl/t/re/pat.t index 1d98fe77d7f..6a868f4bcd4 100755 --- a/gnu/usr.bin/perl/t/re/pat.t +++ b/gnu/usr.bin/perl/t/re/pat.t @@ -6,6 +6,7 @@ use strict; use warnings; +no warnings 'experimental::vlb'; use 5.010; sub run_tests; @@ -20,10 +21,11 @@ BEGIN { require './loc_tools.pl'; set_up_inc('../lib', '.', '../ext/re'); } - skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader; - skip_all_without_unicode_tables(); -plan tests => 848; # Update this when adding/deleting tests. +skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader; +skip_all_without_unicode_tables(); + +plan tests => 864; # Update this when adding/deleting tests. run_tests() unless caller; @@ -31,7 +33,6 @@ run_tests() unless caller; # Tests start here. # sub run_tests { - my $sharp_s = uni_to_native("\xdf"); { @@ -319,7 +320,7 @@ sub run_tests { # Defaults assumed if this fails eval { require Config; }; - $::reg_infty = $Config::Config{reg_infty} // 32767; + $::reg_infty = $Config::Config{reg_infty} // 65535; $::reg_infty_m = $::reg_infty - 1; $::reg_infty_p = $::reg_infty + 1; $::reg_infty_m = $::reg_infty_m; # Suppress warning. @@ -339,6 +340,11 @@ sub run_tests { like($@, qr/^\QQuantifier in {,} bigger than/, $message); eval "'aaa' =~ /a{1,$::reg_infty_p}/"; like($@, qr/^\QQuantifier in {,} bigger than/, $message); + + # It should be 'a' x 2147483647, but that exhausts memory on + # reasonably sized modern machines + like('a' x $::reg_infty_p, qr/a{1,}/, + "{1,} matches more times than REG_INFTY"); } { @@ -1347,6 +1353,7 @@ EOP unlike "\x{100}", qr/(?i:\w)/, "(?i: shouldn't lose the passed in /a"; use re '/aa'; unlike 'k', qr/(?i:\N{KELVIN SIGN})/, "(?i: shouldn't lose the passed in /aa"; + unlike 'k', qr'(?i:\N{KELVIN SIGN})', "(?i: shouldn't lose the passed in /aa"; } { @@ -1462,7 +1469,17 @@ EOP # test that this is true for 1..100 # Note that this test causes the engine to recurse at runtime, and # hence use a lot of C stack. + + # Compiling for all 100 nested captures blows the stack under + # clang and ASan; reduce. + my $max_captures = $Config{ccflags} =~ /sanitize/ ? 20 : 100; + for my $i (1..100) { + if ($i > $max_captures) { + pass("skipping $i buffers under ASan aa"); + pass("skipping $i buffers under ASan aba"); + next; + } my $capture= "a"; $capture= "($capture)" for 1 .. $i; for my $mid ("","b") { @@ -1870,6 +1887,26 @@ EOF_CODE like($got[5],qr/Error: Infinite recursion via empty pattern/, "empty pattern in regex codeblock: produced the right exception message" ); } + + # This test is based on the one directly above, which happened to + # leak. Repeat the test, but stripped down to the bare essentials + # of the leak, which is to die while executing a regex which is + # already the current regex, thus causing the saved outer set of + # capture offsets to leak. The test itself doesn't do anything + # except sit around hoping not to be triggered by ASan + { + eval { + my $s = "abcd"; + $s =~ m{([abcd]) (?{ die if $1 eq 'd'; })}gx; + $s =~ //g; + $s =~ //g; + $s =~ //g; + }; + pass("call to current regex doesn't leak"); + } + + + { # [perl #130495] /x comment skipping stopped a byte short, leading # to assertion failure or 'malformed utf-8 character" warning @@ -1913,6 +1950,10 @@ EOP } { # buffer overflow + + # This test also used to leak - fixed by the commit which added + # this line. + fresh_perl_is("BEGIN{\$^H=0x200000}\ns/[(?{//xx", "Unmatched [ in regex; marked by <-- HERE in m/[ <-- HERE (?{/ at (eval 1) line 1.\n", {}, "buffer overflow for regexp component"); @@ -1943,10 +1984,136 @@ EOP } { # [perl $132227] fresh_perl_is("('0ba' . ('ss' x 300)) =~ m/0B\\N{U+41}" . $sharp_s x 150 . '/i and print "1\n"', 1,{},"Use of sharp s under /di that changes to /ui"); + + # A variation, but as far as khw knows not part of 132227 + fresh_perl_is("'0bssa' =~ m/0B" . $sharp_s . "\\N{U+41}" . '/i and print "1\n"', 1,{},"Use of sharp s under /di that changes to /ui"); } { # [perl $132164] fresh_perl_is('m m0*0+\Rm', "",{},"Undefined behavior in address sanitizer"); } + { # [perl #133642] + fresh_perl_is('no warnings "experimental::vlb"; + m/((?<=(0?)))/', "",{},"Was getting 'Double free'"); + } + { # [perl #133782] + # this would panic on DEBUGGING builds + fresh_perl_is(<<'CODE', "ok\nok\n",{}, 'Bad length magic was left on $^R'); +while( "\N{U+100}bc" =~ /(..?)(?{$^N})/g ) { + print "ok\n" if length($^R)==length("$^R"); +} +CODE + } + { # [perl #133871], ASAN/valgrind out-of-bounds access + fresh_perl_like('qr/(?|(())|())|//', qr/syntax error/, {}, "[perl #133871]"); + } + { # [perl #133921], segfault + fresh_perl_is('qr0||ß+p00000F00000ù\Q00000ÿ00000x00000x0c0e0\Qx0\Qx0\x{0c!}\;\;î0\x + fresh_perl_is('|ß+W0ü0r0\Qx0\Qx0x0c0G00000000000000000O000000000x0x0x0c!}\;îçÿù\Q0 \x + +fresh_perl_is('s|ß+W0ü0f0\Qx0\Qx0x0c0G0xgive0000000000000O0h000x0 \xòÿÿÿ + + + + + ç + + + + + + + + + + + + + + + +x{0c!}\;\;çÿ + + fresh_perl_is('a aú + + + + + + ç + + + + + + + + + + + + + + + +x{1c!}\;\;îçÿp + + fresh_perl_is('s|ß+W0ü0f0\Qx0\Qx0x0c0g0c 000n0000000000000O0h000x0 \xòÿÿÿ + + + + + + ç + + + + + + + + + + + + + + + +x{0c!}\;\;îçÿ + } + + { # perl #133998] + fresh_perl_is('print "\x{110000}" =~ qr/(?l)|[^\S\pC\s]/', 1, {}, + '/[\S\s]/l works'); + } + + { # perl #133995] + use utf8; + fresh_perl_is('"έδωσαν ελληνικήვე" =~ m/[^0](?=0)0?/', "", + {wide_chars => 1}, + '[^0] doesnt crash on UTF-8 target string'); + } + + { # [perl #133992] This is a tokenizer bug of parsing a pattern + fresh_perl_is(q:$z = do { + use utf8; + "q!ÑеÑÑ! =~ m'" + }; + $z .= 'è(?#'; + $z .= "'"; + eval $z;:, "", {}, 'foo'); + } + + { # [perl #134325] + my $quote="\\Q"; + my $back="\\\\"; + my $ff="\xff"; + my $s = sprintf "/\\1|(|%s)%s%s /i", + $quote x 8 . $back x 69, + $quote x 5 . $back x 4, + $ff x 48; + like(runperl(prog => "$s", stderr => 1), qr/Unmatched \(/); + } } # End of sub run_tests diff --git a/gnu/usr.bin/perl/t/re/pat_advanced.t b/gnu/usr.bin/perl/t/re/pat_advanced.t index d90ceeb5bde..b4f32eec045 100755 --- a/gnu/usr.bin/perl/t/re/pat_advanced.t +++ b/gnu/usr.bin/perl/t/re/pat_advanced.t @@ -535,27 +535,45 @@ sub run_tests { like("\N{LATIN SMALL LETTER SHARP S}", qr/\N{LATIN SMALL LETTER SHARP S}/, $message); like("\N{LATIN SMALL LETTER SHARP S}", + qr'\N{LATIN SMALL LETTER SHARP S}', $message); + like("\N{LATIN SMALL LETTER SHARP S}", qr/\N{LATIN SMALL LETTER SHARP S}/i, $message); like("\N{LATIN SMALL LETTER SHARP S}", + qr'\N{LATIN SMALL LETTER SHARP S}'i, $message); + like("\N{LATIN SMALL LETTER SHARP S}", qr/[\N{LATIN SMALL LETTER SHARP S}]/, $message); like("\N{LATIN SMALL LETTER SHARP S}", + qr'[\N{LATIN SMALL LETTER SHARP S}]', $message); + like("\N{LATIN SMALL LETTER SHARP S}", qr/[\N{LATIN SMALL LETTER SHARP S}]/i, $message); + like("\N{LATIN SMALL LETTER SHARP S}", + qr'[\N{LATIN SMALL LETTER SHARP S}]'i, $message); like("ss", qr /\N{LATIN SMALL LETTER SHARP S}/i, $message); + like("ss", qr '\N{LATIN SMALL LETTER SHARP S}'i, $message); like("SS", qr /\N{LATIN SMALL LETTER SHARP S}/i, $message); + like("SS", qr '\N{LATIN SMALL LETTER SHARP S}'i, $message); like("ss", qr/[\N{LATIN SMALL LETTER SHARP S}]/i, $message); + like("ss", qr'[\N{LATIN SMALL LETTER SHARP S}]'i, $message); like("SS", qr/[\N{LATIN SMALL LETTER SHARP S}]/i, $message); + like("SS", qr'[\N{LATIN SMALL LETTER SHARP S}]'i, $message); like("\N{LATIN SMALL LETTER SHARP S}", qr/ss/i, $message); like("\N{LATIN SMALL LETTER SHARP S}", qr/SS/i, $message); $message = "Unoptimized named sequence in class"; like("ss", qr/[\N{LATIN SMALL LETTER SHARP S}x]/i, $message); + like("ss", qr'[\N{LATIN SMALL LETTER SHARP S}x]'i, $message); like("SS", qr/[\N{LATIN SMALL LETTER SHARP S}x]/i, $message); + like("SS", qr'[\N{LATIN SMALL LETTER SHARP S}x]'i, $message); like("\N{LATIN SMALL LETTER SHARP S}", qr/[\N{LATIN SMALL LETTER SHARP S}x]/, $message); like("\N{LATIN SMALL LETTER SHARP S}", + qr'[\N{LATIN SMALL LETTER SHARP S}x]', $message); + like("\N{LATIN SMALL LETTER SHARP S}", qr/[\N{LATIN SMALL LETTER SHARP S}x]/i, $message); + like("\N{LATIN SMALL LETTER SHARP S}", + qr'[\N{LATIN SMALL LETTER SHARP S}x]'i, $message); } { @@ -783,11 +801,11 @@ sub run_tests { { my $re = qq /^([^X]*)X/; utf8::upgrade ($re); - ok "\x{100}X" =~ /$re/, "S_cl_and ANYOF_UNICODE & ANYOF_INVERTED"; + like "\x{100}X", qr/$re/, "S_cl_and ANYOF_UNICODE & ANYOF_INVERTED"; my $loc_re = qq /(?l:^([^X]*)X)/; utf8::upgrade ($loc_re); no warnings 'locale'; - ok "\x{100}X" =~ /$loc_re/, "locale, S_cl_and ANYOF_UNICODE & ANYOF_INVERTED"; + like "\x{100}X", qr/$loc_re/, "locale, S_cl_and ANYOF_UNICODE & ANYOF_INVERTED"; } { @@ -825,6 +843,8 @@ sub run_tests { for my $tail ('\N{SNOWFLAKE}') { eval qq {use charnames ':full'; like("$head$tail", qr/$head$tail/, \$message)}; + eval qq {use charnames ':full'; + like("$head$tail", qr'$head$tail', \$message)}; is($@, '', $message); } } @@ -907,7 +927,8 @@ sub run_tests { BEGIN { unshift @INC, 'lib'; } - use Cname; + use Cname; # Our custom charname plugin, currently found in + # t/lib/Cname.pm like 'fooB', qr/\N{foo}[\N{B}\N{b}]/, "Passthrough charname"; my $name = "foo\xDF"; @@ -937,10 +958,16 @@ sub run_tests { like $w, qr/Ignoring zero length/, 'Ignoring zero length \N{} in character class warning'; + # EVIL keeps track of its calls, and appends a new character each + # time: A AB ABC ABCD ... ok 'AB' =~ /(\N{EVIL})/ && $1 eq 'A', 'Charname caching $1'; like 'ABC', qr/(\N{EVIL})/, 'Charname caching $1'; + ok 'ABCD' =~ m'(\N{EVIL})' && $1 eq 'ABC', 'Charname caching $1'; + ok 'ABCDE' =~ m'(\N{EVIL})', 'Charname caching $1'; like 'xy', qr/x\N{EMPTY-STR}y/, 'Empty string charname produces NOTHING node'; + ok 'xy' =~ 'x\N{EMPTY-STR}y', + 'Empty string charname produces NOTHING node'; like '', qr/\N{EMPTY-STR}/, 'Empty string charname produces NOTHING node'; like "\N{LONG-STR}", qr/^\N{LONG-STR}$/, 'Verify that long string works'; @@ -948,9 +975,14 @@ sub run_tests { # perlhacktips points out that these work on both ASCII and EBCDIC like "\xfc", qr/\N{EMPTY-STR}\xdc/i, 'Empty \N{} should change /d to /u'; + like "\xfc", qr'\N{EMPTY-STR}\xdc'i, 'Empty \N{} should change /d to /u'; eval '/(?[[\N{EMPTY-STR}]])/'; like $@, qr/Zero length \\N\{\}/, 'Verify zero-length return from \N{} correctly fails'; + ok "\N{LONG-STR}" =~ /^\N{LONG-STR}$/, 'Verify that long string works'; + ok "\N{LONG-STR}" =~ '^\N{LONG-STR}$', 'Verify that long string works'; + ok "\N{LONG-STR}" =~ /^\N{LONG-STR}$/i, 'Verify under folding that long string works'; + ok "\N{LONG-STR}" =~ m'^\N{LONG-STR}$'i, 'Verify under folding that long string works'; undef $w; { @@ -2107,7 +2139,7 @@ EOP } # RT #82610 - ok 'foo/file.fob' =~ m,^(?=[^\.])[^/]*/(?=[^\.])[^/]*\.fo[^/]$,; + like 'foo/file.fob', qr,^(?=[^\.])[^/]*/(?=[^\.])[^/]*\.fo[^/]$,; { # This was failing unless an explicit /d was added my $E0 = uni_to_native("\xE0"); @@ -2176,6 +2208,33 @@ EOP } } ok(! $failed, "Matched multi-char fold 'ss' across EXACTF node boundaries; if failed, was at count $failed"); + + for my $non_finals ("t", "ft", "ift", "sift") { + my $base_pat = $non_finals . "enKalt"; # (The tail is taken from + # the trouble ticket, is + # arbitrary) + for my $utf8 ("non-UTF-8", "UTF-8") { + + # Try at different lengths to be sure to get a node boundary + for my $repeat (120 .. 270) { # [perl #133756] + my $head = ("b" x $repeat) . "\xDC"; + my $pat = $base_pat; + utf8::upgrade($pat) if $utf8 eq "UTF-8"; + $pat = $head . $pat; + my $text = $head . $base_pat; + + if ($text !~ /$pat/i) { + $failed = $repeat; + last; + } + } + + ok(! $failed, "A non-final fold character " + . (length($non_finals) - 1) + . " characters from the end of an EXACTFish" + . " $utf8 pattern works; if failed, was at count $failed"); + } + } } { @@ -2286,6 +2345,10 @@ EOF is (scalar split(/\b{sb}/, "Don't think twice. It's all right."), 2, '\b{wb} splits sentences correctly'); + ok "my/dir/audio_07.mp3" =~ + qr/(.*)\/(.*)\/(.*)\.(?<=(?=(?:\.(?!\d+\b)\w{1,4}$)$)\.)(.*)$()/, + "[perl #133948]"; + # !!! NOTE! Keep the following tests last -- they may crash perl @@ -2344,33 +2407,34 @@ EOF # scoped, and want to turn them off, so have to do the match in this # scope. if ($Config{uvsize} < 8) { - ok(chr(0x7FFF_FFFE) =~ /\p{Is_31_Bit_Super}/, + like(chr(0x7FFF_FFFE), qr/\p{Is_31_Bit_Super}/, "chr(0x7FFF_FFFE) can match a Unicode property"); - ok(chr(0x7FFF_FFFF) =~ /\p{Is_31_Bit_Super}/, + like(chr(0x7FFF_FFFF), qr/\p{Is_31_Bit_Super}/, "chr(0x7FFF_FFFF) can match a Unicode property"); my $p = qr/^[\x{7FFF_FFFF}]$/; - ok(chr(0x7FFF_FFFF) =~ $p, + like(chr(0x7FFF_FFFF), qr/$p/, "chr(0x7FFF_FFFF) can match itself in a [class]"); - ok(chr(0x7FFF_FFFF) =~ $p, # Tests any caching + like(chr(0x7FFF_FFFF), qr/$p/, # Tests any caching "chr(0x7FFF_FFFF) can match itself in a [class] subsequently"); } else { no warnings 'overflow'; - ok(chr(0x7FFF_FFFF_FFFF_FFFE) =~ qr/\p{Is_Portable_Super}/, + like(chr(0x7FFF_FFFF_FFFF_FFFE), qr/\p{Is_Portable_Super}/, "chr(0x7FFF_FFFF_FFFF_FFFE) can match a Unicode property"); - ok(chr(0x7FFF_FFFF_FFFF_FFFF) =~ qr/^\p{Is_Portable_Super}$/, + like(chr(0x7FFF_FFFF_FFFF_FFFF), qr/^\p{Is_Portable_Super}$/, "chr(0x7FFF_FFFF_FFFF_FFFF) can match a Unicode property"); my $p = qr/^[\x{7FFF_FFFF_FFFF_FFFF}]$/; - ok(chr(0x7FFF_FFFF_FFFF_FFFF) =~ $p, + like(chr(0x7FFF_FFFF_FFFF_FFFF), qr/$p/, "chr(0x7FFF_FFFF_FFFF_FFFF) can match itself in a [class]"); - ok(chr(0x7FFF_FFFF_FFFF_FFFF) =~ $p, # Tests any caching + like(chr(0x7FFF_FFFF_FFFF_FFFF), qr/$p/, # Tests any caching "chr(0x7FFF_FFFF_FFFF_FFFF) can match itself in a [class] subsequently"); # This test is because something was declared as 32 bits, but # should have been cast to 64; only a problem where # sizeof(STRLEN) != sizeof(UV) - ok(chr(0x7FFF_FFFF_FFFF_FFFE) !~ qr/\p{Is_31_Bit_Super}/, "chr(0x7FFF_FFFF_FFFF_FFFE) shouldn't match a range ending in 0x7FFF_FFFF"); + unlike(chr(0x7FFF_FFFF_FFFF_FFFE), qr/\p{Is_31_Bit_Super}/, + "chr(0x7FFF_FFFF_FFFF_FFFE) shouldn't match a range ending in 0x7FFF_FFFF"); } } @@ -2405,6 +2469,7 @@ EOF { # [perl #126606 crashed the interpreter use Cname; like("sS", qr/\N{EMPTY-STR}Ss|/i, '\N{} with empty branch alternation works'); + like("sS", qr'\N{EMPTY-STR}Ss|'i, '\N{} with empty branch alternation works'); } { # Regexp:Grammars was broken: @@ -2440,6 +2505,25 @@ EOF ok(! $?, "User-defined pattern did not cause panic [perl 130010]"); } + { # [perl #133999] Previously assertion failure + fresh_perl_like('0 =~ /\p{nv:(\B(*COMMIT)C+)}/', + qr/No Unicode property value wildcard matches/, + {}, + "Assertion failure with *COMMIT and wildcard property"); + } + + { # [perl #134029] Previously assertion failure + fresh_perl_like('qr/\p{upper:]}|\337(?|ss)|)(?0/', + qr/Unicode property wildcard not terminated/, + {}, + "Assertion failure with single character wildcard"); + } + + { # [perl #134034] Previously assertion failure + fresh_perl_is('use utf8; q!Ȧिम한글💣ყაოსაა!=~/(?li)\b{wb}\B(*COMMIT)0/;', + "", {}, "*COMMIT caused positioning beyond EOS"); + } + # !!! NOTE that tests that aren't at all likely to crash perl should go # a ways above, above these last ones. There's a comment there that, like diff --git a/gnu/usr.bin/perl/t/re/pat_re_eval.t b/gnu/usr.bin/perl/t/re/pat_re_eval.t index f88a8651a18..696b6a3fb50 100755 --- a/gnu/usr.bin/perl/t/re/pat_re_eval.t +++ b/gnu/usr.bin/perl/t/re/pat_re_eval.t @@ -23,7 +23,7 @@ BEGIN { our @global; -plan tests => 502; # Update this when adding/deleting tests. +plan tests => 506; # Update this when adding/deleting tests. run_tests() unless caller; @@ -1301,6 +1301,35 @@ sub run_tests { ok /^$qr$/, "RT #132772 - run time time qr//"; } + # RT #133687 + # mixing compile-time (?(?{code})) with run-time code blocks + # was failing, because the second pass through the parser + # (which compiles the runtime code blocks) was failing to adequately + # mask the compile-time code blocks to shield them from a second + # compile: /X(?{...})Y/ was being correctly masked as /X________Y/ + # but /X(?(?{...}))Y/ was being incorrectly masked as + # /X(?________)Y/ + + { + use re 'eval'; + my $runtime_re = '(??{ "A"; })'; + ok "ABC" =~ /^ $runtime_re (?(?{ 1; })BC) $/x, 'RT #133687 yes'; + ok "ABC" =~ /^ $runtime_re (?(?{ 0; })xy|BC) $/x, 'RT #133687 yes|no'; + } + + # RT #134208 + # when the string being matched was an SvTEMP and the re_eval died, + # the SV's magic was being restored after the SV was freed. + # Give ASan something to play with. + + { + my $a; + no warnings 'uninitialized'; + eval { "$a $1" =~ /(?{ die })/ }; + pass("SvTEMP 1"); + eval { sub { " " }->() =~ /(?{ die })/ }; + pass("SvTEMP 2"); + } } # End of sub run_tests diff --git a/gnu/usr.bin/perl/t/re/pat_rt_report.t b/gnu/usr.bin/perl/t/re/pat_rt_report.t index dd740e713b9..ced4fe670b2 100755 --- a/gnu/usr.bin/perl/t/re/pat_rt_report.t +++ b/gnu/usr.bin/perl/t/re/pat_rt_report.t @@ -20,7 +20,7 @@ use warnings; use 5.010; use Config; -plan tests => 2504; # Update this when adding/deleting tests. +plan tests => 2510; # Update this when adding/deleting tests. run_tests() unless caller; @@ -1044,23 +1044,29 @@ sub run_tests { use charnames ":full"; # Delayed interpolation of \N' my $r1 = qr/\N{THAI CHARACTER SARA I}/; + my $r2 = qr'\N{THAI CHARACTER SARA I}'; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}"; # Bug #56444 ok $s1 =~ /$r1+/, 'my $r1 = qr/\N{THAI CHARACTER SARA I}/; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ /$r1+/'; + ok $s1 =~ /$r2+/, 'my $r2 = qr\'\N{THAI CHARACTER SARA I}\'; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ \'$r2+\''; # Bug #62056 ok "${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/, '"${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/'; ok "abbbbc" =~ m/\N{1}/ && $& eq "a", '"abbbbc" =~ m/\N{1}/ && $& eq "a"'; + ok "abbbbc" =~ m'\N{1}' && $& eq "a", '"abbbbc" =~ m\'\N{1}\' && $& eq "a"'; ok "abbbbc" =~ m/\N{3,4}/ && $& eq "abbb", '"abbbbc" =~ m/\N{3,4}/ && $& eq "abbb"'; + ok "abbbbc" =~ m'\N{3,4}' && $& eq "abbb", '"abbbbc" =~ m\'\N{3,4}\' && $& eq "abbb"'; } { use charnames ":full"; my $message = '[perl #74982] Period coming after \N{}'; ok("\x{ff08}." =~ m/\N{FULLWIDTH LEFT PARENTHESIS}./ && $& eq "\x{ff08}.", $message); + ok("\x{ff08}." =~ m'\N{FULLWIDTH LEFT PARENTHESIS}.' && $& eq "\x{ff08}.", $message); ok("\x{ff08}." =~ m/[\N{FULLWIDTH LEFT PARENTHESIS}]./ && $& eq "\x{ff08}.", $message); + ok("\x{ff08}." =~ m'[\N{FULLWIDTH LEFT PARENTHESIS}].' && $& eq "\x{ff08}.", $message); } SKIP: { @@ -1141,6 +1147,15 @@ EOP ok($s=~/(foo){1,0}|(?1)/, "RT #130561 - allowing impossible quantifier should not break recursion"); } + { + # RT #133892 Coredump in Perl_re_intuit_start + # Second match flips to checking floating substring before fixed + # substring, which triggers a pathway that failed to check there + # was a non-utf8 version of the string before trying to use it + # resulting in a SEGV. + my $result = grep /b\x{1c0}ss0/i, qw{ xxxx xxxx0 }; + ok($result == 0); + } } # End of sub run_tests diff --git a/gnu/usr.bin/perl/t/re/re_tests b/gnu/usr.bin/perl/t/re/re_tests index 3fd24ff572c..17a5b53eee7 100644 --- a/gnu/usr.bin/perl/t/re/re_tests +++ b/gnu/usr.bin/perl/t/re/re_tests @@ -487,12 +487,19 @@ a(?:b|(c|e){1,2}?|d)+?(.) ace y $1$2 ce (((a{2}){2})+) aaaaaaaaaa y $1 aaaaaaaa (?:(f)(o)(o)|(b)(a)(r))* foobar y $1:$2:$3:$4:$5:$6 f:o:o:b:a:r (?<=a)b ab y $& b +(?<=af?)b ab y $& b (?<=a)b cb n - - +(?<=a(?:fo)?)b cb n - - (?<=a)b b n - - +(?<=a(?:foo)?)b b n - - (?<!c)b ab y $& b +(?<!c(?:foob)?)b ab y $& b (?<!c)b cb n - - +(?<!c(?:fooba)?)b cb n - - (?<!c)b b y - - +(?<!c(?:foobar)?)b b y - - (?<!c)b b y $& b +(?<!c(?:foobarb)?)b b y $& b (?<%)b - c - Group name must start with a non-digit word character (?:..)*a aba y $& aba (?:..)*?a aba y $& a @@ -559,7 +566,10 @@ x(~~)*(?:(?:F)?)? x~~ y - - ^a(?#xxx){3}c aaac y $& aaac '^a (?#xxx) (?#yyy) {3}c'x aaac y $& aaac (?<![cd])b dbcb n - - +(?<![cd]e{0,254})b dbcb n - - (?<![cd])[ab] dbaacb y $& a +(?<![cd]{1,2})[ab] dbaacb y $& a +#Why does this fail. I think it's confusing (?<![cd]{1,3})[ab] dbaacb y $& a (?<!(c|d))b dbcb n - - (?<!(c|d))[ab] dbaacb y $& a (?<!cd)[ab] cdaccb y $& b @@ -668,7 +678,7 @@ $(?<=^(a)) a y $1 a ((?>a+)b) aaab y $1 aaab (?>(a+))b aaab y $1 aaa ((?>[^()]+)|\([^()]*\))+ ((abc(ade)ufh()()x y $& abc(ade)ufh()()x -(?<=x+)y - c - Variable length lookbehind not implemented +(?<=x+)y - c - Lookbehind longer than 255 not implemented ((def){37,17})?ABC ABC y $& ABC \Z a\nb\n y $-[0] 3 \z a\nb\n y $-[0] 4 @@ -1143,6 +1153,11 @@ X(?<=foo.)[YZ] ..XfooXY.. y pos 8 /(a)+((?1))(fox)/ aafox y $1-$2-$3 a-a-fox /(a){1,100}((?1))(fox)/ aafox y $1-$2-$3 a-a-fox /(a){0,100}((?1))(fox)/ aafox y $1-$2-$3 a-a-fox +/(a)??((?1))(fox)/ aafox y $1-$2-$3 a-a-fox +/(a)*?((?1))(fox)/ aafox y $1-$2-$3 a-a-fox +/(a)+?((?1))(fox)/ aafox y $1-$2-$3 a-a-fox +/(a){1,100}?((?1))(fox)/ aafox y $1-$2-$3 a-a-fox +/(a){0,100}?((?1))(fox)/ aafox y $1-$2-$3 a-a-fox /(ab)?((?1))(fox)/ ababfox y $1-$2-$3 ab-ab-fox /(ab)*((?1))(fox)/ ababfox y $1-$2-$3 ab-ab-fox /(ab)+((?1))(fox)/ ababfox y $1-$2-$3 ab-ab-fox @@ -1344,6 +1359,7 @@ a*(*F) aaaab n - - (?<=abcd(?<=(aaaabcd))) ..aaaabcd.. y $1 aaaabcd (?=xy(?<=(aaxy))) ..aaxy.. y $1 aaxy +(?=xy(?<=(aaxyz?))) ..aaxy.. y $1 aaxy X(\w+)(?=\s)|X(\w+) Xab y [$1-$2] [-ab] @@ -1441,7 +1457,9 @@ foo(\h)bar foo\tbar y $1 \t # Verify that \N{U+...} forces Unicode rules /\N{U+41}\x{c1}/i a\x{e1} y $& a\x{e1} +'\N{U+41}\x{c1}'i a\x{e1} y $& a\x{e1} /[\N{U+41}\x{c1}]/i \x{e1} y $& \x{e1} +'[\N{U+41}\x{c1}]'i \x{e1} y $& \x{e1} '\N{U+41}' A y $& A # Even for single quoted patterns [\s][\S] \x{a0}\x{a0} n - - # Unicode complements should not match same character @@ -1474,7 +1492,7 @@ abc\N abc\n n [\N{U+}] - c - Invalid hexadecimal number \N{U+4AG3} - c - Invalid hexadecimal number [\N{U+4AG3}] - c - Invalid hexadecimal number -abc\N{def} - c - \\N{NAME} must be resolved by the lexer +abc\N{def} - c - Unknown charname 'def' in regex abc\N{U+4AG3 - c - Missing right brace on \\N{} abc\N{def - c - Missing right brace on \\N{} abc\N{ - c - Missing right brace on \\N{} @@ -1485,6 +1503,7 @@ abc\N{ - c - Missing right brace on \\N{} # Verifies catches hex errors /\N{U+0xBEEF}/ - c - Invalid hexadecimal number +\N{U+0xBEEF} - c - Invalid hexadecimal number # Used to be an error, but not any more: /\N{U+BEEF.BEAD}/ - c - @@ -1670,8 +1689,6 @@ ab[c\\\](??{"x"})]{3}d ab\\](d y - - /st/i \x{DF}\x{FB05} y $& \x{FB05} /ssst/i \x{DF}\x{FB05} y $& \x{DF}\x{FB05} -/[s]s/i \x{DF} n - - -/s[s]/i \x{DF} n - - # [perl #101970] /[[:lower:]]/i \x{100} y $& \x{100} @@ -1992,7 +2009,13 @@ AB\s+\x{100} AB \x{100}X y - - /\A\x80+\z/ \x80\x80\x80\x80\x80\x80\x80\x80\x80 y $& \x80\x80\x80\x80\x80\x80\x80\x80\x80 # [perl #132900] ^(\d+)*?4X$ 1234X y $1 123 # perl #131648 (?il)\x{100}|\x{100}|\x{FE} \xFE y $& \xFE - +\A([\x00-\x7F]+)(.*)\z \007\011\012 y $& \007\011\012 # [perl #133311] +(?:(?^:(?{1}))[^0-9]) : y $& : # [perl #133348] +/[\xdf-/i - c - Invalid [] range # [perl #133620] likely only fails under valgrind +/\1a(b)/ bab n - - # This compiles but fails to match as \1 is not set when parsed. +/(?iu)(?<=\xdf)hbase/ sshbase y $& hbase +/\x{30c3}?[\x{30a2}\x{30a4}\x{30a6}\x{30a8}\x{30aa}-\x{30e2}\x{30e4}\x{30e6}\x{30e8}-\x{30f4}](?:[\x{30e3}\x{30e5}\x{30e7}\x{30a1}\x{30a3}\x{30a5}\x{30a7}\x{30a9}])?\x{30fc}?\x{30f3}?/ \x{30de}\x{30fc}\x{30af}\x{30b5}\x{30fc}\x{30d3}\x{30b9} y $& \x{30de}\x{30fc} # part of [perl #133942 +/[\x{3041}-\x{3093}]+/ \x{6f22}\x{5b57}\x{3001}\x{30ab}\x{30bf}\x{30ab}\x{30ca}\x{3001}\x{3072}\x{3089}\x{304c}\x{306a}\x{306e}\x{5165}\x{3063}\x{305f}String y $& \x{3072}\x{3089}\x{304c}\x{306a}\x{306e} # [perl #133978] # Keep these lines at the end of the file # vim: softtabstop=0 noexpandtab diff --git a/gnu/usr.bin/perl/t/re/reg_eval_scope.t b/gnu/usr.bin/perl/t/re/reg_eval_scope.t index 25b90b64827..3bf937d251e 100644 --- a/gnu/usr.bin/perl/t/re/reg_eval_scope.t +++ b/gnu/usr.bin/perl/t/re/reg_eval_scope.t @@ -12,7 +12,7 @@ BEGIN { } } -plan 48; +plan 49; fresh_perl_is <<'CODE', '781745', {}, '(?{}) has its own lexical scope'; my $x = 7; my $a = 4; my $b = 5; @@ -371,3 +371,15 @@ SKIP: { f3(); is ($s, \&f3, '__SUB__ qr multi'); } + +# RT #133879 +# ensure scope is properly restored when there's an error compiling a +# "looks a bit like it has (?{}) but doesn't" qr// + +fresh_perl_like <<'CODE', + BEGIN {$^H = 0x10000 }; # HINT_NEW_RE + qr/\(?{/ +CODE + qr/Constant\(qq\) unknown/, + { stderr => 1 }, + 'qr/\(?{'; diff --git a/gnu/usr.bin/perl/t/re/reg_mesg.t b/gnu/usr.bin/perl/t/re/reg_mesg.t index 2880be45eb0..c5c79f0323a 100755 --- a/gnu/usr.bin/perl/t/re/reg_mesg.t +++ b/gnu/usr.bin/perl/t/re/reg_mesg.t @@ -102,7 +102,7 @@ sub mark_as_utf8 { return @ret; } -my $inf_m1 = ($Config::Config{reg_infty} || 32767) - 1; +my $inf_m1 = ($Config::Config{reg_infty} || 65535) - 1; my $inf_p1 = $inf_m1 + 2; my $B_hex = sprintf("\\x%02X", ord "B"); @@ -128,7 +128,7 @@ my @death = ( '/[[=foo=]]/' => 'POSIX syntax [= =] is reserved for future extensions {#} m/[[=foo=]{#}]/', - '/(?<= .*)/' => 'Variable length lookbehind not implemented in regex m/(?<= .*)/', + '/(?<= .*)/' => 'Lookbehind longer than 255 not implemented in regex m/(?<= .*)/', '/(?<= x{1000})/' => 'Lookbehind longer than 255 not implemented in regex m/(?<= x{1000})/', @@ -238,10 +238,10 @@ my @death = '/(?[ \0004 ])/' => 'Need exactly 3 octal digits {#} m/(?[ \0004 {#}])/', '/(?[ \05 ])/' => 'Need exactly 3 octal digits {#} m/(?[ \05 {#}])/', '/(?[ \o{1038} ])/' => 'Non-octal character {#} m/(?[ \o{1038{#}} ])/', - '/(?[ \o{} ])/' => 'Number with no digits {#} m/(?[ \o{}{#} ])/', + '/(?[ \o{} ])/' => 'Empty \o{} {#} m/(?[ \o{}{#} ])/', '/(?[ \x{defg} ])/' => 'Non-hex character {#} m/(?[ \x{defg{#}} ])/', '/(?[ \xabcdef ])/' => 'Use \\x{...} for more than two hex characters {#} m/(?[ \xabc{#}def ])/', - '/(?[ \x{} ])/' => 'Number with no digits {#} m/(?[ \x{}{#} ])/', + '/(?[ \x{} ])/' => 'Empty \x{} {#} m/(?[ \x{}{#} ])/', '/(?[ \cK + ) ])/' => 'Unexpected \')\' {#} m/(?[ \cK + ){#} ])/', '/(?[ \cK + ])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[ \cK + {#}])/', '/(?[ ( ) ])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[ ( ){#} ])/', @@ -261,14 +261,14 @@ my @death = 'm/(?[[a-\pM]])/' => 'False [] range "a-\pM" {#} m/(?[[a-\pM{#}]])/', 'm/(?[[\pM-x]])/' => 'False [] range "\pM-" {#} m/(?[[\pM-{#}x]])/', 'm/(?[[^\N{LATIN CAPITAL LETTER A WITH MACRON AND GRAVE}]])/' => '\N{} in inverted character class or as a range end-point is restricted to one character {#} m/(?[[^\N{U+100.300{#}}]])/', - 'm/(?[ \p{Digit} & (?(?[ \p{Thai} | \p{Lao} ]))])/' => 'Sequence (?(...) not recognized {#} m/(?[ \p{Digit} & (?({#}?[ \p{Thai} | \p{Lao} ]))])/', - 'm/(?[ \p{Digit} & (?:(?[ \p{Thai} | \p{Lao} ]))])/' => 'Expecting \'(?flags:(?[...\' {#} m/(?[ \p{Digit} & (?{#}:(?[ \p{Thai} | \p{Lao} ]))])/', + 'm/(?[ \p{Digit} & (?^(?[ \p{Thai} | \p{Lao} ]))])/' => 'Sequence (?^(...) not recognized {#} m/(?[ \p{Digit} & (?^({#}?[ \p{Thai} | \p{Lao} ]))])/', + 'm/(?[ \p{Digit} & (?(?[ \p{Thai} | \p{Lao} ]))])/' => 'Unexpected character {#} m/(?[ \p{Digit} & (?{#}(?[ \p{Thai} | \p{Lao} ]))])/', 'm/\o{/' => 'Missing right brace on \o{ {#} m/\o{{#}/', 'm/\o/' => 'Missing braces on \o{} {#} m/\o{#}/', - 'm/\o{}/' => 'Number with no digits {#} m/\o{}{#}/', + 'm/\o{}/' => 'Empty \o{} {#} m/\o{}{#}/', 'm/[\o{]/' => 'Missing right brace on \o{ {#} m/[\o{{#}]/', 'm/[\o]/' => 'Missing braces on \o{} {#} m/[\o{#}]/', - 'm/[\o{}]/' => 'Number with no digits {#} m/[\o{}{#}]/', + 'm/[\o{}]/' => 'Empty \o{} {#} m/[\o{}{#}]/', 'm/(?^-i:foo)/' => 'Sequence (?^-...) not recognized {#} m/(?^-{#}i:foo)/', 'm/\87/' => 'Reference to nonexistent group {#} m/\87{#}/', 'm/a\87/' => 'Reference to nonexistent group {#} m/a\87{#}/', @@ -307,10 +307,17 @@ my @death = '/\w{/' => 'Unescaped left brace in regex is illegal here {#} m/\w{{#}/', '/\q{/' => 'Unescaped left brace in regex is illegal here {#} m/\q{{#}/', '/\A{/' => 'Unescaped left brace in regex is illegal here {#} m/\A{{#}/', + '/.{, 4 }/' => 'Unescaped left brace in regex is illegal here {#} m/.{{#}, 4 }/', + '/[x]{, 4}/' => 'Unescaped left brace in regex is illegal here {#} m/[x]{{#}, 4}/', + '/\p{Latin}{,4 }/' => 'Unescaped left brace in regex is illegal here {#} m/\p{Latin}{{#},4 }/', '/(?<=/' => 'Sequence (?... not terminated {#} m/(?<={#}/', # [perl #128170] '/\p{vertical tab}/' => 'Can\'t find Unicode property definition "vertical tab" {#} m/\\p{vertical tab}{#}/', # [perl #132055] "/$bug133423/" => "Operand with no preceding operator {#} m/(?[(?^:(?[\\ - + '/[^/' => 'Unmatched [ {#} m/[{#}^/', # [perl #133767] + '/\p{Is_Other_Alphabetic=F}/ ' => 'Can\'t find Unicode property definition "Is_Other_Alphabetic=F" {#} m/\p{Is_Other_Alphabetic=F}{#}/', + '/\p{Is_Other_Alphabetic=F}/ ' => 'Can\'t find Unicode property definition "Is_Other_Alphabetic=F" {#} m/\p{Is_Other_Alphabetic=F}{#}/', + '/\x{100}(?(/' => 'Unknown switch condition (?(...)) {#} m/\\x{100}(?({#}/', # [perl #133896] + '/(?[\N{KEYCAP DIGIT NINE}/' => '\N{} in inverted character class or as a range end-point is restricted to one character {#} m/(?[\\N{U+39.FE0F.20E3{#}}/', # [perl #133988] ); # These are messages that are death under 'use re "strict"', and may or may @@ -337,9 +344,9 @@ my @death_only_under_strict = ( 'm/[\o{789}]/' => 'Non-octal character \'8\'. Resolved as "\o{7}"', => 'Non-octal character {#} m/[\o{78{#}9}]/', 'm/\x{}/' => "", - => 'Number with no digits {#} m/\x{}{#}/', + => 'Empty \x{} {#} m/\x{}{#}/', 'm/[\x{}]/' => "", - => 'Number with no digits {#} m/[\x{}{#}]/', + => 'Empty \x{} {#} m/[\x{}{#}]/', 'm/\x{ABCDEFG}/' => 'Illegal hexadecimal digit \'G\' ignored', => 'Non-hex character {#} m/\x{ABCDEFG{#}}/', 'm/[\x{ABCDEFG}]/' => 'Illegal hexadecimal digit \'G\' ignored', @@ -392,19 +399,27 @@ my @death_only_under_strict = ( => 'False [] range "[:digit:]-" {#} m/[[:digit:]-{#}[:alpha:]]\x{100}/', '/[a\zb]\x{100}/' => 'Unrecognized escape \z in character class passed through {#} m/[a\z{#}b]\x{100}/', => 'Unrecognized escape \z in character class {#} m/[a\z{#}b]\x{100}/', - 'default_on/:{4,a}/' => 'Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through {#} m/:{{#}4,a}/', - => 'Unescaped left brace in regex is illegal here {#} m/:{{#}4,a}/', - 'default_on/xa{3\,4}y/' => 'Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through {#} m/xa{{#}3\,4}y/', - => 'Unescaped left brace in regex is illegal here {#} m/xa{{#}3\,4}y/', - 'default_on/\\${[^\\}]*}/' => 'Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through {#} m/\\${{#}[^\\}]*}/', - => 'Unescaped left brace in regex is illegal here {#} m/\\${{#}[^\\}]*}/', '/[ab]/' => "", => 'Literal vertical space in [] is illegal except under /x {#} m/[a{#}b]/', + '/:{4,a}/' => 'Unescaped left brace in regex is passed through {#} m/:{{#}4,a}/', + => 'Unescaped left brace in regex is illegal here {#} m/:{{#}4,a}/', + '/xa{3\,4}y/' => 'Unescaped left brace in regex is passed through {#} m/xa{{#}3\,4}y/', + => 'Unescaped left brace in regex is illegal here {#} m/xa{{#}3\,4}y/', + '/\\${[^\\}]*}/' => 'Unescaped left brace in regex is passed through {#} m/\\${{#}[^\\}]*}/', + => 'Unescaped left brace in regex is illegal here {#} m/\\${{#}[^\\}]*}/', + '/.{/' => 'Unescaped left brace in regex is passed through {#} m/.{{#}/', + => 'Unescaped left brace in regex is illegal here {#} m/.{{#}/', + '/[x]{/' => 'Unescaped left brace in regex is passed through {#} m/[x]{{#}/', + => 'Unescaped left brace in regex is illegal here {#} m/[x]{{#}/', + '/\p{Latin}{/' => 'Unescaped left brace in regex is passed through {#} m/\p{Latin}{{#}/', + => 'Unescaped left brace in regex is illegal here {#} m/\p{Latin}{{#}/', + '/\x{100}\x/' => "", + => "Empty \\x {#} m/\\x{100}\\x{#}/", ); # These need the character 'ネ' as a marker for mark_as_utf8() my @death_utf8 = mark_as_utf8( - '/ネ(?<= .*)/' => 'Variable length lookbehind not implemented in regex m/ネ(?<= .*)/', + '/ネ(?<= .*)/' => 'Lookbehind longer than 255 not implemented in regex m/ネ(?<= .*)/', '/(?<= ネ{1000})/' => 'Lookbehind longer than 255 not implemented in regex m/(?<= ネ{1000})/', @@ -466,7 +481,7 @@ my @death_utf8 = mark_as_utf8( '/ネ(?[ \cK [ネ] ])ネ/' => 'Operand with no preceding operator {#} m/ネ(?[ \cK [ネ{#}] ])ネ/', '/ネ(?[ \0004 ])ネ/' => 'Need exactly 3 octal digits {#} m/ネ(?[ \0004 {#}])ネ/', '/(?[ \o{ネ} ])ネ/' => 'Non-octal character {#} m/(?[ \o{ネ{#}} ])ネ/', - '/ネ(?[ \o{} ])ネ/' => 'Number with no digits {#} m/ネ(?[ \o{}{#} ])ネ/', + '/ネ(?[ \o{} ])ネ/' => 'Empty \o{} {#} m/ネ(?[ \o{}{#} ])ネ/', '/(?[ \x{ネ} ])ネ/' => 'Non-hex character {#} m/(?[ \x{ネ{#}} ])ネ/', '/(?[ \p{ネ} ])/' => 'Can\'t find Unicode property definition "ネ" {#} m/(?[ \p{ネ}{#} ])/', '/(?[ \p{ ネ = bar } ])/' => 'Can\'t find Unicode property definition "ネ = bar" {#} m/(?[ \p{ ネ = bar }{#} ])/', @@ -523,6 +538,7 @@ my @warning = ( '/(?=a)*/' => '(?=a)* matches null string many times {#} m/(?=a)*{#}/', 'my $x = \'\m\'; qr/a$x/' => 'Unrecognized escape \m passed through {#} m/a\m{#}/', '/\q/' => 'Unrecognized escape \q passed through {#} m/\q{#}/', + '/\q\p{Any}/' => 'Unrecognized escape \q passed through {#} m/\q{#}\p{Any}/', # These two tests do not include the marker, because regcomp.c no # longer knows where it goes by the time this warning is emitted. @@ -659,6 +675,7 @@ my @warning_only_under_strict = ( "/[$low_mixed_digit-$high_mixed_digit]/" => "Ranges of ASCII printables should be some subset of \"0-9\", \"A-Z\", or \"a-z\" {#} m/[$low_mixed_digit-$high_mixed_digit\{#}]/", '/\b<GCB}/' => 'Unescaped literal \'}\' {#} m/\b<GCB}{#}/', '/[ ]def]/' => 'Unescaped literal \']\' {#} m/[ ]def]{#}/', + '/(?)/' => 'Empty (?) without any modifiers {#} m/(?){#}/', # [perl #132851] ); my @warning_utf8_only_under_strict = mark_as_utf8( @@ -693,9 +710,6 @@ my @deprecated = ( '/foo(:?{bar)/' => "", '/\s*{/' => "", '/a{3,4}{/' => "", - '/.{/' => 'Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through {#} m/.{{#}/', - '/[x]{/' => 'Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through {#} m/[x]{{#}/', - '/\p{Latin}{/' => 'Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through {#} m/\p{Latin}{{#}/', ); for my $strict ("", "use re 'strict';") { @@ -725,6 +739,7 @@ for my $strict ("", "use re 'strict';") { no warnings 'experimental::regex_sets'; no warnings 'experimental::script_run'; no warnings 'experimental::re_strict'; + no warnings 'experimental::alpha_assertions'; warning_is(sub { my $meaning_of_life; diff --git a/gnu/usr.bin/perl/t/re/regex_sets.t b/gnu/usr.bin/perl/t/re/regex_sets.t index e70df81254f..fc089a90b69 100644 --- a/gnu/usr.bin/perl/t/re/regex_sets.t +++ b/gnu/usr.bin/perl/t/re/regex_sets.t @@ -215,6 +215,11 @@ for my $char ("٠", "٥", "٩") { 'qr/(?[ ( \p{Uppercase} ) + (\p{Lowercase} - ([a] + [b])) ]) compiles and properly matches'); } +{ # [perl #133889] Caused assertion failure + fresh_perl_like('no warnings "experimental::regex_sets"; + qr/(?[\P{Is0}])/', qr/\QUnknown user-defined property name "Is0"/, {}, "[perl #133889]"); +} + done_testing(); 1; diff --git a/gnu/usr.bin/perl/t/re/regexp.t b/gnu/usr.bin/perl/t/re/regexp.t index 037d7b7a488..1ad028bcd2e 100755 --- a/gnu/usr.bin/perl/t/re/regexp.t +++ b/gnu/usr.bin/perl/t/re/regexp.t @@ -105,6 +105,7 @@ sub convert_from_ascii { use strict; use warnings FATAL=>"all"; +no warnings 'experimental::vlb'; our ($bang, $ffff, $nulnul); # used by the tests our ($qr, $skip_amp, $qr_embed, $qr_embed_thr, $regex_sets, $alpha_assertions, $no_null); # set by our callers diff --git a/gnu/usr.bin/perl/t/re/regexp_unicode_prop.t b/gnu/usr.bin/perl/t/re/regexp_unicode_prop.t index 42191dfb211..6df29687358 100755 --- a/gnu/usr.bin/perl/t/re/regexp_unicode_prop.t +++ b/gnu/usr.bin/perl/t/re/regexp_unicode_prop.t @@ -6,7 +6,14 @@ use strict; use warnings; -use 5.010; +use v5.16; +use utf8; + +# To verify that messages containing the expansions work on UTF-8 +my $utf8_comment; + +my @warnings; +local $SIG {__WARN__} = sub {push @warnings, "@_"}; BEGIN { chdir 't' if -d 't'; @@ -16,6 +23,27 @@ BEGIN { sub run_tests; +sub get_str_name($) { + my $char = shift; + + my ($str, $name); + + if ($char =~ /^\\/) { + $str = eval qq ["$char"]; + $name = qq ["$char"]; + } + elsif ($char =~ /^0x([0-9A-Fa-f]+)$/) { + $str = chr hex $1; + $name = "chr ($char)"; + } + else { + $str = $char; + $name = qq ["$char"]; + } + + return ($str, $name); +} + # # This is the data to test. # @@ -81,38 +109,96 @@ my @CLASSES = ( ); -my @USER_DEFINED_PROPERTIES = ( - # - # User defined properties - # - InKana1 => ['\x{3040}', '!\x{303F}'], - InKana2 => ['\x{3040}', '!\x{303F}'], - InKana3 => ['\x{3041}', '!\x{3040}'], - InNotKana => ['\x{3040}', '!\x{3041}'], - InConsonant => ['d', '!e'], - IsSyriac1 => ['\x{0712}', '!\x{072F}'], - IsSyriac1KanaMark => ['\x{309A}', '!\x{3090}'], - IsSyriac1KanaMark => ['\x{0730}', '!\x{0712}'], - '# User-defined character properties may lack \n at the end', - InGreekSmall => ['\N{GREEK SMALL LETTER PI}', - '\N{GREEK SMALL LETTER FINAL SIGMA}'], - InGreekCapital => ['\N{GREEK CAPITAL LETTER PI}', '!\x{03A2}'], - Dash => ['-'], - ASCII_Hex_Digit => ['!-', 'A'], - IsAsciiHexAndDash => ['-', 'A'], - - # This overrides the official one - InLatin1 => ['\x{0100}', '!\x{00FF}'], -); +my @USER_DEFINED_PROPERTIES; +my @USER_CASELESS_PROPERTIES; +my @USER_ERROR_PROPERTIES; +my @DEFERRED; +my $overflow; +BEGIN { + $utf8_comment = "#\N{U+30CD}"; + + use Config; + $overflow = $Config{uvsize} < 8 ? "80000000" : "80000000000000000"; + + # We defined these at compile time, so that the subroutines that they + # refer to aren't known, so that we can test properties not known until + # runtime + + @USER_DEFINED_PROPERTIES = ( + # + # User defined properties + # + InKana1 => ['\x{3040}', '!\x{303F}'], + InKana2 => ['\x{3040}', '!\x{303F}'], + InKana3 => ['\x{3041}', '!\x{3040}'], + InNotKana => ['\x{3040}', '!\x{3041}'], + InConsonant => ['d', '!e'], + IsSyriac1 => ['\x{0712}', '!\x{072F}'], + IsSyriac1KanaMark => ['\x{309A}', '!\x{3090}'], + IsSyriac1KanaMark => ['\x{0730}', '!\x{0712}'], + '# User-defined character properties may lack \n at the end', + InGreekSmall => ['\N{GREEK SMALL LETTER PI}', + '\N{GREEK SMALL LETTER FINAL SIGMA}'], + InGreekCapital => ['\N{GREEK CAPITAL LETTER PI}', '!\x{03A2}'], + Dash => ['-'], + ASCII_Hex_Digit => ['!-', 'A'], + IsAsciiHexAndDash => ['-', 'A'], + ); + + @USER_CASELESS_PROPERTIES = ( + # + # User defined properties which differ depending on /i. Second entry + # is false normally, true under /i + # + 'IsMyUpper' => ["M", "!m" ], + 'pkg1::pkg2::IsMyLower' => ["a", "!A" ], + ); + + @USER_ERROR_PROPERTIES = ( + 'IsOverflow' => qr/Code point too large in (?# + )"0\t$overflow$utf8_comment" in expansion of (?# + )main::IsOverflow/, + 'InRecursedA' => qr/Infinite recursion in user-defined property (?# + )"main::InRecursedA" in expansion of (?# + )main::InRecursedC in expansion of (?# + )main::InRecursedB in expansion of (?# + )main::InRecursedA/, + 'IsRangeReversed' => qr/Illegal range in "200 100$utf8_comment" in (?# + )expansion of main::IsRangeReversed/, + 'IsNonHex' => qr/Can't find Unicode property definition (?# + )"BEEF CAGED" in expansion of main::IsNonHex/, + + # Could have \n, hence /s + 'IsDeath' => qr/Died.* in expansion of main::IsDeath/s, + ); + + # Now create a list of properties whose definitions won't be known at + # runtime. The qr// below thus will have forward references to them, and + # when matched at runtime will not know what's in the property definition + my @DEFERRABLE_USER_DEFINED_PROPERTIES; + push @DEFERRABLE_USER_DEFINED_PROPERTIES, @USER_DEFINED_PROPERTIES; + push @DEFERRABLE_USER_DEFINED_PROPERTIES, @USER_CASELESS_PROPERTIES; + unshift @DEFERRABLE_USER_DEFINED_PROPERTIES, @USER_ERROR_PROPERTIES; + for (my $i = 0; $i < @DEFERRABLE_USER_DEFINED_PROPERTIES; $i+=2) { + my $property = $DEFERRABLE_USER_DEFINED_PROPERTIES[$i]; + if ($property =~ / ^ \# /x) { + $i++; + redo; + } -my @USER_CASELESS_PROPERTIES = ( - # - # User defined properties which differ depending on /i. Second entry is - # false normally, true under /i - # - 'IsMyUpper' => ["M", "!m" ], -); + # Only do this for the properties in the list that are user-defined + next if ($property !~ / ( ^ | :: ) I[ns] /x); + push @DEFERRED, qr/\p{$property}/, + $DEFERRABLE_USER_DEFINED_PROPERTIES[$i+1]; + } +} + +# These override the official ones, so if found before defined, the official +# ones prevail, so can't test deferred definition +my @OVERRIDING_USER_DEFINED_PROPERTIES = ( + InLatin1 => ['\x{0100}', '!\x{00FF}'], +); # # From the short properties we populate POSIX-like classes. @@ -163,7 +249,8 @@ while (my ($class, $chars) = each %SHORT_PROPERTIES) { push @CLASSES => "# Short properties" => %SHORT_PROPERTIES, "# POSIX like properties" => %d, - "# User defined properties" => @USER_DEFINED_PROPERTIES; + "# User defined properties" => @USER_DEFINED_PROPERTIES, + "# Overriding user defined properties" => @OVERRIDING_USER_DEFINED_PROPERTIES; # @@ -177,7 +264,10 @@ for (my $i = 0; $i < @CLASSES; $i += 2) { $count += 4 * @ILLEGAL_PROPERTIES; $count += 4 * grep {length $_ == 1} @ILLEGAL_PROPERTIES; $count += 8 * @USER_CASELESS_PROPERTIES; -$count += 1; # Test for pkg:IsMyLower +$count += 1 * (@DEFERRED - @USER_ERROR_PROPERTIES) / 2; +$count += 1 * @USER_ERROR_PROPERTIES; +$count += 1; # one bad apple +$count += 1; # No warnings generated plan(tests => $count); @@ -188,36 +278,42 @@ sub match { $caseless = "" unless defined $caseless; $caseless = 'i' if $caseless; - my ($str, $name); - - if ($char =~ /^\\/) { - $str = eval qq ["$char"]; - $name = qq ["$char"]; - } - elsif ($char =~ /^0x([0-9A-Fa-f]+)$/) { - $str = chr hex $1; - $name = "chr ($char)"; - } - else { - $str = $char; - $name = qq ["$char"]; - } + my ($str, $name) = get_str_name($char); undef $@; my $pat = "qr/$match/$caseless"; my $match_pat = eval $pat; - is($@, '', "$pat compiled correctly to a regexp: $@"); - like($str, $match_pat, "$name correctly matched"); + if (is($@, '', "$pat compiled correctly to a regexp: $@")) { + like($str, $match_pat, "$name correctly matched"); + } undef $@; $pat = "qr/$nomatch/$caseless"; my $nomatch_pat = eval $pat; - is($@, '', "$pat compiled correctly to a regexp: $@"); - unlike($str, $nomatch_pat, "$name correctly did not match"); + if (is($@, '', "$pat compiled correctly to a regexp: $@")) { + unlike($str, $nomatch_pat, "$name correctly did not match"); + } } sub run_tests { + for (my $i = 0; $i < @DEFERRED; $i+=2) { + if (ref $DEFERRED[$i+1] eq 'ARRAY') { + my ($str, $name) = get_str_name($DEFERRED[$i+1][0]); + like($str, $DEFERRED[$i], + "$name correctly matched $DEFERRED[$i] (defn. not known until runtime)"); + } + else { # Single entry rhs indicates a property that is an error + undef $@; + + # Using block eval causes the pattern to not be recompiled, so it + # retains its deferred status until this is executed. + eval { 'A' =~ $DEFERRED[$i] }; + like($@, $DEFERRED[$i+1], + "$DEFERRED[$i] gave correct failure message (defn. not known until runtime)"); + } + } + while (@CLASSES) { my $class = shift @CLASSES; if ($class =~ /^\h*#\h*(.*)/) { @@ -282,15 +378,24 @@ sub run_tests { my $in_pat = eval qq ['\\p{$class}']; my $out_pat = eval qq ['\\P{$class}']; + # Verify that adding /i does change the out set to match. + match $_, $in_pat, $out_pat, 'i' for @out; + + # Verify that adding /i doesn't change the in set. + match $_, $in_pat, $out_pat, 'i' for @in; + # Verify works as regularly for not /i match $_, $in_pat, $out_pat for @in; match $_, $out_pat, $in_pat for @out; + } - # Verify that adding /i doesn't change the in set. - match $_, $in_pat, $out_pat, 'i' for @in; + print "# User-defined properties with errors in their definition\n"; + while (my $error_property = shift @USER_ERROR_PROPERTIES) { + my $error_re = shift @USER_ERROR_PROPERTIES; - # Verify that adding /i does change the out set to match. - match $_, $in_pat, $out_pat, 'i' for @out; + undef $@; + eval { 'A' =~ /\p{$error_property}/; }; + like($@, $error_re, "$error_property gave correct failure message"); } } @@ -300,8 +405,8 @@ sub run_tests { # sub InKana1 {<<'--'} -3040 309F -30A0 30FF +3040 309F # A comment; next line has trailing spaces +30A0 30FF -- sub InKana2 {<<'--'} @@ -310,15 +415,18 @@ sub InKana2 {<<'--'} -- sub InKana3 {<<'--'} +# First line comment +utf8::InHiragana +# Full line comment +utf8::InKatakana -utf8::IsCn -- sub InNotKana {<<'--'} -!utf8::InHiragana --utf8::InKatakana +!utf8::InHiragana # A comment; next line has trailing spaces +-utf8::InKatakana +utf8::IsCn +# Final line comment -- sub InConsonant { @@ -337,6 +445,18 @@ sub IsSyriac1 {<<'--'} 0730 074A -- +sub InRecursedA { + return "+main::InRecursedB\n"; +} + +sub InRecursedB { + return "+main::InRecursedC\n"; +} + +sub InRecursedC { + return "+main::InRecursedA\n"; +} + sub InGreekSmall {return "03B1\t03C9"} sub InGreekCapital {return "0391\t03A9\n-03A2"} @@ -350,32 +470,46 @@ sub InLatin1 { } sub IsMyUpper { + use feature 'state'; + + state $cased_count = 0; + state $caseless_count = 0; + my $ret= "+utf8::"; + my $caseless = shift; - return "+utf8::" - . (($caseless) - ? 'Alphabetic' - : 'Uppercase') - . "\n&utf8::ASCII"; -} + if($caseless) { + die "Called twice" if $caseless_count; + $caseless_count++; + $ret .= 'Alphabetic' + } + else { + die "Called twice" if $cased_count; + $cased_count++; + $ret .= 'Uppercase'; + } -{ # This has to be done here and not like the others, because we have to - # make sure that the property is not known until after the regex is - # compiled. It was previously getting confused about the pkg and /i - # combination + return $ret . "\n&utf8::ASCII"; +} - my $mylower = qr/\p{pkg::IsMyLower}/i; +sub pkg1::pkg2::IsMyLower { + my $caseless = shift; + return "+utf8::" + . (($caseless) + ? 'Alphabetic' + : 'Lowercase') + . "\n&utf8::ASCII"; +} - sub pkg::IsMyLower { - my $caseless = shift; - return "+utf8::" - . (($caseless) - ? 'Alphabetic' - : 'Lowercase') - . "\n&utf8::ASCII"; - } +sub IsRangeReversed { + return "200 100$utf8_comment"; +} - like("A", $mylower, "Not available until runtime user-defined property with pkg:: and /i works"); +sub IsNonHex { + return "BEEF CAGED$utf8_comment"; +} +sub IsDeath { + die; } # Verify that can use user-defined properties inside another one @@ -396,4 +530,20 @@ sub ISfoo { die } sub INfoo { die } sub Is::foo { die } sub In::foo { die } + +sub IsOverflow { + return "0\t$overflow$utf8_comment"; +} + +fresh_perl_like(<<'EOP', qr/Can't find Unicode property definition "F000\\tF010" in expansion of InOneBadApple/, {}, "Just one component bad"); +# Extra backslash converts tab to backslash-t +sub InOneBadApple { return "0100\t0110\n10000\t10010\nF000\\tF010\n0400\t0410" } +qr/\p{InOneBadApple}/; +EOP + +if (! is(@warnings, 0, "No warnings were generated")) { + diag join "\n", @warnings, "\n"; +} + +1; __END__ diff --git a/gnu/usr.bin/perl/t/re/script_run.t b/gnu/usr.bin/perl/t/re/script_run.t index 4b098c5129b..21cacec8be2 100644 --- a/gnu/usr.bin/perl/t/re/script_run.t +++ b/gnu/usr.bin/perl/t/re/script_run.t @@ -92,10 +92,17 @@ foreach my $type ('script_run', 'sr', 'atomic_script_run', 'asr') { } # Until fixed, this was skipping the '[' - unlike("abc]c", qr/^ (*sr:a(*sr:[bc]*)c) $/x, "Doesn't skip parts of exact matches"); + unlike("abc]c", qr/^ (*sr:a(*sr:[bc]*)c) $/x, + "Doesn't skip parts of exact matches"); - like("abc", qr/(*asr:a[bc]*c)/, "Outer asr works on a run"); - unlike("abc", qr/(*asr:a(*asr:[bc]*)c)/, "Nested asr works to exclude some things"); + like("abc", qr/(*asr:a[bc]*c)/, "Outer asr works on a run"); + unlike("abc", qr/(*asr:a(*asr:[bc]*)c)/, + "Nested asr works to exclude some things"); + + like("\x{0980}12\x{0993}", qr/^(*sr:.{4})/, + "Script with own zero works with ASCII digits"); # perl #133547 + like("\x{3041}12\x{3041}", qr/^(*sr:.{4})/, + "Script without own zero works with ASCII digits"); like("A\x{ff10}\x{ff19}B", qr/^(*sr:.{4})/, "Non-ASCII Common digits work with Latin"); # perl #133547 @@ -112,4 +119,8 @@ foreach my $type ('script_run', 'sr', 'atomic_script_run', 'asr') { like("\x{1d7ce}αβγ", qr/^(*sr:.{4})/, "Non-ASCII Common digits work with Greek"); # perl #133547 + fresh_perl_is('no warnings "experimental::script_run"; + print scalar "0" =~ m!(((*sr:()|)0)(*sr:)0|)!;', + 1, {}, '[perl #133997]'); + done_testing(); diff --git a/gnu/usr.bin/perl/t/re/speed.t b/gnu/usr.bin/perl/t/re/speed.t index af0e4330a01..9a57de167f8 100644 --- a/gnu/usr.bin/perl/t/re/speed.t +++ b/gnu/usr.bin/perl/t/re/speed.t @@ -42,7 +42,7 @@ run_tests() unless caller; sub run_tests { - watchdog(($::running_as_thread && $::running_as_thread) ? 150 : 540); + watchdog(($::running_as_thread && $::running_as_thread) ? 150 : 225); { # [perl #120446] @@ -153,7 +153,7 @@ PROG my $substr= substr( $str, 1 ); 1 while $substr=~m/0/g; $elapsed += time; - ok( $elapsed <= 2, "should not COW on long string with substr and m//g"); + ok( $elapsed <= 1, "should not COW on long string with substr and m//g"); } # [perl #133185] Infinite loop diff --git a/gnu/usr.bin/perl/t/re/subst.t b/gnu/usr.bin/perl/t/re/subst.t index d0fb0486748..2ce08049e0a 100755 --- a/gnu/usr.bin/perl/t/re/subst.t +++ b/gnu/usr.bin/perl/t/re/subst.t @@ -11,7 +11,7 @@ BEGIN { require './loc_tools.pl'; } -plan(tests => 277); +plan(tests => 278); $_ = 'david'; $a = s/david/rules/r; @@ -1174,6 +1174,12 @@ __EOF__ is $lines, 4, "RT #131930"; } +{ # [perl #133899], would panic + + fresh_perl_is('my $a = "ha"; $a =~ s!|0?h\x{300}(?{})!!gi', "", {}, + "[perl #133899] s!|0?h\\x{300}(?{})!!gi panics"); +} + { fresh_perl_is("s//00000000000format \0 '0000000\\x{800}/;eval", "", {}, "RT #133882"); } diff --git a/gnu/usr.bin/perl/t/run/locale.t b/gnu/usr.bin/perl/t/run/locale.t index 282fbb5f867..78cfc2ff721 100644 --- a/gnu/usr.bin/perl/t/run/locale.t +++ b/gnu/usr.bin/perl/t/run/locale.t @@ -23,6 +23,7 @@ use Config; my $have_strtod = $Config{d_strtod} eq 'define'; my @locales = find_locales( [ 'LC_ALL', 'LC_CTYPE', 'LC_NUMERIC' ]); skip_all("no locales available") unless @locales; +note("locales available: @locales"); my $debug = 0; my $switches = ""; @@ -67,12 +68,13 @@ EOF my $non_C_locale; foreach my $locale (@locales) { - next if $locale eq "C" || $locale eq 'POSIX'; + next if $locale eq "C" || $locale eq 'POSIX' || $locale eq "C.UTF-8"; $non_C_locale = $locale; last; } if ($non_C_locale) { + note("using non-C locale '$non_C_locale'"); setlocale(LC_NUMERIC, $non_C_locale); isnt(setlocale(LC_NUMERIC), "C", "retrieving current non-C LC_NUMERIC doesn't give 'C'"); setlocale(LC_ALL, $non_C_locale); @@ -80,7 +82,7 @@ if ($non_C_locale) { my @test_numeric_locales = @locales; - # Skip this locale on these cywgwin versions as the returned radix character + # Skip this locale on these cygwin versions as the returned radix character # length is wrong if ( $^O eq 'cygwin' && version->new(($Config{'osvers'} =~ /^(\d+(?:\.\d+)+)/)[0]) le v2.4.1) @@ -164,7 +166,8 @@ EOF . " radix is marked UTF-8"); } - if ($different) { + SKIP: { + skip("no locale available where LC_NUMERIC radix isn't '.'", 30) unless $different; note("using the '$different' locale for LC_NUMERIC tests"); { local $ENV{LC_NUMERIC} = $different; @@ -438,6 +441,7 @@ EOF EOF "1,5\n2,5", { stderr => 'devnull' }, "Can do math when radix is a comma"); # [perl 115800] + SKIP: { unless ($have_strtod) { skip("no strtod()", 1); } @@ -451,10 +455,22 @@ EOF EOF "1.5", { stderr => 'devnull' }, "POSIX::strtod() uses underlying locale"); } + } } } - { +SKIP: { + # Note: the setlocale Configure probe could be enhanced to give us the + # syntax to use, but khw doesn't think it's worth it at this time, as + # the current outliers seem to be skipped by the test just below + # anyway. If the POSIX 2008 locale functions are being used, the + # syntax becomes mostly irrelevant, so do the test anyway if they are. + # It's a lot of trouble to figure out in a perl script. + if ($Config{d_setlocale_accepts_any_locale_name} eq 'true') + { + skip("Can't distinguish between valid and invalid locale names on this system", 2); + } + my @valid_categories = valid_locale_categories(); my $valid_string = ""; diff --git a/gnu/usr.bin/perl/t/test.pl b/gnu/usr.bin/perl/t/test.pl index 868911ce394..25eae4009d6 100644 --- a/gnu/usr.bin/perl/t/test.pl +++ b/gnu/usr.bin/perl/t/test.pl @@ -19,6 +19,7 @@ # In this file, we use the latter "Baby Perl" approach, and increment # will be worked over by t/op/inc.t +$| = 1; $Level = 1; my $test = 1; my $planned; @@ -199,7 +200,9 @@ sub find_git_or_skip { $source_dir = '.' } } - if ($source_dir) { + if ($ENV{'PERL_BUILD_PACKAGING'}) { + $reason = 'PERL_BUILD_PACKAGING is set'; + } elsif ($source_dir) { my $version_string = `git --version`; if (defined $version_string && $version_string =~ /\Agit version (\d+\.\d+\.\d+)(.*)/) { @@ -212,9 +215,6 @@ sub find_git_or_skip { } else { $reason = 'not being run from a git checkout'; } - if ($ENV{'PERL_BUILD_PACKAGING'}) { - $reason = 'PERL_BUILD_PACKAGING is set'; - } skip_all($reason) if $_[0] && $_[0] eq 'all'; skip($reason, @_); } @@ -1746,4 +1746,18 @@ WATCHDOG_VIA_ALARM: } } +# Orphaned Docker or Linux containers do not necessarily attach to PID 1. They might attach to 0 instead. +sub is_linux_container { + + if ($^O eq 'linux' && open my $fh, '<', '/proc/1/cgroup') { + while(<$fh>) { + if (m{^\d+:pids:(.*)} && $1 ne '/init.scope') { + return 1; + } + } + } + + return 0; +} + 1; diff --git a/gnu/usr.bin/perl/t/uni/class.t b/gnu/usr.bin/perl/t/uni/class.t index 37392aabed0..572a5380042 100644 --- a/gnu/usr.bin/perl/t/uni/class.t +++ b/gnu/usr.bin/perl/t/uni/class.t @@ -5,7 +5,7 @@ BEGIN { skip_all_without_unicode_tables(); } -plan tests => 11; +plan tests => 12; my $str = join "", map { chr utf8::unicode_to_native($_) } 0x20 .. 0x6F; @@ -88,5 +88,22 @@ $str = "[\x{038B}\x{038C}\x{038D}]"; is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); +{ # [perl #133860], compilation before data for it is available + package Foo; + + sub make { + my @lines; + while( my($c) = splice(@_,0,1) ) { + push @lines, sprintf("%04X", $c); + } + return join "\n", @lines; + } + + my @characters = ( ord("a") ); + sub IsProperty { make(@characters); }; + + main::like('a', qr/\p{IsProperty}/, "foo"); +} + # The other tests that are based on looking at the generated files are now # in t/re/uniprops.t diff --git a/gnu/usr.bin/perl/t/uni/fold.t b/gnu/usr.bin/perl/t/uni/fold.t index 949ed97c984..bd1dd8596bd 100644 --- a/gnu/usr.bin/perl/t/uni/fold.t +++ b/gnu/usr.bin/perl/t/uni/fold.t @@ -155,6 +155,22 @@ foreach my $test_ref (@CF) { # since they use '$u', they are left out of the main loop $test = qq[ my \$s = ":$u:"; utf8::upgrade(\$s); \$s =~ /:[_$c]:/i]; ok eval $test, "$code - $name - $mapping - $type - $test"; + + my $bracketed_f = ($f =~ s/(.)/[$1]/gr); + $test = qq[":$c:" =~ /:$bracketed_f:/iu]; + ok eval $test, "$code - $name - $mapping - $type - $test"; + + my @f_chars = ($f =~ / (.) (.) (.?) /x); + my $every_other_bracketed_f = "[$f_chars[0]]$f_chars[1]"; + $every_other_bracketed_f .= "[$f_chars[2]]" if $f_chars[2]; + $test = qq[":$c:" =~ /:$every_other_bracketed_f:/iu]; + ok eval $test, "$code - $name - $mapping - $type - $test"; + + my $other_every_bracketed_f = "$f_chars[0]"; + $other_every_bracketed_f .= "[$f_chars[1]]"; + $other_every_bracketed_f .= "$f_chars[2]" if $f_chars[2]; + $test = qq[":$c:" =~ /:$other_every_bracketed_f:/iu]; + ok eval $test, "$code - $name - $mapping - $type - $test"; } } diff --git a/gnu/usr.bin/perl/t/uni/overload.t b/gnu/usr.bin/perl/t/uni/overload.t index 8e722c850e8..161484500ef 100644 --- a/gnu/usr.bin/perl/t/uni/overload.t +++ b/gnu/usr.bin/perl/t/uni/overload.t @@ -9,7 +9,7 @@ BEGIN { set_up_inc( '../lib' ); } -plan(tests => 217); +plan(tests => 193); package UTF8Toggle; use strict; @@ -158,8 +158,8 @@ my $tmpfile = tempfile(); foreach my $operator ('print', 'syswrite', 'syswrite len', 'syswrite off', 'syswrite len off') { - foreach my $layer ('', ':utf8') { - open my $fh, "+>$layer", $tmpfile or die $!; + foreach my $layer ('', $operator =~ /syswrite/ ? () : (':utf8')) { + open my $fh, "+>:raw$layer", $tmpfile or die $!; my $pad = $operator =~ /\boff\b/ ? "\243" : ""; my $trail = $operator =~ /\blen\b/ ? "!" : ""; my $u = UTF8Toggle->new("$pad$E_acute\n$trail"); diff --git a/gnu/usr.bin/perl/t/uni/parser.t b/gnu/usr.bin/perl/t/uni/parser.t index 2d24f1d06d3..0df238428f7 100644 --- a/gnu/usr.bin/perl/t/uni/parser.t +++ b/gnu/usr.bin/perl/t/uni/parser.t @@ -10,7 +10,7 @@ BEGIN { skip_all_without_unicode_tables(); } -plan (tests => 57); +plan (tests => 58); use utf8; use open qw( :utf8 :std ); @@ -261,6 +261,13 @@ SKIP: { } } +fresh_perl_is(<<'EOS', <<'EXPECT', {}, 'no panic in pad_findmy_pvn (#134061)'); +use utf8; +eval "sort \x{100}%"; +die $@; +EOS +syntax error at (eval 1) line 1, at EOF +EXPECT # New tests go here ^^^^^ diff --git a/gnu/usr.bin/perl/t/uni/readline.t b/gnu/usr.bin/perl/t/uni/readline.t index 893a2908932..253efe3a423 100644 --- a/gnu/usr.bin/perl/t/uni/readline.t +++ b/gnu/usr.bin/perl/t/uni/readline.t @@ -29,8 +29,7 @@ like($@, qr/Modification of a read-only value attempted/, '[perl #19566]'); use strict; my $err; { - no warnings qw(deprecated); - open ᕝ, '.' and sysread ᕝ, $_, 1; + open ᕝ, '.' and binmode ᕝ and sysread ᕝ, $_, 1; $err = $! + 0; close ᕝ; } diff --git a/gnu/usr.bin/perl/t/uni/upper.t b/gnu/usr.bin/perl/t/uni/upper.t index 252b51ce39f..3c8d8c2be77 100644 --- a/gnu/usr.bin/perl/t/uni/upper.t +++ b/gnu/usr.bin/perl/t/uni/upper.t @@ -11,8 +11,9 @@ use feature 'unicode_strings'; is(uc("\x{3B1}\x{345}\x{301}"), "\x{391}\x{301}\x{399}", 'Verify moves YPOGEGRAMMENI'); +fresh_perl_is('use 5.026;m.\U00ÿÿ0000.', "", {}, "[perl #133876] This caused valgrind and asan errors"); -casetest( 1, # extra tests already run +casetest( 2, # extra tests already run "Uppercase_Mapping", uc => sub { uc $_[0] }, uc_with_appended_null_arg => sub { my $a = ""; uc ($_[0] . $a) } diff --git a/gnu/usr.bin/perl/t/uni/variables.t b/gnu/usr.bin/perl/t/uni/variables.t index a1f7cc2d008..d8709a62b74 100644 --- a/gnu/usr.bin/perl/t/uni/variables.t +++ b/gnu/usr.bin/perl/t/uni/variables.t @@ -6,7 +6,6 @@ BEGIN { chdir 't' if -d 't'; require './test.pl'; - skip_all_if_miniperl("miniperl, no arybase"); skip_all_without_unicode_tables(); } @@ -15,7 +14,7 @@ use utf8; use open qw( :utf8 :std ); no warnings qw(misc reserved); -plan (tests => 66894); +plan (tests => 66880); # ${single:colon} should not be treated as a simple variable, but as a # block with a label inside. @@ -56,9 +55,8 @@ plan (tests => 66894); } # Checking that at least some of the special variables work -for my $v (qw( ^V ; < > ( ) {^GLOBAL_PHASE} ^W _ 1 4 0 [ ] ! @ / \ = )) { +for my $v (qw( ^V ; < > ( ) {^GLOBAL_PHASE} ^W _ 1 4 0 ] ! @ / \ = )) { SKIP: { - skip_if_miniperl('No $[ under miniperl', 2) if $v eq '['; local $@; evalbytes "\$$v;"; is $@, '', "No syntax error for \$$v"; @@ -136,6 +134,7 @@ for ( 0x0 .. 0xff ) { $tests++; } elsif ($chr =~ /[[:punct:][:digit:]]/a) { + next if ($chr eq '#' or $chr eq '*'); # RT 133583 # Unlike other variables, we dare not try setting the length-1 # variables that are ASCII punctuation and digits. This is |