summaryrefslogtreecommitdiff
path: root/gnu/usr.bin/perl
diff options
context:
space:
mode:
authorTodd C. Miller <millert@cvs.openbsd.org>2004-08-09 17:48:38 +0000
committerTodd C. Miller <millert@cvs.openbsd.org>2004-08-09 17:48:38 +0000
commitfcd51aefcee6b99d1de2040108f05931de2f2710 (patch)
tree53694e7b3df4e50a35d3d8a7ab76fb713b33791c /gnu/usr.bin/perl
parent1da0ec6edba07cd8d9c7ae52e35799093a90900f (diff)
Import of stock perl 5.8.5
Diffstat (limited to 'gnu/usr.bin/perl')
-rw-r--r--gnu/usr.bin/perl/lib/DBM_Filter/t/encode.t53
-rw-r--r--gnu/usr.bin/perl/lib/DBM_Filter/t/utf8.t48
-rw-r--r--gnu/usr.bin/perl/t/comp/utf.t139
-rw-r--r--gnu/usr.bin/perl/t/op/utftaint.t150
-rw-r--r--gnu/usr.bin/perl/t/uni/chomp.t28
-rw-r--r--gnu/usr.bin/perl/t/uni/class.t170
6 files changed, 313 insertions, 275 deletions
diff --git a/gnu/usr.bin/perl/lib/DBM_Filter/t/encode.t b/gnu/usr.bin/perl/lib/DBM_Filter/t/encode.t
index d7a8ef233f2..7b71a98b2ee 100644
--- a/gnu/usr.bin/perl/lib/DBM_Filter/t/encode.t
+++ b/gnu/usr.bin/perl/lib/DBM_Filter/t/encode.t
@@ -21,17 +21,7 @@ require "dbm_filter_util.pl";
use Test::More tests => 26;
BEGIN { use_ok('DBM_Filter') };
-my $db_file;
-BEGIN {
- use Config;
- foreach (qw/SDBM_File ODBM_File NDBM_File GDBM_File DB_File/) {
- if ($Config{extensions} =~ /\b$_\b/) {
- $db_file = $_;
- last;
- }
- }
- use_ok($db_file);
-};
+BEGIN { use_ok('SDBM_File') };
BEGIN { use_ok('Fcntl') };
BEGIN { use_ok('charnames', qw{greek})};
@@ -41,12 +31,12 @@ unlink <Op_dbmx*>;
END { unlink <Op_dbmx*>; }
my %h1 = () ;
-my $db1 = tie(%h1, $db_file,'Op_dbmx', O_RDWR|O_CREAT, 0640) ;
+my $db1 = tie(%h1, 'SDBM_File','Op_dbmx', O_RDWR|O_CREAT, 0640) ;
-ok $db1, "tied to $db_file";
+ok $db1, "tied to SDBM_File";
eval { $db1->Filter_Push('encode' => 'blah') };
-like $@, qr/^Encoding 'blah' is not available/, "push an illegal filter" ;
+like $@, qr/^Encoding 'blah' is not available/, "push an illigal filter" ;
eval { $db1->Filter_Push('encode') };
is $@, '', "push an 'encode' filter (default to utf-8)" ;
@@ -93,29 +83,18 @@ undef $db1;
# read the dbm file without the filter
my %h2 = () ;
-my $db2 = tie(%h2, $db_file,'Op_dbmx', O_RDWR|O_CREAT, 0640) ;
-
-ok $db2, "tied to $db_file";
-
-if (ord('A') == 193) { # EBCDIC.
- VerifyData(\%h2,
- {
- 'alpha' => "\xB4\x58",
- 'beta' => "\xB4\x59",
- "\xB4\x62"=> "gamma",
- "\x65\x75\x72\x6F" => "\xA4",
- "" => "",
- });
-} else {
- VerifyData(\%h2,
- {
- 'alpha' => "\xCE\xB1",
- 'beta' => "\xCE\xB2",
- "\xCE\xB3"=> "gamma",
- 'euro' => "\xA4",
- "" => "",
- });
-}
+my $db2 = tie(%h2, 'SDBM_File','Op_dbmx', O_RDWR|O_CREAT, 0640) ;
+
+ok $db2, "tied to SDBM_File";
+
+VerifyData(\%h2,
+ {
+ 'alpha' => "\xCE\xB1",
+ 'beta' => "\xCE\xB2",
+ "\xCE\xB3"=> "gamma",
+ 'euro' => "\xA4",
+ "" => "",
+ });
undef $db2;
{
diff --git a/gnu/usr.bin/perl/lib/DBM_Filter/t/utf8.t b/gnu/usr.bin/perl/lib/DBM_Filter/t/utf8.t
index 0bc38f86030..e37afa2d4ad 100644
--- a/gnu/usr.bin/perl/lib/DBM_Filter/t/utf8.t
+++ b/gnu/usr.bin/perl/lib/DBM_Filter/t/utf8.t
@@ -19,17 +19,7 @@ require "dbm_filter_util.pl";
use Test::More tests => 20;
BEGIN { use_ok('DBM_Filter') };
-my $db_file;
-BEGIN {
- use Config;
- foreach (qw/SDBM_File ODBM_File NDBM_File GDBM_File DB_File/) {
- if ($Config{extensions} =~ /\b$_\b/) {
- $db_file = $_;
- last;
- }
- }
- use_ok($db_file);
-};
+BEGIN { use_ok('SDBM_File') };
BEGIN { use_ok('Fcntl') };
BEGIN { use_ok('charnames', qw{greek})};
@@ -39,9 +29,9 @@ unlink <Op_dbmx*>;
END { unlink <Op_dbmx*>; }
my %h1 = () ;
-my $db1 = tie(%h1, $db_file,'Op_dbmx', O_RDWR|O_CREAT, 0640) ;
+my $db1 = tie(%h1, 'SDBM_File','Op_dbmx', O_RDWR|O_CREAT, 0640) ;
-ok $db1, "tied to $db_file";
+ok $db1, "tied to SDBM_File";
eval { $db1->Filter_Push('utf8') };
is $@, '', "push a 'utf8' filter" ;
@@ -75,27 +65,17 @@ undef $db1;
# read the dbm file without the filter
my %h2 = () ;
-my $db2 = tie(%h2, $db_file,'Op_dbmx', O_RDWR|O_CREAT, 0640) ;
-
-ok $db2, "tied to $db_file";
-
-if (ord('A') == 193) { # EBCDIC.
- VerifyData(\%h2,
- {
- 'alpha' => "\xB4\x58",
- 'beta' => "\xB4\x59",
- "\xB4\x62"=> "gamma",
- "" => "",
- });
-} else {
- VerifyData(\%h2,
- {
- 'alpha' => "\xCE\xB1",
- 'beta' => "\xCE\xB2",
- "\xCE\xB3"=> "gamma",
- "" => "",
- });
-}
+my $db2 = tie(%h2, 'SDBM_File','Op_dbmx', O_RDWR|O_CREAT, 0640) ;
+
+ok $db2, "tied to SDBM_File";
+
+VerifyData(\%h2,
+ {
+ 'alpha' => "\xCE\xB1",
+ 'beta' => "\xCE\xB2",
+ "\xCE\xB3"=> "gamma",
+ "" => "",
+ });
undef $db2;
{
diff --git a/gnu/usr.bin/perl/t/comp/utf.t b/gnu/usr.bin/perl/t/comp/utf.t
index f5190f9eebe..90a9e5e11bb 100644
--- a/gnu/usr.bin/perl/t/comp/utf.t
+++ b/gnu/usr.bin/perl/t/comp/utf.t
@@ -1,102 +1,57 @@
-#!./perl -w
-
-print "1..4016\n";
-my $test = 0;
-
-my %templates = (
- 'UTF-8' => 'C0U',
- 'UTF-16BE' => 'n',
- 'UTF-16LE' => 'v',
- );
-
-sub bytes_to_utf {
- my ($enc, $content, $do_bom) = @_;
- my $template = $templates{$enc};
- die "Unsupported encoding $enc" unless $template;
- my @chars = unpack "U*", $content;
- if ($enc ne 'UTF-8') {
- # Make surrogate pairs
- my @remember_that_utf_16_is_variable_length;
- foreach my $ord (@chars) {
- if ($ord < 0x10000) {
- push @remember_that_utf_16_is_variable_length,
- $ord;
- } else {
- $ord -= 0x10000;
- push @remember_that_utf_16_is_variable_length,
- (0xD800 | ($ord >> 10)), (0xDC00 | ($ord & 0x3FF));
- }
- }
- @chars = @remember_that_utf_16_is_variable_length;
+#!./perl
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib';
+ unless (find PerlIO::Layer 'perlio') {
+ print "1..0 # Skip: not perlio\n";
+ exit 0;
}
- return pack "$template*", ($do_bom ? 0xFEFF : ()), @chars;
-}
-
-sub test {
- my ($enc, $write, $expect, $bom, $nl, $name) = @_;
- open my $fh, ">", "utf$$.pl" or die "utf.pl: $!";
- binmode $fh;
- print $fh bytes_to_utf($enc, $write . ($nl ? "\n" : ''), $bom);
- close $fh or die $!;
- my $got = do "./utf$$.pl";
- $test = $test + 1;
- if (!defined $got) {
- if ($@ =~ /^(Unsupported script encoding \Q$enc\E)/) {
- print "ok $test # skip $1\n";
- } else {
- print "not ok $test # $enc $bom $nl $name; got undef\n";
- }
- } elsif ($got ne $expect) {
- print "not ok $test # $enc $bom $nl $name; got '$got'\n";
- } else {
- print "ok $test # $enc $bom $nl $name\n";
+ if ($ENV{PERL_CORE_MINITEST}) {
+ print "1..0 # Skip: no dynamic loading on miniperl, no threads\n";
+ exit 0;
+ }
+ require Config; import Config;
+ if ($Config{'extensions'} !~ /\bEncode\b/) {
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
}
}
-for my $bom (0, 1) {
- for my $enc (qw(UTF-16LE UTF-16BE UTF-8)) {
- for my $nl (1, 0) {
- for my $value (123, 1234, 12345) {
- test($enc, $value, $value, $bom, $nl, $value);
- # This has the unfortunate side effect of causing an infinite
- # loop without the bug fix it corresponds to:
- test($enc, "($value)", $value, $bom, $nl, "($value)");
- }
- next if $enc eq 'UTF-8';
- # Arguably a bug that currently string literals from UTF-8 file
- # handles are not implicitly "use utf8", but don't FIXME that
- # right now, as here we're testing the input filter itself.
+require "./test.pl";
- for my $expect ("N", "\xFF", "\x{100}", "\x{010a}", "\x{0a23}",
- "\x{10000}", "\x{64321}", "\x{10FFFD}",
- "\x{1000a}", # 0xD800 0xDC0A
- "\x{12800}", # 0xD80A 0xDC00
- ) {
- # A space so that the UTF-16 heuristic triggers - " '" gives two
- # characters of ASCII.
- my $write = " '$expect'";
- my $name = 'chrs ' . join ', ', map {ord $_} split '', $expect;
- test($enc, $write, $expect, $bom, $nl, $name);
- }
+plan(tests => 15);
- # This is designed to try to trip over the end of the buffer,
- # with similar results to U-1000A and U-12800 above.
- for my $pad (2 .. 162) {
- for my $chr ("\x{10000}", "\x{1000a}", "\x{12800}") {
- my $padding = ' ' x $pad;
- # Need 4 octets that were from 2 ASCII characters to trigger
- # the heuristic that detects UTF-16 without a BOM. For
- # UTF-16BE, one space and the newline will do, as the
- # newline's high octet comes first. But for UTF-16LE, a
- # newline is "\n\0", so it doesn't trigger it.
- test($enc, " \n$padding'$chr'", $chr, $bom, $nl,
- sprintf "'\\x{%x}' with $pad spaces before it", ord $chr);
- }
- }
- }
- }
+my $BOM = chr(0xFEFF);
+
+sub test {
+ my ($enc, $tag, $bom) = @_;
+ open(UTF_PL, ">:raw:encoding($enc)", "utf.pl")
+ or die "utf.pl($enc,$tag,$bom): $!";
+ print UTF_PL $BOM if $bom;
+ print UTF_PL "$tag\n";
+ close(UTF_PL);
+ my $got = do "./utf.pl";
+ is($got, $tag);
}
+test("utf16le", 123, 1);
+test("utf16le", 1234, 1);
+test("utf16le", 12345, 1);
+test("utf16be", 123, 1);
+test("utf16be", 1234, 1);
+test("utf16be", 12345, 1);
+test("utf8", 123, 1);
+test("utf8", 1234, 1);
+test("utf8", 12345, 1);
+
+test("utf16le", 123, 0);
+test("utf16le", 1234, 0);
+test("utf16le", 12345, 0);
+test("utf16be", 123, 0);
+test("utf16be", 1234, 0);
+test("utf16be", 12345, 0);
+
END {
- 1 while unlink "utf$$.pl";
+ 1 while unlink "utf.pl";
}
diff --git a/gnu/usr.bin/perl/t/op/utftaint.t b/gnu/usr.bin/perl/t/op/utftaint.t
index d734927d590..cd44503e749 100644
--- a/gnu/usr.bin/perl/t/op/utftaint.t
+++ b/gnu/usr.bin/perl/t/op/utftaint.t
@@ -2,6 +2,10 @@
# tests whether tainting works with UTF-8
BEGIN {
+ if ($ENV{PERL_CORE_MINITEST}) {
+ print "1..0 # Skip: no dynamic loading on miniperl, no threads\n";
+ exit 0;
+ }
chdir 't' if -d 't';
@INC = qw(../lib);
}
@@ -9,21 +13,26 @@ BEGIN {
use strict;
use Config;
-# How to identify taint when you see it
-sub any_tainted (@) {
- not eval { join("",@_), kill 0; 1 };
-}
-sub tainted ($) {
- any_tainted @_;
+BEGIN {
+ if ($Config{extensions} !~ m(\bList/Util\b)) {
+ print "1..0 # Skip: no Scalar::Util module\n";
+ exit 0;
+ }
}
-require './test.pl';
-plan(tests => 3*10 + 3*8 + 2*16 + 3);
+use Scalar::Util qw(tainted);
+
+use Test;
+plan tests => 3*10 + 3*8 + 2*16;
+my $cnt = 0;
my $arg = $ENV{PATH}; # a tainted value
use constant UTF8 => "\x{1234}";
-*is_utf8 = \&utf8::is_utf8;
+sub is_utf8 {
+ my $s = shift;
+ return 0xB6 != ord pack('a*', chr(0xB6).$s);
+}
for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) {
my $encode = $ary->[0];
@@ -31,31 +40,41 @@ for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) {
my $taint = $arg; substr($taint, 0) = $ary->[1];
- is(tainted($taint), tainted($arg), "tainted: $encode, before test");
+ print tainted($taint) == tainted($arg)
+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, before test\n";
my $lconcat = $taint;
$lconcat .= UTF8;
- is($lconcat, $string.UTF8, "compare: $encode, concat left");
+ print $lconcat eq $string.UTF8
+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, concat left\n";
- is(tainted($lconcat), tainted($arg), "tainted: $encode, concat left");
+ print tainted($lconcat) == tainted($arg)
+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, concat left\n";
my $rconcat = UTF8;
$rconcat .= $taint;
- is($rconcat, UTF8.$string, "compare: $encode, concat right");
+ print $rconcat eq UTF8.$string
+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, concat right\n";
- is(tainted($rconcat), tainted($arg), "tainted: $encode, concat right");
+ print tainted($rconcat) == tainted($arg)
+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, concat right\n";
my $ljoin = join('!', $taint, UTF8);
- is($ljoin, join('!', $string, UTF8), "compare: $encode, join left");
+ print $ljoin eq join('!', $string, UTF8)
+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, join left\n";
- is(tainted($ljoin), tainted($arg), "tainted: $encode, join left");
+ print tainted($ljoin) == tainted($arg)
+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, join left\n";
my $rjoin = join('!', UTF8, $taint);
- is($rjoin, join('!', UTF8, $string), "compare: $encode, join right");
+ print $rjoin eq join('!', UTF8, $string)
+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, join right\n";
- is(tainted($rjoin), tainted($arg), "tainted: $encode, join right");
+ print tainted($rjoin) == tainted($arg)
+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, join right\n";
- is(tainted($taint), tainted($arg), "tainted: $encode, after test");
+ print tainted($taint) == tainted($arg)
+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, after test\n";
}
@@ -63,29 +82,37 @@ for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) {
my $encode = $ary->[0];
my $utf8 = pack('U*') . $ary->[1];
- my $byte = unpack('U0a*', $utf8);
+ my $byte = pack('C0a*', $utf8);
my $taint = $arg; substr($taint, 0) = $utf8;
utf8::encode($taint);
- is($taint, $byte, "compare: $encode, encode utf8");
+ print $taint eq $byte
+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, encode utf8\n";
- is(pack('a*',$taint), pack('a*',$byte), "bytecmp: $encode, encode utf8");
+ print pack('a*',$taint) eq pack('a*',$byte)
+ ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, encode utf8\n";
- ok(!is_utf8($taint), "is_utf8: $encode, encode utf8");
+ print !is_utf8($taint)
+ ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, encode utf8\n";
- is(tainted($taint), tainted($arg), "tainted: $encode, encode utf8");
+ print tainted($taint) == tainted($arg)
+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, encode utf8\n";
my $taint = $arg; substr($taint, 0) = $byte;
utf8::decode($taint);
- is($taint, $utf8, "compare: $encode, decode byte");
+ print $taint eq $utf8
+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, decode byte\n";
- is(pack('a*',$taint), pack('a*',$utf8), "bytecmp: $encode, decode byte");
+ print pack('a*',$taint) eq pack('a*',$utf8)
+ ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, decode byte\n";
- is(is_utf8($taint), ($encode ne 'ascii'), "is_utf8: $encode, decode byte");
+ print is_utf8($taint) eq ($encode ne 'ascii')
+ ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, decode byte\n";
- is(tainted($taint), tainted($arg), "tainted: $encode, decode byte");
+ print tainted($taint) == tainted($arg)
+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, decode byte\n";
}
@@ -93,68 +120,67 @@ for my $ary ([ascii => 'perl'], [latin1 => "\xB6"]) {
my $encode = $ary->[0];
my $up = pack('U*') . $ary->[1];
- my $down = pack("a*", $ary->[1]);
+ my $down = pack('C0a*', $ary->[1]);
my $taint = $arg; substr($taint, 0) = $up;
utf8::upgrade($taint);
- is($taint, $up, "compare: $encode, upgrade up");
+ print $taint eq $up
+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, upgrade up\n";
- is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade up");
+ print pack('a*',$taint) eq pack('a*',$up)
+ ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, upgrade up\n";
- ok(is_utf8($taint), "is_utf8: $encode, upgrade up");
+ print is_utf8($taint)
+ ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, upgrade up\n";
- is(tainted($taint), tainted($arg), "tainted: $encode, upgrade up");
+ print tainted($taint) == tainted($arg)
+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, upgrade up\n";
my $taint = $arg; substr($taint, 0) = $down;
utf8::upgrade($taint);
- is($taint, $up, "compare: $encode, upgrade down");
+ print $taint eq $up
+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, upgrade down\n";
- is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade down");
+ print pack('a*',$taint) eq pack('a*',$up)
+ ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, upgrade down\n";
- ok(is_utf8($taint), "is_utf8: $encode, upgrade down");
+ print is_utf8($taint)
+ ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, upgrade down\n";
- is(tainted($taint), tainted($arg), "tainted: $encode, upgrade down");
+ print tainted($taint) == tainted($arg)
+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, upgrade down\n";
my $taint = $arg; substr($taint, 0) = $up;
utf8::downgrade($taint);
- is($taint, $down, "compare: $encode, downgrade up");
+ print $taint eq $down
+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, downgrade up\n";
- is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade up");
+ print pack('a*',$taint) eq pack('a*',$down)
+ ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, downgrade up\n";
- ok(!is_utf8($taint), "is_utf8: $encode, downgrade up");
+ print !is_utf8($taint)
+ ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, downgrade up\n";
- is(tainted($taint), tainted($arg), "tainted: $encode, downgrade up");
+ print tainted($taint) == tainted($arg)
+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, downgrade up\n";
my $taint = $arg; substr($taint, 0) = $down;
utf8::downgrade($taint);
- is($taint, $down, "compare: $encode, downgrade down");
+ print $taint eq $down
+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, downgrade down\n";
- is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade down");
+ print pack('a*',$taint) eq pack('a*',$down)
+ ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, downgrade down\n";
- ok(!is_utf8($taint), "is_utf8: $encode, downgrade down");
+ print !is_utf8($taint)
+ ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, downgrade down\n";
- is(tainted($taint), tainted($arg), "tainted: $encode, downgrade down");
+ print tainted($taint) == tainted($arg)
+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, downgrade down\n";
}
-{
- fresh_perl_is('$a = substr $^X, 0, 0; /\x{100}/i; /$a\x{100}/i || print q,ok,',
- 'ok', {switches => ["-T", "-l"]},
- "matching a regexp is taint agnostic");
- fresh_perl_is('$a = substr $^X, 0, 0; /$a\x{100}/i || print q,ok,',
- 'ok', {switches => ["-T", "-l"]},
- "therefore swash_init should be taint agnostic");
-}
-
-{
- # RT #122148: s///e on tainted utf8 strings got pos() messed up in 5.20
-
- my @p;
- my $s = "\x{100}\x{100}\x{100}\x{100}". $^X;
- $s =~ s/\x{100}/push @p, pos($s); "xxxx";/eg;
- is("@p", "0 1 2 3", "RT #122148");
-}
diff --git a/gnu/usr.bin/perl/t/uni/chomp.t b/gnu/usr.bin/perl/t/uni/chomp.t
index 0dca91a9bf9..5f52558ab61 100644
--- a/gnu/usr.bin/perl/t/uni/chomp.t
+++ b/gnu/usr.bin/perl/t/uni/chomp.t
@@ -1,14 +1,32 @@
#!./perl -w
BEGIN {
- require './test.pl';
- skip_all_without_dynamic_extension('Encode');
- skip_all("EBCDIC") if $::IS_EBCDIC;
- skip_all_without_perlio();
+ if ($ENV{'PERL_CORE'}){
+ chdir 't';
+ @INC = '../lib';
+ }
+ require Config; import Config;
+ if ($Config{'extensions'} !~ /\bEncode\b/) {
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
+ }
+ if (ord("A") == 193) {
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
+ }
+ unless (PerlIO::Layer->find('perlio')){
+ print "1..0 # Skip: PerlIO required\n";
+ exit 0;
+ }
+ if ($ENV{PERL_CORE_MINITEST}) {
+ print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n";
+ exit 0;
+ }
}
-use strict;
use Encode;
+use strict;
+use Test::More;
# %mbchars = (encoding => { bytes => utf8, ... }, ...);
# * pack('C*') is expected to return bytes even if ${^ENCODING} is true.
diff --git a/gnu/usr.bin/perl/t/uni/class.t b/gnu/usr.bin/perl/t/uni/class.t
index 144ae4334d0..130b720eacc 100644
--- a/gnu/usr.bin/perl/t/uni/class.t
+++ b/gnu/usr.bin/perl/t/uni/class.t
@@ -4,20 +4,15 @@ BEGIN {
require "test.pl";
}
-plan tests => 11;
+plan tests => 4334;
-my $str = join "", map latin1_to_native(chr($_)), 0x20 .. 0x6F;
-
-is(($str =~ /(\p{IsMyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO',
- 'user-defined class compiled before defined');
-
-sub IsMyUniClass {
+sub MyUniClass {
<<END;
0030 004F
END
}
-sub Other::IsClass {
+sub Other::Class {
<<END;
0040 005F
END
@@ -25,62 +20,147 @@ END
sub A::B::Intersection {
<<END;
-+main::IsMyUniClass
-&Other::IsClass
++main::MyUniClass
+&Other::Class
END
}
-sub test_regexp ($$) {
- # test that given string consists of N-1 chars matching $qr1, and 1
- # char matching $qr2
- my ($str, $blk) = @_;
-
- # constructing these objects here makes the last test loop go much faster
- my $qr1 = qr/(\p{$blk}+)/;
- if ($str =~ $qr1) {
- is($1, substr($str, 0, -1)); # all except last char
- }
- else {
- fail('first N-1 chars did not match');
- }
-
- my $qr2 = qr/(\P{$blk}+)/;
- if ($str =~ $qr2) {
- is($1, substr($str, -1)); # only last char
- }
- else {
- fail('last char did not match');
- }
-}
-use strict;
+my $str = join "", map chr($_), 0x20 .. 0x6F;
# make sure it finds built-in class
is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
# make sure it finds user-defined class
-is(($str =~ /(\p{IsMyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO');
+is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO');
# make sure it finds class in other package
-is(($str =~ /(\p{Other::IsClass}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');
+is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');
# make sure it finds class in other OTHER package
is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO');
-# lib/unicore/lib/Bc/AL.pl. U+070E is unassigned, currently, but still has
-# bidi class AL. The first one in the sequence that doesn't is 0711, which is
-# BC=NSM.
-$str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}\x{0712}";
-is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{0711}");
-is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{0711}");
-is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{0711}");
-is(($str =~ /(\P{bc=AL}+)/)[0], "\x{0711}");
+# all of these should look in lib/unicore/bc/AL.pl
+$str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}";
+is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
+is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}");
+is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
+is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}");
# make sure InGreek works
$str = "[\x{038B}\x{038C}\x{038D}]";
is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
+is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
+is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
+is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
+is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
+
+
+use File::Spec;
+my $updir = File::Spec->updir;
+
+
+# the %utf8::... hashes are already in existence
+# because utf8_pva.pl was run by utf8_heavy.pl
+
+# non-General Category and non-Script
+while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) {
+ my $prop_name = $utf8::PropertyAlias{$abbrev};
+ next unless $prop_name;
+ next if $abbrev eq "gc_sc";
+
+ for (sort keys %$files) {
+ my $filename = File::Spec->catfile(
+ $updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl"
+ );
+
+ next unless -e $filename;
+ my ($h1, $h2) = map hex, split /\t/, (do $filename);
+ my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
-# The other tests that are based on looking at the generated files are now
-# in t/re/uniprops.t
+ for my $p ($prop_name, $abbrev) {
+ for my $c ($files->{$_}, $_) {
+ is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1));
+ is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1));
+ }
+ }
+ }
+}
+
+# General Category and Script
+for my $p ('gc', 'sc') {
+ while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) {
+ my $filename = File::Spec->catfile(
+ $updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl"
+ );
+
+ next unless -e $filename;
+ my ($h1, $h2) = map hex, split /\t/, (do $filename);
+ my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
+
+ for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) {
+ for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) {
+ is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
+ is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
+ is($str =~ /(\p{$y}+)/ && $1, substr($str, 0, -1));
+ is($str =~ /(\P{$y}+)/ && $1, substr($str, -1));
+ }
+ }
+ }
+}
+
+# test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.)
+{
+ # Aargh. Nasty case insensitive filesystems mean that Cf.pl will cause a -e
+ # test for cf.pl to return true. So need to read the filenames explicitly
+ # to get a case sensitive test
+ my %files;
+
+ my $dirname = File::Spec->catdir($updir => lib => unicore => lib => gc_sc);
+ opendir D, $dirname or die $!;
+ @files{readdir D} = ();
+ closedir D;
+
+ for (keys %utf8::PA_reverse) {
+ my $leafname = "$utf8::PA_reverse{$_}.pl";
+ next unless exists $files{$leafname};
+
+ my $filename = File::Spec->catfile($dirname, $leafname);
+
+ my ($h1, $h2) = map hex, split /\t/, (do $filename);
+ my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
+
+ for my $x ('gc', 'General Category') {
+ print "# $filename $x $_, $utf8::PA_reverse{$_}\n";
+ for my $y ($_, $utf8::PA_reverse{$_}) {
+ is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
+ is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
+ is($str =~ /(\p{$y}+)/ && $1, substr($str, 0, -1));
+ is($str =~ /(\P{$y}+)/ && $1, substr($str, -1));
+ }
+ }
+ }
+}
+
+# test the blocks (InFoobar)
+for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) {
+ my $filename = File::Spec->catfile(
+ $updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl"
+ );
+
+ next unless -e $filename;
+ my ($h1, $h2) = map hex, split /\t/, (do $filename);
+ my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
+
+ my $blk = $_;
+
+ is($str =~ /(\p{$blk}+)/ && $1, substr($str, 0, -1));
+ is($str =~ /(\P{$blk}+)/ && $1, substr($str, -1));
+
+ $blk =~ s/^In/Block:/;
+
+ is($str =~ /(\p{$blk}+)/ && $1, substr($str, 0, -1));
+ is($str =~ /(\P{$blk}+)/ && $1, substr($str, -1));
+}