diff options
author | Todd C. Miller <millert@cvs.openbsd.org> | 2004-08-09 17:48:38 +0000 |
---|---|---|
committer | Todd C. Miller <millert@cvs.openbsd.org> | 2004-08-09 17:48:38 +0000 |
commit | fcd51aefcee6b99d1de2040108f05931de2f2710 (patch) | |
tree | 53694e7b3df4e50a35d3d8a7ab76fb713b33791c /gnu/usr.bin/perl | |
parent | 1da0ec6edba07cd8d9c7ae52e35799093a90900f (diff) |
Import of stock perl 5.8.5
Diffstat (limited to 'gnu/usr.bin/perl')
-rw-r--r-- | gnu/usr.bin/perl/lib/DBM_Filter/t/encode.t | 53 | ||||
-rw-r--r-- | gnu/usr.bin/perl/lib/DBM_Filter/t/utf8.t | 48 | ||||
-rw-r--r-- | gnu/usr.bin/perl/t/comp/utf.t | 139 | ||||
-rw-r--r-- | gnu/usr.bin/perl/t/op/utftaint.t | 150 | ||||
-rw-r--r-- | gnu/usr.bin/perl/t/uni/chomp.t | 28 | ||||
-rw-r--r-- | gnu/usr.bin/perl/t/uni/class.t | 170 |
6 files changed, 313 insertions, 275 deletions
diff --git a/gnu/usr.bin/perl/lib/DBM_Filter/t/encode.t b/gnu/usr.bin/perl/lib/DBM_Filter/t/encode.t index d7a8ef233f2..7b71a98b2ee 100644 --- a/gnu/usr.bin/perl/lib/DBM_Filter/t/encode.t +++ b/gnu/usr.bin/perl/lib/DBM_Filter/t/encode.t @@ -21,17 +21,7 @@ require "dbm_filter_util.pl"; use Test::More tests => 26; BEGIN { use_ok('DBM_Filter') }; -my $db_file; -BEGIN { - use Config; - foreach (qw/SDBM_File ODBM_File NDBM_File GDBM_File DB_File/) { - if ($Config{extensions} =~ /\b$_\b/) { - $db_file = $_; - last; - } - } - use_ok($db_file); -}; +BEGIN { use_ok('SDBM_File') }; BEGIN { use_ok('Fcntl') }; BEGIN { use_ok('charnames', qw{greek})}; @@ -41,12 +31,12 @@ unlink <Op_dbmx*>; END { unlink <Op_dbmx*>; } my %h1 = () ; -my $db1 = tie(%h1, $db_file,'Op_dbmx', O_RDWR|O_CREAT, 0640) ; +my $db1 = tie(%h1, 'SDBM_File','Op_dbmx', O_RDWR|O_CREAT, 0640) ; -ok $db1, "tied to $db_file"; +ok $db1, "tied to SDBM_File"; eval { $db1->Filter_Push('encode' => 'blah') }; -like $@, qr/^Encoding 'blah' is not available/, "push an illegal filter" ; +like $@, qr/^Encoding 'blah' is not available/, "push an illigal filter" ; eval { $db1->Filter_Push('encode') }; is $@, '', "push an 'encode' filter (default to utf-8)" ; @@ -93,29 +83,18 @@ undef $db1; # read the dbm file without the filter my %h2 = () ; -my $db2 = tie(%h2, $db_file,'Op_dbmx', O_RDWR|O_CREAT, 0640) ; - -ok $db2, "tied to $db_file"; - -if (ord('A') == 193) { # EBCDIC. - VerifyData(\%h2, - { - 'alpha' => "\xB4\x58", - 'beta' => "\xB4\x59", - "\xB4\x62"=> "gamma", - "\x65\x75\x72\x6F" => "\xA4", - "" => "", - }); -} else { - VerifyData(\%h2, - { - 'alpha' => "\xCE\xB1", - 'beta' => "\xCE\xB2", - "\xCE\xB3"=> "gamma", - 'euro' => "\xA4", - "" => "", - }); -} +my $db2 = tie(%h2, 'SDBM_File','Op_dbmx', O_RDWR|O_CREAT, 0640) ; + +ok $db2, "tied to SDBM_File"; + +VerifyData(\%h2, + { + 'alpha' => "\xCE\xB1", + 'beta' => "\xCE\xB2", + "\xCE\xB3"=> "gamma", + 'euro' => "\xA4", + "" => "", + }); undef $db2; { diff --git a/gnu/usr.bin/perl/lib/DBM_Filter/t/utf8.t b/gnu/usr.bin/perl/lib/DBM_Filter/t/utf8.t index 0bc38f86030..e37afa2d4ad 100644 --- a/gnu/usr.bin/perl/lib/DBM_Filter/t/utf8.t +++ b/gnu/usr.bin/perl/lib/DBM_Filter/t/utf8.t @@ -19,17 +19,7 @@ require "dbm_filter_util.pl"; use Test::More tests => 20; BEGIN { use_ok('DBM_Filter') }; -my $db_file; -BEGIN { - use Config; - foreach (qw/SDBM_File ODBM_File NDBM_File GDBM_File DB_File/) { - if ($Config{extensions} =~ /\b$_\b/) { - $db_file = $_; - last; - } - } - use_ok($db_file); -}; +BEGIN { use_ok('SDBM_File') }; BEGIN { use_ok('Fcntl') }; BEGIN { use_ok('charnames', qw{greek})}; @@ -39,9 +29,9 @@ unlink <Op_dbmx*>; END { unlink <Op_dbmx*>; } my %h1 = () ; -my $db1 = tie(%h1, $db_file,'Op_dbmx', O_RDWR|O_CREAT, 0640) ; +my $db1 = tie(%h1, 'SDBM_File','Op_dbmx', O_RDWR|O_CREAT, 0640) ; -ok $db1, "tied to $db_file"; +ok $db1, "tied to SDBM_File"; eval { $db1->Filter_Push('utf8') }; is $@, '', "push a 'utf8' filter" ; @@ -75,27 +65,17 @@ undef $db1; # read the dbm file without the filter my %h2 = () ; -my $db2 = tie(%h2, $db_file,'Op_dbmx', O_RDWR|O_CREAT, 0640) ; - -ok $db2, "tied to $db_file"; - -if (ord('A') == 193) { # EBCDIC. - VerifyData(\%h2, - { - 'alpha' => "\xB4\x58", - 'beta' => "\xB4\x59", - "\xB4\x62"=> "gamma", - "" => "", - }); -} else { - VerifyData(\%h2, - { - 'alpha' => "\xCE\xB1", - 'beta' => "\xCE\xB2", - "\xCE\xB3"=> "gamma", - "" => "", - }); -} +my $db2 = tie(%h2, 'SDBM_File','Op_dbmx', O_RDWR|O_CREAT, 0640) ; + +ok $db2, "tied to SDBM_File"; + +VerifyData(\%h2, + { + 'alpha' => "\xCE\xB1", + 'beta' => "\xCE\xB2", + "\xCE\xB3"=> "gamma", + "" => "", + }); undef $db2; { diff --git a/gnu/usr.bin/perl/t/comp/utf.t b/gnu/usr.bin/perl/t/comp/utf.t index f5190f9eebe..90a9e5e11bb 100644 --- a/gnu/usr.bin/perl/t/comp/utf.t +++ b/gnu/usr.bin/perl/t/comp/utf.t @@ -1,102 +1,57 @@ -#!./perl -w - -print "1..4016\n"; -my $test = 0; - -my %templates = ( - 'UTF-8' => 'C0U', - 'UTF-16BE' => 'n', - 'UTF-16LE' => 'v', - ); - -sub bytes_to_utf { - my ($enc, $content, $do_bom) = @_; - my $template = $templates{$enc}; - die "Unsupported encoding $enc" unless $template; - my @chars = unpack "U*", $content; - if ($enc ne 'UTF-8') { - # Make surrogate pairs - my @remember_that_utf_16_is_variable_length; - foreach my $ord (@chars) { - if ($ord < 0x10000) { - push @remember_that_utf_16_is_variable_length, - $ord; - } else { - $ord -= 0x10000; - push @remember_that_utf_16_is_variable_length, - (0xD800 | ($ord >> 10)), (0xDC00 | ($ord & 0x3FF)); - } - } - @chars = @remember_that_utf_16_is_variable_length; +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + unless (find PerlIO::Layer 'perlio') { + print "1..0 # Skip: not perlio\n"; + exit 0; } - return pack "$template*", ($do_bom ? 0xFEFF : ()), @chars; -} - -sub test { - my ($enc, $write, $expect, $bom, $nl, $name) = @_; - open my $fh, ">", "utf$$.pl" or die "utf.pl: $!"; - binmode $fh; - print $fh bytes_to_utf($enc, $write . ($nl ? "\n" : ''), $bom); - close $fh or die $!; - my $got = do "./utf$$.pl"; - $test = $test + 1; - if (!defined $got) { - if ($@ =~ /^(Unsupported script encoding \Q$enc\E)/) { - print "ok $test # skip $1\n"; - } else { - print "not ok $test # $enc $bom $nl $name; got undef\n"; - } - } elsif ($got ne $expect) { - print "not ok $test # $enc $bom $nl $name; got '$got'\n"; - } else { - print "ok $test # $enc $bom $nl $name\n"; + if ($ENV{PERL_CORE_MINITEST}) { + print "1..0 # Skip: no dynamic loading on miniperl, no threads\n"; + exit 0; + } + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; } } -for my $bom (0, 1) { - for my $enc (qw(UTF-16LE UTF-16BE UTF-8)) { - for my $nl (1, 0) { - for my $value (123, 1234, 12345) { - test($enc, $value, $value, $bom, $nl, $value); - # This has the unfortunate side effect of causing an infinite - # loop without the bug fix it corresponds to: - test($enc, "($value)", $value, $bom, $nl, "($value)"); - } - next if $enc eq 'UTF-8'; - # Arguably a bug that currently string literals from UTF-8 file - # handles are not implicitly "use utf8", but don't FIXME that - # right now, as here we're testing the input filter itself. +require "./test.pl"; - for my $expect ("N", "\xFF", "\x{100}", "\x{010a}", "\x{0a23}", - "\x{10000}", "\x{64321}", "\x{10FFFD}", - "\x{1000a}", # 0xD800 0xDC0A - "\x{12800}", # 0xD80A 0xDC00 - ) { - # A space so that the UTF-16 heuristic triggers - " '" gives two - # characters of ASCII. - my $write = " '$expect'"; - my $name = 'chrs ' . join ', ', map {ord $_} split '', $expect; - test($enc, $write, $expect, $bom, $nl, $name); - } +plan(tests => 15); - # This is designed to try to trip over the end of the buffer, - # with similar results to U-1000A and U-12800 above. - for my $pad (2 .. 162) { - for my $chr ("\x{10000}", "\x{1000a}", "\x{12800}") { - my $padding = ' ' x $pad; - # Need 4 octets that were from 2 ASCII characters to trigger - # the heuristic that detects UTF-16 without a BOM. For - # UTF-16BE, one space and the newline will do, as the - # newline's high octet comes first. But for UTF-16LE, a - # newline is "\n\0", so it doesn't trigger it. - test($enc, " \n$padding'$chr'", $chr, $bom, $nl, - sprintf "'\\x{%x}' with $pad spaces before it", ord $chr); - } - } - } - } +my $BOM = chr(0xFEFF); + +sub test { + my ($enc, $tag, $bom) = @_; + open(UTF_PL, ">:raw:encoding($enc)", "utf.pl") + or die "utf.pl($enc,$tag,$bom): $!"; + print UTF_PL $BOM if $bom; + print UTF_PL "$tag\n"; + close(UTF_PL); + my $got = do "./utf.pl"; + is($got, $tag); } +test("utf16le", 123, 1); +test("utf16le", 1234, 1); +test("utf16le", 12345, 1); +test("utf16be", 123, 1); +test("utf16be", 1234, 1); +test("utf16be", 12345, 1); +test("utf8", 123, 1); +test("utf8", 1234, 1); +test("utf8", 12345, 1); + +test("utf16le", 123, 0); +test("utf16le", 1234, 0); +test("utf16le", 12345, 0); +test("utf16be", 123, 0); +test("utf16be", 1234, 0); +test("utf16be", 12345, 0); + END { - 1 while unlink "utf$$.pl"; + 1 while unlink "utf.pl"; } diff --git a/gnu/usr.bin/perl/t/op/utftaint.t b/gnu/usr.bin/perl/t/op/utftaint.t index d734927d590..cd44503e749 100644 --- a/gnu/usr.bin/perl/t/op/utftaint.t +++ b/gnu/usr.bin/perl/t/op/utftaint.t @@ -2,6 +2,10 @@ # tests whether tainting works with UTF-8 BEGIN { + if ($ENV{PERL_CORE_MINITEST}) { + print "1..0 # Skip: no dynamic loading on miniperl, no threads\n"; + exit 0; + } chdir 't' if -d 't'; @INC = qw(../lib); } @@ -9,21 +13,26 @@ BEGIN { use strict; use Config; -# How to identify taint when you see it -sub any_tainted (@) { - not eval { join("",@_), kill 0; 1 }; -} -sub tainted ($) { - any_tainted @_; +BEGIN { + if ($Config{extensions} !~ m(\bList/Util\b)) { + print "1..0 # Skip: no Scalar::Util module\n"; + exit 0; + } } -require './test.pl'; -plan(tests => 3*10 + 3*8 + 2*16 + 3); +use Scalar::Util qw(tainted); + +use Test; +plan tests => 3*10 + 3*8 + 2*16; +my $cnt = 0; my $arg = $ENV{PATH}; # a tainted value use constant UTF8 => "\x{1234}"; -*is_utf8 = \&utf8::is_utf8; +sub is_utf8 { + my $s = shift; + return 0xB6 != ord pack('a*', chr(0xB6).$s); +} for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) { my $encode = $ary->[0]; @@ -31,31 +40,41 @@ for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) { my $taint = $arg; substr($taint, 0) = $ary->[1]; - is(tainted($taint), tainted($arg), "tainted: $encode, before test"); + print tainted($taint) == tainted($arg) + ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, before test\n"; my $lconcat = $taint; $lconcat .= UTF8; - is($lconcat, $string.UTF8, "compare: $encode, concat left"); + print $lconcat eq $string.UTF8 + ? "ok " : "not ok ", ++$cnt, " # compare: $encode, concat left\n"; - is(tainted($lconcat), tainted($arg), "tainted: $encode, concat left"); + print tainted($lconcat) == tainted($arg) + ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, concat left\n"; my $rconcat = UTF8; $rconcat .= $taint; - is($rconcat, UTF8.$string, "compare: $encode, concat right"); + print $rconcat eq UTF8.$string + ? "ok " : "not ok ", ++$cnt, " # compare: $encode, concat right\n"; - is(tainted($rconcat), tainted($arg), "tainted: $encode, concat right"); + print tainted($rconcat) == tainted($arg) + ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, concat right\n"; my $ljoin = join('!', $taint, UTF8); - is($ljoin, join('!', $string, UTF8), "compare: $encode, join left"); + print $ljoin eq join('!', $string, UTF8) + ? "ok " : "not ok ", ++$cnt, " # compare: $encode, join left\n"; - is(tainted($ljoin), tainted($arg), "tainted: $encode, join left"); + print tainted($ljoin) == tainted($arg) + ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, join left\n"; my $rjoin = join('!', UTF8, $taint); - is($rjoin, join('!', UTF8, $string), "compare: $encode, join right"); + print $rjoin eq join('!', UTF8, $string) + ? "ok " : "not ok ", ++$cnt, " # compare: $encode, join right\n"; - is(tainted($rjoin), tainted($arg), "tainted: $encode, join right"); + print tainted($rjoin) == tainted($arg) + ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, join right\n"; - is(tainted($taint), tainted($arg), "tainted: $encode, after test"); + print tainted($taint) == tainted($arg) + ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, after test\n"; } @@ -63,29 +82,37 @@ for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) { my $encode = $ary->[0]; my $utf8 = pack('U*') . $ary->[1]; - my $byte = unpack('U0a*', $utf8); + my $byte = pack('C0a*', $utf8); my $taint = $arg; substr($taint, 0) = $utf8; utf8::encode($taint); - is($taint, $byte, "compare: $encode, encode utf8"); + print $taint eq $byte + ? "ok " : "not ok ", ++$cnt, " # compare: $encode, encode utf8\n"; - is(pack('a*',$taint), pack('a*',$byte), "bytecmp: $encode, encode utf8"); + print pack('a*',$taint) eq pack('a*',$byte) + ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, encode utf8\n"; - ok(!is_utf8($taint), "is_utf8: $encode, encode utf8"); + print !is_utf8($taint) + ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, encode utf8\n"; - is(tainted($taint), tainted($arg), "tainted: $encode, encode utf8"); + print tainted($taint) == tainted($arg) + ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, encode utf8\n"; my $taint = $arg; substr($taint, 0) = $byte; utf8::decode($taint); - is($taint, $utf8, "compare: $encode, decode byte"); + print $taint eq $utf8 + ? "ok " : "not ok ", ++$cnt, " # compare: $encode, decode byte\n"; - is(pack('a*',$taint), pack('a*',$utf8), "bytecmp: $encode, decode byte"); + print pack('a*',$taint) eq pack('a*',$utf8) + ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, decode byte\n"; - is(is_utf8($taint), ($encode ne 'ascii'), "is_utf8: $encode, decode byte"); + print is_utf8($taint) eq ($encode ne 'ascii') + ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, decode byte\n"; - is(tainted($taint), tainted($arg), "tainted: $encode, decode byte"); + print tainted($taint) == tainted($arg) + ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, decode byte\n"; } @@ -93,68 +120,67 @@ for my $ary ([ascii => 'perl'], [latin1 => "\xB6"]) { my $encode = $ary->[0]; my $up = pack('U*') . $ary->[1]; - my $down = pack("a*", $ary->[1]); + my $down = pack('C0a*', $ary->[1]); my $taint = $arg; substr($taint, 0) = $up; utf8::upgrade($taint); - is($taint, $up, "compare: $encode, upgrade up"); + print $taint eq $up + ? "ok " : "not ok ", ++$cnt, " # compare: $encode, upgrade up\n"; - is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade up"); + print pack('a*',$taint) eq pack('a*',$up) + ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, upgrade up\n"; - ok(is_utf8($taint), "is_utf8: $encode, upgrade up"); + print is_utf8($taint) + ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, upgrade up\n"; - is(tainted($taint), tainted($arg), "tainted: $encode, upgrade up"); + print tainted($taint) == tainted($arg) + ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, upgrade up\n"; my $taint = $arg; substr($taint, 0) = $down; utf8::upgrade($taint); - is($taint, $up, "compare: $encode, upgrade down"); + print $taint eq $up + ? "ok " : "not ok ", ++$cnt, " # compare: $encode, upgrade down\n"; - is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade down"); + print pack('a*',$taint) eq pack('a*',$up) + ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, upgrade down\n"; - ok(is_utf8($taint), "is_utf8: $encode, upgrade down"); + print is_utf8($taint) + ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, upgrade down\n"; - is(tainted($taint), tainted($arg), "tainted: $encode, upgrade down"); + print tainted($taint) == tainted($arg) + ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, upgrade down\n"; my $taint = $arg; substr($taint, 0) = $up; utf8::downgrade($taint); - is($taint, $down, "compare: $encode, downgrade up"); + print $taint eq $down + ? "ok " : "not ok ", ++$cnt, " # compare: $encode, downgrade up\n"; - is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade up"); + print pack('a*',$taint) eq pack('a*',$down) + ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, downgrade up\n"; - ok(!is_utf8($taint), "is_utf8: $encode, downgrade up"); + print !is_utf8($taint) + ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, downgrade up\n"; - is(tainted($taint), tainted($arg), "tainted: $encode, downgrade up"); + print tainted($taint) == tainted($arg) + ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, downgrade up\n"; my $taint = $arg; substr($taint, 0) = $down; utf8::downgrade($taint); - is($taint, $down, "compare: $encode, downgrade down"); + print $taint eq $down + ? "ok " : "not ok ", ++$cnt, " # compare: $encode, downgrade down\n"; - is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade down"); + print pack('a*',$taint) eq pack('a*',$down) + ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, downgrade down\n"; - ok(!is_utf8($taint), "is_utf8: $encode, downgrade down"); + print !is_utf8($taint) + ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, downgrade down\n"; - is(tainted($taint), tainted($arg), "tainted: $encode, downgrade down"); + print tainted($taint) == tainted($arg) + ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, downgrade down\n"; } -{ - fresh_perl_is('$a = substr $^X, 0, 0; /\x{100}/i; /$a\x{100}/i || print q,ok,', - 'ok', {switches => ["-T", "-l"]}, - "matching a regexp is taint agnostic"); - fresh_perl_is('$a = substr $^X, 0, 0; /$a\x{100}/i || print q,ok,', - 'ok', {switches => ["-T", "-l"]}, - "therefore swash_init should be taint agnostic"); -} - -{ - # RT #122148: s///e on tainted utf8 strings got pos() messed up in 5.20 - - my @p; - my $s = "\x{100}\x{100}\x{100}\x{100}". $^X; - $s =~ s/\x{100}/push @p, pos($s); "xxxx";/eg; - is("@p", "0 1 2 3", "RT #122148"); -} diff --git a/gnu/usr.bin/perl/t/uni/chomp.t b/gnu/usr.bin/perl/t/uni/chomp.t index 0dca91a9bf9..5f52558ab61 100644 --- a/gnu/usr.bin/perl/t/uni/chomp.t +++ b/gnu/usr.bin/perl/t/uni/chomp.t @@ -1,14 +1,32 @@ #!./perl -w BEGIN { - require './test.pl'; - skip_all_without_dynamic_extension('Encode'); - skip_all("EBCDIC") if $::IS_EBCDIC; - skip_all_without_perlio(); + if ($ENV{'PERL_CORE'}){ + chdir 't'; + @INC = '../lib'; + } + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # Skip: EBCDIC\n"; + exit 0; + } + unless (PerlIO::Layer->find('perlio')){ + print "1..0 # Skip: PerlIO required\n"; + exit 0; + } + if ($ENV{PERL_CORE_MINITEST}) { + print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n"; + exit 0; + } } -use strict; use Encode; +use strict; +use Test::More; # %mbchars = (encoding => { bytes => utf8, ... }, ...); # * pack('C*') is expected to return bytes even if ${^ENCODING} is true. diff --git a/gnu/usr.bin/perl/t/uni/class.t b/gnu/usr.bin/perl/t/uni/class.t index 144ae4334d0..130b720eacc 100644 --- a/gnu/usr.bin/perl/t/uni/class.t +++ b/gnu/usr.bin/perl/t/uni/class.t @@ -4,20 +4,15 @@ BEGIN { require "test.pl"; } -plan tests => 11; +plan tests => 4334; -my $str = join "", map latin1_to_native(chr($_)), 0x20 .. 0x6F; - -is(($str =~ /(\p{IsMyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO', - 'user-defined class compiled before defined'); - -sub IsMyUniClass { +sub MyUniClass { <<END; 0030 004F END } -sub Other::IsClass { +sub Other::Class { <<END; 0040 005F END @@ -25,62 +20,147 @@ END sub A::B::Intersection { <<END; -+main::IsMyUniClass -&Other::IsClass ++main::MyUniClass +&Other::Class END } -sub test_regexp ($$) { - # test that given string consists of N-1 chars matching $qr1, and 1 - # char matching $qr2 - my ($str, $blk) = @_; - - # constructing these objects here makes the last test loop go much faster - my $qr1 = qr/(\p{$blk}+)/; - if ($str =~ $qr1) { - is($1, substr($str, 0, -1)); # all except last char - } - else { - fail('first N-1 chars did not match'); - } - - my $qr2 = qr/(\P{$blk}+)/; - if ($str =~ $qr2) { - is($1, substr($str, -1)); # only last char - } - else { - fail('last char did not match'); - } -} -use strict; +my $str = join "", map chr($_), 0x20 .. 0x6F; # make sure it finds built-in class is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'); is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'); # make sure it finds user-defined class -is(($str =~ /(\p{IsMyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO'); +is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO'); # make sure it finds class in other package -is(($str =~ /(\p{Other::IsClass}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_'); +is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_'); # make sure it finds class in other OTHER package is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO'); -# lib/unicore/lib/Bc/AL.pl. U+070E is unassigned, currently, but still has -# bidi class AL. The first one in the sequence that doesn't is 0711, which is -# BC=NSM. -$str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}\x{0712}"; -is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{0711}"); -is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{0711}"); -is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{0711}"); -is(($str =~ /(\P{bc=AL}+)/)[0], "\x{0711}"); +# all of these should look in lib/unicore/bc/AL.pl +$str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}"; +is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}"); +is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}"); +is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}"); +is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}"); # make sure InGreek works $str = "[\x{038B}\x{038C}\x{038D}]"; is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); +is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); +is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); +is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); +is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); + + +use File::Spec; +my $updir = File::Spec->updir; + + +# the %utf8::... hashes are already in existence +# because utf8_pva.pl was run by utf8_heavy.pl + +# non-General Category and non-Script +while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) { + my $prop_name = $utf8::PropertyAlias{$abbrev}; + next unless $prop_name; + next if $abbrev eq "gc_sc"; + + for (sort keys %$files) { + my $filename = File::Spec->catfile( + $updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl" + ); + + next unless -e $filename; + my ($h1, $h2) = map hex, split /\t/, (do $filename); + my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); -# The other tests that are based on looking at the generated files are now -# in t/re/uniprops.t + for my $p ($prop_name, $abbrev) { + for my $c ($files->{$_}, $_) { + is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1)); + is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1)); + } + } + } +} + +# General Category and Script +for my $p ('gc', 'sc') { + while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) { + my $filename = File::Spec->catfile( + $updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl" + ); + + next unless -e $filename; + my ($h1, $h2) = map hex, split /\t/, (do $filename); + my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); + + for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) { + for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) { + is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1)); + is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1)); + is($str =~ /(\p{$y}+)/ && $1, substr($str, 0, -1)); + is($str =~ /(\P{$y}+)/ && $1, substr($str, -1)); + } + } + } +} + +# test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.) +{ + # Aargh. Nasty case insensitive filesystems mean that Cf.pl will cause a -e + # test for cf.pl to return true. So need to read the filenames explicitly + # to get a case sensitive test + my %files; + + my $dirname = File::Spec->catdir($updir => lib => unicore => lib => gc_sc); + opendir D, $dirname or die $!; + @files{readdir D} = (); + closedir D; + + for (keys %utf8::PA_reverse) { + my $leafname = "$utf8::PA_reverse{$_}.pl"; + next unless exists $files{$leafname}; + + my $filename = File::Spec->catfile($dirname, $leafname); + + my ($h1, $h2) = map hex, split /\t/, (do $filename); + my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); + + for my $x ('gc', 'General Category') { + print "# $filename $x $_, $utf8::PA_reverse{$_}\n"; + for my $y ($_, $utf8::PA_reverse{$_}) { + is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1)); + is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1)); + is($str =~ /(\p{$y}+)/ && $1, substr($str, 0, -1)); + is($str =~ /(\P{$y}+)/ && $1, substr($str, -1)); + } + } + } +} + +# test the blocks (InFoobar) +for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) { + my $filename = File::Spec->catfile( + $updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl" + ); + + next unless -e $filename; + my ($h1, $h2) = map hex, split /\t/, (do $filename); + my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); + + my $blk = $_; + + is($str =~ /(\p{$blk}+)/ && $1, substr($str, 0, -1)); + is($str =~ /(\P{$blk}+)/ && $1, substr($str, -1)); + + $blk =~ s/^In/Block:/; + + is($str =~ /(\p{$blk}+)/ && $1, substr($str, 0, -1)); + is($str =~ /(\P{$blk}+)/ && $1, substr($str, -1)); +} |