src - OpenBSD base system

diff options


context:
space:
mode:

author	Todd C. Miller <millert@cvs.openbsd.org>	2004-08-09 17:48:38 +0000
committer	Todd C. Miller <millert@cvs.openbsd.org>	2004-08-09 17:48:38 +0000
commit	fcd51aefcee6b99d1de2040108f05931de2f2710 (patch)
tree	53694e7b3df4e50a35d3d8a7ab76fb713b33791c /gnu/usr.bin/perl
parent	1da0ec6edba07cd8d9c7ae52e35799093a90900f (diff)

Import of stock perl 5.8.5

Diffstat (limited to 'gnu/usr.bin/perl')

-rw-r--r--

gnu/usr.bin/perl/lib/DBM_Filter/t/encode.t

-rw-r--r--

gnu/usr.bin/perl/lib/DBM_Filter/t/utf8.t

-rw-r--r--

gnu/usr.bin/perl/t/comp/utf.t

139

-rw-r--r--

gnu/usr.bin/perl/t/op/utftaint.t

150

-rw-r--r--

gnu/usr.bin/perl/t/uni/chomp.t

-rw-r--r--

gnu/usr.bin/perl/t/uni/class.t

170

6 files changed, 313 insertions, 275 deletions

diff --git a/gnu/usr.bin/perl/lib/DBM_Filter/t/encode.t b/gnu/usr.bin/perl/lib/DBM_Filter/t/encode.t
index d7a8ef233f2..7b71a98b2ee 100644
--- a/gnu/usr.bin/perl/lib/DBM_Filter/t/encode.t
+++ b/gnu/usr.bin/perl/lib/DBM_Filter/t/encode.t

@@ -21,17 +21,7 @@ require "dbm_filter_util.pl";

use Test::More tests => 26;

BEGIN { use_ok('DBM_Filter') };

-my $db_file;

-BEGIN {

- use Config;

- foreach (qw/SDBM_File ODBM_File NDBM_File GDBM_File DB_File/) {

- if ($Config{extensions} =~ /\b$_\b/) {

- $db_file = $_;

- last;

- }

- use_ok($db_file);

-};

+BEGIN { use_ok('SDBM_File') };

BEGIN { use_ok('Fcntl') };

BEGIN { use_ok('charnames', qw{greek})};

@@ -41,12 +31,12 @@ unlink <Op_dbmx*>;

END { unlink <Op_dbmx*>; }

my %h1 = () ;

-my $db1 = tie(%h1, $db_file,'Op_dbmx', O_RDWR|O_CREAT, 0640) ;

+my $db1 = tie(%h1, 'SDBM_File','Op_dbmx', O_RDWR|O_CREAT, 0640) ;

-ok $db1, "tied to $db_file";

+ok $db1, "tied to SDBM_File";

eval { $db1->Filter_Push('encode' => 'blah') };

-like $@, qr/^Encoding 'blah' is not available/, "push an illegal filter" ;

+like $@, qr/^Encoding 'blah' is not available/, "push an illigal filter" ;

eval { $db1->Filter_Push('encode') };

is $@, '', "push an 'encode' filter (default to utf-8)" ;

@@ -93,29 +83,18 @@ undef $db1;

# read the dbm file without the filter

my %h2 = () ;

-my $db2 = tie(%h2, $db_file,'Op_dbmx', O_RDWR|O_CREAT, 0640) ;

-ok $db2, "tied to $db_file";

-if (ord('A') == 193) { # EBCDIC.

- VerifyData(\%h2,

- {

- 'alpha' => "\xB4\x58",

- 'beta' => "\xB4\x59",

- "\xB4\x62"=> "gamma",

- "\x65\x75\x72\x6F" => "\xA4",

- "" => "",

- });

-} else {

- VerifyData(\%h2,

- {

- 'alpha' => "\xCE\xB1",

- 'beta' => "\xCE\xB2",

- "\xCE\xB3"=> "gamma",

- 'euro' => "\xA4",

- "" => "",

- });

+my $db2 = tie(%h2, 'SDBM_File','Op_dbmx', O_RDWR|O_CREAT, 0640) ;

+ok $db2, "tied to SDBM_File";

+VerifyData(\%h2,

+ {

+ 'alpha' => "\xCE\xB1",

+ 'beta' => "\xCE\xB2",

+ "\xCE\xB3"=> "gamma",

+ 'euro' => "\xA4",

+ "" => "",

+ });

undef $db2;

{

diff --git a/gnu/usr.bin/perl/lib/DBM_Filter/t/utf8.t b/gnu/usr.bin/perl/lib/DBM_Filter/t/utf8.t
index 0bc38f86030..e37afa2d4ad 100644
--- a/gnu/usr.bin/perl/lib/DBM_Filter/t/utf8.t
+++ b/gnu/usr.bin/perl/lib/DBM_Filter/t/utf8.t

@@ -19,17 +19,7 @@ require "dbm_filter_util.pl";

use Test::More tests => 20;

BEGIN { use_ok('DBM_Filter') };

-my $db_file;

-BEGIN {

- use Config;

- foreach (qw/SDBM_File ODBM_File NDBM_File GDBM_File DB_File/) {

- if ($Config{extensions} =~ /\b$_\b/) {

- $db_file = $_;

- last;

- }

- use_ok($db_file);

-};

+BEGIN { use_ok('SDBM_File') };

BEGIN { use_ok('Fcntl') };

BEGIN { use_ok('charnames', qw{greek})};

@@ -39,9 +29,9 @@ unlink <Op_dbmx*>;

END { unlink <Op_dbmx*>; }

my %h1 = () ;

-my $db1 = tie(%h1, $db_file,'Op_dbmx', O_RDWR|O_CREAT, 0640) ;

+my $db1 = tie(%h1, 'SDBM_File','Op_dbmx', O_RDWR|O_CREAT, 0640) ;

-ok $db1, "tied to $db_file";

+ok $db1, "tied to SDBM_File";

eval { $db1->Filter_Push('utf8') };

is $@, '', "push a 'utf8' filter" ;

@@ -75,27 +65,17 @@ undef $db1;

# read the dbm file without the filter

my %h2 = () ;

-my $db2 = tie(%h2, $db_file,'Op_dbmx', O_RDWR|O_CREAT, 0640) ;

-ok $db2, "tied to $db_file";

-if (ord('A') == 193) { # EBCDIC.

- VerifyData(\%h2,

- {

- 'alpha' => "\xB4\x58",

- 'beta' => "\xB4\x59",

- "\xB4\x62"=> "gamma",

- "" => "",

- });

-} else {

- VerifyData(\%h2,

- {

- 'alpha' => "\xCE\xB1",

- 'beta' => "\xCE\xB2",

- "\xCE\xB3"=> "gamma",

- "" => "",

- });

+my $db2 = tie(%h2, 'SDBM_File','Op_dbmx', O_RDWR|O_CREAT, 0640) ;

+ok $db2, "tied to SDBM_File";

+VerifyData(\%h2,

+ {

+ 'alpha' => "\xCE\xB1",

+ 'beta' => "\xCE\xB2",

+ "\xCE\xB3"=> "gamma",

+ "" => "",

+ });

undef $db2;

{

diff --git a/gnu/usr.bin/perl/t/comp/utf.t b/gnu/usr.bin/perl/t/comp/utf.t
index f5190f9eebe..90a9e5e11bb 100644
--- a/gnu/usr.bin/perl/t/comp/utf.t
+++ b/gnu/usr.bin/perl/t/comp/utf.t

@@ -1,102 +1,57 @@

-#!./perl -w

-print "1..4016\n";

-my $test = 0;

-my %templates = (

- 'UTF-8' => 'C0U',

- 'UTF-16BE' => 'n',

- 'UTF-16LE' => 'v',

- );

-sub bytes_to_utf {

- my ($enc, $content, $do_bom) = @_;

- my $template = $templates{$enc};

- die "Unsupported encoding $enc" unless $template;

- my @chars = unpack "U*", $content;

- if ($enc ne 'UTF-8') {

- # Make surrogate pairs

- my @remember_that_utf_16_is_variable_length;

- foreach my $ord (@chars) {

- if ($ord < 0x10000) {

- push @remember_that_utf_16_is_variable_length,

- $ord;

- } else {

- $ord -= 0x10000;

- push @remember_that_utf_16_is_variable_length,

- (0xD800 | ($ord >> 10)), (0xDC00 | ($ord & 0x3FF));

- }

- @chars = @remember_that_utf_16_is_variable_length;

+#!./perl

+BEGIN {

+ chdir 't' if -d 't';

+ @INC = '../lib';

+ unless (find PerlIO::Layer 'perlio') {

+ print "1..0 # Skip: not perlio\n";

+ exit 0;

}

- return pack "$template*", ($do_bom ? 0xFEFF : ()), @chars;

-sub test {

- my ($enc, $write, $expect, $bom, $nl, $name) = @_;

- open my $fh, ">", "utf$$.pl" or die "utf.pl: $!";

- binmode $fh;

- print $fh bytes_to_utf($enc, $write . ($nl ? "\n" : ''), $bom);

- close $fh or die $!;

- my $got = do "./utf$$.pl";

- $test = $test + 1;

- if (!defined $got) {

- if ($@ =~ /^(Unsupported script encoding \Q$enc\E)/) {

- print "ok $test # skip $1\n";

- } else {

- print "not ok $test # $enc $bom $nl $name; got undef\n";

- }

- } elsif ($got ne $expect) {

- print "not ok $test # $enc $bom $nl $name; got '$got'\n";

- } else {

- print "ok $test # $enc $bom $nl $name\n";

+ if ($ENV{PERL_CORE_MINITEST}) {

+ print "1..0 # Skip: no dynamic loading on miniperl, no threads\n";

+ exit 0;

+ }

+ require Config; import Config;

+ if ($Config{'extensions'} !~ /\bEncode\b/) {

+ print "1..0 # Skip: Encode was not built\n";

+ exit 0;

}

-for my $bom (0, 1) {

- for my $enc (qw(UTF-16LE UTF-16BE UTF-8)) {

- for my $nl (1, 0) {

- for my $value (123, 1234, 12345) {

- test($enc, $value, $value, $bom, $nl, $value);

- # This has the unfortunate side effect of causing an infinite

- # loop without the bug fix it corresponds to:

- test($enc, "($value)", $value, $bom, $nl, "($value)");

- }

- next if $enc eq 'UTF-8';

- # Arguably a bug that currently string literals from UTF-8 file

- # handles are not implicitly "use utf8", but don't FIXME that

- # right now, as here we're testing the input filter itself.

+require "./test.pl";

- for my $expect ("N", "\xFF", "\x{100}", "\x{010a}", "\x{0a23}",

- "\x{10000}", "\x{64321}", "\x{10FFFD}",

- "\x{1000a}", # 0xD800 0xDC0A

- "\x{12800}", # 0xD80A 0xDC00

- ) {

- # A space so that the UTF-16 heuristic triggers - " '" gives two

- # characters of ASCII.

- my $write = " '$expect'";

- my $name = 'chrs ' . join ', ', map {ord $_} split '', $expect;

- test($enc, $write, $expect, $bom, $nl, $name);

- }

+plan(tests => 15);

- # This is designed to try to trip over the end of the buffer,

- # with similar results to U-1000A and U-12800 above.

- for my $pad (2 .. 162) {

- for my $chr ("\x{10000}", "\x{1000a}", "\x{12800}") {

- my $padding = ' ' x $pad;

- # Need 4 octets that were from 2 ASCII characters to trigger

- # the heuristic that detects UTF-16 without a BOM. For

- # UTF-16BE, one space and the newline will do, as the

- # newline's high octet comes first. But for UTF-16LE, a

- # newline is "\n\0", so it doesn't trigger it.

- test($enc, " \n$padding'$chr'", $chr, $bom, $nl,

- sprintf "'\\x{%x}' with $pad spaces before it", ord $chr);

- }

+my $BOM = chr(0xFEFF);

+sub test {

+ my ($enc, $tag, $bom) = @_;

+ open(UTF_PL, ">:raw:encoding($enc)", "utf.pl")

+ or die "utf.pl($enc,$tag,$bom): $!";

+ print UTF_PL $BOM if $bom;

+ print UTF_PL "$tag\n";

+ close(UTF_PL);

+ my $got = do "./utf.pl";

+ is($got, $tag);

}

+test("utf16le", 123, 1);

+test("utf16le", 1234, 1);

+test("utf16le", 12345, 1);

+test("utf16be", 123, 1);

+test("utf16be", 1234, 1);

+test("utf16be", 12345, 1);

+test("utf8", 123, 1);

+test("utf8", 1234, 1);

+test("utf8", 12345, 1);

+test("utf16le", 123, 0);

+test("utf16le", 1234, 0);

+test("utf16le", 12345, 0);

+test("utf16be", 123, 0);

+test("utf16be", 1234, 0);

+test("utf16be", 12345, 0);

END {

- 1 while unlink "utf$$.pl";

+ 1 while unlink "utf.pl";

}

diff --git a/gnu/usr.bin/perl/t/op/utftaint.t b/gnu/usr.bin/perl/t/op/utftaint.t
index d734927d590..cd44503e749 100644
--- a/gnu/usr.bin/perl/t/op/utftaint.t
+++ b/gnu/usr.bin/perl/t/op/utftaint.t

@@ -2,6 +2,10 @@

# tests whether tainting works with UTF-8

BEGIN {

+ if ($ENV{PERL_CORE_MINITEST}) {

+ print "1..0 # Skip: no dynamic loading on miniperl, no threads\n";

+ exit 0;

+ }

chdir 't' if -d 't';

@INC = qw(../lib);

}

@@ -9,21 +13,26 @@ BEGIN {

use strict;

use Config;

-# How to identify taint when you see it

-sub any_tainted (@) {

- not eval { join("",@_), kill 0; 1 };

-sub tainted ($) {

- any_tainted @_;

+BEGIN {

+ if ($Config{extensions} !~ m(\bList/Util\b)) {

+ print "1..0 # Skip: no Scalar::Util module\n";

+ exit 0;

+ }

}

-require './test.pl';

-plan(tests => 3*10 + 3*8 + 2*16 + 3);

+use Scalar::Util qw(tainted);

+use Test;

+plan tests => 3*10 + 3*8 + 2*16;

+my $cnt = 0;

my $arg = $ENV{PATH}; # a tainted value

use constant UTF8 => "\x{1234}";

-*is_utf8 = \&utf8::is_utf8;

+sub is_utf8 {

+ my $s = shift;

+ return 0xB6 != ord pack('a*', chr(0xB6).$s);

for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) {

my $encode = $ary->[0];

@@ -31,31 +40,41 @@ for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) {

my $taint = $arg; substr($taint, 0) = $ary->[1];

- is(tainted($taint), tainted($arg), "tainted: $encode, before test");

+ print tainted($taint) == tainted($arg)

+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, before test\n";

my $lconcat = $taint;

$lconcat .= UTF8;

- is($lconcat, $string.UTF8, "compare: $encode, concat left");

+ print $lconcat eq $string.UTF8

+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, concat left\n";

- is(tainted($lconcat), tainted($arg), "tainted: $encode, concat left");

+ print tainted($lconcat) == tainted($arg)

+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, concat left\n";

my $rconcat = UTF8;

$rconcat .= $taint;

- is($rconcat, UTF8.$string, "compare: $encode, concat right");

+ print $rconcat eq UTF8.$string

+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, concat right\n";

- is(tainted($rconcat), tainted($arg), "tainted: $encode, concat right");

+ print tainted($rconcat) == tainted($arg)

+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, concat right\n";

my $ljoin = join('!', $taint, UTF8);

- is($ljoin, join('!', $string, UTF8), "compare: $encode, join left");

+ print $ljoin eq join('!', $string, UTF8)

+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, join left\n";

- is(tainted($ljoin), tainted($arg), "tainted: $encode, join left");

+ print tainted($ljoin) == tainted($arg)

+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, join left\n";

my $rjoin = join('!', UTF8, $taint);

- is($rjoin, join('!', UTF8, $string), "compare: $encode, join right");

+ print $rjoin eq join('!', UTF8, $string)

+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, join right\n";

- is(tainted($rjoin), tainted($arg), "tainted: $encode, join right");

+ print tainted($rjoin) == tainted($arg)

+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, join right\n";

- is(tainted($taint), tainted($arg), "tainted: $encode, after test");

+ print tainted($taint) == tainted($arg)

+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, after test\n";

}

@@ -63,29 +82,37 @@ for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) {

my $encode = $ary->[0];

my $utf8 = pack('U*') . $ary->[1];

- my $byte = unpack('U0a*', $utf8);

+ my $byte = pack('C0a*', $utf8);

my $taint = $arg; substr($taint, 0) = $utf8;

utf8::encode($taint);

- is($taint, $byte, "compare: $encode, encode utf8");

+ print $taint eq $byte

+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, encode utf8\n";

- is(pack('a*',$taint), pack('a*',$byte), "bytecmp: $encode, encode utf8");

+ print pack('a*',$taint) eq pack('a*',$byte)

+ ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, encode utf8\n";

- ok(!is_utf8($taint), "is_utf8: $encode, encode utf8");

+ print !is_utf8($taint)

+ ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, encode utf8\n";

- is(tainted($taint), tainted($arg), "tainted: $encode, encode utf8");

+ print tainted($taint) == tainted($arg)

+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, encode utf8\n";

my $taint = $arg; substr($taint, 0) = $byte;

utf8::decode($taint);

- is($taint, $utf8, "compare: $encode, decode byte");

+ print $taint eq $utf8

+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, decode byte\n";

- is(pack('a*',$taint), pack('a*',$utf8), "bytecmp: $encode, decode byte");

+ print pack('a*',$taint) eq pack('a*',$utf8)

+ ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, decode byte\n";

- is(is_utf8($taint), ($encode ne 'ascii'), "is_utf8: $encode, decode byte");

+ print is_utf8($taint) eq ($encode ne 'ascii')

+ ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, decode byte\n";

- is(tainted($taint), tainted($arg), "tainted: $encode, decode byte");

+ print tainted($taint) == tainted($arg)

+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, decode byte\n";

}

@@ -93,68 +120,67 @@ for my $ary ([ascii => 'perl'], [latin1 => "\xB6"]) {

my $encode = $ary->[0];

my $up = pack('U*') . $ary->[1];

- my $down = pack("a*", $ary->[1]);

+ my $down = pack('C0a*', $ary->[1]);

my $taint = $arg; substr($taint, 0) = $up;

utf8::upgrade($taint);

- is($taint, $up, "compare: $encode, upgrade up");

+ print $taint eq $up

+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, upgrade up\n";

- is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade up");

+ print pack('a*',$taint) eq pack('a*',$up)

+ ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, upgrade up\n";

- ok(is_utf8($taint), "is_utf8: $encode, upgrade up");

+ print is_utf8($taint)

+ ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, upgrade up\n";

- is(tainted($taint), tainted($arg), "tainted: $encode, upgrade up");

+ print tainted($taint) == tainted($arg)

+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, upgrade up\n";

my $taint = $arg; substr($taint, 0) = $down;

utf8::upgrade($taint);

- is($taint, $up, "compare: $encode, upgrade down");

+ print $taint eq $up

+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, upgrade down\n";

- is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade down");

+ print pack('a*',$taint) eq pack('a*',$up)

+ ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, upgrade down\n";

- ok(is_utf8($taint), "is_utf8: $encode, upgrade down");

+ print is_utf8($taint)

+ ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, upgrade down\n";

- is(tainted($taint), tainted($arg), "tainted: $encode, upgrade down");

+ print tainted($taint) == tainted($arg)

+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, upgrade down\n";

my $taint = $arg; substr($taint, 0) = $up;

utf8::downgrade($taint);

- is($taint, $down, "compare: $encode, downgrade up");

+ print $taint eq $down

+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, downgrade up\n";

- is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade up");

+ print pack('a*',$taint) eq pack('a*',$down)

+ ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, downgrade up\n";

- ok(!is_utf8($taint), "is_utf8: $encode, downgrade up");

+ print !is_utf8($taint)

+ ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, downgrade up\n";

- is(tainted($taint), tainted($arg), "tainted: $encode, downgrade up");

+ print tainted($taint) == tainted($arg)

+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, downgrade up\n";

my $taint = $arg; substr($taint, 0) = $down;

utf8::downgrade($taint);

- is($taint, $down, "compare: $encode, downgrade down");

+ print $taint eq $down

+ ? "ok " : "not ok ", ++$cnt, " # compare: $encode, downgrade down\n";

- is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade down");

+ print pack('a*',$taint) eq pack('a*',$down)

+ ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, downgrade down\n";

- ok(!is_utf8($taint), "is_utf8: $encode, downgrade down");

+ print !is_utf8($taint)

+ ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, downgrade down\n";

- is(tainted($taint), tainted($arg), "tainted: $encode, downgrade down");

+ print tainted($taint) == tainted($arg)

+ ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, downgrade down\n";

}

- fresh_perl_is('$a = substr $^X, 0, 0; /\x{100}/i; /$a\x{100}/i || print q,ok,',

- 'ok', {switches => ["-T", "-l"]},

- "matching a regexp is taint agnostic");

- fresh_perl_is('$a = substr $^X, 0, 0; /$a\x{100}/i || print q,ok,',

- 'ok', {switches => ["-T", "-l"]},

- "therefore swash_init should be taint agnostic");

- # RT #122148: s///e on tainted utf8 strings got pos() messed up in 5.20

- my @p;

- my $s = "\x{100}\x{100}\x{100}\x{100}". $^X;

- $s =~ s/\x{100}/push @p, pos($s); "xxxx";/eg;

- is("@p", "0 1 2 3", "RT #122148");

diff --git a/gnu/usr.bin/perl/t/uni/chomp.t b/gnu/usr.bin/perl/t/uni/chomp.t
index 0dca91a9bf9..5f52558ab61 100644
--- a/gnu/usr.bin/perl/t/uni/chomp.t
+++ b/gnu/usr.bin/perl/t/uni/chomp.t

@@ -1,14 +1,32 @@

#!./perl -w

BEGIN {

- require './test.pl';

- skip_all_without_dynamic_extension('Encode');

- skip_all("EBCDIC") if $::IS_EBCDIC;

- skip_all_without_perlio();

+ if ($ENV{'PERL_CORE'}){

+ chdir 't';

+ @INC = '../lib';

+ }

+ require Config; import Config;

+ if ($Config{'extensions'} !~ /\bEncode\b/) {

+ print "1..0 # Skip: Encode was not built\n";

+ exit 0;

+ }

+ if (ord("A") == 193) {

+ print "1..0 # Skip: EBCDIC\n";

+ exit 0;

+ }

+ unless (PerlIO::Layer->find('perlio')){

+ print "1..0 # Skip: PerlIO required\n";

+ exit 0;

+ }

+ if ($ENV{PERL_CORE_MINITEST}) {

+ print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n";

+ exit 0;

+ }

}

-use strict;

use Encode;

+use strict;

+use Test::More;

# %mbchars = (encoding => { bytes => utf8, ... }, ...);

# * pack('C*') is expected to return bytes even if ${^ENCODING} is true.

diff --git a/gnu/usr.bin/perl/t/uni/class.t b/gnu/usr.bin/perl/t/uni/class.t
index 144ae4334d0..130b720eacc 100644
--- a/gnu/usr.bin/perl/t/uni/class.t
+++ b/gnu/usr.bin/perl/t/uni/class.t

@@ -4,20 +4,15 @@ BEGIN {

require "test.pl";

}

-plan tests => 11;

+plan tests => 4334;

-my $str = join "", map latin1_to_native(chr($_)), 0x20 .. 0x6F;

-is(($str =~ /(\p{IsMyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO',

- 'user-defined class compiled before defined');

-sub IsMyUniClass {

+sub MyUniClass {

<<END;

0030 004F

END

}

-sub Other::IsClass {

+sub Other::Class {

<<END;

0040 005F

END

@@ -25,62 +20,147 @@ END

sub A::B::Intersection {

<<END;

-+main::IsMyUniClass

-&Other::IsClass

++main::MyUniClass

+&Other::Class

END

}

-sub test_regexp ($$) {

- # test that given string consists of N-1 chars matching $qr1, and 1

- # char matching $qr2

- my ($str, $blk) = @_;

- # constructing these objects here makes the last test loop go much faster

- my $qr1 = qr/(\p{$blk}+)/;

- if ($str =~ $qr1) {

- is($1, substr($str, 0, -1)); # all except last char

- }

- else {

- fail('first N-1 chars did not match');

- }

- my $qr2 = qr/(\P{$blk}+)/;

- if ($str =~ $qr2) {

- is($1, substr($str, -1)); # only last char

- }

- else {

- fail('last char did not match');

- }

-use strict;

+my $str = join "", map chr($_), 0x20 .. 0x6F;

# make sure it finds built-in class

is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');

is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');

# make sure it finds user-defined class

-is(($str =~ /(\p{IsMyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO');

+is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO');

# make sure it finds class in other package

-is(($str =~ /(\p{Other::IsClass}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');

+is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');

# make sure it finds class in other OTHER package

is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO');

-# lib/unicore/lib/Bc/AL.pl. U+070E is unassigned, currently, but still has

-# bidi class AL. The first one in the sequence that doesn't is 0711, which is

-# BC=NSM.

-$str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}\x{0712}";

-is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{0711}");

-is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{0711}");

-is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{0711}");

-is(($str =~ /(\P{bc=AL}+)/)[0], "\x{0711}");

+# all of these should look in lib/unicore/bc/AL.pl

+$str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}";

+is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}");

+is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}");

+is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}");

+is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}");

# make sure InGreek works

$str = "[\x{038B}\x{038C}\x{038D}]";

is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");

+is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");

+is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");

+is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");

+is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");

+use File::Spec;

+my $updir = File::Spec->updir;

+# the %utf8::... hashes are already in existence

+# because utf8_pva.pl was run by utf8_heavy.pl

+# non-General Category and non-Script

+while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) {

+ my $prop_name = $utf8::PropertyAlias{$abbrev};

+ next unless $prop_name;

+ next if $abbrev eq "gc_sc";

+ for (sort keys %$files) {

+ my $filename = File::Spec->catfile(

+ $updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl"

+ );

+ next unless -e $filename;

+ my ($h1, $h2) = map hex, split /\t/, (do $filename);

+ my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);

-# The other tests that are based on looking at the generated files are now

-# in t/re/uniprops.t

+ for my $p ($prop_name, $abbrev) {

+ for my $c ($files->{$_}, $_) {

+ is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1));

+ is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1));

+ }

+# General Category and Script

+for my $p ('gc', 'sc') {

+ while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) {

+ my $filename = File::Spec->catfile(

+ $updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl"

+ );

+ next unless -e $filename;

+ my ($h1, $h2) = map hex, split /\t/, (do $filename);

+ my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);

+ for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) {

+ for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) {

+ is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));

+ is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));

+ is($str =~ /(\p{$y}+)/ && $1, substr($str, 0, -1));

+ is($str =~ /(\P{$y}+)/ && $1, substr($str, -1));

+ }

+# test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.)

+ # Aargh. Nasty case insensitive filesystems mean that Cf.pl will cause a -e

+ # test for cf.pl to return true. So need to read the filenames explicitly

+ # to get a case sensitive test

+ my %files;

+ my $dirname = File::Spec->catdir($updir => lib => unicore => lib => gc_sc);

+ opendir D, $dirname or die $!;

+ @files{readdir D} = ();

+ closedir D;

+ for (keys %utf8::PA_reverse) {

+ my $leafname = "$utf8::PA_reverse{$_}.pl";

+ next unless exists $files{$leafname};

+ my $filename = File::Spec->catfile($dirname, $leafname);

+ my ($h1, $h2) = map hex, split /\t/, (do $filename);

+ my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);

+ for my $x ('gc', 'General Category') {

+ print "# $filename $x $_, $utf8::PA_reverse{$_}\n";

+ for my $y ($_, $utf8::PA_reverse{$_}) {

+ is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));

+ is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));

+ is($str =~ /(\p{$y}+)/ && $1, substr($str, 0, -1));

+ is($str =~ /(\P{$y}+)/ && $1, substr($str, -1));

+ }

+# test the blocks (InFoobar)

+for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) {

+ my $filename = File::Spec->catfile(

+ $updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl"

+ );

+ next unless -e $filename;

+ my ($h1, $h2) = map hex, split /\t/, (do $filename);

+ my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);

+ my $blk = $_;

+ is($str =~ /(\p{$blk}+)/ && $1, substr($str, 0, -1));

+ is($str =~ /(\P{$blk}+)/ && $1, substr($str, -1));

+ $blk =~ s/^In/Block:/;

+ is($str =~ /(\p{$blk}+)/ && $1, substr($str, 0, -1));

+ is($str =~ /(\P{$blk}+)/ && $1, substr($str, -1));