summaryrefslogtreecommitdiff
path: root/gnu/usr.bin/perl/lib/I18N
diff options
context:
space:
mode:
authorTodd C. Miller <millert@cvs.openbsd.org>2002-10-27 22:15:15 +0000
committerTodd C. Miller <millert@cvs.openbsd.org>2002-10-27 22:15:15 +0000
commit74cfb115ac810480c0000dc742b20383c1578bac (patch)
tree316d96e5123617976f1637b143570c309a662045 /gnu/usr.bin/perl/lib/I18N
parent453ade492b8e06c619009d6cd52a85cb04e8cf17 (diff)
stock perl 5.8.0 from CPAN
Diffstat (limited to 'gnu/usr.bin/perl/lib/I18N')
-rw-r--r--gnu/usr.bin/perl/lib/I18N/Collate.t44
-rw-r--r--gnu/usr.bin/perl/lib/I18N/LangTags.pm800
-rw-r--r--gnu/usr.bin/perl/lib/I18N/LangTags/ChangeLog107
-rw-r--r--gnu/usr.bin/perl/lib/I18N/LangTags/List.pm1622
-rw-r--r--gnu/usr.bin/perl/lib/I18N/LangTags/README78
-rw-r--r--gnu/usr.bin/perl/lib/I18N/LangTags/test.pl79
6 files changed, 2730 insertions, 0 deletions
diff --git a/gnu/usr.bin/perl/lib/I18N/Collate.t b/gnu/usr.bin/perl/lib/I18N/Collate.t
new file mode 100644
index 00000000000..bf3ba20b6aa
--- /dev/null
+++ b/gnu/usr.bin/perl/lib/I18N/Collate.t
@@ -0,0 +1,44 @@
+#!./perl
+
+# Smoke test for I18N::Collate: checks that constructing a collator emits a
+# "HAS BEEN DEPRECATED" warning and that the string comparison operators
+# applied to collator objects behave consistently.
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib';
+ require Config; import Config;
+ # Emit an empty plan and stop when this perl has no setlocale() or its
+ # ccflags mention NO_LOCALE.
+ if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) {
+ print "1..0\n";
+ exit;
+ }
+}
+
+print "1..7\n";
+
+# Test 1: the module loads.
+use I18N::Collate;
+
+print "ok 1\n";
+
+# Test 2: an object can be constructed.
+$a = I18N::Collate->new("foo");
+
+print "ok 2\n";
+
+# Test 3: with warnings enabled, construction must warn about deprecation.
+# The __WARN__ handler stashes the warning text in $@ so it can be inspected;
+# $@ is cleared again before the block ends.
+{
+ use warnings;
+ local $SIG{__WARN__} = sub { $@ = $_[0] };
+ $b = I18N::Collate->new("foo");
+ print "not " unless $@ =~ /\bHAS BEEN DEPRECATED\b/;
+ print "ok 3\n";
+ $@ = '';
+}
+
+# Test 4: two objects built from the same string compare equal with "eq".
+# NOTE(review): presumably I18N::Collate overloads the string comparison
+# operators -- confirm against the module.
+print "not " unless $a eq $b;
+print "ok 4\n";
+
+# Test 5: $@ must not hold the deprecation text after this construction.
+# The handler from test 3 is no longer installed here, so nothing in this
+# script writes to $@ at this point.
+$b = I18N::Collate->new("bar");
+print "not " if $@ =~ /\bHAS BEEN DEPRECATED\b/;
+print "ok 5\n";
+
+# Test 6: objects built from different strings are not "eq".
+print "not " if $a eq $b;
+print "ok 6\n";
+
+# Test 7: exactly one of "lt" and "gt" may hold for two unequal objects.
+print "not " if $a lt $b == $a gt $b;
+print "ok 7\n";
+
diff --git a/gnu/usr.bin/perl/lib/I18N/LangTags.pm b/gnu/usr.bin/perl/lib/I18N/LangTags.pm
new file mode 100644
index 00000000000..ab5ef38245e
--- /dev/null
+++ b/gnu/usr.bin/perl/lib/I18N/LangTags.pm
@@ -0,0 +1,800 @@
+
+# Time-stamp: "2002-02-02 20:43:03 MST"
+# Sean M. Burke <sburke@cpan.org>
+
+require 5.000;
+package I18N::LangTags;
+use strict;
+use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION %Panic);
+require Exporter;
+@ISA = qw(Exporter);
+@EXPORT = qw();
+@EXPORT_OK = qw(is_language_tag same_language_tag
+ extract_language_tags super_languages
+ similarity_language_tag is_dialect_of
+ locale2language_tag alternate_language_tags
+ encode_language_tag panic_languages
+ );
+%EXPORT_TAGS = ('ALL' => \@EXPORT_OK);
+
+$VERSION = "0.27";
+
+=head1 NAME
+
+I18N::LangTags - functions for dealing with RFC3066-style language tags
+
+=head1 SYNOPSIS
+
+ use I18N::LangTags qw(is_language_tag same_language_tag
+ extract_language_tags super_languages
+ similarity_language_tag is_dialect_of
+ locale2language_tag alternate_language_tags
+ encode_language_tag panic_languages
+ );
+
+...or whatever of those functions you want to import. Those are
+all the exportable functions -- you're free to import only some,
+or none at all. By default, none are imported. If you say:
+
+ use I18N::LangTags qw(:ALL)
+
+...then all are exported. (This saves you from having to use
+something less obvious like C<use I18N::LangTags qw(/./)>.)
+
+If you don't import any of these functions, assume a C<&I18N::LangTags::>
+in front of all the function names in the following examples.
+
+=head1 DESCRIPTION
+
+Language tags are a formalism, described in RFC 3066 (obsoleting
+1766), for declaring what language form (language and possibly
+dialect) a given chunk of information is in.
+
+This library provides functions for common tasks involving language
+tags as they are needed in a variety of protocols and applications.
+
+Please see the "See Also" references for a thorough explanation
+of how to correctly use language tags.
+
+=over
+
+=cut
+
+###########################################################################
+
+=item * the function is_language_tag($lang1)
+
+Returns true iff $lang1 is a formally valid language tag.
+
+ is_language_tag("fr") is TRUE
+ is_language_tag("x-jicarilla") is FALSE
+ (Subtags can be 8 chars long at most -- 'jicarilla' is 9)
+
+ is_language_tag("sgn-US") is TRUE
+ (That's American Sign Language)
+
+ is_language_tag("i-Klikitat") is TRUE
+ (True without regard to the fact no one has actually
+ registered Klikitat -- it's a formally valid tag)
+
+ is_language_tag("fr-patois") is TRUE
+ (Formally valid -- altho descriptively weak!)
+
+ is_language_tag("Spanish") is FALSE
+ is_language_tag("french-patois") is FALSE
+ (No good -- first subtag has to match
+ /^([xXiI]|[a-zA-Z]{2,3})$/ -- see RFC3066)
+
+ is_language_tag("x-borg-prot2532") is TRUE
+ (Yes, subtags can contain digits, as of RFC3066)
+
+=cut
+
+# is_language_tag($tag)
+#   Returns 1 if $tag is a formally valid language tag, 0 otherwise.
+#   Matching is case-insensitive (the argument is lowercased first).
+#   An undefined argument yields 0 without an "uninitialized" warning.
+sub is_language_tag {
+
+  ## Changes in the language tagging standards may have to be reflected here.
+
+  my $tag = defined($_[0]) ? lc($_[0]) : '';
+
+  # Bad degenerate cases that the regexp below would erroneously let pass:
+  # a bare "i" or "x" is not a tag without at least one subtag after it.
+  return 0 if $tag eq "i" or $tag eq "x";
+
+  # Anchored with \A and \z rather than ^ and $: "$" also matches just
+  # before a trailing newline, which made "fr\n" count as a valid tag.
+  return $tag =~
+    /\A(?:                # First subtag
+         [xi] | [a-z]{2,3}
+      )
+      (?:                 # Subtags thereafter
+         -                # separator
+         [a-z0-9]{1,8}    # subtag
+      )*
+    \z/xs ? 1 : 0;
+}
+
+###########################################################################
+
+=item * the function extract_language_tags($whatever)
+
+Returns a list of whatever looks like formally valid language tags
+in $whatever. Not very smart, so don't get too creative with
+what you want to feed it.
+
+ extract_language_tags("fr, fr-ca, i-mingo")
+ returns: ('fr', 'fr-ca', 'i-mingo')
+
+ extract_language_tags("It's like this: I'm in fr -- French!")
+ returns: ('It', 'in', 'fr')
+ (So don't just feed it any old thing.)
+
+The output is untainted. If you don't know what tainting is,
+don't worry about it.
+
+=cut
+
+# extract_language_tags($text)
+#   Returns every substring of $text that looks like a formally valid
+#   language tag, in order of appearance (bare "i" and "x" are discarded).
+#   An undefined argument is treated as the empty string.
+sub extract_language_tags {
+
+  ## Changes in the language tagging standards may have to be reflected here.
+
+  # Copy the input out of a regexp capture so the result is untainted.
+  # The /s is essential: without it "." stops at the first newline and
+  # tags on later lines of a multi-line input were silently dropped.
+  my($text) =
+    defined($_[0]) && $_[0] =~ m/(.+)/s
+    ? $1 : ''
+  ;
+
+  return grep(!m/^[ixIX]$/s, # 'i' and 'x' aren't good tags
+    $text =~
+    m/
+      \b
+      (?:  # First subtag
+         [iIxX] | [a-zA-Z]{2,3}
+      )
+      (?:  # Subtags thereafter
+         -           # separator
+         [a-zA-Z0-9]{1,8}  # subtag
+      )*
+      \b
+    /xsg
+  );
+}
+
+###########################################################################
+
+=item * the function same_language_tag($lang1, $lang2)
+
+Returns true iff $lang1 and $lang2 are acceptable variant tags
+representing the same language-form.
+
+ same_language_tag('x-kadara', 'i-kadara') is TRUE
+ (The x/i- alternation doesn't matter)
+ same_language_tag('X-KADARA', 'i-kadara') is TRUE
+ (...and neither does case)
+ same_language_tag('en', 'en-US') is FALSE
+ (all-English is not the SAME as US English)
+ same_language_tag('x-kadara', 'x-kadar') is FALSE
+ (these are totally unrelated tags)
+ same_language_tag('no-bok', 'nb') is TRUE
+ (no-bok is a legacy tag for nb (Norwegian Bokmal))
+
+C<same_language_tag> works by just seeing whether
+C<encode_language_tag($lang1)> is the same as
+C<encode_language_tag($lang2)>.
+
+(Yes, I know this function is named a bit oddly. Call it historic
+reasons.)
+
+=cut
+
+# same_language_tag($lang1, $lang2)
+#   Returns 1 if both arguments are valid tags with the same encoding
+#   (see encode_language_tag), 0 otherwise.
+sub same_language_tag {
+  my $el1 = &encode_language_tag($_[0]);
+  my $el2 = &encode_language_tag($_[1]);
+
+  # encode_language_tag returns undef for anything that is not a valid tag.
+  # Bail out before comparing: two undefs must not count as "the same",
+  # and comparing a defined encoding against undef would warn under -w.
+  return 0 unless defined($el1) and defined($el2);
+
+  return $el1 eq $el2 ? 1 : 0;
+}
+
+###########################################################################
+
+=item * the function similarity_language_tag($lang1, $lang2)
+
+Returns an integer representing the degree of similarity between
+tags $lang1 and $lang2 (the order of which does not matter), where
+similarity is the number of common elements on the left,
+without regard to case and to x/i- alternation.
+
+ similarity_language_tag('fr', 'fr-ca') is 1
+ (one element in common)
+ similarity_language_tag('fr-ca', 'fr-FR') is 1
+ (one element in common)
+
+ similarity_language_tag('fr-CA-joual',
+ 'fr-CA-PEI') is 2
+ similarity_language_tag('fr-CA-joual', 'fr-CA') is 2
+ (two elements in common)
+
+ similarity_language_tag('x-kadara', 'i-kadara') is 1
+ (x/i- doesn't matter)
+
+ similarity_language_tag('en', 'x-kadar') is 0
+ similarity_language_tag('x-kadara', 'x-kadar') is 0
+ (unrelated tags -- no similarity)
+
+ similarity_language_tag('i-cree-syllabic',
+ 'i-cherokee-syllabic') is 0
+ (no B<leftmost> elements in common!)
+
+=cut
+
+# similarity_language_tag($lang1, $lang2)
+#   Counts how many leading subtags the two tags have in common.
+#   Returns undef when neither argument is a valid tag and 0 when only
+#   one of them is.
+sub similarity_language_tag {
+  # Compare encodings, not raw tags: encode_language_tag folds case,
+  # the x-/i- prefix and the legacy aliases (no-nyn==nn, etc).
+  my $enc_a = &encode_language_tag($_[0]);
+  my $enc_b = &encode_language_tag($_[1]);
+
+  # NB: (i-sil-...)?  (i-sgn-...)?
+
+  my $have_a = defined($enc_a);
+  my $have_b = defined($enc_b);
+  return undef unless $have_a or $have_b;
+  return 0 unless $have_a and $have_b;
+
+  my @parts_a = split('-', $enc_a);
+  my @parts_b = split('-', $enc_b);
+
+  # Walk both lists in step and stop at the first differing subtag
+  # or at the end of the shorter list.
+  my $limit = @parts_a < @parts_b ? scalar(@parts_a) : scalar(@parts_b);
+  my $shared = 0;
+  foreach my $i (0 .. $limit - 1) {
+    last if $parts_a[$i] ne $parts_b[$i];
+    ++$shared;
+  }
+  return $shared;
+}
+
+###########################################################################
+
+=item * the function is_dialect_of($lang1, $lang2)
+
+Returns true iff language tag $lang1 represents a subform of
+language tag $lang2.
+
+B<Get the order right! It doesn't work the other way around!>
+
+ is_dialect_of('en-US', 'en') is TRUE
+ (American English IS a dialect of all-English)
+
+ is_dialect_of('fr-CA-joual', 'fr-CA') is TRUE
+ is_dialect_of('fr-CA-joual', 'fr') is TRUE
+ (Joual is a dialect of (a dialect of) French)
+
+ is_dialect_of('en', 'en-US') is FALSE
+ (all-English is NOT a dialect of American English)
+
+ is_dialect_of('fr', 'en-CA') is FALSE
+
+ is_dialect_of('en', 'en' ) is TRUE
+ is_dialect_of('en-US', 'en-US') is TRUE
+ (B<Note:> these are degenerate cases)
+
+ is_dialect_of('i-mingo-tom', 'x-Mingo') is TRUE
+ (the x/i thing doesn't matter, nor does case)
+
+ is_dialect_of('nn', 'no') is TRUE
+ (because 'nn' (New Norse) is aliased to 'no-nyn',
+ as a special legacy case, and 'no-nyn' is a
+ subform of 'no' (Norwegian))
+
+=cut
+
+# is_dialect_of($lang1, $lang2)
+#   Returns 1 if $lang1 is the same as, or a subform of, $lang2; else 0.
+#   Returns undef when neither argument is a valid tag.
+sub is_dialect_of {
+
+  my $sub   = &encode_language_tag($_[0]);
+  my $super = &encode_language_tag($_[1]);
+
+  unless(defined($sub) and defined($super)) {
+    # One invalid tag => 0; both invalid => undef.
+    return defined($sub) || defined($super) ? 0 : undef;
+  }
+
+  return 1 if $sub eq $super;
+  return 0 if length($sub) < length($super);
+
+  # Terminate both with '-' so that only whole subtags can match:
+  # "~EN-" is a prefix of "~EN-US-" but "~EN-" is not one of "~ENG-".
+  return index("$sub-", "$super-") == 0 ? 1 : 0;
+}
+
+###########################################################################
+
+=item * the function super_languages($lang1)
+
+Returns a list of language tags that are superordinate tags to $lang1
+-- it gets this by removing subtags from the end of $lang1 until
+nothing (or just "i" or "x") is left.
+
+ super_languages("fr-CA-joual") is ("fr-CA", "fr")
+
+ super_languages("en-AU") is ("en")
+
+ super_languages("en") is empty-list, ()
+
+ super_languages("i-cherokee") is empty-list, ()
+ ...not ("i"), which would be illegal as well as pointless.
+
+If $lang1 is not a valid language tag, returns empty-list in
+a list context, undef in a scalar context.
+
+A notable and rather unavoidable problem with this method:
+"x-mingo-tom" has an "x" because the whole tag isn't an
+IANA-registered tag -- but super_languages('x-mingo-tom') is
+('x-mingo') -- which isn't really right, since 'i-mingo' is
+registered. But this module has no way of knowing that. (But note
+that same_language_tag('x-mingo', 'i-mingo') is TRUE.)
+
+More importantly, you assume I<at your peril> that superordinates of
+$lang1 are mutually intelligible with $lang1. Consider this
+carefully.
+
+=cut
+
+# super_languages($lang1)
+#   Returns the superordinate tags of $lang1, longest first, obtained by
+#   dropping subtags from the right. A lone "i" or "x" is never returned.
+#   Returns an empty list for anything that is not a valid tag.
+sub super_languages {
+  my $tag = $_[0];
+  return() unless defined($tag) && &is_language_tag($tag);
+
+  # Expand the short 2001-era tags to their older, longer aliases so the
+  # parent language shows up as a real prefix.
+  $tag =~ s/^nb\b/no-bok/i;            # yes, backwards
+  $tag =~ s/^nn\b/no-nyn/i;            # yes, backwards
+  $tag =~ s/^[ix](-hakka\b)/zh$1/i;    # goes the right way
+   # i-hakka-bork-bjork-bjark => zh-hakka-bork-bjork-bjark
+
+  my @subtags = split('-', $tag);
+
+  ## Changes in the language tagging standards may have to be reflected here.
+
+  # NB: (i-sil-...)?
+
+  # Every proper prefix, shortest first: (a, b, c) gives ("a", "a-b").
+  my @supers =
+    map { join('-', @subtags[0 .. $_]) } 0 .. $#subtags - 1;
+
+  # A bare "i" or "x" is not a language tag, so drop it.
+  shift @supers if @supers && $supers[0] =~ m<^[iIxX]$>s;
+  return reverse @supers;
+}
+
+###########################################################################
+
+=item * the function locale2language_tag($locale_identifier)
+
+This takes a locale name (like "en", "en_US", or "en_US.ISO8859-1")
+and maps it to a language tag. If it's not mappable (as with,
+notably, "C" and "POSIX"), this returns empty-list in a list context,
+or undef in a scalar context.
+
+ locale2language_tag("en") is "en"
+
+ locale2language_tag("en_US") is "en-US"
+
+ locale2language_tag("en_US.ISO8859-1") is "en-US"
+
+ locale2language_tag("C") is undef or ()
+
+ locale2language_tag("POSIX") is undef or ()
+
+ locale2language_tag("POSIX") is undef or ()
+
+I'm not totally sure that locale names map satisfactorily to language
+tags. Think REAL hard about how you use this. YOU HAVE BEEN WARNED.
+
+The output is untainted. If you don't know what tainting is,
+don't worry about it.
+
+=cut
+
+# locale2language_tag($locale_identifier)
+#   Maps a locale name such as "en", "en_US", "en_US.ISO8859-1" or
+#   "it_IT.utf8@euro" to a language tag ("en", "en-US", "en-US", "it-IT").
+#   Returns an empty list (undef in scalar context) when the name cannot
+#   be mapped, as with "C" and "POSIX", or when the argument is undefined.
+sub locale2language_tag {
+  my $lang =
+    defined($_[0]) && $_[0] =~ m/(.+)/  # to make for an untainted result
+    ? $1 : ''
+  ;
+
+  return $lang if &is_language_tag($lang); # like "en"
+
+  $lang =~ tr<_><->;  # "en_US" -> en-US
+
+  # Drop a trailing ".codeset" and/or "@modifier":
+  #   "en-US.ISO8859-1" -> en-US
+  #   "de-DE@euro"      -> de-DE
+  #   "it-IT.utf8@euro" -> it-IT
+  # Previously only the ".codeset" part was removed, so every locale name
+  # carrying an "@modifier" was reported as unmappable.
+  $lang =~ s/(?:\.[-_a-zA-Z0-9.]*)?(?:\@[-_a-zA-Z0-9]*)?$//s;
+
+  return $lang if &is_language_tag($lang);
+
+  return;
+}
+
+###########################################################################
+
+=item * the function encode_language_tag($lang1)
+
+This function, if given a language tag, returns an encoding of it such
+that:
+
+* tags representing different languages never get the same encoding.
+
+* tags representing the same language always get the same encoding.
+
+* an encoding of a formally valid language tag always is a string
+value that is defined, has length, and is true if considered as a
+boolean.
+
+Note that the encoding itself is B<not> a formally valid language tag.
+Note also that you cannot, currently, go from an encoding back to a
+language tag that it's an encoding of.
+
+Note also that you B<must> consider the encoded value as atomic; i.e.,
+you should not consider it as anything but an opaque, unanalysable
+string value. (The internals of the encoding method may change in
+future versions, as the language tagging standard changes over time.)
+
+C<encode_language_tag> returns undef if given anything other than a
+formally valid language tag.
+
+The reason C<encode_language_tag> exists is because different language
+tags may represent the same language; this is normally treatable with
+C<same_language_tag>, but consider this situation:
+
+You have a data file that expresses greetings in different languages.
+Its format is "[language tag]=[how to say 'Hello']", like:
+
+ en-US=Hiho
+ fr=Bonjour
+ i-mingo=Hau'
+
+And suppose you write a program that reads that file and then runs as
+a daemon, answering client requests that specify a language tag and
+then expect the string that says how to greet in that language. So an
+interaction looks like:
+
+ greeting-client asks: fr
+ greeting-server answers: Bonjour
+
+So far so good. But suppose the way you're implementing this is:
+
+ my %greetings;
+ die unless open(IN, "<in.dat");
+ while(<IN>) {
+ chomp;
+ next unless /^([^=]+)=(.+)/s;
+ my($lang, $expr) = ($1, $2);
+ $greetings{$lang} = $expr;
+ }
+ close(IN);
+
+at which point %greetings has the contents:
+
+ "en-US" => "Hiho"
+ "fr" => "Bonjour"
+ "i-mingo" => "Hau'"
+
+And suppose then that you answer client requests for language $wanted
+by just looking up $greetings{$wanted}.
+
+If the client asks for "fr", that will look up successfully in
+%greetings, to the value "Bonjour". And if the client asks for
+"i-mingo", that will look up successfully in %greetings, to the value
+"Hau'".
+
+But if the client asks for "i-Mingo" or "x-mingo", or "Fr", then the
+lookup in %greetings fails. That's the Wrong Thing.
+
+You could instead do lookups on $wanted with:
+
+ use I18N::LangTags qw(same_language_tag);
+ my $response = '';
+ foreach my $l2 (keys %greetings) {
+ if(same_language_tag($wanted, $l2)) {
+ $response = $greetings{$l2};
+ last;
+ }
+ }
+
+But that's rather inefficient. A better way to do it is to start your
+program with:
+
+ use I18N::LangTags qw(encode_language_tag);
+ my %greetings;
+ die unless open(IN, "<in.dat");
+ while(<IN>) {
+ chomp;
+ next unless /^([^=]+)=(.+)/s;
+ my($lang, $expr) = ($1, $2);
+ $greetings{
+ encode_language_tag($lang)
+ } = $expr;
+ }
+ close(IN);
+
+and then just answer client requests for language $wanted by just
+looking up
+
+ $greetings{encode_language_tag($wanted)}
+
+And that does the Right Thing.
+
+=cut
+
+# encode_language_tag($lang1)
+#   Returns an opaque string that is identical for all tags denoting the
+#   same language form, or undef if the argument is not a valid tag.
+sub encode_language_tag {
+  # Only similarity_language_tag() is allowed to analyse encodings!
+
+  ## Changes in the language tagging standards may have to be reflected here.
+
+  my $tag = $_[0];
+  return undef unless $tag and &is_language_tag($tag);
+
+  # For the moment the legacy variances are few enough to be handled by
+  # a short ordered list of (pattern, replacement) rewrites, each
+  # anchored at the start of the tag and applied case-insensitively.
+  foreach my $rule (
+    [ 'iw',          'he' ],       # Hebrew
+    [ 'in',          'id' ],       # Indonesian
+    [ '[ix]-lux',    'lb' ],       # Luxemburger
+    [ '[ix]-navajo', 'nv' ],       # Navajo
+    [ 'ji',          'yi' ],       # Yiddish
+    # The remaining ones go FROM the simplex TO the complex form, to get
+    # similarity-comparison right. That's okay, since
+    # similarity_language_tag is the only thing that analyzes our output.
+    [ '[ix]-hakka',  'zh-hakka' ], # Hakka
+    [ 'nb',          'no-bok' ],   # BACKWARDS for Bokmal
+    [ 'nn',          'no-nyn' ],   # BACKWARDS for Nynorsk
+  ) {
+    my($old, $new) = @$rule;
+    $tag =~ s/^$old\b/$new/i;
+  }
+
+  # Just lop off any leading "x/i-"
+  $tag =~ s/^[xiXI]-//s;
+
+  return '~' . uc $tag;
+}
+
+#--------------------------------------------------------------------------
+
+=item * the function alternate_language_tags($lang1)
+
+This function, if given a language tag, returns all language tags that
+are alternate forms of this language tag. (I.e., tags which refer to
+the same language.) This is meant to handle legacy tags caused by
+the minor changes in language tag standards over the years; and
+the x-/i- alternation is also dealt with.
+
+Note that this function does I<not> try to equate new (and never-used,
+and unusable)
+ISO639-2 three-letter tags to old (and still in use) ISO639-1
+two-letter equivalents -- like "ara" -> "ar" -- because
+"ara" has I<never> been in use as an Internet language tag,
+and RFC 3066 stipulates that it never should be, since a shorter
+tag ("ar") exists.
+
+Examples:
+
+ alternate_language_tags('no-bok') is ('nb')
+ alternate_language_tags('nb') is ('no-bok')
+ alternate_language_tags('he') is ('iw')
+ alternate_language_tags('iw') is ('he')
+ alternate_language_tags('i-hakka') is ('zh-hakka', 'x-hakka')
+ alternate_language_tags('zh-hakka') is ('x-hakka', 'i-hakka')
+ alternate_language_tags('en') is ()
+ alternate_language_tags('x-mingo-tom') is ('i-mingo-tom')
+ alternate_language_tags('x-klikitat') is ('i-klikitat')
+ alternate_language_tags('i-klikitat') is ('x-klikitat')
+
+This function returns empty-list if given anything other than a formally
+valid language tag.
+
+=cut
+
+# Maps a leading "i"/"x" (either case) to its counterpart.
+my %alt = qw( i x   x i   I X   X I );
+
+# alternate_language_tags($lang1)
+#   Returns the other tags that denote the same language as $lang1:
+#   legacy aliases first, then the x-/i- counterpart if there is one.
+#   Returns an empty list for anything that is not a valid tag.
+sub alternate_language_tags {
+  my $tag = $_[0];
+  return() unless &is_language_tag($tag);
+
+  my @alternates;
+
+  # For the moment the legacy variances are few enough to keep in a table.
+  # Each entry is a pattern for the leading part of the tag followed by
+  # the replacement(s) for that part; only the first matching entry is used.
+  my @legacy = (
+    [ '[ix]-hakka',  'zh-hakka' ],
+    [ 'zh-hakka',    'x-hakka', 'i-hakka' ],
+
+    [ 'he',          'iw' ],
+    [ 'iw',          'he' ],
+
+    [ 'in',          'id' ],
+    [ 'id',          'in' ],
+
+    [ '[ix]-lux',    'lb' ],
+    [ 'lb',          'i-lux', 'x-lux' ],
+
+    [ '[ix]-navajo', 'nv' ],
+    [ 'nv',          'i-navajo', 'x-navajo' ],
+
+    [ 'yi',          'ji' ],
+    [ 'ji',          'yi' ],
+
+    [ 'nb',          'no-bok' ],
+    [ 'no-bok',      'nb' ],
+
+    [ 'nn',          'no-nyn' ],
+    [ 'no-nyn',      'nn' ],
+  );
+
+  foreach my $entry (@legacy) {
+    my($pattern, @forms) = @$entry;
+    next unless $tag =~ m/^$pattern\b(.*)/i;
+    my $rest = $1;   # whatever follows the aliased part is carried over
+    push @alternates, map { $_ . $rest } @forms;
+    last;
+  }
+
+  # The x-/i- alternation applies independently of the table above.
+  push @alternates, $alt{$1} . $2 if $tag =~ /^([XIxi])(-.+)/;
+  return @alternates;
+}
+
+###########################################################################
+
+{
+  # Fill %Panic: for each language tag, the ordered list of other
+  # languages to offer as a last resort.
+
+  my @panic = (  # MUST all be lowercase!
+   # A flat list of (from, to) pairs. Either side may be a single tag or
+   # an array ref of tags; every "from" gets every "to".
+   #
+   # Only large ("national") languages make it in this list.
+   #  If you, as a user, are so bizarre that the /only/ language
+   #  you claim to accept is Galician, then no, we won't do you
+   #  the favor of providing Catalan as a panic-fallback for
+   #  you.  Because if I start trying to add "little languages" in
+   #  here, I'll just go crazy.
+
+   # Scandinavian lgs.  All based on opinion and hearsay.
+   'sv' => [qw(nb no da nn)],
+   'da' => [qw(nb no sv nn)], # I guess
+   [qw(no nn nb)], [qw(no nn nb sv da)],
+   'is' => [qw(da sv no nb nn)],
+   'fo' => [qw(da is no nb nn sv)], # I guess
+
+   # I think this is about the extent of tolerable intelligibility
+   #  among large modern Romance languages.
+   'pt' => [qw(es ca it fr)], # Portuguese, Spanish, Catalan, Italian, French
+   'ca' => [qw(es pt it fr)],
+   'es' => [qw(ca it fr pt)],
+   'it' => [qw(es fr ca pt)],
+   'fr' => [qw(es it ca pt)],
+
+   # Also assume that speakers of the main Indian languages prefer
+   #  to read/hear Hindi over English
+   [qw(
+     as bn gu kn ks kok ml mni mr ne or pa sa sd te ta ur
+   )] => 'hi',
+     # Assamese, Bengali, Gujarati, [Hindi,] Kannada (Kanarese), Kashmiri,
+     # Konkani, Malayalam, Meithei (Manipuri), Marathi, Nepali, Oriya,
+     # Punjabi, Sanskrit, Sindhi, Telugu, Tamil, and Urdu.
+   'hi' => [qw(bn pa as or)],
+   # I welcome finer data for the other Indian languages.
+   #  E.g., what should Oriya's list be, besides just Hindi?
+
+   # And the list of panic languages for English is, of course, nil!
+
+   # My guesses at Slavic intelligibility:
+   ([qw(ru be uk)]) x 2,  # Russian, Belarusian, Ukrainian
+   'sr' => 'hr', 'hr' => 'sr', # Serb + Croat
+   'cs' => 'sk', 'sk' => 'cs', # Czech + Slovak
+
+   'ms' => 'id', 'id' => 'ms', # Malay + Indonesian
+
+   'et' => 'fi', 'fi' => 'et', # Estonian + Finnish
+
+   #?? 'lo' => 'th', 'th' => 'lo', # Lao + Thai
+
+  );
+
+  # Consume the list two items at a time and append each "to" tag to the
+  # list kept for each "from" tag, never listing a language as its own
+  # fallback.
+  while(@panic) {
+    my($keys, $vals) = splice(@panic, 0, 2);
+    my @from = ref($keys) ? @$keys : ($keys);
+    my @to   = ref($vals) ? @$vals : ($vals);
+    foreach my $from (@from) {
+      foreach my $to (@to) {
+        next if $from eq $to;
+        push @{ $Panic{$from} ||= [] }, $to;
+      }
+    }
+  }
+}
+
+=item * the function @langs = panic_languages(@accept_languages)
+
+This function takes a list of 0 or more language
+tags that constitute a given user's Accept-Language list, and
+returns a list of tags for I<other> (non-super)
+languages that are probably acceptable to the user, to be
+used I<if all else fails>.
+
+For example, if a user accepts only 'ca' (Catalan) and
+'es' (Spanish), and the documents/interfaces you have
+available are just in German, Italian, and Chinese, then
+the user will most likely want the Italian one (and not
+the Chinese or German one!), instead of getting
+nothing. So C<panic_languages('ca', 'es')> returns
+a list containing 'it' (Italian).
+
+English ('en') is I<always> in the return list, but
+whether it's at the very end or not depends
+on the input languages. This function works by consulting
+an internal table that stipulates what common
+languages are "close" to each other.
+
+A useful construct you might consider using is:
+
+ @fallbacks = super_languages(@accept_languages);
+ push @fallbacks, panic_languages(
+ @accept_languages, @fallbacks,
+ );
+
+=cut
+
+# panic_languages(@accept_languages)
+#   Returns last-resort fallback tags for the given accepted languages,
+#   taken from %Panic, without duplicates and without any tag that was
+#   itself passed in. 'en' is appended unless it was among the inputs.
+sub panic_languages {
+  # When in panic or in doubt, run in circles, scream, and shout!
+  my(@fallbacks, %seen);
+  foreach my $tag (@_) {
+    next unless $tag;
+    next if $seen{$tag}++;   # so we don't return it or hit it again
+    # super_languages($tag) is deliberately not added -- keep that separate
+    my $close = $Panic{lc $tag};
+    next unless $close;
+    push @fallbacks, @$close;
+  }
+  return grep !$seen{$_}++, @fallbacks, 'en';
+}
+
+###########################################################################
+1;
+__END__
+
+=back
+
+=head1 ABOUT LOWERCASING
+
+I've considered making all the above functions that output language
+tags return all those tags strictly in lowercase. Having all your
+language tags in lowercase does make some things easier. But you
+might as well just lowercase as you like, or call
+C<encode_language_tag($lang1)> where appropriate.
+
+=head1 ABOUT UNICODE PLAINTEXT LANGUAGE TAGS
+
+In some future version of I18N::LangTags, I plan to include support
+for RFC2482-style language tags -- which are basically just normal
+language tags with their ASCII characters shifted into Plane 14.
+
+=head1 SEE ALSO
+
+* L<I18N::LangTags::List|I18N::LangTags::List>
+
+* RFC 3066, C<ftp://ftp.isi.edu/in-notes/rfc3066.txt>, "Tags for the
+Identification of Languages". (Obsoletes RFC 1766)
+
+* RFC 2277, C<ftp://ftp.isi.edu/in-notes/rfc2277.txt>, "IETF Policy on
+Character Sets and Languages".
+
+* RFC 2231, C<ftp://ftp.isi.edu/in-notes/rfc2231.txt>, "MIME Parameter
+Value and Encoded Word Extensions: Character Sets, Languages, and
+Continuations".
+
+* RFC 2482, C<ftp://ftp.isi.edu/in-notes/rfc2482.txt>,
+"Language Tagging in Unicode Plain Text".
+
+* Locale::Codes, in
+C<http://www.perl.com/CPAN/modules/by-module/Locale/>
+
+* ISO 639, "Code for the representation of names of languages",
+C<http://www.indigo.ie/egt/standards/iso639/iso639-1-en.html>
+
+* ISO 639-2, "Codes for the representation of names of languages",
+including three-letter codes,
+C<http://lcweb.loc.gov/standards/iso639-2/bibcodes.html>
+
+* The IANA list of registered languages (hopefully up-to-date),
+C<ftp://ftp.isi.edu/in-notes/iana/assignments/languages/>
+
+=head1 COPYRIGHT
+
+Copyright (c) 1998-2001 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+The programs and documentation in this dist are distributed in
+the hope that they will be useful, but without any warranty; without
+even the implied warranty of merchantability or fitness for a
+particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
diff --git a/gnu/usr.bin/perl/lib/I18N/LangTags/ChangeLog b/gnu/usr.bin/perl/lib/I18N/LangTags/ChangeLog
new file mode 100644
index 00000000000..f3608f7125e
--- /dev/null
+++ b/gnu/usr.bin/perl/lib/I18N/LangTags/ChangeLog
@@ -0,0 +1,107 @@
+Revision history for Perl module I18N::LangTags.
+ Time-stamp: "2002-02-02 20:45:47 MST"
+
+2002-02-02 Sean M. Burke sburke@cpan.org
+
+ * Release 0.27 -- minor mods to ::List:
+ Fixing its entries for sv-se and sv-fi.
+ Typo-fixes and rewordings in the incidental Pod text elsewhere.
+
+2001-06-21 Sean M. Burke sburke@cpan.org
+
+ * Release 0.26 -- just making cosmetic changes
+ to test.pl, at Jarkko's request.
+
+2001-06-20 Sean M. Burke sburke@cpan.org
+
+ * Release 0.25 -- just tweaking panic_languages behavior
+ for Scandinavian languages. Much better now.
+ Slight tweak to ::List's entries for Greek.
+
+2001-06-20 Sean M. Burke sburke@cpan.org
+
+ * Release 0.24
+
+ * I18N::LangTags -- some elaborate hacks to make us
+ recognize legacy aliases like no-nyn == nn.
+ Added panic_languages().
+ Added :ALL export tag.
+ Minor docs fixes, and spiffing up test.pl.
+
+ * I18N::LangTags::List -- minor corrections; added
+ a few aliases.
+
+2001-05-29 Sean M. Burke sburke@cpan.org
+
+ * Release 0.23
+
+ * I18N::LangTags::List -- minor corrections. And is now
+ a module, not just documentation.
+
+2001-05-27 Sean M. Burke sburke@cpan.org
+
+ * Release 0.22
+
+ * Now bundling I18N::LangTags::List, a reference for lang tags,
+ replacing generate_language_table.plx and language_codes.txt
+
+2001-05-25 Sean M. Burke sburke@cpan.org
+
+ * Release 0.21
+
+ * extract_language_tags and locale2language_tag now
+ return untainted output. Useful if you feed tainted
+ things, like $ENV{'LANG'}.
+
+2001-03-13 Sean M. Burke sburke@cpan.org
+
+ * Release 0.20
+
+ * Added support for RFC 3066 tags: allowing three-letter primary
+ tags ("nav"), and allowing digits in subtags ("x-borg-prot3252").
+
+ * Changed all references from RFC 1766 to RFC 3066.
+
+ * Now bundling fulltext of RFC 3066 in the dist.
+
+ * Now bundling generate_language_table.plx and language_codes.txt
+
+ * Added some nice tests to test.pl
+
+ * Inverting order of listings in this ChangeLog file.
+
+2000-05-13 Sean M. Burke sburke@cpan.org
+
+ * Release 0.13
+
+ * Just noting my new email address.
+
+1999-03-06 Sean M. Burke sburke@netadventure.net
+
+ * Release 0.11
+
+ * Added functions
+ similarity_language_tag, is_dialect_of,
+ locale2language_tag, alternate_language_tags, and
+ encode_language_tag
+
+1998-12-14 Sean M. Burke sburke@netadventure.net
+
+ * Release 0.09
+
+ * Added function super_languages()
+
+1998-10-31 Sean M. Burke sburke@netadventure.net
+
+ * Release 0.08
+
+ * Just changes in the docs and bundle -- no change
+ in functionality.
+
+1998-04-02 Sean M. Burke sburke@netadventure.net
+
+ * Release 0.07
+
+ * First public release.
+
+[END OF CHANGELOG]
diff --git a/gnu/usr.bin/perl/lib/I18N/LangTags/List.pm b/gnu/usr.bin/perl/lib/I18N/LangTags/List.pm
new file mode 100644
index 00000000000..2dbd19a5d78
--- /dev/null
+++ b/gnu/usr.bin/perl/lib/I18N/LangTags/List.pm
@@ -0,0 +1,1622 @@
+
+require 5;
+package I18N::LangTags::List;
+# Time-stamp: "2002-02-02 20:13:58 MST"
+use strict;
+use vars qw(%Name $Debug $VERSION);
+$VERSION = '0.25';
+# POD at the end.
+
+#----------------------------------------------------------------------
+{ # Load-time setup: fill %Name with tag => English name pairs parsed from this file's DATA section.
+# Read the table out of our own POD (the text after __DATA__)!
+ my $seeking = 1; # true until a line containing "=for woohah" has been read
+ my $count = 0; # number of tag/name pairs stored so far
+ my($tag,$name);
+ while(<I18N::LangTags::List::DATA>) {
+ if($seeking) {
+ $seeking = 0 if m/=for woohah/; # first marker starts parsing; the END marker is not treated specially, so reading runs to end of DATA
+ } else {
+ next unless ($tag, $name) =
+ m/\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/; # a braced tag, an optional colon, then a name containing no square brackets
+ $name =~ s/\s*[;\.]*\s*$//g; # trim trailing whitespace, semicolons and periods
+ next unless $name;
+ ++$count;
+ print "<$tag> <$name>\n" if $Debug;
+ $Name{$tag} = $name; # a later line with the same tag overwrites an earlier entry
+ }
+ }
+ die "No tags read??" unless $count; # no line after the marker yielded a tag/name pair
+}
+#----------------------------------------------------------------------
+
+sub name { # name(TAG): English name for a language tag, looked up in %Name; bare return (undef/empty list) when nothing matches
+ my $tag = lc($_[0] || return); # a missing or false argument returns nothing at once; the tag is lowercased before lookup
+ $tag =~ s/^\s+//s; # strip leading whitespace
+ $tag =~ s/\s+$//s; # strip trailing whitespace
+
+ my $alt; # the same tag with its "x-" / "i-" prefix swapped, or '' when it has neither prefix
+ if($tag =~ m/^x-(.+)/) {
+ $alt = "i-$1";
+ } elsif($tag =~ m/^i-(.+)/) {
+ $alt = "x-$1";
+ } else {
+ $alt = '';
+ }
+
+ my $subform = ''; # subtags shaved off the right end so far, each keeping its leading "-"
+ my $name = '';
+ print "Input: {$tag}\n" if $Debug;
+ while(length $tag) {
+ last if $name = $Name{$tag}; # hit on what is left of the tag
+ last if $name = $Name{$alt}; # or on its x-/i- counterpart
+ if($tag =~ s/(-[a-z0-9]+)$//s) {
+ print "Shaving off: $1 leaving $tag\n" if $Debug;
+ $subform = "$1$subform"; # prepend, so shaved subtags stay in their original order
+ # and loop around again with the shortened tag
+
+ $alt =~ s/(-[a-z0-9]+)$//s && $Debug && print " alt -> $alt\n"; # shorten $alt in step with $tag
+ } else {
+ # we're trying to pull a subform off a primary tag. TILT!
+ print "Aborting on: {$name}{$subform}\n" if $Debug;
+ last;
+ }
+ }
+ print "Output: {$name}{$subform}\n" if $Debug;
+
+ return unless $name; # Failure
+ return $name unless $subform; # Exact match
+ $subform =~ s/^-//s; # drop the leading hyphen left by shaving
+ $subform =~ s/-$//s;
+ return "$name (Subform \"$subform\")"; # matched a shorter tag; report the unmatched remainder
+}
+
+1;
+
+__DATA__
+
+=head1 NAME
+
+I18N::LangTags::List -- tags and names for human languages
+
+=head1 SYNOPSIS
+
+ use I18N::LangTags::List;
+ print "Parlez-vous... ", join(', ',
+ I18N::LangTags::List::name('elx') || 'unknown_language',
+ I18N::LangTags::List::name('ar-Kw') || 'unknown_language',
+ I18N::LangTags::List::name('en') || 'unknown_language',
+ I18N::LangTags::List::name('en-CA') || 'unknown_language',
+ ), "?\n";
+
+prints:
+
+ Parlez-vous... Elamite, Kuwait Arabic, English, Canadian English?
+
+=head1 DESCRIPTION
+
+This module provides a function
+C<I18N::LangTags::List::name( I<langtag> ) > that takes
+a language tag (see L<I18N::LangTags|I18N::LangTags>)
+and returns the best attempt at an English name for it, or
+undef if it can't make sense of the tag.
+
+The function I18N::LangTags::List::name(...) is not exported.
+
+The map of tags-to-names that it uses is accessible as
+%I18N::LangTags::List::Name, and it's the same as the list
+that follows in this documentation, which should be useful
+to you even if you don't use this module.
+
+=head1 ABOUT LANGUAGE TAGS
+
+Internet language tags, as defined in RFC 3066, are a formalism
+for denoting human languages. The two-letter ISO 639-1 language
+codes are well known (as "en" for English), as are their forms
+when qualified by a country code ("en-US"). Less well-known are the
+arbitrary-length non-ISO codes (like "i-mingo"), and the
+recently (in 2001) introduced three-letter ISO-639-2 codes.
+
+Remember these important facts:
+
+=over
+
+=item *
+
+Language tags are not locale IDs. A locale ID is written with a "_"
+instead of a "-", (almost?) always matches C<m/^\w\w_\w\w\b/>, and
+I<means> something different than a language tag. A language tag
+denotes a language. A locale ID denotes a language I<as used in>
+a particular place, in combination with non-linguistic
+location-specific information such as what currency is used
+there. Locales I<also> often denote character set information,
+as in "en_US.ISO8859-1".
+
+=item *
+
+Language tags are not for computer languages.
+
+=item *
+
+"Dialect" is not a useful term, since there is no objective
+criterion for establishing when two language-forms are
+dialects of each other, or are separate languages.
+
+=item *
+
+Language tags are not case-sensitive. en-US, en-us, En-Us, etc.,
+are all the same tag, and denote the same language.
+
+=item *
+
+Not every language tag really refers to a single language. Some
+language tags refer to conditions: i-default (system-message text
+in English plus maybe other languages), und (undetermined
+language). Others (notably lots of the three-letter codes) are
+bibliographic tags that classify whole groups of languages, as
+with cus "Cushitic (Other)" (i.e., a
+language that has been classed as Cushitic, but which has no more
+specific code) or the even less linguistically coherent
+sai for "South American Indian (Other)". Though useful in
+bibliography, B<SUCH TAGS ARE NOT
+FOR GENERAL USE>. For further guidance, email me.
+
+=item *
+
+Language tags are not country codes. In fact, they are often
+distinct codes, as with language tag ja for Japanese, and
+ISO 3166 country code C<.jp> for Japan.
+
+=back
+
+=head1 LIST OF LANGUAGES
+
+The first part of each item is the language tag, between
+{...}. It
+is followed by an English name for the language or language-group.
+Language tags that I judge to be not for general use, are bracketed.
+
+This list is in alphabetical order by English name of the language.
+
+=for reminder
+ The name in the =item line MUST NOT have E<...>'s in it!!
+
+=for woohah START
+
+=over
+
+=item {ab} : Abkhazian
+
+eq Abkhaz
+
+=item {ace} : Achinese
+
+=item {ach} : Acoli
+
+=item {ada} : Adangme
+
+=item {aa} : Afar
+
+=item {afh} : Afrihili
+
+(Artificial)
+
+=item {af} : Afrikaans
+
+=item [{afa} : Afro-Asiatic (Other)]
+
+=item {aka} : Akan
+
+=item {akk} : Akkadian
+
+(Historical)
+
+=item {sq} : Albanian
+
+=item {ale} : Aleut
+
+=item [{alg} : Algonquian languages]
+
+NOT Algonquin!
+
+=item [{tut} : Altaic (Other)]
+
+=item {am} : Amharic
+
+NOT Aramaic!
+
+=item {i-ami} : Ami
+
+eq Amis. eq 'Amis. eq Pangca.
+
+=item [{apa} : Apache languages]
+
+=item {ar} : Arabic
+
+Many forms are mutually un-intelligible in spoken media.
+Notable forms:
+{ar-ae} UAE Arabic;
+{ar-bh} Bahrain Arabic;
+{ar-dz} Algerian Arabic;
+{ar-eg} Egyptian Arabic;
+{ar-iq} Iraqi Arabic;
+{ar-jo} Jordanian Arabic;
+{ar-kw} Kuwait Arabic;
+{ar-lb} Lebanese Arabic;
+{ar-ly} Libyan Arabic;
+{ar-ma} Moroccan Arabic;
+{ar-om} Omani Arabic;
+{ar-qa} Qatari Arabic;
+{ar-sa} Saudi Arabic;
+{ar-sy} Syrian Arabic;
+{ar-tn} Tunisian Arabic;
+{ar-ye} Yemen Arabic.
+
+=item {arc} : Aramaic
+
+NOT Amharic! NOT Samaritan Aramaic!
+
+=item {arp} : Arapaho
+
+=item {arn} : Araucanian
+
+=item {arw} : Arawak
+
+=item {hy} : Armenian
+
+=item [{art} : Artificial (Other)]
+
+=item {as} : Assamese
+
+=item [{ath} : Athapascan languages]
+
+eq Athabaskan. eq Athapaskan. eq Athabascan.
+
+=item [{aus} : Australian languages]
+
+=item [{map} : Austronesian (Other)]
+
+=item {ava} : Avaric
+
+=item {ae} : Avestan
+
+eq Zend
+
+=item {awa} : Awadhi
+
+=item {ay} : Aymara
+
+=item {az} : Azerbaijani
+
+eq Azeri
+
+=item {ban} : Balinese
+
+=item [{bat} : Baltic (Other)]
+
+=item {bal} : Baluchi
+
+=item {bam} : Bambara
+
+=item [{bai} : Bamileke languages]
+
+=item {bad} : Banda
+
+=item [{bnt} : Bantu (Other)]
+
+=item {bas} : Basa
+
+=item {ba} : Bashkir
+
+=item {eu} : Basque
+
+=item {btk} : Batak (Indonesia)
+
+=item {bej} : Beja
+
+=item {be} : Belarusian
+
+eq Belarussian. eq Byelarussian.
+eq Belorussian. eq Byelorussian.
+eq White Russian. eq White Ruthenian.
+NOT Ruthenian!
+
+=item {bem} : Bemba
+
+=item {bn} : Bengali
+
+eq Bangla.
+
+=item [{ber} : Berber (Other)]
+
+=item {bho} : Bhojpuri
+
+=item {bh} : Bihari
+
+=item {bik} : Bikol
+
+=item {bin} : Bini
+
+=item {bi} : Bislama
+
+eq Bichelamar.
+
+=item {bs} : Bosnian
+
+=item {bra} : Braj
+
+=item {br} : Breton
+
+=item {bug} : Buginese
+
+=item {bg} : Bulgarian
+
+=item {i-bnn} : Bunun
+
+=item {bua} : Buriat
+
+=item {my} : Burmese
+
+=item {cad} : Caddo
+
+=item {car} : Carib
+
+=item {ca} : Catalan
+
+eq CatalE<aacute>n. eq Catalonian.
+
+=item [{cau} : Caucasian (Other)]
+
+=item {ceb} : Cebuano
+
+=item [{cel} : Celtic (Other)]
+
+Notable forms:
+{cel-gaulish} Gaulish (Historical)
+
+=item [{cai} : Central American Indian (Other)]
+
+=item {chg} : Chagatai
+
+(Historical?)
+
+=item [{cmc} : Chamic languages]
+
+=item {ch} : Chamorro
+
+=item {ce} : Chechen
+
+=item {chr} : Cherokee
+
+eq Tsalagi
+
+=item {chy} : Cheyenne
+
+=item {chb} : Chibcha
+
+(Historical) NOT Chibchan (which is a language family).
+
+=item {ny} : Chichewa
+
+eq Nyanja. eq Chinyanja.
+
+=item {zh} : Chinese
+
+Many forms are mutually un-intelligible in spoken media.
+Notable subforms:
+{zh-cn} PRC Chinese;
+{zh-hk} Hong Kong Chinese;
+{zh-mo} Macau Chinese;
+{zh-sg} Singapore Chinese;
+{zh-tw} Taiwan Chinese;
+{zh-guoyu} Mandarin [Putonghua/Guoyu];
+{zh-hakka} Hakka [formerly i-hakka];
+{zh-min} Hokkien;
+{zh-min-nan} Southern Hokkien;
+{zh-wuu} Shanghaiese;
+{zh-xiang} Hunanese;
+{zh-gan} Gan;
+{zh-yue} Cantonese.
+
+=for etc
+{i-hakka} Hakka (old tag)
+
+=item {chn} : Chinook Jargon
+
+eq Chinook Wawa.
+
+=item {chp} : Chipewyan
+
+=item {cho} : Choctaw
+
+=item {cu} : Church Slavic
+
+eq Old Church Slavonic.
+
+=item {chk} : Chuukese
+
+eq Trukese. eq Chuuk. eq Truk. eq Ruk.
+
+=item {cv} : Chuvash
+
+=item {cop} : Coptic
+
+=item {kw} : Cornish
+
+=item {co} : Corsican
+
+eq Corse.
+
+=item {cre} : Cree
+
+NOT Creek!
+
+=item {mus} : Creek
+
+NOT Cree!
+
+=item [{cpe} : English-based Creoles and pidgins (Other)]
+
+=item [{cpf} : French-based Creoles and pidgins (Other)]
+
+=item [{cpp} : Portuguese-based Creoles and pidgins (Other)]
+
+=item [{crp} : Creoles and pidgins (Other)]
+
+=item {hr} : Croatian
+
+eq Croat.
+
+=item [{cus} : Cushitic (Other)]
+
+=item {cs} : Czech
+
+=item {dak} : Dakota
+
+eq Nakota. eq Lakota.
+
+=item {da} : Danish
+
+=item {day} : Dayak
+
+=item {i-default} : Default (Fallthru) Language
+
+Defined in RFC 2277, this is for tagging text
+(which must include English text, and might/should include text
+in other appropriate languages) that is emitted in a context
+where language-negotiation wasn't possible -- in SMTP mail failure
+messages, for example.
+
+=item {del} : Delaware
+
+=item {din} : Dinka
+
+=item {div} : Divehi
+
+=item {doi} : Dogri
+
+NOT Dogrib!
+
+=item {dgr} : Dogrib
+
+NOT Dogri!
+
+=item [{dra} : Dravidian (Other)]
+
+=item {dua} : Duala
+
+=item {nl} : Dutch
+
+eq Netherlander. Notable forms:
+{nl-nl} Netherlands Dutch;
+{nl-be} Belgian Dutch.
+
+=item {dum} : Middle Dutch (ca.1050-1350)
+
+(Historical)
+
+=item {dyu} : Dyula
+
+=item {dz} : Dzongkha
+
+=item {efi} : Efik
+
+=item {egy} : Ancient Egyptian
+
+(Historical)
+
+=item {eka} : Ekajuk
+
+=item {elx} : Elamite
+
+(Historical)
+
+=item {en} : English
+
+Notable forms:
+{en-au} Australian English;
+{en-bz} Belize English;
+{en-ca} Canadian English;
+{en-gb} UK English;
+{en-ie} Irish English;
+{en-jm} Jamaican English;
+{en-nz} New Zealand English;
+{en-ph} Philippine English;
+{en-tt} Trinidad English;
+{en-us} US English;
+{en-za} South African English;
+{en-zw} Zimbabwe English.
+
+=item {enm} : Middle English (1100-1500)
+
+(Historical)
+
+=item {ang} : Old English (ca.450-1100)
+
+eq Anglo-Saxon. (Historical)
+
+=item {eo} : Esperanto
+
+(Artificial)
+
+=item {et} : Estonian
+
+=item {ewe} : Ewe
+
+=item {ewo} : Ewondo
+
+=item {fan} : Fang
+
+=item {fat} : Fanti
+
+=item {fo} : Faroese
+
+=item {fj} : Fijian
+
+=item {fi} : Finnish
+
+=item [{fiu} : Finno-Ugrian (Other)]
+
+eq Finno-Ugric. NOT Ugaritic!
+
+=item {fon} : Fon
+
+=item {fr} : French
+
+Notable forms:
+{fr-fr} France French;
+{fr-be} Belgian French;
+{fr-ca} Canadian French;
+{fr-ch} Swiss French;
+{fr-lu} Luxembourg French;
+{fr-mc} Monaco French.
+
+=item {frm} : Middle French (ca.1400-1600)
+
+(Historical)
+
+=item {fro} : Old French (842-ca.1400)
+
+(Historical)
+
+=item {fy} : Frisian
+
+=item {fur} : Friulian
+
+=item {ful} : Fulah
+
+=item {gaa} : Ga
+
+=item {gd} : Scots Gaelic
+
+NOT Scots!
+
+=item {gl} : Gallegan
+
+eq Galician
+
+=item {lug} : Ganda
+
+=item {gay} : Gayo
+
+=item {gba} : Gbaya
+
+=item {gez} : Geez
+
+eq Ge'ez
+
+=item {ka} : Georgian
+
+=item {de} : German
+
+Notable forms:
+{de-at} Austrian German;
+{de-be} Belgian German;
+{de-ch} Swiss German;
+{de-de} Germany German;
+{de-li} Liechtenstein German;
+{de-lu} Luxembourg German.
+
+=item {gmh} : Middle High German (ca.1050-1500)
+
+(Historical)
+
+=item {goh} : Old High German (ca.750-1050)
+
+(Historical)
+
+=item [{gem} : Germanic (Other)]
+
+=item {gil} : Gilbertese
+
+=item {gon} : Gondi
+
+=item {gor} : Gorontalo
+
+=item {got} : Gothic
+
+(Historical)
+
+=item {grb} : Grebo
+
+=item {grc} : Ancient Greek
+
+(Historical) (Until 15th century or so.)
+
+=item {el} : Modern Greek
+
+(Since 15th century or so.)
+
+=item {gn} : Guarani
+
+GuaranE<iacute>
+
+=item {gu} : Gujarati
+
+=item {gwi} : Gwich'in
+
+eq Gwichin
+
+=item {hai} : Haida
+
+=item {ha} : Hausa
+
+=item {haw} : Hawaiian
+
+Hawai'ian
+
+=item {he} : Hebrew
+
+(Formerly "iw".)
+
+=for etc
+{iw} Hebrew (old tag)
+
+=item {hz} : Herero
+
+=item {hil} : Hiligaynon
+
+=item {him} : Himachali
+
+=item {hi} : Hindi
+
+=item {ho} : Hiri Motu
+
+=item {hit} : Hittite
+
+(Historical)
+
+=item {hmn} : Hmong
+
+=item {hu} : Hungarian
+
+=item {hup} : Hupa
+
+=item {iba} : Iban
+
+=item {is} : Icelandic
+
+=item {ibo} : Igbo
+
+=item {ijo} : Ijo
+
+=item {ilo} : Iloko
+
+=item [{inc} : Indic (Other)]
+
+=item [{ine} : Indo-European (Other)]
+
+=item {id} : Indonesian
+
+(Formerly "in".)
+
+=for etc
+{in} Indonesian (old tag)
+
+=item {ia} : Interlingua (International Auxiliary Language Association)
+
+(Artificial) NOT Interlingue!
+
+=item {ie} : Interlingue
+
+(Artificial) NOT Interlingua!
+
+=item {iu} : Inuktitut
+
+A subform of "Eskimo".
+
+=item {ik} : Inupiaq
+
+A subform of "Eskimo".
+
+=item [{ira} : Iranian (Other)]
+
+=item {ga} : Irish
+
+=item {mga} : Middle Irish (900-1200)
+
+(Historical)
+
+=item {sga} : Old Irish (to 900)
+
+(Historical)
+
+=item [{iro} : Iroquoian languages]
+
+=item {it} : Italian
+
+Notable forms:
+{it-it} Italy Italian;
+{it-ch} Swiss Italian.
+
+=item {ja} : Japanese
+
+(NOT "jp"!)
+
+=item {jw} : Javanese
+
+=item {jrb} : Judeo-Arabic
+
+=item {jpr} : Judeo-Persian
+
+=item {kab} : Kabyle
+
+=item {kac} : Kachin
+
+=item {kl} : Kalaallisut
+
+eq Greenlandic "Eskimo"
+
+=item {kam} : Kamba
+
+=item {kn} : Kannada
+
+eq Kanarese. NOT Canadian!
+
+=item {kau} : Kanuri
+
+=item {kaa} : Kara-Kalpak
+
+=item {kar} : Karen
+
+=item {ks} : Kashmiri
+
+=item {kaw} : Kawi
+
+=item {kk} : Kazakh
+
+=item {kha} : Khasi
+
+=item {km} : Khmer
+
+eq Cambodian. eq Kampuchean.
+
+=item [{khi} : Khoisan (Other)]
+
+=item {kho} : Khotanese
+
+=item {ki} : Kikuyu
+
+eq Gikuyu.
+
+=item {kmb} : Kimbundu
+
+=item {rw} : Kinyarwanda
+
+=item {ky} : Kirghiz
+
+=item {i-klingon} : Klingon
+
+=item {kv} : Komi
+
+=item {kon} : Kongo
+
+=item {kok} : Konkani
+
+=item {ko} : Korean
+
+=item {kos} : Kosraean
+
+=item {kpe} : Kpelle
+
+=item {kro} : Kru
+
+=item {kj} : Kuanyama
+
+=item {kum} : Kumyk
+
+=item {ku} : Kurdish
+
+=item {kru} : Kurukh
+
+=item {kut} : Kutenai
+
+=item {lad} : Ladino
+
+eq Judeo-Spanish. NOT Ladin (a minority language in Italy).
+
+=item {lah} : Lahnda
+
+NOT Lamba!
+
+=item {lam} : Lamba
+
+NOT Lahnda!
+
+=item {lo} : Lao
+
+eq Laotian.
+
+=item {la} : Latin
+
+(Historical) NOT Ladin! NOT Ladino!
+
+=item {lv} : Latvian
+
+eq Lettish.
+
+=item {lb} : Letzeburgesch
+
+eq Luxemburgian, eq Luxemburger. (Formerly i-lux.)
+
+=for etc
+{i-lux} Letzeburgesch (old tag)
+
+=item {lez} : Lezghian
+
+=item {ln} : Lingala
+
+=item {lt} : Lithuanian
+
+=item {nds} : Low German
+
+eq Low Saxon. eq Low German. eq Low Saxon.
+
+=item {loz} : Lozi
+
+=item {lub} : Luba-Katanga
+
+=item {lua} : Luba-Lulua
+
+=item {lui} : Luiseno
+
+eq LuiseE<ntilde>o.
+
+=item {lun} : Lunda
+
+=item {luo} : Luo (Kenya and Tanzania)
+
+=item {lus} : Lushai
+
+=item {mk} : Macedonian
+
+eq the modern Slavic language spoken in what was Yugoslavia.
+NOT the form of Greek spoken in Greek Macedonia!
+
+=item {mad} : Madurese
+
+=item {mag} : Magahi
+
+=item {mai} : Maithili
+
+=item {mak} : Makasar
+
+=item {mg} : Malagasy
+
+=item {ms} : Malay
+
+NOT Malayalam!
+
+=item {ml} : Malayalam
+
+NOT Malay!
+
+=item {mt} : Maltese
+
+=item {mnc} : Manchu
+
+=item {mdr} : Mandar
+
+NOT Mandarin!
+
+=item {man} : Mandingo
+
+=item {mni} : Manipuri
+
+eq Meithei.
+
+=item [{mno} : Manobo languages]
+
+=item {gv} : Manx
+
+=item {mi} : Maori
+
+NOT Mari!
+
+=item {mr} : Marathi
+
+=item {chm} : Mari
+
+NOT Maori!
+
+=item {mh} : Marshall
+
+eq Marshallese.
+
+=item {mwr} : Marwari
+
+=item {mas} : Masai
+
+=item [{myn} : Mayan languages]
+
+=item {men} : Mende
+
+=item {mic} : Micmac
+
+=item {min} : Minangkabau
+
+=item {i-mingo} : Mingo
+
+eq the Iroquoian language West Virginia Seneca. NOT New York Seneca!
+
+=item [{mis} : Miscellaneous languages]
+
+Don't use this.
+
+=item {moh} : Mohawk
+
+=item {mo} : Moldavian
+
+eq Moldovan.
+
+=item [{mkh} : Mon-Khmer (Other)]
+
+=item {lol} : Mongo
+
+=item {mn} : Mongolian
+
+eq Mongol.
+
+=item {mos} : Mossi
+
+=item [{mul} : Multiple languages]
+
+Not for normal use.
+
+=item [{mun} : Munda languages]
+
+=item {nah} : Nahuatl
+
+=item {na} : Nauru
+
+=item {nv} : Navajo
+
+eq Navaho. (Formerly i-navajo.)
+
+=for etc
+{i-navajo} Navajo (old tag)
+
+=item {nd} : North Ndebele
+
+=item {nr} : South Ndebele
+
+=item {ng} : Ndonga
+
+=item {ne} : Nepali
+
+eq Nepalese. Notable forms:
+{ne-np} Nepal Nepali;
+{ne-in} India Nepali.
+
+=item {new} : Newari
+
+=item {nia} : Nias
+
+=item [{nic} : Niger-Kordofanian (Other)]
+
+=item [{ssa} : Nilo-Saharan (Other)]
+
+=item {niu} : Niuean
+
+=item {non} : Old Norse
+
+(Historical)
+
+=item [{nai} : North American Indian]
+
+Do not use this.
+
+=item {se} : Northern Sami
+
+eq Lappish. eq Lapp. eq (Northern) Saami.
+
+=item {no} : Norwegian
+
+Note the two following forms:
+
+=item {nb} : Norwegian Bokmal
+
+eq BokmE<aring>l, (A form of Norwegian.) (Formerly no-bok.)
+
+=for etc
+{no-bok} Norwegian Bokmal (old tag)
+
+=item {nn} : Norwegian Nynorsk
+
+(A form of Norwegian.) (Formerly no-nyn.)
+
+=for etc
+{no-nyn} Norwegian Nynorsk (old tag)
+
+=item [{nub} : Nubian languages]
+
+=item {nym} : Nyamwezi
+
+=item {nyn} : Nyankole
+
+=item {nyo} : Nyoro
+
+=item {nzi} : Nzima
+
+=item {oc} : Occitan (post 1500)
+
+eq ProvenE<ccedil>al, eq Provencal
+
+=item {oji} : Ojibwa
+
+eq Ojibwe.
+
+=item {or} : Oriya
+
+=item {om} : Oromo
+
+=item {osa} : Osage
+
+=item {os} : Ossetian; Ossetic
+
+=item [{oto} : Otomian languages]
+
+Group of languages collectively called "OtomE<iacute>".
+
+=item {pal} : Pahlavi
+
+eq Pahlevi
+
+=item {i-pwn} : Paiwan
+
+eq Pariwan
+
+=item {pau} : Palauan
+
+=item {pi} : Pali
+
+(Historical?)
+
+=item {pam} : Pampanga
+
+=item {pag} : Pangasinan
+
+=item {pa} : Panjabi
+
+eq Punjabi
+
+=item {pap} : Papiamento
+
+eq Papiamentu.
+
+=item [{paa} : Papuan (Other)]
+
+=item {fa} : Persian
+
+eq Farsi. eq Iranian.
+
+=item {peo} : Old Persian (ca.600-400 B.C.)
+
+=item [{phi} : Philippine (Other)]
+
+=item {phn} : Phoenician
+
+(Historical)
+
+=item {pon} : Pohnpeian
+
+NOT Pompeiian!
+
+=item {pl} : Polish
+
+=item {pt} : Portuguese
+
+eq Portugese. Notable forms:
+{pt-pt} Portugal Portuguese;
+{pt-br} Brazilian Portuguese.
+
+=item [{pra} : Prakrit languages]
+
+=item {pro} : Old Provencal (to 1500)
+
+eq Old ProvenE<ccedil>al. (Historical.)
+
+=item {ps} : Pushto
+
+eq Pashto. eq Pushtu.
+
+=item {qu} : Quechua
+
+eq Quecha.
+
+=item {rm} : Raeto-Romance
+
+eq Romansh.
+
+=item {raj} : Rajasthani
+
+=item {rap} : Rapanui
+
+=item {rar} : Rarotongan
+
+=item [{qaa - qtz} : Reserved for local use.]
+
+=item [{roa} : Romance (Other)]
+
+NOT Romanian! NOT Romany! NOT Romansh!
+
+=item {ro} : Romanian
+
+eq Rumanian. NOT Romany!
+
+=item {rom} : Romany
+
+eq Rom. NOT Romanian!
+
+=item {rn} : Rundi
+
+=item {ru} : Russian
+
+NOT White Russian! NOT Rusyn!
+
+=item [{sal} : Salishan languages]
+
+Large language group.
+
+=item {sam} : Samaritan Aramaic
+
+NOT Aramaic!
+
+=item [{smi} : Sami languages (Other)]
+
+=item {sm} : Samoan
+
+=item {sad} : Sandawe
+
+=item {sg} : Sango
+
+=item {sa} : Sanskrit
+
+(Historical)
+
+=item {sat} : Santali
+
+=item {sc} : Sardinian
+
+eq Sard.
+
+=item {sas} : Sasak
+
+=item {sco} : Scots
+
+NOT Scots Gaelic!
+
+=item {sel} : Selkup
+
+=item [{sem} : Semitic (Other)]
+
+=item {sr} : Serbian
+
+eq Serb. NOT Sorbian.
+
+=item {srr} : Serer
+
+=item {shn} : Shan
+
+=item {sn} : Shona
+
+=item {sid} : Sidamo
+
+=item {sgn-...} : Sign Languages
+
+Always use with a subtag. Notable forms:
+{sgn-gb} British Sign Language (BSL);
+{sgn-ie} Irish Sign Language (ISL);
+{sgn-ni} Nicaraguan Sign Language (ISN);
+{sgn-us} American Sign Language (ASL).
+
+=item {bla} : Siksika
+
+eq Blackfoot. eq Pikanii.
+
+=item {sd} : Sindhi
+
+=item {si} : Sinhalese
+
+eq Sinhala.
+
+=item [{sit} : Sino-Tibetan (Other)]
+
+=item [{sio} : Siouan languages]
+
+=item {den} : Slave (Athapascan)
+
+("Slavey" is a subform.)
+
+=item [{sla} : Slavic (Other)]
+
+=item {sk} : Slovak
+
+eq Slovakian.
+
+=item {sl} : Slovenian
+
+eq Slovene.
+
+=item {sog} : Sogdian
+
+=item {so} : Somali
+
+=item {son} : Songhai
+
+=item {snk} : Soninke
+
+=item {wen} : Sorbian languages
+
+eq Wendish. eq Sorb. eq Lusatian. eq Wend. NOT Venda! NOT Serbian!
+
+=item {nso} : Northern Sotho
+
+=item {st} : Southern Sotho
+
+eq Sutu. eq Sesotho.
+
+=item [{sai} : South American Indian (Other)]
+
+=item {es} : Spanish
+
+Notable forms:
+{es-ar} Argentine Spanish;
+{es-bo} Bolivian Spanish;
+{es-cl} Chilean Spanish;
+{es-co} Colombian Spanish;
+{es-do} Dominican Spanish;
+{es-ec} Ecuadorian Spanish;
+{es-es} Spain Spanish;
+{es-gt} Guatemalan Spanish;
+{es-hn} Honduran Spanish;
+{es-mx} Mexican Spanish;
+{es-pa} Panamanian Spanish;
+{es-pe} Peruvian Spanish;
+{es-pr} Puerto Rican Spanish;
+{es-py} Paraguay Spanish;
+{es-sv} Salvadoran Spanish;
+{es-us} US Spanish;
+{es-uy} Uruguayan Spanish;
+{es-ve} Venezuelan Spanish.
+
+=item {suk} : Sukuma
+
+=item {sux} : Sumerian
+
+(Historical)
+
+=item {su} : Sundanese
+
+=item {sus} : Susu
+
+=item {sw} : Swahili
+
+eq Kiswahili
+
+=item {ss} : Swati
+
+=item {sv} : Swedish
+
+Notable forms:
+{sv-se} Sweden Swedish;
+{sv-fi} Finland Swedish.
+
+=item {syr} : Syriac
+
+=item {tl} : Tagalog
+
+=item {ty} : Tahitian
+
+=item [{tai} : Tai (Other)]
+
+NOT Thai!
+
+=item {tg} : Tajik
+
+=item {tmh} : Tamashek
+
+=item {ta} : Tamil
+
+=item {i-tao} : Tao
+
+eq Yami.
+
+=item {tt} : Tatar
+
+=item {i-tay} : Tayal
+
+eq Atayal. eq Atayan.
+
+=item {te} : Telugu
+
+=item {ter} : Tereno
+
+=item {tet} : Tetum
+
+=item {th} : Thai
+
+NOT Tai!
+
+=item {bo} : Tibetan
+
+=item {tig} : Tigre
+
+=item {ti} : Tigrinya
+
+=item {tem} : Timne
+
+eq Themne. eq Timene.
+
+=item {tiv} : Tiv
+
+=item {tli} : Tlingit
+
+=item {tpi} : Tok Pisin
+
+=item {tkl} : Tokelau
+
+=item {tog} : Tonga (Nyasa)
+
+NOT Tsonga!
+
+=item {to} : Tonga (Tonga Islands)
+
+(Pronounced "Tong-a", not "Tong-ga")
+
+NOT Tsonga!
+
+=item {tsi} : Tsimshian
+
+eq Sm'algyax
+
+=item {ts} : Tsonga
+
+NOT Tonga!
+
+=item {i-tsu} : Tsou
+
+=item {tn} : Tswana
+
+Same as Setswana.
+
+=item {tum} : Tumbuka
+
+=item {tr} : Turkish
+
+(Typically in Roman script)
+
+=item {ota} : Ottoman Turkish (1500-1928)
+
+(Typically in Arabic script) (Historical)
+
+=item {tk} : Turkmen
+
+eq Turkmeni.
+
+=item {tvl} : Tuvalu
+
+=item {tyv} : Tuvinian
+
+eq Tuvan. eq Tuvin.
+
+=item {tw} : Twi
+
+=item {uga} : Ugaritic
+
+NOT Ugric!
+
+=item {ug} : Uighur
+
+=item {uk} : Ukrainian
+
+=item {umb} : Umbundu
+
+=item {und} : Undetermined
+
+Not a tag for normal use.
+
+=item {ur} : Urdu
+
+=item {uz} : Uzbek
+
+eq E<Ouml>zbek
+
+=item {vai} : Vai
+
+=item {ven} : Venda
+
+NOT Wendish! NOT Wend! NOT Avestan!
+
+=item {vi} : Vietnamese
+
+eq Viet.
+
+=item {vo} : Volapuk
+
+eq VolapE<uuml>k. (Artificial)
+
+=item {vot} : Votic
+
+eq Votian. eq Vod.
+
+=item [{wak} : Wakashan languages]
+
+=item {wal} : Walamo
+
+eq Wolaytta.
+
+=item {war} : Waray
+
+Presumably the Philippine language Waray-Waray (SamareE<ntilde>o),
+not the smaller Philippine language Waray Sorsogon, nor the extinct
+Australian language Waray.
+
+=item {was} : Washo
+
+eq Washoe
+
+=item {cy} : Welsh
+
+=item {wo} : Wolof
+
+=item {x-...} : Unregistered (Semi-Private Use)
+
+"x-" is a prefix for language tags that are not registered with ISO
+or IANA. Example, x-double-dutch
+
+=item {xh} : Xhosa
+
+=item {sah} : Yakut
+
+=item {yao} : Yao
+
+(The Yao in Malawi?)
+
+=item {yap} : Yapese
+
+eq Yap
+
+=item {yi} : Yiddish
+
+Formerly "ji". Sometimes in Roman script, sometimes in Hebrew script.
+
+=for etc
+{ji} Yiddish (old tag)
+
+=item {yo} : Yoruba
+
+=item [{ypk} : Yupik languages]
+
+Several "Eskimo" languages.
+
+=item {znd} : Zande
+
+=item [{zap} : Zapotec]
+
+(A group of languages.)
+
+=item {zen} : Zenaga
+
+NOT Zend.
+
+=item {za} : Zhuang
+
+=item {zu} : Zulu
+
+=item {zun} : Zuni
+
+eq ZuE<ntilde>i
+
+=back
+
+=for woohah END
+
+=head1 SEE ALSO
+
+L<I18N::LangTags|I18N::LangTags> and its "See Also" section.
+
+=head1 COPYRIGHT AND DISCLAIMER
+
+Copyright (c) 2001,2002 Sean M. Burke. All rights reserved.
+
+You can redistribute and/or
+modify this document under the same terms as Perl itself.
+
+This document is provided in the hope that it will be
+useful, but without any warranty;
+without even the implied warranty of accuracy, authoritativeness,
+completeness, merchantability, or fitness for a particular purpose.
+
+Email any corrections or questions to me.
+
+=head1 AUTHOR
+
+Sean M. Burke, sburkeE<64>cpan.org
+
+=cut
+
+
+# To generate a list of just the two and three-letter codes:
+
+#!/usr/local/bin/perl -w
+
+require 5; # Time-stamp: "2001-03-13 21:53:39 MST"
+ # Sean M. Burke, sburke@cpan.org
+ # This program is for generating the language_codes.txt file
+use strict;
+use LWP::Simple;
+use HTML::TreeBuilder 3.10;
+my $root = HTML::TreeBuilder->new();
+my $url = 'http://lcweb.loc.gov/standards/iso639-2/bibcodes.html';
+$root->parse(get($url) || die "Can't get $url"); # fetch the page and parse it; die if the fetch returns nothing
+$root->eof();
+
+my @codes; # each entry is a pair: lowercased English name (sort key), then the output line
+
+foreach my $tr ($root->find_by_tag_name('tr')) {
+ my @f = map $_->as_text(), $tr->content_list(); # text of each child of the table row
+ #print map("<$_> ", @f), "\n";
+ next unless @f == 5; # only rows with exactly five cells are used
+ pop @f; # nix the French name
+ next if $f[-1] eq 'Language Name (English)'; # it's a header line
+ my $xx = splice(@f, 2,1); # pull out the two-letter code
+ $f[-1] =~ s/^\s+//; # trim the English name
+ $f[-1] =~ s/\s+$//;
+ if($xx =~ m/[a-zA-Z]/) { # there's a two-letter code for it
+ push @codes, [ lc($f[-1]), "$xx\t$f[-1]\n" ];
+ } else { # print the three-letter codes.
+ if($f[0] eq $f[1]) { # presumably the two three-letter code columns of the page; confirm against its layout
+ push @codes, [ lc($f[-1]), "$f[1]\t$f[2]\n" ];
+ } else { # shouldn't happen
+ push @codes, [ lc($f[-1]), "@f !!!!!!!!!!\n" ]; # keep the row, visibly flagged, rather than dropping it
+ }
+ }
+}
+
+print map $_->[1], sort {; $a->[0] cmp $b->[0] } @codes; # output lines ordered by lowercased English name
+print "[ based on $url\n at ", scalar(localtime), "]\n",
+ "[Note: doesn't include IANA-registered codes.]\n";
+exit;
+__END__
+
diff --git a/gnu/usr.bin/perl/lib/I18N/LangTags/README b/gnu/usr.bin/perl/lib/I18N/LangTags/README
new file mode 100644
index 00000000000..fbae05f43d3
--- /dev/null
+++ b/gnu/usr.bin/perl/lib/I18N/LangTags/README
@@ -0,0 +1,78 @@
+README for I18N::LangTags
+ Time-stamp: "2001-05-29 21:52:15 MDT"
+
+ I18N::LangTags
+
+I18N::LangTags - functions for dealing with RFC3066-style language
+tags
+
+Language tags are a formalism, described in RFC 3066 (obsoleting
+1766), for declaring what language form (language and possibly
+dialect) a given chunk of information is in.
+
+This library provides functions for common tasks involving language
+tags (notably the extraction of them, comparing them, and testing the
+formal validity of them) as is needed in a variety of protocols and
+applications.
+
+
+I18N::LangTags::List -- tags and names for human languages. This
+module goes from known language tag names ("fr-CA") to their English
+names ("Canadian French"). Its documentation also lists the several
+hundred known tags and some common subforms. You may find this useful
+as a reference.
+
+
+See the POD for more information.
+
+
+INSTALLATION
+
+You install I18N::LangTags and I18N::LangTags::List, as you would
+install any perl module library, by running these commands:
+
+ perl Makefile.PL
+ make
+ make test
+ make install
+
+If you want to install a private copy of I18N::LangTags in your home
+directory, then you should try to produce the initial Makefile with
+something like this command:
+
+ perl Makefile.PL LIB=~/perl
+
+See perldoc perlmodinstall for more information on installing modules.
+
+
+DOCUMENTATION
+
+POD-format documentation is included in LangTags.pm. POD is readable
+with the 'perldoc' utility. See ChangeLog for recent changes.
+
+
+SUPPORT
+
+Questions, bug reports, useful code bits, and suggestions for
+I18N::LangTags should just be sent to me at sburke@cpan.org
+
+
+AVAILABILITY
+
+The latest version of I18N::LangTags is available from the
+Comprehensive Perl Archive Network (CPAN). Visit
+<http://www.cpan.org/> to find a CPAN site near you.
+
+
+COPYRIGHT
+
+Copyright 1998-2001, Sean M. Burke <sburke@cpan.org>, all rights
+reserved.
+
+The programs and documentation in this dist are distributed in
+the hope that they will be useful, but without any warranty; without
+even the implied warranty of merchantability or fitness for a
+particular purpose.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
diff --git a/gnu/usr.bin/perl/lib/I18N/LangTags/test.pl b/gnu/usr.bin/perl/lib/I18N/LangTags/test.pl
new file mode 100644
index 00000000000..88a7bf66ae8
--- /dev/null
+++ b/gnu/usr.bin/perl/lib/I18N/LangTags/test.pl
@@ -0,0 +1,79 @@
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.pl'
+
+######################### We start with some black magic to print on failure.
+require 5;
+ # Time-stamp: "2001-06-21 22:59:38 MDT"
+use strict;
+use Test;
+BEGIN { plan tests => 46 }; # 1 load check + 27 I18N::LangTags checks + 18 I18N::LangTags::List lookups
+BEGIN { ok 1 } # test 1: emitted at compile time, ahead of the use line below
+use I18N::LangTags (':ALL');
+
+print "# Perl v$], I18N::LangTags v$I18N::LangTags::VERSION\n";
+
+ok !is_language_tag(''); # empty string
+ok is_language_tag('fr');
+ok is_language_tag('fr-ca');
+ok is_language_tag('fr-CA'); # mixed case is accepted
+ok !is_language_tag('fr-CA-'); # trailing hyphen
+ok !is_language_tag('fr_CA'); # underscore instead of hyphen
+ok is_language_tag('fr-ca-joual');
+ok !is_language_tag('frca');
+ok is_language_tag('nav'); # three-letter primary tag
+ok is_language_tag('nav-shiprock');
+ok !is_language_tag('nav-ceremonial'); # subtag too long
+ok !is_language_tag('x'); # bare prefix with no subtag
+ok !is_language_tag('i'); # bare prefix with no subtag
+ok is_language_tag('i-borg'); # NB: fictitious tag
+ok is_language_tag('x-borg');
+ok is_language_tag('x-borg-prot5123'); # digits inside a subtag
+ok same_language_tag('x-borg-prot5123', 'i-BORG-Prot5123' ); # x-/i- prefix and letter case do not distinguish tags
+ok !same_language_tag('en', 'en-us' );
+
+ok 0 == similarity_language_tag('en-ca', 'fr-ca'); # expected values look like counts of shared leading subtags; confirm in LangTags.pm
+ok 1 == similarity_language_tag('en-ca', 'en-us');
+ok 2 == similarity_language_tag('en-us-southern', 'en-us-western');
+ok 2 == similarity_language_tag('en-us-southern', 'en-us');
+
+ok grep $_ eq 'hi', panic_languages('kok'); # each check tests whether a given tag is in the returned list
+ok grep $_ eq 'en', panic_languages('x-woozle-wuzzle');
+ok ! grep $_ eq 'mr', panic_languages('it');
+ok grep $_ eq 'es', panic_languages('it');
+ok grep $_ eq 'it', panic_languages('es');
+
+
+print "# Now the ::List tests...\n";
+use I18N::LangTags::List;
+foreach my $lt (qw(
+ en
+ en-us
+ en-kr
+ el
+ elx
+ i-mingo
+ i-mingo-tom
+ x-mingo-tom
+ it
+ it-it
+ it-IT
+ it-FR
+ yi
+ ji
+ cre-syllabic
+ cre-syllabic-western
+ cre-western
+ cre-latin
+)) {
+ my $name = I18N::LangTags::List::name($lt); # every tag listed above is expected to yield a true name
+ if($name) {
+ ok(1);
+ print "# $lt -> $name\n";
+ } else {
+ ok(0);
+ print "# Failed lookup on $lt\n";
+ }
+}
+
+print "# So there!\n";
+
+