src - OpenBSD base system

diff options


context:
space:
mode:

author	Todd C. Miller <millert@cvs.openbsd.org>	2002-10-27 22:15:15 +0000
committer	Todd C. Miller <millert@cvs.openbsd.org>	2002-10-27 22:15:15 +0000
commit	74cfb115ac810480c0000dc742b20383c1578bac (patch)
tree	316d96e5123617976f1637b143570c309a662045 /gnu/usr.bin/perl/lib/I18N
parent	453ade492b8e06c619009d6cd52a85cb04e8cf17 (diff)

stock perl 5.8.0 from CPAN

Diffstat (limited to 'gnu/usr.bin/perl/lib/I18N')

-rw-r--r--

gnu/usr.bin/perl/lib/I18N/Collate.t

-rw-r--r--

gnu/usr.bin/perl/lib/I18N/LangTags.pm

800

-rw-r--r--

gnu/usr.bin/perl/lib/I18N/LangTags/ChangeLog

107

-rw-r--r--

gnu/usr.bin/perl/lib/I18N/LangTags/List.pm

1622

-rw-r--r--

gnu/usr.bin/perl/lib/I18N/LangTags/README

-rw-r--r--

gnu/usr.bin/perl/lib/I18N/LangTags/test.pl

6 files changed, 2730 insertions, 0 deletions

diff --git a/gnu/usr.bin/perl/lib/I18N/Collate.t b/gnu/usr.bin/perl/lib/I18N/Collate.t
new file mode 100644
index 00000000000..bf3ba20b6aa
--- /dev/null
+++ b/gnu/usr.bin/perl/lib/I18N/Collate.t

@@ -0,0 +1,44 @@

+#!./perl

+BEGIN {

+ chdir 't' if -d 't';

+ @INC = '../lib';

+ require Config; import Config;

+ if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) {

+ print "1..0\n";

+ exit;

+ }

+print "1..7\n";

+use I18N::Collate;

+print "ok 1\n";

+$a = I18N::Collate->new("foo");

+print "ok 2\n";

+ use warnings;

+ local $SIG{__WARN__} = sub { $@ = $_[0] };

+ $b = I18N::Collate->new("foo");

+ print "not " unless $@ =~ /\bHAS BEEN DEPRECATED\b/;

+ print "ok 3\n";

+ $@ = '';

+print "not " unless $a eq $b;

+print "ok 4\n";

+$b = I18N::Collate->new("bar");

+print "not " if $@ =~ /\bHAS BEEN DEPRECATED\b/;

+print "ok 5\n";

+print "not " if $a eq $b;

+print "ok 6\n";

+print "not " if $a lt $b == $a gt $b;

+print "ok 7\n";

diff --git a/gnu/usr.bin/perl/lib/I18N/LangTags.pm b/gnu/usr.bin/perl/lib/I18N/LangTags.pm
new file mode 100644
index 00000000000..ab5ef38245e
--- /dev/null
+++ b/gnu/usr.bin/perl/lib/I18N/LangTags.pm

@@ -0,0 +1,800 @@

+# Time-stamp: "2002-02-02 20:43:03 MST"

+# Sean M. Burke <sburke@cpan.org>

+require 5.000;

+package I18N::LangTags;

+use strict;

+use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION %Panic);

+require Exporter;

+@ISA = qw(Exporter);

+@EXPORT = qw();

+@EXPORT_OK = qw(is_language_tag same_language_tag

+ extract_language_tags super_languages

+ similarity_language_tag is_dialect_of

+ locale2language_tag alternate_language_tags

+ encode_language_tag panic_languages

+ );

+%EXPORT_TAGS = ('ALL' => \@EXPORT_OK);

+$VERSION = "0.27";

+=head1 NAME

+I18N::LangTags - functions for dealing with RFC3066-style language tags

+=head1 SYNOPSIS

+ use I18N::LangTags qw(is_language_tag same_language_tag

+ extract_language_tags super_languages

+ similarity_language_tag is_dialect_of

+ locale2language_tag alternate_language_tags

+ encode_language_tag panic_languages

+ );

+...or whatever of those functions you want to import. Those are

+all the exportable functions -- you're free to import only some,

+or none at all. By default, none are imported. If you say:

+ use I18N::LangTags qw(:ALL)

+...then all are exported. (This saves you from having to use

+something less obvious like C<use I18N::LangTags qw(/./)>.)

+If you don't import any of these functions, assume a C<&I18N::LangTags::>

+in front of all the function names in the following examples.

+=head1 DESCRIPTION

+Language tags are a formalism, described in RFC 3066 (obsoleting

+1766), for declaring what language form (language and possibly

+dialect) a given chunk of information is in.

+This library provides functions for common tasks involving language

+tags as they are needed in a variety of protocols and applications.

+Please see the "See Also" references for a thorough explanation

+of how to correctly use language tags.

+=over

+=cut

+###########################################################################

+=item * the function is_language_tag($lang1)

+Returns true iff $lang1 is a formally valid language tag.

+ is_language_tag("fr") is TRUE

+ is_language_tag("x-jicarilla") is FALSE

+ (Subtags can be 8 chars long at most -- 'jicarilla' is 9)

+ is_language_tag("sgn-US") is TRUE

+ (That's American Sign Language)

+ is_language_tag("i-Klikitat") is TRUE

+ (True without regard to the fact no one has actually

+ registered Klikitat -- it's a formally valid tag)

+ is_language_tag("fr-patois") is TRUE

+ (Formally valid -- altho descriptively weak!)

+ is_language_tag("Spanish") is FALSE

+ is_language_tag("french-patois") is FALSE

+ (No good -- first subtag has to match

+ /^([xXiI]|[a-zA-Z]{2,3})$/ -- see RFC3066)

+ is_language_tag("x-borg-prot2532") is TRUE

+ (Yes, subtags can contain digits, as of RFC3066)

+=cut

+sub is_language_tag {

+ ## Changes in the language tagging standards may have to be reflected here.

+ my($tag) = lc($_[0]);

+ return 0 if $tag eq "i" or $tag eq "x";

+ # Bad degenerate cases that the following

+ # regexp would erroneously let pass

+ return $tag =~

+ /^(?: # First subtag

+ [xi] | [a-z]{2,3}

+ )

+ (?: # Subtags thereafter

+ - # separator

+ [a-z0-9]{1,8} # subtag

+ )*

+ $/xs ? 1 : 0;

+###########################################################################

+=item * the function extract_language_tags($whatever)

+Returns a list of whatever looks like formally valid language tags

+in $whatever. Not very smart, so don't get too creative with

+what you want to feed it.

+ extract_language_tags("fr, fr-ca, i-mingo")

+ returns: ('fr', 'fr-ca', 'i-mingo')

+ extract_language_tags("It's like this: I'm in fr -- French!")

+ returns: ('It', 'in', 'fr')

+ (So don't just feed it any old thing.)

+The output is untainted. If you don't know what tainting is,

+don't worry about it.

+=cut

+sub extract_language_tags {

+ ## Changes in the language tagging standards may have to be reflected here.

+ my($text) =

+ $_[0] =~ m/(.+)/ # to make for an untainted result

+ ? $1 : ''

+ ;

+ return grep(!m/^[ixIX]$/s, # 'i' and 'x' aren't good tags

+ $text =~

+ m/

+ \b

+ (?: # First subtag

+ [iIxX] | [a-zA-Z]{2,3}

+ )

+ (?: # Subtags thereafter

+ - # separator

+ [a-zA-Z0-9]{1,8} # subtag

+ )*

+ \b

+ /xsg

+ );

+###########################################################################

+=item * the function same_language_tag($lang1, $lang2)

+Returns true iff $lang1 and $lang2 are acceptable variant tags

+representing the same language-form.

+ same_language_tag('x-kadara', 'i-kadara') is TRUE

+ (The x/i- alternation doesn't matter)

+ same_language_tag('X-KADARA', 'i-kadara') is TRUE

+ (...and neither does case)

+ same_language_tag('en', 'en-US') is FALSE

+ (all-English is not the SAME as US English)

+ same_language_tag('x-kadara', 'x-kadar') is FALSE

+ (these are totally unrelated tags)

+ same_language_tag('no-bok', 'nb') is TRUE

+ (no-bok is a legacy tag for nb (Norwegian Bokmal))

+C<same_language_tag> works by just seeing whether

+C<encode_language_tag($lang1)> is the same as

+C<encode_language_tag($lang2)>.

+(Yes, I know this function is named a bit oddly. Call it historic

+reasons.)

+=cut

+sub same_language_tag {

+ my $el1 = &encode_language_tag($_[0]);

+ return 0 unless defined $el1;

+ # this avoids the problem of

+ # encode_language_tag($lang1) eq encode_language_tag($lang2)

+ # being true if $lang1 and $lang2 are both undef

+ return $el1 eq &encode_language_tag($_[1]) ? 1 : 0;

+###########################################################################

+=item * the function similarity_language_tag($lang1, $lang2)

+Returns an integer representing the degree of similarity between

+tags $lang1 and $lang2 (the order of which does not matter), where

+similarity is the number of common elements on the left,

+without regard to case and to x/i- alternation.

+ similarity_language_tag('fr', 'fr-ca') is 1

+ (one element in common)

+ similarity_language_tag('fr-ca', 'fr-FR') is 1

+ (one element in common)

+ similarity_language_tag('fr-CA-joual',

+ 'fr-CA-PEI') is 2

+ similarity_language_tag('fr-CA-joual', 'fr-CA') is 2

+ (two elements in common)

+ similarity_language_tag('x-kadara', 'i-kadara') is 1

+ (x/i- doesn't matter)

+ similarity_language_tag('en', 'x-kadar') is 0

+ similarity_language_tag('x-kadara', 'x-kadar') is 0

+ (unrelated tags -- no similarity)

+ similarity_language_tag('i-cree-syllabic',

+ 'i-cherokee-syllabic') is 0

+ (no B<leftmost> elements in common!)

+=cut

+sub similarity_language_tag {

+ my $lang1 = &encode_language_tag($_[0]);

+ my $lang2 = &encode_language_tag($_[1]);

+ # And encode_language_tag takes care of the whole

+ # no-nyn==nn, i-hakka==zh-hakka, etc, things

+ # NB: (i-sil-...)? (i-sgn-...)?

+ return undef if !defined($lang1) and !defined($lang2);

+ return 0 if !defined($lang1) or !defined($lang2);

+ my @l1_subtags = split('-', $lang1);

+ my @l2_subtags = split('-', $lang2);

+ my $similarity = 0;

+ while(@l1_subtags and @l2_subtags) {

+ if(shift(@l1_subtags) eq shift(@l2_subtags)) {

+ ++$similarity;

+ } else {

+ last;

+ }

+ return $similarity;

+###########################################################################

+=item * the function is_dialect_of($lang1, $lang2)

+Returns true iff language tag $lang1 represents a subform of

+language tag $lang2.

+B<Get the order right! It doesn't work the other way around!>

+ is_dialect_of('en-US', 'en') is TRUE

+ (American English IS a dialect of all-English)

+ is_dialect_of('fr-CA-joual', 'fr-CA') is TRUE

+ is_dialect_of('fr-CA-joual', 'fr') is TRUE

+ (Joual is a dialect of (a dialect of) French)

+ is_dialect_of('en', 'en-US') is FALSE

+ (all-English is NOT a dialect of American English)

+ is_dialect_of('fr', 'en-CA') is FALSE

+ is_dialect_of('en', 'en' ) is TRUE

+ is_dialect_of('en-US', 'en-US') is TRUE

+ (B<Note:> these are degenerate cases)

+ is_dialect_of('i-mingo-tom', 'x-Mingo') is TRUE

+ (the x/i thing doesn't matter, nor does case)

+ is_dialect_of('nn', 'no') is TRUE

+ (because 'nn' (New Norse) is aliased to 'no-nyn',

+ as a special legacy case, and 'no-nyn' is a

+ subform of 'no' (Norwegian))

+=cut

+sub is_dialect_of {

+ my $lang1 = &encode_language_tag($_[0]);

+ my $lang2 = &encode_language_tag($_[1]);

+ return undef if !defined($lang1) and !defined($lang2);

+ return 0 if !defined($lang1) or !defined($lang2);

+ return 1 if $lang1 eq $lang2;

+ return 0 if length($lang1) < length($lang2);

+ $lang1 .= '-';

+ $lang2 .= '-';

+ return

+ (substr($lang1, 0, length($lang2)) eq $lang2) ? 1 : 0;

+###########################################################################

+=item * the function super_languages($lang1)

+Returns a list of language tags that are superordinate tags to $lang1

+-- it gets this by removing subtags from the end of $lang1 until

+nothing (or just "i" or "x") is left.

+ super_languages("fr-CA-joual") is ("fr-CA", "fr")

+ super_languages("en-AU") is ("en")

+ super_languages("en") is empty-list, ()

+ super_languages("i-cherokee") is empty-list, ()

+ ...not ("i"), which would be illegal as well as pointless.

+If $lang1 is not a valid language tag, returns empty-list in

+a list context, undef in a scalar context.

+A notable and rather unavoidable problem with this method:

+"x-mingo-tom" has an "x" because the whole tag isn't an

+IANA-registered tag -- but super_languages('x-mingo-tom') is

+('x-mingo') -- which isn't really right, since 'i-mingo' is

+registered. But this module has no way of knowing that. (But note

+that same_language_tag('x-mingo', 'i-mingo') is TRUE.)

+More importantly, you assume I<at your peril> that superordinates of

+$lang1 are mutually intelligible with $lang1. Consider this

+carefully.

+=cut

+sub super_languages {

+ my $lang1 = $_[0];

+ return() unless defined($lang1) && &is_language_tag($lang1);

+ # a hack for those annoying new (2001) tags:

+ $lang1 =~ s/^nb\b/no-bok/i; # yes, backwards

+ $lang1 =~ s/^nn\b/no-nyn/i; # yes, backwards

+ $lang1 =~ s/^[ix](-hakka\b)/zh$1/i; # goes the right way

+ # i-hakka-bork-bjork-bjark => zh-hakka-bork-bjork-bjark

+ my @l1_subtags = split('-', $lang1);

+ ## Changes in the language tagging standards may have to be reflected here.

+ # NB: (i-sil-...)?

+ my @supers = ();

+ foreach my $bit (@l1_subtags) {

+ push @supers,

+ scalar(@supers) ? ($supers[-1] . '-' . $bit) : $bit;

+ }

+ pop @supers if @supers;

+ shift @supers if @supers && $supers[0] =~ m<^[iIxX]$>s;

+ return reverse @supers;

+###########################################################################

+=item * the function locale2language_tag($locale_identifier)

+This takes a locale name (like "en", "en_US", or "en_US.ISO8859-1")

+and maps it to a language tag. If it's not mappable (as with,

+notably, "C" and "POSIX"), this returns empty-list in a list context,

+or undef in a scalar context.

+ locale2language_tag("en") is "en"

+ locale2language_tag("en_US") is "en-US"

+ locale2language_tag("en_US.ISO8859-1") is "en-US"

+ locale2language_tag("C") is undef or ()

+ locale2language_tag("POSIX") is undef or ()

+I'm not totally sure that locale names map satisfactorily to language

+tags. Think REAL hard about how you use this. YOU HAVE BEEN WARNED.

+The output is untainted. If you don't know what tainting is,

+don't worry about it.

+=cut

+sub locale2language_tag {

+ my $lang =

+ $_[0] =~ m/(.+)/ # to make for an untainted result

+ ? $1 : ''

+ ;

+ return $lang if &is_language_tag($lang); # like "en"

+ $lang =~ tr<_><->; # "en_US" -> en-US

+ $lang =~ s<\.[-_a-zA-Z0-9\.]*><>s; # "en_US.ISO8859-1" -> en-US

+ return $lang if &is_language_tag($lang);

+ return;

+###########################################################################

+=item * the function encode_language_tag($lang1)

+This function, if given a language tag, returns an encoding of it such

+that:

+* tags representing different languages never get the same encoding.

+* tags representing the same language always get the same encoding.

+* an encoding of a formally valid language tag always is a string

+value that is defined, has length, and is true if considered as a

+boolean.

+Note that the encoding itself is B<not> a formally valid language tag.

+Note also that you cannot, currently, go from an encoding back to a

+language tag that it's an encoding of.

+Note also that you B<must> consider the encoded value as atomic; i.e.,

+you should not consider it as anything but an opaque, unanalysable

+string value. (The internals of the encoding method may change in

+future versions, as the language tagging standard changes over time.)

+C<encode_language_tag> returns undef if given anything other than a

+formally valid language tag.

+The reason C<encode_language_tag> exists is because different language

+tags may represent the same language; this is normally treatable with

+C<same_language_tag>, but consider this situation:

+You have a data file that expresses greetings in different languages.

+Its format is "[language tag]=[how to say 'Hello']", like:

+ en-US=Hiho

+ fr=Bonjour

+ i-mingo=Hau'

+And suppose you write a program that reads that file and then runs as

+a daemon, answering client requests that specify a language tag and

+then expect the string that says how to greet in that language. So an

+interaction looks like:

+ greeting-client asks: fr

+ greeting-server answers: Bonjour

+So far so good. But suppose the way you're implementing this is:

+ my %greetings;

+ die unless open(IN, "<in.dat");

+ while(<IN>) {

+ chomp;

+ next unless /^([^=]+)=(.+)/s;

+ my($lang, $expr) = ($1, $2);

+ $greetings{$lang} = $expr;

+ }

+ close(IN);

+at which point %greetings has the contents:

+ "en-US" => "Hiho"

+ "fr" => "Bonjour"

+ "i-mingo" => "Hau'"

+And suppose then that you answer client requests for language $wanted

+by just looking up $greetings{$wanted}.

+If the client asks for "fr", that will look up successfully in

+%greetings, to the value "Bonjour". And if the client asks for

+"i-mingo", that will look up successfully in %greetings, to the value

+"Hau'".

+But if the client asks for "i-Mingo" or "x-mingo", or "Fr", then the

+lookup in %greetings fails. That's the Wrong Thing.

+You could instead do lookups on $wanted with:

+ use I18N::LangTags qw(same_language_tag);

+ my $response = '';

+ foreach my $l2 (keys %greetings) {

+ if(same_language_tag($wanted, $l2)) {

+ $response = $greetings{$l2};

+ last;

+ }

+But that's rather inefficient. A better way to do it is to start your

+program with:

+ use I18N::LangTags qw(encode_language_tag);

+ my %greetings;

+ die unless open(IN, "<in.dat");

+ while(<IN>) {

+ chomp;

+ next unless /^([^=]+)=(.+)/s;

+ my($lang, $expr) = ($1, $2);

+ $greetings{

+ encode_language_tag($lang)

+ } = $expr;

+ }

+ close(IN);

+and then just answer client requests for language $wanted by just

+looking up

+ $greetings{encode_language_tag($wanted)}

+And that does the Right Thing.

+=cut

+sub encode_language_tag {

+ # Only similarity_language_tag() is allowed to analyse encodings!

+ ## Changes in the language tagging standards may have to be reflected here.

+ my($tag) = $_[0] || return undef;

+ return undef unless &is_language_tag($tag);

+ # For the moment, these legacy variances are few enough that

+ # we can just handle them here with regexps.

+ $tag =~ s/^iw\b/he/i; # Hebrew

+ $tag =~ s/^in\b/id/i; # Indonesian

+ $tag =~ s/^[ix]-lux\b/lb/i; # Luxemburger

+ $tag =~ s/^[ix]-navajo\b/nv/i; # Navajo

+ $tag =~ s/^ji\b/yi/i; # Yiddish

+ #

+ # These go FROM the simplex to complex form, to get

+ # similarity-comparison right. And that's okay, since

+ # similarity_language_tag is the only thing that

+ # analyzes our output.

+ $tag =~ s/^[ix]-hakka\b/zh-hakka/i; # Hakka

+ $tag =~ s/^nb\b/no-bok/i; # BACKWARDS for Bokmal

+ $tag =~ s/^nn\b/no-nyn/i; # BACKWARDS for Nynorsk

+ $tag =~ s/^[xiXI]-//s;

+ # Just lop off any leading "x/i-"

+ return "~" . uc($tag);

+#--------------------------------------------------------------------------

+=item * the function alternate_language_tags($lang1)

+This function, if given a language tag, returns all language tags that

+are alternate forms of this language tag. (I.e., tags which refer to

+the same language.) This is meant to handle legacy tags caused by

+the minor changes in language tag standards over the years; and

+the x-/i- alternation is also dealt with.

+Note that this function does I<not> try to equate new (and never-used,

+and unusable)

+ISO639-2 three-letter tags to old (and still in use) ISO639-1

+two-letter equivalents -- like "ara" -> "ar" -- because

+"ara" has I<never> been in use as an Internet language tag,

+and RFC 3066 stipulates that it never should be, since a shorter

+tag ("ar") exists.

+Examples:

+ alternate_language_tags('no-bok') is ('nb')

+ alternate_language_tags('nb') is ('no-bok')

+ alternate_language_tags('he') is ('iw')

+ alternate_language_tags('iw') is ('he')

+ alternate_language_tags('i-hakka') is ('zh-hakka', 'x-hakka')

+ alternate_language_tags('zh-hakka') is ('i-hakka', 'x-hakka')

+ alternate_language_tags('en') is ()

+ alternate_language_tags('x-mingo-tom') is ('i-mingo-tom')

+ alternate_language_tags('x-klikitat') is ('i-klikitat')

+ alternate_language_tags('i-klikitat') is ('x-klikitat')

+This function returns empty-list if given anything other than a formally

+valid language tag.

+=cut

+my %alt = qw( i x x i I X X I );

+sub alternate_language_tags {

+ my $tag = $_[0];

+ return() unless &is_language_tag($tag);

+ my @em; # push 'em real goood!

+ # For the moment, these legacy variances are few enough that

+ # we can just handle them here with regexps.

+ if( $tag =~ m/^[ix]-hakka\b(.*)/i) {push @em, "zh-hakka$1";

+ } elsif($tag =~ m/^zh-hakka\b(.*)/i) { push @em, "x-hakka$1", "i-hakka$1";

+ } elsif($tag =~ m/^he\b(.*)/i) { push @em, "iw$1";

+ } elsif($tag =~ m/^iw\b(.*)/i) { push @em, "he$1";

+ } elsif($tag =~ m/^in\b(.*)/i) { push @em, "id$1";

+ } elsif($tag =~ m/^id\b(.*)/i) { push @em, "in$1";

+ } elsif($tag =~ m/^[ix]-lux\b(.*)/i) { push @em, "lb$1";

+ } elsif($tag =~ m/^lb\b(.*)/i) { push @em, "i-lux$1", "x-lux$1";

+ } elsif($tag =~ m/^[ix]-navajo\b(.*)/i) { push @em, "nv$1";

+ } elsif($tag =~ m/^nv\b(.*)/i) { push @em, "i-navajo$1", "x-navajo$1";

+ } elsif($tag =~ m/^yi\b(.*)/i) { push @em, "ji$1";

+ } elsif($tag =~ m/^ji\b(.*)/i) { push @em, "yi$1";

+ } elsif($tag =~ m/^nb\b(.*)/i) { push @em, "no-bok$1";

+ } elsif($tag =~ m/^no-bok\b(.*)/i) { push @em, "nb$1";

+ } elsif($tag =~ m/^nn\b(.*)/i) { push @em, "no-nyn$1";

+ } elsif($tag =~ m/^no-nyn\b(.*)/i) { push @em, "nn$1";

+ }

+ push @em, $alt{$1} . $2 if $tag =~ /^([XIxi])(-.+)/;

+ return @em;

+###########################################################################

+ # Init %Panic...

+ my @panic = ( # MUST all be lowercase!

+ # Only large ("national") languages make it in this list.

+ # If you, as a user, are so bizarre that the /only/ language

+ # you claim to accept is Galician, then no, we won't do you

+ # the favor of providing Catalan as a panic-fallback for

+ # you. Because if I start trying to add "little languages" in

+ # here, I'll just go crazy.

+ # Scandinavian lgs. All based on opinion and hearsay.

+ 'sv' => [qw(nb no da nn)],

+ 'da' => [qw(nb no sv nn)], # I guess

+ [qw(no nn nb)], [qw(no nn nb sv da)],

+ 'is' => [qw(da sv no nb nn)],

+ 'fo' => [qw(da is no nb nn sv)], # I guess

+ # I think this is about the extent of tolerable intelligibility

+ # among large modern Romance languages.

+ 'pt' => [qw(es ca it fr)], # Portuguese, Spanish, Catalan, Italian, French

+ 'ca' => [qw(es pt it fr)],

+ 'es' => [qw(ca it fr pt)],

+ 'it' => [qw(es fr ca pt)],

+ 'fr' => [qw(es it ca pt)],

+ # Also assume that speakers of the main Indian languages prefer

+ # to read/hear Hindi over English

+ [qw(

+ as bn gu kn ks kok ml mni mr ne or pa sa sd te ta ur

+ )] => 'hi',

+ # Assamese, Bengali, Gujarati, [Hindi,] Kannada (Kanarese), Kashmiri,

+ # Konkani, Malayalam, Meithei (Manipuri), Marathi, Nepali, Oriya,

+ # Punjabi, Sanskrit, Sindhi, Telugu, Tamil, and Urdu.

+ 'hi' => [qw(bn pa as or)],

+ # I welcome finer data for the other Indian languages.

+ # E.g., what should Oriya's list be, besides just Hindi?

+ # And the panic languages for English is, of course, nil!

+ # My guesses at Slavic intelligibility:

+ ([qw(ru be uk)]) x 2, # Russian, Belarusian, Ukranian

+ 'sr' => 'hr', 'hr' => 'sr', # Serb + Croat

+ 'cs' => 'sk', 'sk' => 'cs', # Czech + Slovak

+ 'ms' => 'id', 'id' => 'ms', # Malay + Indonesian

+ 'et' => 'fi', 'fi' => 'et', # Estonian + Finnish

+ #?? 'lo' => 'th', 'th' => 'lo', # Lao + Thai

+ );

+ my($k,$v);

+ while(@panic) {

+ ($k,$v) = splice(@panic,0,2);

+ foreach my $k (ref($k) ? @$k : $k) {

+ foreach my $v (ref($v) ? @$v : $v) {

+ push @{$Panic{$k} ||= []}, $v unless $k eq $v;

+ }

+=item * the function @langs = panic_languages(@accept_languages)

+This function takes a list of 0 or more language

+tags that constitute a given user's Accept-Language list, and

+returns a list of tags for I<other> (non-super)

+languages that are probably acceptable to the user, to be

+used I<if all else fails>.

+For example, if a user accepts only 'ca' (Catalan) and

+'es' (Spanish), and the documents/interfaces you have

+available are just in German, Italian, and Chinese, then

+the user will most likely want the Italian one (and not

+the Chinese or German one!), instead of getting

+nothing. So C<panic_languages('ca', 'es')> returns

+a list containing 'it' (Italian).

+English ('en') is I<always> in the return list, but

+whether it's at the very end or not depends

+on the input languages. This function works by consulting

+an internal table that stipulates what common

+languages are "close" to each other.

+A useful construct you might consider using is:

+ @fallbacks = super_languages(@accept_languages);

+ push @fallbacks, panic_languages(

+ @accept_languages, @fallbacks,

+ );

+=cut

+sub panic_languages {

+ # When in panic or in doubt, run in circles, scream, and shout!

+ my(@out, %seen);

+ foreach my $t (@_) {

+ next unless $t;

+ next if $seen{$t}++; # so we don't return it or hit it again

+ # push @out, super_languages($t); # nah, keep that separate

+ push @out, @{ $Panic{lc $t} || next };

+ }

+ return grep !$seen{$_}++, @out, 'en';

+###########################################################################

+1;

+__END__

+=back

+=head1 ABOUT LOWERCASING

+I've considered making all the above functions that output language

+tags return all those tags strictly in lowercase. Having all your

+language tags in lowercase does make some things easier. But you

+might as well just lowercase as you like, or call

+C<encode_language_tag($lang1)> where appropriate.

+=head1 ABOUT UNICODE PLAINTEXT LANGUAGE TAGS

+In some future version of I18N::LangTags, I plan to include support

+for RFC2482-style language tags -- which are basically just normal

+language tags with their ASCII characters shifted into Plane 14.

+=head1 SEE ALSO

+* L<I18N::LangTags::List|I18N::LangTags::List>

+* RFC 3066, C<ftp://ftp.isi.edu/in-notes/rfc3066.txt>, "Tags for the

+Identification of Languages". (Obsoletes RFC 1766)

+* RFC 2277, C<ftp://ftp.isi.edu/in-notes/rfc2277.txt>, "IETF Policy on

+Character Sets and Languages".

+* RFC 2231, C<ftp://ftp.isi.edu/in-notes/rfc2231.txt>, "MIME Parameter

+Value and Encoded Word Extensions: Character Sets, Languages, and

+Continuations".

+* RFC 2482, C<ftp://ftp.isi.edu/in-notes/rfc2482.txt>,

+"Language Tagging in Unicode Plain Text".

+* Locale::Codes, in

+C<http://www.perl.com/CPAN/modules/by-module/Locale/>

+* ISO 639, "Code for the representation of names of languages",

+C<http://www.indigo.ie/egt/standards/iso639/iso639-1-en.html>

+* ISO 639-2, "Codes for the representation of names of languages",

+including three-letter codes,

+C<http://lcweb.loc.gov/standards/iso639-2/bibcodes.html>

+* The IANA list of registered languages (hopefully up-to-date),

+C<ftp://ftp.isi.edu/in-notes/iana/assignments/languages/>

+=head1 COPYRIGHT

+This library is free software; you can redistribute it and/or

+modify it under the same terms as Perl itself.

+The programs and documentation in this dist are distributed in

+the hope that they will be useful, but without any warranty; without

+even the implied warranty of merchantability or fitness for a

+particular purpose.

+=head1 AUTHOR

+Sean M. Burke C<sburke@cpan.org>

+=cut

diff --git a/gnu/usr.bin/perl/lib/I18N/LangTags/ChangeLog b/gnu/usr.bin/perl/lib/I18N/LangTags/ChangeLog
new file mode 100644
index 00000000000..f3608f7125e
--- /dev/null
+++ b/gnu/usr.bin/perl/lib/I18N/LangTags/ChangeLog

@@ -0,0 +1,107 @@

+Revision history for Perl module I18N::LangTags.

+ Time-stamp: "2002-02-02 20:45:47 MST"

+2002-02-02 Sean M. Burke sburke@cpan.org

+ * Release 0.27 -- minor mods to ::List:

+ Fixing its entries for sv-se and sv-fi.

+ Typo-fixes and rewordings in the incidental Pod text elsewhere.

+2001-06-21 Sean M. Burke sburke@cpan.org

+ * Release 0.26 -- just making cosmetic changes

+ to test.pl, at Jarkko's request.

+2001-06-20 Sean M. Burke sburke@cpan.org

+ * Release 0.25 -- just tweaking panic_languages behavior

+ for Scandinavian languages. Much better now.

+ Slight tweak to ::List's entries for Greek.

+2001-06-20 Sean M. Burke sburke@cpan.org

+ * Release 0.24

+ * I18N::LangTags -- some elaborate hacks to make us

+ recognize legacy aliases like no-nyn == nn.

+ Added panic_languages().

+ Added :ALL export tag.

+ Minor docs fixes, and spiffing up test.pl.

+ * I18N::LangTags::List -- minor corrections; added

+ a few aliases.

+2001-05-29 Sean M. Burke sburke@cpan.org

+ * Release 0.23

+ * I18N::LangTags::List -- minor corrections. And is now

+ a module, not just documentation.

+2001-05-27 Sean M. Burke sburke@cpan.org

+ * Release 0.22

+ * Now bundling I18N::LangTags::List, a reference for lang tags,

+ replacing generate_language_table.plx and language_codes.txt

+2001-05-25 Sean M. Burke sburke@cpan.org

+ * Release 0.21

+ * extract_language_tags and locale2language_tag now

+ return untainted output. Useful if you feed tainted

+ things, like $ENV{'LANG'}.

+2001-03-13 Sean M. Burke sburke@cpan.org

+ * Release 0.20

+ * Added support for RFC 3066 tags: allowing three-letter primary

+ tags ("nav"), and allowing digits in subtags ("x-borg-prot3252").

+ * Changed all references from RFC 1766 to RFC 3066.

+ * Now bundling fulltext of RFC 3066 in the dist.

+ * Now bundling generate_language_table.plx and language_codes.txt

+ * Added some nice tests to test.pl

+ * Inverting order of listings in this ChangeLog file.

+2000-05-13 Sean M. Burke sburke@cpan.org

+ * Release 0.13

+ * Just noting my new email address.

+1999-03-06 Sean M. Burke sburke@netadventure.net

+ * Release 0.11

+ * Added functions

+ similarity_language_tag, is_dialect_of,

+ locale2language_tag, alternate_language_tags, and

+ encode_language_tag

+1998-12-14 Sean M. Burke sburke@netadventure.net

+ * Release 0.09

+ * Added function super_languages()

+1998-10-31 Sean M. Burke sburke@netadventure.net

+ * Release 0.08

+ * Just changes in the docs and bundle -- no change

+ in functionality.

+1998-04-02 Sean M. Burke sburke@netadventure.net

+ * Release 0.07

+ * First public release.

+[END OF CHANGELOG]

diff --git a/gnu/usr.bin/perl/lib/I18N/LangTags/List.pm b/gnu/usr.bin/perl/lib/I18N/LangTags/List.pm
new file mode 100644
index 00000000000..2dbd19a5d78
--- /dev/null
+++ b/gnu/usr.bin/perl/lib/I18N/LangTags/List.pm

@@ -0,0 +1,1622 @@

+require 5;

+package I18N::LangTags::List;

+# Time-stamp: "2002-02-02 20:13:58 MST"

+use strict;

+use vars qw(%Name $Debug $VERSION);  # %Name maps tag => English name; a true $Debug enables the trace prints

+$VERSION = '0.25';

+# POD at the end.

+#----------------------------------------------------------------------

+# read the table out of our own POD!  (Runs at load time: fills %Name from the text after __DATA__.)

+ my $seeking = 1;  # true until a line containing "=for woohah" has been seen

+ my $count = 0;  # number of tag/name pairs stored so far

+ my($tag,$name);

+ while(<I18N::LangTags::List::DATA>) {

+ if($seeking) {

+ $seeking = 0 if m/=for woohah/;  # entries are only parsed from the line after this marker

+ } else {

+ next unless ($tag, $name) =

+ m/\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/;  # "{tag}", an optional ":", then the name up to any "[" or "]"; skip lines that don't match

+ $name =~ s/\s*[;\.]*\s*$//g;  # trim trailing whitespace and any trailing ";" or "." characters

+ next unless $name;  # nothing usable left after trimming

+ ++$count;

+ print "<$tag> <$name>\n" if $Debug;

+ $Name{$tag} = $name;  # a later line with the same tag overwrites the earlier entry

+ }

+ die "No tags read??" unless $count;  # refuse to load with an empty table

+#----------------------------------------------------------------------

+sub name {  # name($langtag): look the tag up in %Name; returns the English name, or nothing if no name is found

+ my $tag = lc($_[0] || return);  # a false argument (undef, '', 0) returns at once; lookup is done in lower case

+ $tag =~ s/^\s+//s;  # strip leading whitespace

+ $tag =~ s/\s+$//s;  # strip trailing whitespace

+ my $alt;  # $tag with its "x-"/"i-" prefix swapped, or '' when it has neither prefix

+ if($tag =~ m/^x-(.+)/) {

+ $alt = "i-$1";

+ } elsif($tag =~ m/^i-(.+)/) {

+ $alt = "x-$1";

+ } else {

+ $alt = '';

+ }

+ my $subform = '';  # subtags shaved off the end of $tag so far, each keeping its leading "-"

+ my $name = '';

+ print "Input: {$tag}\n" if $Debug;

+ while(length $tag) {

+ last if $name = $Name{$tag};  # assignment, not comparison: stop as soon as a lookup yields a true name

+ last if $name = $Name{$alt};  # otherwise try the prefix-swapped form

+ if($tag =~ s/(-[a-z0-9]+)$//s) {

+ print "Shaving off: $1 leaving $tag\n" if $Debug;

+ $subform = "$1$subform";  # prepend, so shaved subtags stay in their original order

+ # and loop around again

+ $alt =~ s/(-[a-z0-9]+)$//s && $Debug && print " alt -> $alt\n";  # shorten $alt in step with $tag

+ } else {

+ # we're trying to pull a subform off a primary tag. TILT!

+ print "Aborting on: {$name}{$subform}\n" if $Debug;

+ last;

+ }

+ print "Output: {$name}{$subform}\n" if $Debug;

+ return unless $name; # Failure: no prefix of the tag (or of its alternate) is in %Name

+ return $name unless $subform; # Exact match: nothing had to be shaved off

+ $subform =~ s/^-//s;  # drop the leading "-" before display

+ $subform =~ s/-$//s;

+ return "$name (Subform \"$subform\")";  # name of the longest known prefix plus the unmatched remainder

+1;

+__DATA__

+=head1 NAME

+I18N::LangTags::List -- tags and names for human languages

+=head1 SYNOPSIS

+ use I18N::LangTags::List;

+ print "Parlez-vous... ", join(', ',

+ I18N::LangTags::List::name('elx') || 'unknown_language',

+ I18N::LangTags::List::name('ar-Kw') || 'unknown_language',

+ I18N::LangTags::List::name('en') || 'unknown_language',

+ I18N::LangTags::List::name('en-CA') || 'unknown_language',

+ ), "?\n";

+prints:

+ Parlez-vous... Elamite, Kuwait Arabic, English, Canadian English?

+=head1 DESCRIPTION

+This module provides a function

+C<I18N::LangTags::List::name( I<langtag> ) > that takes

+a language tag (see L<I18N::LangTags|I18N::LangTags>)

+and returns the best attempt at an English name for it, or

+undef if it can't make sense of the tag.

+The function I18N::LangTags::List::name(...) is not exported.

+The map of tags-to-names that it uses is accessible as

+%I18N::LangTags::List::Name, and it's the same as the list

+that follows in this documentation, which should be useful

+to you even if you don't use this module.

+=head1 ABOUT LANGUAGE TAGS

+Internet language tags, as defined in RFC 3066, are a formalism

+for denoting human languages. The two-letter ISO 639-1 language

+codes are well known (as "en" for English), as are their forms

+when qualified by a country code ("en-US"). Less well-known are the

+arbitrary-length non-ISO codes (like "i-mingo"), and the

+recently (in 2001) introduced three-letter ISO-639-2 codes.

+Remember these important facts:

+=over

+=item *

+Language tags are not locale IDs. A locale ID is written with a "_"

+instead of a "-", (almost?) always matches C<m/^\w\w_\w\w\b/>, and

+I<means> something different than a language tag. A language tag

+denotes a language. A locale ID denotes a language I<as used in>

+a particular place, in combination with non-linguistic

+location-specific information such as what currency is used

+there. Locales I<also> often denote character set information,

+as in "en_US.ISO8859-1".

+=item *

+Language tags are not for computer languages.

+=item *

+"Dialect" is not a useful term, since there is no objective

+criterion for establishing when two language-forms are

+dialects of each other, or are separate languages.

+=item *

+Language tags are not case-sensitive. en-US, en-us, En-Us, etc.,

+are all the same tag, and denote the same language.

+=item *

+Not every language tag really refers to a single language. Some

+language tags refer to conditions: i-default (system-message text

+in English plus maybe other languages), und (undetermined

+language). Others (notably lots of the three-letter codes) are

+bibliographic tags that classify whole groups of languages, as

+with cus "Cushitic (Other)" (i.e., a

+language that has been classed as Cushitic, but which has no more

+specific code) or the even less linguistically coherent

+sai for "South American Indian (Other)". Though useful in

+bibliography, B<SUCH TAGS ARE NOT

+FOR GENERAL USE>. For further guidance, email me.

+=item *

+Language tags are not country codes. In fact, they are often

+distinct codes, as with language tag ja for Japanese, and

+ISO 3166 country code C<.jp> for Japan.

+=back

+=head1 LIST OF LANGUAGES

+The first part of each item is the language tag, between

+{...}. It

+is followed by an English name for the language or language-group.

+Language tags that I judge to be not for general use, are bracketed.

+This list is in alphabetical order by English name of the language.

+=for reminder

+ The name in the =item line MUST NOT have E<...>'s in it!!

+=for woohah START

+=over

+=item {ab} : Abkhazian

+eq Abkhaz

+=item {ace} : Achinese

+=item {ach} : Acoli

+=item {ada} : Adangme

+=item {aa} : Afar

+=item {afh} : Afrihili

+(Artificial)

+=item {af} : Afrikaans

+=item [{afa} : Afro-Asiatic (Other)]

+=item {aka} : Akan

+=item {akk} : Akkadian

+(Historical)

+=item {sq} : Albanian

+=item {ale} : Aleut

+=item [{alg} : Algonquian languages]

+NOT Algonquin!

+=item [{tut} : Altaic (Other)]

+=item {am} : Amharic

+NOT Aramaic!

+=item {i-ami} : Ami

+eq Amis. eq 'Amis. eq Pangca.

+=item [{apa} : Apache languages]

+=item {ar} : Arabic

+Many forms are mutually un-intelligible in spoken media.

+Notable forms:

+{ar-ae} UAE Arabic;

+{ar-bh} Bahrain Arabic;

+{ar-dz} Algerian Arabic;

+{ar-eg} Egyptian Arabic;

+{ar-iq} Iraqi Arabic;

+{ar-jo} Jordanian Arabic;

+{ar-kw} Kuwait Arabic;

+{ar-lb} Lebanese Arabic;

+{ar-ly} Libyan Arabic;

+{ar-ma} Moroccan Arabic;

+{ar-om} Omani Arabic;

+{ar-qa} Qatari Arabic;

+{ar-sa} Saudi Arabic;

+{ar-sy} Syrian Arabic;

+{ar-tn} Tunisian Arabic;

+{ar-ye} Yemen Arabic.

+=item {arc} : Aramaic

+NOT Amharic! NOT Samaritan Aramaic!

+=item {arp} : Arapaho

+=item {arn} : Araucanian

+=item {arw} : Arawak

+=item {hy} : Armenian

+=item [{art} : Artificial (Other)]

+=item {as} : Assamese

+=item [{ath} : Athapascan languages]

+eq Athabaskan. eq Athapaskan. eq Athabascan.

+=item [{aus} : Australian languages]

+=item [{map} : Austronesian (Other)]

+=item {ava} : Avaric

+=item {ae} : Avestan

+eq Zend

+=item {awa} : Awadhi

+=item {ay} : Aymara

+=item {az} : Azerbaijani

+eq Azeri

+=item {ban} : Balinese

+=item [{bat} : Baltic (Other)]

+=item {bal} : Baluchi

+=item {bam} : Bambara

+=item [{bai} : Bamileke languages]

+=item {bad} : Banda

+=item [{bnt} : Bantu (Other)]

+=item {bas} : Basa

+=item {ba} : Bashkir

+=item {eu} : Basque

+=item {btk} : Batak (Indonesia)

+=item {bej} : Beja

+=item {be} : Belarusian

+eq Belarussian. eq Byelarussian.

+eq Belorussian. eq Byelorussian.

+eq White Russian. eq White Ruthenian.

+NOT Ruthenian!

+=item {bem} : Bemba

+=item {bn} : Bengali

+eq Bangla.

+=item [{ber} : Berber (Other)]

+=item {bho} : Bhojpuri

+=item {bh} : Bihari

+=item {bik} : Bikol

+=item {bin} : Bini

+=item {bi} : Bislama

+eq Bichelamar.

+=item {bs} : Bosnian

+=item {bra} : Braj

+=item {br} : Breton

+=item {bug} : Buginese

+=item {bg} : Bulgarian

+=item {i-bnn} : Bunun

+=item {bua} : Buriat

+=item {my} : Burmese

+=item {cad} : Caddo

+=item {car} : Carib

+=item {ca} : Catalan

+eq CatalE<aacute>n. eq Catalonian.

+=item [{cau} : Caucasian (Other)]

+=item {ceb} : Cebuano

+=item [{cel} : Celtic (Other)]

+Notable forms:

+{cel-gaulish} Gaulish (Historical)

+=item [{cai} : Central American Indian (Other)]

+=item {chg} : Chagatai

+(Historical?)

+=item [{cmc} : Chamic languages]

+=item {ch} : Chamorro

+=item {ce} : Chechen

+=item {chr} : Cherokee

+eq Tsalagi

+=item {chy} : Cheyenne

+=item {chb} : Chibcha

+(Historical) NOT Chibchan (which is a language family).

+=item {ny} : Chichewa

+eq Nyanja. eq Chinyanja.

+=item {zh} : Chinese

+Many forms are mutually un-intelligible in spoken media.

+Notable subforms:

+{zh-cn} PRC Chinese;

+{zh-hk} Hong Kong Chinese;

+{zh-mo} Macau Chinese;

+{zh-sg} Singapore Chinese;

+{zh-tw} Taiwan Chinese;

+{zh-guoyu} Mandarin [Putonghua/Guoyu];

+{zh-hakka} Hakka [formerly i-hakka];

+{zh-min} Hokkien;

+{zh-min-nan} Southern Hokkien;

+{zh-wuu} Shanghaiese;

+{zh-xiang} Hunanese;

+{zh-gan} Gan;

+{zh-yue} Cantonese.

+=for etc

+{i-hakka} Hakka (old tag)

+=item {chn} : Chinook Jargon

+eq Chinook Wawa.

+=item {chp} : Chipewyan

+=item {cho} : Choctaw

+=item {cu} : Church Slavic

+eq Old Church Slavonic.

+=item {chk} : Chuukese

+eq Trukese. eq Chuuk. eq Truk. eq Ruk.

+=item {cv} : Chuvash

+=item {cop} : Coptic

+=item {kw} : Cornish

+=item {co} : Corsican

+eq Corse.

+=item {cre} : Cree

+NOT Creek!

+=item {mus} : Creek

+NOT Cree!

+=item [{cpe} : English-based Creoles and pidgins (Other)]

+=item [{cpf} : French-based Creoles and pidgins (Other)]

+=item [{cpp} : Portuguese-based Creoles and pidgins (Other)]

+=item [{crp} : Creoles and pidgins (Other)]

+=item {hr} : Croatian

+eq Croat.

+=item [{cus} : Cushitic (Other)]

+=item {cs} : Czech

+=item {dak} : Dakota

+eq Nakota. eq Lakota.

+=item {da} : Danish

+=item {day} : Dayak

+=item {i-default} : Default (Fallthru) Language

+Defined in RFC 2277, this is for tagging text

+(which must include English text, and might/should include text

+in other appropriate languages) that is emitted in a context

+where language-negotiation wasn't possible -- in SMTP mail failure

+messages, for example.

+=item {del} : Delaware

+=item {din} : Dinka

+=item {div} : Divehi

+=item {doi} : Dogri

+NOT Dogrib!

+=item {dgr} : Dogrib

+NOT Dogri!

+=item [{dra} : Dravidian (Other)]

+=item {dua} : Duala

+=item {nl} : Dutch

+eq Netherlander. Notable forms:

+{nl-nl} Netherlands Dutch;

+{nl-be} Belgian Dutch.

+=item {dum} : Middle Dutch (ca.1050-1350)

+(Historical)

+=item {dyu} : Dyula

+=item {dz} : Dzongkha

+=item {efi} : Efik

+=item {egy} : Ancient Egyptian

+(Historical)

+=item {eka} : Ekajuk

+=item {elx} : Elamite

+(Historical)

+=item {en} : English

+Notable forms:

+{en-au} Australian English;

+{en-bz} Belize English;

+{en-ca} Canadian English;

+{en-gb} UK English;

+{en-ie} Irish English;

+{en-jm} Jamaican English;

+{en-nz} New Zealand English;

+{en-ph} Philippine English;

+{en-tt} Trinidad English;

+{en-us} US English;

+{en-za} South African English;

+{en-zw} Zimbabwe English.

+=item {enm} : Middle English (1100-1500)

+(Historical)

+=item {ang} : Old English (ca.450-1100)

+eq Anglo-Saxon. (Historical)

+=item {eo} : Esperanto

+(Artificial)

+=item {et} : Estonian

+=item {ewe} : Ewe

+=item {ewo} : Ewondo

+=item {fan} : Fang

+=item {fat} : Fanti

+=item {fo} : Faroese

+=item {fj} : Fijian

+=item {fi} : Finnish

+=item [{fiu} : Finno-Ugrian (Other)]

+eq Finno-Ugric. NOT Ugaritic!

+=item {fon} : Fon

+=item {fr} : French

+Notable forms:

+{fr-fr} France French;

+{fr-be} Belgian French;

+{fr-ca} Canadian French;

+{fr-ch} Swiss French;

+{fr-lu} Luxembourg French;

+{fr-mc} Monaco French.

+=item {frm} : Middle French (ca.1400-1600)

+(Historical)

+=item {fro} : Old French (842-ca.1400)

+(Historical)

+=item {fy} : Frisian

+=item {fur} : Friulian

+=item {ful} : Fulah

+=item {gaa} : Ga

+=item {gd} : Scots Gaelic

+NOT Scots!

+=item {gl} : Gallegan

+eq Galician

+=item {lug} : Ganda

+=item {gay} : Gayo

+=item {gba} : Gbaya

+=item {gez} : Geez

+eq Ge'ez

+=item {ka} : Georgian

+=item {de} : German

+Notable forms:

+{de-at} Austrian German;

+{de-be} Belgian German;

+{de-ch} Swiss German;

+{de-de} Germany German;

+{de-li} Liechtenstein German;

+{de-lu} Luxembourg German.

+=item {gmh} : Middle High German (ca.1050-1500)

+(Historical)

+=item {goh} : Old High German (ca.750-1050)

+(Historical)

+=item [{gem} : Germanic (Other)]

+=item {gil} : Gilbertese

+=item {gon} : Gondi

+=item {gor} : Gorontalo

+=item {got} : Gothic

+(Historical)

+=item {grb} : Grebo

+=item {grc} : Ancient Greek

+(Historical) (Until 15th century or so.)

+=item {el} : Modern Greek

+(Since 15th century or so.)

+=item {gn} : Guarani

+GuaranE<iacute>

+=item {gu} : Gujarati

+=item {gwi} : Gwich'in

+eq Gwichin

+=item {hai} : Haida

+=item {ha} : Hausa

+=item {haw} : Hawaiian

+Hawai'ian

+=item {he} : Hebrew

+(Formerly "iw".)

+=for etc

+{iw} Hebrew (old tag)

+=item {hz} : Herero

+=item {hil} : Hiligaynon

+=item {him} : Himachali

+=item {hi} : Hindi

+=item {ho} : Hiri Motu

+=item {hit} : Hittite

+(Historical)

+=item {hmn} : Hmong

+=item {hu} : Hungarian

+=item {hup} : Hupa

+=item {iba} : Iban

+=item {is} : Icelandic

+=item {ibo} : Igbo

+=item {ijo} : Ijo

+=item {ilo} : Iloko

+=item [{inc} : Indic (Other)]

+=item [{ine} : Indo-European (Other)]

+=item {id} : Indonesian

+(Formerly "in".)

+=for etc

+{in} Indonesian (old tag)

+=item {ia} : Interlingua (International Auxiliary Language Association)

+(Artificial) NOT Interlingue!

+=item {ie} : Interlingue

+(Artificial) NOT Interlingua!

+=item {iu} : Inuktitut

+A subform of "Eskimo".

+=item {ik} : Inupiaq

+A subform of "Eskimo".

+=item [{ira} : Iranian (Other)]

+=item {ga} : Irish

+=item {mga} : Middle Irish (900-1200)

+(Historical)

+=item {sga} : Old Irish (to 900)

+(Historical)

+=item [{iro} : Iroquoian languages]

+=item {it} : Italian

+Notable forms:

+{it-it} Italy Italian;

+{it-ch} Swiss Italian.

+=item {ja} : Japanese

+(NOT "jp"!)

+=item {jw} : Javanese

+=item {jrb} : Judeo-Arabic

+=item {jpr} : Judeo-Persian

+=item {kab} : Kabyle

+=item {kac} : Kachin

+=item {kl} : Kalaallisut

+eq Greenlandic "Eskimo"

+=item {kam} : Kamba

+=item {kn} : Kannada

+eq Kanarese. NOT Canadian!

+=item {kau} : Kanuri

+=item {kaa} : Kara-Kalpak

+=item {kar} : Karen

+=item {ks} : Kashmiri

+=item {kaw} : Kawi

+=item {kk} : Kazakh

+=item {kha} : Khasi

+=item {km} : Khmer

+eq Cambodian. eq Kampuchean.

+=item [{khi} : Khoisan (Other)]

+=item {kho} : Khotanese

+=item {ki} : Kikuyu

+eq Gikuyu.

+=item {kmb} : Kimbundu

+=item {rw} : Kinyarwanda

+=item {ky} : Kirghiz

+=item {i-klingon} : Klingon

+=item {kv} : Komi

+=item {kon} : Kongo

+=item {kok} : Konkani

+=item {ko} : Korean

+=item {kos} : Kosraean

+=item {kpe} : Kpelle

+=item {kro} : Kru

+=item {kj} : Kuanyama

+=item {kum} : Kumyk

+=item {ku} : Kurdish

+=item {kru} : Kurukh

+=item {kut} : Kutenai

+=item {lad} : Ladino

+eq Judeo-Spanish. NOT Ladin (a minority language in Italy).

+=item {lah} : Lahnda

+NOT Lamba!

+=item {lam} : Lamba

+NOT Lahnda!

+=item {lo} : Lao

+eq Laotian.

+=item {la} : Latin

+(Historical) NOT Ladin! NOT Ladino!

+=item {lv} : Latvian

+eq Lettish.

+=item {lb} : Letzeburgesch

+eq Luxemburgian, eq Luxemburger. (Formerly i-lux.)

+=for etc

+{i-lux} Letzeburgesch (old tag)

+=item {lez} : Lezghian

+=item {ln} : Lingala

+=item {lt} : Lithuanian

+=item {nds} : Low German

+eq Low Saxon. eq Low German. eq Low Saxon.

+=item {loz} : Lozi

+=item {lub} : Luba-Katanga

+=item {lua} : Luba-Lulua

+=item {lui} : Luiseno

+eq LuiseE<ntilde>o.

+=item {lun} : Lunda

+=item {luo} : Luo (Kenya and Tanzania)

+=item {lus} : Lushai

+=item {mk} : Macedonian

+eq the modern Slavic language spoken in what was Yugoslavia.

+NOT the form of Greek spoken in Greek Macedonia!

+=item {mad} : Madurese

+=item {mag} : Magahi

+=item {mai} : Maithili

+=item {mak} : Makasar

+=item {mg} : Malagasy

+=item {ms} : Malay

+NOT Malayalam!

+=item {ml} : Malayalam

+NOT Malay!

+=item {mt} : Maltese

+=item {mnc} : Manchu

+=item {mdr} : Mandar

+NOT Mandarin!

+=item {man} : Mandingo

+=item {mni} : Manipuri

+eq Meithei.

+=item [{mno} : Manobo languages]

+=item {gv} : Manx

+=item {mi} : Maori

+NOT Mari!

+=item {mr} : Marathi

+=item {chm} : Mari

+NOT Maori!

+=item {mh} : Marshall

+eq Marshallese.

+=item {mwr} : Marwari

+=item {mas} : Masai

+=item [{myn} : Mayan languages]

+=item {men} : Mende

+=item {mic} : Micmac

+=item {min} : Minangkabau

+=item {i-mingo} : Mingo

+eq the Iroquoian language West Virginia Seneca. NOT New York Seneca!

+=item [{mis} : Miscellaneous languages]

+Don't use this.

+=item {moh} : Mohawk

+=item {mo} : Moldavian

+eq Moldovan.

+=item [{mkh} : Mon-Khmer (Other)]

+=item {lol} : Mongo

+=item {mn} : Mongolian

+eq Mongol.

+=item {mos} : Mossi

+=item [{mul} : Multiple languages]

+Not for normal use.

+=item [{mun} : Munda languages]

+=item {nah} : Nahuatl

+=item {na} : Nauru

+=item {nv} : Navajo

+eq Navaho. (Formerly i-navajo.)

+=for etc

+{i-navajo} Navajo (old tag)

+=item {nd} : North Ndebele

+=item {nr} : South Ndebele

+=item {ng} : Ndonga

+=item {ne} : Nepali

+eq Nepalese. Notable forms:

+{ne-np} Nepal Nepali;

+{ne-in} India Nepali.

+=item {new} : Newari

+=item {nia} : Nias

+=item [{nic} : Niger-Kordofanian (Other)]

+=item [{ssa} : Nilo-Saharan (Other)]

+=item {niu} : Niuean

+=item {non} : Old Norse

+(Historical)

+=item [{nai} : North American Indian]

+Do not use this.

+=item {se} : Northern Sami

+eq Lappish. eq Lapp. eq (Northern) Saami.

+=item {no} : Norwegian

+Note the two following forms:

+=item {nb} : Norwegian Bokmal

+eq BokmE<aring>l, (A form of Norwegian.) (Formerly no-bok.)

+=for etc

+{no-bok} Norwegian Bokmal (old tag)

+=item {nn} : Norwegian Nynorsk

+(A form of Norwegian.) (Formerly no-nyn.)

+=for etc

+{no-nyn} Norwegian Nynorsk (old tag)

+=item [{nub} : Nubian languages]

+=item {nym} : Nyamwezi

+=item {nyn} : Nyankole

+=item {nyo} : Nyoro

+=item {nzi} : Nzima

+=item {oc} : Occitan (post 1500)

+eq ProvenE<ccedil>al, eq Provencal

+=item {oji} : Ojibwa

+eq Ojibwe.

+=item {or} : Oriya

+=item {om} : Oromo

+=item {osa} : Osage

+=item {os} : Ossetian; Ossetic

+=item [{oto} : Otomian languages]

+Group of languages collectively called "OtomE<iacute>".

+=item {pal} : Pahlavi

+eq Pahlevi

+=item {i-pwn} : Paiwan

+eq Pariwan

+=item {pau} : Palauan

+=item {pi} : Pali

+(Historical?)

+=item {pam} : Pampanga

+=item {pag} : Pangasinan

+=item {pa} : Panjabi

+eq Punjabi

+=item {pap} : Papiamento

+eq Papiamentu.

+=item [{paa} : Papuan (Other)]

+=item {fa} : Persian

+eq Farsi. eq Iranian.

+=item {peo} : Old Persian (ca.600-400 B.C.)

+=item [{phi} : Philippine (Other)]

+=item {phn} : Phoenician

+(Historical)

+=item {pon} : Pohnpeian

+NOT Pompeiian!

+=item {pl} : Polish

+=item {pt} : Portuguese

+eq Portugese. Notable forms:

+{pt-pt} Portugal Portuguese;

+{pt-br} Brazilian Portuguese.

+=item [{pra} : Prakrit languages]

+=item {pro} : Old Provencal (to 1500)

+eq Old ProvenE<ccedil>al. (Historical.)

+=item {ps} : Pushto

+eq Pashto. eq Pushtu.

+=item {qu} : Quechua

+eq Quecha.

+=item {rm} : Raeto-Romance

+eq Romansh.

+=item {raj} : Rajasthani

+=item {rap} : Rapanui

+=item {rar} : Rarotongan

+=item [{qaa - qtz} : Reserved for local use.]

+=item [{roa} : Romance (Other)]

+NOT Romanian! NOT Romany! NOT Romansh!

+=item {ro} : Romanian

+eq Rumanian. NOT Romany!

+=item {rom} : Romany

+eq Rom. NOT Romanian!

+=item {rn} : Rundi

+=item {ru} : Russian

+NOT White Russian! NOT Rusyn!

+=item [{sal} : Salishan languages]

+Large language group.

+=item {sam} : Samaritan Aramaic

+NOT Aramaic!

+=item [{smi} : Sami languages (Other)]

+=item {sm} : Samoan

+=item {sad} : Sandawe

+=item {sg} : Sango

+=item {sa} : Sanskrit

+(Historical)

+=item {sat} : Santali

+=item {sc} : Sardinian

+eq Sard.

+=item {sas} : Sasak

+=item {sco} : Scots

+NOT Scots Gaelic!

+=item {sel} : Selkup

+=item [{sem} : Semitic (Other)]

+=item {sr} : Serbian

+eq Serb. NOT Sorbian.

+=item {srr} : Serer

+=item {shn} : Shan

+=item {sn} : Shona

+=item {sid} : Sidamo

+=item {sgn-...} : Sign Languages

+Always use with a subtag. Notable forms:

+{sgn-gb} British Sign Language (BSL);

+{sgn-ie} Irish Sign Language (ISL);

+{sgn-ni} Nicaraguan Sign Language (ISN);

+{sgn-us} American Sign Language (ASL).

+=item {bla} : Siksika

+eq Blackfoot. eq Pikanii.

+=item {sd} : Sindhi

+=item {si} : Sinhalese

+eq Sinhala.

+=item [{sit} : Sino-Tibetan (Other)]

+=item [{sio} : Siouan languages]

+=item {den} : Slave (Athapascan)

+("Slavey" is a subform.)

+=item [{sla} : Slavic (Other)]

+=item {sk} : Slovak

+eq Slovakian.

+=item {sl} : Slovenian

+eq Slovene.

+=item {sog} : Sogdian

+=item {so} : Somali

+=item {son} : Songhai

+=item {snk} : Soninke

+=item {wen} : Sorbian languages

+eq Wendish. eq Sorb. eq Lusatian. eq Wend. NOT Venda! NOT Serbian!

+=item {nso} : Northern Sotho

+=item {st} : Southern Sotho

+eq Sutu. eq Sesotho.

+=item [{sai} : South American Indian (Other)]

+=item {es} : Spanish

+Notable forms:

+{es-ar} Argentine Spanish;

+{es-bo} Bolivian Spanish;

+{es-cl} Chilean Spanish;

+{es-co} Colombian Spanish;

+{es-do} Dominican Spanish;

+{es-ec} Ecuadorian Spanish;

+{es-es} Spain Spanish;

+{es-gt} Guatemalan Spanish;

+{es-hn} Honduran Spanish;

+{es-mx} Mexican Spanish;

+{es-pa} Panamanian Spanish;

+{es-pe} Peruvian Spanish;

+{es-pr} Puerto Rican Spanish;

+{es-py} Paraguay Spanish;

+{es-sv} Salvadoran Spanish;

+{es-us} US Spanish;

+{es-uy} Uruguayan Spanish;

+{es-ve} Venezuelan Spanish.

+=item {suk} : Sukuma

+=item {sux} : Sumerian

+(Historical)

+=item {su} : Sundanese

+=item {sus} : Susu

+=item {sw} : Swahili

+eq Kiswahili

+=item {ss} : Swati

+=item {sv} : Swedish

+Notable forms:

+{sv-se} Sweden Swedish;

+{sv-fi} Finland Swedish.

+=item {syr} : Syriac

+=item {tl} : Tagalog

+=item {ty} : Tahitian

+=item [{tai} : Tai (Other)]

+NOT Thai!

+=item {tg} : Tajik

+=item {tmh} : Tamashek

+=item {ta} : Tamil

+=item {i-tao} : Tao

+eq Yami.

+=item {tt} : Tatar

+=item {i-tay} : Tayal

+eq Atayal. eq Atayan.

+=item {te} : Telugu

+=item {ter} : Tereno

+=item {tet} : Tetum

+=item {th} : Thai

+NOT Tai!

+=item {bo} : Tibetan

+=item {tig} : Tigre

+=item {ti} : Tigrinya

+=item {tem} : Timne

+eq Themne. eq Timene.

+=item {tiv} : Tiv

+=item {tli} : Tlingit

+=item {tpi} : Tok Pisin

+=item {tkl} : Tokelau

+=item {tog} : Tonga (Nyasa)

+NOT Tsonga!

+=item {to} : Tonga (Tonga Islands)

+(Pronounced "Tong-a", not "Tong-ga")

+NOT Tsonga!

+=item {tsi} : Tsimshian

+eq Sm'algyax

+=item {ts} : Tsonga

+NOT Tonga!

+=item {i-tsu} : Tsou

+=item {tn} : Tswana

+Same as Setswana.

+=item {tum} : Tumbuka

+=item {tr} : Turkish

+(Typically in Roman script)

+=item {ota} : Ottoman Turkish (1500-1928)

+(Typically in Arabic script) (Historical)

+=item {tk} : Turkmen

+eq Turkmeni.

+=item {tvl} : Tuvalu

+=item {tyv} : Tuvinian

+eq Tuvan. eq Tuvin.

+=item {tw} : Twi

+=item {uga} : Ugaritic

+NOT Ugric!

+=item {ug} : Uighur

+=item {uk} : Ukrainian

+=item {umb} : Umbundu

+=item {und} : Undetermined

+Not a tag for normal use.

+=item {ur} : Urdu

+=item {uz} : Uzbek

+eq E<Ouml>zbek

+=item {vai} : Vai

+=item {ven} : Venda

+NOT Wendish! NOT Wend! NOT Avestan!

+=item {vi} : Vietnamese

+eq Viet.

+=item {vo} : Volapuk

+eq VolapE<uuml>k. (Artificial)

+=item {vot} : Votic

+eq Votian. eq Vod.

+=item [{wak} : Wakashan languages]

+=item {wal} : Walamo

+eq Wolaytta.

+=item {war} : Waray

+Presumably the Philippine language Waray-Waray (SamareE<ntilde>o),

+not the smaller Philippine language Waray Sorsogon, nor the extinct

+Australian language Waray.

+=item {was} : Washo

+eq Washoe

+=item {cy} : Welsh

+=item {wo} : Wolof

+=item {x-...} : Unregistered (Semi-Private Use)

+"x-" is a prefix for language tags that are not registered with ISO

+or IANA. Example, x-double-dutch

+=item {xh} : Xhosa

+=item {sah} : Yakut

+=item {yao} : Yao

+(The Yao in Malawi?)

+=item {yap} : Yapese

+eq Yap

+=item {yi} : Yiddish

+Formerly "ji". Sometimes in Roman script, sometimes in Hebrew script.

+=for etc

+{ji} Yiddish (old tag)

+=item {yo} : Yoruba

+=item [{ypk} : Yupik languages]

+Several "Eskimo" languages.

+=item {znd} : Zande

+=item [{zap} : Zapotec]

+(A group of languages.)

+=item {zen} : Zenaga

+NOT Zend.

+=item {za} : Zhuang

+=item {zu} : Zulu

+=item {zun} : Zuni

+eq ZuE<ntilde>i

+=back

+=for woohah END

+=head1 SEE ALSO

+L<I18N::LangTags|I18N::LangTags> and its "See Also" section.

+=head1 COPYRIGHT AND DISCLAIMER

+You can redistribute and/or

+modify this document under the same terms as Perl itself.

+This document is provided in the hope that it will be

+useful, but without any warranty;

+without even the implied warranty of accuracy, authoritativeness,

+completeness, merchantability, or fitness for a particular purpose.

+Email any corrections or questions to me.

+=head1 AUTHOR

+Sean M. Burke, sburkeE<64>cpan.org

+=cut

+# To generate a list of just the two and three-letter codes:  (reference script kept after __DATA__, so Perl does not compile it with the module)

+#!/usr/local/bin/perl -w

+require 5; # Time-stamp: "2001-03-13 21:53:39 MST"

+ # Sean M. Burke, sburke@cpan.org

+ # This program is for generating the language_codes.txt file

+use strict;

+use LWP::Simple;

+use HTML::TreeBuilder 3.10;

+my $root = HTML::TreeBuilder->new();

+my $url = 'http://lcweb.loc.gov/standards/iso639-2/bibcodes.html';

+$root->parse(get($url) || die "Can't get $url");  # dies if get() returns a false value

+$root->eof();

+my @codes;  # each element: [ lowercased English name (the sort key), output line ]

+foreach my $tr ($root->find_by_tag_name('tr')) {

+ my @f = map $_->as_text(), $tr->content_list();  # text of each child of the row

+ #print map("<$_> ", @f), "\n";

+ next unless @f == 5;  # skip rows that do not have exactly five cells

+ pop @f; # nix the French name

+ next if $f[-1] eq 'Language Name (English)'; # it's a header line

+ my $xx = splice(@f, 2,1); # pull out the two-letter code

+ $f[-1] =~ s/^\s+//;  # trim the English name, now the last remaining field

+ $f[-1] =~ s/\s+$//;

+ if($xx =~ m/[a-zA-Z]/) { # there's a two-letter code for it

+ push @codes, [ lc($f[-1]), "$xx\t$f[-1]\n" ];

+ } else { # print the three-letter codes.

+ if($f[0] eq $f[1]) {  # the two remaining code columns agree -- NOTE(review): column meanings are not shown here; confirm against the page

+ push @codes, [ lc($f[-1]), "$f[1]\t$f[2]\n" ];

+ } else { # shouldn't happen

+ push @codes, [ lc($f[-1]), "@f !!!!!!!!!!\n" ];  # flag the odd row visibly in the output

+ }

+print map $_->[1], sort {; $a->[0] cmp $b->[0] } @codes;  # emit the output lines ordered by lowercased English name

+print "[ based on $url\n at ", scalar(localtime), "]\n",

+ "[Note: doesn't include IANA-registered codes.]\n";

+exit;

+__END__

diff --git a/gnu/usr.bin/perl/lib/I18N/LangTags/README b/gnu/usr.bin/perl/lib/I18N/LangTags/README
new file mode 100644
index 00000000000..fbae05f43d3
--- /dev/null
+++ b/gnu/usr.bin/perl/lib/I18N/LangTags/README

@@ -0,0 +1,78 @@

+README for I18N::LangTags

+ Time-stamp: "2001-05-29 21:52:15 MDT"

+ I18N::LangTags

+I18N::LangTags - functions for dealing with RFC3066-style language

+tags

+Language tags are a formalism, described in RFC 3066 (obsoleting

+1766), for declaring what language form (language and possibly

+dialect) a given chunk of information is in.

+This library provides functions for common tasks involving language

+tags (notably the extraction of them, comparing them, and testing the

+formal validity of them) as is needed in a variety of protocols and

+applications.

+I18N::LangTags::List -- tags and names for human languages. This

+module goes from known language tag names ("fr-CA") to their English

+names ("Canadian French"). Its documentation also lists the several

+hundred known tags and some common subforms. You may find this useful

+as a reference.

+See the POD for more information.

+INSTALLATION

+You install I18N::LangTags and I18N::LangTags::List, as you would

+install any perl module library, by running these commands:

+ perl Makefile.PL

+ make

+ make test

+ make install

+If you want to install a private copy of I18N::LangTags in your home

+directory, then you should try to produce the initial Makefile with

+something like this command:

+ perl Makefile.PL LIB=~/perl

+See perldoc perlmodinstall for more information on installing modules.

+DOCUMENTATION

+POD-format documentation is included in LangTags.pm. POD is readable

+with the 'perldoc' utility. See ChangeLog for recent changes.

+SUPPORT

+Questions, bug reports, useful code bits, and suggestions for

+I18N::LangTags should just be sent to me at sburke@cpan.org

+AVAILABILITY

+The latest version of I18N::LangTags is available from the

+Comprehensive Perl Archive Network (CPAN). Visit

+<http://www.cpan.org/> to find a CPAN site near you.

+COPYRIGHT

+reserved.

+The programs and documentation in this dist are distributed in

+the hope that they will be useful, but without any warranty; without

+even the implied warranty of merchantability or fitness for a

+particular purpose.

+This library is free software; you can redistribute it and/or modify

+it under the same terms as Perl itself.

diff --git a/gnu/usr.bin/perl/lib/I18N/LangTags/test.pl b/gnu/usr.bin/perl/lib/I18N/LangTags/test.pl
new file mode 100644
index 00000000000..88a7bf66ae8
--- /dev/null
+++ b/gnu/usr.bin/perl/lib/I18N/LangTags/test.pl

@@ -0,0 +1,79 @@

+# Before `make install' is performed this script should be runnable with

+# `make test'. After `make install' it should work as `perl test.pl'

+######################### We start with some black magic to print on failure.

+require 5;

+ # Time-stamp: "2001-06-21 22:59:38 MDT"

+use strict;

+use Test;

+BEGIN { plan tests => 46 };  # 1 initial check + 27 I18N::LangTags checks + 18 ::List lookups

+BEGIN { ok 1 }

+use I18N::LangTags (':ALL');

+print "# Perl v$], I18N::LangTags v$I18N::LangTags::VERSION\n";

+ok !is_language_tag('');  # the empty string must be rejected

+ok is_language_tag('fr');

+ok is_language_tag('fr-ca');

+ok is_language_tag('fr-CA');  # upper-case subtag is accepted

+ok !is_language_tag('fr-CA-');  # trailing "-" must be rejected

+ok !is_language_tag('fr_CA');  # "_" separator must be rejected

+ok is_language_tag('fr-ca-joual');

+ok !is_language_tag('frca');

+ok is_language_tag('nav');  # three-letter primary tag

+ok is_language_tag('nav-shiprock');

+ok !is_language_tag('nav-ceremonial'); # subtag too long

+ok !is_language_tag('x');  # a bare "x" or "i" prefix is not a tag by itself

+ok !is_language_tag('i');

+ok is_language_tag('i-borg'); # NB: fictitious tag

+ok is_language_tag('x-borg');

+ok is_language_tag('x-borg-prot5123');  # digits inside a subtag are accepted

+ok same_language_tag('x-borg-prot5123', 'i-BORG-Prot5123' );  # expected equal despite x-/i- prefix and letter case

+ok !same_language_tag('en', 'en-us' );

+ok 0 == similarity_language_tag('en-ca', 'fr-ca');  # primary tags differ

+ok 1 == similarity_language_tag('en-ca', 'en-us');  # only the primary tag is shared

+ok 2 == similarity_language_tag('en-us-southern', 'en-us-western');  # first two subtags are shared

+ok 2 == similarity_language_tag('en-us-southern', 'en-us');

+ok grep $_ eq 'hi', panic_languages('kok');  # 'hi' must appear in the list returned for 'kok'

+ok grep $_ eq 'en', panic_languages('x-woozle-wuzzle');  # 'en' must appear even for an unregistered tag

+ok ! grep $_ eq 'mr', panic_languages('it');  # 'mr' must not appear in the list for 'it'

+ok grep $_ eq 'es', panic_languages('it');

+ok grep $_ eq 'it', panic_languages('es');

+print "# Now the ::List tests...\n";

+use I18N::LangTags::List;  # the loop below makes one ok() call per listed tag (18 tags)

+foreach my $lt (qw(

+ en

+ en-us

+ en-kr

+ el

+ elx

+ i-mingo

+ i-mingo-tom

+ x-mingo-tom

+ it

+ it-it

+ it-IT

+ it-FR

+ yi

+ ji

+ cre-syllabic

+ cre-syllabic-western

+ cre-western

+ cre-latin

+)) {  # every tag above must resolve to a true (non-empty) name

+ my $name = I18N::LangTags::List::name($lt);

+ if($name) {

+ ok(1);

+ print "# $lt -> $name\n";  # diagnostic line showing the name that was found

+ } else {

+ ok(0);  # record a failure for this tag

+ print "# Failed lookup on $lt\n";

+ }

+print "# So there!\n";