diff options
Diffstat (limited to 'gnu/usr.bin/perl/cpan/Pod-Simple')
36 files changed, 863 insertions, 410 deletions
diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple.pm index 20924153b65..6c91b8ac1fb 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple.pm @@ -18,7 +18,7 @@ use vars qw( ); @ISA = ('Pod::Simple::BlackBox'); -$VERSION = '3.35'; +$VERSION = '3.40'; @Known_formatting_codes = qw(I B C L E F S X Z); %Known_formatting_codes = map(($_=>1), @Known_formatting_codes); @@ -74,6 +74,9 @@ else { # EBCDIC on early Perl. We know what the values are for the code #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ __PACKAGE__->_accessorize( + '_output_is_for_JustPod', # For use only by Pod::Simple::JustPod, + # If non-zero, don't expand Z<> E<> S<> L<>, + # and count how many brackets in format codes 'nbsp_for_S', # Whether to map S<...>'s to \xA0 characters 'source_filename', # Filename of the source, for use in warnings 'source_dead', # Whether to consider this parser's source dead @@ -103,6 +106,8 @@ __PACKAGE__->_accessorize( 'preserve_whitespace', # whether to try to keep whitespace as-is 'strip_verbatim_indent', # What indent to strip from verbatim + 'expand_verbatim_tabs', # 0: preserve tabs in verbatim blocks + # n: expand tabs to stops every n columns 'parse_characters', # Whether parser should expect chars rather than octets @@ -168,6 +173,7 @@ sub encoding { BEGIN { *pretty = \&Pod::Simple::BlackBox::pretty; *stringify_lol = \&Pod::Simple::BlackBox::stringify_lol; + *my_qr = \&Pod::Simple::BlackBox::my_qr; } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -219,11 +225,14 @@ sub new { my $class = ref($_[0]) || $_[0]; #Carp::croak(__PACKAGE__ . " is a virtual base class -- see perldoc " # . __PACKAGE__ ); - return bless { + my $obj = bless { 'accept_codes' => { map( ($_=>$_), @Known_formatting_codes ) }, 'accept_directives' => { %Known_directives }, 'accept_targets' => {}, }, $class; + + $obj->expand_verbatim_tabs(8); + return $obj; } @@ -339,10 +348,9 @@ sub unaccept_targets { # XXX Probably it is an error that the digit '9' is excluded from these re's. # Broken for early Perls on EBCDIC -my $xml_name_re = eval "qr/[^-.0-8:A-Z_a-z[:^ascii:]]/"; -if (! defined $xml_name_re) { - $xml_name_re = qr/[\x00-\x2C\x2F\x39\x3B-\x40\x5B-\x5E\x60\x7B-\x7F]/; -} +my $xml_name_re = my_qr('[^-.0-8:A-Z_a-z[:^ascii:]]', '9'); +$xml_name_re = qr/[\x00-\x2C\x2F\x39\x3B-\x40\x5B-\x5E\x60\x7B-\x7F]/ + unless $xml_name_re; sub accept_code { shift->accept_codes(@_) } # alias @@ -652,12 +660,13 @@ sub _make_treelet { $treelet = $self->_treelet_from_formatting_codes(@_); } - if( $self->_remap_sequences($treelet) ) { + if( ! $self->{'_output_is_for_JustPod'} # Retain these as-is for pod output + && $self->_remap_sequences($treelet) ) + { $self->_treat_Zs($treelet); # Might as well nix these first $self->_treat_Ls($treelet); # L has to precede E and S $self->_treat_Es($treelet); $self->_treat_Ss($treelet); # S has to come after E - $self->_wrap_up($treelet); # Nix X's and merge texties } else { @@ -1080,9 +1089,14 @@ sub _treat_Ls { # Process our dear dear friends, the L<...> sequences # By here, $treelet->[$i] is definitely an L node my $ell = $treelet->[$i]; - DEBUG > 1 and print STDERR "Ogling L node $ell\n"; + DEBUG > 1 and print STDERR "Ogling L node " . pretty($ell) . "\n"; - # bitch if it's empty + # bitch if it's empty or is just '/' + if (@{$ell} == 3 and $ell->[2] =~ m!\A\s*/\s*\z!) { + $self->whine( $start_line, "L<> contains only '/'" ); + $treelet->[$i] = 'L</>'; # just make it a text node + next; # and move on + } if( @{$ell} == 2 or (@{$ell} == 3 and $ell->[2] eq '') ) { @@ -1289,6 +1303,7 @@ sub _treat_Ls { # Process our dear dear friends, the L<...> sequences $section_name = [splice @ell_content]; $section_name->[ 0] =~ s/^\"//s; $section_name->[-1] =~ s/\"$//s; + $ell->[1]{'~tolerated'} = 1; } # Turn L<Foo Bar> into L</Foo Bar>. @@ -1296,8 +1311,8 @@ sub _treat_Ls { # Process our dear dear friends, the L<...> sequences and grep !ref($_) && m/ /s, @ell_content ) { $section_name = [splice @ell_content]; + $ell->[1]{'~deprecated'} = 1; # That's support for the now-deprecated syntax. - # (Maybe generate a warning eventually?) # Note that it deliberately won't work on L<...|Foo Bar> } @@ -1347,7 +1362,7 @@ sub _treat_Ls { # Process our dear dear friends, the L<...> sequences # And update children to be the link-text: @$ell = (@$ell[0,1], defined($link_text) ? splice(@$link_text) : ''); - DEBUG > 2 and print STDERR "End of L-parsing for this node $treelet->[$i]\n"; + DEBUG > 2 and print STDERR "End of L-parsing for this node " . pretty($treelet->[$i]) . "\n"; unshift @stack, $treelet->[$i]; # might as well recurse } @@ -1507,6 +1522,7 @@ sub _accessorize { # A simple-minded method-maker $Carp::CarpLevel = 1, Carp::croak( "Accessor usage: \$obj->$attrname() or \$obj->$attrname(\$new_value)" ) unless (@_ == 1 or @_ == 2) and ref $_[0]; + (@_ == 1) ? $_[0]->{$attrname} : ($_[0]->{$attrname} = $_[1]); }; diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple.pod b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple.pod index 67a18df0d64..c569e979ae9 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple.pod +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple.pod @@ -19,7 +19,11 @@ Be sure to read L</ENCODING> if your Pod contains non-ASCII characters. Pod formatters can use Pod::Simple to parse Pod documents and render them into plain text, HTML, or any number of other formats. Typically, such formatters will be subclasses of Pod::Simple, and so they will inherit its methods, like -C<parse_file>. +C<parse_file>. But note that Pod::Simple doesn't understand and +properly parse Perl itself, so if you have a file which contains a Perl +program that has a multi-line quoted string which has lines that look +like pod, Pod::Simple will treat them as pod. This can be avoided if +the file makes these into indented here documents instead. If you're reading this document just because you have a Pod-processing subclass that you want to use, this document (plus the documentation for the @@ -219,6 +223,21 @@ that you don't want I<any> lines indented. You can do something like this: return undef; }); +=item C<< $parser->expand_verbatim_tabs( I<n> ) >> + +Default: 8 + +If after any stripping of indentation in verbatim blocks, there remain +tabs, this method call indicates what to do with them. C<0> +means leave them as tabs, any other number indicates that each tab is to +be translated so as to have tab stops every C<n> columns. + +This is independent of other methods (except that it operates after any +verbatim input stripping is done). + +Like the other methods, the input parameter is not checked for validity. +C<undef> or containing non-digits has the same effect as 8. + =back =head1 TERTIARY METHODS @@ -390,8 +409,8 @@ This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! -Patches against Pod::Simple are welcome. Please send bug reports to -<bug-pod-simple@rt.cpan.org>. +Please use L<https://github.com/perl-pod/pod-simple/issues/new> to file a bug +report. =head1 COPYRIGHT AND DISCLAIMERS @@ -419,6 +438,8 @@ Pod::Simple is maintained by: =item * David E. Wheeler C<dwheeler@cpan.org> +=item * Karl Williamson C<khw@cpan.org> + =back Documentation has been contributed by: diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm index 9fe3f702ef9..d115aee7e3e 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm @@ -22,8 +22,36 @@ use integer; # vroom! use strict; use Carp (); use vars qw($VERSION ); -$VERSION = '3.35'; +$VERSION = '3.40'; #use constant DEBUG => 7; + +sub my_qr ($$) { + + # $1 is a pattern to compile and return. Older perls compile any + # syntactically valid property, even if it isn't legal. To cope with + # this, return an empty string unless the compiled pattern also + # successfully matches $2, which the caller furnishes. + + my ($input_re, $should_match) = @_; + # XXX could have a third parameter $shouldnt_match for extra safety + + my $use_utf8 = ($] le 5.006002) ? 'use utf8;' : ""; + + my $re = eval "no warnings; $use_utf8 qr/$input_re/"; + #print STDERR __LINE__, ": $input_re: $@\n" if $@; + return "" if $@; + + my $matches = eval "no warnings; $use_utf8 '$should_match' =~ /$re/"; + #print STDERR __LINE__, ": $input_re: $@\n" if $@; + return "" if $@; + + #print STDERR __LINE__, ": SUCCESS: $re\n" if $matches; + return $re if $matches; + + #print STDERR __LINE__, ": $re: didn't match\n"; + return ""; +} + BEGIN { require Pod::Simple; *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG @@ -32,8 +60,37 @@ BEGIN { # Matches a character iff the character will have a different meaning # if we choose CP1252 vs UTF-8 if there is no =encoding line. # This is broken for early Perls on non-ASCII platforms. -my $non_ascii_re = eval "qr/[[:^ascii:]]/"; -$non_ascii_re = qr/[\x80-\xFF]/ if ! defined $non_ascii_re; +my $non_ascii_re = my_qr('[[:^ascii:]]', "\xB6"); +$non_ascii_re = qr/[\x80-\xFF]/ unless $non_ascii_re; + +# Use patterns understandable by Perl 5.6, if possible +my $cs_re = my_qr('\p{IsCs}', "\x{D800}"); +my $cn_re = my_qr('\p{IsCn}', "\x{09E4}"); # <reserved> code point unlikely + # to get assigned +my $rare_blocks_re = my_qr('[\p{InIPAExtensions}\p{InSpacingModifierLetters}]', + "\x{250}"); +$rare_blocks_re = my_qr('[\x{0250}-\x{02FF}]', "\x{250}") unless $rare_blocks_re; + +my $script_run_re = eval 'no warnings "experimental::script_run"; + qr/(*script_run: ^ .* $ )/x'; +my $latin_re = my_qr('[\p{IsLatin}\p{IsInherited}\p{IsCommon}]', "\x{100}"); +unless ($latin_re) { + # This was machine generated to be the ranges of the union of the above + # three properties, with things that were undefined by Unicode 4.1 filling + # gaps. That is the version in use when Perl advanced enough to + # successfully compile and execute the above pattern. + $latin_re = my_qr('[\x00-\x{02E9}\x{02EC}-\x{0374}\x{037E}\x{0385}\x{0387}\x{0485}\x{0486}\x{0589}\x{060C}\x{061B}\x{061F}\x{0640}\x{064B}-\x{0655}\x{0670}\x{06DD}\x{0951}-\x{0954}\x{0964}\x{0965}\x{0E3F}\x{10FB}\x{16EB}-\x{16ED}\x{1735}\x{1736}\x{1802}\x{1803}\x{1805}\x{1D00}-\x{1D25}\x{1D2C}-\x{1D5C}\x{1D62}-\x{1D65}\x{1D6B}-\x{1D77}\x{1D79}-\x{1DBE}\x{1DC0}-\x{1EF9}\x{2000}-\x{2125}\x{2127}-\x{27FF}\x{2900}-\x{2B13}\x{2E00}-\x{2E1D}\x{2FF0}-\x{3004}\x{3006}\x{3008}-\x{3020}\x{302A}-\x{302D}\x{3030}-\x{3037}\x{303C}-\x{303F}\x{3099}-\x{309C}\x{30A0}\x{30FB}\x{30FC}\x{3190}-\x{319F}\x{31C0}-\x{31CF}\x{3220}-\x{325F}\x{327F}-\x{32CF}\x{3358}-\x{33FF}\x{4DC0}-\x{4DFF}\x{A700}-\x{A716}\x{FB00}-\x{FB06}\x{FD3E}\x{FD3F}\x{FE00}-\x{FE6B}\x{FEFF}-\x{FF65}\x{FF70}\x{FF9E}\x{FF9F}\x{FFE0}-\x{FFFD}\x{10100}-\x{1013F}\x{1D000}-\x{1D1DD}\x{1D300}-\x{1D7FF}]', "\x{100}"); +} + +my $every_char_is_latin_re = my_qr("^(?:$latin_re)*\\z", "A"); + +# Latin script code points not in the first release of Unicode +my $later_latin_re = my_qr('[^\P{IsLatin}\p{IsAge=1.1}]', "\x{1F6}"); + +# If this perl doesn't have the Deprecated property, there's only one code +# point in it that we need be concerned with. +my $deprecated_re = my_qr('\p{IsDeprecated}', "\x{149}"); +$deprecated_re = qr/\x{149}/ unless $deprecated_re; my $utf8_bom; if (($] ge 5.007_003)) { @@ -43,6 +100,11 @@ if (($] ge 5.007_003)) { $utf8_bom = "\xEF\xBB\xBF"; # No EBCDIC BOM detection for early Perls. } +# This is used so that the 'content_seen' method doesn't return true on a +# file that just happens to have a line that matches /^=[a-zA-z]/. Only if +# there is a valid =foo line will we return that content was seen. +my $seen_legal_directive = 0; + #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ sub parse_line { shift->parse_lines(@_) } # alias @@ -57,10 +119,10 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines) my $cut_handler = $self->{'cut_handler'}; my $wl_handler = $self->{'whiteline_handler'}; $self->{'line_count'} ||= 0; - + my $scratch; - DEBUG > 4 and + DEBUG > 4 and print STDERR "# Parsing starting at line ", $self->{'line_count'}, ".\n"; DEBUG > 5 and @@ -71,9 +133,17 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines) # paragraph buffer. Because we need to defer processing of =over # directives and verbatim paragraphs. We call _ponder_paragraph_buffer # to process this. - + $self->{'pod_para_count'} ||= 0; + # An attempt to match the pod portions of a line. This is not fool proof, + # but is good enough to serve as part of the heuristic for guessing the pod + # encoding if not specified. + my $format_codes = join "", '[', grep { / ^ [A-Za-z] $/x } + keys %{$self->{accept_codes}}; + $format_codes .= ']'; + my $pod_chars_re = qr/ ^ = [A-Za-z]+ | $format_codes < /x; + my $line; foreach my $source_line (@_) { if( $self->{'source_dead'} ) { @@ -97,7 +167,7 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines) ($line = $source_line) =~ tr/\n\r//d; # If we don't have two vars, we'll end up with that there # tr/// modding the (potentially read-only) original source line! - + } else { DEBUG > 2 and print STDERR "First line: [$source_line]\n"; @@ -106,7 +176,7 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines) $self->_handle_encoding_line( "=encoding utf8" ); delete $self->{'_processed_encoding'}; $line =~ tr/\n\r//d; - + } elsif( $line =~ s/^\xFE\xFF//s ) { DEBUG and print STDERR "Big-endian UTF-16 BOM seen. Aborting parsing.\n"; $self->scream( @@ -130,7 +200,7 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines) next; # TODO: implement somehow? - + } else { DEBUG > 2 and print STDERR "First line is BOM-less.\n"; ($line = $source_line) =~ tr/\n\r//d; @@ -144,8 +214,8 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines) my $encoding; - # No =encoding line, and we are at the first line in the input that - # contains a non-ascii byte, that is one whose meaning varies depending + # No =encoding line, and we are at the first pod line in the input that + # contains a non-ascii byte, that is, one whose meaning varies depending # on whether the file is encoded in UTF-8 or CP1252, which are the two # possibilities permitted by the pod spec. (ASCII is assumed if the # file only contains ASCII bytes.) In order to process this line, we @@ -162,22 +232,28 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines) # without conflict. CP 1252 uses most of them for graphic characters. # # Note that all ASCII-range bytes represent their corresponding code - # points in CP1252 and UTF-8. In ASCII platform UTF-8 all other code - # points require multiple (non-ASCII) bytes to represent. (A separate - # paragraph for EBCDIC is below.) The multi-byte representation is - # quite structured. If we find an isolated byte that requires multiple - # bytes to represent in UTF-8, we know that the encoding is not UTF-8. - # If we find a sequence of bytes that violates the UTF-8 structure, we - # also can presume the encoding isn't UTF-8, and hence must be 1252. + # points in both CP1252 and UTF-8. In ASCII platform UTF-8, all other + # code points require multiple (non-ASCII) bytes to represent. (A + # separate paragraph for EBCDIC is below.) The multi-byte + # representation is quite structured. If we find an isolated byte that + # would require multiple bytes to represent in UTF-8, we know that the + # encoding is not UTF-8. If we find a sequence of bytes that violates + # the UTF-8 structure, we also can presume the encoding isn't UTF-8, and + # hence must be 1252. # # But there are ambiguous cases where we could guess wrong. If so, the # user will end up having to supply an =encoding line. We use all # readily available information to improve our chances of guessing # right. The odds of something not being UTF-8, but still passing a # UTF-8 validity test go down very rapidly with increasing length of the - # sequence. Therefore we look at all the maximal length non-ascii - # sequences on the line. If any of the sequences can't be UTF-8, we - # quit there and choose CP1252. If all could be UTF-8, we guess UTF-8. + # sequence. Therefore we look at all non-ascii sequences on the line. + # If any of the sequences can't be UTF-8, we quit there and choose + # CP1252. If all could be UTF-8, we see if any of the code points + # represented are unlikely to be in pod. If so, we guess CP1252. If + # not, we check if the line is all in the same script; if not guess + # CP1252; otherwise UTF-8. For perls that don't have convenient script + # run testing, see if there is both Latin and non-Latin. If so, CP1252, + # otherwise UTF-8. # # On EBCDIC platforms, the situation is somewhat different. In # UTF-EBCDIC, not only do ASCII-range bytes represent their code points, @@ -188,51 +264,188 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines) # very unlikely to be in pod text. So if we encounter one of them, it # means that it is quite likely CP1252 and not UTF-8. The net result is # the same code below is used for both platforms. - while ($line =~ m/($non_ascii_re+)/g) { - my $non_ascii_seq = $1; - - if (length $non_ascii_seq == 1) { - $encoding = 'CP1252'; - goto guessed; - } elsif ($] ge 5.007_003) { - - # On Perls that have this function, we can see if the sequence is - # valid UTF-8 or not. - my $is_utf8; - { - no warnings 'utf8'; - $is_utf8 = utf8::decode($non_ascii_seq); + # + # XXX probably if the line has E<foo> that evaluates to illegal CP1252, + # then it is UTF-8. But we haven't processed E<> yet. + + goto set_1252 if $] lt 5.006_000; # No UTF-8 on very early perls + + my $copy; + + no warnings 'utf8'; + + if ($] ge 5.007_003) { + $copy = $line; + + # On perls that have this function, we can use it to easily see if the + # sequence is valid UTF-8 or not; if valid it turns on the UTF-8 flag + # needed below for script run detection + goto set_1252 if ! utf8::decode($copy); + } + elsif (ord("A") != 65) { # Early EBCDIC, assume UTF-8. What's a windows + # code page doing here anyway? + goto set_utf8; + } + else { # ASCII, no decode(): do it ourselves using the fundamental + # characteristics of UTF-8 + use if $] le 5.006002, 'utf8'; + + my $char_ord; + my $needed; # How many continuation bytes to gobble up + + # Initialize the translated line with a dummy character that will be + # deleted after everything else is done. This dummy makes sure that + # $copy will be in UTF-8. Doing it now avoids the bugs in early perls + # with upgrading in the middle + $copy = chr(0x100); + + # Parse through the line + for (my $i = 0; $i < length $line; $i++) { + my $byte = substr($line, $i, 1); + + # ASCII bytes are trivially dealt with + if ($byte !~ $non_ascii_re) { + $copy .= $byte; + next; + } + + my $b_ord = ord $byte; + + # Now figure out what this code point would be if the input is + # actually in UTF-8. If, in the process, we discover that it isn't + # well-formed UTF-8, we guess CP1252. + # + # Start the process. If it is UTF-8, we are at the first, start + # byte, of a multi-byte sequence. We look at this byte to figure + # out how many continuation bytes are needed, and to initialize the + # code point accumulator with the data from this byte. + # + # Normally the minimum continuation byte is 0x80, but in certain + # instances the minimum is a higher number. So the code below + # overrides this for those instances. + my $min_cont = 0x80; + + if ($b_ord < 0xC2) { # A start byte < C2 is malformed + goto set_1252; + } + elsif ($b_ord <= 0xDF) { + $needed = 1; + $char_ord = $b_ord & 0x1F; } - if (! $is_utf8) { - $encoding = 'CP1252'; - goto guessed; + elsif ($b_ord <= 0xEF) { + $min_cont = 0xA0 if $b_ord == 0xE0; + $needed = 2; + $char_ord = $b_ord & (0x1F >> 1); } - } elsif (ord("A") == 65) { # An early Perl, ASCII platform - - # Without utf8::decode, it's a lot harder to do a rigorous check - # (though some early releases had a different function that - # accomplished the same thing). Since these are ancient Perls, not - # likely to be in use today, we take the easy way out, and look at - # just the first two bytes of the sequence to see if they are the - # start of a UTF-8 character. In ASCII UTF-8, continuation bytes - # must be between 0x80 and 0xBF. Start bytes can range from 0xC2 - # through 0xFF, but anything above 0xF4 is not Unicode, and hence - # extremely unlikely to be in a pod. - if ($non_ascii_seq !~ /^[\xC2-\xF4][\x80-\xBF]/) { - $encoding = 'CP1252'; - goto guessed; + elsif ($b_ord <= 0xF4) { + $min_cont = 0x90 if $b_ord == 0xF0; + $needed = 3; + $char_ord = $b_ord & (0x1F >> 2); + } + else { # F4 is the highest start byte for legal Unicode; higher is + # unlikely to be in pod. + goto set_1252; } - # We don't bother doing anything special for EBCDIC on early Perls. - # If there is a solitary variant, CP1252 will be chosen; otherwise - # UTF-8. - } - } # End of loop through all variant sequences on the line + # ? not enough continuation bytes available + goto set_1252 if $i + $needed >= length $line; + + # Accumulate the ordinal of the character from the remaining + # (continuation) bytes. + while ($needed-- > 0) { + my $cont = substr($line, ++$i, 1); + $b_ord = ord $cont; + goto set_1252 if $b_ord < $min_cont || $b_ord > 0xBF; + + # In all cases, any next continuation bytes all have the same + # minimum legal value + $min_cont = 0x80; + + # Accumulate this byte's contribution to the code point + $char_ord <<= 6; + $char_ord |= ($b_ord & 0x3F); + } + + # Here, the sequence that formed this code point was valid UTF-8, + # so add the completed character to the output + $copy .= chr $char_ord; + } # End of loop through line + + # Delete the dummy first character + $copy = substr($copy, 1); + } + + # Here, $copy is legal UTF-8. + + # If it can't be legal CP1252, no need to look further. (These bytes + # aren't valid in CP1252.) This test could have been placed higher in + # the code, but it seemed wrong to set the encoding to UTF-8 without + # making sure that the very first instance is well-formed. But what if + # it isn't legal CP1252 either? We have to choose one or the other, and + # It seems safer to favor the single-byte encoding over the multi-byte. + goto set_utf8 if ord("A") == 65 && $line =~ /[\x81\x8D\x8F\x90\x9D]/; + + # The C1 controls are not likely to appear in pod + goto set_1252 if ord("A") == 65 && $copy =~ /[\x80-\x9F]/; + + # Nor are surrogates nor unassigned, nor deprecated. + DEBUG > 8 and print STDERR __LINE__, ": $copy: surrogate\n" if $copy =~ $cs_re; + goto set_1252 if $cs_re && $copy =~ $cs_re; + DEBUG > 8 and print STDERR __LINE__, ": $copy: unassigned\n" if $cn_re && $copy =~ $cn_re; + goto set_1252 if $cn_re && $copy =~ $cn_re; + DEBUG > 8 and print STDERR __LINE__, ": $copy: deprecated\n" if $copy =~ $deprecated_re; + goto set_1252 if $copy =~ $deprecated_re; + + # Nor are rare code points. But this is hard to determine. khw + # believes that IPA characters and the modifier letters are unlikely to + # be in pod (and certainly very unlikely to be the in the first line in + # the pod containing non-ASCII) + DEBUG > 8 and print STDERR __LINE__, ": $copy: rare\n" if $copy =~ $rare_blocks_re; + goto set_1252 if $rare_blocks_re && $copy =~ $rare_blocks_re; + + # The first Unicode version included essentially every Latin character + # in modern usage. So, a Latin character not in the first release will + # unlikely be in pod. + DEBUG > 8 and print STDERR __LINE__, ": $copy: later_latin\n" if $later_latin_re && $copy =~ $later_latin_re; + goto set_1252 if $later_latin_re && $copy =~ $later_latin_re; + + # On perls that handle script runs, if the UTF-8 interpretation yields + # a single script, we guess UTF-8, otherwise just having a mixture of + # scripts is suspicious, so guess CP1252. We first strip off, as best + # we can, the ASCII characters that look like they are pod directives, + # as these would always show as mixed with non-Latin text. + $copy =~ s/$pod_chars_re//g; + + if ($script_run_re) { + goto set_utf8 if $copy =~ $script_run_re; + DEBUG > 8 and print STDERR __LINE__, ": not script run\n"; + goto set_1252; + } + + # Even without script runs, but on recent enough perls and Unicodes, we + # can check if there is a mixture of both Latin and non-Latin. Again, + # having a mixture of scripts is suspicious, so assume CP1252 + + # If it's all non-Latin, there is no CP1252, as that is Latin + # characters and punct, etc. + DEBUG > 8 and print STDERR __LINE__, ": $copy: not latin\n" if $copy !~ $latin_re; + goto set_utf8 if $copy !~ $latin_re; + + DEBUG > 8 and print STDERR __LINE__, ": $copy: all latin\n" if $copy =~ $every_char_is_latin_re; + goto set_utf8 if $copy =~ $every_char_is_latin_re; + + DEBUG > 8 and print STDERR __LINE__, ": $copy: mixed\n"; + + set_1252: + DEBUG > 9 and print STDERR __LINE__, ": $copy: is 1252\n"; + $encoding = 'CP1252'; + goto done_set; - # All sequences in the line could be UTF-8. Guess that. + set_utf8: + DEBUG > 9 and print STDERR __LINE__, ": $copy: is UTF-8\n"; $encoding = 'UTF-8'; - guessed: + done_set: $self->_handle_encoding_line( "=encoding $encoding" ); delete $self->{'_processed_encoding'}; $self->{'_transcoder'} && $self->{'_transcoder'}->($line); @@ -254,13 +467,13 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines) $self->{'line_count'}, "=cut found outside a pod block. Skipping to next block." ); - + ## Before there were errata sections in the world, it was ## least-pessimal to abort processing the file. But now we can ## just barrel on thru (but still not start a pod block). #splice @_; #push @_, undef; - + next; } else { $self->{'in_pod'} = $self->{'start_of_pod_block'} @@ -273,7 +486,7 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines) if $code_handler; # Note: this may cause code to be processed out of order relative # to pods, but in order relative to cuts. - + # Note also that we haven't yet applied the transcoding to $line # by time we call $code_handler! @@ -284,11 +497,11 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines) DEBUG > 1 and print STDERR "# Setting nextline to $1\n"; $self->{'line_count'} = $1 - 1; } - + next; } } - + # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . # Else we're in pod mode: @@ -308,12 +521,13 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines) # ++$self->{'pod_para_count'}; $self->_ponder_paragraph_buffer(); # by now it's safe to consider the previous paragraph as done. + DEBUG > 6 and print STDERR "Processing any cut handler, line ${$self}{'line_count'}\n"; $cut_handler->(map $_, $line, $self->{'line_count'}, $self) if $cut_handler; # TODO: add to docs: Note: this may cause cuts to be processed out # of order relative to pods, but in order relative to code. - + } elsif($line =~ m/^(\s*)$/s) { # it's a blank line if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line $wl_handler->(map $_, $line, $self->{'line_count'}, $self) @@ -324,29 +538,30 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines) DEBUG > 1 and print STDERR "Saving blank line at line ${$self}{'line_count'}\n"; push @{$paras->[-1]}, $line; } # otherwise it's not interesting - + if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) { DEBUG > 1 and print STDERR "Noting para ends with blank line at ${$self}{'line_count'}\n"; } - + $self->{'last_was_blank'} = 1; - + } elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para... - - if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) { + + if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(\s+|$)(.*)/s) { # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS - my $new = [$1, {'start_line' => $self->{'line_count'}}, $2]; + my $new = [$1, {'start_line' => $self->{'line_count'}}, $3]; + $new->[1]{'~orig_spacer'} = $2 if $2 && $2 ne " "; # Note that in "=head1 foo", the WS is lost. # Example: ['=head1', {'start_line' => 123}, ' foo'] - + ++$self->{'pod_para_count'}; - + $self->_ponder_paragraph_buffer(); # by now it's safe to consider the previous paragraph as done. - + push @$paras, $new; # the new incipient paragraph DEBUG > 1 and print STDERR "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n"; - + } elsif($line =~ m/^\s/s) { if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { @@ -379,7 +594,7 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines) } $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; } - + } # ends the big while loop DEBUG > 1 and print STDERR (pretty(@$paras), "\n"); @@ -390,7 +605,7 @@ sub parse_lines { # Usage: $parser->parse_lines(@lines) sub _handle_encoding_line { my($self, $line) = @_; - + return if $self->parse_characters; # The point of this routine is to set $self->{'_transcoder'} as indicated. @@ -492,7 +707,7 @@ sub _handle_encoding_line { sub _handle_encoding_second_level { # By time this is called, the encoding (if well formed) will already - # have been acted one. + # have been acted on. my($self, $para) = @_; my @x = @$para; my $content = join ' ', splice @x, 2; @@ -500,7 +715,7 @@ sub _handle_encoding_second_level { $content =~ s/\s+$//s; DEBUG > 2 and print STDERR "Ogling encoding directive: =encoding $content\n"; - + if (defined($self->{'_processed_encoding'})) { #if($content ne $self->{'_processed_encoding'}) { # Could it happen? @@ -518,14 +733,14 @@ sub _handle_encoding_second_level { } else { DEBUG > 2 and print STDERR " (Yup, it was successfully handled already.)\n"; } - + } else { # Otherwise it's a syntax error $self->whine( $para->[1]{'start_line'}, "Invalid =encoding syntax: $content" ); } - + return; } @@ -542,7 +757,7 @@ sub _gen_errata { return() unless $self->{'errata'} and keys %{$self->{'errata'}}; my @out; - + foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) { push @out, ['=item', {'start_line' => $m}, "Around line $line:"], @@ -555,7 +770,7 @@ sub _gen_errata { ) ; } - + # TODO: report of unknown entities? unrenderable characters? unshift @out, @@ -569,7 +784,7 @@ sub _gen_errata { ['=over', {'start_line' => $m, 'errata' => 1}, ''], ; - push @out, + push @out, ['=back', {'start_line' => $m, 'errata' => 1}, ''], ; @@ -610,7 +825,7 @@ sub _ponder_paragraph_buffer { # Document, # Data, Para, Verbatim # B, C, longdirname (TODO -- wha?), etc. for all directives - # + # my $self = $_[0]; my $paras; @@ -624,11 +839,11 @@ sub _ponder_paragraph_buffer { # We have something in our buffer. So apparently the document has started. unless($self->{'doc_has_started'}) { $self->{'doc_has_started'} = 1; - + my $starting_contentless; $starting_contentless = ( - !@$curr_open + !@$curr_open and @$paras and ! grep $_->[0] ne '~end', @$paras # i.e., if the paras is all ~ends ) @@ -637,7 +852,7 @@ sub _ponder_paragraph_buffer { $starting_contentless ? 'contentless' : 'contentful', " document\n" ; - + $self->_handle_element_start( ($scratch = 'Document'), { @@ -649,15 +864,32 @@ sub _ponder_paragraph_buffer { my($para, $para_type); while(@$paras) { - last if @$paras == 1 and - ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim' - or $paras->[0][0] eq '=item' ) - ; + + # If a directive, assume it's legal; subtract below if found not to be + $seen_legal_directive++ if $paras->[0][0] =~ /^=/; + + last if @$paras == 1 + and ( $paras->[0][0] eq '=over' + or $paras->[0][0] eq '=item' + or ($paras->[0][0] eq '~Verbatim' and $self->{'in_pod'})); # Those're the three kinds of paragraphs that require lookahead. # Actually, an "=item Foo" inside an <over type=text> region # and any =item inside an <over type=block> region (rare) # don't require any lookahead, but all others (bullets # and numbers) do. + # The verbatim is different from the other two, because those might be + # like: + # + # =item + # ... + # =cut + # ... + # =item + # + # The =cut here finishes the paragraph but doesn't terminate the =over + # they should be in. (khw apologizes that he didn't comment at the time + # why the 'in_pod' works, and no longer remembers why, and doesn't think + # it is currently worth the effort to re-figure it out.) # TODO: whinge about many kinds of directives in non-resolving =for regions? # TODO: many? like what? =head1 etc? @@ -667,7 +899,7 @@ sub _ponder_paragraph_buffer { DEBUG > 1 and print STDERR "Pondering a $para_type paragraph, given the stack: (", $self->_dump_curr_open(), ")\n"; - + if($para_type eq '=for') { next if $self->_ponder_for($para,$curr_open,$paras); @@ -704,7 +936,7 @@ sub _ponder_paragraph_buffer { } else { # All non-magical codes!!! - + # Here we start using $para_type for our own twisted purposes, to # mean how it should get treated, not as what the element name # should be. @@ -744,10 +976,10 @@ sub _ponder_paragraph_buffer { ; next; } - - + + my $over_type = $over->[1]{'~type'}; - + if(!$over_type) { # Shouldn't happen1 die "Typeless over in stack, starting at line " @@ -772,7 +1004,7 @@ sub _ponder_paragraph_buffer { my $item_type = $self->_get_item_type($para); # That kills the content of the item if it's a number or bullet. DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; - + if($item_type eq 'text') { # Nothing special needs doing for 'text' } elsif($item_type eq 'number' or $item_type eq 'bullet') { @@ -788,16 +1020,16 @@ sub _ponder_paragraph_buffer { } else { die "Unhandled item type $item_type"; # should never happen } - + # =item-text thingies don't need any assimilation, it seems. } elsif($over_type eq 'number') { my $item_type = $self->_get_item_type($para); # That kills the content of the item if it's a number or bullet. DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; - + my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; - + if($item_type eq 'bullet') { # Hm, it's not numeric. Correct for this. $para->[1]{'number'} = $expected_value; @@ -822,7 +1054,7 @@ sub _ponder_paragraph_buffer { } elsif($expected_value == $para->[1]{'number'}) { DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n"; - + } else { DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'}, " instead of the expected value of $expected_value\n"; @@ -833,7 +1065,7 @@ sub _ponder_paragraph_buffer { ); $para->[1]{'number'} = $expected_value; # correcting!! } - + if(@$para == 2) { # For the cases where we /didn't/ push to @$para if($paras->[0][0] eq '~Para') { @@ -850,13 +1082,13 @@ sub _ponder_paragraph_buffer { my $item_type = $self->_get_item_type($para); # That kills the content of the item if it's a number or bullet. DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; - + if($item_type eq 'bullet') { # as expected! if( $para->[1]{'~_freaky_para_hack'} ) { DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n"; - push @$para, delete $para->[1]{'~_freaky_para_hack'}; + push @$para, $para->[1]{'~_freaky_para_hack'}; } } elsif($item_type eq 'number') { @@ -925,6 +1157,7 @@ sub _ponder_paragraph_buffer { DEBUG > 1 and print STDERR " Pondering known directive ${$para}[0] as $para_type\n"; } else { # An unknown directive! + $seen_legal_directive--; DEBUG > 1 and printf STDERR "Unhandled directive %s (Handled: %s)\n", $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} ) ; @@ -944,15 +1177,15 @@ sub _ponder_paragraph_buffer { my @fors = grep $_->[0] eq '=for', @$curr_open; DEBUG > 1 and print STDERR "Containing fors: ", join(',', map $_->[1]{'target'}, @fors), "\n"; - + if(! @fors) { DEBUG and print STDERR "Treating $para_type paragraph as such because stack has no =for's\n"; - + #} elsif(grep $_->[1]{'~resolve'}, @fors) { #} elsif(not grep !$_->[1]{'~resolve'}, @fors) { } elsif( $fors[-1][1]{'~resolve'} ) { # Look to the immediately containing for - + if($para_type eq 'Data') { DEBUG and print STDERR "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; $para->[0] = 'Para'; @@ -971,7 +1204,7 @@ sub _ponder_paragraph_buffer { if($para_type eq 'Plain') { $self->_ponder_Plain($para); } elsif($para_type eq 'Verbatim') { - $self->_ponder_Verbatim($para); + $self->_ponder_Verbatim($para); } elsif($para_type eq 'Data') { $self->_ponder_Data($para); } else { @@ -985,11 +1218,12 @@ sub _ponder_paragraph_buffer { DEBUG and print STDERR "\n", pretty($para), "\n"; # traverse the treelet (which might well be just one string scalar) - $self->{'content_seen'} ||= 1; + $self->{'content_seen'} ||= 1 if $seen_legal_directive + && ! $self->{'~tried_gen_errata'}; $self->_traverse_treelet_bit(@$para); } } - + return; } @@ -1024,9 +1258,9 @@ sub _ponder_for { } DEBUG > 1 and print STDERR "Faking out a =for $target as a =begin $target / =end $target\n"; - + $para->[0] = 'Data'; - + unshift @$paras, ['=begin', {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, @@ -1038,7 +1272,7 @@ sub _ponder_for { $target, ], ; - + return 1; } @@ -1055,20 +1289,20 @@ sub _ponder_begin { DEBUG and print STDERR "Ignoring targetless =begin\n"; return 1; } - + my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/; $para->[1]{'title'} = $title if ($title); $para->[1]{'target'} = $target; # without any ':' $content = $target; # strip off the title - + $content =~ s/^:!/!:/s; my $neg; # whether this is a negation-match $neg = 1 if $content =~ s/^!//s; my $to_resolve; # whether to process formatting codes $to_resolve = 1 if $content =~ s/^://s; - + my $dont_ignore; # whether this target matches us - + foreach my $target_name ( split(',', $content, -1), $neg ? () : '*' @@ -1076,7 +1310,7 @@ sub _ponder_begin { DEBUG > 2 and print STDERR " Considering whether =begin $content matches $target_name\n"; next unless $self->{'accept_targets'}{$target_name}; - + DEBUG > 2 and print STDERR " It DOES match the acceptable target $target_name!\n"; $to_resolve = 1 @@ -1113,7 +1347,7 @@ sub _ponder_begin { if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) { DEBUG > 1 and print STDERR "Ignoring ignorable =begin\n"; } else { - $self->{'content_seen'} ||= 1; + $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; $self->_handle_element_start((my $scratch='for'), $para->[1]); } @@ -1139,7 +1373,7 @@ sub _ponder_end { DEBUG and print STDERR "Ignoring targetless =end\n"; return 1; } - + unless($content =~ m/^\S+$/) { # i.e., unless it's one word $self->whine( $para->[1]{'start_line'}, @@ -1149,7 +1383,7 @@ sub _ponder_end { DEBUG and print STDERR "Ignoring mistargetted =end $content\n"; return 1; } - + unless(@$curr_open and $curr_open->[-1][0] eq '=for') { $self->whine( $para->[1]{'start_line'}, @@ -1159,11 +1393,11 @@ sub _ponder_end { DEBUG and print STDERR "Ignoring mistargetted =end $content\n"; return 1; } - + unless($content eq $curr_open->[-1][1]{'target'}) { $self->whine( $para->[1]{'start_line'}, - "=end $content doesn't match =begin " + "=end $content doesn't match =begin " . $curr_open->[-1][1]{'target'} . ". (Stack: " . $self->_dump_curr_open() . ')' @@ -1180,22 +1414,22 @@ sub _ponder_end { } else { $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'}; # what's that for? - - $self->{'content_seen'} ||= 1; + + $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; $self->_handle_element_end( my $scratch = 'for', $para->[1]); } DEBUG > 1 and print STDERR "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n"; pop @$curr_open; return 1; -} +} sub _ponder_doc_end { my ($self,$para,$curr_open,$paras) = @_; if(@$curr_open) { # Deal with things left open DEBUG and print STDERR "Stack is nonempty at end-document: (", $self->_dump_curr_open(), ")\n"; - + DEBUG > 9 and print STDERR "Stack: ", pretty($curr_open), "\n"; unshift @$paras, $self->_closers_for_all_curr_open; # Make sure there is exactly one ~end in the parastack, at the end: @@ -1205,11 +1439,11 @@ sub _ponder_doc_end { # generate errata, and then another to be at the end # when that loop back around to process the errata. return 1; - + } else { DEBUG and print STDERR "Okay, stack is empty now.\n"; } - + # Try generating errata section, if applicable unless($self->{'~tried_gen_errata'}) { $self->{'~tried_gen_errata'} = 1; @@ -1220,7 +1454,7 @@ sub _ponder_doc_end { return 1; # I.e., loop around again to process these fake-o paragraphs } } - + splice @$paras; # Well, that's that for this paragraph buffer. DEBUG and print STDERR "Throwing end-document event.\n"; @@ -1245,7 +1479,7 @@ sub _ponder_pod { # The surrounding methods set content_seen, so let us remain consistent. # I do not know why it was not here before -- should it not be here? - # $self->{'content_seen'} ||= 1; + # $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; return; } @@ -1278,8 +1512,9 @@ sub _ponder_over { $para->[1]{'~type'} = $list_type; push @$curr_open, $para; # yes, we reuse the paragraph as a stack item - + my $content = join ' ', splice @$para, 2; + $para->[1]{'~orig_content'} = $content; my $overness; if($content =~ m/^\s*$/s) { $para->[1]{'indent'} = 4; @@ -1301,13 +1536,13 @@ sub _ponder_over { $para->[1]{'indent'} = 4; } DEBUG > 1 and print STDERR "=over found of type $list_type\n"; - - $self->{'content_seen'} ||= 1; + + $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]); return; } - + sub _ponder_back { my ($self,$para,$curr_open,$paras) = @_; # TODO: fire off </item-number> or </item-bullet> or </item-text> ?? @@ -1324,7 +1559,7 @@ sub _ponder_back { DEBUG > 1 and print STDERR "=back happily closes matching =over\n"; # Expected case: we're closing the most recently opened thing #my $over = pop @$curr_open; - $self->{'content_seen'} ||= 1; + $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; $self->_handle_element_end( my $scratch = 'over-' . ( (pop @$curr_open)->[1]{'~type'} ), $para->[1] ); @@ -1354,10 +1589,10 @@ sub _ponder_item { ; return 1; } - - + + my $over_type = $over->[1]{'~type'}; - + if(!$over_type) { # Shouldn't happen1 die "Typeless over in stack, starting at line " @@ -1382,7 +1617,7 @@ sub _ponder_item { my $item_type = $self->_get_item_type($para); # That kills the content of the item if it's a number or bullet. DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; - + if($item_type eq 'text') { # Nothing special needs doing for 'text' } elsif($item_type eq 'number' or $item_type eq 'bullet') { @@ -1398,16 +1633,16 @@ sub _ponder_item { } else { die "Unhandled item type $item_type"; # should never happen } - + # =item-text thingies don't need any assimilation, it seems. } elsif($over_type eq 'number') { my $item_type = $self->_get_item_type($para); # That kills the content of the item if it's a number or bullet. DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; - + my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; - + if($item_type eq 'bullet') { # Hm, it's not numeric. Correct for this. $para->[1]{'number'} = $expected_value; @@ -1432,7 +1667,7 @@ sub _ponder_item { } elsif($expected_value == $para->[1]{'number'}) { DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n"; - + } else { DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'}, " instead of the expected value of $expected_value\n"; @@ -1443,7 +1678,7 @@ sub _ponder_item { ); $para->[1]{'number'} = $expected_value; # correcting!! } - + if(@$para == 2) { # For the cases where we /didn't/ push to @$para if($paras->[0][0] eq '~Para') { @@ -1460,13 +1695,13 @@ sub _ponder_item { my $item_type = $self->_get_item_type($para); # That kills the content of the item if it's a number or bullet. DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; - + if($item_type eq 'bullet') { # as expected! if( $para->[1]{'~_freaky_para_hack'} ) { DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n"; - push @$para, delete $para->[1]{'~_freaky_para_hack'}; + push @$para, $para->[1]{'~_freaky_para_hack'}; } } elsif($item_type eq 'number') { @@ -1533,30 +1768,44 @@ sub _ponder_Verbatim { $para->[1]{'xml:space'} = 'preserve'; - my $indent = $self->strip_verbatim_indent; - if ($indent && ref $indent eq 'CODE') { - my @shifted = (shift @{$para}, shift @{$para}); - $indent = $indent->($para); - unshift @{$para}, @shifted; - } - - for(my $i = 2; $i < @$para; $i++) { - foreach my $line ($para->[$i]) { # just for aliasing - # Strip indentation. - $line =~ s/^\Q$indent// if $indent - && !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted}); - while( $line =~ - # Sort of adapted from Text::Tabs -- yes, it's hardwired in that - # tabs are at every EIGHTH column. For portability, it has to be - # one setting everywhere, and 8th wins. - s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e - ) {} + unless ($self->{'_output_is_for_JustPod'}) { + # Fix illegal settings for expand_verbatim_tabs() + # This is because this module doesn't do input error checking, but khw + # doesn't want to add yet another instance of that. + $self->expand_verbatim_tabs(8) + if ! defined $self->expand_verbatim_tabs() + || $self->expand_verbatim_tabs() =~ /\D/; + + my $indent = $self->strip_verbatim_indent; + if ($indent && ref $indent eq 'CODE') { + my @shifted = (shift @{$para}, shift @{$para}); + $indent = $indent->($para); + unshift @{$para}, @shifted; + } - # TODO: whinge about (or otherwise treat) unindented or overlong lines + for(my $i = 2; $i < @$para; $i++) { + foreach my $line ($para->[$i]) { # just for aliasing + # Strip indentation. + $line =~ s/^\Q$indent// if $indent; + next unless $self->expand_verbatim_tabs; + + # This is commented out because of github issue #85, and the + # current maintainers don't know why it was there in the first + # place. + #&& !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted}); + while( $line =~ + # Sort of adapted from Text::Tabs. + s/^([^\t]*)(\t+)/$1.(" " x ((length($2) + * $self->expand_verbatim_tabs) + -(length($1)&7)))/e + ) {} + + # TODO: whinge about (or otherwise treat) unindented or overlong lines + } } } - + # Now the VerbatimFormatted hoodoo... if( $self->{'accept_codes'} and $self->{'accept_codes'}{'VerbatimFormatted'} @@ -1596,7 +1845,7 @@ sub _traverse_treelet_bit { # for use only by the routine above my $scratch; $self->_handle_element_start(($scratch=$name), shift @_); - + while (@_) { my $x = shift; if (ref($x)) { @@ -1606,7 +1855,7 @@ sub _traverse_treelet_bit { # for use only by the routine above $self->_handle_text($x); } } - + $self->_handle_element_end($scratch=$name); return; } @@ -1651,7 +1900,7 @@ sub _closers_for_all_curr_open { sub _verbatim_format { my($it, $p) = @_; - + my $formatting; for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines @@ -1659,7 +1908,7 @@ sub _verbatim_format { $p->[$i] .= "\n"; # Unlike with simple Verbatim blocks, we don't end up just doing # a join("\n", ...) on the contents, so we have to append a - # newline to ever line, and then nix the last one later. + # newline to every line, and then nix the last one later. } if( DEBUG > 4 ) { @@ -1672,7 +1921,7 @@ sub _verbatim_format { for(my $i = $#$p; $i > 2; $i--) { # work backwards over the lines, except the first (#2) - + #next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s # and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s; # look at a formatty line preceding a nonformatty one @@ -1680,7 +1929,7 @@ sub _verbatim_format { if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) { DEBUG > 5 and print STDERR " It's a formatty line. ", "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n"; - + if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) { DEBUG > 5 and print STDERR " Previous line is formatty! Skipping this one.\n"; next; @@ -1696,11 +1945,11 @@ sub _verbatim_format { # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic. # Example: # What do you want? i like pie. [or whatever] - # #:^^^^^^^^^^^^^^^^^ ///////////// - + # #:^^^^^^^^^^^^^^^^^ ///////////// + DEBUG > 4 and print STDERR "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n"; - + $formatting = ' ' . $1; $formatting =~ s/\s+$//s; # nix trailing whitespace unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op @@ -1716,7 +1965,7 @@ sub _verbatim_format { } # Make $formatting and the previous line be exactly the same length, # with $formatting having a " " as the last character. - + DEBUG > 4 and print STDERR "Formatting <$formatting> on <", $p->[$i-1], ">\n"; @@ -1741,10 +1990,10 @@ sub _verbatim_format { #print STDERR "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n"; } } - my @nixed = + my @nixed = splice @$p, $i-1, 2, @new_line; # replace myself and the next line DEBUG > 10 and print STDERR "Nixed count: ", scalar(@nixed), "\n"; - + DEBUG > 6 and print STDERR "New version of the above line is these tokens (", scalar(@new_line), "):", map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n"; @@ -1791,29 +2040,46 @@ sub _treelet_from_formatting_codes { # [ 'B', {}, "pie" ], # "!" # ] - + # This illustrates the general format of a treelet. It is an array: + # [0] is a scalar indicating its type. In the example above, the + # types are '~Top' and 'B' + # [1] is a hash of various flags about it, possibly empty + # [2] - [N] are an ordered list of the subcomponents of the treelet. + # Scalars are literal text, refs are sub-treelets, to + # arbitrary levels. Stringifying a treelet will recursively + # stringify the sub-treelets, concatentating everything + # together to form the exact text of the treelet. + my($self, $para, $start_line, $preserve_space) = @_; - + my $treelet = ['~Top', {'start_line' => $start_line},]; - + unless ($preserve_space || $self->{'preserve_whitespace'}) { $para =~ s/\s+/ /g; # collapse and trim all whitespace first. $para =~ s/ $//; $para =~ s/^ //; } - + # Only apparent problem the above code is that N<< >> turns into # N<< >>. But then, word wrapping does that too! So don't do that! - + + + # As a Start-code is encountered, the number of opening bracket '<' + # characters minus 1 is pushed onto @stack (so 0 means a single bracket, + # etc). When closing brackets are found in the text, at least this number + # (plus the 1) will be required to mean the Start-code is terminated. When + # those are found, @stack is popped. my @stack; + my @lineage = ($treelet); my $raw = ''; # raw content of L<> fcode before splitting/processing # XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed - # into just 1 ' '. Is this the regex's doing or 'raw's? + # into just 1 ' '. Is this the regex's doing or 'raw's? Answer is it's + # the 'collapse and trim all whitespace first' lines just above. my $inL = 0; DEBUG > 4 and print STDERR "Paragraph:\n$para\n\n"; - + # Here begins our frightening tokenizer RE. The following regex matches # text in four main parts: # @@ -1846,7 +2112,11 @@ sub _treelet_from_formatting_codes { | # Match multiple-bracket end codes. $3 gets the whitespace that # should be discarded before an end bracket but kept in other cases - # and $4 gets the end brackets themselves. + # and $4 gets the end brackets themselves. ($3 can be empty if the + # construct is empty, like C<< >>, and all the white-space has been + # gobbled up already, considered to be space after the opening + # bracket. In this case we use look-behind to verify that there are + # at least 2 spaces in a row before the ">".) (\s+|(?<=\s\s))(>{2,}) | (\s?>) # $5: simple end-codes @@ -1872,23 +2142,48 @@ sub _treelet_from_formatting_codes { ) { DEBUG > 4 and print STDERR "\nParagraphic tokenstack = (@stack)\n"; if(defined $1) { + my $bracket_count; # How many '<<<' in a row this has. Needed for + # Pod::Simple::JustPod if(defined $2) { DEBUG > 3 and print STDERR "Found complex start-text code \"$1\"\n"; - push @stack, length($2) + 1; - # length of the necessary complex end-code string + $bracket_count = length($2) + 1; + push @stack, $bracket_count; # length of the necessary complex + # end-code string } else { DEBUG > 3 and print STDERR "Found simple start-text code \"$1\"\n"; push @stack, 0; # signal that we're looking for simple + $bracket_count = 1; } - push @lineage, [ substr($1,0,1), {}, ]; # new node object - push @{ $lineage[-2] }, $lineage[-1]; - if ('L' eq substr($1,0,1)) { - $raw = $inL ? $raw.$1 : ''; # reset raw content accumulator - $inL = 1; + my $code = substr($1,0,1); + if ('L' eq $code) { + if ($inL) { + $raw .= $1; + $self->scream( $start_line, + 'Nested L<> are illegal. Pretending inner one is ' + . 'X<...> so can continue looking for other errors.'); + $code = "X"; + } + else { + $raw = ""; # reset raw content accumulator + $inL = @stack; + } } else { $raw .= $1 if $inL; } - + push @lineage, [ $code, {}, ]; # new node object + + # Tell Pod::Simple::JustPod how many brackets there were, but to save + # space, not in the most usual case of there was just 1. It can be + # inferred by the absence of this element. Similarly, if there is more + # than one bracket, extract the white space between the final bracket + # and the real beginning of the interior. Save that if it isn't just a + # single space + if ($self->{'_output_is_for_JustPod'} && $bracket_count > 1) { + $lineage[-1][1]{'~bracket_count'} = $bracket_count; + my $lspacer = substr($1, 1 + $bracket_count); + $lineage[-1][1]{'~lspacer'} = $lspacer if $lspacer ne " "; + } + push @{ $lineage[-2] }, $lineage[-1]; } elsif(defined $4) { DEBUG > 3 and print STDERR "Found apparent complex end-text code \"$3$4\"\n"; # This is where it gets messy... @@ -1917,20 +2212,35 @@ sub _treelet_from_formatting_codes { } #print STDERR "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n"; + if ($3 ne " " && $self->{'_output_is_for_JustPod'}) { + if ($3 ne "") { + $lineage[-1][1]{'~rspacer'} = $3; + } + elsif ($lineage[-1][1]{'~lspacer'} eq " ") { + + # Here we had something like C<< >> which was a false positive + delete $lineage[-1][1]{'~lspacer'}; + } + else { + $lineage[-1][1]{'~rspacer'} + = substr($lineage[-1][1]{'~lspacer'}, -1, 1); + chop $lineage[-1][1]{'~lspacer'}; + } + } + push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; # Keep the element from being childless - - pop @stack; - pop @lineage; - unless (@stack) { # not in an L if there are no open fcodes + if ($inL == @stack) { + $lineage[-1][1]{'raw'} = $raw; $inL = 0; - if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') { - $lineage[-1][-1][1]{'raw'} = $raw - } } + + pop @stack; + pop @lineage; + $raw .= $3.$4 if $inL; - + } elsif(defined $5) { DEBUG > 3 and print STDERR "Found apparent simple end-text code \"$5\"\n"; @@ -1944,6 +2254,11 @@ sub _treelet_from_formatting_codes { push @{ $lineage[-1] }, ''; # keep it from being really childless } + if ($inL == @stack) { + $lineage[-1][1]{'raw'} = $raw; + $inL = 0; + } + pop @stack; pop @lineage; } else { @@ -1951,12 +2266,6 @@ sub _treelet_from_formatting_codes { push @{ $lineage[-1] }, $5; } - unless (@stack) { # not in an L if there are no open fcodes - $inL = 0; - if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') { - $lineage[-1][-1][1]{'raw'} = $raw - } - } $raw .= $5 if $inL; } elsif(defined $6) { @@ -1965,6 +2274,7 @@ sub _treelet_from_formatting_codes { $raw .= $6 if $inL; # XXX does not capture multiplace whitespaces -- 'raw' ends up with # at most 1 leading/trailing whitespace, why not all of it? + # Answer, because we deliberately trimmed it above } else { # should never ever ever ever happen @@ -2095,7 +2405,7 @@ sub pretty { # adopted from Class::Classless # letters, but I don't know if it has always worked without bugs. It # seemed safest just to list the characters. # s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])> - s<([^ !#'()*+,\-./0123456789:;\<=\>?ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]^_`abcdefghijklmnopqrstuvwxyz{|}~])> + s<([^ !"#'()*+,\-./0123456789:;\<=\>?ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]^_`abcdefghijklmnopqrstuvwxyz{|}~])> <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; qq{"$_"}; diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Checker.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Checker.pm index 83415f8e25e..2fef0305a5c 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Checker.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Checker.pm @@ -9,7 +9,7 @@ use Carp (); use Pod::Simple::Methody (); use Pod::Simple (); use vars qw( @ISA $VERSION ); -$VERSION = '3.35'; +$VERSION = '3.40'; @ISA = ('Pod::Simple::Methody'); BEGIN { *DEBUG = defined(&Pod::Simple::DEBUG) ? \&Pod::Simple::DEBUG @@ -88,8 +88,10 @@ sub end_item_text { $_[0]->emit_par(-2) } sub emit_par { return unless $_[0]{'Errata_seen'}; my($self, $tweak_indent) = splice(@_,0,2); - my $indent = ' ' x ( 2 * $self->{'Indent'} + ($tweak_indent||0) ); + my $length = 2 * $self->{'Indent'} + ($tweak_indent||0); + my $indent = ' ' x ($length > 0 ? $length : 0); # Yes, 'STRING' x NEGATIVE gives '', same as 'STRING' x 0 + # 'Negative repeat count does nothing' since 5.22 $self->{'Thispara'} =~ s/$Pod::Simple::shy//g; my $out = Text::Wrap::wrap($indent, $indent, $self->{'Thispara'} .= "\n"); diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Debug.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Debug.pm index 428cc723594..aaa5a887e6b 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Debug.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Debug.pm @@ -2,7 +2,7 @@ require 5; package Pod::Simple::Debug; use strict; use vars qw($VERSION ); -$VERSION = '3.35'; +$VERSION = '3.40'; sub import { my($value,$variable); diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/DumpAsText.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/DumpAsText.pm index 71bef5070be..bade6fcc472 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/DumpAsText.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/DumpAsText.pm @@ -1,7 +1,7 @@ require 5; package Pod::Simple::DumpAsText; -$VERSION = '3.35'; +$VERSION = '3.40'; use Pod::Simple (); BEGIN {@ISA = ('Pod::Simple')} diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm index 9d84878cb78..6f0b7b18621 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm @@ -1,7 +1,7 @@ require 5; package Pod::Simple::DumpAsXML; -$VERSION = '3.35'; +$VERSION = '3.40'; use Pod::Simple (); BEGIN {@ISA = ('Pod::Simple')} diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/HTML.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/HTML.pm index 9cdbed217e5..0219b979100 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/HTML.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/HTML.pm @@ -9,7 +9,7 @@ use vars qw( $Doctype_decl $Content_decl ); @ISA = ('Pod::Simple::PullParser'); -$VERSION = '3.35'; +$VERSION = '3.40'; BEGIN { if(defined &DEBUG) { } # no-op elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG } @@ -29,7 +29,7 @@ $LamePad = '' unless defined $LamePad; $Linearization_Limit = 120 unless defined $Linearization_Limit; # headings/items longer than that won't get an <a name="..."> -$Perldoc_URL_Prefix = 'http://search.cpan.org/perldoc?' +$Perldoc_URL_Prefix = 'https://metacpan.org/pod/' unless defined $Perldoc_URL_Prefix; $Perldoc_URL_Postfix = '' unless defined $Perldoc_URL_Postfix; diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm index 661266d0de4..227d6d3af0d 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm @@ -5,7 +5,7 @@ use strict; use vars qw( $VERSION $HTML_RENDER_CLASS $HTML_EXTENSION $CSS $JAVASCRIPT $SLEEPY $SEARCH_CLASS @ISA ); -$VERSION = '3.35'; +$VERSION = '3.40'; @ISA = (); # Yup, we're NOT a subclass of Pod::Simple::HTML! # TODO: nocontents stylesheets. Strike some of the color variations? @@ -720,22 +720,21 @@ sub _gen_css_wad { } # Now a few indexless variations: - foreach my $variation ( - 'blkbluw', # black_with_blue_on_white - 'whtpurk', # white_with_purple_on_black - 'whtgrng', # white_with_green_on_grey - 'grygrnw', # grey_with_green_on_white - ) { - my $outname = $variation; + for (my ($outfile, $variation) = each %{{ + blkbluw => 'black_with_blue_on_white', + whtpurk => 'white_with_purple_on_black', + whtgrng => 'white_with_green_on_grey', + grygrnw => 'grey_with_green_on_white', + }}) { my $this_css = join "\n", - "/* This file is autogenerated. Do not edit. $outname */\n", + "/* This file is autogenerated. Do not edit. $outfile */\n", "\@import url(\"./_$variation.css\");", ".indexgroup { display: none; }", "\n", ; - my $name = $outname; + my $name = $outfile; $name =~ tr/-_/ /; - $self->add_css( "_$outname.css", 0, $name, 0, 0, \$this_css); + $self->add_css( "_$outfile.css", 0, $name, 0, 0, \$this_css); } return; @@ -1110,12 +1109,15 @@ Example: =item $batchconv = Pod::Simple::HTMLBatch->new; -This TODO - +This creates a new batch converter. The method doesn't take parameters. +To change the converter's attributes, use the L<"/ACCESSOR METHODS"> +below. =item $batchconv->batch_convert( I<indirs>, I<outdir> ); -this TODO +This searches the directories given in I<indirs> and writes +HTML files for each of these to a corresponding directory +in I<outdir>. The directory I<outdir> must exist. =item $batchconv->batch_convert( undef , ...); diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/LinkSection.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/LinkSection.pm index 04612f202e9..b9ca19cdf93 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/LinkSection.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/LinkSection.pm @@ -2,13 +2,11 @@ require 5; package Pod::Simple::LinkSection; # Based somewhat dimly on Array::Autojoin -use vars qw($VERSION ); -$VERSION = '3.35'; use strict; use Pod::Simple::BlackBox; use vars qw($VERSION ); -$VERSION = '3.35'; +$VERSION = '3.40'; use overload( # So it'll stringify nice '""' => \&Pod::Simple::BlackBox::stringify_lol, diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Methody.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Methody.pm index 67b87067416..5bcee54d4f7 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Methody.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Methody.pm @@ -4,7 +4,7 @@ package Pod::Simple::Methody; use strict; use Pod::Simple (); use vars qw(@ISA $VERSION); -$VERSION = '3.35'; +$VERSION = '3.40'; @ISA = ('Pod::Simple'); # Yes, we could use named variables, but I want this to be impose diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Progress.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Progress.pm index 0c18a5b37d6..3d6f4031125 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Progress.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Progress.pm @@ -1,7 +1,7 @@ require 5; package Pod::Simple::Progress; -$VERSION = '3.35'; +$VERSION = '3.40'; use strict; # Objects of this class are used for noting progress of an diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParser.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParser.pm index 7c326ec6aee..ceeb3f92504 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParser.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParser.pm @@ -1,6 +1,6 @@ require 5; package Pod::Simple::PullParser; -$VERSION = '3.35'; +$VERSION = '3.40'; use Pod::Simple (); BEGIN {@ISA = ('Pod::Simple')} diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParserEndToken.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParserEndToken.pm index d3066a8e87c..d9ebdcbcf0e 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParserEndToken.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParserEndToken.pm @@ -5,7 +5,7 @@ use Pod::Simple::PullParserToken (); use strict; use vars qw(@ISA $VERSION); @ISA = ('Pod::Simple::PullParserToken'); -$VERSION = '3.35'; +$VERSION = '3.40'; sub new { # Class->new(tagname); my $class = shift; diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParserStartToken.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParserStartToken.pm index d938e0adb21..61608fb466c 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParserStartToken.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParserStartToken.pm @@ -5,7 +5,7 @@ use Pod::Simple::PullParserToken (); use strict; use vars qw(@ISA $VERSION); @ISA = ('Pod::Simple::PullParserToken'); -$VERSION = '3.35'; +$VERSION = '3.40'; sub new { # Class->new(tagname, optional_attrhash); my $class = shift; diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParserTextToken.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParserTextToken.pm index a11ce0fd92d..c8247a081e7 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParserTextToken.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParserTextToken.pm @@ -5,7 +5,7 @@ use Pod::Simple::PullParserToken (); use strict; use vars qw(@ISA $VERSION); @ISA = ('Pod::Simple::PullParserToken'); -$VERSION = '3.35'; +$VERSION = '3.40'; sub new { # Class->new(text); my $class = shift; diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParserToken.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParserToken.pm index c6618168e6b..f14b5637cd4 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParserToken.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParserToken.pm @@ -3,7 +3,7 @@ require 5; package Pod::Simple::PullParserToken; # Base class for tokens gotten from Pod::Simple::PullParser's $parser->get_token @ISA = (); -$VERSION = '3.35'; +$VERSION = '3.40'; use strict; sub new { # Class->new('type', stuff...); ## Overridden in derived classes anyway diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/RTF.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/RTF.pm index 153c3d3e287..ed0de149ae0 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/RTF.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/RTF.pm @@ -8,24 +8,67 @@ package Pod::Simple::RTF; use strict; use vars qw($VERSION @ISA %Escape $WRAP %Tagmap); -$VERSION = '3.35'; +$VERSION = '3.40'; use Pod::Simple::PullParser (); BEGIN {@ISA = ('Pod::Simple::PullParser')} use Carp (); BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG } +sub to_uni ($) { # Convert native code point to Unicode + my $x = shift; + + # Broken for early EBCDICs + $x = chr utf8::native_to_unicode(ord $x) if $] ge 5.007_003 + && ord("A") != 65; + return $x; +} + +# We escape out 'F' so that we can send RTF files thru the mail without the +# slightest worry that paragraphs beginning with "From" will get munged. +# We also escape '\', '{', '}', and '_' +my $map_to_self = ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEGHIJKLMNOPQRSTUVWXYZ[]^`abcdefghijklmnopqrstuvwxyz|~'; + $WRAP = 1 unless defined $WRAP; +%Escape = ( + + # Start with every character mapping to its hex equivalent + map( (chr($_) => sprintf("\\'%02x", $_)), 0 .. 0xFF), + + # Override most ASCII printables with themselves (or on non-ASCII platforms, + # their ASCII values. This is because the output is UTF-16, which is always + # based on Unicode code points) + map( ( substr($map_to_self, $_, 1) + => to_uni(substr($map_to_self, $_, 1))), 0 .. length($map_to_self) - 1), + + # And some refinements: + "\r" => "\n", + "\cj" => "\n", + "\n" => "\n\\line ", + + "\t" => "\\tab ", # Tabs (altho theoretically raw \t's are okay) + "\f" => "\n\\page\n", # Formfeed + "-" => "\\_", # Turn plaintext '-' into a non-breaking hyphen + $Pod::Simple::nbsp => "\\~", # Latin-1 non-breaking space + $Pod::Simple::shy => "\\-", # Latin-1 soft (optional) hyphen -# These are broken for early Perls on EBCDIC; they could be fixed to work -# better there, but not worth it. These are part of a larger [...] class, so -# are just the strings to substitute into it, as opposed to compiled patterns. -my $cntrl = '[:cntrl:]'; -$cntrl = '\x00-\x1F\x7F' unless eval "qr/[$cntrl]/"; + # CRAZY HACKS: + "\n" => "\\line\n", + "\r" => "\n", + "\cb" => "{\n\\cs21\\lang1024\\noproof ", # \\cf1 + "\cc" => "}", +); -my $not_ascii = '[:^ascii:]'; -$not_ascii = '\x80-\xFF' unless eval "qr/[$not_ascii]/"; +# Generate a string of all the characters in %Escape that don't map to +# themselves. First, one without the hyphen, then one with. +my $escaped_sans_hyphen = ""; +$escaped_sans_hyphen .= $_ for grep { $_ ne $Escape{$_} && $_ ne '-' } + sort keys %Escape; +my $escaped = "-$escaped_sans_hyphen"; +# Then convert to patterns +$escaped_sans_hyphen = qr/[\Q$escaped_sans_hyphen \E]/; +$escaped= qr/[\Q$escaped\E]/; #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -158,6 +201,13 @@ sub run { #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Match something like an identifier. Prefer XID if available, then plain ID, +# then just ASCII +my $id_re = Pod::Simple::BlackBox::my_qr('[\'_\p{XIDS}][\'\p{XIDC}]+', "ab"); +$id_re = Pod::Simple::BlackBox::my_qr('[\'_\p{IDS}][\'\p{IDC}]+', "ab") + unless $id_re; +$id_re = qr/['_a-zA-Z]['a-zA-Z0-9_]+/ unless $id_re; + sub do_middle { # the main work my $self = $_[0]; my $fh = $self->{'output_fh'}; @@ -172,7 +222,7 @@ sub do_middle { # the main work if( ($type = $token->type) eq 'text' ) { if( $self->{'rtfverbatim'} ) { DEBUG > 1 and print STDERR " $type " , $token->text, " in verbatim!\n"; - rtf_esc_codely($scratch = $token->text); + rtf_esc(0, $scratch = $token->text); # 0 => Don't escape hyphen print $fh $scratch; next; } @@ -195,13 +245,13 @@ sub do_middle { # the main work | # or starting alpha, but containing anything strange: (?: - [a-zA-Z'${not_ascii}]+[\$\@\:_<>\(\\\*]\S+ + ${id_re}[\$\@\:_<>\(\\\*]\S+ ) ) /\cb$1\cc/xsg ; - rtf_esc($scratch); + rtf_esc(1, $scratch); # 1 => escape hyphen $scratch =~ s/( [^\r\n]{65} # Snare 65 characters from a line @@ -311,7 +361,7 @@ sub do_middle { # the main work print $fh $token->attr('number'), ". \n"; } elsif ($tagname eq 'item-bullet') { print $fh "\\'", ord("_"), "\n"; - #for funky testing: print $fh '', rtf_esc("\x{4E4B}\x{9053}"); + #for funky testing: print $fh '', rtf_esc(1, "\x{4E4B}\x{9053}"); } } elsif( $type eq 'end' ) { @@ -465,7 +515,7 @@ sub doc_start { # catches the most common case, at least DEBUG and print STDERR "Title0: <$title>\n"; - $title = rtf_esc($title); + $title = rtf_esc(1, $title); # 1 => escape hyphen DEBUG and print STDERR "Title1: <$title>\n"; $title = '\lang1024\noproof ' . $title if $is_obviously_module_name; @@ -489,90 +539,69 @@ END #------------------------------------------------------------------------- use integer; -sub rtf_esc { - my $x; # scratch - if(!defined wantarray) { # void context: alter in-place! - for(@_) { - s/([F${cntrl}\-\\\{\}${not_ascii}])/$Escape{$1}/g; # ESCAPER - s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg; - } - return; - } elsif(wantarray) { # return an array - return map {; ($x = $_) =~ - s/([F${cntrl}\-\\\{\}${not_ascii}])/$Escape{$1}/g; # ESCAPER - $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg; - $x; - } @_; - } else { # return a single scalar - ($x = ((@_ == 1) ? $_[0] : join '', @_) - ) =~ s/([F${cntrl}\-\\\{\}${not_ascii}])/$Escape{$1}/g; # ESCAPER - # Escape \, {, }, -, control chars, and 7f-ff. - $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg; + +my $question_mark_code_points = + Pod::Simple::BlackBox::my_qr('([^\x00-\x{D7FF}\x{E000}-\x{10FFFF}])', + "\x{110000}"); +my $plane0 = + Pod::Simple::BlackBox::my_qr('([\x{100}-\x{FFFF}])', "\x{100}"); +my $other_unicode = + Pod::Simple::BlackBox::my_qr('([\x{10000}-\x{10FFFF}])', "\x{10000}"); + +sub esc_uni($) { + use if $] le 5.006002, 'utf8'; + + my $x = shift; + + # The output is expected to be UTF-16. Surrogates and above-Unicode get + # mapped to '?' + $x =~ s/$question_mark_code_points/?/g if $question_mark_code_points; + + # Non-surrogate Plane 0 characters get mapped to their code points. But + # the standard calls for a 16bit SIGNED value. + $x =~ s/$plane0/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg + if $plane0; + + # Use surrogate pairs for the rest + $x =~ s/$other_unicode/'\\uc1\\u' . ((ord($1) >> 10) + 0xD7C0 - 65536) . '\\u' . (((ord$1) & 0x03FF) + 0xDC00 - 65536) . '?'/eg if $other_unicode; + return $x; - } } -sub rtf_esc_codely { - # Doesn't change "-" to hard-hyphen, nor apply computerese style-smarts. - # We don't want to change the "-" to hard-hyphen, because we want to +sub rtf_esc ($$) { + # The parameter is true if we should escape hyphens + my $escape_re = ((shift) ? $escaped : $escaped_sans_hyphen); + + # When false, it doesn't change "-" to hard-hyphen. + # We don't want to change the "-" to hard-hyphen, because we want to # be able to paste this into a file and run it without there being # dire screaming about the mysterious hard-hyphen character (which # looks just like a normal dash character). - + # XXX The comments used to claim that when false it didn't apply computerese + # style-smarts, but khw didn't see this actually + my $x; # scratch if(!defined wantarray) { # void context: alter in-place! for(@_) { - s/([F${cntrl}\\\{\}${not_ascii}])/$Escape{$1}/g; # ESCAPER - s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg; + s/($escape_re)/$Escape{$1}/g; # ESCAPER + $_ = esc_uni($_); } return; } elsif(wantarray) { # return an array return map {; ($x = $_) =~ - s/([F${cntrl}\\\{\}${not_ascii}])/$Escape{$1}/g; # ESCAPER - $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg; + s/($escape_re)/$Escape{$1}/g; # ESCAPER + $x = esc_uni($x); $x; } @_; } else { # return a single scalar ($x = ((@_ == 1) ? $_[0] : join '', @_) - ) =~ s/([F${cntrl}\\\{\}${not_ascii}])/$Escape{$1}/g; # ESCAPER + ) =~ s/($escape_re)/$Escape{$1}/g; # ESCAPER # Escape \, {, }, -, control chars, and 7f-ff. - $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg; + $x = esc_uni($x); return $x; } } -%Escape = ( - (($] lt 5.007_003) # Broken for non-ASCII on early Perls - ? (map( (chr($_),chr($_)), # things not apparently needing escaping - 0x20 .. 0x7E ), - map( (chr($_),sprintf("\\'%02x", $_)), # apparently escapeworthy things - 0x00 .. 0x1F, 0x5c, 0x7b, 0x7d, 0x7f .. 0xFF, 0x46)) - : (map( (chr(utf8::unicode_to_native($_)),chr(utf8::unicode_to_native($_))), - 0x20 .. 0x7E ), - map( (chr($_),sprintf("\\'%02x", utf8::unicode_to_native($_))), - 0x00 .. 0x1F, 0x5c, 0x7b, 0x7d, 0x7f .. 0xFF, 0x46))), - - # We get to escape out 'F' so that we can send RTF files thru the mail - # without the slightest worry that paragraphs beginning with "From" - # will get munged. - - # And some refinements: - "\r" => "\n", - "\cj" => "\n", - "\n" => "\n\\line ", - - "\t" => "\\tab ", # Tabs (altho theoretically raw \t's are okay) - "\f" => "\n\\page\n", # Formfeed - "-" => "\\_", # Turn plaintext '-' into a non-breaking hyphen - $Pod::Simple::nbsp => "\\~", # Latin-1 non-breaking space - $Pod::Simple::shy => "\\-", # Latin-1 soft (optional) hyphen - - # CRAZY HACKS: - "\n" => "\\line\n", - "\r" => "\n", - "\cb" => "{\n\\cs21\\lang1024\\noproof ", # \\cf1 - "\cc" => "}", -); 1; __END__ diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Search.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Search.pm index df499cacf2d..a07d33b85ac 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Search.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Search.pm @@ -3,7 +3,7 @@ package Pod::Simple::Search; use strict; use vars qw($VERSION $MAX_VERSION_WITHIN $SLEEPY); -$VERSION = '3.35'; ## Current version of this package +$VERSION = '3.40'; ## Current version of this package BEGIN { *DEBUG = sub () {0} unless defined &DEBUG; } # set DEBUG level use Carp (); @@ -12,7 +12,6 @@ $SLEEPY = 1 if !defined $SLEEPY and $^O =~ /mswin|mac/i; # flag to occasionally sleep for $SLEEPY - 1 seconds. $MAX_VERSION_WITHIN ||= 60; -my $IS_CASE_INSENSITIVE = -e uc __FILE__ && -e lc __FILE__; ############################################################################# @@ -26,7 +25,7 @@ use Cwd qw( cwd ); __PACKAGE__->_accessorize( # Make my dumb accessor methods 'callback', 'progress', 'dir_prefix', 'inc', 'laborious', 'limit_glob', 'limit_re', 'shadows', 'verbose', 'name2path', 'path2name', 'recurse', - 'ciseen' + 'ciseen', 'is_case_insensitive' ); #========================================================================== @@ -42,6 +41,7 @@ sub init { $self->inc(1); $self->recurse(1); $self->verbose(DEBUG); + $self->is_case_insensitive(-e uc __FILE__ && -e lc __FILE__); return $self; } @@ -130,12 +130,12 @@ sub _make_search_callback { # Put the options in variables, for easy access my( $laborious, $verbose, $shadows, $limit_re, $callback, $progress, - $path2name, $name2path, $recurse, $ciseen) = + $path2name, $name2path, $recurse, $ciseen, $is_case_insensitive) = map scalar($self->$_()), qw(laborious verbose shadows limit_re callback progress - path2name name2path recurse ciseen); + path2name name2path recurse ciseen is_case_insensitive); my ($seen, $remember, $files_for); - if ($IS_CASE_INSENSITIVE) { + if ($is_case_insensitive) { $seen = sub { $ciseen->{ lc $_[0] } }; $remember = sub { $name2path->{ $_[0] } = $ciseen->{ lc $_[0] } = $_[1]; }; $files_for = sub { my $n = lc $_[0]; grep { lc $path2name->{$_} eq $n } %{ $path2name } }; @@ -259,7 +259,7 @@ sub _path2modname { while(@m and defined($x = lc( $m[0] )) and( $x eq 'site_perl' - or($x eq 'pod' and @m == 1 and $shortname =~ m{^perl.*\.pod$}s ) + or($x =~ m/^pods?$/ and @m == 1 and $shortname =~ m{^perl.*\.pod$}s ) or $x =~ m{\\d+\\.z\\d+([_.]?\\d+)?} # if looks like a vernum or $x eq lc( $Config::Config{'archname'} ) )) { shift @m } @@ -546,7 +546,7 @@ sub _limit_glob_to_limit_re { sub _actual_filenames { my $dir = shift; my $fn = lc shift; - opendir my $dh, $dir or return; + opendir my ($dh), $dir or return; return map { File::Spec->catdir($dir, $_) } grep { lc $_ eq $fn } readdir $dh; } @@ -588,7 +588,7 @@ sub find { my $fullext = $fullname . $ext; if ( -f $fullext and $self->contains_pod($fullext) ) { print "FOUND: $fullext\n" if $verbose; - if (@parts > 1 && lc $parts[0] eq 'pod' && $IS_CASE_INSENSITIVE && $ext eq '.pod') { + if (@parts > 1 && lc $parts[0] eq 'pod' && $self->is_case_insensitive() && $ext eq '.pod') { # Well, this file could be for a program (perldoc) but we actually # want a module (Pod::Perldoc). So see if there is a .pm with the # proper casing. @@ -611,7 +611,7 @@ sub find { } # Case-insensitively Look for ./pod directories and slip them in. - for my $subdir ( _actual_filenames($dir, 'pod') ) { + for my $subdir ( _actual_filenames($dir, 'pods'), _actual_filenames($dir, 'pod') ) { if (-d $subdir) { $verbose and print "Noticing $subdir and looking there...\n"; unshift @search_dirs, $subdir; @@ -849,6 +849,20 @@ inspected too, and are noted in the pathname2podname return hash. This attribute's default value is false; and normally you won't need to turn it on. +=item $search->is_case_insensitive( I<true-or-false> ); + +Pod::Simple::Search will by default internally make an assumption +based on the underlying filesystem where the class file is found +whether it is case insensitive or not. + +If it is determined to be case insensitive, during survey() it may +skip pod files/modules that happen to be equal to names it's already +seen, ignoring case. + +However, it's possible to have distinct files in different directories +that intentionally has the same name, just differing in case, that should +be reported. Hence, you may force the behavior by setting this to true +or false. =item $search->limit_re( I<some-regxp> ); @@ -857,7 +871,6 @@ to limit the results just to items whose podnames match the given regexp. Normally this option is not needed, and the more efficient C<limit_glob> attribute is used instead. - =item $search->dir_prefix( I<some-string-value> ); Setting this attribute to a string value means that the searches should diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/SimpleTree.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/SimpleTree.pm index bff5af84c4b..85dbabcd70e 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/SimpleTree.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/SimpleTree.pm @@ -5,7 +5,7 @@ use strict; use Carp (); use Pod::Simple (); use vars qw( $ATTR_PAD @ISA $VERSION $SORT_ATTRS); -$VERSION = '3.35'; +$VERSION = '3.40'; BEGIN { @ISA = ('Pod::Simple'); *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG; diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Subclassing.pod b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Subclassing.pod index 88f85e86de2..f9cb09a52ef 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Subclassing.pod +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Subclassing.pod @@ -98,9 +98,14 @@ nodes that represent preformatted text (from verbatim sections). TODO intro... mention that events are supplied for implicits, like for missing >'s - In the following section, we use XML to represent the event structure -associated with a particular construct. That is, TODO +associated with a particular construct. That is, an opening tag +represents the element start, the attributes of that opening tag are +the attributes given to the callback, and the closing tag represents +the end element. + +Three callback methods must be supplied by a class extending +L<Pod::Simple> to receive the corresponding event: =over @@ -112,8 +117,9 @@ associated with a particular construct. That is, TODO =back -TODO describe - +Here's the comprehensive list of values you can expect as +I<element_name> in your implementation of C<_handle_element_start> +and C<_handle_element_end>:: =over diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Text.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Text.pm index 66e15f48cce..de50b510eae 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Text.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Text.pm @@ -6,7 +6,7 @@ use Carp (); use Pod::Simple::Methody (); use Pod::Simple (); use vars qw( @ISA $VERSION $FREAKYMODE); -$VERSION = '3.35'; +$VERSION = '3.40'; @ISA = ('Pod::Simple::Methody'); BEGIN { *DEBUG = defined(&Pod::Simple::DEBUG) ? \&Pod::Simple::DEBUG diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/TextContent.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/TextContent.pm index 980612b3132..ad4172b7a31 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/TextContent.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/TextContent.pm @@ -6,7 +6,7 @@ use strict; use Carp (); use Pod::Simple (); use vars qw( @ISA $VERSION ); -$VERSION = '3.35'; +$VERSION = '3.40'; @ISA = ('Pod::Simple'); sub new { diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/TiedOutFH.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/TiedOutFH.pm index a7364dfa585..0dd12c412dd 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/TiedOutFH.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/TiedOutFH.pm @@ -4,7 +4,7 @@ package Pod::Simple::TiedOutFH; use Symbol ('gensym'); use Carp (); use vars qw($VERSION ); -$VERSION = '3.35'; +$VERSION = '3.40'; #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Transcode.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Transcode.pm index a4bb29ffdb6..eb127022827 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Transcode.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/Transcode.pm @@ -3,7 +3,7 @@ require 5; package Pod::Simple::Transcode; use strict; use vars qw($VERSION @ISA); -$VERSION = '3.35'; +$VERSION = '3.40'; BEGIN { if(defined &DEBUG) {;} # Okay diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/TranscodeDumb.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/TranscodeDumb.pm index c2069056574..2b675ccb787 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/TranscodeDumb.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/TranscodeDumb.pm @@ -5,7 +5,7 @@ require 5; package Pod::Simple::TranscodeDumb; use strict; use vars qw($VERSION %Supported); -$VERSION = '3.35'; +$VERSION = '3.40'; # This module basically pretends it knows how to transcode, except # only for null-transcodings! We use this when Encode isn't # available. diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/TranscodeSmart.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/TranscodeSmart.pm index e4d4f7eb60e..99f55683ab3 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/TranscodeSmart.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/TranscodeSmart.pm @@ -9,7 +9,7 @@ use strict; use Pod::Simple; require Encode; use vars qw($VERSION ); -$VERSION = '3.35'; +$VERSION = '3.40'; sub is_dumb {0} sub is_smart {1} diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm index 8c2cf1a01ba..b9c6269bf98 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm @@ -45,7 +45,7 @@ declare the output character set as UTF-8 before parsing, like so: package Pod::Simple::XHTML; use strict; use vars qw( $VERSION @ISA $HAS_HTML_ENTITIES ); -$VERSION = '3.35'; +$VERSION = '3.40'; use Pod::Simple::Methody (); @ISA = ('Pod::Simple::Methody'); @@ -92,7 +92,7 @@ the call to C<parse_file>: In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what to put before the "Foo%3a%3aBar". The default value is -"http://search.cpan.org/perldoc?". +"https://metacpan.org/pod/". =head2 perldoc_url_postfix @@ -247,7 +247,7 @@ sub new { my $self = shift; my $new = $self->SUPER::new(@_); $new->{'output_fh'} ||= *STDOUT{IO}; - $new->perldoc_url_prefix('http://search.cpan.org/perldoc?'); + $new->perldoc_url_prefix('https://metacpan.org/pod/'); $new->man_url_prefix('http://man.he.net/man'); $new->html_charset('ISO-8859-1'); $new->nix_X_codes(1); @@ -685,8 +685,8 @@ sub emit { Resolves a POD link target (typically a module or POD file name) and section name to a URL. The resulting link will be returned for the above examples as: - http://search.cpan.org/perldoc?Net::Ping#INSTALL - http://search.cpan.org/perldoc?perlpodspec + https://metacpan.org/pod/Net::Ping#INSTALL + https://metacpan.org/pod/perlpodspec #SYNOPSIS Note that when there is only a section argument the URL will simply be a link diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/XMLOutStream.pm b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/XMLOutStream.pm index 62fe39549da..cb818a17409 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/XMLOutStream.pm +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/XMLOutStream.pm @@ -5,7 +5,7 @@ use strict; use Carp (); use Pod::Simple (); use vars qw( $ATTR_PAD @ISA $VERSION $SORT_ATTRS); -$VERSION = '3.35'; +$VERSION = '3.40'; BEGIN { @ISA = ('Pod::Simple'); *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG; diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/t/encod04.t b/gnu/usr.bin/perl/cpan/Pod-Simple/t/encod04.t index 88727cca521..8f41f98a6cc 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/t/encod04.t +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/t/encod04.t @@ -12,14 +12,14 @@ BEGIN { use strict; use Test; BEGIN { - if ($] lt 5.007_003) { - plan tests => 5, todo => [4, 5]; # Need utf8::decode() to pass #5 - # and isn't available in this - # release - } - else { - plan tests => 5, todo => [4]; - } + plan tests => 6, todo => []; +} + +# fail with the supplied diagnostic + +sub my_nok { + my ($diag) = @_; + ok (1, 0, $diag); } ok 1; @@ -61,16 +61,13 @@ if( $guess ) { if( grep m{Dash $dash}, @output_lines ) { ok 1; } else { - ok 0; - print STDERR "# failed to find expected control character in output\n" + my_nok "failed to find expected control character in output"; } } else { - ok 0; - print STDERR "# parser guessed wrong encoding expected 'CP1252' got '$guess'\n"; + my_nok "parser guessed wrong encoding expected 'CP1252' got '$guess'"; } } else { - ok 0; - print STDERR "# parser failed to detect non-ASCII bytes in input\n"; + my_nok "parser failed to detect non-ASCII bytes in input"; } @@ -95,18 +92,18 @@ else { if( $guess eq 'CP1252' ) { ok 1; } else { - ok 0; - print STDERR "# parser guessed wrong encoding expected 'CP1252' got '$guess'\n"; + my_nok "parser guessed wrong encoding expected 'CP1252' got '$guess'"; } } else { - ok 0; - print STDERR "# parser failed to detect non-ASCII bytes in input\n"; + my_nok "parser failed to detect non-ASCII bytes in input"; } } -# Initial accented character followed by 'smart' apostrophe causes heuristic -# to choose UTF8 (a somewhat contrived example) +# Initial accented character (E acute) followed by 'smart' apostrophe is legal +# CP1252, which should be preferred over UTF-8 because the latter +# interpretation would be "JOS" . \N{LATIN SMALL LETTER TURNED ALPHA} . "S +# PLACE", and that \N{} letter is an IPA one. @output_lines = split m/[\r\n]+/, Pod::Simple::XMLOutStream->_out( qq{ @@ -127,12 +124,10 @@ else { if( $guess eq 'CP1252' ) { ok 1; } else { - ok 0; - print STDERR "# parser guessed wrong encoding expected 'CP1252' got '$guess'\n"; + my_nok "parser guessed wrong encoding expected 'CP1252' got '$guess'"; } } else { - ok 0; - print STDERR "# parser failed to detect non-ASCII bytes in input\n"; + my_nok "parser failed to detect non-ASCII bytes in input"; } } @@ -160,12 +155,40 @@ else { if( $guess eq 'CP1252' ) { ok 1; } else { - ok 0; - print STDERR "# parser guessed wrong encoding expected 'CP1252' got '$guess'\n"; + my_nok "parser guessed wrong encoding expected 'CP1252' got '$guess'"; + } + } else { + my_nok "parser failed to detect non-ASCII bytes in input"; + } +} + +# The following is a real word example of something in CP1252 expressible in +# UTF-8, but doesn't make sense in UTF-8, contributed by Bo Lindbergh. +# Muvrarášša is a Sami word + +@output_lines = split m/[\r\n]+/, Pod::Simple::XMLOutStream->_out( qq{ + +=head1 NAME + +Muvrar\xE1\x9A\x9Aa is a mountain in Norway + +=cut + +} ); + +if (ord("A") != 65) { # ASCII-platform dependent test skipped on this platform + ok (1); +} +else { + ($guess) = "@output_lines" =~ m{Non-ASCII.*?Assuming ([\w-]+)}; + if( $guess ) { + if( $guess eq 'CP1252' ) { + ok 1; + } else { + my_nok "parser guessed wrong encoding expected 'CP1252' got '$guess'"; } } else { - ok 0; - print STDERR "# parser failed to detect non-ASCII bytes in input\n"; + my_nok "parser failed to detect non-ASCII bytes in input"; } } diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/t/html01.t b/gnu/usr.bin/perl/cpan/Pod-Simple/t/html01.t index b4caa39dc66..8d8e528320d 100755 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/t/html01.t +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/t/html01.t @@ -9,7 +9,7 @@ BEGIN { use strict; use Test; -BEGIN { plan tests => 13 }; +BEGIN { plan tests => 14 }; #use Pod::Simple::Debug (10); @@ -137,6 +137,16 @@ ok( "\n<dl>\n<dt><a name=\"howdy\"\n>Foo</a></dt>\n</dl>\n", ); +{ # Test that strip_verbatim_indent() works. github issue #i5 + my $output; + + my $obj = Pod::Simple::HTML->new; + $obj->strip_verbatim_indent(" "); + $obj->output_string(\$output); + $obj->parse_string_document("=pod\n\n First line\n 2nd line\n"); + ok($output, qr!<pre>First line\n2nd line</pre>!s); +} + print "# And one for the road...\n"; ok 1; diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/t/search20.t b/gnu/usr.bin/perl/cpan/Pod-Simple/t/search20.t index cbc3ac3fcf5..1b17c3b021a 100755 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/t/search20.t +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/t/search20.t @@ -78,16 +78,16 @@ require $ascii_order; { my $names = join "|", sort ascii_order values %$where2name; -skip $^O eq 'VMS' ? '-- case may or may not be preserved' : 0, - $names, - "Blorm|Suzzle|Zonk::Pronk|hinkhonk::Glunk|hinkhonk::Vliff|perlflif|perlthng|perlzuk|squaa|squaa::Glunk|squaa::Vliff|squaa::Wowo|zikzik"; +skip $^O eq 'VMS' ? '-- case may or may not be preserved' : 0, + $names, + "Blorm|Suzzle|Zonk::Pronk|hinkhonk::Glunk|hinkhonk::Vliff|perlflif|perlthng|perlzoned|perlzuk|squaa|squaa::Glunk|squaa::Vliff|squaa::Wowo|zikzik"; } { my $names = join "|", sort ascii_order keys %$name2where; -skip $^O eq 'VMS' ? '-- case may or may not be preserved' : 0, - $names, - "Blorm|Suzzle|Zonk::Pronk|hinkhonk::Glunk|hinkhonk::Vliff|perlflif|perlthng|perlzuk|squaa|squaa::Glunk|squaa::Vliff|squaa::Wowo|zikzik"; +skip $^O eq 'VMS' ? '-- case may or may not be preserved' : 0, + $names, + "Blorm|Suzzle|Zonk::Pronk|hinkhonk::Glunk|hinkhonk::Vliff|perlflif|perlthng|perlzoned|perlzuk|squaa|squaa::Glunk|squaa::Vliff|squaa::Wowo|zikzik"; } ok( ($name2where->{'squaa'} || 'huh???'), '/squaa\.pm$/'); diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/t/search22.t b/gnu/usr.bin/perl/cpan/Pod-Simple/t/search22.t index c6b33eea753..6fb498a3318 100755 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/t/search22.t +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/t/search22.t @@ -8,7 +8,7 @@ BEGIN { use strict; use Pod::Simple::Search; use Test; -BEGIN { plan tests => 13 } +BEGIN { plan tests => 15 } print "# ", __FILE__, ": Testing the scanning of several docroots...\n"; @@ -80,17 +80,17 @@ require $ascii_order; { print "# won't show any shadows, since we're just looking at the name2where keys\n"; my $names = join "|", sort ascii_order keys %$name2where; -skip $^O eq 'VMS' ? '-- case may or may not be preserved' : 0, - $names, - "Blorm|Suzzle|Zonk::Pronk|hinkhonk::Glunk|hinkhonk::Vliff|perlflif|perlthng|perlzuk|squaa|squaa::Glunk|squaa::Vliff|squaa::Wowo|zikzik"; +skip $^O eq 'VMS' ? '-- case may or may not be preserved' : 0, + $names, + "Blorm|Suzzle|Zonk::Pronk|hinkhonk::Glunk|hinkhonk::Vliff|perlflif|perlthng|perlzoned|perlzuk|squaa|squaa::Glunk|squaa::Vliff|squaa::Wowo|zikzik"; } { print "# but here we'll see shadowing:\n"; my $names = join "|", sort ascii_order values %$where2name; -skip $^O eq 'VMS' ? '-- case may or may not be preserved' : 0, - $names, - "Blorm|Suzzle|Zonk::Pronk|hinkhonk::Glunk|hinkhonk::Glunk|hinkhonk::Vliff|hinkhonk::Vliff|perlflif|perlthng|perlthng|perlzuk|squaa|squaa::Glunk|squaa::Vliff|squaa::Vliff|squaa::Vliff|squaa::Wowo|zikzik"; +skip $^O eq 'VMS' ? '-- case may or may not be preserved' : 0, + $names, + "Blorm|Suzzle|Zonk::Pronk|hinkhonk::Glunk|hinkhonk::Glunk|hinkhonk::Vliff|hinkhonk::Vliff|perlflif|perlthng|perlthng|perlzoned|perlzuk|squaa|squaa::Glunk|squaa::Vliff|squaa::Vliff|squaa::Vliff|squaa::Wowo|zikzik"; my %count; for(values %$where2name) { ++$count{$_} }; @@ -120,7 +120,9 @@ skip $^O eq 'VMS' ? '-- case may or may not be preserved' : 0, ($name2where->{'squaa::Wowo'} || 'huh???'), '/testlib2/'; - +my $in_pods = $x->find('perlzoned', $here2); +ok $in_pods, qr{^\Q$here2\E}; +ok $in_pods, qr{perlzoned.pod$}; print "# OK, bye from ", __FILE__, "\n"; ok 1; diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/t/search50.t b/gnu/usr.bin/perl/cpan/Pod-Simple/t/search50.t index 126f24a7b1e..0dc9d75a296 100755 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/t/search50.t +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/t/search50.t @@ -23,6 +23,7 @@ ok $x->inc; # make sure inc=1 is the default use Pod::Simple; *pretty = \&Pod::Simple::BlackBox::pretty; +*pretty = \&Pod::Simple::BlackBox::pretty; # avoid 'once' warning my $found = 0; $x->callback(sub { diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/t/whine.t b/gnu/usr.bin/perl/cpan/Pod-Simple/t/whine.t index b33f0a91efa..4ac76e5bd3d 100644 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/t/whine.t +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/t/whine.t @@ -1,6 +1,6 @@ use strict; use warnings; -use Test::More tests => 4; +use Test::More tests => 6; { package Pod::Simple::ErrorFinder; @@ -51,3 +51,23 @@ sub errors { Pod::Simple::ErrorFinder->errors_for_input(@_) } "warning for / in text part of L<>", ); } + +{ + my $input = "=pod\n\nnested LE<lt>E<sol>E<gt>: L<Nested L<http://foobar>|http://baz>\n"; + my $errors = errors("$input"); + is_deeply( + $errors, + { 3 => [ "Nested L<> are illegal. Pretending inner one is X<...> so can continue looking for other errors." ] }, + "warning for nested L<>", + ); +} + +{ + my $input = "=pod\n\nLE<lt>E<sol>E<gt> containing only slash: L< / >\n"; + my $errors = errors("$input"); + is_deeply( + $errors, + { 3 => [ "L<> contains only '/'" ] }, + "warning for L< / > containing only a slash", + ); +} diff --git a/gnu/usr.bin/perl/cpan/Pod-Simple/t/xhtml01.t b/gnu/usr.bin/perl/cpan/Pod-Simple/t/xhtml01.t index 01e6f189b42..7ee08652162 100755 --- a/gnu/usr.bin/perl/cpan/Pod-Simple/t/xhtml01.t +++ b/gnu/usr.bin/perl/cpan/Pod-Simple/t/xhtml01.t @@ -18,7 +18,7 @@ isa_ok ($parser, 'Pod::Simple::XHTML'); my $results; -my $PERLDOC = "http://search.cpan.org/perldoc"; +my $PERLDOC = "https://metacpan.org/pod"; my $MANURL = "http://man.he.net/man"; initialize($parser, $results); @@ -541,7 +541,7 @@ $parser->parse_string_document(<<'EOPOD'); A plain paragraph with a L<Newlines>. EOPOD is($results, <<"EOHTML", "Link entity in a paragraph"); -<p>A plain paragraph with a <a href="$PERLDOC?Newlines">Newlines</a>.</p> +<p>A plain paragraph with a <a href="$PERLDOC/Newlines">Newlines</a>.</p> EOHTML @@ -552,7 +552,7 @@ $parser->parse_string_document(<<'EOPOD'); A plain paragraph with a L<perlport/Newlines>. EOPOD is($results, <<"EOHTML", "Link entity in a paragraph"); -<p>A plain paragraph with a <a href="$PERLDOC?perlport#Newlines">"Newlines" in perlport</a>.</p> +<p>A plain paragraph with a <a href="$PERLDOC/perlport#Newlines">"Newlines" in perlport</a>.</p> EOHTML @@ -742,16 +742,16 @@ like $results, qr{\Q<meta http-equiv="Content-Type" content="text/html; charset= # Test the link generation methods. is $parser->resolve_pod_page_link('Net::Ping', 'INSTALL'), - "$PERLDOC?Net::Ping#INSTALL", + "$PERLDOC/Net::Ping#INSTALL", 'POD link with fragment'; is $parser->resolve_pod_page_link('perlpodspec'), - "$PERLDOC?perlpodspec", 'Simple POD link'; + "$PERLDOC/perlpodspec", 'Simple POD link'; is $parser->resolve_pod_page_link(undef, 'SYNOPSIS'), '#SYNOPSIS', 'Simple fragment link'; is $parser->resolve_pod_page_link(undef, 'this that'), '#this-that', 'Fragment link with space'; is $parser->resolve_pod_page_link('perlpod', 'this that'), - "$PERLDOC?perlpod#this-that", + "$PERLDOC/perlpod#this-that", 'POD link with fragment with space'; is $parser->resolve_man_page_link('crontab(5)', 'EXAMPLE CRON FILE'), |