#!/usr/bin/perl -w
#
# cfman - make sure manpages have accurate SEE ALSOs
# tchrist@perl.com
#
# Provenance: recovered from a web rendering of the patch
#   From d2cbcfd403af89dfc13f2ec24b4dc05f952a063c Mon Sep 17 00:00:00 2001
#   From: Theo de Raadt
#   Date: Sat, 1 Apr 2000 03:41:48 +0000
#   Subject: some man page tree checking tools from tchrist@perl.com
#   diff --git a/regress/share/man/cfman b/regress/share/man/cfman
#   new file mode 100644, index 00000000000..0f4473151cf, 869 insertions(+)
# That rendering had eaten every <HANDLE> readline operator (leaving
# "while ()" and "1 while ;"); they are restored here.

use strict;
use File::Spec;

my $VERSION = do {
    my @r = (q$Revision: 1.1 $ =~ /\d+/g);
    sprintf "%d."."%02d" x $#r, @r;
};

my $Debug = 0;

my($Ignore_Manpath, $CF_File, $CF_Style,
   $No_Guessing, $Verbose, $Rebuild_Indices);

parse_opts();

# Make every tree absolute before we start: the main loop chdir()s around
# and then launches man(1), so a relative tree (e.g. "cfman ./man") would
# stop resolving after the first chdir.  Empty components are left alone.
my $Manpath = join ":",
    map { length($_) ? File::Spec->rel2abs($_) : $_ }
    split(/:/, get_manpath());
print "MANPATH is $Manpath\n" if $Debug;

check_can_whence();

# (re)set to what we've computed so that when we launch man below,
# it'll use the specified or inferred manpath.

unless ($Ignore_Manpath) {
    print "Limiting external manpath\n" if $Debug;
    $ENV{MANPATH} = $Manpath;
}

# Main loop: for every page found in every man*/cat* subdirectory of every
# tree, format the page with man(1), pull out the SEE ALSO paragraph, and
# check where each reference in it resolves to.
for my $tree (split /:/, $Manpath) {
    print "tree chdir('$tree')\n" if $Debug > 1;
    chdir($tree) || die "cannot cd to main tree $tree: $!";

    for my $mandir ( grep { -d $_ } <{man,cat}*> ) {
        print "subdir chdir('$tree/$mandir')\n" if $Debug > 1;
        chdir("$tree/$mandir") || die "cannot cd to subdir $tree/$mandir: $!";

        # section name is the directory name minus its man/cat prefix
        my($ext, @pages);
        ($ext = $mandir) =~ s/^(?:cat|man)//;

        # reduce file names to bare page names
        for (@pages = <*.*>) {
            s/\.gz$//;
            s/\.(?:0|\Q$ext\E\w*)$//;
        }

        my $option = adjust_ext($ext);
        for my $page (@pages) {
            print "man $option $page\n" if $Debug > 1;
            # the page name comes from the file system; quote it for sh
            my $quoted = shell_quote($page);
            open(MAN, "man $option $quoted 2>&1 | col -b |")
                or die "cannot fork man lookup: $!";
            local $/ = '';              # paragraph mode
            while (<MAN>) {
                next unless /^SEE ALSO/ ||
                    $_ eq "S\bSE\bEE\bE A\bAL\bLS\bSO\bO\n";
                s/.\010//g;             # strip any leftover overstrikes
                s/-\n\s*//g;            # rejoin hyphenated line breaks
                my @refs = /\S+\(\S+\)/g;
                print "$page.$ext SEE ALSOs @refs\n" if $Debug > 2;
                for my $ref (@refs) {
                    my $place = whereis($ref);
                    if ($place =~ /\*\*\*/ || $Debug || $Verbose) {
                        print "$page.$ext: $ref -> $place\n";
                    }
                }
                last;
            }
            1 while <MAN>;              # drain to suppress broken pipe
            close(MAN) || warn "close on man $option $page failed";
        }
    }

}

# Print an optional complaint, then die with the usage summary.
sub usage {
    print STDERR "@_\n" if @_;
    die "Usage: $0 [-hdrivg] [-f cf-file] [-s cf-style] [mandir ...]\n";
}

# Show our own embedded documentation through a pager, trying in turn:
# pod2man | nroff, pod2text, the Pod::Text module, and finally dumping
# the raw text that follows the end-of-code marker.  Does not return.
sub run_help {
    my $pager;
    unless ($pager = $ENV{PAGER}) {
        require Config;
        # lint happiness.  blech.
        $pager = $Config::Config{'pager'} || $Config::Config{'pager'};
    }

    $pager = "/bin/cat" unless has_cmd($pager);

    if (has_cmd("pod2man") && has_cmd("nroff") ) {
        { exec("pod2man $0 | nroff -man | $pager") }    # lint happiness
        warn "exec of pod2man | nroff | $pager failed: $!";
    }

    if (has_cmd("pod2text")) {
        { exec("pod2text $0 | $pager") }                # lint happiness
        warn "exec of pod2text | $pager failed: $!";
    }

    # sucks to be you!

    if (eval q{ require Pod::Text; 1; }) {
        open (STDOUT, "| $pager") || die "no pager $pager: $!";
        # this forces a wait on child if needed
        sub END { close(STDOUT) || die "cannot close STDOUT: $!" }
        Pod::Text::pod2text($0);
        exit 0;
    }

    # it REALLY REALLY REALLY sucks to be you!
    local *SELF;
    open(SELF, "< $0") or die "$0: cannot open myself: $!";
    local $/ = '';
    while (<SELF>) {
        last if /__(END|DATA)__/;       # must be careful here
    }
    print <SELF>;
    exit;
}

# Return the path of an executable command, or nothing if there is none.
# The argument may carry arguments of its own ("less -R"), which are
# ignored, and may be a path ("/usr/bin/less"), which is tested directly
# instead of being looked up along PATH.
sub has_cmd {
    my $cmd = shift;
    return unless defined $cmd;
    ($cmd) = split ' ', $cmd;
    return unless defined $cmd && length $cmd;

    if ($cmd =~ m{/}) {
        return $cmd if -f $cmd && -x _;
        return;
    }

    for (split(/:/, $ENV{PATH})) {
        my $path = "$_/$cmd";
        return $path if -f $path && -x _;
    }
    return;
}

# Quote a string so that /bin/sh sees it as exactly one literal word.
sub shell_quote {
    my $str = shift;
    $str =~ s/'/'\\''/g;
    return "'$str'";
}

# Consume leading option clusters from @ARGV and set the option globals.
# The leading dash has already been stripped from each cluster by the time
# its letters are examined, so "--" arrives as "-" and "--help" as "-help".
sub parse_opts {
    ARG: while (@ARGV && $ARGV[0] =~ s/^-(?=.)//) {
        OPT: for (shift @ARGV) {        # getopts is for wimps
            m/^$/       && do {                        next ARG; };
            m/^-$/      && do {                        last ARG; };
            s/^d//      && do { $Debug++;              redo OPT; };
            s/^i//      && do { $Ignore_Manpath++;     redo OPT; };
            s/^g//      && do { $No_Guessing++;        redo OPT; };
            s/^v//      && do { $Verbose++;            redo OPT; };
            s/^r//      && do { $Rebuild_Indices++;    redo OPT; };
            s/^f(.*)//  && do { $CF_File  = $1 || shift @ARGV; next ARG; };
            s/^s(.*)//  && do { $CF_Style = $1 || shift @ARGV; next ARG; };

            # -h, --h and --help.  A plain "-h" shows up here as "h", which
            # the old /^-h(elp)?$/ test never matched.
            m/^(?:h|-h(?:elp)?)$/
                        && do { run_help(); exit; };

            usage("unknown option: -$_");
        }
    }

    if ($CF_Style && !$CF_File) {
        for (glob("/etc/man*.c*f*")) {
            $CF_File = $_;
            last;
        }
        print "Guessed CF file of $CF_File\n"
            if $Debug && defined $CF_File;
    }

}

{   # extra scope for function private "static" variable
    my $linux_griped = 0;

    # Return the operating system name, griping once if it is plain
    # "linux" and no more specific config style was requested.
    sub get_osname {
        my $name = $^O;

        if ($name eq 'linux' && ! $linux_griped++
            && (! $CF_Style || $CF_Style eq 'linux') )
        {
            # there are many different linux operating systems, and
            # it torques me off that they pretend there aren't.
            # i have no idea whether this works anywhere but redhat.
            warn "$0: Your osname claims linux; assuming redhat instead\n";
        }

        return $name;
    }
}


# everything beneath here should be in a module

{   # extra scope for function private "static" variable
    # Cache/index of page locations.  Keys are "name" (all known paths,
    # space separated) and the pair name,section (one path or a "***"
    # complaint).  Section keys are always lower case.
    my %Whereis;

    # Resolve a reference such as "ls(1)" to a path.  The result starts
    # with "***" when the page is not where the reference says it is.
    sub whereis {
        my $manref = shift;
        my ($page, $ext) = $manref =~ /(\S+)\((\S+)\)/;
        $ext = lc($ext);
        return $Whereis{$page, $ext} if $Whereis{$page, $ext};

        # using our own index: whatever is not in it does not exist
        if ($Rebuild_Indices) {
            $Whereis{$page, $ext} = "*** No manual entry for $page ";
            if ($Whereis{$page}) {
                $Whereis{$page, $ext} .= "(really in $Whereis{$page})";
            }
            return $Whereis{$page, $ext};
        }

        my $swext = adjust_ext($ext);
        # $page comes out of manpage text, so quote it properly for sh
        my $quoted = shell_quote($page);

        print "man -w $swext '$page'\n" if $Debug > 1;

        ($Whereis{$page, $ext} = `man -w $swext $quoted 2>&1 `) =~ s/\n/ /g;
        if ($?) {
            # not in the stated section; look for it anywhere
            $Whereis{$page, $ext} =~ s/^/*** /;
            print "man -w -a '$page'\n" if $Debug > 1;
            my $try_again = `man -w -a $quoted 2>&1 `;
            if (! $?) {
                $try_again =~ s/\n/ /g;
                if ($try_again =~ /\bcat\Q$ext\E.*\b\Q$page\E\.0/) {
                    # a preformatted catN/page.0 does count as section N
                    $Whereis{$page, $ext} = $try_again;
                    print "BSD REALLY: $page.$ext really in $try_again\n"
                        if $Debug > 1;
                } else {
                    $Whereis{$page, $ext} =~ s/$/ (really $try_again)/;
                }
            }
        }
        return $Whereis{$page, $ext};
    }

    # Find out whether man(1) understands -w.  If it does not, or if -r
    # was given, build %Whereis by hand from windex files (solaris) or
    # from the man* directory listings (everything else).
    sub check_can_whence {
        if (! $Rebuild_Indices) {
            # the subshell dodges a solaris sh bug.  stdout is redirected
            # first so that stderr really ends up in /dev/null as well.
            system "(man -w man) > /dev/null 2>&1";
            return unless $?;
            warn "$0: Your system is stupid: it cannot whence.\n";
        }

        $Rebuild_Indices++;

        print "$0: Hold on, this may take a while....\n";

        if (get_osname() eq 'solaris') {
            for my $dir (split /:/, $Manpath) {
                local *WINDEX;
                next unless open(WINDEX, "< $dir/windex");
                print "reading $dir/windex\n" if $Debug;
                local $_;
                while (<WINDEX>) {
                    next unless /^(\S+)\s+(\S+)\s+\((\S+)\)/;
                    my ($name, $page, $ext) = ($1, $2, $3);
                    $Whereis{$name} .= "$dir/man$ext/$page.$ext ";
                    # whereis() looks sections up in lower case
                    $Whereis{$page, lc $ext} = "$dir/man$ext/$page.$ext";
                }
                close(WINDEX);
            }
        }
        else {
            for my $tree (split /:/, $Manpath) {
                print "reading $tree directory entries\n" if $Debug;
                for my $dir ( glob("$tree/man*") ) {
                    next unless -d $dir;
                    local *DH;
                    opendir(DH, $dir) || die "cannot opendir $dir: $!";
                    my @pages = grep { /[^.].*\./ } readdir(DH);
                    closedir DH;
                    my($section) = $dir =~ /man([^\/]+)$/;
                    for my $page ( @pages ) {
                        my $name;
                        $page =~ s/\.gz$//;
                        my $ext;
                        unless (index($page, ".$section") >= 0) {
                            warn "wrong section for $dir/$page\n";
                        }
                        ($name = $page) =~ s/\.([^.]*)$//;
                        $ext = $1;
                        # test length, not truth: ".0" is a real extension
                        die "no ext in $page"
                            unless defined $ext && length $ext;
                        die "no name" unless $name;
                        # whereis() looks sections up in lower case
                        $Whereis{$name, lc $ext} = "$dir/$name.$ext";
                        $Whereis{$name} .= "$dir/$page ";
                    }
                }
            }
        }
    }

}

# add a -s or a -S or no flag for calling up
# a page from a particular section
sub adjust_ext {
    my $ext = shift;
    my $osname = $^O;

    if ($osname eq 'solaris') {
        # stupid solaris REQUIRES this -s crap;
        # they don't understand -S either
        $ext = "-s $ext";
    }
    elsif ($osname eq 'freebsd') {
        # stupid freebsd FORBIDS the -s
        # they also require a -S if it's a two-char word,
        # like "man 3x curs_util".  it doesn't harm in any event,
        # so do it anyway
        $ext = "-S $ext";
    }
    elsif ($osname eq 'linux') {
        # stupid redhat FORBIDS the -s;
        # they tolerate -S, however.  but unlike bsd, they
        # don't seem to require it for 3x sections.  interesting.
        $ext = "-S $ext";
    }
    elsif ($osname eq 'openbsd') {
        # openbsd neither requires nor forbids -s nor -S,
        # which both mean the same thing.
        #
        # then again, they still need it for two-char words,
        # so do it anyway.  Seems dumb.  Config issue?
        $ext = "-S $ext";
    }

    return $ext;
}

# Work out the colon-separated list of man trees to check.  In order of
# preference: the command-line arguments, $MANPATH (unless -i), the -f
# config file, the platform's own config file or manpath program, a guess
# from PATH, and finally two hard-coded directories.
sub get_manpath {
    my $pathstr;

    if (@ARGV) {
        return join ":", @ARGV;
    }

    if ($ENV{MANPATH} && ! $Ignore_Manpath) {
        return $ENV{MANPATH};
    }

    my $osname = get_osname();

    if ($CF_File) {
        $pathstr = readcf($CF_File, $CF_Style || $osname);
        return $pathstr if $pathstr;
    }

    if ($osname eq 'freebsd') {
        # freebsd has a manpath program
        $pathstr = run_manpath() || readcf('/etc/manpath.config');
    }
    elsif ($osname eq 'openbsd') {
        # but openbsd does not
        $pathstr = readcf('/etc/man.conf');
    }
    elsif ($osname eq 'linux') {
        # this sucks - osname should say which linux we have.
        $pathstr = readcf('/etc/man.config');
    }
    else {
        if ($CF_File && $CF_Style) {
            $pathstr = readcf($CF_File, $CF_Style);
        } else {
            $pathstr = run_manpath() || compute_manpath();
        }
    }

    unless ($pathstr) {
        for (qw( /usr/man /usr/share/man )) {
            next unless -d $_;
            $pathstr = $_;
            warn "no manpath set, assuming $_.\n";
            last;
        }
        die "cannot find any manpages" unless $pathstr;
    }

    return $pathstr;

}

# traverse binpath and guess: for each PATH directory, see whether a
# sibling "man" directory exists (/usr/bin -> /usr/man)
sub compute_manpath {
    return if $No_Guessing;
    my (@manpath, %seen);
    for (split(/:/, $ENV{PATH})) {
        next if /^\.?$/;        # don't care about dot dirs
        # Swap the last path component for "man".  The class used to be
        # [^/+], which stopped short at a "+" in that component.
        if (s{[^/]*$}{man} && -d $_ && !$seen{$_}++) {
            # also weed out the same directory reached by another name
            my($dev,$ino) = stat(_);
            push(@manpath,$_) unless $seen{$dev,$ino}++;
        }
    }
    print "Guessing manpath of: @manpath\n" if $Debug;
    return join(":", @manpath);
}

# try an external manpath program
sub run_manpath {
    # the silly subshell is to dodge a solaris bug
    my $path = `(manpath) 2>/dev/null`;
    return if $?;
    chomp $path;
    return $path;
}

# try reading config files in various formats.
# Called as readcf(PATH) for one of the well-known config paths, or as
# readcf(PATH, STYLE) with an explicit style name.  Returns the directories
# found (after any guessed from PATH) as a colon-separated string.
sub readcf {
    die "readcf(): expected 1 or 2 args" if @_ < 1 || @_ > 2;

    my($cfpath, $ostype) = @_;

    my $pathfunc;

    my @styles = qw/freebsd openbsd redhat/;

    if (@_ == 2) {
        $pathfunc = {
            'freebsd'   => \&cf_freebsd,
            'openbsd'   => \&cf_openbsd,
            'redhat'    => \&cf_redhat,
            'linux'     => \&cf_redhat,
        }->{$ostype} || die "unknown CF style: $ostype (want @styles)";
    }
    else {
        $pathfunc = {
            '/etc/manpath.config'   => \&cf_freebsd,
            '/etc/man.conf'         => \&cf_openbsd,
            '/etc/man.config'       => \&cf_redhat,
        }->{$cfpath} || die "no CF reader for $cfpath";
    }

    local(*CF, $_);

    print "reading CF file $cfpath\n" if $Debug;

    open(CF, "< $cfpath") || die "cannot open $cfpath: $!";

    my(@dir_list, %seen_dir);

    # we run the guesser first to catch things in the path.
    unless ($No_Guessing) {
        for (@dir_list = split /:/, compute_manpath()) {
            my($dev,$ino) = stat $_;
            $seen_dir{$dev,$ino} = 1;
        }
    }

    while (<CF>) {
        s/^#.*//;
        next unless /\S/;
        # the style parser reads the current line from $_
        for ($pathfunc->()) {
            # XXX: near-dup code
            if (-d $_ && !$seen_dir{$_}++) {
                my($dev,$ino) = stat(_);
                push(@dir_list,$_) unless $seen_dir{$dev,$ino}++;
            }
        }
    }

    close(CF) || die "cannot close config $cfpath: $!";

    return join ":", @dir_list;
}

# The cf_* parsers each look at one config line in $_ and return the man
# directories it names, or nothing.

sub cf_freebsd {
    return $1 if /^\s*MANDATORY_MANPATH\s+(\S+)/;
    return $1 if /^\s*MANPATH_MAP\s+\S+\s+(\S+)/;
    return;

}

sub cf_openbsd {
    return glob($1) if /^\s*_default\s+(.*\S)\s*$/;
    return glob($1) if /^\s*[^_\s]\S+\s+(.*\S)\s*$/;
    return;
}

sub cf_redhat {
    return $1 if /^\s*MANPATH\s+(\S+)/;
    return $1 if /^\s*MANPATH_MAP\s+\S+\s+(\S+)/;
    return;
}

__END__

=head1 NAME

cfman - make sure manpages have accurate SEE ALSOs

=head1 SYNOPSIS

B<cfman> [B<-hdrivg>]
[B<-f> I<cf-file>] [B<-s> I<cf-style>] [I<mandir> ...]

=head1 DESCRIPTION

The B<cfman> program attempts to search your system manpages for SEE
ALSO entries that are incorrect. To determine which manpages to look at,
the system's manpage directories are searched. However, to look at the
SEE ALSO list, the man(1) program is called. That's because some systems
have funny ideas about whether pages are installed already formatted
or not.

For each SEE ALSO reference, we attempt to call B<man -w> on a
particular page in the referenced section or subsection
to figure out the real path. If this fails, we call B<man -w -a>
irrespective of section. If it's found somewhere it's not expected,
we still report the problem, as we do if it's not found at all.

On systems too primitive to support the useful B<man -w> syntax, we
try to figure it out by hand by reading all the directories first.
On Solaris, we'll look at the I<windex> files in each directory.
You can force this behaviour by using the B<-r> option described below.

=head1 OPTIONS

Most options can be clustered.


=over

=item -d

Run with debugging. This option is cumulative. Currently
debugging levels one through three are provided.

=item -f I<cf-file>

Specify a man(1) config file to read in if need be.

=item -g

Disable guessing of the manpath using the current PATH variable.

=item -h

Give a help message. Actually, try valiantly to give this manpage,
even if it's not installed. It's very hard to misplace this one. :-)

=item -i

Ignore the current manpath. This has two effects. First, it means
that the program will not consult the MANPATH variable for default
paths. Secondly, it will not attempt to reset the MANPATH variable
before calling man(1) to do its work. See examples below.

=item -r

Rebuild indices of what is installed where manually. This is a simplistic
check only. We consult each I<man*> subdirectory beneath each element
in the list of supplied man directories, and within that, we look for
each page inside. This is necessary on systems that don't support
a B<-w> option to man(1), and will be inferred if needed. It may
be faster than running B<man -w> that often.

=item -s I<cf-style>

Supply a parsing style for the config files. Only three are currently
supported: B<freebsd>, B<openbsd>, and B<redhat>.

=item -v

Verbose mode. This just means that it will show where all the
SEE ALSO references appear to resolve to, not merely report
the missing or misdirected ones.

=back

=head1 EXAMPLES

Run the program using the current manpath if set, or the
system one otherwise:

    $ cfman

Run the program against the listed mantrees only. References
to something outside those trees will fail:

    $ cfman /usr/man /usr/X11R6/man

Run on one tree only, but do not restrict references to being
in those trees only:

    $ cfman -i /usr/local/perl/man

=head1 ENVIRONMENT

=over

=item MANPATH

The user's current MANPATH is used unless the B<-i> option
is supplied.

=item PAGER

This is used to feed the self-generating manpage into.

=item PATH

This is used if we need to guess a MANPATH.

=back

=head1 FILES

The system-wide config file (such as I</etc/manpath.config>, I</etc/man.conf>,
or I</etc/man.config>) is used if it's needed.

Numerous B<man>-related directories and files will be grovelled through,
both directly and indirectly.

The B<pod2man>, B<nroff>, and B<pod2text> programs may also be called
for the self-generating manpage in the help message, as may your
B<cat> program or preferred pager.

=head1 SEE ALSO

In no particular order: catman(1), man(1), manpath(1), more(1), nroff(1),
pod2man(1), pod2text(1), whatis(1), man.conf(5), man(7), and noman(8).

=head1 NOTES

The B<-w> option to the man(1) program was first introduced in the work
presented at the 1990 Usenix LISA conference in Colorado Springs in the
paper entitled I<The Answer to All Man's Problems>. This option,
along with several others invented there, has since been adopted by all
modern Unixes. Other work presented in that paper included an earlier
version of this program. Sadly, vendors have been negligently remiss
in their duties since that time.

The paper is available upon request from the author. It uses the ms(7)
macro set. You have been warned. :-)

=head1 DIAGNOSTICS

Classes of diagnostics are as follows.

=over

=item N

A normal message. This is the program's expected output.

=item D1

A level-one debugging message.

=item D2

A level-two debugging message.

=item W

A warning.

=item WI

An internal warning, with extra diagnostics appended
telling the file name and line number of the problem.

=item F

A fatal error.

=item FI

An internal fatal error, with extra diagnostics appended
telling the file name and line number of the problem.

=back

Any instances of C<%s> below are replaced with a string in the actual
error message. Any instances of C<%m> below are replaced with the
current errno string.

=over

=item %s: %s -> %s

(N) Where a reference resolves to. The first field is the page being
consulted.
The second field is what it contains. The third field
is where the reference resolves to. If there is no resolution, then a
message beginning with three stars will be emitted. In some cases,
a parenthesized suggestion is made.

=item cannot fork man lookup: %m

(FI) Tried to run the man(1) program to parse output, but
couldn't. Usually means out of processes; or sometimes,
command not found.

=item cannot cd to main tree %s: %m

(FI) One of the elements in the MANPATH was inaccessible.

=item cannot cd to subdir %s: %m

(FI) One of the subdirectories in one of the MANPATH elements was
inaccessible.

=item cannot close config %s: %m

(FI) The config file wouldn't close properly.

=item cannot close STDOUT: %m

(FI) The pager used for the help manpage wouldn't close properly.

=item cannot find any manpages

(FI) Unable to figure out a manpath any other way,
we tried looking in I</usr/man> and I</usr/share/man>,
but those weren't there.

=item cannot open myself: %m

(FI) In the worst case, we open our own program file to
produce a help page. But that open failed. Strange.

=item cannot opendir %s: %m

(FI) One of the subdirectories in a man tree
was inaccessible.

=item close on man %s failed

(WI) We were unable to correctly close the pipe
from man(1) we were running to read its SEE ALSO entries.

=item exec of pod2man | nroff | %s failed: %m

(WI) We couldn't pod2manify ourselves. Usually this is just
a broken pipe because you exited early.

=item exec of pod2text | %s failed: %m

(WI) We couldn't pod2textify ourselves. Usually this is just
a broken pipe because you exited early.

=item Guessed CF file of %s

(D1) You specified a parsing style, but no file.
So we guessed one. We look in I</etc/man*.c*f*> for a match.

=item Guessing manpath of: %s

(D1) We ran down your binpath and suspected that these
were valid man directories for each piece.

=item Hold on, this may take a while....

(N) We have to exhaustively read each directory looking for
manpages. This is not fast. But in the end, it might be
faster than calling B<man -w> a zillion times. You can enable
this with the C<-r> flag.

=item Limiting external manpath

(D1) The MANPATH envariable is set before
calling man(1) again.

=item man %s %s

(D2) We're calling man(1) to parse the SEE ALSO references.

=item man -w %s '%s'

(D2) We're trying to look up the path where a manpage is located.

=item man -w -a '%s'

(D2) We're trying harder to look up the path where a manpage is located
because the first try failed.

=item MANPATH is %s

(D1) This is the colon-separated list of mantree directories
we decided to process.

=item no CF reader for %s

(X) The path has no known syntax.

=item no ext in %s

(X) We couldn't figure out the subsection by looking for an
extension.

=item no manpath set, assuming %s.

(W) We're trying to use a hard-coded path, because
nothing else worked.

=item no name

(X) Couldn't figure out the name of the page, given the
filename.

=item no pager %s: %m

(FI) You don't seem to have a valid pager.

=item readcf(): expected 1 or 2 args

(X) Internal error. A function was called wrong.

=item reading CF file %s

(D1) We're parsing this file for man config entries.

=item reading %s directory entries

(D1) We're reading all the manpages in this directory.
Probably because you used B<-r> or because you have
a primitive and annoying man(1) program.

=item reading %s/windex

(D1) We're reading the I<windex> file of this man directory
to learn which pages it holds. This happens only on Solaris.

=item subdir chdir('%s')

(D2) This message is printed each time we change
to a subdirectory within a mantree.
N This is the program's expected output. The first

=item tree chdir('%s')

(D2) This message is printed each time we change
directory to a new mantree.

=item unknown CF style: %s (want %s)

(FI) You asked for a config-file parsing style that
we don't support.

=item unknown option: -%s

(FI) You specified an invalid option. This will trigger
a usage message.

=item Usage: %s [-hdrivg] [-f cf-file] [-s cf-style] [mandir ...]

(N) The usage message.

=item wrong section for %s/%s

(W) While searching your directories, we found a strange
page, such as I<foo.3> installed in the I<man1> directory,
where we were expecting I<foo.1> instead.

=item Your osname claims linux; assuming redhat instead

(W) It is unclear to this author whether all the different
Linux operating systems employ the same man(1) program.
It seems imprudent to assume that the version of the
operating system (read: the kernel) has anything to do with
the installed utility set. uname(1) is not helpful here.
You may suppress this message by explicitly using B<-s redhat>.

=item Your system is stupid: it cannot whence.

(W) Your system is too primitive to support B<man -w>.
This makes us do things the hard way.

=back

=head1 BUGS

Various, no doubt.

=head1 RESTRICTIONS

This program was tested only under a couple of different BSD operating
systems and a couple of different Linux operating systems. Remedial
support for Solaris is included, but has not been stress tested.
Bugs in their I<windex> files mess up this program.

=head1 AUTHOR

Tom Christiansen E<lt>tchrist@perl.comE<gt>

=head1 HISTORY

Version 1: Sometime in early 1989.

Version 2: December 15th, 1989.

Version 3: October 20th, 1999. Just made it in under the decade mark.

=for comment
-- cgit v1.2.3