#!/usr/bin/perl -w
# $OpenBSD: cfman,v 1.5 2007/05/15 22:00:38 jmc Exp $
#
# cfman - make sure manpages have accurate SEE ALSOs
# tchrist@perl.com

use strict;

my $VERSION = do { my @r = (q$Revision: 1.5 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r; };

my $Debug = 0;

# Option state, filled in by parse_opts().
my($Ignore_Manpath, $CF_File, $CF_Style, $No_Guessing, $Verbose, $Rebuild_Indices);

parse_opts();

my $Manpath = get_manpath();
print "MANPATH is $Manpath\n" if $Debug;

check_can_whence();

# (re)set to what we've computed so that when we launch man below,
# it'll use the specified or inferred manpath.
unless ($Ignore_Manpath) {
    print "Limiting external manpath\n" if $Debug;
    $ENV{MANPATH} = $Manpath;
}

# Walk every man*/cat* subdirectory of every tree, format each page with
# man(1), and check each reference found in its SEE ALSO paragraph.
for my $tree (split /:/, $Manpath) {
    print "tree chdir('$tree')\n" if $Debug > 1;
    chdir($tree) || die "cannot cd to main tree $tree: $!";
    for my $mandir ( grep { -d } <{man,cat}*> ) {
        print "subdir chdir('$tree/$mandir')\n" if $Debug > 1;
        chdir("$tree/$mandir") || die "cannot cd to subdir $tree/$mandir: $!";
        my($ext, @pages);
        ($ext = $mandir) =~ s/^(?:cat|man)//;
        # reduce file names to bare page names
        for (@pages = <*.*>) {
            s/\.gz$//;
            s/\.(?:0|${ext}\w*)$//;
        }
        my $option = adjust_ext($ext);
        for my $page (@pages) {
            print "man $option $page\n" if $Debug > 1;
            # NOTE: $page comes from a directory listing and is handed
            # to the shell unquoted.
            open(MAN, "man $option $page 2>&1 | col -b |")
                or die "cannot fork man lookup: $!";
            local $/ = '';      # paragraph mode
            while (<MAN>) {
                next unless /^SEE ALSO/
                         || $_ eq "S\bSE\bEE\bE A\bAL\bLS\bSO\bO\n";
                s/.\010//g;     # strip overstrikes
                s/-\n\s*//g;    # rejoin hyphenated line breaks
                my @refs = /\S+\(\S+\)/g;
                print "$page.$ext SEE ALSOs @refs\n" if $Debug > 2;
                for my $ref (@refs) {
                    my $place = whereis($ref);
                    if ($place =~ /\*\*\*/ || $Debug || $Verbose) {
                        print "$page.$ext: $ref -> $place\n";
                    }
                }
                last;
            }
            1 while <MAN>;      # drain to suppress broken pipe
            close(MAN) || warn "close on man $option $page failed";
        }
    }
}

# usage(MESSAGE...)
# Print the optional message to STDERR, then die with the usage line.
sub usage {
    print STDERR "@_\n" if @_;
    die "Usage: $0 [-hdrivg] [-f cf-file] [-s cf-style] [mandir ...]\n";
}

# run_help()
# Show this program's own POD as a manpage, trying progressively
# cruder methods: pod2man|nroff, pod2text, Pod::Text, and finally
# dumping everything after __END__.  Never returns.
sub run_help {
    my $pager;
    unless ($pager = $ENV{PAGER}) {
        require Config;
        # lint happiness.  blech.
        $pager = $Config::Config{'pager'} || $Config::Config{'pager'};
    }
    $pager = "/bin/cat" unless has_cmd($pager);
    if (has_cmd("pod2man") && has_cmd("nroff") ) {
        { exec("pod2man $0 | nroff -man | $pager") }    # lint happiness
        warn "exec of pod2man | nroff | $pager failed: $!";
    }
    if (has_cmd("pod2text")) {
        { exec("pod2text $0 | $pager") }                # lint happiness
        warn "exec of pod2text | $pager failed: $!";
    }
    # sucks to be you!
    if (eval q{ require Pod::Text; 1; }) {
        open (STDOUT, "| $pager") || die "no pager $pager: $!";
        # this forces a wait on child if needed
        sub END { close(STDOUT) || die "cannot close STDOUT: $!" }
        Pod::Text::pod2text($0);
        exit 0;
    }
    # it REALLY REALLY REALLY sucks to be you!
    open 0 or die "$0: cannot open myself: $!";
    $/ = '';
    while (<0>) {
        last if /__(END|DATA)__/;   # must be careful here
    }
    print <0>;
    exit;
}

# has_cmd(COMMAND)
# Return the path of an executable for COMMAND, or nothing if there is
# none.  Only the first word of COMMAND is considered, so a pager with
# options ("less -R") still resolves.  A name containing a slash is
# tested directly instead of being searched for in PATH.
sub has_cmd {
    my $cmd = shift;
    return unless defined $cmd;
    my ($prog) = split ' ', $cmd;
    return unless defined $prog;
    if ($prog =~ m{/}) {
        return $prog if -f $prog && -x _;
        return;
    }
    for (split(/:/, $ENV{PATH} || '')) {
        my $path = "$_/$prog";
        return $path if -f $path && -x _;
    }
    return;
}

# parse_opts()
# Consume leading options from @ARGV and set the option variables.
# Single-letter flags may be clustered; -f and -s take their value
# either attached or as the following argument.
sub parse_opts {
    ARG: while (@ARGV && $ARGV[0] =~ s/^-(?=.)//) {
        OPT: for (shift @ARGV) {
            # getopts is for wimps
            m/^$/       && do {                                 next ARG; };
            m/^-$/      && do {                                 last ARG; };
            s/^d//      && do { $Debug++;                       redo OPT; };
            s/^i//      && do { $Ignore_Manpath++;              redo OPT; };
            s/^g//      && do { $No_Guessing++;                 redo OPT; };
            s/^v//      && do { $Verbose++;                     redo OPT; };
            s/^r//      && do { $Rebuild_Indices++;             redo OPT; };
            s/^f(.*)//  && do { $CF_File  = $1 || shift @ARGV;  next ARG; };
            s/^s(.*)//  && do { $CF_Style = $1 || shift @ARGV;  next ARG; };
            # The leading dash has already been stripped above, so the
            # second dash must be optional or plain -h is never seen.
            m/^-?h(elp)?$/  # stupid fsf broken crappy excuse for real manpages
                        && do { run_help();                     exit; };
            usage("unknown option: -$_");
        }
    }
    if ($CF_Style && !$CF_File) {
        for (glob("/etc/man*.c*f*")) {
            $CF_File = $_;
            last;
        }
        print "Guessed CF file of $CF_File\n" if $Debug;
    }
}

{   # extra scope for function private "static" variable
    my $linux_griped = 0;

    # get_osname()
    # Return $^O.  On linux, warn once that redhat conventions are
    # assumed, unless a style other than 'linux' was given with -s.
    sub get_osname {
        my $name = $^O;
        if ($name eq 'linux' && ! $linux_griped++
            && (! $CF_Style || $CF_Style eq 'linux') )
        {
            # there are many different linux operating systems, and
            # it torques me off that they pretend there aren't.
            # i have no idea whether this works anywhere but redhat.
            warn "$0: Your osname claims linux; assuming redhat instead\n";
        }
        return $name;
    }
}

# everything beneath here should be in a module

{   # extra scope for function private "static" variable
    # Keys are "page" (every known location) and "page",ext (one location).
    my %Whereis;

    # whereis("page(ext)")
    # Return where the referenced page lives.  A result beginning with
    # "*** " means it was not found where it was expected.  Results are
    # cached.  With $Rebuild_Indices set, only the index built by
    # check_can_whence() is consulted; otherwise "man -w" is run.
    sub whereis {
        my $manref = shift;
        my ($page, $ext) = $manref =~ /(\S+)\((\S+)\)/;
        $ext = lc($ext);
        return $Whereis{$page, $ext} if $Whereis{$page, $ext};
        if ($Rebuild_Indices) {
            $Whereis{$page, $ext} = "*** No manual entry for $page ";
            if ($Whereis{$page}) {
                $Whereis{$page, $ext} .= "(really in $Whereis{$page})";
            }
            return $Whereis{$page, $ext};
        }
        my $swext = adjust_ext($ext);
        print "man -w $swext '$page'\n" if $Debug > 1;
        # NOTE: $page and $ext come from formatted manpage text and are
        # interpolated into a shell command.
        ($Whereis{$page, $ext} = `man -w $swext '$page' 2>&1 `) =~ s/\n/ /g;
        if ($?) {
            $Whereis{$page, $ext} =~ s/^/*** /;
            print "man -w -a '$page'\n" if $Debug > 1;
            my $try_again = `man -w -a '$page' 2>&1 `;
            if (! $?) {
                $try_again =~ s/\n/ /g;
                #/: (\S+)\(([^\s)]+)\).*cat\Q$ext\E.*\b\Q$page\E\.0/
                if ($try_again =~ /\bcat\Q$ext\E.*\b\Q$page\E\.0/) {
                    $Whereis{$page, $ext} = $try_again;
                    print "BSD REALLY: $page.$ext really in $try_again\n"
                        if $Debug > 1;
                }
                else {
                    $Whereis{$page, $ext} =~ s/$/ (really $try_again)/;
                }
            }
        }
        return $Whereis{$page, $ext};
    }

    # check_can_whence()
    # Find out whether "man -w" works.  If it does not, or if -r was
    # given, set $Rebuild_Indices and fill %Whereis by hand: from the
    # windex files on solaris, from directory listings elsewhere.
    sub check_can_whence {
        if (! $Rebuild_Indices) {
            # stupid solaris sh bug.  how stupid can these
            # people be?
            # stdout is redirected first so that stderr follows it to
            # /dev/null; the probe is meant to be silent.
            system "(man -w man) > /dev/null 2>&1";
            return unless $?;
            warn "$0: Your system is stupid: it cannot whence.\n";
        }
        $Rebuild_Indices++;
        print "$0: Hold on, this may take a while....\n";
        if (get_osname() eq 'solaris') {
            for my $dir (split /:/, $Manpath) {
                local *WINDEX;
                next unless open(WINDEX, "< $dir/windex");
                print "reading $dir/windex\n" if $Debug;
                local $_;
                while (<WINDEX>) {
                    next unless /^(\S+)\s+(\S+)\s+\((\S+)\)/;
                    my ($name, $page, $ext) = ($1, $2, $3);
                    $Whereis{$name}      .= "$dir/man$ext/$page.$ext ";
                    $Whereis{$page,$ext}  = "$dir/man$ext/$page.$ext";
                }
            }
        }
        else {
            for my $tree (split /:/, $Manpath) {
                print "reading $tree directory entries\n" if $Debug;
                for my $dir ( glob("$tree/man*") ) {
                    next unless -d $dir;
                    local *DH;
                    opendir(DH, $dir) || die "cannot opendir $dir: $!";
                    my @pages = grep { /[^.].*\./ } readdir(DH);
                    closedir DH;
                    my($section) = $dir =~ /man([^\/]+)$/;
                    for my $page ( @pages ) {
                        my $name;
                        $page =~ s/\.gz$//;
                        my $ext;
                        unless (index($page, ".$section") >= 0) {
                            warn "wrong section for $dir/$page\n";
                        }
                        ($name = $page) =~ s/\.([^.]*)$//;
                        $ext = $1;
                        die "no ext in $page" unless $ext;
                        die "no name" unless $name;
                        $Whereis{$name,$ext}  = "$dir/$name.$ext";
                        $Whereis{$name}      .= "$dir/$page ";
                    }
                }
            }
        }
    }
}

# adjust_ext(SECTION)
# add a -s or a -S or no flag for calling up
# a page from a particular section
sub adjust_ext {
    my $ext = shift;
    my $osname = $^O;
    if ($osname eq 'solaris') {
        # stupid solaris REQUIRES this -s crap;
        # they don't understand -S either
        $ext = "-s $ext";
    }
    elsif ($osname eq 'freebsd') {
        # stupid freebsd FORBIDS the -s
        # they also require a -S if it's a two-char word,
        # like "man 3x curs_util".  it doesn't harm in any event,
        # so do it anyway
        $ext = "-S $ext";
    }
    elsif ($osname eq 'linux') {
        # stupid redhat FORBIDS the -s;
        # they tolerate -S, however.  but unlike bsd, they
        # don't seem to require it for 3x sections.  interesting.
        $ext = "-S $ext";
    }
    elsif ($osname eq 'openbsd') {
        # openbsd neither requires nor forbids -s nor -S,
        # which both mean the same thing.
        #
        # then again, they still need it for two-char words,
        # so do it anyway.  Seems dumb.  Config issue?
        $ext = "-S $ext";
    }
    return $ext;
}

# get_manpath()
# Work out the colon-separated list of man trees to process.  In order
# of preference: the command-line arguments, $MANPATH (unless -i), the
# config file given with -f, the system's usual config file or
# manpath(1), a guess from PATH, and finally a hard-coded default.
# Dies if nothing at all can be found.
sub get_manpath {
    my $pathstr;
    if (@ARGV) {
        return join ":", @ARGV;
    }
    if ($ENV{MANPATH} && ! $Ignore_Manpath) {
        return $ENV{MANPATH};
    }
    my $osname = get_osname();
    if ($CF_File) {
        $pathstr = readcf($CF_File, $CF_Style || $osname);
        return $pathstr if $pathstr;
    }
    if ($osname eq 'freebsd') {
        # freebsd has a manpath program
        $pathstr = run_manpath() || readcf('/etc/manpath.config');
    }
    elsif ($osname eq 'openbsd') {
        # but openbsd does not
        $pathstr = readcf('/etc/man.conf');
    }
    elsif ($osname eq 'linux') {
        # this sucks - osname should say which linux we have.  idiots.
        $pathstr = readcf('/etc/man.config');
    }
    else {
        if ($CF_File && $CF_Style) {
            $pathstr = readcf($CF_File, $CF_Style);
        }
        else {
            $pathstr = run_manpath() || compute_manpath();
        }
    }
    unless ($pathstr) {
        for (qw( /usr/man /usr/share/man )) {
            next unless -d;
            $pathstr = $_;
            warn "no manpath set, assuming $_.\n";
            last;
        }
        die "cannot find any manpages" unless $pathstr;
    }
    return $pathstr;
}

# compute_manpath()
# traverse binpath and guess: each PATH element has its last component
# replaced by "man" and is kept if that directory exists and has not
# been seen before, by name or by device/inode.  Returns nothing when
# guessing is disabled with -g.
sub compute_manpath {
    return if $No_Guessing;
    my (@manpath, %seen);
    for (split(/:/, $ENV{PATH})) {
        next if /^\.?$/;    # don't care about dot dirs
        if (s![^/+]*$!man! && -d && !$seen{$_}++) {
            my($dev,$ino) = stat(_);
            push(@manpath,$_) unless $seen{$dev,$ino}++;
        }
    }
    print "Guessing manpath of: @manpath\n" if $Debug;
    return join(":", @manpath);
}

# run_manpath()
# try an external manpath program; returns its output, or nothing
# if the command fails.
sub run_manpath {
    # the silly subshell is to dodge a solaris bug
    my $path = `(manpath) 2>/dev/null`;
    return if $?;
    chomp $path;
    return $path;
}

# readcf(CFPATH [, STYLE])
# try reading config files in various formats.  With two arguments the
# parser is chosen by STYLE (freebsd, openbsd, redhat, linux); with one
# it is chosen by CFPATH, which must then be a well-known config file.
# Returns a colon-separated list of existing, distinct directories,
# with the PATH-derived guesses first.  Dies on an unknown style or
# path, or if the file cannot be opened or closed.
sub readcf {
    die "readcf(): expected 1 or 2 args" if @_ < 1 || @_ > 2;
    my($cfpath, $ostype) = @_;
    my $pathfunc;
    my @styles = qw/freebsd openbsd redhat/;
    if (@_ == 2) {
        $pathfunc = {
            'freebsd'   => \&cf_freebsd,
            'openbsd'   => \&cf_openbsd,
            'redhat'    => \&cf_redhat,
            'linux'     => \&cf_redhat,
        }->{$ostype} || die "unknown CF style: $ostype (want @styles)";
    }
    else {
        $pathfunc = {
            '/etc/manpath.config'   => \&cf_freebsd,
            '/etc/man.conf'         => \&cf_openbsd,
            '/etc/man.config'       => \&cf_redhat,
        }->{$cfpath} || die "no CF reader for $cfpath";
    }
    local(*CF, $_);
    print "reading CF file $cfpath\n" if $Debug;
    open(CF, "< $cfpath") || die "cannot open $cfpath: $!";
    my(@dir_list, %seen_dir);

    # we run the guesser first to catch things in the path.
    unless ($No_Guessing) {
        for (@dir_list = split /:/, compute_manpath()) {
            my($dev,$ino) = stat $_;
            $seen_dir{$dev,$ino} = 1;
        }
    }

    while (<CF>) {
        s/^#.*//;
        next unless /\S/;
        # the cf_* parsers look at the current line in $_
        for (my @newpaths = $pathfunc->()) {
            # XXX: near-dup code
            if (-d && !$seen_dir{$_}++) {
                my($dev,$ino) = stat(_);
                push(@dir_list,$_) unless $seen_dir{$dev,$ino}++;
            }
        }
    }
    close(CF) || die "cannot close config $cfpath: $!";
    return join ":", @dir_list;
}

# cf_freebsd()
# Return the directory named by a freebsd manpath.config line in $_,
# or nothing if the line names none.
sub cf_freebsd {
    return $1 if /^\s*MANDATORY_MANPATH\s+(\S+)/;
    return $1 if /^\s*MANPATH_MAP\s+\S+\s+(\S+)/;
    return;
}

# cf_openbsd()
# Return the glob-expanded directories named by an openbsd man.conf
# line in $_, or nothing if the line names none.
sub cf_openbsd {
    return glob($1) if /^\s*_default\s+(.*\S)\s*$/;
    return glob($1) if /^\s*[^_\s]\S+\s+(.*\S)\s*$/;
    return;
}

# cf_redhat()
# Return the directory named by a redhat man.config line in $_,
# or nothing if the line names none.
sub cf_redhat {
    return $1 if /^\s*MANPATH\s+(\S+)/;
    return $1 if /^\s*MANPATH_MAP\s+\S+\s+(\S+)/;
    return;
}

__END__

=head1 NAME

cfman - make sure manpages have accurate SEE ALSOs

=head1 SYNOPSIS

B<cfman> [B<-hdrivg>] [B<-f> I<cf-file>] [B<-s> I<cf-style>] [I<mandir> ...]

=head1 DESCRIPTION

The B<cfman> program attempts to search your system manpages for
SEE ALSO entries that are incorrect.  To determine which manpages
to look at, the system's manpage directories are searched.  However,
to look at the SEE ALSO list, the man(1) program is called.  That's
because some systems have funny ideas about whether pages are
installed already formatted or not.

For each SEE ALSO reference, we attempt to call B<man -w> on
a particular page in the referenced section or subsection to
figure out the real path.  If this fails, we call B<man -w -a>
irrespective of section.  If it's found somewhere it's not expected,
we still report the problem, as we do if it's not found at all.

On systems too primitive to support the useful B<man -w> syntax,
we try to figure it out by hand by reading all the directories first.
On Solaris, we'll look at the I<windex> files in each directory.
You can force this behaviour by using the B<-r> option described below.

=head1 OPTIONS

Most options can be clustered.

=over

=item -d

Run with debugging.  This option is cumulative.  Currently debugging
levels one through three are provided.


=item -f I<cf-file>

Specify a man(1) config file to read in if need be.

=item -g

Disable guessing of manpath using current PATH variable.

=item -h

Give a help message.  Actually, try valiantly to give this manpage,
even if it's not installed.  It's very hard to misplace this one. :-)

=item -i

Ignore the current manpath.  This has two effects.  First, it means
that the program will not consult the MANPATH variable for default
paths.  Secondly, it will not attempt to reset the MANPATH variable
before calling man(1) to do its work.  See examples below.

=item -r

Rebuild indices of what is installed where manually.  This is a
simplistic check only.  We consult each I<man*> subdirectory beneath
each element in the list of supplied man directories, and within that,
we look for each page inside.  This is necessary on systems that don't
support a B<-w> option to man(1), and will be inferred if needed.  It
may be faster than running B<man -w> that often.

=item -s I<cf-style>

Supply a parsing style for the config files.  Only three are currently
supported: B<freebsd>, B<openbsd>, and B<redhat>.

=item -v

Verbose mode.  This just means that it will show where all the
SEE ALSO references appear to resolve to, not merely report the
missing or misdirected ones.

=back

=head1 EXAMPLES

Run the program using the current manpath if set, or the
system one otherwise:

    $ cfman

Run the program against the listed mantrees only.  References
to something outside those trees will fail:

    $ cfman /usr/man /usr/X11R6/man

Run on one tree only, but do not restrict references to being
in those trees only:

    $ cfman -i /usr/local/perl/man

=head1 ENVIRONMENT

=over

=item MANPATH

The user's current MANPATH is used unless the B<-i> option is supplied.

=item PAGER

This is used to feed the self-generating manpage into.

=item PATH

This is used if we need to guess a MANPATH.

=back

=head1 FILES

The system-wide config file (such as I</etc/manpath.config>,
I</etc/man.conf>, or I</etc/man.config>) is used if it's needed.
Numerous B<man>-related directories and files will be grovelled through,
both directly and indirectly.
The B<pod2man>, B<nroff>, and B<pod2text> programs may also be called
for the self-generating manpage in the help message, as may your
B<cat> program or preferred pager.

=head1 SEE ALSO

In no particular order: man(1), manpath(1), more(1), nroff(1),
pod2man(1), pod2text(1), whatis(1), man.conf(5), man(7), and noman(8).

=head1 NOTES

The B<-w> option to the man(1) program was first introduced in
the work presented at the 1990 Usenix LISA conference in
Colorado Springs in the paper entitled
I<The Answer to All Man's Problems>.  This option, along with
several others invented there, has since been adopted by all modern
Unixes.  Other work presented in that paper included an earlier
version of this program.  Sadly, vendors have been negligently remiss
in their duties since that time.

The paper is available upon request from the author.  It uses
the ms(7) macro set.  You have been warned. :-)

=head1 DIAGNOSTICS

Classes of diagnostics are as follows.

=over

=item N

A normal message.  This is the program's expected output.

=item D1

A level-one debugging message.

=item D2

A level-two debugging message.

=item W

A warning.

=item WI

An internal warning, with extra diagnostics appended telling the
file name and line number of the problem.

=item F

A fatal error.

=item FI

An internal fatal error, with extra diagnostics appended telling the
file name and line number of the problem.

=back

Any instances of C<%s> below are replaced with a string in the actual
error message.  Any instances of C<%m> below are replaced with the
current errno string.

=over

=item %s: %s -> %s (N)

Where a reference resolves to.  The first field is the page being
consulted.  The second field is what it contains.  The third field is
where the reference resolves to.  If there is no resolution, then a
message beginning with three stars will be emitted.  In some cases, a
parenthesized suggestion is made.

=item cannot fork man lookup: %m (FI)

Tried to run the man(1) program to parse output, but couldn't.
Usually means out of processes; or sometimes, command not found.


=item cannot cd to main tree %s: %m (FI)

One of the elements in the MANPATH was inaccessible.

=item cannot cd to subdir %s: %m (FI)

One of the subdirectories in one of the MANPATH elements was
inaccessible.

=item cannot close config %s: %m (FI)

The config file wouldn't close properly.

=item cannot close STDOUT: %m (FI)

The pager used for the help manpage wouldn't close properly.

=item cannot find any manpages (FI)

Unable to figure out a manpath any other way, we tried looking in
I</usr/man> and I</usr/share/man>, but those weren't there.

=item cannot open myself: %m (FI)

In the worst case, we open our own program file to produce a help
page.  But that open failed.  Strange.

=item cannot opendir %s: %m (FI)

One of the subdirectories in a man tree was inaccessible.

=item close on man %s failed (WI)

We were unable to correctly close the pipe from man(1) we were running
to read its SEE ALSO entries.

=item exec of pod2man | nroff | %s failed: %m (WI)

We couldn't pod2manify ourselves.  Usually this is just a broken pipe
because you exited early.

=item exec of pod2text | %s failed: %m (WI)

We couldn't pod2textify ourselves.  Usually this is just a broken pipe
because you exited early.

=item Guessed CF file of %s (D1)

You specified a parsing style, but no file.  So we guessed one.  We
look in I</etc/man*.c*f*> for a match.

=item Guessing manpath of: %s (D1)

We ran down your binpath and suspected that these were valid man
directories for each piece.

=item Hold on, this may take a while.... (N)

We have to exhaustively read each directory looking for manpages.
This is not fast.  But in the end, it might be faster than calling
B<man -w> a zillion times.  You can enable this with the C<-r> flag.

=item Limiting external manpath (D1)

The MANPATH envariable is set before calling man(1) again.

=item man %s %s (D2)

We're calling man(1) to parse the SEE ALSO references.

=item man -w %s '%s' (D2)

We're trying to look up the path where a manpage is located.


=item man -w -a '%s' (D2)

We're trying harder to look up the path where a manpage is located,
because the first try failed.

=item MANPATH is %s (D1)

This is the colon-separated list of mantree directories we decided to
process.

=item no CF reader for %s (X)

The path has no known syntax.

=item no ext in %s (X)

We couldn't figure out the subsection by looking for an extension.

=item no manpath set, assuming %s. (W)

We're trying to use a hard-coded path, because nothing else worked.

=item no name (X)

Couldn't figure out the name of the page, given the filename.

=item no pager %s: %m (FI)

You don't seem to have a valid pager.

=item readcf(): expected 1 or 2 args (X)

Internal error.  A function was called wrong.

=item reading CF file %s (D1)

We're parsing this file for man config entries.

=item reading %s directory entries (D1)

We're reading all the manpages in this directory.  Probably because
you used B<-r> or because you have a primitive and annoying man(1)
program.

=item reading %s/windex (D1)

We're reading the I<windex> file in this directory to learn what is
installed where.

=item subdir chdir('%s') (D2)

This message is printed each time we change to a subdirectory within a
mantree.

=item tree chdir('%s') (D2)

This message is printed each time we change directory to a new mantree.

=item unknown CF style: %s (want %s) (FI)

You asked for a config-file parsing style that we don't support.

=item unknown option: -%s (FI)

You specified an invalid option.  This will trigger a usage message.

=item Usage: %s [-hdrivg] [-f cf-file] [-s cf-style] [mandir ...] (N)

The usage message.

=item wrong section for %s/%s (W)

While searching your directories, we found a strange page, such as
I<foo.3> installed in the I<man1> directory, where we were expecting
I<foo.1> instead.

=item Your osname claims linux; assuming redhat instead (W)

It is unclear to this author whether all the different Linux operating
systems employ the same man(1) program.
It seems imprudent to assume that the version of the operating system
(read: the kernel) has anything to do with the installed utility set.
uname(1) is not helpful here.  You may suppress this message by
explicitly using B<-s redhat>.

=item Your system is stupid: it cannot whence. (W)

Your system is too primitive to support B<man -w>.  This makes us do
things the hard way.

=back

=head1 BUGS

Various, no doubt.

=head1 RESTRICTIONS

This program was tested only under a couple of different BSD
operating systems and a couple of different Linux operating systems.
Remedial support for Solaris is included, but has not been stress
tested.  Bugs in their I<windex> files mess up this program.

=head1 AUTHOR

Tom Christiansen <tchrist@perl.com>

=head1 HISTORY

Version 1: Sometime in early 1989.

Version 2: December 15th, 1989.

Version 3: October 20th, 1999.  Just made it in under the decade mark.