diff options
author | Marc Espie <espie@cvs.openbsd.org> | 2004-08-06 12:05:09 +0000 |
---|---|---|
committer | Marc Espie <espie@cvs.openbsd.org> | 2004-08-06 12:05:09 +0000 |
commit | 03af0a3fa0ff444df42bbfc7bf59c1db8f3a404d (patch) | |
tree | 203d5a48e890570c4775ee285dd4e68460a2967f /libexec/makewhatis/OpenBSD/Makewhatis | |
parent | 1702f711eb5a6da7d226fd633c14c8a22cc5c448 (diff) |
Reorganize makewhatis in smaller modules.
Avoid pulling unneeded code: for instance, if you have only formated
manpages in your system, Unformated will never be used; Check put aside
and only used in -p mode. -t mode won't pull Whatis. Find; File::Compare
and File::Copy and File::Temp likewise.
okay millert@
Diffstat (limited to 'libexec/makewhatis/OpenBSD/Makewhatis')
-rw-r--r-- | libexec/makewhatis/OpenBSD/Makewhatis/Check.pm | 84 | ||||
-rw-r--r-- | libexec/makewhatis/OpenBSD/Makewhatis/Find.pm | 45 | ||||
-rw-r--r-- | libexec/makewhatis/OpenBSD/Makewhatis/Formated.pm | 139 | ||||
-rw-r--r-- | libexec/makewhatis/OpenBSD/Makewhatis/Unformated.pm | 231 | ||||
-rw-r--r-- | libexec/makewhatis/OpenBSD/Makewhatis/Whatis.pm | 64 |
5 files changed, 563 insertions, 0 deletions
diff --git a/libexec/makewhatis/OpenBSD/Makewhatis/Check.pm b/libexec/makewhatis/OpenBSD/Makewhatis/Check.pm new file mode 100644 index 00000000000..66ee4ba12e9 --- /dev/null +++ b/libexec/makewhatis/OpenBSD/Makewhatis/Check.pm @@ -0,0 +1,84 @@ +# ex:ts=8 sw=4: +# $OpenBSD: Check.pm,v 1.1 2004/08/06 12:05:08 espie Exp $ +# Copyright (c) 2000-2004 Marc Espie <espie@openbsd.org> +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +use strict; +use warnings; +package OpenBSD::Makewhatis::Check; + +sub found($$) +{ + my ($pattern, $filename) = @_; + my @candidates = glob $pattern; + if (@candidates > 0) { + # quick check of inode, dev number + my ($dev_cmp, $inode_cmp) = (stat $filename)[0,1]; + for my $f (@candidates) { + my ($dev, $inode) = (stat $f)[0, 1]; + if ($dev == $dev_cmp && $inode == $inode_cmp) { + return 1; + } + } + # slow check with File::Compare + require File::Compare; + + for my $f (@candidates) { + if (File::Compare::compare($f, $filename) == 0) { + return 1; + } + } + } + return 0; +} +# verify_subject($subject, $filename): +# +# reparse the subject we're about to add, and check whether it makes +# sense, e.g., is there a man page around. +sub verify_subject +{ + local $_ = shift; + my $filename = shift; + if (m/\s*(.*?)\s*\((.*?)\)\s-\s/) { + my $man = $1; + my $section = $2; + my @mans = split(/\s*,\s*|\s+/, $man); + my $base = $filename; + if ($base =~ m|/|) { + $base =~ s,/[^/]*$,,; + } else { + $base = '.'; + } + my @notfound = (); + for my $func (@mans) { + my $i = $func; + next if found("$base/$i.*", $filename); + # try harder + $i =~ s/\(\)//; + $i =~ s/\-//g; + $i =~ s,^etc/,,; + next if found("$base/$i.*", $filename); + # and harder... + $i =~ tr/[A-Z]/[a-z]/; + next if found("$base/$i.*", $filename); + push(@notfound, $func); + } + if (@notfound > 0) { + print STDERR "Couldn't find ", join(', ', @notfound), + " in $filename:\n$_\n" + } + } +} + +1; diff --git a/libexec/makewhatis/OpenBSD/Makewhatis/Find.pm b/libexec/makewhatis/OpenBSD/Makewhatis/Find.pm new file mode 100644 index 00000000000..2673b86e9c2 --- /dev/null +++ b/libexec/makewhatis/OpenBSD/Makewhatis/Find.pm @@ -0,0 +1,45 @@ +# ex:ts=8 sw=4: +# $OpenBSD: Find.pm,v 1.1 2004/08/06 12:05:08 espie Exp $ +# Copyright (c) 2000-2004 Marc Espie <espie@openbsd.org> +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +use strict; +use warnings; +package OpenBSD::Makewhatis::Find; + +use File::Find; + + +# $list = find_manpages($dir) +# +# find all manpages under $dir, trim some duplicates. +# +sub find_manpages +{ + my $dir = shift; + my ($list, %nodes); + $list=[]; + find( + sub { + return unless /\.[\dln]\w*(?:\.Z|\.gz)?$/; + return unless -f $_; + my $unique = (stat _)[0]."/".(stat _)[1]; + return if defined $nodes{$unique}; + $nodes{$unique} = 1; + push(@$list, $File::Find::name); + }, $dir); + return $list; +} + +1; diff --git a/libexec/makewhatis/OpenBSD/Makewhatis/Formated.pm b/libexec/makewhatis/OpenBSD/Makewhatis/Formated.pm new file mode 100644 index 00000000000..33343e8c587 --- /dev/null +++ b/libexec/makewhatis/OpenBSD/Makewhatis/Formated.pm @@ -0,0 +1,139 @@ +# ex:ts=8 sw=4: +# $OpenBSD: Formated.pm,v 1.1 2004/08/06 12:05:08 espie Exp $ +# Copyright (c) 2000-2004 Marc Espie <espie@openbsd.org> +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +use strict; +use warnings; +package OpenBSD::Makewhatis::Formated; + +# add_formated_subject($subjects, $_, $section): +# add subject $_ to the list of current $subjects, in section $section. +# +sub add_formated_subject +{ + my ($subjects, $line, $section, $filename, $picky) = @_; + local $_ = $line; + + # some twits underline the command name + while (s/_\cH//g || s/(.)\cH\1/$1/g) + {} + if (m/-/) { + s/([-+.\w\d,])\s+/$1 /g; + s/([a-z][A-z])-\s+/$1/g; + # some twits use: func -- description + if (m/^[^-+.\w\d]*(.*?) -(?:-?)\s+(.*)/) { + my ($func, $descr) = ($1, $2); + $func =~ s/,\s*$//; + # nroff will tend to cut function names at the weirdest places + if (length($func) > 40 && $func =~ m/,/ && $section =~ /^3/) { + $func =~ s/\b \b//g; + } + $_ = "$func ($section) - $descr"; + push(@$subjects, $_); + return; + } + } + + print STDERR "Weird subject line in $filename:\n$_\n" if $picky; + + # try to find subject in line anyway + if (m/^\s*(.*\S)(?:\s{3,}|\(\)\s+)(.*?)\s*$/) { + my ($func, $descr) = ($1, $2); + $func =~ s/\s+/ /g; + $descr =~ s/\s+/ /g; + $_ = "$func ($section) - $descr"; + push(@$subjects, $_); + return; + } + + print STDERR "Weird subject line in $filename:\n$_\n" unless $picky; +} + +# $lines = handle($file, $filename, $picky) +# +# handle a formatted manpage in $file +# +# may return several subjects, perl(3p) do ! +# +sub handle +{ + my ($file, $filename, $picky) = @_; + local $_; + my ($section, $subject); + my @lines=(); + while (<$file>) { + next if /^$/; + chomp; + # Remove boldface from wide characters + while (s/(..)\cH\cH\1/$1/g) + {} + # Remove boldface and underlining + while (s/_\cH//g || s/(.)\cH\1/$1/g) + {} + if (m/\w[-+.\w\d]*\(([-+.\w\d\/]+)\)/) { + $section = $1; + # Find architecture + if (m/Manual\s+\((.*?)\)/) { + $section = "$section/$1"; + } + } + # Not all man pages are in english + # weird hex is `Namae' in japanese + if (m/^(?:NAME|NAMES|NAMN|Name|\xbe|\xcc\xbe\xbe\xce|\xcc\xbe\xc1\xb0)\s*$/) { + unless (defined $section) { + # try to retrieve section from filename + if ($filename =~ m/(?:cat|man)([\dln])\//) { + $section = $1; + print STDERR "Can't find section in $filename, deducting $section from context\n" if $picky; + } else { + $section='??'; + print STDERR "Can't find section in $filename\n"; + } + } + while (<$file>) { + chomp; + # perl aggregates several subjects in one manpage + if (m/^$/) { + add_formated_subject(\@lines, $subject, $section, $filename, $picky) + if defined $subject; + $subject = undef; + } elsif (m/^\S/ || m/^\s+\*{3,}\s*$/) { + add_formated_subject(\@lines, $subject, $section, $filename, $picky) + if defined $subject; + last; + } else { + # deal with troff hyphenations + if (defined $subject and $subject =~ m/\xad\s*$/) { + $subject =~ s/(?:\xad\cH)*\xad\s*$//; + s/^\s*//; + } + # more troff hyphenation + if (defined $subject and $subject =~ m/\S(?:\-\cH)*\-$/) { + $subject =~ s/(?:\-\cH)*\-$//; + s/^\s*//; + } + s/^\s+/ /; + $subject.=$_; + } + } + last; + } + } + + print STDERR "Can't parse $filename (not a manpage ?)\n" if @lines == 0; + return \@lines; +} + +1; diff --git a/libexec/makewhatis/OpenBSD/Makewhatis/Unformated.pm b/libexec/makewhatis/OpenBSD/Makewhatis/Unformated.pm new file mode 100644 index 00000000000..6cc133dd645 --- /dev/null +++ b/libexec/makewhatis/OpenBSD/Makewhatis/Unformated.pm @@ -0,0 +1,231 @@ +# ex:ts=8 sw=4: +# $OpenBSD: Unformated.pm,v 1.1 2004/08/06 12:05:08 espie Exp $ +# Copyright (c) 2000-2004 Marc Espie <espie@openbsd.org> +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +use strict; +use warnings; +package OpenBSD::Makewhatis::Unformated; + +# add_unformated_subject($lines, $toadd, $section, $filename, $toexpand): +# +# build subject from list of $toadd lines, and add it to the list +# of current subjects as section $section +# +sub add_unformated_subject +{ + my ($subjects, $toadd, $section, $filename, $toexpand, $picky) = @_; + + my $exp = sub { + if (defined $toexpand->{$_[0]}) { + return $toexpand->{$_[0]}; + } else { + print STDERR "$filename: can't expand $_[0]\n"; + return ""; + } + }; + + local $_ = join(' ', @$toadd); + # do interpolations + s/\\\*\((..)/&$exp($1)/ge; + s/\\\*\[(.*?)\]/&$exp($1)/ge; + + # horizontal space adjustments + while (s/\\s[-+]?\d+//g) + {} + # unbreakable spaces + s/\\\s+/ /g; + # unbreakable em dashes + s/\\\|\\\(em\\\|/-/g; + # em dashes + s/\\\(em\s+/- /g; + # em dashes in the middle of lines + s/\\\(em/-/g; + s/\\\*[LO]//g; + s/\\\(tm/(tm)/g; + # font changes + s/\\f[BIRP]//g; + s/\\f\(..//g; + # fine space adjustments + while (s/\\[vh]\'.*?\'//g) + {} + unless (s/\s+\\-\s+/ ($section) - / || s/\s*\\\-/ ($section) -/ || + s/\s-\s/ ($section) - /) { + print STDERR "Weird subject line in $filename:\n$_\n" if $picky; + # Try guessing where the separation falls... + s/\s+\:\s+/ ($section) - / || s/\S+\s+/$& ($section) - / || s/\s*$/ ($section) - (empty subject)/; + } + # other dashes + s/\\-/-/g; + # escaped characters + s/\\\&(.)/$1/g; + s/\\\|/|/g; + # gremlins... + s/\\c//g; + # sequence of spaces + s/\s+$//; + s/^\s+//; + s/\s+/ /g; + # some damage control + if (m/^\Q($section) - \E/) { + print STDERR "Rejecting non-subject line from $filename:\n$_\n" + if $picky; + return; + } + push(@$subjects, $_); +} + +# $lines = handle($file, $filename, $picky) +# +# handle an unformated manpage in $file +# +# may return several subjects, perl(3p) do ! +# +sub handle +{ + my ($f, $filename, $picky) = @_; + my @lines = (); + my %toexpand = (); + my $so_found = 0; + local $_; + # retrieve basename of file + my ($name, $section) = $filename =~ m|(?:.*/)?(.*)\.([\w\d]+)|; + # scan until macro + while (<$f>) { + next unless m/^\./; + if (m/^\.\s*de/) { + while (<$f>) { + last if m/^\.\s*\./; + } + next; + } + if (m/^\.\s*ds\s+(\S+)\s+/) { + chomp($toexpand{$1} = $'); + next; + } + # Some cross-refs just link to another manpage + $so_found = 1 if m/^\.\s*so/; + if (m/^\.\s*TH/ || m/^\.\s*th/) { + # in pricky mode, we should try to match these + # ($name2, $section2) = m/^\.(?:TH|th)\s+(\S+)\s+(\S+)/; + # scan until first section + while (<$f>) { + if (m/^\.\s*de/) { + while (<$f>) { + last if m/^\.\s*\./; + } + next; + } + if (m/^\.\s*ds\s+(\S+)\s+/) { + chomp($toexpand{$1} = $'); + next; + } + next unless m/^\./; + if (m/^\.\s*SH/ || m/^\.\s*sh/) { + my @subject = (); + while (<$f>) { + last if m/^\.\s*(?:SH|sh|SS|ss|nf|LI)/; + # several subjects in one manpage + if (m/^\.\s*(?:PP|Pp|br|PD|LP|sp)/) { + add_unformated_subject(\@lines, \@subject, + $section, $filename, \%toexpand, $picky) + if @subject != 0; + @subject = (); + next; + } + next if m/^\'/ || m/^\.\s*tr\s+/ || m/^\.\s*\\\"/ || + m/^\.\s*sv/ || m/^\.\s*Vb\s+/ || m/\.\s*HP\s+/; + if (m/^\.\s*de/) { + while (<$f>) { + last if m/^\.\s*\./; + } + next; + } + if (m/^\.\s*ds\s+(\S+)\s+/) { + chomp($toexpand{$1} = $'); + next; + } + # Motif index entries, don't do anything for now. + next if m/^\.\s*iX/; + # Some other index (cook) + next if m/^\.\s*XX/; + chomp; + s/\.\s*(?:B|I|IR|SM|BR)\s+//; + if (m/^\.\s*(\S\S)/) { + print STDERR "$filename: not grokking $_\n" + if $picky; + next; + } + push(@subject, $_) unless m/^\s*$/; + } + add_unformated_subject(\@lines, \@subject, $section, + $filename, \%toexpand, $picky) if @subject != 0; + return \@lines; + } + } + print STDERR "Couldn't find subject in old manpage $filename\n"; + } elsif (m/^\.\s*Dt/) { + $section .= "/$1" if (m/^\.\s*Dt\s+\S+\s+\d\S*\s+(\S+)/); + while (<$f>) { + next unless m/^\./; + if (m/^\.\s*Sh/) { + # subject/keep is the only way to deal with Nm/Nd pairs + my @subject = (); + my @keep = (); + my $nd_seen = 0; + while (<$f>) { + next if m/^\.\\\"/; + last if m/^\.\s*Sh/; + s/\s,/,/g; + if (s/^\.\s*(\S\S)\s+//) { + my $macro = $1; + next if $macro eq "\\\""; + s/\"(.*?)\"/$1/g; + s/\\-/-/g; + $macro eq 'Xr' and s/^(\S+)\s+(\d\S*)/$1 ($2)/; + $macro eq 'Ox' and s/^/OpenBSD /; + $macro eq 'Nx' and s/^/NetBSD /; + if ($macro eq 'Nd') { + if (@keep != 0) { + add_unformated_subject(\@lines, \@keep, + $section, $filename, \%toexpand, $picky); + @keep = (); + } + push(@subject, "\\-"); + $nd_seen = 1; + } + if ($nd_seen && $macro eq 'Nm') { + @keep = @subject; + @subject = (); + $nd_seen = 0; + } + } + push(@subject, $_) unless m/^\s*$/; + } + unshift(@subject, @keep) if @keep != 0; + add_unformated_subject(\@lines, \@subject, $section, + $filename, \%toexpand, $picky) + if @subject != 0; + return \@lines; + } + } + } + } + if ($so_found == 0) { + print STDERR "Unknown manpage type $filename\n"; + } + return \@lines; +} + +1; diff --git a/libexec/makewhatis/OpenBSD/Makewhatis/Whatis.pm b/libexec/makewhatis/OpenBSD/Makewhatis/Whatis.pm new file mode 100644 index 00000000000..5b941dd21de --- /dev/null +++ b/libexec/makewhatis/OpenBSD/Makewhatis/Whatis.pm @@ -0,0 +1,64 @@ +# ex:ts=8 sw=4: +# $OpenBSD: Whatis.pm,v 1.1 2004/08/06 12:05:08 espie Exp $ +# Copyright (c) 2000-2004 Marc Espie <espie@openbsd.org> +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +use strict; +use warnings; +package OpenBSD::Makewhatis::Whatis; + +use File::Temp qw/tempfile/; +use File::Compare; + +# write($list, $dir): +# +# write $list to file named $file, removing duplicate entries. +# Change $file mode/owners to expected values +# Write to temporary file first, and do the copy only if changes happened. +# +sub write +{ + my ($list, $dir) = @_; + my $f = "$dir/whatis.db"; + local $_; + + my ($out, $tempname); + ($out, $tempname) = tempfile('/tmp/makewhatis.XXXXXXXXXX') or die "$0: Can't open temporary file"; + + my @sorted = sort @$list; + my $last; + + while ($_ = shift @sorted) { + print $out $_, "\n" unless defined $last and $_ eq $last; + $last = $_; + } + close $out; + if (compare($tempname, $f) == 0) { + unlink($tempname); + } else { + require File::Copy; + + unlink($f); + if (File::Copy::move($tempname, $f)) { + chmod 0444, $f; + chown 0, (getgrnam 'bin')[2], $f; + } else { + print STDERR "$0: Can't create $f ($!)\n"; + unlink($tempname); + exit 1; + } + } +} + +1; |