summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTed Unangst <tedu@cvs.openbsd.org>2010-07-01 20:12:34 +0000
committerTed Unangst <tedu@cvs.openbsd.org>2010-07-01 20:12:34 +0000
commit7b902249f5fa5694148003f12f18ef2ed9371848 (patch)
treebc2297cd4a6764d611ba79ed02e990cb5d8fac1b
parent57d41f42174e86367d5ee3c73380ab0a15ab60b6 (diff)
these files are unbelievably out of date
-rw-r--r--share/doc/papers/beyond4.3/Makefile11
-rw-r--r--share/doc/papers/beyond4.3/beyond43.ms516
-rw-r--r--share/doc/papers/diskperf/Makefile15
-rw-r--r--share/doc/papers/diskperf/abs.ms174
-rw-r--r--share/doc/papers/diskperf/appendix.ms96
-rw-r--r--share/doc/papers/diskperf/conclusions.ms125
-rw-r--r--share/doc/papers/diskperf/equip.ms175
-rw-r--r--share/doc/papers/diskperf/methodology.ms109
-rw-r--r--share/doc/papers/diskperf/motivation.ms91
-rw-r--r--share/doc/papers/diskperf/results.ms335
-rw-r--r--share/doc/papers/diskperf/tests.ms106
-rw-r--r--share/doc/papers/fsinterface/Makefile11
-rw-r--r--share/doc/papers/fsinterface/abstract.ms71
-rw-r--r--share/doc/papers/fsinterface/fsinterface.ms1174
-rw-r--r--share/doc/papers/fsinterface/slides.t316
-rw-r--r--share/doc/papers/future/0.t58
-rw-r--r--share/doc/papers/future/1.t154
-rw-r--r--share/doc/papers/future/2.t180
-rw-r--r--share/doc/papers/future/Makefile13
-rw-r--r--share/doc/papers/future/r.t140
-rw-r--r--share/doc/papers/future/spell.ok90
-rw-r--r--share/doc/papers/jus/Makefile10
-rw-r--r--share/doc/papers/jus/paper.ms431
-rw-r--r--share/doc/papers/kernmalloc/Makefile16
-rw-r--r--share/doc/papers/kernmalloc/alloc.fig113
-rw-r--r--share/doc/papers/kernmalloc/appendix.t135
-rw-r--r--share/doc/papers/kernmalloc/kernmalloc.t647
-rw-r--r--share/doc/papers/kernmalloc/spell.ok57
-rw-r--r--share/doc/papers/kernmalloc/usage.tbl73
-rw-r--r--share/doc/papers/kerntune/0.t127
-rw-r--r--share/doc/papers/kerntune/1.t46
-rw-r--r--share/doc/papers/kerntune/2.t232
-rw-r--r--share/doc/papers/kerntune/3.t288
-rw-r--r--share/doc/papers/kerntune/4.t97
-rw-r--r--share/doc/papers/kerntune/Makefile15
-rw-r--r--share/doc/papers/kerntune/fig2.pic55
-rw-r--r--share/doc/papers/malloc/Makefile16
-rw-r--r--share/doc/papers/malloc/abs.ms35
-rw-r--r--share/doc/papers/malloc/alternatives.ms45
-rw-r--r--share/doc/papers/malloc/conclusion.ms48
-rw-r--r--share/doc/papers/malloc/implementation.ms223
-rw-r--r--share/doc/papers/malloc/intro.ms74
-rw-r--r--share/doc/papers/malloc/kernel.ms56
-rw-r--r--share/doc/papers/malloc/malloc.ms72
-rw-r--r--share/doc/papers/malloc/performance.ms113
-rw-r--r--share/doc/papers/malloc/problems.ms54
-rw-r--r--share/doc/papers/memfs/0.t84
-rw-r--r--share/doc/papers/memfs/1.t390
-rw-r--r--share/doc/papers/memfs/A.t171
-rw-r--r--share/doc/papers/memfs/Makefile25
-rw-r--r--share/doc/papers/memfs/ref.bib49
-rw-r--r--share/doc/papers/memfs/spell.ok18
-rw-r--r--share/doc/papers/memfs/tmac.srefs179
-rw-r--r--share/doc/papers/newvm/0.t84
-rw-r--r--share/doc/papers/newvm/1.t375
-rw-r--r--share/doc/papers/newvm/Makefile14
-rw-r--r--share/doc/papers/newvm/a.t237
-rw-r--r--share/doc/papers/newvm/spell.ok56
-rw-r--r--share/doc/papers/nqnfs/Makefile14
-rw-r--r--share/doc/papers/nqnfs/nqnfs.me2009
-rw-r--r--share/doc/papers/px/Makefile20
-rw-r--r--share/doc/papers/px/fig1.1.n69
-rw-r--r--share/doc/papers/px/fig1.2.n66
-rw-r--r--share/doc/papers/px/fig1.3.n58
-rw-r--r--share/doc/papers/px/fig2.3.raw103
-rw-r--r--share/doc/papers/px/fig2.4.n55
-rw-r--r--share/doc/papers/px/fig3.2.n54
-rw-r--r--share/doc/papers/px/fig3.3.n55
-rw-r--r--share/doc/papers/px/pxin0.n138
-rw-r--r--share/doc/papers/px/pxin1.n536
-rw-r--r--share/doc/papers/px/pxin2.n921
-rw-r--r--share/doc/papers/px/pxin3.n595
-rw-r--r--share/doc/papers/px/pxin4.n65
-rw-r--r--share/doc/papers/px/table2.1.n81
-rw-r--r--share/doc/papers/px/table2.2.n83
-rw-r--r--share/doc/papers/px/table2.3.n43
-rw-r--r--share/doc/papers/px/table3.1.n45
-rw-r--r--share/doc/papers/px/tmac.p110
-rw-r--r--share/doc/papers/relengr/0.t89
-rw-r--r--share/doc/papers/relengr/1.t67
-rw-r--r--share/doc/papers/relengr/2.t144
-rw-r--r--share/doc/papers/relengr/3.t388
-rw-r--r--share/doc/papers/relengr/Makefile16
-rw-r--r--share/doc/papers/relengr/ref.bib26
-rw-r--r--share/doc/papers/relengr/ref.bib.ig3
-rw-r--r--share/doc/papers/relengr/spell.ok15
-rw-r--r--share/doc/papers/relengr/tmac.srefs181
-rw-r--r--share/doc/papers/sysperf/0.t245
-rw-r--r--share/doc/papers/sysperf/1.t79
-rw-r--r--share/doc/papers/sysperf/2.t256
-rw-r--r--share/doc/papers/sysperf/3.t692
-rw-r--r--share/doc/papers/sysperf/4.t772
-rw-r--r--share/doc/papers/sysperf/5.t283
-rw-r--r--share/doc/papers/sysperf/6.t68
-rw-r--r--share/doc/papers/sysperf/7.t162
-rw-r--r--share/doc/papers/sysperf/Makefile25
-rw-r--r--share/doc/papers/sysperf/a1.t666
-rw-r--r--share/doc/papers/sysperf/a2.t115
98 files changed, 0 insertions, 18132 deletions
diff --git a/share/doc/papers/beyond4.3/Makefile b/share/doc/papers/beyond4.3/Makefile
deleted file mode 100644
index d76b11422cb..00000000000
--- a/share/doc/papers/beyond4.3/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-# $OpenBSD: Makefile,v 1.3 2004/02/01 14:22:44 jmc Exp $
-
-
-DIR= papers/beyond43
-SRCS= beyond43.ms
-MACROS= -ms
-
-paper.txt: ${SRCS}
- ${ROFF} -Tascii ${SRCS} > ${.TARGET}
-
-.include <bsd.doc.mk>
diff --git a/share/doc/papers/beyond4.3/beyond43.ms b/share/doc/papers/beyond4.3/beyond43.ms
deleted file mode 100644
index cb0d9157ae5..00000000000
--- a/share/doc/papers/beyond4.3/beyond43.ms
+++ /dev/null
@@ -1,516 +0,0 @@
-.\" $OpenBSD: beyond43.ms,v 1.4 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1989 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)beyond43.ms 5.1 (Berkeley) 6/5/90
-.\"
-.\" *troff -ms
-.rm CM
-.sp 2
-.ce 100
-\fB\s+2Current Research by
-The Computer Systems Research Group
-of Berkeley\s-2\fP
-.ds DT "February 10, 1989
-.\" \fBDRAFT of \*(DT\fP
-.sp 2
-.nf
-Marshall Kirk McKusick
-Michael J Karels
-Keith Sklower
-Kevin Fall
-Marc Teitelbaum
-Keith Bostic
-.fi
-.sp 2
-.ce 1
-\fISummary\fP
-.ce 0
-.PP
-The release of 4.3BSD in April of 1986 addressed many of the
-performance problems and unfinished interfaces
-present in 4.2BSD [Leffler84] [McKusick85].
-The Computer Systems Research Group at Berkeley
-has now embarked on a new development phase to
-update other major components of the system, as well as to offer
-new functionality.
-There are five major ongoing projects.
-The first is to develop an OSI network protocol suite and to integrate
-existing ISO applications into Berkeley UNIX.
-The second is to develop and support an interface compliant with the
-P1003.1 POSIX standard recently approved by the IEEE.
-The third is to refine the TCP/IP networking to improve
-its performance and limit congestion on slow and/or lossy networks.
-The fourth is to provide a standard interface to file systems
-so that multiple local and remote file systems can be supported,
-much as multiple networking protocols are supported by 4.3BSD.
-The fifth is to evaluate alternate access control mechanisms and
-audit the existing security features of the system, particularly
-with respect to network services.
-Other areas of work include multi-architecture support,
-a general purpose kernel memory allocator, disk labels, and
-extensions to the 4.2BSD fast filesystem.
-.PP
-We are planning to finish implementation prototypes for each of the
-five main areas of work over the next year, and provide an informal
-test release sometime next year for interested developers.
-After feedback and refinements from the testers have been incorporated,
-they will appear in the next full Berkeley release, which is typically
-made about a year after the test release.
-.br
-.ne 10
-.sp 2
-.NH
-Recently Completed Projects
-.PP
-There have been several changes in the system that were included
-in the recent 4.3BSD Tahoe release.
-.NH 2
-Multi-architecture support
-.PP
-Support has been added for the DEC VAX 8600/8650, VAX 8200/8250,
-MicroVAXII and MicroVAXIII.
-.PP
-The largest change has been the incorporation of support for the first
-non-VAX processor, the CCI Power 6/32 and 6/32SX. (This addition also
-supports the
-Harris HCX-7 and HCX-9, as well as the Sperry 7000/40 and ICL machines.)
-The Power 6 version of 4.3BSD is largely based on the compilers and
-device drivers done for CCI's 4.2BSD UNIX,
-and is otherwise similar to the VAX release of 4.3BSD.
-The entire source tree, including all kernel and user-level sources,
-has been merged using a structure that will easily accommodate the addition
-of other processor families. A MIPS R2000 has been donated to us,
-making the MIPS architecture a likely candidate for inclusion into a future
-BSD release.
-.NH 2
-Kernel Memory Allocator
-.PP
-The 4.3BSD UNIX kernel used 10 different memory allocation mechanisms,
-each designed for the particular needs of the utilizing subsystem.
-These mechanisms have been replaced by a general purpose dynamic
-memory allocator that can be used by all of the kernel subsystems.
-The design of this allocator takes advantage of known memory usage
-patterns in the UNIX kernel and a hybrid strategy that is time-efficient
-for small allocations and space-efficient for large allocations.
-This allocator replaces the multiple memory allocation interfaces
-with a single easy-to-program interface,
-results in more efficient use of global memory by eliminating
-partitioned and specialized memory pools,
-and is quick enough (approximately 15 VAX instructions) that no
-performance loss is observed relative to the current implementations
-[McKusick88].
-.NH 2
-Disk Labels
-.PP
-During the work on the CCI machine,
-it became obvious that disk geometry and filesystem layout information
-must be stored on each disk in a pack label.
-Disk labels were implemented for the CCI disks and for the most common
-types of disk controllers on the VAX.
-A utility was written to create and maintain the disk information,
-and other user-level programs that use such information now obtain
-it from the disk label.
-The use of this facility has allowed improvements in the file system's
-knowledge of irregular disk geometries such as track-to-track skew.
-.NH 2
-Fat Fast File System
-.PP
-The 4.2 fast file system [McKusick84]
-contained several statically sized structures,
-imposing limits on the number of cylinders per cylinder group,
-inodes per cylinder group,
-and number of distinguished rotational positions.
-The new ``fat'' filesystem allows these limits to be set at filesystem
-creation time.
-Old kernels will treat the new filesystems as read-only,
-and new kernels
-will accommodate both formats.
-The filesystem check facility, \fCfsck\fP, has also been modified to check
-either type.
-.br
-.ne 10
-.sp 2
-.NH
-Current UNIX Research at Berkeley
-.PP
-Since the release of 4.3BSD in mid 1986,
-we have begun work on several new major areas of research.
-Our goal is to incorporate leading edge research ideas into a stable
-and reliable implementation that solves current problems in
-operating systems development.
-.NH 2
-OSI network protocol development
-.PP
-The network architecture of 4.2BSD was designed to accommodate
-multiple network protocol families and address formats,
-and an implementation of the ISO OSI network protocols
-should enter into this framework without much difficulty.
-We plan to
-implement the OSI connectionless internet protocol (CLNP),
-and device drivers for X.25, 802.3, and possibly 802.5 interfaces, and
-to integrate these with an OSI transport class 4 (TP-4) implementation.
-We will also incorporate into the Berkeley Software Distribution an
-updated ISO Development Environment (ISODE)
-featuring International Standard (IS) versions of utilities.
-ISODE implements the session and presentation layers of the OSI protocol suite,
-and will include an implementation of the file transfer protocol (FTAM).
-It is also possible that an X.400 implementation now being done at
-University College, London and the University of Nottingham
-will be available for testing and distribution.
-.LP
-This implementation is comprised of four areas.
-.IP 1)
-We are updating the University of
-Wisconsin TP-4 to match GOSIP requirements.
-The University of Wisconsin developed a transport class 4
-implementation for the 4.2BSD kernel under contract to Mitre.
-This implementation must be updated to reflect the National Institute
-of Standards and Technology (NIST, formerly NBS) workshop agreements,
-GOSIP, and 4.3BSD requirements.
-We will make this TP-4 operate with an OSI IP,
-as the original implementation was built to run over the DoD IP.
-.IP 2)
-A kernel version of the OSI IP and ES-IS protocols must be produced.
-We will implement the kernel version of these protocols.
-.IP 3)
-The required device drivers need to be integrated into a BSD kernel.
-4.3BSD has existing device drivers for many ethernet devices; future
-BSD versions may also support X.25 devices as well as token ring
-networks.
-These device drivers must be integrated
-into the kernel OSI protocol implementations.
-.IP 4)
-The existing OSINET interoperability test network is available so
-that the interoperability of the ISODE and BSD kernel protocols
-can be established through tests with several vendors.
-Testing is crucial because an openly available version of GOSIP protocols
-that does not interoperate with DEC, IBM, SUN, ICL, HIS, and other
-major vendors would be embarrassing.
-To allow testing of the integrated pieces the most desirable
-approach is to provide access to OSINET at UCB.
-A second approach is to do the interoperability testing at
-the site of an existing OSINET member, such as the NBS.
-.NH 2
-Compliance with POSIX 1003
-.PP
-Berkeley became involved several months ago in the development
-of the IEEE POSIX P1003.1 system interface standard.
-Since then, we have been participating in the working groups
-of P1003.2 (shell and application utility interface),
-P1003.6 (security), P1003.7 (system administration), and P1003.8
-(networking).
-.PP
-The IEEE published the POSIX P1003.1 standard in late 1988.
-POSIX related changes to the BSD system have included a new terminal
-driver, support for POSIX sessions and job control, expanded signal
-functionality, restructured directory access routines, and new set-user
-and set-group id facilities.
-We currently have a prototype implementation of the
-POSIX driver with extensions to provide binary compatibility with
-applications developed for the old Berkeley terminal driver.
-We also have a prototype implementation of the 4.2BSD-based POSIX
-job control facility.
-.PP
-The P1003.2 draft is currently being voted on by the IEEE
-P1003.2 balloting group.
-Berkeley is particularly interested in the results of this standard,
-as it will profoundly influence the user environment.
-The other groups are in comparatively early phases, with drafts
-coming to ballot sometime in the 90's.
-Berkeley will continue to participate in these groups, and
-move in the near future toward a P1003.1 and P1003.2 compliant
-system.
-We have many of the utilities outlined in the current P1003.2 draft
-already implemented, and have other parties willing to contribute
-additional implementations.
-.NH 2
-Improvements to the TCP/IP Networking Protocols
-.PP
-The Internet and the Berkeley collection of local-area networks
-have both grown at high rates in the last year.
-The Bay Area Regional Research Network (BARRNet),
-connecting several UC campuses, Stanford and NASA-Ames
-has recently become operational, increasing the complexity
-of the network connectivity.
-Both Internet and local routing algorithms are showing the strain
-of continued growth.
-We have made several changes in the local routing algorithm
-to keep accommodating the current topology,
-and are participating in the development of new routing algorithms
-and standard protocols.
-.PP
-Recent work in collaboration with Van Jacobson of the Lawrence Berkeley
-Laboratory has led to the design and implementation of several new algorithms
-for TCP that improve throughput on both local and long-haul networks
-while reducing unnecessary retransmission.
-The improvement is especially striking when connections must traverse
-slow and/or lossy networks.
-The new algorithms include ``slow-start,''
-a technique for opening the TCP flow control window slowly
-and using the returning stream of acknowledgements as a clock
-to drive the connection at the highest speed tolerated by the intervening
-network.
-A modification of this technique allows the sender to dynamically modify
-the send window size to adjust to changing network conditions.
-In addition, the round-trip timer has been modified to estimate the variance
-in round-trip time, thus allowing earlier retransmission of lost packets
-with less spurious retransmission due to increasing network delay.
-Along with a scheme proposed by Phil Karn of Bellcore,
-these changes reduce unnecessary retransmission over difficult paths
-such as Satnet by nearly two orders of magnitude
-while improving throughput dramatically.
-.PP
-The current TCP implementation is now being readied
-for more widespread distribution via the network and as a
-standard Berkeley distribution unencumbered by any commercial licensing.
-We are continuing to refine the TCP and IP implementations
-using the ARPANET, BARRNet, the NSF network
-and local campus nets as testbeds.
-In addition, we are incorporating applicable algorithms from this work
-into the TP-4 protocol implementation.
-.NH 2
-Toward a Compatible File System Interface
-.PP
-The most critical shortcoming of the 4.3BSD UNIX system was in the
-area of distributed file systems.
-As with networking protocols,
-there is no single distributed file system
-that provides sufficient speed and functionality for all problems.
-It is frequently necessary to support several different remote
-file system protocols, just as it is necessary to run several
-different network protocols.
-.PP
-As network or remote file systems have been implemented for UNIX,
-several stylized interfaces between the file system implementation
-and the rest of the kernel have been developed.
-Among these are Sun Microsystems' Virtual File System interface (VFS)
-using \fBvnodes\fP [Sandberg85] [Kleiman86],
-Digital Equipment's Generic File System (GFS) architecture [Rodriguez86],
-AT&T's File System Switch (FSS) [Rifkin86],
-the LOCUS distributed file system [Walker85],
-and Masscomp's extended file system [Cole85].
-Other remote file systems have been implemented in research or
-university groups for internal use,
-notably the network file system in the Eighth Edition UNIX
-system [Weinberger84] and two different file systems used at Carnegie Mellon
-University [Satyanarayanan85].
-Numerous other remote file access methods have been devised for use
-within individual UNIX processes,
-many of them by modifications to the C I/O library
-similar to those in the Newcastle Connection [Brownbridge82].
-.PP
-Each design attempts to isolate file system-dependent details
-below a generic interface and to provide a framework within which
-new file systems may be incorporated.
-However, each of these interfaces is different from
-and incompatible with the others.
-Each addresses somewhat different design goals,
-having been based on a different version of UNIX,
-having targeted a different set of file systems with varying characteristics,
-and having selected a different set of file system primitive operations.
-.PP
-Our effort in this area is aimed at providing a common framework to
-support these different distributed file systems simultaneously rather than to
-simply implement yet another protocol.
-This requires a detailed study of the existing protocols,
-and discussion with their implementors to determine whether
-they could modify their implementation to fit within our proposed
-framework. We have studied the various file system interfaces to determine
-their generality, completeness, robustness, efficiency, and aesthetics
-and are currently working on a file system interface
-that we believe includes the best features of
-each of the existing implementations.
-This work and the rationale underlying its development
-have been presented to major software vendors as an early step
-toward convergence on a standard compatible file system interface.
-Briefly, the proposal adopts the 4.3BSD calling convention for file
-name lookup but otherwise is closely related to Sun's VFS
-and DEC's GFS [Karels86].
-.NH 2
-System Security
-.PP
-The recent invasion of the DARPA Internet by a quickly reproducing ``worm''
-highlighted the need for a thorough review of the access
-safeguards built into the system.
-Until now, we have taken a passive approach to dealing with
-weaknesses in the system access mechanisms, rather than actively
-searching for possible weaknesses.
-When we are notified of a problem or loophole in a system utility
-by one of our users,
-we have a well defined procedure for fixing the problem and
-expeditiously disseminating the fix to the BSD mailing list.
-This procedure has proven itself to be effective in
-solving known problems as they arise
-(witness its success in handling the recent worm).
-However, we feel that it would be useful to take a more active
-role in identifying problems before they are reported (or exploited).
-We will make a complete audit of the system
-utilities and network servers to find unintended system access mechanisms.
-.PP
-As a part of the work to make the system more resistant to attack
-from local users or via the network, it will be necessary to produce
-additional documentation on the configuration and operation of the system.
-This documentation will cover such topics as file and directory ownership
-and access, network and server configuration,
-and control of privileged operations such as file system backups.
-.PP
-We are investigating the addition of access control lists (ACLs) for
-filesystem objects.
-ACLs provide a much finer granularity of control over file access permissions
-than the current
-discretionary access control mechanism (mode bits).
-Furthermore, they are necessary
-in environments where C2 level security or better, as defined in the DoD
-TCSEC [DoD83], is required.
-The POSIX P1003.6 security group has made notable progress in determining
-how an ACL mechanism should work, and several vendors have implemented
-ACLs for their commercial systems.
-Berkeley will investigate the existing implementations and determine
-how to best integrate ACLs with the existing mechanism.
-.PP
-A major shortcoming of the present system is that authentication
-over the network is based solely on the privileged port mechanism
-between trusting hosts and users.
-Although privileged ports can only be created by processes running as root
-on a UNIX system,
-such processes are easy for a workstation user to obtain;
-they simply reboot their workstation in single user mode.
-Thus, a better authentication mechanism is needed.
-At present, we believe that the MIT Kerberos authentication
-server [Steiner88] provides the best solution to this problem.
-We propose to investigate Kerberos further as well as other
-authentication mechanisms and then to integrate
-the best one into Berkeley UNIX.
-Part of this integration would be the addition of the
-authentication mechanism into utilities such as
-telnet, login, remote shell, etc.
-We will add support for telnet (eventually replacing rlogin),
-the X window system, and the mail system within an authentication
-domain (a Kerberos \fIrealm\fP).
-We hope to replace the existing password authentication on each host
-with the network authentication system.
-.NH
-References
-.sp
-.IP Brownbridge82
-Brownbridge, D.R., L.F. Marshall, B. Randell,
-``The Newcastle Connection, or UNIXes of the World Unite!,''
-\fISoftware\- Practice and Experience\fP, Vol. 12, pp. 1147-1162, 1982.
-.sp
-.IP Cole85
-.br
-Cole, C.T., P.B. Flinn, A.B. Atlas,
-``An Implementation of an Extended File System for UNIX,''
-\fIUsenix Conference Proceedings\fP,
-pp. 131-150, June, 1985.
-.sp
-.IP DoD83
-.br
-Department of Defense,
-``Trusted Computer System Evaluation Criteria,''
-\fICSC-STD-001-83\fP,
-DoD Computer Security Center, August, 1983.
-.sp
-.IP Karels86
-Karels, M., M. McKusick,
-``Towards a Compatible File System Interface,''
-\fIProceedings of the European UNIX Users Group Meeting\fP,
-Manchester, England, pp. 481-496, September 1986.
-.sp
-.IP Kleiman86
-Kleiman, S.,
-``Vnodes: An Architecture for Multiple File System Types in Sun UNIX,''
-\fIUsenix Conference Proceedings\fP,
-pp. 238-247, June, 1986.
-.sp
-.IP Leffler84
-Leffler, S., M.K. McKusick, M. Karels,
-``Measuring and Improving the Performance of 4.2BSD,''
-\fIUsenix Conference Proceedings\fP, pp. 237-252, June, 1984.
-.sp
-.IP McKusick84
-McKusick, M.K., W. Joy, S. Leffler, R. Fabry,
-``A Fast File System for UNIX,''
-\fIACM Transactions on Computer Systems 2\fP, 3,
-pp. 181-197, August 1984.
-.sp
-.IP McKusick85
-McKusick, M.K., M. Karels, S. Leffler,
-``Performance Improvements and Functional Enhancements in 4.3BSD,''
-\fIUsenix Conference Proceedings\fP, pp. 519-531, June, 1985.
-.sp
-.IP McKusick86
-McKusick, M.K., M. Karels,
-``A New Virtual Memory Implementation for Berkeley UNIX,''
-\fIProceedings of the European UNIX Users Group Meeting\fP,
-Manchester, England, pp. 451-460, September 1986.
-.sp
-.IP McKusick88
-McKusick, M.K., M. Karels,
-``Design of a General Purpose Memory Allocator for the 4.3BSD UNIX Kernel,''
-\fIUsenix Conference Proceedings\fP,
-pp. 295-303, June, 1988.
-.sp
-.IP Rifkin86
-Rifkin, A.P., M.P. Forbes, R.L. Hamilton, M. Sabrio, S. Shah, K. Yueh,
-``RFS Architectural Overview,'' \fIUsenix Conference Proceedings\fP,
-pp. 248-259, June, 1986.
-.sp
-.IP Rodriguez86
-Rodriguez, R., M. Koehler, R. Hyde,
-``The Generic File System,''
-\fIUsenix Conference Proceedings\fP,
-pp. 260-269, June, 1986.
-.sp
-.IP Sandberg85
-Sandberg, R., D. Goldberg, S. Kleiman, D. Walsh, B. Lyon,
-``Design and Implementation of the Sun Network File System,''
-\fIUsenix Conference Proceedings\fP,
-pp. 119-130, June, 1985.
-.sp
-.IP Satyanarayanan85
-Satyanarayanan, M., \fIet al.\fP,
-``The ITC Distributed File System: Principles and Design,''
-\fIProc. 10th Symposium on Operating Systems Principles\fP, pp. 35-50,
-ACM, December, 1985.
-.sp
-.IP Steiner88
-Steiner, J., C. Newman, J. Schiller,
-``\fIKerberos:\fP An Authentication Service for Open Network Systems,''
-\fIUsenix Conference Proceedings\fP, pp. 191-202, February, 1988.
-.sp
-.IP Walker85
-Walker, B.J. and S.H. Kiser, ``The LOCUS Distributed File System,''
-\fIThe LOCUS Distributed System Architecture\fP,
-G.J. Popek and B.J. Walker, ed., The MIT Press, Cambridge, MA, 1985.
-.sp
-.IP Weinberger84
-Weinberger, P.J., ``The Version 8 Network File System,''
-\fIUsenix Conference presentation\fP,
-June, 1984.
diff --git a/share/doc/papers/diskperf/Makefile b/share/doc/papers/diskperf/Makefile
deleted file mode 100644
index fc8f4cc1c08..00000000000
--- a/share/doc/papers/diskperf/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-# $OpenBSD: Makefile,v 1.3 2004/02/01 14:22:44 jmc Exp $
-
-
-DIR= papers/diskperf
-SRCS= abs.ms motivation.ms equip.ms methodology.ms tests.ms results.ms \
- conclusions.ms appendix.ms
-MACROS= -ms
-
-paper.ps: ${SRCS}
- ${TBL} ${SRCS} | ${ROFF} > ${.TARGET}
-
-paper.txt: ${SRCS}
- ${TBL} ${SRCS} | ${ROFF} -Tascii > ${.TARGET}
-
-.include <bsd.doc.mk>
diff --git a/share/doc/papers/diskperf/abs.ms b/share/doc/papers/diskperf/abs.ms
deleted file mode 100644
index cb01db326da..00000000000
--- a/share/doc/papers/diskperf/abs.ms
+++ /dev/null
@@ -1,174 +0,0 @@
-.\" $OpenBSD: abs.ms,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1983 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)abs.ms 6.2 (Berkeley) 4/16/91
-.\"
-.if n .ND
-.TL
-Performance Effects of Disk Subsystem Choices
-for VAX\(dg Systems Running 4.2BSD UNIX*
-.sp
-Revised July 27, 1983
-.AU
-Bob Kridle
-.AI
-mt Xinu
-2560 9th Street
-Suite #312
-Berkeley, California 94710
-.AU
-Marshall Kirk McKusick\(dd
-.AI
-Computer Systems Research Group
-Computer Science Division
-Department of Electrical Engineering and Computer Science
-University of California, Berkeley
-Berkeley, CA 94720
-.AB
-.FS
-\(dgVAX, UNIBUS, and MASSBUS are trademarks of Digital Equipment Corporation.
-.FE
-.FS
-* UNIX is a trademark of Bell Laboratories.
-.FE
-.FS
-\(ddThis work was supported under grants from
-the National Science Foundation under grant MCS80-05144,
-and the Defense Advanced Research Projects Agency (DoD) under
-Arpa Order No. 4031 monitored by Naval Electronic System Command under
-Contract No. N00039-82-C-0235.
-.FE
-Measurements were made of the UNIX file system
-throughput for various I/O operations using the most attractive currently
-available Winchester disks and controllers attached to both the
-native busses (SBI/CMI) and the UNIBUS on both VAX 11/780s and VAX 11/750s.
-The tests were designed to highlight the performance of single
-and dual drive subsystems operating in the 4.2BSD
-.I
-fast file system
-.R
-environment.
-Many of the results of the tests were initially counter-intuitive
-and revealed several important aspects of the VAX implementations
-which were surprising to us.
-.PP
-The hardware used included two Fujitsu 2351A
-``Eagle''
-disk drives on each of two foreign-vendor disk controllers
-and two DEC RA-81 disk drives on a DEC UDA-50 disk controller.
-The foreign-vendor controllers were Emulex SC750, SC780
-and Systems Industries 9900 native bus interfaced controllers.
-The DEC UDA-50 controller is a UNIBUS interfaced, heavily buffered
-controller which is the first implementation of a new DEC storage
-system architecture, DSA.
-.PP
-One of the most important results of our testing was the correction
-of several timing parameters in our device handler for devices
-with an RH750/RH780 type interface and having high burst transfer
-rates.
-The correction of these parameters resulted in an increase in
-performance of over twenty percent in some cases.
-In addition, one of the controller manufacturers altered their bus
-arbitration scheme to produce another increase in throughput.
-.AE
-.LP
-.de PT
-.lt \\n(LLu
-.pc %
-.nr PN \\n%
-.tl '\\*(LH'\\*(CH'\\*(RH'
-.lt \\n(.lu
-..
-.af PN i
-.ds LH Performance
-.ds RH Contents
-.bp 1
-.\".if t .ds CF July 27, 1983
-.\".if t .ds LF CSRG TR/8
-.\".if t .ds RF Kridle, et. al.
-.ce
-.B "TABLE OF CONTENTS"
-.LP
-.sp 1
-.nf
-.B "1. Motivation"
-.LP
-.sp .5v
-.nf
-.B "2. Equipment
-2.1. DEC UDA50 disk controller
-2.2. Emulex SC750/SC780 disk controllers
-2.3. Systems Industries 9900 disk controller
-2.4. DEC RA81 disk drives
-2.5. Fujitsu 2351A disk drives
-.LP
-.sp .5v
-.nf
-.B "3. Methodology
-.LP
-.sp .5v
-.nf
-.B "4. Tests
-.LP
-.sp .5v
-.nf
-.B "5. Results
-.LP
-.sp .5v
-.nf
-.B "6. Conclusions
-.LP
-.sp .5v
-.nf
-.B Acknowledgements
-.LP
-.sp .5v
-.nf
-.B References
-.LP
-.sp .5v
-.nf
-.B "Appendix A
-A.1. read_8192
-A.2. write_4096
-A.3. write_8192
-A.4. rewrite_8192
-.ds RH Motivation
-.af PN 1
-.bp 1
-.de _d
-.if t .ta .6i 2.1i 2.6i
-.\" 2.94 went to 2.6, 3.64 to 3.30
-.if n .ta .84i 2.6i 3.30i
-..
-.de _f
-.if t .ta .5i 1.25i 2.5i
-.\" 3.5i went to 3.8i
-.if n .ta .7i 1.75i 3.8i
-..
diff --git a/share/doc/papers/diskperf/appendix.ms b/share/doc/papers/diskperf/appendix.ms
deleted file mode 100644
index 19e81354a50..00000000000
--- a/share/doc/papers/diskperf/appendix.ms
+++ /dev/null
@@ -1,96 +0,0 @@
-.\" $OpenBSD: appendix.ms,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1983 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)appendix.ms 6.2 (Berkeley) 4/16/91
-.\"
-.nr H2 1
-.ds RH Appendix A
-.SH
-\s+2Appendix A\s0
-.SH
-read_8192
-.DS
-#define BUFSIZ 8192
-main( argc, argv)
-char **argv;
-{
- char buf[BUFSIZ];
- int i, j;
-
- j = open(argv[1], 0);
- for (i = 0; i < 1024; i++)
- read(j, buf, BUFSIZ);
-}
-.DE
-.SH
-write_4096
-.DS
-#define BUFSIZ 4096
-main( argc, argv)
-char **argv;
-{
- char buf[BUFSIZ];
- int i, j;
-
- j = creat(argv[1], 0666);
- for (i = 0; i < 2048; i++)
- write(j, buf, BUFSIZ);
-}
-.DE
-.SH
-write_8192
-.DS
-#define BUFSIZ 8192
-main( argc, argv)
-char **argv;
-{
- char buf[BUFSIZ];
- int i, j;
-
- j = creat(argv[1], 0666);
- for (i = 0; i < 1024; i++)
- write(j, buf, BUFSIZ);
-}
-.DE
-.bp
-.SH
-rewrite_8192
-.DS
-#define BUFSIZ 8192
-main( argc, argv)
-char **argv;
-{
- char buf[BUFSIZ];
- int i, j;
-
- j = open(argv[1], 2);
- for (i = 0; i < 1024; i++)
- write(j, buf, BUFSIZ);
-}
-.DE
diff --git a/share/doc/papers/diskperf/conclusions.ms b/share/doc/papers/diskperf/conclusions.ms
deleted file mode 100644
index 4d3ca3c60c8..00000000000
--- a/share/doc/papers/diskperf/conclusions.ms
+++ /dev/null
@@ -1,125 +0,0 @@
-.\" $OpenBSD: conclusions.ms,v 1.4 2003/10/30 14:52:24 jmc Exp $
-.\"
-.\" Copyright (c) 1983 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)conclusions.ms 6.2 (Berkeley) 4/16/91
-.\"
-.ds RH Conclusions
-.NH
-Conclusions
-.PP
-Peak available throughput is only one criterion
-in most storage system purchasing decisions.
-Most of the VAX UNIX systems we are familiar with
-are not I/O bandwidth constrained.
-Nevertheless, an adequate disk bandwidth is necessary for
-good performance and especially to preserve snappy
-response time.
-All of the disk systems we tested provide more than
-adequate bandwidth for typical VAX UNIX system applications.
-Perhaps in some I/O-intensive applications such as
-image processing, more consideration should be given
-to the peak throughput available.
-In most situations, we feel that other factors are more
-important in making a storage choice between the systems we
-tested.
-Cost, reliability, availability, and support are some of these
-factors.
-The maturity of the technology purchased must also be weighed
-against the future value and expandability of newer technologies.
-.PP
-Two important conclusions about storage systems in general
-can be drawn from these tests.
-The first is that buffering can be effective in smoothing
-the effects of lower bus speeds and bus contention.
-Even though the UDA50 is located on the relatively slow
-UNIBUS, its performance is similar to controllers located on
-the faster processor busses.
-However, the SC780 with only one sector of buffering shows that
-little buffering is needed if the underlying bus is fast enough.
-.PP
-Placing more intelligence in the controller seems to hinder UNIX system
-performance more than it helps.
-Our profiling tests have indicated that UNIX spends about
-the same percentage of time in the SC780 driver and the UDA50 driver
-(about 10-14%).
-Normally UNIX uses a disk sort algorithm that separates reads and
-writes into two seek order queues.
-The read queue has priority over the write queue,
-since reads cause processes to block,
-while writes can be done asynchronously.
-This is particularly useful when generating large files,
-as it allows the disk allocator to read
-new disk maps and begin doing new allocations
-while the blocks allocated out of the previous map are written to disk.
-Because the UDA50 handles all block ordering,
-and because it keeps all requests in a single queue,
-there is no way to force the longer seek needed to get the next disk map.
-This dysfunction causes all the writes to be done before the disk map read,
-which idles the disk until a new set of blocks can be allocated.
-.PP
-The additional functionality of the UDA50 controller that allows it
-to transfer from two drives simultaneously tends to make
-the two drive transfer tests run much more effectively.
-Tuning for the single drive case works more effectively in the two
-drive case than when controllers that cannot handle simultaneous
-transfers are used.
-.ds RH Acknowledgements
-.nr H2 1
-.sp 1
-.SH
-\s+2Acknowledgements\s0
-.PP
-We thank Paul Massigilia and Bill Grace
-of Digital Equipment Corp for helping us run our
-disk tests on their UDA50/RA81.
-We also thank Rich Notari and Paul Ritkowski
-of Emulex for making their machines available
-to us to run our tests of the SC780/Eagles.
-Dan McKinster, then of Systems Industries,
-arranged to make their equipment available for the tests.
-We appreciate the time provided by Bob Gross, Joe Wolf, and
-Sam Leffler on their machines to refine our benchmarks.
-Finally we thank our sponsors,
-the National Science Foundation under grant MCS80-05144,
-and the Defense Advanced Research Projects Agency (DoD) under
-Arpa Order No. 4031 monitored by Naval Electronic System Command under
-Contract No. N00039-82-C-0235.
-.ds RH References
-.nr H2 1
-.sp 1
-.SH
-\s+2References\s0
-.LP
-.IP [McKusick83] 20
-M. McKusick, W. Joy, S. Leffler, R. Fabry,
-``A Fast File System for UNIX'',
-\fIACM Transactions on Computer Systems 2\fP, 3.
-pp 181-197, August 1984.
-.ds RH Appendix A
-.bp
diff --git a/share/doc/papers/diskperf/equip.ms b/share/doc/papers/diskperf/equip.ms
deleted file mode 100644
index 6347fd692d1..00000000000
--- a/share/doc/papers/diskperf/equip.ms
+++ /dev/null
@@ -1,175 +0,0 @@
-.\" $OpenBSD: equip.ms,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1983 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)equip.ms 6.2 (Berkeley) 4/16/91
-.\"
-.ds RH Equipment
-.NH
-Equipment
-.PP
-Various combinations of the three manufacturers' disk controllers,
-and two pairs of Winchester disk drives were tested on both
-VAX 11/780 and VAX 11/750 CPUs. The Emulex and Systems Industries
-disk controllers were interfaced to Fujitsu 2351A
-``Eagle''
-404 Megabyte disk drives.
-The DEC UDA50 disk controller was interfaced to two DEC RA81
-456 Megabyte Winchester disk drives.
-All three controllers were tested on the VAX 780 although
-only the Emulex and DEC controllers were benchmarked on the VAX 11/750.
-Systems Industries makes a VAX 11/750 CMI interface for
-their controller, but we did not have time to test this device.
-In addition, not all the storage systems were tested for
-two drive throughput.
-Each of the controllers and disk drives used in the benchmarks
-is described briefly below.
-.NH 2
-DEC UDA50 disk controller
-.PP
-This is a new controller design which is part of a larger, long range
-storage architecture referred to as
-``DSA''
-or \fBD\fRigital \fBS\fRtorage \fBA\fRrchitecture.
-An important aspect of DSA is migrating a large part
-of the storage management previously handled in the operating
-system to the storage system. Thus, the UDA50 is a much more
-intelligent controller than previous interfaces like the RH750 or
-RH780.
-The UDA50 handles all error correction.
-It also deals with most of the physical storage parameters.
-Typically, system software requests a logical block or
-sequence of blocks.
-The physical locations of these blocks,
-their head, track, and cylinder indices,
-are determined by the controller.
-The UDA50 also orders disk requests to maximize throughput
-where possible, minimizing total seek and rotational delays.
-Where multiple drives are attached to a single controller,
-the UDA50 can interleave
-simultaneous
-data transfers from multiple drives.
-.PP
-The UDA50 is a UNIBUS implementation of a DSA controller.
-It contains 52 sectors of internal buffering to minimize
-the effects of a slow UNIBUS such as the one on the VAX-11/780.
-This buffering also minimizes the effects of contention with
-other UNIBUS peripherals.
-.NH 2
-Emulex SC750/SC780 disk controllers
-.PP
-These two models of the same controller interface to the CMI bus
-of a VAX 11/750 and the SBI bus of a VAX 11/780, respectively.
-To the operating system, they emulate either an RH750 or
-an RH780.
-The controllers install in the
-MASSBUS
-locations in the CPU cabinets and operate from the
-VAX power supplies.
-They provide an
-``SMD''
-or \fBS\fRtorage \fBM\fRodule \fBD\fRrive
-interface to the disk drives.
-Although a large number of disk drives use this interface, we tested
-the controller exclusively connected to Fujitsu 2351A disks.
-.PP
-The controller was first implemented for the VAX-11/750 as the SC750
-model several years ago. Although the SC780 was introduced more
-recently, both are stable products with no bugs known to us.
-.NH 2
-Systems Industries 9900 disk controller
-.PP
-This controller is an evolution of the S.I. 9400 first introduced
-as a UNIBUS SMD interface.
-The 9900 has been enhanced to include an interface to the VAX 11/780 native
-bus, the SBI.
-It has also been upgraded to operate with higher data rate drives such
-as the Fujitsu 2351As we used in this test.
-The controller is contained in its own rack-mounted drawer with an integral
-power supply.
-The interface to the SMD is a four module set which mounts in a
-CPU cabinet slot normally occupied by an RH780.
-The SBI interface derives power from the VAX CPU cabinet power
-supplies.
-.NH 2
-DEC RA81 disk drives
-.PP
-The RA81 is a rack-mountable 456 Megabyte (formatted) Winchester
-disk drive manufactured by DEC.
-It includes a great deal of technology which is an integral part
-of the DEC \fBDSA\fR scheme.
-The novel technology includes a serial packet based communications
-protocol with the controller over a pair of mini-coaxial cables.
-The physical characteristics of the RA81 are shown in the
-table below:
-.DS
-.TS
-box,center;
-c s
-l l.
-DEC RA81 Disk Drive Characteristics
-_
-Peak Transfer Rate 2.2 Mbytes/sec.
-Rotational Speed 3,600 RPM
-Data Sectors/Track 51
-Logical Cylinders 1,248
-Logical Data Heads 14
-Data Capacity 456 Mbytes
-Minimum Seek Time 6 milliseconds
-Average Seek Time 28 milliseconds
-Maximum Seek Time 52 milliseconds
-.TE
-.DE
-.NH 2
-Fujitsu 2351A disk drives
-.PP
-The Fujitsu 2351A disk drive is a Winchester disk drive
-with an SMD controller interface.
-Fujitsu has developed a very good reputation for
-reliable storage products over the last several years.
-The 2351A has the following physical characteristics:
-.DS
-.TS
-box,center;
-c s
-l l.
-Fujitsu 2351A Disk Drive Characteristics
-_
-Peak Transfer Rate 1.859 Mbytes/sec.
-Rotational Speed 3,961 RPM
-Data Sectors/Track 48
-Cylinders 842
-Data Heads 20
-Data Capacity 404 Mbytes
-Minimum Seek Time 5 milliseconds
-Average Seek Time 18 milliseconds
-Maximum Seek Time 35 milliseconds
-.TE
-.DE
-.ds RH Methodology
-.bp
diff --git a/share/doc/papers/diskperf/methodology.ms b/share/doc/papers/diskperf/methodology.ms
deleted file mode 100644
index 8d5ad8d186d..00000000000
--- a/share/doc/papers/diskperf/methodology.ms
+++ /dev/null
@@ -1,109 +0,0 @@
-.\" $OpenBSD: methodology.ms,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1983 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)methodology.ms 6.2 (Berkeley) 4/16/91
-.\"
-.ds RH Methodology
-.NH
-Methodology
-.PP
-Our goal was to evaluate the performance of the target peripherals
-in an environment as much like our 4.2BSD UNIX systems as possible.
-There are two basic approaches to creating this kind of test environment.
-These might be termed the \fIindirect\fR and the \fIdirect\fR approach.
-The approach used by DEC in producing most of the performance data
-on the UDA50/RA81 system under VMS is what we term the indirect
-approach.
-We chose to use the direct approach.
-.PP
-The indirect approach used by DEC involves two steps.
-First, the environment in which performance is to be evaluated
-is parameterized.
-In this case, the disk I/O characteristics of VMS were measured
-as to the distribution of various sizes of accesses and the proportion
-of reads and writes.
-This parameterization of
-typical
-I/O activity was termed a
-``vax mix.''
-The second stage involves simulating this mixture of I/O activities
-with the devices to be tested and noting the total volume of transactions
-processed per unit time by each system.
-.PP
-The problems encountered with this indirect approach often
-have to do with the completeness and correctness of the parameterization
-of the context environment.
-For example, the
-``vax mix''
-model constructed for DEC's tests uses a random distribution of seeks
-to the blocks read or written.
-It is not likely that any real system produces a distribution
-of disk transfer locations which is truly random and does not
-exhibit strong locality characteristics.
-.PP
-The methodology chosen by us is direct
-in the sense that it uses the standard structured file system mechanism present
-in the 4.2BSD UNIX operating system to create the sequence of locations
-and sizes of reads and writes to the benchmarked equipment.
-We simply create, write, and read
-files as they would be by users' activities.
-The disk space allocation and disk caching mechanism built into
-UNIX is used to produce the actual device reads and writes as well
-as to determine their size and location on the disk.
-We measure and compare the rate at which these
-.I
-user files
-.R
-can be written, rewritten, or read.
-.PP
-The advantage of this approach is the implicit accuracy in
-testing in the same environment in which the peripheral
-will be used.
-Although this system does not account for the I/O produced
-by some paging and swapping, in our memory rich environment
-these activities account for a relatively small portion
-of the total disk activity.
-.PP
-A more significant disadvantage to the direct approach
-is the occasional difficulty we have in accounting for our
-measured results.
-The apparently straight-forward activity of reading or writing a logical file
-on disk can produce a complex mixture of disk traffic.
-File I/O is supported by a file management system that
-buffers disk traffic through an internal cache,
-which allows writes to be handled asynchronously.
-Reads must be done synchronously;
-however, this restriction is moderated by the use of read-ahead.
-Small changes in the performance of the disk controller
-subsystem can result in large and unexpected
-changes in the file system performance,
-as it may change the characteristics of the memory contention
-experienced by the processor.
-.ds RH Tests
-.bp
diff --git a/share/doc/papers/diskperf/motivation.ms b/share/doc/papers/diskperf/motivation.ms
deleted file mode 100644
index d0507448b05..00000000000
--- a/share/doc/papers/diskperf/motivation.ms
+++ /dev/null
@@ -1,91 +0,0 @@
-.\" $OpenBSD: motivation.ms,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1983 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)motivation.ms 6.2 (Berkeley) 4/16/91
-.\"
-.ds RH Motivation
-.NH
-Motivation
-.PP
-These benchmarks were performed for several reasons.
-Foremost was our desire to obtain guidelines to aid
-in choosing one of the most expensive components of any
-VAX UNIX configuration, the disk storage system.
-The range of choices in this area has increased dramatically
-in the last year.
-DEC has become, with the introduction of the UDA50/RA81 system,
-cost competitive
-in the area of disk storage for the first time.
-Emulex's entry into the VAX 11/780 SBI controller
-field, the SC780, represented an important choice for us to examine, given
-our previous success with their VAX 11/750 SC750 controller and
-their UNIBUS controllers.
-The Fujitsu 2351A
-Winchester disk drive represents the lowest cost-per-byte disk storage
-known to us.
-In addition, Fujitsu's reputation for reliability was appealing.
-The many attractive aspects of these components justified a more
-careful examination of their performance aspects under UNIX.
-.PP
-In addition to the direct motivation of developing an effective
-choice of storage systems, we hoped to gain more insight into
-VAX UNIX file system and I/O performance in general.
-What generic characteristics of I/O subsystems are most
-important?
-How important is the location of the controller on the SBI/CMI versus
-the UNIBUS?
-Is extensive buffering in the controller essential or even important?
-How much can be gained by putting more of the storage system
-management and optimization function in the controller as
-DEC does with the UDA50?
-.PP
-We also wanted to resolve particular speculation about the value of
-storage system optimization by a controller in a UNIX
-environment.
-Is the access optimization as effective as that already provided
-by the existing 4.2BSD UNIX device handlers for traditional disks?
-VMS disk handlers do no seek optimization.
-This gives the UDA50 controller an advantage over other controllers
-under VMS which is not likely to be as important to UNIX.
-Are there penalties associated with greater intelligence in the controller?
-.PP
-A third and last reason for evaluating this equipment is comparable
-to the proverbial mountain climber's answer when asked why he climbs
-a particular mountain,
-``It was there.''
-In our case the equipment
-was there.
-We were lucky enough to assemble all the desired disks and controllers
-and get them installed on a temporarily idle VAX 11/780.
-This got us started collecting data.
-Although many of the tests were later rerun on a variety of other systems,
-this initial test bed was essential for working out the testing bugs
-and getting our feet wet.
-.ds RH Equipment
-.bp
diff --git a/share/doc/papers/diskperf/results.ms b/share/doc/papers/diskperf/results.ms
deleted file mode 100644
index ccb90bb1ce3..00000000000
--- a/share/doc/papers/diskperf/results.ms
+++ /dev/null
@@ -1,335 +0,0 @@
-.\" $OpenBSD: results.ms,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1983 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)results.ms 6.2 (Berkeley) 4/16/91
-.\"
-.ds RH Results
-.NH
-Results
-.PP
-The following tables indicate the results of our
-test runs.
-Note that each table contains results for tests run
-on two varieties of 4.2BSD file systems.
-The first set of results is always for a file system
-with a basic blocking factor of eight Kilobytes and a
-fragment size of one Kilobyte. The second set of measurements
-is for file systems with a four Kilobyte block size and a
-one Kilobyte fragment size.
-The values in parentheses indicate the percentage of CPU
-time used by the test program.
-In the case of the two disk arm tests,
-the value in parentheses indicates the sum of the CPU percentages
-of the test programs that were run.
-Entries of ``n.m.'' indicate that the value was not measured.
-.DS
-.TS
-box,center;
-c s s s s
-c s s s s
-c s s s s
-l | l s | l s
-l | l s | l s
-l | l l | l l
-l | c c | c c.
-4.2BSD File Systems Tests - \fBVAX 11/750\fR
-=
-Logically Sequential Transfers
-from an \fB8K/1K\fR 4.2BSD File System (Kbytes/sec.)
-_
-Test Emulex SC750/Eagle UDA50/RA81
-
- 1 Drive 2 Drives 1 Drive 2 Drives
-_
-read_8192 490 (69%) 620 (96%) 310 (44%) 520 (65%)
-write_4096 380 (99%) 370 (99%) 370 (97%) 360 (98%)
-write_8192 470 (99%) 470 (99%) 320 (71%) 410 (83%)
-rewrite_8192 650 (99%) 620 (99%) 310 (50%) 450 (70%)
-=
-.T&
-c s s s s
-c s s s s
-l | l s | l s
-l | l s | l s
-l | l l | l l
-l | c c | c c.
-Logically Sequential Transfers
-from a \fB4K/1K\fR 4.2BSD File System (Kbytes/sec.)
-_
-Test Emulex SC750/Eagle UDA50/RA81
-
- 1 Drive 2 Drives 1 Drive 2 Drives
-_
-read_8192 300 (60%) 400 (84%) 210 (42%) 340 (77%)
-write_4096 320 (98%) 320 (98%) 220 (67%) 290 (99%)
-write_8192 340 (98%) 340 (99%) 220 (65%) 310 (98%)
-rewrite_8192 450 (99%) 450 (98%) 230 (47%) 340 (78%)
-.TE
-.DE
-.PP
-Note that the rates of write operations on the VAX 11/750 are ultimately
-CPU limited in some cases.
-The write rates saturate the CPU at a lower bandwidth than the reads
-because they must do disk allocation in addition to moving the data
-from the user program to the disk.
-The UDA50/RA81 saturates the CPU at a lower transfer rate for a given
-operation than the SC750/Eagle because
-it causes more memory contention with the CPU.
-We do not know if this contention is caused by
-the UNIBUS controller or the UDA50.
-.PP
-The following table reports the results of test runs on a VAX 11/780
-with 4 Megabytes of main memory.
-.DS
-.TS
-box,center;
-c s s s s s s
-c s s s s s s
-c s s s s s s
-l | l s | l s | l s
-l | l s | l s | l s
-l | l l | l l | l l
-l | c c | c c | c c.
-4.2BSD File Systems Tests - \fBVAX 11/780\fR
-=
-Logically Sequential Transfers
-from an \fB8K/1K\fR 4.2BSD File System (Kbytes/sec.)
-_
-Test Emulex SC780/Eagle UDA50/RA81 Sys. Ind. 9900/Eagle
-
- 1 Drive 2 Drives 1 Drive 2 Drives 1 Drive 2 Drives
-_
-read_8192 560 (70%) 480 (58%) 360 (45%) 540 (72%) 340 (41%) 520 (66%)
-write_4096 440 (98%) 440 (98%) 380 (99%) 480 (96%) 490 (96%) 440 (84%)
-write_8192 490 (98%) 490 (98%) 220 (58%)* 480 (92%) 490 (80%) 430 (72%)
-rewrite_8192 760 (100%) 560 (72%) 220 (50%)* 180 (52%)* 490 (60%) 520 (62%)
-=
-.T&
-c s s s s s s
-c s s s s s s
-l | l s | l s | l s
-l | l s | l s | l s
-l | l l | l l | l l
-l | c c | c c | c c.
-Logically Sequential Transfers
-from a \fB4K/1K\fR 4.2BSD File System (Kbytes/sec.)
-_
-Test Emulex SC780/Eagle UDA50/RA81 Sys. Ind. 9900/Eagle
-
- 1 Drive 2 Drives 1 Drive 2 Drives 1 Drive 2 Drives
-_
-read_8192 490 (77%) 370 (66%) n.m. n.m. 200 (31%) 370 (56%)
-write_4096 380 (98%) 370 (98%) n.m. n.m. 200 (46%) 370 (88%)
-write_8192 380 (99%) 370 (97%) n.m. n.m. 200 (45%) 320 (76%)
-rewrite_8192 490 (87%) 350 (66%) n.m. n.m. 200 (31%) 300 (46%)
-.TE
-* the operation of the hardware was suspect during these tests.
-.DE
-.PP
-The dropoff in reading and writing rates for the two drive SC780/Eagle
-tests is probably due to the file system using insufficient
-rotational delay for these tests.
-We have not fully investigated these times.
-.PP
-The following table compares data rates on VAX 11/750s directly
-with those of VAX 11/780s using the UDA50/RA81 storage system.
-.DS
-.TS
-box,center;
-c s s s s
-c s s s s
-c s s s s
-l | l s | l s
-l | l s | l s
-l | l l | l l
-l | c c | c c.
-4.2BSD File Systems Tests - \fBDEC UDA50 - 750 vs. 780\fR
-=
-Logically Sequential Transfers
-from an \fB8K/1K\fR 4.2BSD File System (Kbytes/sec.)
-_
-Test VAX 11/750 UNIBUS VAX 11/780 UNIBUS
-
- 1 Drive 2 Drives 1 Drive 2 Drives
-_
-read_8192 310 (44%) 520 (84%) 360 (45%) 540 (72%)
-write_4096 370 (97%) 360 (100%) 380 (99%) 480 (96%)
-write_8192 320 (71%) 410 (96%) 220 (58%)* 480 (92%)
-rewrite_8192 310 (50%) 450 (80%) 220 (50%)* 180 (52%)*
-=
-.T&
-c s s s s
-c s s s s
-l | l s | l s
-l | l s | l s
-l | l l | l l
-l | c c | c c.
-Logically Sequential Transfers
-from a \fB4K/1K\fR 4.2BSD File System (Kbytes/sec.)
-_
-Test VAX 11/750 UNIBUS VAX 11/780 UNIBUS
-
- 1 Drive 2 Drives 1 Drive 2 Drives
-_
-read_8192 210 (42%) 342 (77%) n.m. n.m.
-write_4096 215 (67%) 294 (99%) n.m. n.m.
-write_8192 215 (65%) 305 (98%) n.m. n.m.
-rewrite_8192 227 (47%) 336 (78%) n.m. n.m.
-.TE
-* the operation of the hardware was suspect during these tests.
-.DE
-.PP
-The higher throughput available on VAX 11/780s is due to a number
-of factors.
-The larger main memory size allows a larger file system cache.
-The block allocation routines run faster, raising the upper limit
-on the data rates in writing new files.
-.PP
-The next table makes the same comparison using an Emulex controller
-on both systems.
-.DS
-.TS
-box, center;
-c s s s s
-c s s s s
-c s s s s
-l | l s | l s
-l | l s | l s
-l | l l | l l
-l | c c | c c.
-4.2BSD File Systems Tests - \fBEmulex - 750 vs. 780\fR
-=
-Logically Sequential Transfers
-from an \fB8K/1K\fR 4.2BSD File System (Kbytes/sec.)
-_
-Test VAX 11/750 CMI Bus VAX 11/780 SBI Bus
-
- 1 Drive 2 Drives 1 Drive 2 Drives
-_
-read_8192 490 (69%) 620 (96%) 560 (70%) 480 (58%)
-write_4096 380 (99%) 370 (99%) 440 (98%) 440 (98%)
-write_8192 470 (99%) 470 (99%) 490 (98%) 490 (98%)
-rewrite_8192 650 (99%) 620 (99%) 760 (100%) 560 (72%)
-=
-.T&
-c s s s s
-c s s s s
-l | l s | l s
-l | l s | l s
-l | l l | l l
-l | c c | c c.
-Logically Sequential Transfers
-from a \fB4K/1K\fR 4.2BSD File System (Kbytes/sec.)
-_
-Test VAX 11/750 CMI Bus VAX 11/780 SBI Bus
-
- 1 Drive 2 Drives 1 Drive 2 Drives
-_
-read_8192 300 (60%) 400 (84%) 490 (77%) 370 (66%)
-write_4096 320 (98%) 320 (98%) 380 (98%) 370 (98%)
-write_8192 340 (98%) 340 (99%) 380 (99%) 370 (97%)
-rewrite_8192 450 (99%) 450 (98%) 490 (87%) 350 (66%)
-.TE
-.DE
-.PP
-The following table illustrates the evolution of our testing
-process as both hardware and software problems affecting
-the performance of the Emulex SC780 were corrected.
-The software change was suggested to us by George Goble
-of Purdue University.
-.PP
-The 4.2BSD handler for RH750/RH780 interfaced disk drives
-contains several constants which determine how
-much time is provided between an interrupt signaling the completion
-of a positioning command and the subsequent start of a data transfer
-operation. These lead times are expressed as sectors of rotational delay.
-If they are too small, an extra complete rotation will often be required
-between a seek and subsequent read or write operation.
-The higher bit rate and rotational speed of the 2351A Fujitsu
-disk drives required
-increasing these constants.
-.PP
-The hardware change involved allowing for slightly longer
-delays in arbitrating for cycles on the SBI bus by
-starting the bus arbitration cycle a little further ahead of
-when the data was ready for transfer.
-Finally we had to increase the rotational delay between consecutive
-blocks in the file because
-the higher bandwidth from the disk generated more memory contention,
-which slowed down the processor.
-.DS
-.TS
-box,center,expand;
-c s s s s s s
-c s s s s s s
-c s s s s s s
-l | l s | l s | l s
-l | l s | l s | l s
-l | l s | l s | l s
-l | c c | c c | c c
-l | c c | c c | c c.
-4.2BSD File Systems Tests - \fBEmulex SC780 Disk Controller Evolution\fR
-=
-Logically Sequential Transfers
-from an \fB8K/1K\fR 4.2BSD File System (Kbytes/sec.)
-_
-Test Inadequate Search Lead OK Search Lead OK Search Lead
- Initial SBI Arbitration Init SBI Arb. Improved SBI Arb.
-
- 1 Drive 2 Drives 1 Drive 2 Drives 1 Drive 2 Drives
-_
-read_8192 320 370 440 (60%) n.m. 560 (70%) 480 (58%)
-write_4096 250 270 300 (63%) n.m. 440 (98%) 440 (98%)
-write_8192 250 280 340 (60%) n.m. 490 (98%) 490 (98%)
-rewrite_8192 250 290 380 (48%) n.m. 760 (100%) 560 (72%)
-=
-.T&
-c s s s s s s
-c s s s s s s
-l | l s | l s | l s
-l | l s | l s | l s
-l | l s | l s | l s
-l | c c | c c | c c
-l | c c | c c | c c.
-Logically Sequential Transfers
-from a \fB4K/1K\fR 4.2BSD File System (Kbytes/sec.)
-_
-Test Inadequate Search Lead OK Search Lead OK Search Lead
- Initial SBI Arbitration Init SBI Arb. Improved SBI Arb.
-
- 1 Drive 2 Drives 1 Drive 2 Drives 1 Drive 2 Drives
-_
-read_8192 200 220 280 n.m. 490 (77%) 370 (66%)
-write_4096 180 190 300 n.m. 380 (98%) 370 (98%)
-write_8192 180 200 320 n.m. 380 (99%) 370 (97%)
-rewrite_8192 190 200 340 n.m. 490 (87%) 350 (66%)
-.TE
-.DE
-.ds RH Conclusions
-.bp
diff --git a/share/doc/papers/diskperf/tests.ms b/share/doc/papers/diskperf/tests.ms
deleted file mode 100644
index d2fdcb9a774..00000000000
--- a/share/doc/papers/diskperf/tests.ms
+++ /dev/null
@@ -1,106 +0,0 @@
-.\" $OpenBSD: tests.ms,v 1.4 2003/10/30 14:52:24 jmc Exp $
-.\"
-.\" Copyright (c) 1983 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)tests.ms 6.2 (Berkeley) 4/16/91
-.\"
-.ds RH Tests
-.NH
-Tests
-.PP
-Our battery of tests consists of four programs,
-read_8192, write_8192, write_4096
-and rewrite_8192 originally written by [McKusick83]
-to evaluate the performance of the new file system in 4.2BSD.
-These programs all follow the same model and are typified by
-read_8192 shown here.
-.DS
-#define BUFSIZ 8192
-main( argc, argv)
-char **argv;
-{
- char buf[BUFSIZ];
- int i, j;
-
- j = open(argv[1], 0);
- for (i = 0; i < 1024; i++)
- read(j, buf, BUFSIZ);
-}
-.DE
-The remaining programs are included in appendix A.
-.PP
-These programs read, write with two different blocking factors,
-and rewrite logical files in a structured file system on the disk
-under test.
-The write programs create new files while the rewrite program
-overwrites an existing file.
-Each of these programs represents an important segment of the
-typical UNIX file system activity with the read program
-representing by far the largest class and the rewrite the smallest.
-.PP
-A blocking factor of 8192 is used by all programs except write_4096.
-This is typical of most 4.2BSD user programs since a standard set of
-I/O support routines is commonly used and these routines buffer
-data in similar block sizes.
-.PP
-For each test run, an empty eight Kilobyte block
-file system was created in the target
-storage system.
-Then each of the four tests was run and timed.
-Each test was run three times;
-the first to clear out any useful data in the cache,
-and the second two to ensure that the experiment
-had stabilized and was repeatable.
-Each test operated on eight Megabytes of data to
-ensure that the cache did not overly influence the results.
-Another file system was then initialized using a
-basic blocking factor of four Kilobytes and the same tests
-were run again and timed.
-A command script for a run appears as follows:
-.DS
-#!/bin/csh
-set time=2
-echo "8K/1K file system"
-newfs /dev/rhp0g eagle
-mount /dev/hp0g /mnt0
-mkdir /mnt0/foo
-echo "write_8192 /mnt0/foo/tst2"
-rm -f /mnt0/foo/tst2
-write_8192 /mnt0/foo/tst2
-rm -f /mnt0/foo/tst2
-write_8192 /mnt0/foo/tst2
-rm -f /mnt0/foo/tst2
-write_8192 /mnt0/foo/tst2
-echo "read_8192 /mnt0/foo/tst2"
-read_8192 /mnt0/foo/tst2
-read_8192 /mnt0/foo/tst2
-read_8192 /mnt0/foo/tst2
-umount /dev/hp0g
-.DE
-.ds RH Results
-.bp
diff --git a/share/doc/papers/fsinterface/Makefile b/share/doc/papers/fsinterface/Makefile
deleted file mode 100644
index bdcbfdc7755..00000000000
--- a/share/doc/papers/fsinterface/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-# $OpenBSD: Makefile,v 1.3 2004/02/01 14:22:44 jmc Exp $
-
-
-DIR= papers/fsinterface
-SRCS= fsinterface.ms
-MACROS= -ms
-
-paper.txt: ${SRCS}
- ${ROFF} -Tascii ${SRCS} > ${.TARGET}
-
-.include <bsd.doc.mk>
diff --git a/share/doc/papers/fsinterface/abstract.ms b/share/doc/papers/fsinterface/abstract.ms
deleted file mode 100644
index 258e1f63f87..00000000000
--- a/share/doc/papers/fsinterface/abstract.ms
+++ /dev/null
@@ -1,71 +0,0 @@
-.\" $OpenBSD: abstract.ms,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1986 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)abstract.ms 5.2 (Berkeley) 4/16/91
-.\"
-.TL
-Toward a Compatible Filesystem Interface
-.AU
-Michael J. Karels
-Marshall Kirk McKusick
-.AI
-Computer Systems Research Group
-Computer Science Division
-Department of Electrical Engineering and Computer Science
-University of California, Berkeley
-Berkeley, California 94720
-.LP
-As network or remote filesystems have been implemented for
-.UX ,
-several stylized interfaces between the filesystem implementation
-and the rest of the kernel have been developed.
-Notable among these are Sun Microsystems' virtual filesystem interface
-using vnodes, Digital Equipment's Generic File System architecture,
-and AT&T's File System Switch.
-Each design attempts to isolate filesystem-dependent details
-below the generic interface and to provide a framework within which
-new filesystems may be incorporated.
-However, each of these interfaces is different from
-and incompatible with the others.
-Each of them addresses somewhat different design goals.
-Each was based upon a different starting version of
-.UX ,
-targeted a different set of filesystems with varying characteristics,
-and uses a different set of primitive operations provided by the filesystem.
-The current study compares the various filesystem interfaces.
-Criteria for comparison include generality, completeness, robustness,
-efficiency and esthetics.
-As a result of this comparison, a proposal for a new filesystem interface
-is advanced that includes the best features of the existing implementations.
-The proposal adopts the calling convention for name lookup introduced
-in 4.3BSD.
-A prototype implementation is described.
-This proposal and the rationale underlying its development
-have been presented to major software vendors
-as an early step toward convergence upon a compatible filesystem interface.
diff --git a/share/doc/papers/fsinterface/fsinterface.ms b/share/doc/papers/fsinterface/fsinterface.ms
deleted file mode 100644
index a9a531727db..00000000000
--- a/share/doc/papers/fsinterface/fsinterface.ms
+++ /dev/null
@@ -1,1174 +0,0 @@
-.\" $OpenBSD: fsinterface.ms,v 1.5 2006/03/04 16:18:04 miod Exp $
-.\"
-.\" Copyright (c) 1986 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)fsinterface.ms 1.4 (Berkeley) 4/16/91
-.\"
-.if \nv .rm CM
-.de UX
-.ie \\n(UX \s-1UNIX\s0\\$1
-.el \{\
-\s-1UNIX\s0\\$1\(dg
-.FS
-\(dg \s-1UNIX\s0 is a registered trademark of AT&T.
-.FE
-.nr UX 1
-.\}
-..
-.TL
-Toward a Compatible Filesystem Interface
-.AU
-Michael J. Karels
-Marshall Kirk McKusick
-.AI
-Computer Systems Research Group
-Computer Science Division
-Department of Electrical Engineering and Computer Science
-University of California, Berkeley
-Berkeley, California 94720
-.AB
-.LP
-As network or remote filesystems have been implemented for
-.UX ,
-several stylized interfaces between the filesystem implementation
-and the rest of the kernel have been developed.
-.FS
-This is an update of a paper originally presented
-at the September 1986 conference of the European
-.UX
-Users' Group.
-Last modified April 16, 1991.
-.FE
-Notable among these are Sun Microsystems' Virtual Filesystem interface (VFS)
-using vnodes, Digital Equipment's Generic File System (GFS) architecture,
-and AT&T's File System Switch (FSS).
-Each design attempts to isolate filesystem-dependent details
-below a generic interface and to provide a framework within which
-new filesystems may be incorporated.
-However, each of these interfaces is different from
-and incompatible with the others.
-Each of them addresses somewhat different design goals.
-Each was based on a different starting version of
-.UX ,
-targeted a different set of filesystems with varying characteristics,
-and uses a different set of primitive operations provided by the filesystem.
-The current study compares the various filesystem interfaces.
-Criteria for comparison include generality, completeness, robustness,
-efficiency and esthetics.
-Several of the underlying design issues are examined in detail.
-As a result of this comparison, a proposal for a new filesystem interface
-is advanced that includes the best features of the existing implementations.
-The proposal adopts the calling convention for name lookup introduced
-in 4.3BSD, but is otherwise closely related to Sun's VFS.
-A prototype implementation is now being developed at Berkeley.
-This proposal and the rationale underlying its development
-have been presented to major software vendors
-as an early step toward convergence on a compatible filesystem interface.
-.AE
-.SH
-Introduction
-.PP
-As network communications and workstation environments
-became common elements in
-.UX
-systems, several vendors of
-.UX
-systems have designed and built network file systems
-that allow client processes on one
-.UX
-machine to access files on a server machine.
-Examples include Sun's Network File System, NFS [Sandberg85],
-AT&T's recently-announced Remote File Sharing, RFS [Rifkin86],
-the LOCUS distributed filesystem [Walker85],
-and Masscomp's extended filesystem [Cole85].
-Other remote filesystems have been implemented in research or university groups
-for internal use, notably the network filesystem in the Eighth Edition
-.UX
-system [Weinberger84] and two different filesystems used at Carnegie-Mellon
-University [Satyanarayanan85].
-Numerous other remote file access methods have been devised for use
-within individual
-.UX
-processes,
-many of them by modifications to the C I/O library
-similar to those in the Newcastle Connection [Brownbridge82].
-.PP
-Multiple network filesystems may frequently
-be found in use within a single organization.
-These circumstances make it highly desirable to be able to transport filesystem
-implementations from one system to another.
-Such portability is considerably enhanced by the use of a stylized interface
-with carefully-defined entry points to separate the filesystem from the rest
-of the operating system.
-This interface should be similar to the interface between device drivers
-and the kernel.
-Although varying somewhat among the common versions of
-.UX ,
-the device driver interfaces are sufficiently similar that device drivers
-may be moved from one system to another without major problems.
-A clean, well-defined interface to the filesystem also allows a single
-system to support multiple local filesystem types.
-.PP
-For reasons such as these, several filesystem interfaces have been used
-when integrating new filesystems into the system.
-The best-known of these are Sun Microsystems' Virtual File System interface,
-VFS [Kleiman86], and AT&T's File System Switch, FSS.
-Another interface, known as the Generic File System, GFS,
-has been implemented for the ULTRIX\(dd
-.FS
-\(dd ULTRIX is a trademark of Digital Equipment Corp.
-.FE
-system by Digital [Rodriguez86].
-There are numerous differences among these designs.
-The differences may be understood from the varying philosophies
-and design goals of the groups involved, from the systems under which
-the implementations were done, and from the filesystems originally targeted
-by the designs.
-These differences are summarized in the following sections
-within the limitations of the published specifications.
-.SH
-Design goals
-.PP
-There are several design goals which, in varying degrees,
-have driven the various designs.
-Each attempts to divide the filesystem into a filesystem-type-independent
-layer and individual filesystem implementations.
-The division between these layers occurs at somewhat different places
-in these systems, reflecting different views of the diversity and types
-of the filesystems that may be accommodated.
-Compatibility with existing local filesystems has varying importance;
-at the user-process level, each attempts to be completely transparent
-except for a few filesystem-related system management programs.
-The AT&T interface also makes a major effort to retain familiar internal
-system interfaces, and even to retain object-file-level binary compatibility
-with operating system modules such as device drivers.
-Both Sun and DEC were willing to change internal data structures and interfaces
-so that other operating system modules might require recompilation
-or source-code modification.
-.PP
-AT&T's interface both allows and requires filesystems to support the full
-and exact semantics of their previous filesystem,
-including interruptions of system calls on slow operations.
-System calls that deal with remote files are encapsulated
-with their environment and sent to a server where execution continues.
-The system call may be aborted by either client or server, returning
-control to the client.
-Most system calls that descend into the filesystem-dependent layer
-of a filesystem other than the standard local filesystem do not return
-to the higher-level kernel calling routines.
-Instead, the filesystem-dependent code completes the requested
-operation and then executes a non-local goto (\fIlongjmp\fP) to exit the
-system call.
-These efforts to avoid modification of main-line kernel code
-indicate a far greater emphasis on internal compatibility than on modularity,
-clean design, or efficiency.
-.PP
-In contrast, the Sun VFS interface makes major modifications to the internal
-interfaces in the kernel, with a very clear separation
-of filesystem-independent and -dependent data structures and operations.
-The semantics of the filesystem are largely retained for local operations,
-although this is achieved at some expense where it does not fit the internal
-structuring well.
-The filesystem implementations are not required to support the same
-semantics as local
-.UX
-filesystems.
-Several historical features of
-.UX
-filesystem behavior are difficult to achieve using the VFS interface,
-including the atomicity of file and link creation and the use of open files
-whose names have been removed.
-.PP
-A major design objective of Sun's network filesystem,
-statelessness,
-permeates the VFS interface.
-No locking may be done in the filesystem-independent layer,
-and locking in the filesystem-dependent layer may occur only during
-a single call into that layer.
-.PP
-A final design goal of most implementors is performance.
-For remote filesystems,
-this goal tends to be in conflict with the goals of complete semantic
-consistency, compatibility and modularity.
-Sun has chosen performance over modularity in some areas,
-but has emphasized clean separation of the layers within the filesystem
-at the expense of performance.
-Although the performance of RFS is yet to be seen,
-AT&T seems to have considered compatibility far more important than modularity
-or performance.
-.SH
-Differences among filesystem interfaces
-.PP
-The existing filesystem interfaces may be characterized
-in several ways.
-Each system is centered around a few data structures or objects,
-along with a set of primitives for performing operations upon these objects.
-In the original
-.UX
-filesystem [Ritchie74],
-the basic object used by the filesystem is the inode, or index node.
-The inode contains all of the information about a file except its name:
-its type, identification, ownership, permissions, timestamps and location.
-Inodes are identified by the filesystem device number and the index within
-the filesystem.
-The major entry points to the filesystem are \fInamei\fP,
-which translates a filesystem pathname into the underlying inode,
-and \fIiget\fP, which locates an inode by number and installs it in the in-core
-inode table.
-\fINamei\fP performs name translation by iterative lookup
-of each component name in its directory to find its inumber,
-then using \fIiget\fP to return the actual inode.
-If the last component has been reached, this inode is returned;
-otherwise, the inode describes the next directory to be searched.
-The inode returned may be used in various ways by the caller;
-it may be examined, the file may be read or written,
-types and access may be checked, and fields may be modified.
-Modified inodes are automatically written back to the filesystem
-on disk when the last reference is released with \fIiput\fP.
-Although the details are considerably different,
-the same general scheme is used in the faster filesystem in 4.2BSD
-.UX
-[McKusick85].
-.PP
-Both the AT&T interface and, to a lesser extent, the DEC interface
-attempt to preserve the inode-oriented interface.
-Each modifies the inode to allow different varieties of the structure
-for different filesystem types by separating the filesystem-dependent
-parts of the inode into a separate structure or one arm of a union.
-Both interfaces allow operations
-equivalent to the \fInamei\fP and \fIiget\fP operations
-of the old filesystem to be performed in the filesystem-independent
-layer, with entry points to the individual filesystem implementations to support
-the type-specific parts of these operations. Implicit in this interface
-is that files may conveniently be named by and located using a single
-index within a filesystem.
-The GFS provides specific entry points to the filesystems
-to change most file properties rather than allowing arbitrary changes
-to be made to the generic part of the inode.
-.PP
-In contrast, the Sun VFS interface replaces the inode as the primary object
-with the vnode.
-The vnode contains no filesystem-dependent fields except the pointer
-to the set of operations implemented by the filesystem.
-Properties of a vnode that might be transient, such as the ownership,
-permissions, size and timestamps, are maintained by the lower layer.
-These properties may be presented in a generic format upon request;
-callers are expected not to hold this information for any length of time,
-as it may not be up-to-date later on.
-The vnode operations do not include a counterpart to \fIiget\fP;
-the only external interface for obtaining vnodes for specific files
-is the name lookup operation.
-(Separate procedures are provided outside of this interface
-that obtain a ``file handle'' for a vnode which may be given
-to a client by a server, such that the vnode may be retrieved
-upon later presentation of the file handle.)
-.SH
-Name translation issues
-.PP
-Each of the systems described includes a mechanism for performing
-pathname-to-internal-representation translation.
-The style of the name translation function is very different in all
-three systems.
-As described above, the AT&T and DEC systems retain the \fInamei\fP function.
-The two are quite different, however, as the ULTRIX interface uses
-the \fInamei\fP calling convention introduced in 4.3BSD.
-The parameters and context for the name lookup operation
-are collected in a \fInameidata\fP structure which is passed to \fInamei\fP
-for operation.
-Intent to create or delete the named file is declared in advance,
-so that the final directory scan in \fInamei\fP may retain information
-such as the offset in the directory at which the modification will be made.
-Filesystems that use such mechanisms to avoid redundant work
-must therefore lock the directory to be modified so that it may not
-be modified by another process before completion.
-In the System V filesystem, as in previous versions of
-.UX ,
-this information is stored in the per-process \fIuser\fP structure
-by \fInamei\fP for use by a low-level routine called after performing
-the actual creation or deletion of the file itself.
-In 4.3BSD and in the GFS interface, these side effects of \fInamei\fP
-are stored in the \fInameidata\fP structure given as argument to \fInamei\fP,
-which is also presented to the routine implementing file creation or deletion.
-.PP
-The ULTRIX \fInamei\fP routine is responsible for the generic
-parts of the name translation process, such as copying the name into
-an internal buffer, validating it, interpolating
-the contents of symbolic links, and indirecting at mount points.
-As in 4.3BSD, the name is copied into the buffer in a single call,
-according to the location of the name.
-After determining the type of the filesystem at the start of translation
-(the current directory or root directory), it calls the filesystem's
-\fInamei\fP entry with the same structure it received from its caller.
-The filesystem-specific routine translates the name, component by component,
-as long as no mount points are reached.
-It may return after any number of components have been processed.
-\fINamei\fP performs any processing at mount points, then calls
-the correct translation routine for the next filesystem.
-Network filesystems may pass the remaining pathname to a server for translation,
-or they may look up the pathname components one at a time.
-The former strategy would be more efficient,
-but the latter scheme allows mount points within a remote filesystem
-without server knowledge of all client mounts.
-.PP
-The AT&T \fInamei\fP interface is presumably the same as that in previous
-.UX
-systems, accepting the name of a routine to fetch pathname characters
-and an operation (one of: lookup, lookup for creation, or lookup for deletion).
-It translates, component by component, as before.
-If it detects that a mount point crosses to a remote filesystem,
-it passes the remainder of the pathname to the remote server.
-A pathname-oriented request other than open may be completed
-within the \fInamei\fP call,
-avoiding return to the (unmodified) system call handler
-that called \fInamei\fP.
-.PP
-In contrast to the first two systems, Sun's VFS interface has replaced
-\fInamei\fP with \fIlookupname\fP.
-This routine simply calls a new pathname-handling module to allocate
-a pathname buffer and copy in the pathname (copying a character per call),
-then calls \fIlookuppn\fP.
-\fILookuppn\fP performs the iteration over the directories leading
-to the destination file; it copies each pathname component to a local buffer,
-then calls the filesystem \fIlookup\fP entry to locate the vnode
-for that file in the current directory.
-Per-filesystem \fIlookup\fP routines may translate only one component
-per call.
-For creation and deletion of new files, the lookup operation is unmodified;
-the lookup of the final component only serves to check for the existence
-of the file.
-The subsequent creation or deletion call, if any, must repeat the final
-name translation and associated directory scan.
-For new file creation in particular, this is rather inefficient,
-as file creation requires two complete scans of the directory.
-.PP
-Several of the important performance improvements in 4.3BSD
-were related to the name translation process [McKusick85][Leffler84].
-The following changes were made:
-.IP 1. 4
-A system-wide cache of recent translations is maintained.
-The cache is separate from the inode cache, so that multiple names
-for a file may be present in the cache.
-The cache does not hold ``hard'' references to the inodes,
-so that the normal reference pattern is not disturbed.
-.IP 2.
-A per-process cache is kept of the directory and offset
-at which the last successful name lookup was done.
-This allows sequential lookups of all the entries in a directory to be done
-in linear time.
-.IP 3.
-The entire pathname is copied into a kernel buffer in a single operation,
-rather than using two subroutine calls per character.
-.IP 4.
-A pool of pathname buffers is held by \fInamei\fP, avoiding allocation
-overhead.
-.LP
-All of these performance improvements from 4.3BSD are well worth using
-within a more generalized filesystem framework.
-The generalization of the structure may otherwise make an already-expensive
-function even more costly.
-Most of these improvements are present in the GFS system, as it derives
-from the beta-test version of 4.3BSD.
-The Sun system uses a name-translation cache generally like that in 4.3BSD.
-The name cache is a filesystem-independent facility provided for the use
-of the filesystem-specific lookup routines.
-The Sun cache, like that first used at Berkeley but unlike that in 4.3,
-holds a ``hard'' reference to the vnode (increments the reference count).
-The ``soft'' reference scheme in 4.3BSD cannot be used with the current
-NFS implementation, as NFS allocates vnodes dynamically and frees them
-when the reference count returns to zero rather than caching them.
-As a result, fewer names may be held in the cache
-than (local filesystem) vnodes, and the cache distorts the normal reference
-patterns otherwise seen by the LRU cache.
-As the name cache references overflow the local filesystem inode table,
-the name cache must be purged to make room in the inode table.
-Also, to determine whether a vnode is in use (for example,
-before mounting upon it), the cache must be flushed to free any
-cache reference.
-These problems should be corrected
-by the use of the soft cache reference scheme.
-.PP
-A final observation on the efficiency of name translation in the current
-Sun VFS architecture is that the number of subroutine calls used
-by a multi-component name lookup is dramatically larger
-than in the other systems.
-The name lookup scheme in GFS suffers from this problem much less,
-at no expense in violation of layering.
-.PP
-A final problem to be considered is synchronization and consistency.
-As the filesystem operations are more stylized and broken into separate
-entry points for parts of operations, it is more difficult to guarantee
-consistency throughout an operation and/or to synchronize with other
-processes using the same filesystem objects.
-The Sun interface suffers most severely from this,
-as it forbids the filesystems from locking objects across calls
-to the filesystem.
-It is possible that a file may be created between the time that a lookup
-is performed and a subsequent creation is requested.
-Perhaps more strangely, after a lookup fails to find the target
-of a creation attempt, the actual creation might find that the target
-now exists and is a symbolic link.
-The call will either fail unexpectedly, as the target is of the wrong type,
-or the generic creation routine will have to note the error
-and restart the operation from the lookup.
-This problem will always exist in a stateless filesystem,
-but the VFS interface forces all filesystems to share the problem.
-This restriction against locking between calls also
-forces duplication of work during file creation and deletion.
-This is considered unacceptable.
-.SH
-Support facilities and other interactions
-.PP
-Several support facilities are used by the current
-.UX
-filesystem and require generalization for use by other filesystem types.
-For filesystem implementations to be portable,
-it is desirable that these modified support facilities
-should also have a uniform interface and
-behave in a consistent manner in target systems.
-A prominent example is the filesystem buffer cache.
-The buffer cache in a standard (System V or 4.3BSD)
-.UX
-system contains physical disk blocks with no reference to the files containing
-them.
-This works well for the local filesystem, but has obvious problems
-for remote filesystems.
-Sun has modified the buffer cache routines to describe buffers by vnode
-rather than by device.
-For remote files, the vnode used is that of the file, and the block
-numbers are virtual data blocks.
-For local filesystems, a vnode for the block device is used for cache reference,
-and the block numbers are filesystem physical blocks.
-Use of per-file cache description does not easily accommodate
-caching of indirect blocks, inode blocks, superblocks or cylinder group blocks.
-However, the vnode describing the block device for the cache
-is one created internally,
-rather than the vnode for the device looked up when mounting,
-and it is located by searching a private list of vnodes
-rather than by holding it in the mount structure.
-Although the Sun modification makes it possible to use the buffer
-cache for data blocks of remote files, a better generalization
-of the buffer cache is needed.
-.PP
-The RFS filesystem used by AT&T does not currently cache data blocks
-on client systems, thus the buffer cache is probably unmodified.
-The form of the buffer cache in ULTRIX is unknown to us.
-.PP
-Another subsystem that has a large interaction with the filesystem
-is the virtual memory system.
-The virtual memory system must read data from the filesystem
-to satisfy fill-on-demand page faults.
-For efficiency, this read call is arranged to place the data directly
-into the physical pages assigned to the process (a ``raw'' read) to avoid
-copying the data.
-Although the read operation normally bypasses the filesystem buffer cache,
-consistency must be maintained by checking the buffer cache and copying
-or flushing modified data not yet stored on disk.
-The 4.2BSD virtual memory system, like that of Sun and ULTRIX,
-maintains its own cache of reusable text pages.
-This creates additional complications.
-As the virtual memory systems are redesigned, these problems should be
-resolved by reading through the buffer cache, then mapping the cached
-data into the user address space.
-If the buffer cache or the process pages are changed while the other reference
-remains, the data would have to be copied (``copy-on-write'').
-.PP
-In the meantime, the current virtual memory systems must be used
-with the new filesystem framework.
-Both the Sun and AT&T filesystem interfaces
-provide entry points to the filesystem for optimization of the virtual
-memory system by performing logical-to-physical block number translation
-when setting up a fill-on-demand image for a process.
-The VFS provides a vnode operation analogous to the \fIbmap\fP function of the
-.UX
-filesystem.
-Given a vnode and logical block number, it returns a vnode and block number
-which may be read to obtain the data.
-If the filesystem is local, it returns the private vnode for the block device
-and the physical block number.
-As the \fIbmap\fP operations are all performed at one time, during process
-startup, any indirect blocks for the file will remain in the cache
-after they are once read.
-In addition, the interface provides a \fIstrategy\fP entry that may be used
-for ``raw'' reads from a filesystem device,
-used to read data blocks into an address space without copying.
-This entry uses a buffer header (\fIbuf\fP structure)
-to describe the I/O operation
-instead of a \fIuio\fP structure.
-The buffer-style interface is the same as that used by disk drivers internally.
-This difference allows the current \fIuio\fP primitives to be avoided,
-as they copy all data to/from the current user process address space.
-Instead, for local filesystems these operations could be done internally
-with the standard raw disk read routines,
-which use a \fIuio\fP interface.
-When loading from a remote filesystem,
-the data will be received in a network buffer.
-If network buffers are suitably aligned,
-the data may be mapped into the process address space by a page swap
-without copying.
-In either case, it should be possible to use the standard filesystem
-read entry from the virtual memory system.
-.PP
-Other issues that must be considered in devising a portable
-filesystem implementation include kernel memory allocation,
-the implicit use of user-structure global context,
-which may create problems with reentrancy,
-the style of the system call interface,
-and the conventions for synchronization
-(sleep/wakeup, handling of interrupted system calls, semaphores).
-.SH
-The Berkeley Proposal
-.PP
-The Sun VFS interface has been the most widely used of the three described here.
-It is also the most general of the three, in that filesystem-specific
-data and operations are best separated from the generic layer.
-Although it has several disadvantages which were described above,
-most of them may be corrected with minor changes to the interface
-(and, in a few areas, philosophical changes).
-The DEC GFS has other advantages, in particular the use of the 4.3BSD
-\fInamei\fP interface and optimizations.
-It allows single or multiple components of a pathname
-to be translated in a single call to the specific filesystem
-and thus accommodates filesystems with either preference.
-The FSS is least well understood, as there is little public information
-about the interface.
-However, the design goals are the least consistent with those of the Berkeley
-research groups.
-Accordingly, a new filesystem interface has been devised to avoid
-some of the problems in the other systems.
-The proposed interface derives directly from Sun's VFS,
-but, like GFS, uses a 4.3BSD-style name lookup interface.
-Additional context information has been moved from the \fIuser\fP structure
-to the \fInameidata\fP structure so that name translation may be independent
-of the global context of a user process.
-This is especially desired in any system where kernel-mode servers
-operate as light-weight or interrupt-level processes,
-or where a server may store or cache context for several clients.
-This calling interface has the additional advantage
-that the call parameters need not all be pushed onto the stack for each call
-through the filesystem interface,
-and they may be accessed using short offsets from a base pointer
-(unlike global variables in the \fIuser\fP structure).
-.PP
-The proposed filesystem interface is described very tersely here.
-For the most part, data structures and procedures are analogous
-to those used by VFS, and only the changes will be treated here.
-See [Kleiman86] for complete descriptions of the vfs and vnode operations
-in Sun's interface.
-.PP
-The central data structure for name translation is the \fInameidata\fP
-structure.
-The same structure is used to pass parameters to \fInamei\fP,
-to pass these same parameters to filesystem-specific lookup routines,
-to communicate completion status from the lookup routines back to \fInamei\fP,
-and to return completion status to the calling routine.
-For creation or deletion requests, the parameters to the filesystem operation
-to complete the request are also passed in this same structure.
-The form of the \fInameidata\fP structure is:
-.br
-.ne 2i
-.ID
-.nf
-.ta .5i +\w'caddr_t\0\0\0'u +\w'struct\0\0'u +\w'vnode *nc_prevdir;\0\0\0\0\0'u
-/*
- * Encapsulation of namei parameters.
- * One of these is located in the u. area to
- * minimize space allocated on the kernel stack
- * and to retain per-process context.
- */
-struct nameidata {
- /* arguments to namei and related context: */
- caddr_t ni_dirp; /* pathname pointer */
- enum uio_seg ni_seg; /* location of pathname */
- short ni_nameiop; /* see below */
- struct vnode *ni_cdir; /* current directory */
- struct vnode *ni_rdir; /* root directory, if not normal root */
- struct ucred *ni_cred; /* credentials */
-
- /* shared between namei, lookup routines and commit routines: */
- caddr_t ni_pnbuf; /* pathname buffer */
- char *ni_ptr; /* current location in pathname */
- int ni_pathlen; /* remaining chars in path */
- short ni_more; /* more left to translate in pathname */
- short ni_loopcnt; /* count of symlinks encountered */
-
- /* results: */
- struct vnode *ni_vp; /* vnode of result */
- struct vnode *ni_dvp; /* vnode of intermediate directory */
-
-/* BEGIN UFS SPECIFIC */
- struct diroffcache { /* last successful directory search */
- struct vnode *nc_prevdir; /* terminal directory */
- long nc_id; /* directory's unique id */
- off_t nc_prevoffset; /* where last entry found */
- } ni_nc;
-/* END UFS SPECIFIC */
-};
-.DE
-.DS
-.ta \w'#define\0\0'u +\w'WANTPARENT\0\0'u +\w'0x40\0\0\0\0\0\0\0'u
-/*
- * namei operations and modifiers
- */
-#define LOOKUP 0 /* perform name lookup only */
-#define CREATE 1 /* setup for file creation */
-#define DELETE 2 /* setup for file deletion */
-#define WANTPARENT 0x10 /* return parent directory vnode also */
-#define NOCACHE 0x20 /* name must not be left in cache */
-#define FOLLOW 0x40 /* follow symbolic links */
-#define NOFOLLOW 0x0 /* don't follow symbolic links (pseudo) */
-.DE
-As in current systems other than Sun's VFS, \fInamei\fP is called
-with an operation request, one of LOOKUP, CREATE or DELETE.
-For a LOOKUP, the operation is exactly like the lookup in VFS.
-CREATE and DELETE allow the filesystem to ensure consistency
-by locking the parent inode (private to the filesystem),
-and (for the local filesystem) to avoid duplicate directory scans
-by storing the new directory entry and its offset in the directory
-in the \fIndirinfo\fP structure.
-This is intended to be opaque to the filesystem-independent levels.
-Not all lookups for creation or deletion are actually followed
-by the intended operation; permission may be denied, the filesystem
-may be read-only, etc.
-Therefore, an entry point to the filesystem is provided
-to abort a creation or deletion operation
-and allow release of any locked internal data.
-After a \fInamei\fP with a CREATE or DELETE flag, the pathname pointer
-is set to point to the last filename component.
-Filesystems that choose to implement creation or deletion entirely
-within the subsequent call to a create or delete entry
-are thus free to do so.
-.PP
-The \fInameidata\fP is used to store context used during name translation.
-The current and root directories for the translation are stored here.
-For the local filesystem, the per-process directory offset cache
-is also kept here.
-A file server could leave the directory offset cache empty,
-could use a single cache for all clients,
-or could hold caches for several recent clients.
-.PP
-Several other data structures are used in the filesystem operations.
-One is the \fIucred\fP structure which describes a client's credentials
-to the filesystem.
-This is modified slightly from the Sun structure;
-the ``accounting'' group ID has been merged into the groups array.
-The actual number of groups in the array is given explicitly
-to avoid use of a reserved group ID as a terminator.
-Also, typedefs introduced in 4.3BSD for user and group ID's have been used.
-The \fIucred\fP structure is thus:
-.DS
-.ta .5i +\w'caddr_t\0\0\0'u +\w'struct\0\0'u +\w'vnode *nc_prevdir;\0\0\0\0\0'u
-/*
- * Credentials.
- */
-struct ucred {
- u_short cr_ref; /* reference count */
- uid_t cr_uid; /* effective user id */
- short cr_ngroups; /* number of groups */
- gid_t cr_groups[NGROUPS]; /* groups */
- /*
- * The following either should not be here,
- * or should be treated as opaque.
- */
- uid_t cr_ruid; /* real user id */
- gid_t cr_svgid; /* saved set-group id */
-};
-.DE
-.PP
-A final structure used by the filesystem interface is the \fIuio\fP
-structure mentioned earlier.
-This structure describes the source or destination of an I/O
-operation, with provision for scatter/gather I/O.
-It is used in the read and write entries to the filesystem.
-The \fIuio\fP structure presented here is modified from the one
-used in 4.2BSD to specify the location of each vector of the operation
-(user or kernel space)
-and to allow an alternate function to be used to implement the data movement.
-The alternate function might perform page remapping rather than a copy,
-for example.
-.DS
-.ta .5i +\w'caddr_t\0\0\0'u +\w'struct\0\0'u +\w'vnode *nc_prevdir;\0\0\0\0\0'u
-/*
- * Description of an I/O operation which potentially
- * involves scatter-gather, with individual sections
- * described by iovec, below. uio_resid is initially
- * set to the total size of the operation, and is
- * decremented as the operation proceeds. uio_offset
- * is incremented by the amount of each operation.
- * uio_iov is incremented and uio_iovcnt is decremented
- * after each vector is processed.
- */
-struct uio {
- struct iovec *uio_iov;
- int uio_iovcnt;
- off_t uio_offset;
- int uio_resid;
- enum uio_rw uio_rw;
-};
-
-enum uio_rw { UIO_READ, UIO_WRITE };
-.DE
-.DS
-.ta .5i +\w'caddr_t\0\0\0'u +\w'vnode *nc_prevdir;\0\0\0\0\0'u
-/*
- * Description of a contiguous section of an I/O operation.
- * If iov_op is non-null, it is called to implement the copy
- * operation, possibly by remapping, with the call
- * (*iov_op)(from, to, count);
- * where from and to are caddr_t and count is int.
- * Otherwise, the copy is done in the normal way,
- * treating base as a user or kernel virtual address
- * according to iov_segflg.
- */
-struct iovec {
- caddr_t iov_base;
- int iov_len;
- enum uio_seg iov_segflg;
- int (*iov_op)();
-};
-.DE
-.DS
-.ta .5i +\w'UIO_USERISPACE\0\0\0\0\0'u
-/*
- * Segment flag values.
- */
-enum uio_seg {
- UIO_USERSPACE, /* from user data space */
- UIO_SYSSPACE, /* from system space */
- UIO_USERISPACE /* from user I space */
-};
-.DE
-.SH
-File and filesystem operations
-.PP
-With the introduction of the data structures used by the filesystem
-operations, the complete list of filesystem entry points may be listed.
-As noted, they derive mostly from the Sun VFS interface.
-Lines marked with \fB+\fP are additions to the Sun definitions;
-lines marked with \fB!\fP are modified from VFS.
-.PP
-The structure describing the externally-visible features of a mounted
-filesystem, \fIvfs\fP, is:
-.DS
-.ta .5i +\w'struct vfsops\0\0\0'u +\w'*vfs_vnodecovered;\0\0\0\0\0'u
-/*
- * Structure per mounted file system.
- * Each mounted file system has an array of
- * operations and an instance record.
- * The file systems are put on a doubly linked list.
- */
-struct vfs {
- struct vfs *vfs_next; /* next vfs in vfs list */
-\fB+\fP struct vfs *vfs_prev; /* prev vfs in vfs list */
- struct vfsops *vfs_op; /* operations on vfs */
- struct vnode *vfs_vnodecovered; /* vnode we mounted on */
- int vfs_flag; /* flags */
-\fB!\fP int vfs_fsize; /* fundamental block size */
-\fB+\fP int vfs_bsize; /* optimal transfer size */
-\fB!\fP uid_t vfs_exroot; /* exported fs uid 0 mapping */
- short vfs_exflags; /* exported fs flags */
- caddr_t vfs_data; /* private data */
-};
-.DE
-.DS
-.ta \w'\fB+\fP 'u +\w'#define\0\0'u +\w'VFS_EXPORTED\0\0'u +\w'0x40\0\0\0\0\0'u
- /*
- * vfs flags.
- * VFS_MLOCK lock the vfs so that name lookup cannot proceed past the vfs.
- * This keeps the subtree stable during mounts and unmounts.
- */
- #define VFS_RDONLY 0x01 /* read only vfs */
-\fB+\fP #define VFS_NOEXEC 0x02 /* can't exec from filesystem */
- #define VFS_MLOCK 0x04 /* lock vfs so that subtree is stable */
- #define VFS_MWAIT 0x08 /* someone is waiting for lock */
- #define VFS_NOSUID 0x10 /* don't honor setuid bits on vfs */
- #define VFS_EXPORTED 0x20 /* file system is exported (NFS) */
-
- /*
- * exported vfs flags.
- */
- #define EX_RDONLY 0x01 /* exported read only */
-.DE
-.LP
-The operations supported by the filesystem-specific layer
-on an individual filesystem are:
-.DS
-.ta .5i +\w'struct vfsops\0\0\0'u +\w'*vfs_vnodecovered;\0\0\0\0\0'u
-/*
- * Operations supported on virtual file system.
- */
-struct vfsops {
-\fB!\fP int (*vfs_mount)( /* vfs, path, data, datalen */ );
-\fB!\fP int (*vfs_unmount)( /* vfs, forcibly */ );
-\fB+\fP int (*vfs_mountroot)();
- int (*vfs_root)( /* vfs, vpp */ );
-\fB!\fP int (*vfs_statfs)( /* vfs, vp, sbp */ );
-\fB!\fP int (*vfs_sync)( /* vfs, waitfor */ );
-\fB+\fP int (*vfs_fhtovp)( /* vfs, fhp, vpp */ );
-\fB+\fP int (*vfs_vptofh)( /* vp, fhp */ );
-};
-.DE
-.LP
-The \fIvfs_statfs\fP entry returns a structure of the form:
-.DS
-.ta .5i +\w'struct vfsops\0\0\0'u +\w'*vfs_vnodecovered;\0\0\0\0\0'u
-/*
- * file system statistics
- */
-struct statfs {
-\fB!\fP short f_type; /* type of filesystem */
-\fB+\fP short f_flags; /* copy of vfs (mount) flags */
-\fB!\fP long f_fsize; /* fundamental file system block size */
-\fB+\fP long f_bsize; /* optimal transfer block size */
- long f_blocks; /* total data blocks in file system */
- long f_bfree; /* free blocks in fs */
- long f_bavail; /* free blocks avail to non-superuser */
- long f_files; /* total file nodes in file system */
- long f_ffree; /* free file nodes in fs */
- fsid_t f_fsid; /* file system id */
-\fB+\fP char *f_mntonname; /* directory on which mounted */
-\fB+\fP char *f_mntfromname; /* mounted filesystem */
- long f_spare[7]; /* spare for later */
-};
-
-typedef long fsid_t[2]; /* file system id type */
-.DE
-.LP
-The modifications to Sun's interface at this level are minor.
-Additional arguments are present for the \fIvfs_mount\fP and \fIvfs_unmount\fP
-entries.
-\fIvfs_statfs\fP accepts a vnode as well as filesystem identifier,
-as the information may not be uniform throughout a filesystem.
-For example,
-if a client may mount a file tree that spans multiple physical
-filesystems on a server, different sections may have different amounts
-of free space.
-(NFS does not allow remotely-mounted file trees to span physical filesystems
-on the server.)
-The final additions are the entries that support file handles.
-\fIvfs_vptofh\fP is provided for the use of file servers,
-which need to obtain an opaque
-file handle to represent the current vnode for transmission to clients.
-This file handle may later be used to relocate the vnode using \fIvfs_fhtovp\fP
-without requiring the vnode to remain in memory.
-.PP
-Finally, the external form of a filesystem object, the \fIvnode\fP, is:
-.DS
-.ta .5i +\w'struct vnodeops\0\0'u +\w'*v_vfsmountedhere;\0\0\0'u
-/*
- * vnode types. VNON means no type.
- */
-enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK };
-
-struct vnode {
- u_short v_flag; /* vnode flags (see below) */
- u_short v_count; /* reference count */
- u_short v_shlockc; /* count of shared locks */
- u_short v_exlockc; /* count of exclusive locks */
- struct vfs *v_vfsmountedhere; /* ptr to vfs mounted here */
- struct vfs *v_vfsp; /* ptr to vfs we are in */
- struct vnodeops *v_op; /* vnode operations */
-\fB+\fP struct text *v_text; /* text/mapped region */
- enum vtype v_type; /* vnode type */
- caddr_t v_data; /* private data for fs */
-};
-.DE
-.DS
-.ta \w'#define\0\0'u +\w'NOFOLLOW\0\0'u +\w'0x40\0\0\0\0\0\0\0'u
-/*
- * vnode flags.
- */
-#define VROOT 0x01 /* root of its file system */
-#define VTEXT 0x02 /* vnode is a pure text prototype */
-#define VEXLOCK 0x10 /* exclusive lock */
-#define VSHLOCK 0x20 /* shared lock */
-#define VLWAIT 0x40 /* proc is waiting on shared or excl. lock */
-.DE
-.LP
-The operations supported by the filesystems on individual \fIvnode\fP\^s
-are:
-.DS
-.ta .5i +\w'int\0\0\0\0\0'u +\w'(*vn_getattr)(\0\0\0\0\0'u
-/*
- * Operations on vnodes.
- */
-struct vnodeops {
-\fB!\fP int (*vn_lookup)( /* ndp */ );
-\fB!\fP int (*vn_create)( /* ndp, vap, fflags */ );
-\fB+\fP int (*vn_mknod)( /* ndp, vap, fflags */ );
-\fB!\fP int (*vn_open)( /* vp, fflags, cred */ );
- int (*vn_close)( /* vp, fflags, cred */ );
- int (*vn_access)( /* vp, fflags, cred */ );
- int (*vn_getattr)( /* vp, vap, cred */ );
- int (*vn_setattr)( /* vp, vap, cred */ );
-
-\fB+\fP int (*vn_read)( /* vp, uiop, offp, ioflag, cred */ );
-\fB+\fP int (*vn_write)( /* vp, uiop, offp, ioflag, cred */ );
-\fB!\fP int (*vn_ioctl)( /* vp, com, data, fflag, cred */ );
- int (*vn_select)( /* vp, which, cred */ );
-\fB+\fP int (*vn_mmap)( /* vp, ..., cred */ );
- int (*vn_fsync)( /* vp, cred */ );
-\fB+\fP int (*vn_seek)( /* vp, offp, off, whence */ );
-
-\fB!\fP int (*vn_remove)( /* ndp */ );
-\fB!\fP int (*vn_link)( /* vp, ndp */ );
-\fB!\fP int (*vn_rename)( /* src ndp, target ndp */ );
-\fB!\fP int (*vn_mkdir)( /* ndp, vap */ );
-\fB!\fP int (*vn_rmdir)( /* ndp */ );
-\fB!\fP int (*vn_symlink)( /* ndp, vap, nm */ );
- int (*vn_readdir)( /* vp, uiop, offp, ioflag, cred */ );
- int (*vn_readlink)( /* vp, uiop, ioflag, cred */ );
-
-\fB+\fP int (*vn_abortop)( /* ndp */ );
-\fB+\fP int (*vn_lock)( /* vp */ );
-\fB+\fP int (*vn_unlock)( /* vp */ );
-\fB!\fP int (*vn_inactive)( /* vp */ );
-};
-.DE
-.DS
-.ta \w'#define\0\0'u +\w'NOFOLLOW\0\0'u +\w'0x40\0\0\0\0\0'u
-/*
- * flags for ioflag
- */
-#define IO_UNIT 0x01 /* do io as atomic unit for VOP_RDWR */
-#define IO_APPEND 0x02 /* append write for VOP_RDWR */
-#define IO_SYNC 0x04 /* sync io for VOP_RDWR */
-.DE
-.LP
-The argument types listed in the comments following each operation are:
-.sp
-.IP ndp 10
-A pointer to a \fInameidata\fP structure.
-.IP vap
-A pointer to a \fIvattr\fP structure (vnode attributes; see below).
-.IP fflags
-File open flags, possibly including O_APPEND, O_CREAT, O_TRUNC and O_EXCL.
-.IP vp
-A pointer to a \fIvnode\fP previously obtained with \fIvn_lookup\fP.
-.IP cred
-A pointer to a \fIucred\fP credentials structure.
-.IP uiop
-A pointer to a \fIuio\fP structure.
-.IP ioflag
-Any of the IO flags defined above.
-.IP com
-An \fIioctl\fP command, with type \fIunsigned long\fP.
-.IP data
-A pointer to a character buffer used to pass data to or from an \fIioctl\fP.
-.IP which
-One of FREAD, FWRITE or 0 (select for exceptional conditions).
-.IP off
-A file offset of type \fIoff_t\fP.
-.IP offp
-A pointer to file offset of type \fIoff_t\fP.
-.IP whence
-One of SEEK_SET, SEEK_CUR, or SEEK_END.
-.IP fhp
-A pointer to a file handle buffer.
-.sp
-.PP
-Several changes have been made to Sun's set of vnode operations.
-Most obviously, the \fIvn_lookup\fP receives a \fInameidata\fP structure
-containing its arguments and context as described.
-The same structure is also passed to one of the creation or deletion
-entries if the lookup operation is for CREATE or DELETE to complete
-an operation, or to the \fIvn_abortop\fP entry if no operation
-is undertaken.
-For filesystems that perform no locking between lookup for creation
-or deletion and the call to implement that action,
-the final pathname component may be left untranslated by the lookup
-routine.
-In any case, the pathname pointer points at the final name component,
-and the \fInameidata\fP contains a reference to the vnode of the parent
-directory.
-The interface is thus flexible enough to accommodate filesystems
-that are fully stateful or fully stateless, while avoiding redundant
-operations whenever possible.
-One operation remains problematical, the \fIvn_rename\fP call.
-It is tempting to look up the source of the rename for deletion
-and the target for creation.
-However, filesystems that lock directories during such lookups must avoid
-deadlock if the two paths cross.
-For that reason, the source is translated for LOOKUP only,
-with the WANTPARENT flag set;
-the target is then translated with an operation of CREATE.
-.PP
-In addition to the changes concerned with the \fInameidata\fP interface,
-several other changes were made in the vnode operations.
-The \fIvn_rdwr\fP entry was split into \fIvn_read\fP and \fIvn_write\fP;
-frequently, the read/write entry amounts to a routine that checks
-the direction flag, then calls either a read routine or a write routine.
-The two entries may be identical for any given filesystem;
-the direction flag is contained in the \fIuio\fP given as an argument.
-.PP
-All of the read and write operations use a \fIuio\fP to describe
-the file offset and buffer locations.
-All of these fields must be updated before return.
-In particular, the \fIvn_readdir\fP entry uses this
-to return a new file offset token for its current location.
-.PP
-Several new operations have been added.
-The first, \fIvn_seek\fP, is a concession to record-oriented files
-such as directories.
-It allows the filesystem to verify that a seek leaves a file at a sensible
-offset, or to return a new offset token relative to an earlier one.
-For most filesystems and files, this operation amounts to performing
-simple arithmetic.
-Another new entry point is \fIvn_mmap\fP, for use in mapping device memory
-into a user process address space.
-Its semantics are not yet decided.
-The final additions are the \fIvn_lock\fP and \fIvn_unlock\fP entries.
-These are used to request that the underlying file be locked against
-changes for short periods of time if the filesystem implementation allows it.
-They are used to maintain consistency
-during internal operations such as \fIexec\fP,
-and may not be used to construct atomic operations from other filesystem
-operations.
-.PP
-The attributes of a vnode are not stored in the vnode,
-as they might change with time and may need to be read from a remote
-source.
-Attributes have the form:
-.DS
-.ta .5i +\w'struct vnodeops\0\0'u +\w'*v_vfsmountedhere;\0\0\0'u
-/*
- * Vnode attributes. A field value of -1
- * represents a field whose value is unavailable
- * (getattr) or which is not to be changed (setattr).
- */
-struct vattr {
- enum vtype va_type; /* vnode type (for create) */
- u_short va_mode; /* files access mode and type */
-\fB!\fP uid_t va_uid; /* owner user id */
-\fB!\fP gid_t va_gid; /* owner group id */
- long va_fsid; /* file system id (dev for now) */
-\fB!\fP long va_fileid; /* file id */
- short va_nlink; /* number of references to file */
- u_long va_size; /* file size in bytes (quad?) */
-\fB+\fP u_long va_size1; /* reserved if not quad */
- long va_blocksize; /* blocksize preferred for i/o */
- struct timeval va_atime; /* time of last access */
- struct timeval va_mtime; /* time of last modification */
- struct timeval va_ctime; /* time file changed */
- dev_t va_rdev; /* device the file represents */
- u_long va_bytes; /* bytes of disk space held by file */
-\fB+\fP u_long va_bytes1; /* reserved if va_bytes not a quad */
-};
-.DE
-.SH
-Conclusions
-.PP
-The Sun VFS filesystem interface is the most widely used generic
-filesystem interface.
-Of the interfaces examined, it creates the cleanest separation
-between the filesystem-independent and -dependent layers and data structures.
-It has several flaws, but it is felt that certain changes in the interface
-can ameliorate most of them.
-The interface proposed here includes those changes.
-The proposed interface is now being implemented by the Computer Systems
-Research Group at Berkeley.
-If the design succeeds in improving the flexibility and performance
-of the filesystem layering, it will be advanced as a model interface.
-.SH
-Acknowledgements
-.PP
-The filesystem interface described here is derived from Sun's VFS interface.
-It also includes features similar to those of DEC's GFS interface.
-We are indebted to members of the Sun and DEC system groups
-for long discussions of the issues involved.
-.br
-.ne 2i
-.SH
-References
-
-.IP Brownbridge82 \w'Satyanarayanan85\0\0'u
-Brownbridge, D.R., L.F. Marshall, B. Randell,
-``The Newcastle Connection, or UNIXes of the World Unite!,''
-\fISoftware\- Practice and Experience\fP, Vol. 12, pp. 1147-1162, 1982.
-
-.IP Cole85
-Cole, C.T., P.B. Flinn, A.B. Atlas,
-``An Implementation of an Extended File System for UNIX,''
-\fIUsenix Conference Proceedings\fP,
-pp. 131-150, June, 1985.
-
-.IP Kleiman86
-Kleiman, S.R., ``Vnodes: An Architecture for Multiple File System Types in Sun UNIX,''
-\fIUsenix Conference Proceedings\fP,
-pp. 238-247, June, 1986.
-
-.IP Leffler84
-Leffler, S., M.K. McKusick, M. Karels,
-``Measuring and Improving the Performance of 4.2BSD,''
-\fIUsenix Conference Proceedings\fP, pp. 237-252, June, 1984.
-
-.IP McKusick84
-McKusick, M.K., W.N. Joy, S.J. Leffler, R.S. Fabry,
-``A Fast File System for UNIX,'' \fITransactions on Computer Systems\fP,
-Vol. 2, pp. 181-197,
-ACM, August, 1984.
-
-.IP McKusick85
-McKusick, M.K., M. Karels, S. Leffler,
-``Performance Improvements and Functional Enhancements in 4.3BSD,''
-\fIUsenix Conference Proceedings\fP, pp. 519-531, June, 1985.
-
-.IP Rifkin86
-Rifkin, A.P., M.P. Forbes, R.L. Hamilton, M. Sabrio, S. Shah, and K. Yueh,
-``RFS Architectural Overview,'' \fIUsenix Conference Proceedings\fP,
-pp. 248-259, June, 1986.
-
-.IP Ritchie74
-Ritchie, D.M. and K. Thompson, ``The Unix Time-Sharing System,''
-\fICommunications of the ACM\fP, Vol. 17, pp. 365-375, July, 1974.
-
-.IP Rodriguez86
-Rodriguez, R., M. Koehler, R. Hyde,
-``The Generic File System,'' \fIUsenix Conference Proceedings\fP,
-pp. 260-269, June, 1986.
-
-.IP Sandberg85
-Sandberg, R., D. Goldberg, S. Kleiman, D. Walsh, B. Lyon,
-``Design and Implementation of the Sun Network Filesystem,''
-\fIUsenix Conference Proceedings\fP,
-pp. 119-130, June, 1985.
-
-.IP Satyanarayanan85
-Satyanarayanan, M., \fIet al.\fP,
-``The ITC Distributed File System: Principles and Design,''
-\fIProc. 10th Symposium on Operating Systems Principles\fP, pp. 35-50,
-ACM, December, 1985.
-
-.IP Walker85
-Walker, B.J. and S.H. Kiser, ``The LOCUS Distributed Filesystem,''
-\fIThe LOCUS Distributed System Architecture\fP,
-G.J. Popek and B.J. Walker, ed., The MIT Press, Cambridge, MA, 1985.
-
-.IP Weinberger84
-Weinberger, P.J., ``The Version 8 Network File System,''
-\fIUsenix Conference presentation\fP,
-June, 1984.
diff --git a/share/doc/papers/fsinterface/slides.t b/share/doc/papers/fsinterface/slides.t
deleted file mode 100644
index 898af7f0f00..00000000000
--- a/share/doc/papers/fsinterface/slides.t
+++ /dev/null
@@ -1,316 +0,0 @@
-.\" $OpenBSD: slides.t,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1986 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)slides.t 5.2 (Berkeley) 4/16/91
-.\"
-.so macros
-.nf
-.LL
-Encapsulation of namei parameters
-.NP 0
-.ta .5i +\w'caddr_t\0\0'u +\w'struct\0\0'u +\w'vnode *nc_prevdir;\0\0\0\0\0'u
-struct nameidata {
- /* arguments and context: */
- caddr_t ni_dirp;
- enum uio_seg ni_seg;
- short ni_nameiop;
- struct vnode *ni_cdir;
- struct vnode *ni_rdir;
- struct ucred *ni_cred;
-.sp .2
- /* shared with lookup and commit: */
- caddr_t ni_pnbuf;
- char *ni_ptr;
- int ni_pathlen;
- short ni_more;
- short ni_loopcnt;
-.sp .2
- /* results: */
- struct vnode *ni_vp;
- struct vnode *ni_dvp;
-.sp .2
-/* BEGIN UFS SPECIFIC */
- struct diroffcache {
- struct vnode *nc_prevdir;
- long nc_id;
- off_t nc_prevoffset;
- } ni_nc;
-/* END UFS SPECIFIC */
-};
-.bp
-
-
-.LL
-Namei operations and modifiers
-
-.NP 0
-.ta \w'#define\0\0'u +\w'WANTPARENT\0\0'u +\w'0x40\0\0\0\0\0\0\0'u
-#define LOOKUP 0 /* name lookup only */
-#define CREATE 1 /* setup for creation */
-#define DELETE 2 /* setup for deletion */
-#define WANTPARENT 0x10 /* return parent vnode also */
-#define NOCACHE 0x20 /* remove name from cache */
-#define FOLLOW 0x40 /* follow symbolic links */
-.bp
-
-.LL
-Namei operations and modifiers
-
-.NP 0
-.ta \w'#define\0\0'u +\w'WANTPARENT\0\0'u +\w'0x40\0\0\0\0\0\0\0'u
-#define LOOKUP 0
-#define CREATE 1
-#define DELETE 2
-#define WANTPARENT 0x10
-#define NOCACHE 0x20
-#define FOLLOW 0x40
-.bp
-
-
-.LL
-Credentials
-
-.NP 0
-.ta .5i +\w'caddr_t\0\0\0'u +\w'struct\0\0'u +\w'vnode *nc_prevdir;\0\0\0\0\0'u
-struct ucred {
- u_short cr_ref;
- uid_t cr_uid;
- short cr_ngroups;
- gid_t cr_groups[NGROUPS];
- /*
- * The following either should not be here,
- * or should be treated as opaque.
- */
- uid_t cr_ruid;
- gid_t cr_svgid;
-};
-.bp
-.LL
-Scatter-gather I/O
-.NP 0
-.ta .5i +\w'caddr_t\0\0\0'u +\w'struct\0\0'u +\w'vnode *nc_prevdir;\0\0\0\0\0'u
-struct uio {
- struct iovec *uio_iov;
- int uio_iovcnt;
- off_t uio_offset;
- int uio_resid;
- enum uio_rw uio_rw;
-};
-
-enum uio_rw { UIO_READ, UIO_WRITE };
-
-
-
-.ta .5i +\w'caddr_t\0\0\0'u +\w'vnode *nc_prevdir;\0\0\0\0\0'u
-struct iovec {
- caddr_t iov_base;
- int iov_len;
- enum uio_seg iov_segflg;
- int (*iov_op)();
-};
-.bp
-.LL
-Per-filesystem information
-.NP 0
-.ta .25i +\w'struct vfsops\0\0\0'u +\w'*vfs_vnodecovered;\0\0\0\0\0'u
-struct vfs {
- struct vfs *vfs_next;
-\fB+\fP struct vfs *vfs_prev;
- struct vfsops *vfs_op;
- struct vnode *vfs_vnodecovered;
- int vfs_flag;
-\fB!\fP int vfs_fsize;
-\fB+\fP int vfs_bsize;
-\fB!\fP uid_t vfs_exroot;
- short vfs_exflags;
- caddr_t vfs_data;
-};
-
-.NP 0
-.ta \w'\fB+\fP 'u +\w'#define\0\0'u +\w'VFS_EXPORTED\0\0'u +\w'0x40\0\0\0\0\0'u
- /* vfs flags: */
- #define VFS_RDONLY 0x01
-\fB+\fP #define VFS_NOEXEC 0x02
- #define VFS_MLOCK 0x04
- #define VFS_MWAIT 0x08
- #define VFS_NOSUID 0x10
- #define VFS_EXPORTED 0x20
-
- /* exported vfs flags: */
- #define EX_RDONLY 0x01
-.bp
-
-
-.LL
-Operations supported on virtual file system.
-
-.NP 0
-.ta .25i +\w'int\0\0'u +\w'*vfs_mountroot();\0'u
-struct vfsops {
-\fB!\fP int (*vfs_mount)(vfs, path, data, len);
-\fB!\fP int (*vfs_unmount)(vfs, forcibly);
-\fB+\fP int (*vfs_mountroot)();
- int (*vfs_root)(vfs, vpp);
- int (*vfs_statfs)(vfs, sbp);
-\fB!\fP int (*vfs_sync)(vfs, waitfor);
-\fB+\fP int (*vfs_fhtovp)(vfs, fhp, vpp);
-\fB+\fP int (*vfs_vptofh)(vp, fhp);
-};
-.bp
-
-
-.LL
-Dynamic file system information
-
-.NP 0
-.ta .5i +\w'struct\0\0\0'u +\w'*vfs_vnodecovered;\0\0\0\0\0'u
-struct statfs {
-\fB!\fP short f_type;
-\fB+\fP short f_flags;
-\fB!\fP long f_fsize;
-\fB+\fP long f_bsize;
- long f_blocks;
- long f_bfree;
- long f_bavail;
- long f_files;
- long f_ffree;
- fsid_t f_fsid;
-\fB+\fP char *f_mntonname;
-\fB+\fP char *f_mntfromname;
- long f_spare[7];
-};
-
-typedef long fsid_t[2];
-.bp
-.LL
-Filesystem objects (vnodes)
-.NP 0
-.ta .25i +\w'struct vnodeops\0\0'u +\w'*v_vfsmountedhere;\0\0\0'u
-enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK };
-
-struct vnode {
- u_short v_flag;
- u_short v_count;
- u_short v_shlockc;
- u_short v_exlockc;
- struct vfs *v_vfsmountedhere;
- struct vfs *v_vfsp;
- struct vnodeops *v_op;
-\fB+\fP struct text *v_text;
- enum vtype v_type;
- caddr_t v_data;
-};
-.ta \w'#define\0\0'u +\w'NOFOLLOW\0\0'u +\w'0x40\0\0\0\0\0\0\0'u
-
-/* vnode flags */
-#define VROOT 0x01
-#define VTEXT 0x02
-#define VEXLOCK 0x10
-#define VSHLOCK 0x20
-#define VLWAIT 0x40
-.bp
-.LL
-Operations on vnodes
-
-.NP 0
-.ta .25i +\w'int\0\0'u +\w'(*vn_getattr)(\0\0\0\0\0'u
-struct vnodeops {
-\fB!\fP int (*vn_lookup)(ndp);
-\fB!\fP int (*vn_create)(ndp, vap, fflags);
-\fB+\fP int (*vn_mknod)(ndp, vap, fflags);
-\fB!\fP int (*vn_open)(vp, fflags, cred);
- int (*vn_close)(vp, fflags, cred);
- int (*vn_access)(vp, fflags, cred);
- int (*vn_getattr)(vp, vap, cred);
- int (*vn_setattr)(vp, vap, cred);
-.sp .5
-\fB+\fP int (*vn_read)(vp, uiop,
- offp, ioflag, cred);
-\fB+\fP int (*vn_write)(vp, uiop,
- offp, ioflag, cred);
-\fB!\fP int (*vn_ioctl)(vp, com,
- data, fflag, cred);
- int (*vn_select)(vp, which, cred);
-\fB+\fP int (*vn_mmap)(vp, ..., cred);
- int (*vn_fsync)(vp, cred);
-\fB+\fP int (*vn_seek)(vp, offp, off,
- whence);
-.bp
-.LL
-Operations on vnodes (cont)
-
-.NP 0
-.ta .25i +\w'int\0\0'u +\w'(*vn_getattr)(\0\0\0\0\0'u
-
-\fB!\fP int (*vn_remove)(ndp);
-\fB!\fP int (*vn_link)(vp, ndp);
-\fB!\fP int (*vn_rename)(sndp, tndp);
-\fB!\fP int (*vn_mkdir)(ndp, vap);
-\fB!\fP int (*vn_rmdir)(ndp);
-\fB!\fP int (*vn_symlink)(ndp, vap, nm);
-\fB!\fP int (*vn_readdir)(vp, uiop,
- offp, ioflag, cred);
-\fB!\fP int (*vn_readlink)(vp, uiop,
- offp, ioflag, cred);
-.sp .5
-\fB+\fP int (*vn_abortop)(ndp);
-\fB!\fP int (*vn_inactive)(vp);
-};
-
-.NP 0
-.ta \w'#define\0\0'u +\w'NOFOLLOW\0\0'u +\w'0x40\0\0\0\0\0'u
-/* flags for ioflag */
-#define IO_UNIT 0x01
-#define IO_APPEND 0x02
-#define IO_SYNC 0x04
-.bp
-
-.LL
-Vnode attributes
-
-.NP 0
-.ta .5i +\w'struct timeval\0\0'u +\w'*v_vfsmountedhere;\0\0\0'u
-struct vattr {
- enum vtype va_type;
- u_short va_mode;
-\fB!\fP uid_t va_uid;
-\fB!\fP gid_t va_gid;
- long va_fsid;
-\fB!\fP long va_fileid;
- short va_nlink;
- u_long va_size;
-\fB+\fP u_long va_size1;
- long va_blocksize;
- struct timeval va_atime;
- struct timeval va_mtime;
- struct timeval va_ctime;
- dev_t va_rdev;
-\fB!\fP u_long va_bytes;
-\fB+\fP u_long va_bytes1;
-};
diff --git a/share/doc/papers/future/0.t b/share/doc/papers/future/0.t
deleted file mode 100644
index b0bf05803d3..00000000000
--- a/share/doc/papers/future/0.t
+++ /dev/null
@@ -1,58 +0,0 @@
-.\" Copyright (c) 1986 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)0.t 5.1 (Berkeley) 4/16/91
-.\"
-.rm CM
-.TL
-Directions of UNIX at Berkeley
-.AU
-Marshall Kirk McKusick
-.AU
-Michael J. Karels
-.AI
-Computer Systems Research Group
-Computer Science Division
-Department of Electrical Engineering and Computer Science
-University of California, Berkeley
-Berkeley, California 94720
-.AB
-This paper gives a brief overview of the contributions to UNIX\(dg
-.FS
-\(dgUNIX is a registered trademark of AT&T in the US and other countries.
-.FE
-made by the research community and describes the needs that
-prompted the distributions from Berkeley.
-The next Berkeley system will attempt to adapt to the
-current state of technology in the areas of virtual memory
-and file system interfaces.
-The paper makes a brief survey of this available technological base
-and then speculates on the ways in which future
-Berkeley systems will use this technology.
-.AE
-.LP
-.sp 2
diff --git a/share/doc/papers/future/1.t b/share/doc/papers/future/1.t
deleted file mode 100644
index 95655e0033c..00000000000
--- a/share/doc/papers/future/1.t
+++ /dev/null
@@ -1,154 +0,0 @@
-.\" Copyright (c) 1986 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)1.t 5.1 (Berkeley) 4/16/91
-.\"
-.NH
-The Role of Research in Maintaining System Vitality
-.PP
-Since the divestiture of AT&T, UNIX has become the focus of
-a massive marketing effort.
-To succeed, this effort must convince
-potential customers that the product is supported,
-that future versions will continue to be developed,
-and that these versions will be upwardly
-compatible with all past applications.
-.PP
-AT&T's size alone ensures that it will be around in years to come.
-Because the company has allocated increasing research, development,
-and support resources to UNIX over the past 10 years,
-it provides an assurance of its commitment.
-Its massive advertising campaign for System V,
-its presence on the /usr/group UNIX standards committee,
-and the publication of the \fISystem V Interface Definition\fR
-testify to the company's intention
-to remain compatible with past systems.
-.PP
-Although repeal of the law of entropy is a necessary step
-along the road to a
-viable commercial product, this runs counter to
-orderly system evolution.
-Be that as it may, AT&T's major UNIX
-commercialization effort has succeeded in making the system
-available to a much broader audience than was previously possible.
-.PP
-The freezing of what previously had been
-an ever-changing UNIX interface
-represented a major departure from the pattern that the
-small but highly skilled UNIX community had come to expect.
-Most early users had accounts
-at sites that had the source to the programs they ran.
-Thus, as the system interface evolved to reflect more current technology,
-software could be changed to keep pace.
-Users simply updated their programs
-to account for the new interface,
-recompiled them, and continued to use them as before.
-Although this required a large effort,
-it allowed the system --
-and the tools that ran on it -- to reflect
-changes in software technology.
-.PP
-At the forefront of the technological wave
-was AT&T's own Bell Laboratories [Ritchie74].
-It was there that the UNIX system was born and nurtured,
-and it was there that its evolution was controlled --
-up through the release
-of the 7th Edition.
-Universities also were involved with the
-system almost from its inception.
-The University of California at
-Berkeley was among the first participants,
-playing host to several researchers on sabbatical from the Labs.
-This cooperation typified the harmony
-that was characteristic of the early UNIX community.
-Work that was contributed to the Labs by
-different members of the community helped
-produce a rapidly expanding set of tools and facilities.
-.PP
-With the release of the 7th Edition, though,
-the usefulness of UNIX already had been
-clearly established, and other organizations within AT&T began
-to handle the public releases of the system.
-These groups took far less input from the community
-as they began to freeze the system interface
-in preparation for entry into the commercial marketplace.
-.PP
-As the research community continued to modify the UNIX system,
-it found that it needed an organization that could produce releases.
-Berkeley quickly stepped into the role.
-Before the final public release of UNIX from the Labs,
-Berkeley's work had been focused on the development of
-tools designed to be added to
-existing UNIX systems.
-After the AT&T freeze, though,
-a group of researchers at
-the university found that they could easily
-expand their role to include the coalescing function
-previously provided by the Labs.
-Out of this came the first full Berkeley distribution of UNIX
-(3.0BSD),
-complete with virtual memory --
-a first for UNIX users.
-The idea was so successful that System V eventually adopted it
-six years later.
-.NH 2
-Motivations for Change
-.PP
-At the same time that AT&T was beginning
-to put the brakes on further
-change in UNIX, local area networks and bitmapped workstations
-were just beginning to emerge from
-Xerox PARC and other research centers.
-Users in the academic and research community realized
-that there were no production-quality operating systems
-capable of using such hardware.
-They also saw that networking unquestionably would be
-an indispensable facility in future systems research.
-Though it was not clear that UNIX
-was the correct base on which to build a networked system,
-it was clear that UNIX offered the most expedient means by which
-to build such a system.
-.PP
-This presented the Berkeley group with an interesting challenge:
-how to meet the needs of the community of users
-without adding needless complexity to existing applications.
-Their efforts were aided by the presence of a large and diverse
-local group of users who were teaching introductory programming,
-typesetting documents, developing software systems, and
-trying to build huge Lisp-based systems capable
-of solving differential equations.
-In addition, they were able to discuss current problems
-and hash out potential solutions at semi-annual
-technical conferences run by the Usenix organization.
-.PP
-The assistance of a steering committee composed of academics,
-commercial vendors, DARPA researchers, and people from the Labs
-made it possible for
-the architecture of a networking-based UNIX system to be developed.
-By keeping with the UNIX tradition of integrating work done by
-others in preference to writing everything from scratch,
-4.2BSD was released less than two years later [Joy83].
diff --git a/share/doc/papers/future/2.t b/share/doc/papers/future/2.t
deleted file mode 100644
index a38b0bc59dc..00000000000
--- a/share/doc/papers/future/2.t
+++ /dev/null
@@ -1,180 +0,0 @@
-.\" Copyright (c) 1986 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)2.t 5.1 (Berkeley) 4/16/91
-.\"
-.NH
-The Future of UNIX at Berkeley
-.PP
-The release of 4.3BSD in April of 1986 addressed many of the
-performance problems and unfinished interfaces
-present in 4.2BSD [Leffler84] [McKusick85].
-Berkeley has now embarked on a new development phase to likewise
-update other old parts of the system.
-There are three main areas of work.
-The first is to rewrite the virtual memory system to take
-advantage of current technology and to provide new capabilities
-such as mapped files and shared memory.
-The second is to provide a standard interface to file systems
-so that multiple local and remote file systems can be supported
-much as multiple networking protocols are by 4.3BSD.
-Finally, there is a need to provide more internal flexibility in a
-way similar to the System V Streams paradigm.
-.NH 2
-A New Virtual Memory Implementation
-.PP
-With the cost per byte of memory approaching the cost per byte
-for disks, and with file systems increasingly removed from host
-machines, a new approach to the implementation of virtual memory is
-necessary. In 4.3BSD the swap space is preallocated;
-this limits the maximum virtual memory that can be
-supported to the size of the swap area [Babaoglu79] [Someren84].
-The new system should support virtual memory space at least as great as
-the sum of sizes of physical memory plus swap space
-(a system may run with no swap space if it has no local disk).
-For systems that have a local swap
-disk, but utilize remote file systems,
-using some memory to keep track of the contents of swap space
-may be useful to avoid multiple fetches
-of the same data from the file system.
-.PP
-The new implementation should also add new functionality. Processes
-should be allowed to have large sparse address spaces, to map files
-into their address spaces, to map device memory into their address
-spaces, and to share memory with other processes. The shared address
-space may either be obtained by mapping a file into (possibly
-different) parts of the address space, or by arranging for processes to
-share ``anonymous memory'' (that is, memory that is zero-fill on demand, and
-whose contents are lost when the last process unmaps the memory).
-This latter approach was the one adopted by the developers of System V.
-.PP
-One possible use of shared memory is to provide a high-speed
-Inter-Process Communication (IPC) mechanism between two or more
-cooperating processes. To ensure the integrity of data structures
-in a shared region, processes must be able to use semaphores to
-coordinate their access to these shared structures. In System V,
-semaphores are provided as a set of system calls. Unfortunately,
-the use of system calls reduces the throughput of the shared memory
-IPC to that of existing IPC mechanisms.
-To avoid this bottleneck,
-we expect that the next release of BSD will incorporate a scheme
-that places the semaphores in the shared memory segment, so that
-machines with a test-and-set instruction will be able to handle the usual
-uncontested ``lock'' and ``unlock'' without doing two system calls.
-Only in the unusual case of trying to lock an already-locked lock or when
-a desired lock is being released will a system call be required. The
-interface will allow a user-level implementation of the System V semaphore
-interface on most machines with a much lower runtime cost [McKusick86].
-.NH 2
-Toward a Compatible File System Interface
-.PP
-As network or remote file systems have been implemented for UNIX,
-several stylized interfaces between the file system implementation
-and the rest of the kernel have been developed.
-Among these are Sun Microsystems' Virtual File System interface (VFS)
-using \fBvnodes\fP [Sandberg85] [Kleiman86],
-Digital Equipment's Generic File System (GFS) architecture [Rodriguez86],
-AT&T's File System Switch (FSS) [Rifkin86],
-the LOCUS distributed file system [Walker85],
-and Masscomp's extended file system [Cole85].
-Other remote file systems have been implemented in research or
-university groups for internal use \-
-notably the network file system in the Eighth Edition UNIX
-system [Weinberger84] and two different file systems used at Carnegie Mellon
-University [Satyanarayanan85].
-Numerous other remote file access methods have been devised for use
-within individual UNIX processes,
-many of them by modifications to the C I/O library
-similar to those in the Newcastle Connection [Brownbridge82].
-.PP
-Each design attempts to isolate file system-dependent details
-below a generic interface and to provide a framework within which
-new file systems may be incorporated.
-However, each of these interfaces is different from
-and is incompatible with the others.
-Each addresses somewhat different design goals,
-having been based on a different starting version of UNIX,
-having targeted a different set of file systems with varying characteristics,
-and having selected a different set of file system primitive operations.
-.PP
-We have studied the various file system interfaces to determine
-their generality, completeness, robustness, efficiency, and aesthetics.
-Based on this study, we have developed a proposal for a new
-file system interface that we believe includes the best features of
-each of the existing implementations.
-Briefly, the proposal adopts the 4.3BSD calling convention for name lookup,
-but otherwise is closely related to Sun's VFS.
-A prototype implementation now is being developed.
-This proposal and the rationale underlying its development
-have been presented to major software vendors as an early step
-toward convergence on a compatible file system interface [Karels86].
-.NH 2
-Changes to the Protocol Layering Interface
-.PP
-The original work on restructuring the UNIX character I/O system
-to allow flexible configuration of the internal modules by user
-processes was done at Bell Laboratories [Ritchie84].
-Known as stackable line disciplines, these interfaces allowed a user
-process to open a raw terminal port and then push on appropriate
-processing modules (such as one to do line editing).
-This model allowed terminal processing modules to be used with
-virtual-circuit network modules to create ``network virtual terminals''
-by stacking a terminal processing module on top of a
-networking protocol.
-.PP
-The design of the networking facilities for 4.2BSD took
-a different approach based on the \fBsocket\fP interface.
-This design allows a single system to support multiple sets of networking
-protocols with stream, datagram, and other types of access.
-Protocol modules may deal with multiplexing of data from different connections
-onto a single transport medium.
-.PP
-A problem with stackable line disciplines, though, is that they
-are inherently linear in nature.
-Thus, they do not adequately model the fan-in and fan-out
-associated with multiplexing.
-The simple and elegant stackable line discipline implementation
-of Eighth Edition UNIX was converted to the full production implementation
-of Streams in System V Release 3.
-In doing the conversion, many pragmatic issues were addressed,
-including the handling of
-multiplexed connections and commercially important protocols.
-Unfortunately, the implementation complexity increased enormously.
-.PP
-Because AT&T will not allow others to include Streams unless they
-also change their interface to comply with the System V Interface Definition
-base and Networking Extension,
-we cannot use the Release 3 implementation of Streams in the Berkeley system.
-Given that compatibility thus will be difficult,
-we feel we will have complete freedom to make our
-choices based solely on technical merits.
-As a result, our implementation will appear far more like the simpler stackable
-line disciplines than the more complex Release 3 Streams [Chandler86].
-A socket interface will be used rather than a character device interface,
-and demultiplexing will be handled internally by the protocols in the kernel.
-However, like Streams, the interfaces between kernel
-protocol modules will follow a uniform convention.
diff --git a/share/doc/papers/future/Makefile b/share/doc/papers/future/Makefile
deleted file mode 100644
index 10682342fbc..00000000000
--- a/share/doc/papers/future/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-# @(#)Makefile 1.3 (Berkeley) 6/8/93
-
-DIR= papers/future
-SRCS= 0.t 1.t 2.t r.t
-MACROS= -ms
-
-paper.ps: ${SRCS}
- ${TBL} ${SRCS} | ${ROFF} > ${.TARGET}
-
-paper.txt: ${SRCS}
- ${TBL} ${SRCS} | ${ROFF} -Tascii > ${.TARGET}
-
-.include <bsd.doc.mk>
diff --git a/share/doc/papers/future/r.t b/share/doc/papers/future/r.t
deleted file mode 100644
index 78775b1c205..00000000000
--- a/share/doc/papers/future/r.t
+++ /dev/null
@@ -1,140 +0,0 @@
-.\" Copyright (c) 1986 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)r.t 5.1 (Berkeley) 4/16/91
-.\"
-.NH
-References
-.ls 1
-.sp
-.IP Babaoglu79 \w'Satyanarayanan85\0\0'u
-Babaoglu, O., W. Joy,
-``Data Structures Added in the Berkeley Virtual Memory Extensions
-to the UNIX Operating System,''
-Computer Systems Research Group, Dept of EECS, University of California,
-Berkeley, CA 94720, USA, November 1979.
-.sp
-.IP Brownbridge82
-Brownbridge, D.R., L.F. Marshall, B. Randell,
-``The Newcastle Connection, or UNIXes of the World Unite!,''
-\fISoftware\- Practice and Experience\fP, Vol. 12, pp. 1147-1162, 1982.
-.sp
-.IP Chandler86
-Chandler, D.,
-``The Monthly Report \- Up the Streams Without a Standard'',
-\fIUNIX Review\fP, Vol. 4, No. 9, pp. 6-14, September 1986.
-.sp
-.IP Cole85
-Cole, C.T., P.B. Flinn, A.B. Atlas,
-``An Implementation of an Extended File System for UNIX,''
-\fIUsenix Conference Proceedings\fP,
-pp. 131-150, June, 1985.
-.sp
-.IP Joy83
-Joy, W., E. Cooper, R. Fabry, S. Leffler, M. McKusick, D. Mosher,
-``4.2BSD System Manual,''
-\fI4.2BSD UNIX Programmer's Manual\fP, Vol 2c, Document #68,
-August 1983.
-.sp
-.IP Karels86
-Karels, M., M. McKusick,
-``Towards a Compatible File System Interface,''
-\fIProceedings of the European UNIX Users Group Meeting\fP,
-Manchester, England, pp. 481-496, September 1986.
-.sp
-.IP Kleiman86
-Kleiman, S.,
-``Vnodes: An Architecture for Multiple File System Types in Sun UNIX,''
-\fIUsenix Conference Proceedings\fP,
-pp. 238-247, June, 1986.
-.sp
-.IP Leffler84
-Leffler, S., M.K. McKusick, M. Karels,
-``Measuring and Improving the Performance of 4.2BSD,''
-\fIUsenix Conference Proceedings\fP, pp. 237-252, June, 1984.
-.sp
-.IP McKusick85
-McKusick, M.K., M. Karels, S. Leffler,
-``Performance Improvements and Functional Enhancements in 4.3BSD,''
-\fIUsenix Conference Proceedings\fP, pp. 519-531, June, 1985.
-.sp
-.IP McKusick86
-McKusick, M., M. Karels,
-``A New Virtual Memory Implementation for Berkeley UNIX,''
-\fIProceedings of the European UNIX Users Group Meeting\fP,
-Manchester, England, pp. 451-460, September 1986.
-.sp
-.IP Someren84
-Someren, J. van,
-``Paging in Berkeley UNIX,''
-Laboratorium voor schakeltechniek en techniek v.d.
-informatieverwerkende machines,
-Codenummer 051560-44(1984)01, February 1984.
-.sp
-.IP Rifkin86
-Rifkin, A.P., M.P. Forbes, R.L. Hamilton, M. Sabrio, S. Shah, K. Yueh,
-``RFS Architectural Overview,'' \fIUsenix Conference Proceedings\fP,
-pp. 248-259, June, 1986.
-.sp
-.IP Ritchie74
-Ritchie, D.M., K. Thompson,
-``The Unix Time-Sharing System,''
-\fICommunications of the ACM\fP, Vol. 17, pp. 365-375, July, 1974.
-.sp
-.IP Ritchie84
-Ritchie, D.M.,
-``A Stream Input-Output System,''
-\fIAT&T Bell Laboratories Technical Journal\fP, Vol 63, No 8, Part 2,
-pp. 1897-1910, October 1984.
-.sp
-.IP Rodriguez86
-Rodriguez, R., M. Koehler, R. Hyde,
-``The Generic File System,''
-\fIUsenix Conference Proceedings\fP,
-pp. 260-269, June, 1986.
-.sp
-.IP Sandberg85
-Sandberg, R., D. Goldberg, S. Kleiman, D. Walsh, B. Lyon,
-``Design and Implementation of the Sun Network File System,''
-\fIUsenix Conference Proceedings\fP,
-pp. 119-130, June, 1985.
-.sp
-.IP Satyanarayanan85
-Satyanarayanan, M., \fIet al.\fP,
-``The ITC Distributed File System: Principles and Design,''
-\fIProc. 10th Symposium on Operating Systems Principles\fP, pp. 35-50,
-ACM, December, 1985.
-.sp
-.IP Walker85
-Walker, B.J. and S.H. Kiser, ``The LOCUS Distributed File System,''
-\fIThe LOCUS Distributed System Architecture\fP,
-G.J. Popek and B.J. Walker, ed., The MIT Press, Cambridge, MA, 1985.
-.sp
-.IP Weinberger84
-Weinberger, P.J., ``The Version 8 Network File System,''
-\fIUsenix Conference presentation\fP,
-June, 1984.
diff --git a/share/doc/papers/future/spell.ok b/share/doc/papers/future/spell.ok
deleted file mode 100644
index 311a84c4d54..00000000000
--- a/share/doc/papers/future/spell.ok
+++ /dev/null
@@ -1,90 +0,0 @@
-A.B
-A.P
-B.J
-BSD
-Babaoglu
-Babaoglu79
-Brownbridge
-Brownbridge82
-C.T
-CM
-Chandler86
-Codenummer
-Cole85
-D.M
-D.R
-Dept
-EECS
-FSS
-Fabry
-Flinn
-G.J
-GFS
-IDP
-IP
-IPC
-ITC
-Joy83
-Karels
-Karels86
-Kiser
-Kleiman
-Kleiman86
-Koehler
-L.F
-LL
-Laboratorium
-Leffler
-Leffler84
-M.K
-M.P
-Masscomp's
-McKusick
-McKusick85
-McKusick86
-Mosher
-P.B
-P.J
-PARC
-Popek
-Proc
-R.L
-RFS
-Randell
-Rifkin
-Rifkin86
-Ritchie74
-Ritchie84
-Rodriguez86
-S.H
-SPP
-Sabrio
-Sandberg
-Sandberg85
-Sandburg85
-Satyanarayanan
-Satyanarayanan85
-Someren
-Someren84
-TCP
-UNIXes
-Usenix
-VFS
-VS
-Vnodes
-Vol
-Walker85
-Weinberger84
-Yueh
-al
-bitmapped
-informatieverwerkende
-pp
-rlogin
-runtime
-schakeltechniek
-techneik
-telnet
-v.d
-vnodes
-voor
diff --git a/share/doc/papers/jus/Makefile b/share/doc/papers/jus/Makefile
deleted file mode 100644
index 064be7e3602..00000000000
--- a/share/doc/papers/jus/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# @(#)Makefile 5.2 (Berkeley) 6/8/93
-
-DIR= papers/jus
-SRCS= paper.ms
-MACROS= -ms
-
-paper.txt: ${SRCS}
- ${ROFF} -Tascii ${SRCS} > ${.TARGET}
-
-.include <bsd.doc.mk>
diff --git a/share/doc/papers/jus/paper.ms b/share/doc/papers/jus/paper.ms
deleted file mode 100644
index f435c44f897..00000000000
--- a/share/doc/papers/jus/paper.ms
+++ /dev/null
@@ -1,431 +0,0 @@
-.\" Copyright (c) 1992 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)paper.ms 5.3 (Berkeley) 5/26/92
-.\"
-.\" use roff -ms
-.ds CM
-.TL
-Berkeley UNIX
-Yesterday, Today and Tomorrow
-.AU
-Keith Bostic
-.AU
-Marshall Kirk McKusick
-.AU
-Michael J. Karels
-.AI
-Computer Systems Research Group
-Computer Science Division
-Department of Electrical Engineering and Computer Science
-University of California, Berkeley
-Berkeley, California 94720
-.AB
-This paper presents a brief overview of the historic Berkeley releases
-and the role that Berkeley has played in the UNIX\(dg
-.FS
-\(dgUNIX is a registered trademark of UNIX System Laboratories.
-.FE
-world and discusses the role that University and research releases
-should play in the future.
-.AE
-.NH
-A Technical History of the Berkeley Project
-.PP
-This is a \fBbrief\fP technical history of Berkeley UNIX.
-For more details, [McKusick85] is strongly recommended along with the
-other papers listed in the Reference section.
-Specifically, we apologize that space does not permit mentioning most
-of the major contributors and influences on the Berkeley system!
-.PP
-The University of California, Berkeley, first ran Bell Laboratories
-Version 4 UNIX on a PDP-11/45, in January of 1974.
-Later that spring, a PDP-11/40 was configured with the newly available
-Version 5.
-Version 6 was running on a PDP-11/70 by the fall of 1975,
-with the arrival of two new graduate students, Bill Joy and Chuck Haley.
-They initially concentrated on making improvements to a Pascal system
-written by Ken Thompson while on sabbatical at Berkeley.
-With this completed, they turned their attention to the \fBed\fP editor,
-eventually producing a new version they called \fBex\fP.
-By the end of the summer of 1976, Joy and Haley began to take an
-interest in exploring the internals of the UNIX kernel.
-.PP
-In 1977, a steady stream of requests for the enhanced Pascal system
-had begun.
-Early that year, Joy put together the first ``Berkeley Software
-Distribution'', including the Pascal system and the source to \fBex\fP.
-Over the next year, about thirty copies of this distribution
-were sent out.
-At around the same time, Joy began work on what was to become \fBvi\fP.
-By mid-1978, the software distribution clearly needed to be updated.
-The Pascal system had been made markedly more robust through feedback
-from its expanding user community,
-and had been split into two passes so that it could be run on PDP-11/34s.
-The result of the update was the ``Second Berkeley Software Distribution'',
-a name that was quickly abbreviated to 2BSD.
-Along with the Pascal system,
-\fBvi\fP and \fBtermcap\fP for several terminals were included.
-Once again Bill Joy single-handedly put together distributions,
-answered the phone, and incorporated user feedback into the system.
-Over the next year nearly seventy-five tapes were shipped.
-Although Joy soon moved on to other projects, the 2BSD distribution
-continued to expand.
-Today the latest version of this distribution, 2.10BSD, is still alive
-and being used by people all around the world on PDP-11's.
-.PP
-Early in 1978, Berkeley obtained a newly announced Digital
-Equipment Corp. (DEC) VAX\(dd
-.FS
-\(ddVAX is a registered trademark of Digital Equipment Corporation.
-.FE
-11/780.
-Shortly after the arrival of the VAX, Bell Laboratories provided Berkeley
-with a copy of their 32/V port of UNIX to the VAX.
-Although 32/V supported a Version 7 UNIX environment on the VAX,
-it did not take advantage of the virtual memory capability of the VAX
-hardware.
-Like its predecessors on the PDP-11, it was a swapping system.
-Ozalp Babaoglu, a Berkeley graduate student, set about finding a way of
-implementing a working set paging system on the VAX.
-As Babaoglu neared the completion of his first attempt at an implementation,
-he approached Bill Joy for some help in understanding the intricacies
-of the UNIX kernel.
-Intrigued by Babaoglu's approach, Joy joined in helping to integrate
-the code into 32/V and then with the ensuing debugging.
-.PP
-Joy realized that the 32-bit VAX would soon make the 16-bit PDP-11
-obsolete, and he began to port the 2BSD software to the VAX.
-By the end of 1979, a complete distribution had been created.
-This distribution included the virtual memory kernel,
-the standard 32/V utilities, and the 2BSD additions, which by now
-included the Pascal system, \fBex\fP/\fBvi\fP, the \fBCshell\fP and
-several other utilities.
-In December, 1979, Joy shipped the first of nearly a hundred copies of 3BSD,
-the first VAX distribution from Berkeley.
-.PP
-In the fall of 1979, the Defense Advanced Research Projects Agency, DARPA,
-accepted a Berkeley proposal to develop an enhanced version of 3BSD for
-the DARPA community.
-With the already good reputation of 3BSD supporting the proposal, the
-Berkeley project was funded.
-Joy took charge of the project, which was to become the Computer Systems
-Research Group (CSRG).
-Joy soon incorporated job control, added auto reboot, a 1K block file
-system, and support for the latest VAX machine, the VAX-11/750.
-By October, 1980, a polished distribution that also included the Franz
-Lisp system and an enhanced mail handling system was released as 4BSD.
-During its nine-month lifetime, nearly 150 copies were shipped.
-.PP
-With the increasingly wide distribution and visibility of Berkeley UNIX,
-several critics began to emerge.
-The major objection cited was the performance of various benchmarks as
-compared to the DEC VMS system.
-Over the course of several months, Joy systematically tuned the kernel,
-soon matching VMS's performance.
-Rather than continue shipping 4BSD, the tuned system, with the addition
-of Robert Elz's auto configuration code, was released as 4.1BSD in June,
-1981.
-Over its two year lifetime about 400 distributions were shipped.
-.PP
-With the release of 4.1BSD, much of the debate over performance died
-down.
-DARPA again funded Berkeley, this time with the intention of adding
-new features to the BSD system.
-These new features eventually included Berkeley reliable signals,
-the fast filesystem, disk quotas,
-and the socket interface with TCP/IP networking support.
-Around this time Joy left the CSRG for Sun Microsystems, and Sam Leffler
-took over responsibility for completing the project.
-In August, 1983, this system was released as 4.2BSD [Joy83].
-The popularity of 4.2BSD was impressive; within eighteen months more
-copies of 4.2BSD had been shipped than of all the previous Berkeley
-software distributions combined.
-.PP
-As with 4BSD, the major criticism of 4.2BSD was performance.
-The problem, not surprisingly, was that the new facilities had not been
-tuned and that many of the kernel data structures were not well suited
-to their new uses.
-In addition, many of the interfaces, particularly in the networking
-area had been left unfinished.
-As Sam Leffler had left the CSRG for Lucasfilm, the tuning and enhancement
-of 4.2BSD was largely done under the direction of Michael Karels and Kirk
-McKusick [Leffler84] [McKusick85].
-This system was released in April of 1986 as 4.3BSD, and had greatly
-enhanced performance and reliability over 4.2BSD, along with several
-new features.
-.PP
-As it had done in 4.2BSD, the CSRG then embarked on a new development
-phase to update other major components of the system, and
-design and integrate new functionality.
-The 4.4BSD release, scheduled for fall of 1992, will contain the results
-of several major new projects.
-Among these projects are an OSI network protocol suite integrated
-with existing ISO applications, an IEEE POSIX 1003.1 standard interface,
-a highly tuned TCP/IP networking interface, support for Sun Microsystems'
-Network File System, the integration of a log-structured file system,
-an integration of the MACH virtual memory system, volume labels and
-user-level database support.
-.PP
-There will have been four interim releases made by the CSRG between 4.3BSD
-and the upcoming 4.4BSD release.
-The first two of these releases, 4.3BSD-Tahoe and 4.3BSD-Reno, were
-intended to make a subset of the new functionality found in 4.4BSD
-available to vendors.
-The 4.3BSD-Tahoe release, made in the summer of 1988, was the first Berkeley
-release to support two architectures.
-This goal was made possible by the reimplementation of much of the machine
-specific kernel source and a fundamental restructuring of the source code
-pool so that binaries for more than one architecture could be constructed
-from a single source pool.
-The two supported architectures were the VAX and the Computer Consoles Inc.
-Power 6/32 (the Tahoe).
-Since this release, architecture support for the Intel 386/486, the
-Sun Microsystems SPARCstation\(dg
-.FS
-\(dgAll SPARC trademarks are trademarks or registered trademarks
-of SPARC International, Inc.
-SPARCstation is licensed exclusively to Sun Microsystems, Inc.
-.FE
-I and II, the DECstation 3100 and 5000 and the Hewlett-Packard 300 have
-been added as well.
-The 4.3BSD-Reno release, made in the summer of 1990, was intended to make
-the Network File System code available to vendors using Berkeley-derived
-systems, such as The Open Software Foundation (OSF).
-This code had been written by Rick Macklem at the University of Guelph
-and integrated by the CSRG, under a new version of the kernel file
-system switch.
-.PP
-Two other interim releases, the first and second release of the
-``Berkeley Network Software Distribution'', usually abbreviated as NET/1
-and NET/2, were intended to make the source code of the 4BSD system
-available to and redistributable by anyone.
-Over the years of development by the CSRG and others, an increasingly
-large percentage of the system was not derived from the original AT&T
-32/V distribution.
-In the spring of 1988 Berkeley made its first distribution not requiring
-an AT&T source license, NET/1.
-This distribution primarily contained the networking portions of the system,
-from the utilities all the way through to the kernel device drivers, although
-other items such as \fBlogin\fP and other files were included for various
-reasons.
-This release was extremely popular with many vendors with their own
-versions of UNIX but who wished to run the Berkeley TCP/IP code and
-with vendors wishing to create smart networking cards, not to mention
-the users that wanted access to the source code for class work or other
-research purposes.
-.PP
-Around this time, the CSRG also began to search out freely redistributable
-versions of the UNIX utilities and to rewrite, or encourage BSD users to
-rewrite, those that were not available elsewhere.
-This was an immensely time-consuming task, involving contributions by
-hundreds of programmers from all around the world.
-In the summer of 1991, Berkeley released NET/2, which, like NET/1, did
-not require an AT&T source license.
-The NET/2 release included about 80% of the source code found in
-the 4.3BSD-Reno release.
-This release has proved to be immensely popular, with hundreds of thousands
-of copies taken from the public network archives and an unknown number
-redistributed by other organizations.
-.NH
-The Role of the Berkeley Project in the UNIX World
-.PP
-The role that Berkeley has played in the UNIX world has been a
-constantly changing one.
-In the 1970's, Berkeley was among the first participants in the UNIX
-research community, acting as host to several researchers on sabbatical
-from Bell Laboratories.
-This cooperation typified the harmony that was characteristic of the
-early UNIX community, as led by Bell Laboratories.
-Work that was contributed to the Laboratories by different members of
-the community, Berkeley among them, helped produce a rapidly expanding
-set of tools and facilities.
-With the commercialization of UNIX, the Bell Laboratories researchers were
-no longer able to act as a clearinghouse for the ongoing UNIX research.
-As the research community continued to modify the UNIX system, it found
-that it needed an organization that could produce leading edge research
-releases.
-Because of its early involvement in UNIX and its history of releasing
-UNIX-based tools, the CSRG quickly filled this role.
-.PP
-For the first half of the 1980's, Berkeley served as the focus of the
-leading edge of UNIX research.
-The Berkeley system was widely used, ported and considered the arbiter
-of what should comprise a UNIX system.
-By the mid-1980's, largely because the networking component of the Berkeley
-system was unique and unavailable from vendors for a period of time,
-Berkeley was forced into the role of a vendor [McKusick89].
-This role expanded to the point that there were two major variants of
-UNIX, System V and BSD, and resulted in a breach in the UNIX world that
-is only gradually being healed.
-Acting as a vendor required an immense amount of time, money and effort
-by the CSRG.
-Thousands of hours were devoted to release engineering, thousands more
-to participation in the emerging UNIX standards and thousands more in
-distribution and user support.
-Over the years it became increasingly clear to the people associated with
-the Berkeley UNIX project that its limited funding and manpower were
-insufficient to complete its historical task of designing, implementing
-and supporting a complete, reliable, leading edge system.
-As each portion of the system became more complex and additional features
-were added, more and more effort had to be expended to keep the
-system at a high level of quality, and less and less effort was available
-to move the system technically forward.
-Fortunately, during the last half of the 1980's, as the UNIX interface
-became the consensus choice for an industry standard, and the number of
-vendors marketing, selling and supporting UNIX systems grew, Berkeley
-has been able to start to return to its historical orientation of doing
-leading-edge research instead of customer support.
-.NH
-Berkeley UNIX Tomorrow
-.PP
-For UNIX to become the system of choice for a large segment of the industry,
-potential customers must have confidence that the product is supported,
-that future versions will continue to be developed and enhanced, and that
-future versions will be upwardly compatible with all past applications.
-In addition, vendors desiring to maximize their return on investment
-require that the source code for their systems be proprietary and are
-unwilling to make it available to users under any but the most onerous
-restrictions.
-Many of these requirements, while acceptable for most users,
-are diametrically opposed to what has made UNIX the research platform
-of choice: low cost, wide availability of source code, and leading edge
-technology.
-.PP
-System development can be likened to the process of evolution.
-While gene mutation is critical to the advancement of the species, only
-one in 100 mutations produces a useful feature; the rest result in
-needless or detrimental changes.
-The mere existence of an environment for mutation is not enough --
-some organization must bear responsibility for
-brutally pruning the weak, outdated and useless ideas.
-UNIX was fortunate in this sense.
-Unlike other projects beset by competing groups jealously guarding their
-work from one another, UNIX thrived in an open and cooperative community
-willing to channel its ideas through a central clearinghouse (first Bell
-Laboratories and later the CSRG), in spite of the clearinghouse's
-reputation for selective technical scrutiny.
-.PP
-Here one must distinguish between the selection process provided
-by research and commercial organizations.
-Research organizations can base pruning decisions strictly on the
-coherence of the system and the technical merit of the idea.
-They need not concern themselves with how changes might affect
-past variants of the system.
-Commercial organizations, though, must ensure that
-changes will not affect programs built to an obsolete interface.
-For example, paging might be a great idea, but it will cause problems for
-software that depends on the execution predictability of a swap-based
-system, making it impossible for paging to replace swapping.
-As a result, both schemes must be maintained, dramatically increasing the
-complexity of the system.
-As the system becomes more complex, its evolutionary paths will become
-increasingly restricted.
-.PP
-Here the role of a dynamic research version of UNIX becomes clear.
-While it is only directly used by a small group of people,
-it plays an important role as the feedstock for the commercial
-versions of UNIX.
-Over the long term,
-it is reasonable to expect that the most useful functionality
-of the research systems will be grafted into the commercial versions.
-Examples of ideas that began with BSD and moved into commercial systems
-include the fast filesystem, TCP/IP networking, and nearly half of
-the commands and utilities.
-.PP
-The CSRG spends a significant amount of time collecting prototypes of
-projects throughout the research world and molding them together into
-a coherent and usable system.
-Many of the ideas do not work out and are dropped in later releases.
-The ability to experiment without concern for past applications is
-critical.
-The resulting system is a third the size and a fraction of the complexity
-of its roughly equivalently functioned commercial brethren.
-This lean and mean approach allows the system to evolve rapidly (the
-nightmare of every commercial user, but the dream of every researcher).
-A recent example of this type of experimentation is the prototyping of
-various proposed POSIX utilities and interfaces by the CSRG.
-When drafts of the standard were implemented, basic flaws in the
-specification became apparent.
-These flaws and suggested solutions were presented to the standards
-committees, resulting in changes to the standard ensuring that the
-ratified standard could be efficiently and correctly implemented.
-The research system users also benefit from having a reference
-implementation of the standard almost from the day that it is finalized.
-.PP
-Another major influence on the UNIX systems of the future will be the
-NET/2 release.
-At least three separate groups (two in the U.S. and one in Europe) have
-added the necessary source code to the NET/2 release to make it a fully
-functional UNIX system.
-As the NET/2 release was not proprietary to any person or organization
-other than the University of California and may be freely redistributed,
-the cost of a UNIX system with source code will be less in the future
-than in the current UNIX market by two orders of magnitude.
-The UNIX single-server release by the Carnegie Mellon University
-MACH group will also use the NET/2 release as a starting point, making
-their release freely redistributable without a UNIX source license.
-The advent of cheaply available sources will make it far easier than
-ever before for research groups and users to develop and exchange software.
-.PP
-The role of designing and implementing a leading-edge research version
-of UNIX is one that Berkeley is uniquely equipped to fill.
-Future Berkeley releases will be oriented, as they were in the early days
-of Berkeley UNIX, toward the development and integration of a few
-well-chosen pieces of new research into a leading-edge system.
-.NH
-References:
-.sp
-.IP Joy83
-.br
-Joy, W., E. Cooper, R. Fabry, S. Leffler, M. McKusick, D. Mosher,
-``4.2BSD System Manual,''
-\fI4.2BSD UNIX Programmer's Manual\fP, Vol 2c, Document #68,
-August 1983.
-.sp
-.IP Leffler84
-Leffler, S., M.K. McKusick, M. Karels,
-``Measuring and Improving the Performance of 4.2BSD,''
-\fIUsenix Conference Proceedings\fP, pp. 237-252, June, 1984.
-.sp
-.IP McKusick85
-McKusick, M.K., M. Karels, S. Leffler,
-``Performance Improvements and Functional Enhancements in 4.3BSD,''
-\fIUsenix Conference Proceedings\fP, pp. 519-531, June, 1985.
-.sp
-.IP McKusick87
-M. McKusick, M. Karels,
-``Directions of UNIX at Berkeley'',
-\fIDigest of Papers of the Thirty-second IEEE Computer Society
-International Conference\fP,
-Compcon, San Francisco, pp. 196-199, February 23-27, 1987.
-.sp
-.IP McKusick89
-M. McKusick, M. Karels, K. Bostic,
-``The Release Engineering of 4.3BSD'',
-\fIProceedings of the New Orleans Usenix Workshop on Software Management\fP,
-pp. 95-100, April 1989.
diff --git a/share/doc/papers/kernmalloc/Makefile b/share/doc/papers/kernmalloc/Makefile
deleted file mode 100644
index 5148ba417f0..00000000000
--- a/share/doc/papers/kernmalloc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-# $OpenBSD: Makefile,v 1.3 2004/02/01 14:22:44 jmc Exp $
-
-
-DIR= papers/kernmalloc
-SRCS= kernmalloc.t appendix.t
-MACROS= -ms
-
-paper.ps: ${SRCS} alloc.fig usage.tbl
- ${SOELIM} ${SRCS} | ${TBL} | ${PIC} | ${EQN} | ${GRIND} | \
- ${ROFF} > ${.TARGET}
-
-paper.txt: ${SRCS} alloc.fig usage.tbl
- ${SOELIM} ${SRCS} | ${TBL} | ${PIC} | ${EQN} | ${GRIND} | \
- ${ROFF} -Tascii > ${.TARGET}
-
-.include <bsd.doc.mk>
diff --git a/share/doc/papers/kernmalloc/alloc.fig b/share/doc/papers/kernmalloc/alloc.fig
deleted file mode 100644
index e17285a836a..00000000000
--- a/share/doc/papers/kernmalloc/alloc.fig
+++ /dev/null
@@ -1,113 +0,0 @@
-.\" $OpenBSD: alloc.fig,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1988 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)alloc.fig 5.1 (Berkeley) 4/16/91
-.\"
-.PS
-scale=100
-define m0 |
-[ box invis ht 16 wid 32 with .sw at 0,0
-line from 4,12 to 4,4
-line from 8,12 to 8,4
-line from 12,12 to 12,4
-line from 16,12 to 16,4
-line from 20,12 to 20,4
-line from 24,12 to 24,4
-line from 28,12 to 28,4
-line from 0,16 to 0,0
-line from 0,8 to 32,8
-] |
-
-define m1 |
-[ box invis ht 16 wid 32 with .sw at 0,0
-line from 8,12 to 8,4
-line from 16,12 to 16,4
-line from 24,12 to 24,4
-line from 0,8 to 32,8
-line from 0,16 to 0,0
-] |
-
-define m2 |
-[ box invis ht 16 wid 32 with .sw at 0,0
-line from 0,8 to 32,8
-line from 0,16 to 0,0
-] |
-
-define m3 |
-[ box invis ht 16 wid 31 with .sw at 0,0
-line from 15,12 to 15,4
-line from 0,8 to 31,8
-line from 0,16 to 0,0
-] |
-
-box invis ht 212 wid 580 with .sw at 0,0
-"\f1\s10\&kernel memory pages\f1\s0" at 168,204
-"\f1\s10\&Legend:\f1\s0" at 36,144
-"\f1\s10\&cont \- continuation of previous page\f1\s0" at 28,112 ljust
-"\f1\s10\&free \- unused page\f1\s0" at 28,128 ljust
-"\f1\s10\&Usage:\f1\s0" at 34,87
-"\f1\s10\&memsize(addr)\f1\s0" at 36,71 ljust
-"\f1\s10\&char *addr;\f1\s0" at 66,56 ljust
-"\f1\s10\&{\f1\s0" at 36,43 ljust
-"\f1\s10\&return(kmemsizes[(addr \- kmembase) / \s-1PAGESIZE\s+1]);\f1\s0" at 66,29 ljust
-"\f1\s10\&}\f1\s0" at 36,8 ljust
-line from 548,192 to 548,176
-line from 548,184 to 580,184 dotted
-"\f1\s10\&1024,\f1\s0" at 116,168
-"\f1\s10\&256,\f1\s0" at 148,168
-"\f1\s10\&512,\f1\s0" at 180,168
-"\f1\s10\&3072,\f1\s0" at 212,168
-"\f1\s10\&cont,\f1\s0" at 276,168
-"\f1\s10\&cont,\f1\s0" at 244,168
-"\f1\s10\&128,\f1\s0" at 308,168
-"\f1\s10\&128,\f1\s0" at 340,168
-"\f1\s10\&free,\f1\s0" at 372,168
-"\f1\s10\&cont,\f1\s0" at 404,168
-"\f1\s10\&128,\f1\s0" at 436,168
-"\f1\s10\&1024,\f1\s0" at 468,168
-"\f1\s10\&free,\f1\s0" at 500,168
-"\f1\s10\&cont,\f1\s0" at 532,168
-"\f1\s10\&cont,\f1\s0" at 564,168
-m2 with .nw at 100,192
-m1 with .nw at 132,192
-m3 with .nw at 164,192
-m2 with .nw at 196,192
-m2 with .nw at 228,192
-m2 with .nw at 260,192
-m0 with .nw at 292,192
-m0 with .nw at 324,192
-m2 with .nw at 356,192
-m2 with .nw at 388,192
-m0 with .nw at 420,192
-m2 with .nw at 452,192
-m2 with .nw at 484,192
-m2 with .nw at 516,192
-"\f1\s10\&kmemsizes[] = {\f1\s0" at 100,168 rjust
-"\f1\s10\&char *kmembase\f1\s0" at 97,184 rjust
-.PE
diff --git a/share/doc/papers/kernmalloc/appendix.t b/share/doc/papers/kernmalloc/appendix.t
deleted file mode 100644
index c1b2419ae12..00000000000
--- a/share/doc/papers/kernmalloc/appendix.t
+++ /dev/null
@@ -1,135 +0,0 @@
-.\" $OpenBSD: appendix.t,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1988 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)appendix.t 5.1 (Berkeley) 4/16/91
-.\"
-.bp
-.H 1 "Appendix A - Implementation Details"
-.LP
-.nf
-.vS
-/*
- * Constants for setting the parameters of the kernel memory allocator.
- *
- * 2 ** MINBUCKET is the smallest unit of memory that will be
- * allocated. It must be at least large enough to hold a pointer.
- *
- * Units of memory less than or equal to MAXALLOCSAVE will permanently
- * allocate physical memory; requests for these size pieces of memory
- * are quite fast. Allocations greater than MAXALLOCSAVE must
- * always allocate and free physical memory; requests for these size
- * allocations should be done infrequently as they will be slow.
- * Constraints: CLBYTES <= MAXALLOCSAVE <= 2 ** (MINBUCKET + 14)
- * and MAXALLOCSAVE must be a power of two.
- */
-#define MINBUCKET 4 /* 4 => min allocation of 16 bytes */
-#define MAXALLOCSAVE (2 * CLBYTES)
-
-/*
- * Maximum amount of kernel dynamic memory.
- * Constraints: must be a multiple of the pagesize.
- */
-#define MAXKMEM (1024 * PAGESIZE)
-
-/*
- * Arena for all kernel dynamic memory allocation.
- * This arena is known to start on a page boundary.
- */
-extern char kmembase[MAXKMEM];
-
-/*
- * Array of descriptors that describe the contents of each page
- */
-struct kmemsizes {
- short ks_indx; /* bucket index, size of small allocations */
- u_short ks_pagecnt; /* for large allocations, pages allocated */
-} kmemsizes[MAXKMEM / PAGESIZE];
-
-/*
- * Set of buckets for each size of memory block that is retained
- */
-struct kmembuckets {
- caddr_t kb_next; /* list of free blocks */
-} bucket[MINBUCKET + 16];
-.bp
-/*
- * Macro to convert a size to a bucket index. If the size is constant,
- * this macro reduces to a compile time constant.
- */
-#define MINALLOCSIZE (1 << MINBUCKET)
-#define BUCKETINDX(size) \
- (size) <= (MINALLOCSIZE * 128) \
- ? (size) <= (MINALLOCSIZE * 8) \
- ? (size) <= (MINALLOCSIZE * 2) \
- ? (size) <= (MINALLOCSIZE * 1) \
- ? (MINBUCKET + 0) \
- : (MINBUCKET + 1) \
- : (size) <= (MINALLOCSIZE * 4) \
- ? (MINBUCKET + 2) \
- : (MINBUCKET + 3) \
- : (size) <= (MINALLOCSIZE* 32) \
- ? (size) <= (MINALLOCSIZE * 16) \
- ? (MINBUCKET + 4) \
- : (MINBUCKET + 5) \
- : (size) <= (MINALLOCSIZE * 64) \
- ? (MINBUCKET + 6) \
- : (MINBUCKET + 7) \
- : (size) <= (MINALLOCSIZE * 2048) \
- /* etc ... */
-
-/*
- * Macro versions for the usual cases of malloc/free
- */
-#define MALLOC(space, cast, size, flags) { \
- register struct kmembuckets *kbp = &bucket[BUCKETINDX(size)]; \
- long s = splimp(); \
- if (kbp->kb_next == NULL) { \
- (space) = (cast)malloc(size, flags); \
- } else { \
- (space) = (cast)kbp->kb_next; \
- kbp->kb_next = *(caddr_t *)(space); \
- } \
- splx(s); \
-}
-
-#define FREE(addr) { \
- register struct kmembuckets *kbp; \
- register struct kmemsizes *ksp = \
- &kmemsizes[((addr) - kmembase) / PAGESIZE]; \
- long s = splimp(); \
- if (1 << ksp->ks_indx > MAXALLOCSAVE) { \
- free(addr); \
- } else { \
- kbp = &bucket[ksp->ks_indx]; \
- *(caddr_t *)(addr) = kbp->kb_next; \
- kbp->kb_next = (caddr_t)(addr); \
- } \
- splx(s); \
-}
-.vE
diff --git a/share/doc/papers/kernmalloc/kernmalloc.t b/share/doc/papers/kernmalloc/kernmalloc.t
deleted file mode 100644
index deb9b40bf35..00000000000
--- a/share/doc/papers/kernmalloc/kernmalloc.t
+++ /dev/null
@@ -1,647 +0,0 @@
-.\" $OpenBSD: kernmalloc.t,v 1.4 2003/10/30 14:52:24 jmc Exp $
-.\"
-.\" Copyright (c) 1988 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)kernmalloc.t 5.1 (Berkeley) 4/16/91
-.\"
-.\" reference a system routine name
-.de RN
-\fI\\$1\fP\^(\h'1m/24u')\\$2
-..
-.\" reference a header name
-.de H
-.NH \\$1
-\\$2
-..
-.\" begin figure
-.\" .FI "title"
-.nr Fn 0 1
-.de FI
-.ds Lb Figure \\n+(Fn
-.ds Lt \\$1
-.KF
-.DS B
-.nf
-..
-.\"
-.\" end figure
-.de Fe
-.sp .5
-.\" cheat: original indent is stored in \n(OI by .DS B; restore it
-.\" then center legend after .DE rereads and centers the block.
-\\\\.in \\n(OI
-\\\\.ce
-\\\\*(Lb. \\\\*(Lt
-.sp .5
-.DE
-.KE
-.if \nd 'ls 2
-..
-.EQ
-delim $$
-.EN
-.ds CH "
-.pn 295
-.sp
-.rs
-.ps -1
-.sp -1
-.fi
-Reprinted from:
-\fIProceedings of the San Francisco USENIX Conference\fP,
-pp. 295-303, June 1988.
-.ps
-.\".sp |\n(HMu
-.rm CM
-.nr PO 1.25i
-.TL
-Design of a General Purpose Memory Allocator for the 4.3BSD UNIX\(dg Kernel
-.ds LF Summer USENIX '88
-.ds CF "%
-.ds RF San Francisco, June 20-24
-.EH 'Design of a General Purpose Memory ...''McKusick, Karels'
-.OH 'McKusick, Karels''Design of a General Purpose Memory ...'
-.FS
-\(dgUNIX is a registered trademark of AT&T in the US and other countries.
-.FE
-.AU
-Marshall Kirk McKusick
-.AU
-Michael J. Karels
-.AI
-Computer Systems Research Group
-Computer Science Division
-Department of Electrical Engineering and Computer Science
-University of California, Berkeley
-Berkeley, California 94720
-.AB
-The 4.3BSD UNIX kernel uses many memory allocation mechanisms,
-each designed for the particular needs of the utilizing subsystem.
-This paper describes a general purpose dynamic memory allocator
-that can be used by all of the kernel subsystems.
-The design of this allocator takes advantage of known memory usage
-patterns in the UNIX kernel and a hybrid strategy that is time-efficient
-for small allocations and space-efficient for large allocations.
-This allocator replaces the multiple memory allocation interfaces
-with a single easy-to-program interface,
-results in more efficient use of global memory by eliminating
-partitioned and specialized memory pools,
-and is quick enough that no performance loss is observed
-relative to the current implementations.
-The paper concludes with a discussion of our experience in using
-the new memory allocator,
-and directions for future work.
-.AE
-.LP
-.H 1 "Kernel Memory Allocation in 4.3BSD
-.PP
-The 4.3BSD kernel has at least ten different memory allocators.
-Some of them handle large blocks,
-some of them handle small chained data structures,
-and others include information to describe I/O operations.
-Often the allocations are for small pieces of memory that are only
-needed for the duration of a single system call.
-In a user process such short-term
-memory would be allocated on the run-time stack.
-Because the kernel has a limited run-time stack,
-it is not feasible to allocate even moderate blocks of memory on it.
-Consequently, such memory must be allocated through a more dynamic mechanism.
-For example,
-when the system must translate a pathname,
-it must allocate a one kilobyte buffer to hold the name.
-Other blocks of memory must be more persistent than a single system call
-and really have to be allocated from dynamic memory.
-Examples include protocol control blocks that remain throughout
-the duration of the network connection.
-.PP
-Demands for dynamic memory allocation in the kernel have increased
-as more services have been added.
-Each time a new type of memory allocation has been required,
-a specialized memory allocation scheme has been written to handle it.
-Often the new memory allocation scheme has been built on top
-of an older allocator.
-For example, the block device subsystem provides a crude form of
-memory allocation through the allocation of empty buffers [Thompson78].
-The allocation is slow because of the implied semantics of
-finding the oldest buffer, pushing its contents to disk if they are dirty,
-and moving physical memory into or out of the buffer to create
-the requested size.
-To reduce the overhead, a ``new'' memory allocator was built in 4.3BSD
-for name translation that allocates a pool of empty buffers.
-It keeps them on a free list so they can
-be quickly allocated and freed [McKusick85].
-.PP
-This memory allocation method has several drawbacks.
-First, the new allocator can only handle a limited range of sizes.
-Second, it depletes the buffer pool, as it steals memory intended
-to buffer disk blocks to other purposes.
-Finally, it creates yet another interface of
-which the programmer must be aware.
-.PP
-A generalized memory allocator is needed to reduce the complexity
-of writing code inside the kernel.
-Rather than providing many semi-specialized ways of allocating memory,
-the kernel should provide a single general purpose allocator.
-With only a single interface,
-programmers do not need to figure
-out the most appropriate way to allocate memory.
-If a good general purpose allocator is available,
-it helps avoid the syndrome of creating yet another special
-purpose allocator.
-.PP
-To ease the task of understanding how to use it,
-the memory allocator should have an interface similar to the interface
-of the well-known memory allocator provided for
-applications programmers through the C library routines
-.RN malloc
-and
-.RN free .
-Like the C library interface,
-the allocation routine should take a parameter specifying the
-size of memory that is needed.
-The range of sizes for memory requests should not be constrained.
-The free routine should take a pointer to the storage being freed,
-and should not require additional information such as the size
-of the piece of memory being freed.
-.H 1 "Criteria for a Kernel Memory Allocator
-.PP
-The design specification for a kernel memory allocator is similar to,
-but not identical to,
-the design criteria for a user level memory allocator.
-The first criterion for a memory allocator is that it make good use
-of the physical memory.
-Good use of memory is measured by the amount of memory needed to hold
-a set of allocations at any point in time.
-Percentage utilization is expressed as:
-.EQ
-utilization~=~requested over required
-.EN
-Here, ``requested'' is the sum of the memory that has been requested
-and not yet freed.
-``Required'' is the amount of memory that has been
-allocated for the pool from which the requests are filled.
-An allocator requires more memory than requested because of fragmentation
-and a need to have a ready supply of free memory for future requests.
-A perfect memory allocator would have a utilization of 100%.
-In practice,
-having a 50% utilization is considered good [Korn85].
-.PP
-Good memory utilization in the kernel is more important than
-in user processes.
-Because user processes run in virtual memory,
-unused parts of their address space can be paged out.
-Thus pages in the process address space
-that are part of the ``required'' pool that are not
-being ``requested'' need not tie up physical memory.
-Because the kernel is not paged,
-all pages in the ``required'' pool are held by the kernel and
-cannot be used for other purposes.
-To keep the kernel utilization percentage as high as possible,
-it is desirable to release unused memory in the ``required'' pool
-rather than to hold it as is typically done with user processes.
-Because the kernel can directly manipulate its own page maps,
-releasing unused memory is fast;
-a user process must do a system call to release memory.
-.PP
-The most important criterion for a memory allocator is that it be fast.
-Because memory allocation is done frequently,
-a slow memory allocator will degrade the system performance.
-Speed of allocation is more critical when executing in the
-kernel than in user code,
-because the kernel must allocate many data structures that user
-processes can allocate cheaply on their run-time stack.
-In addition, the kernel represents the platform on which all user
-processes run,
-and if it is slow, it will degrade the performance of every process
-that is running.
-.PP
-Another problem with a slow memory allocator is that programmers
-of frequently-used kernel interfaces will feel that they
-cannot afford to use it as their primary memory allocator.
-Instead they will build their own memory allocator on top of the
-original by maintaining their own pool of memory blocks.
-Multiple allocators reduce the efficiency with which memory is used.
-The kernel ends up with many different free lists of memory
-instead of a single free list from which all allocations can be drawn.
-For example,
-consider the case of two subsystems that need memory.
-If they have their own free lists,
-the amount of memory tied up in the two lists will be the
-sum of the greatest amount of memory that each of
-the two subsystems has ever used.
-If they share a free list,
-the amount of memory tied up in the free list may be as low as the
-greatest amount of memory that either subsystem used.
-As the number of subsystems grows,
-the savings from having a single free list grow.
-.H 1 "Existing User-level Implementations
-.PP
-There are many different algorithms and
-implementations of user-level memory allocators.
-A survey of those available on UNIX systems appeared in [Korn85].
-Nearly all of the memory allocators tested made good use of memory,
-though most of them were too slow for use in the kernel.
-The fastest memory allocator in the survey by nearly a factor of two
-was the memory allocator provided on 4.2BSD originally
-written by Chris Kingsley at California Institute of Technology.
-Unfortunately,
-the 4.2BSD memory allocator also wasted twice as much memory
-as its nearest competitor in the survey.
-.PP
-The 4.2BSD user-level memory allocator works by maintaining a set of lists
-that are ordered by increasing powers of two.
-Each list contains a set of memory blocks of its corresponding size.
-To fulfill a memory request,
-the size of the request is rounded up to the next power of two.
-A piece of memory is then removed from the list corresponding
-to the specified power of two and returned to the requester.
-Thus, a request for a block of memory of size 53 returns
-a block from the 64-sized list.
-A typical memory allocation requires a roundup calculation
-followed by a linked list removal.
-Only if the list is empty is a real memory allocation done.
-The free operation is also fast;
-the block of memory is put back onto the list from which it came.
-The correct list is identified by a size indicator stored
-immediately preceding the memory block.
-.H 1 "Considerations Unique to a Kernel Allocator
-.PP
-There are several special conditions that arise when writing a
-memory allocator for the kernel that do not apply to a user process
-memory allocator.
-First, the maximum memory allocation can be determined at
-the time that the machine is booted.
-This number is never more than the amount of physical memory on the machine,
-and is typically much less since a machine with all its
-memory dedicated to the operating system is uninteresting to use.
-Thus, the kernel can statically allocate a set of data structures
-to manage its dynamically allocated memory.
-These data structures never need to be
-expanded to accommodate memory requests;
-yet, if properly designed, they need not be large.
-For a user process, the maximum amount of memory that may be allocated
-is a function of the maximum size of its virtual memory.
-Although it could allocate static data structures to manage
-its entire virtual memory,
-even if they were efficiently encoded they would potentially be huge.
-The other alternative is to allocate data structures as they are needed.
-However, that adds extra complications such as new
-failure modes if it cannot allocate space for additional
-structures and additional mechanisms to link them all together.
-.PP
-Another special condition of the kernel memory allocator is that it
-can control its own address space.
-Unlike user processes that can only grow and shrink their heap at one end,
-the kernel can keep an arena of kernel addresses and allocate
-pieces from that arena which it then populates with physical memory.
-The effect is much the same as a user process that has parts of
-its address space paged out when they are not in use,
-except that the kernel can explicitly control the set of pages
-allocated to its address space.
-The result is that the ``working set'' of pages in use by the
-kernel exactly corresponds to the set of pages that it is really using.
-.FI "One day memory usage on a Berkeley time-sharing machine"
-.so usage.tbl
-.Fe
-.PP
-A final special condition that applies to the kernel is that
-all of the different uses of dynamic memory are known in advance.
-Each one of these uses of dynamic memory can be assigned a type.
-For each type of dynamic memory that is allocated,
-the kernel can provide allocation limits.
-One reason given for having separate allocators is that
-no single allocator could starve the rest of the kernel of all
-its available memory and thus a single runaway
-client could not paralyze the system.
-By putting limits on each type of memory,
-the single general purpose memory allocator can provide the same
-protection against memory starvation.\(dg
-.FS
-\(dgOne might seriously ask the question what good it is if ``only''
-one subsystem within the kernel hangs if it is something like the
-network on a diskless workstation.
-.FE
-.PP
-\*(Lb shows the memory usage of the kernel over a one day period
-on a general timesharing machine at Berkeley.
-The ``In Use'', ``Free'', and ``Mem Use'' fields are instantaneous values;
-the ``Requests'' field is the number of allocations since system startup;
-the ``High Use'' field is the maximum value of
-the ``Mem Use'' field since system startup.
-The figure demonstrates that most
-allocations are for small objects.
-Large allocations occur infrequently,
-and are typically for long-lived objects
-such as buffers to hold the superblock for
-a mounted file system.
-Thus, a memory allocator only needs to be
-fast for small pieces of memory.
-.H 1 "Implementation of the Kernel Memory Allocator
-.PP
-In reviewing the available memory allocators,
-none of their strategies could be used without some modification.
-The kernel memory allocator that we ended up with is a hybrid
-of the fast memory allocator found in the 4.2BSD C library
-and a slower but more-memory-efficient first-fit allocator.
-.PP
-Small allocations are done using the 4.2BSD power-of-two list strategy;
-the typical allocation requires only a computation of
-the list to use and the removal of an element if it is available,
-so it is quite fast.
-Macros are provided to avoid the cost of a subroutine call.
-Only if the request cannot be fulfilled from a list is a call
-made to the allocator itself.
-To ensure that the allocator is always called for large requests,
-the lists corresponding to large allocations are always empty.
-Appendix A shows the data structures and implementation of the macros.
-.PP
-Similarly, freeing a block of memory can be done with a macro.
-The macro computes the list on which to place the request
-and puts it there.
-The free routine is called only if the block of memory is
-considered to be a large allocation.
-Including the cost of blocking out interrupts,
-the allocation and freeing macros generate respectively
-only nine and sixteen (simple) VAX instructions.
-.PP
-Because of the inefficiency of power-of-two allocation strategies
-for large allocations,
-a different strategy is used for allocations larger than two kilobytes.
-The selection of two kilobytes is derived from our statistics on
-the utilization of memory within the kernel,
-that showed that 95 to 98% of allocations are of size one kilobyte or less.
-A frequent caller of the memory allocator
-(the name translation function)
-always requests a one kilobyte block.
-Additionally the allocation method for large blocks is based on allocating
-pieces of memory in multiples of pages.
-Consequently the actual allocation sizes for requests of size
-$2~times~pagesize$ or less are identical.\(dg
-.FS
-\(dgTo understand why this number is $size 8 {2~times~pagesize}$ one
-observes that the power-of-two algorithm yields sizes of 1, 2, 4, 8, \&...
-pages while the large block algorithm that allocates in multiples
-of pages yields sizes of 1, 2, 3, 4, \&... pages.
-Thus for allocations of sizes between one and two pages
-both algorithms use two pages;
-it is not until allocations of sizes between two and three pages
-that a difference emerges where the power-of-two algorithm will use
-four pages while the large block algorithm will use three pages.
-.FE
-In 4.3BSD on the VAX, the (software) page size is one kilobyte,
-so two kilobytes is the smallest logical cutoff.
-.PP
-Large allocations are first rounded up to be a multiple of the page size.
-The allocator then uses a first-fit algorithm to find space in the
-kernel address arena set aside for dynamic allocations.
-Thus a request for a five kilobyte piece of memory will use exactly
-five pages of memory rather than eight kilobytes as with
-the power-of-two allocation strategy.
-When a large piece of memory is freed,
-the memory pages are returned to the free memory pool,
-and the address space is returned to the kernel address arena
-where it is coalesced with adjacent free pieces.
-.PP
-Another technique to improve both the efficiency of memory utilization
-and the speed of allocation
-is to cluster same-sized small allocations on a page.
-When a list for a power-of-two allocation is empty,
-a new page is allocated and divided into pieces of the needed size.
-This strategy speeds future allocations as several pieces of memory
-become available as a result of the call into the allocator.
-.PP
-.FI "Calculation of allocation size"
-.so alloc.fig
-.Fe
-Because the size is not specified when a block of memory is freed,
-the allocator must keep track of the sizes of the pieces it has handed out.
-The 4.2BSD user-level allocator stores the size of each block
-in a header just before the allocation.
-However, this strategy doubles the memory requirement for allocations that
-require a power-of-two-sized block.
-Therefore,
-instead of storing the size of each piece of memory with the piece itself,
-the size information is associated with the memory page.
-\*(Lb shows how the kernel determines
-the size of a piece of memory that is being freed,
-by calculating the page in which it resides,
-and looking up the size associated with that page.
-Eliminating the cost of the overhead per piece improved utilization
-far more than expected.
-The reason is that many allocations in the kernel are for blocks of
-memory whose size is exactly a power of two.
-These requests would be nearly doubled if the user-level strategy were used.
-Now they can be accommodated with no wasted memory.
-.PP
-The allocator can be called both from the top half of the kernel,
-which is willing to wait for memory to become available,
-and from the interrupt routines in the bottom half of the kernel
-that cannot wait for memory to become available.
-Clients indicate their willingness (and ability) to wait with a flag
-to the allocation routine.
-For clients that are willing to wait,
-the allocator guarantees that their request will succeed.
-Thus, these clients need not check the return value from the allocator.
-If memory is unavailable and the client cannot wait,
-the allocator returns a null pointer.
-These clients must be prepared to cope with this
-(hopefully infrequent) condition
-(usually by giving up and hoping to do better later).
-.H 1 "Results of the Implementation
-.PP
-The new memory allocator was written about a year ago.
-Conversion from the old memory allocators to the new allocator
-has been going on ever since.
-Many of the special purpose allocators have been eliminated.
-This list includes
-.RN calloc ,
-.RN wmemall ,
-and
-.RN zmemall .
-Many of the special purpose memory allocators built on
-top of other allocators have also been eliminated.
-For example, the allocator that was built on top of the buffer pool allocator
-.RN geteblk
-to allocate pathname buffers in
-.RN namei
-has been eliminated.
-Because the typical allocation is so fast,
-we have found that none of the special purpose pools are needed.
-Indeed, the cost of allocation is about the same as the previous cost of
-allocating buffers from the network pool (\fImbuf\fP\^s).
-Consequently applications that used to allocate network
-buffers for their own uses have been switched over to using
-the general purpose allocator without increasing their running time.
-.PP
-Quantifying the performance of the allocator is difficult because
-it is hard to measure the amount of time spent allocating
-and freeing memory in the kernel.
-The usual approach is to compile a kernel for profiling
-and then compare the running time of the routines that
-implemented the old abstraction versus those that implement the new one.
-The old routines are difficult to quantify because
-individual routines were used for more than one purpose.
-For example, the
-.RN geteblk
-routine was used both to allocate one kilobyte memory blocks
-and for its intended purpose of providing buffers to the filesystem.
-Differentiating these uses is often difficult.
-To get a measure of the cost of memory allocation before
-putting in our new allocator,
-we summed up the running time of all the routines whose
-exclusive task was memory allocation.
-To this total we added the fraction
-of the running time of the multi-purpose routines that could
-clearly be identified as memory allocation usage.
-This number showed that approximately three percent of
-the time spent in the kernel could be accounted to memory allocation.
-.PP
-The new allocator is difficult to measure
-because the usual case of the memory allocator is implemented as a macro.
-Thus, its running time is a small fraction of the running time of the
-numerous routines in the kernel that use it.
-To get a bound on the cost,
-we changed the macro always to call the memory allocation routine.
-Running in this mode, the memory allocator accounted for six percent
-of the time spent in the kernel.
-Factoring out the cost of the statistics collection and the
-subroutine call overhead for the cases that could
-normally be handled by the macro,
-we estimate that the allocator would account for
-at most four percent of time in the kernel.
-These measurements show that the new allocator does not introduce
-significant new run-time costs.
-.PP
-The other major success has been in keeping the size information
-on a per-page basis.
-This technique allows the most frequently requested sizes to be
-allocated without waste.
-It also reduces the amount of bookkeeping information associated
-with the allocator to four kilobytes of information
-per megabyte of memory under management (with a one kilobyte page size).
-.H 1 "Future Work
-.PP
-Our next project is to convert many of the static
-kernel tables to be dynamically allocated.
-Static tables include the process table, the file table,
-and the mount table.
-Making these tables dynamic will have two benefits.
-First, it will reduce the amount of memory
-that must be statically allocated at boot time.
-Second, it will eliminate the arbitrary upper limit imposed
-by the current static sizing
-(although a limit will be retained to constrain runaway clients).
-Other researchers have already shown the memory savings
-achieved by this conversion [Rodriguez88].
-.PP
-Under the current implementation,
-memory is never moved from one size list to another.
-With the 4.2BSD memory allocator this causes problems,
-particularly for large allocations where a process may use
-a quarter megabyte piece of memory once,
-which is then never available for any other size request.
-In our hybrid scheme,
-memory can be shuffled between large requests so that large blocks
-of memory are never stranded as they are with the 4.2BSD allocator.
-However, pages allocated to small requests are allocated once
-to a particular size and never changed thereafter.
-If a burst of requests came in for a particular size,
-that size would acquire a large amount of memory
-that would then not be available for other future requests.
-.PP
-In practice, we do not find that the free lists become too large.
-However, we have been investigating ways to handle such problems
-if they occur in the future.
-Our current investigations involve a routine
-that can run as part of the idle loop that would sort the elements
-on each of the free lists into order of increasing address.
-Since any given page has only one size of elements allocated from it,
-the effect of the sorting would be to sort the list into distinct pages.
-When all the pieces of a page became free,
-the page itself could be released back to the free pool so that
-it could be allocated to another purpose.
-Although there is no guarantee that all the pieces of a page would ever
-be freed,
-most allocations are short-lived, lasting only for the duration of
-an open file descriptor, an open network connection, or a system call.
-As new allocations would be made from the page sorted to
-the front of the list,
-return of elements from pages at the back would eventually
-allow pages later in the list to be freed.
-.PP
-Two of the traditional UNIX
-memory allocators remain in the current system.
-The terminal subsystem uses \fIclist\fP\^s (character lists).
-That part of the system is expected to undergo major revision within
-the next year or so, and it will probably be changed to use
-\fImbuf\fP\^s as it is merged into the network system.
-The other major allocator that remains is
-.RN getblk ,
-the routine that manages the filesystem buffer pool memory
-and associated control information.
-Only the filesystem uses
-.RN getblk
-in the current system;
-it manages the constant-sized buffer pool.
-We plan to merge the filesystem buffer cache into the virtual memory system's
-page cache in the future.
-This change will allow the size of the buffer pool to be changed
-according to memory load,
-but will require a policy for balancing memory needs
-with filesystem cache performance.
-.H 1 "Acknowledgments
-.PP
-In the spirit of community support,
-we have made various versions of our allocator available to our test sites.
-They have been busily burning it in and giving
-us feedback on their experiences.
-We acknowledge their invaluable input.
-The feedback from the Usenix program committee on the initial draft of
-our paper suggested numerous important improvements.
-.H 1 "References
-.LP
-.IP Korn85 \w'Rodriguez88\0\0'u
-David Korn, Kiem-Phong Vo,
-``In Search of a Better Malloc''
-\fIProceedings of the Portland Usenix Conference\fP,
-pp 489-506, June 1985.
-.IP McKusick85
-M. McKusick, M. Karels, S. Leffler,
-``Performance Improvements and Functional Enhancements in 4.3BSD''
-\fIProceedings of the Portland Usenix Conference\fP,
-pp 519-531, June 1985.
-.IP Rodriguez88
-Robert Rodriguez, Matt Koehler, Larry Palmer, Ricky Palmer,
-``A Dynamic UNIX Operating System''
-\fIProceedings of the San Francisco Usenix Conference\fP,
-June 1988.
-.IP Thompson78
-Ken Thompson,
-``UNIX Implementation''
-\fIBell System Technical Journal\fP, volume 57, number 6,
-pp 1931-1946, 1978.
diff --git a/share/doc/papers/kernmalloc/spell.ok b/share/doc/papers/kernmalloc/spell.ok
deleted file mode 100644
index 10c3ab7d8ed..00000000000
--- a/share/doc/papers/kernmalloc/spell.ok
+++ /dev/null
@@ -1,57 +0,0 @@
-BUCKETINDX
-CLBYTES
-CM
-Karels
-Kiem
-Koehler
-Korn
-Korn85
-MAXALLOCSAVE
-MAXALLOCSIZE
-MAXKMEM
-MINALLOCSIZE
-MINBUCKET
-Matt
-McKusick
-McKusick85
-Mem
-Phong
-Ricky
-Rodriguez88
-S.Leffler
-Thompson78
-ULTRIX
-Usenix
-VAX
-Vo
-arptbl
-caddr
-devbuf
-extern
-fragtbl
-freelist
-geteblk
-indx
-ioctlops
-kb
-kbp
-kmembase
-kmembuckets
-kmemsizes
-ks
-ksp
-mbuf
-mbufs
-namei
-pagecnt
-pathname
-pcb
-pp
-routetbl
-runtime
-splimp
-splx
-superblk
-temp
-wmemall
-zmemall
diff --git a/share/doc/papers/kernmalloc/usage.tbl b/share/doc/papers/kernmalloc/usage.tbl
deleted file mode 100644
index d19dd52eeb2..00000000000
--- a/share/doc/papers/kernmalloc/usage.tbl
+++ /dev/null
@@ -1,73 +0,0 @@
-.\" $OpenBSD: usage.tbl,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1988 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)usage.tbl 5.1 (Berkeley) 4/16/91
-.\"
-.TS
-box;
-c s s s
-c c c c
-n n n n.
-Memory statistics by bucket size
-=
-Size In Use Free Requests
-_
-128 329 39 3129219
-256 0 0 0
-512 4 0 16
-1024 17 5 648771
-2048 13 0 13
-2049\-4096 0 0 157
-4097\-8192 2 0 103
-8193\-16384 0 0 0
-16385\-32768 1 0 1
-.TE
-.DE
-.DS B
-.TS
-box;
-c s s s s
-c c c c c
-c n n n n.
-Memory statistics by type
-=
-Type In Use Mem Use High Use Requests
-_
-mbuf 6 1K 17K 3099066
-devbuf 13 53K 53K 13
-socket 37 5K 6K 1275
-pcb 55 7K 8K 1512
-routetbl 229 29K 29K 2424
-fragtbl 0 0K 1K 404
-zombie 3 1K 1K 24538
-namei 0 0K 5K 648754
-ioctlops 0 0K 1K 12
-superblk 24 34K 34K 24
-temp 0 0K 8K 258
-.TE
diff --git a/share/doc/papers/kerntune/0.t b/share/doc/papers/kerntune/0.t
deleted file mode 100644
index 136838629a2..00000000000
--- a/share/doc/papers/kerntune/0.t
+++ /dev/null
@@ -1,127 +0,0 @@
-.\" $OpenBSD: 0.t,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1984 M. K. McKusick
-.\" Copyright (c) 1984 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)0.t 1.2 (Berkeley) 11/8/90
-.\"
-.EQ
-delim $$
-.EN
-.if n .ND
-.TL
-Using gprof to Tune the 4.2BSD Kernel
-.AU
-Marshall Kirk McKusick
-.AI
-Computer Systems Research Group
-Computer Science Division
-Department of Electrical Engineering and Computer Science
-University of California, Berkeley
-Berkeley, California 94720
-.AB
-This paper describes how the \fIgprof\fP profiler
-accounts for the running time of called routines
-in the running time of the routines that call them.
-It then explains how to configure a profiling kernel on
-the 4.2 Berkeley Software Distribution of
-.UX
-for the VAX\(dd
-.FS
-\(dd VAX is a trademark of Digital Equipment Corporation.
-.FE
-and discusses tradeoffs in techniques for collecting
-profile data.
-\fIGprof\fP identifies problems
-that severely affect the overall performance of the kernel.
-Once a potential problem area is identified,
-benchmark programs are devised to highlight the bottleneck.
-These benchmarks verify that the problem exists and provide
-a metric against which to validate proposed solutions.
-Two caches are added to the kernel to alleviate the bottleneck
-and \fIgprof\fP is used to validate their effectiveness.
-.AE
-.LP
-.de PT
-.lt \\n(LLu
-.pc %
-.nr PN \\n%
-.tl '\\*(LH'\\*(CH'\\*(RH'
-.lt \\n(.lu
-..
-.af PN i
-.ds LH 4.2BSD Performance
-.ds RH Contents
-.bp 1
-.if t .ds CF May 21, 1984
-.if t .ds LF
-.if t .ds RF McKusick
-.ce
-.B "TABLE OF CONTENTS"
-.LP
-.sp 1
-.nf
-.B "1. Introduction"
-.LP
-.sp .5v
-.nf
-.B "2. The \fIgprof\fP Profiler"
-\0.1. Data Presentation
-\0.1.1. The Flat Profile
-\0.1.2. The Call Graph Profile
-\0.2. Profiling the Kernel
-.LP
-.sp .5v
-.nf
-.B "3. Using \fIgprof\fP to Improve Performance"
-\0.1. Using the Profiler
-\0.2. An Example of Tuning
-.LP
-.sp .5v
-.nf
-.B "4. Conclusions"
-.LP
-.sp .5v
-.nf
-.B Acknowledgements
-.LP
-.sp .5v
-.nf
-.B References
-.af PN 1
-.bp 1
-.de _d
-.if t .ta .6i 2.1i 2.6i
-.\" 2.94 went to 2.6, 3.64 to 3.30
-.if n .ta .84i 2.6i 3.30i
-..
-.de _f
-.if t .ta .5i 1.25i 2.5i
-.\" 3.5i went to 3.8i
-.if n .ta .7i 1.75i 3.8i
-..
diff --git a/share/doc/papers/kerntune/1.t b/share/doc/papers/kerntune/1.t
deleted file mode 100644
index a5d6e6b450e..00000000000
--- a/share/doc/papers/kerntune/1.t
+++ /dev/null
@@ -1,46 +0,0 @@
-.\" $OpenBSD: 1.t,v 1.4 2003/10/30 14:52:24 jmc Exp $
-.\"
-.\" Copyright (c) 1984 M. K. McKusick
-.\" Copyright (c) 1984 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)1.t 1.2 (Berkeley) 11/8/90
-.\"
-.ds RH Introduction
-.NH 1
-Introduction
-.PP
-The purpose of this paper is to describe the tools and techniques
-that are available for improving the performance of the kernel.
-The primary tool used to measure the kernel is the hierarchical
-profiler \fIgprof\fP.
-The profiler enables the user to measure the cost of
-the abstractions that the kernel provides to the user.
-Once the expensive abstractions are identified,
-optimizations are postulated to help improve their performance.
-These optimizations are each individually
-verified to ensure that they are producing a measurable improvement.
diff --git a/share/doc/papers/kerntune/2.t b/share/doc/papers/kerntune/2.t
deleted file mode 100644
index 3ec5d446fbe..00000000000
--- a/share/doc/papers/kerntune/2.t
+++ /dev/null
@@ -1,232 +0,0 @@
-.\" $OpenBSD: 2.t,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1984 M. K. McKusick
-.\" Copyright (c) 1984 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)2.t 1.3 (Berkeley) 11/8/90
-.\"
-.ds RH The \fIgprof\fP Profiler
-.NH 1
-The \fIgprof\fP Profiler
-.PP
-The purpose of the \fIgprof\fP profiling tool is to
-help the user evaluate alternative implementations
-of abstractions.
-The \fIgprof\fP design takes advantage of the fact that the kernel,
-though large, is structured and hierarchical.
-We provide a profile in which the execution time
-for a set of routines that implement an
-abstraction is collected and charged
-to that abstraction.
-The profile can be used to compare and assess the costs of
-various implementations [Graham82] [Graham83].
-.NH 2
-Data presentation
-.PP
-The data is presented to the user in two different formats.
-The first presentation simply lists the routines
-without regard to the amount of time their descendants use.
-The second presentation incorporates the call graph of the
-kernel.
-.NH 3
-The Flat Profile
-.PP
-The flat profile consists of a list of all the routines
-that are called during execution of the kernel,
-with the count of the number of times they are called
-and the number of seconds of execution time for which they
-are themselves accountable.
-The routines are listed in decreasing order of execution time.
-A list of the routines that are never called during execution of
-the kernel is also available
-to verify that nothing important is omitted by
-this profiling run.
-The flat profile gives a quick overview of the routines that are used,
-and shows the routines that are themselves responsible
-for large fractions of the execution time.
-In practice,
-this profile usually shows that no single function
-is overwhelmingly responsible for
-the total time of the kernel.
-Notice that for this profile,
-the individual times sum to the total execution time.
-.NH 3
-The Call Graph Profile
-.PP
-Ideally, we would like to print the call graph of the kernel,
-but we are limited by the two-dimensional nature of our output
-devices.
-We cannot assume that a call graph is planar,
-and even if it is, that we can print a planar version of it.
-Instead, we choose to list each routine,
-together with information about
-the routines that are its direct parents and children.
-This listing presents a window into the call graph.
-Based on our experience,
-both parent information and child information
-are important,
-and should be available without searching
-through the output.
-Figure 1 shows a sample \fIgprof\fP entry.
-.KF
-.DS L
-.TS
-box center;
-c c c c c l l
-c c c c c l l
-c c c c c l l
-l n n n c l l.
- called/total \ \ parents
-index %time self descendants called+self name index
- called/total \ \ children
-_
- 0.20 1.20 4/10 \ \ \s-1CALLER1\s+1 [7]
- 0.30 1.80 6/10 \ \ \s-1CALLER2\s+1 [1]
-[2] 41.5 0.50 3.00 10+4 \s-1EXAMPLE\s+1 [2]
- 1.50 1.00 20/40 \ \ \s-1SUB1\s+1 <cycle1> [4]
- 0.00 0.50 1/5 \ \ \s-1SUB2\s+1 [9]
- 0.00 0.00 0/5 \ \ \s-1SUB3\s+1 [11]
-.TE
-.ce
-Figure 1. Profile entry for \s-1EXAMPLE\s+1.
-.DE
-.KE
-.PP
-The major entries of the call graph profile are the entries from the
-flat profile, augmented by the time propagated to each
-routine from its descendants.
-This profile is sorted by the sum of the time for the routine
-itself plus the time inherited from its descendants.
-The profile shows which of the higher level routines
-spend large portions of the total execution time
-in the routines that they call.
-For each routine, we show the amount of time passed by each child
-to the routine, which includes time for the child itself
-and for the descendants of the child
-(and thus the descendants of the routine).
-We also show the percentage these times represent of the total time
-accounted to the child.
-Similarly, the parents of each routine are listed,
-along with time,
-and percentage of total routine time,
-propagated to each one.
-.PP
-Cycles are handled as single entities.
-The cycle as a whole is shown as though it were a single routine,
-except that members of the cycle are listed in place of the children.
-Although the number of calls of each member
-from within the cycle is shown,
-these calls do not affect time propagation.
-When a child is a member of a cycle,
-the time shown is the appropriate fraction of the time
-for the whole cycle.
-Self-recursive routines have their calls broken
-down into calls from the outside and self-recursive calls.
-Only the outside calls affect the propagation of time.
-.PP
-The example shown in Figure 2 is the fragment of a call graph
-corresponding to the entry in the call graph profile listing
-shown in Figure 1.
-.KF
-.DS L
-.so fig2.pic
-.ce
-Figure 2. Example call graph fragment.
-.DE
-.KE
-.PP
-The entry is for routine \s-1EXAMPLE\s+1, which has
-the Caller routines as its parents,
-and the Sub routines as its children.
-The reader should keep in mind that all information
-is given \fIwith respect to \s-1EXAMPLE\s+1\fP.
-The index in the first column shows that \s-1EXAMPLE\s+1
-is the second entry in the profile listing.
-The \s-1EXAMPLE\s+1 routine is called ten times, four times by \s-1CALLER1\s+1,
-and six times by \s-1CALLER2\s+1.
-Consequently 40% of \s-1EXAMPLE\s+1's time is propagated to \s-1CALLER1\s+1,
-and 60% of \s-1EXAMPLE\s+1's time is propagated to \s-1CALLER2\s+1.
-The self and descendant fields of the parents
-show the amount of self and descendant time \s-1EXAMPLE\s+1
-propagates to them (but not the time used by
-the parents directly).
-Note that \s-1EXAMPLE\s+1 calls itself recursively four times.
-The routine \s-1EXAMPLE\s+1 calls routine \s-1SUB1\s+1 twenty times, \s-1SUB2\s+1 once,
-and never calls \s-1SUB3\s+1.
-Since \s-1SUB2\s+1 is called a total of five times,
-20% of its self and descendant time is propagated to \s-1EXAMPLE\s+1's
-descendant time field.
-Because \s-1SUB1\s+1 is a member of \fIcycle 1\fR,
-the self and descendant times
-and call count fraction
-are those for the cycle as a whole.
-Since cycle 1 is called a total of forty times
-(not counting calls among members of the cycle),
-it propagates 50% of the cycle's self and descendant
-time to \s-1EXAMPLE\s+1's descendant time field.
-Finally each name is followed by an index that shows
-where on the listing to find the entry for that routine.
-.NH 2
-Profiling the Kernel
-.PP
-It is simple to build a 4.2BSD kernel that will automatically
-collect profiling information as it operates simply by specifying the
-.B \-p
-option to \fIconfig\fP\|(8) when configuring a kernel.
-The program counter sampling can be driven by the system clock,
-or by an alternate real time clock.
-The latter is highly recommended as use of the system clock results
-in statistical anomalies in accounting for
-the time spent in the kernel clock routine.
-.PP
-Once a profiling system has been booted, statistic gathering is
-handled by \fIkgmon\fP\|(8).
-\fIKgmon\fP allows profiling to be started and stopped
-and the internal state of the profiling buffers to be dumped.
-\fIKgmon\fP can also be used to reset the state of the internal
-buffers to allow multiple experiments to be run without
-rebooting the machine.
-The profiling data can then be processed with \fIgprof\fP\|(1)
-to obtain information regarding the system's operation.
-.PP
-A profiled system is about 5-10% larger in its text space because of
-the calls to count the subroutine invocations.
-When the system executes,
-the profiling data is stored in a buffer that is 1.2
-times the size of the text space.
-All the information is summarized in memory;
-it is not necessary to have a trace file
-being continuously dumped to disk.
-The overhead for running a profiled system varies;
-under normal load we see anywhere from 5-25%
-of the system time spent in the profiling code.
-Thus the system is noticeably slower than an unprofiled system,
-yet is not so bad that it cannot be used in a production environment.
-This is important since it allows us to gather data
-in a real environment rather than trying to
-devise synthetic work loads.
diff --git a/share/doc/papers/kerntune/3.t b/share/doc/papers/kerntune/3.t
deleted file mode 100644
index 98c6ccea3e3..00000000000
--- a/share/doc/papers/kerntune/3.t
+++ /dev/null
@@ -1,288 +0,0 @@
-.\" $OpenBSD: 3.t,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1984 M. K. McKusick
-.\" Copyright (c) 1984 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)3.t 1.2 (Berkeley) 11/8/90
-.\"
-.ds RH Techniques for Improving Performance
-.NH 1
-Techniques for Improving Performance
-.PP
-This section gives several hints on general optimization techniques.
-It then proceeds with an example of how they can be
-applied to the 4.2BSD kernel to improve its performance.
-.NH 2
-Using the Profiler
-.PP
-The profiler is a useful tool for improving
-a set of routines that implement an abstraction.
-It can be helpful in identifying poorly coded routines,
-and in evaluating the new algorithms and code that replace them.
-Taking full advantage of the profiler
-requires a careful examination of the call graph profile,
-and a thorough knowledge of the abstractions underlying
-the kernel.
-.PP
-The easiest optimization that can be performed
-is a small change
-to a control construct or data structure.
-An obvious starting point
-is to expand a small frequently called routine inline.
-The drawback to inline expansion is that the data abstractions
-in the kernel may become less parameterized,
-hence less clearly defined.
-The profiling will also become less useful since the loss of
-routines will make its output more granular.
-.PP
-Further potential for optimization lies in routines that
-implement data abstractions whose total execution
-time is long.
-If the data abstraction function cannot easily be speeded up,
-it may be advantageous to cache its results,
-and eliminate the need to rerun
-it for identical inputs.
-These and other ideas for program improvement are discussed in
-[Bentley81].
-.PP
-This tool is best used in an iterative approach:
-profiling the kernel,
-eliminating one bottleneck,
-then finding some other part of the kernel
-that begins to dominate execution time.
-.PP
-A completely different use of the profiler is to analyze the control
-flow of an unfamiliar section of the kernel.
-By running an example that exercises the unfamiliar section of the kernel,
-and then using \fIgprof\fR, you can get a view of the
-control structure of the unfamiliar section.
-.NH 2
-An Example of Tuning
-.PP
-The first step is to come up with a method for generating
-profile data.
-We prefer to run a profiling system for about a one day
-period on one of our general timesharing machines.
-While this is not as reproducible as a synthetic workload,
-it certainly represents a realistic test.
-We have run one day profiles on several
-occasions over a three month period.
-Despite the long period of time that elapsed
-between the test runs, the shapes of the profiles,
-as measured by the number of times each system call
-entry point was called, were remarkably similar.
-.PP
-A second alternative is to write a small benchmark
-program to repeatedly exercise a suspected bottleneck.
-While these benchmarks are not useful as a long term profile
-they can give quick feedback on whether a hypothesized
-improvement is really having an effect.
-It is important to realize that the only real assurance
-that a change has a beneficial effect is through
-long term measurements of general timesharing.
-We have numerous examples where a benchmark program
-suggests vast improvements while the change
-in the long term system performance is negligible,
-and conversely examples in which the benchmark program runs more slowly,
-but the long term system performance improves significantly.
-.PP
-An investigation of our long term profiling showed that
-the single most expensive function performed by the kernel
-is path name translation.
-We find that our general time sharing systems do about
-500,000 name translations per day.
-The cost of doing name translation in the original 4.2BSD
-is 24.2 milliseconds,
-representing 40% of the time processing system calls,
-which is 19% of the total cycles in the kernel,
-or 11% of all cycles executed on the machine.
-The times are shown in Figure 3.
-.KF
-.DS L
-.TS
-center box;
-l r r.
-part time % of kernel
-_
-self 14.3 ms/call 11.3%
-child 9.9 ms/call 7.9%
-_
-total 24.2 ms/call 19.2%
-.TE
-.ce
-Figure 3. Call times for \fInamei\fP.
-.DE
-.KE
-.PP
-The system measurements collected showed the
-pathname translation routine, \fInamei\fP,
-was clearly worth optimizing.
-An inspection of \fInamei\fP shows that
-it consists of two nested loops.
-The outer loop is traversed once per pathname component.
-The inner loop performs a linear search through a directory looking
-for a particular pathname component.
-.PP
-Our first idea was to observe that many programs
-step through a directory performing an operation on
-each entry in turn.
-This caused us to modify \fInamei\fP to cache
-the directory offset of the last pathname
-component looked up by a process.
-The cached offset is then used
-as the point at which a search in the same directory
-begins. Changing directories invalidates the cache, as
-does modifying the directory.
-For programs that step sequentially through a directory with
-$N$ files, search time decreases from $O ( N sup 2 )$
-to $O(N)$.
-.PP
-The cost of the cache is about 20 lines of code
-(about 0.2 kilobytes)
-and 16 bytes per process, with the cached data
-stored in a process's \fIuser\fP vector.
-.PP
-As a quick benchmark to verify the effectiveness of the
-cache we ran ``ls \-l''
-on a directory containing 600 files.
-Before the per-process cache this command
-used 22.3 seconds of system time.
-After adding the cache the program used the same amount
-of user time, but the system time dropped to 3.3 seconds.
-.PP
-This change prompted our rerunning a profiled system
-on a machine containing the new \fInamei\fP.
-The results showed that the time in \fInamei\fP
-dropped by only 2.6 ms/call and
-still accounted for 36% of the system call time,
-18% of the kernel, or about 10% of all the machine cycles.
-This amounted to a drop in system time from 57% to about 55%.
-The results are shown in Figure 4.
-.KF
-.DS L
-.TS
-center box;
-l r r.
-part time % of kernel
-_
-self 11.0 ms/call 9.2%
-child 10.6 ms/call 8.9%
-_
-total 21.6 ms/call 18.1%
-.TE
-.ce
-Figure 4. Call times for \fInamei\fP with per-process cache.
-.DE
-.KE
-.PP
-The small performance improvement
-was caused by a low cache hit ratio.
-Although the cache was 90% effective when hit,
-it was only usable on about 25% of the names being translated.
-An additional reason for the small improvement was that
-although the amount of time spent in \fInamei\fP itself
-decreased substantially,
-more time was spent in the routines that it called
-since each directory had to be accessed twice;
-once to search from the middle to the end,
-and once to search from the beginning to the middle.
-.PP
-Most missed names were caused by path name components
-other than the last.
-Thus Robert Elz introduced a system wide cache of most recent
-name translations.
-The cache is keyed on a name and the
-inode and device number of the directory that contains it.
-Associated with each entry is a pointer to the corresponding
-entry in the inode table.
-This has the effect of short circuiting the outer loop of \fInamei\fP.
-For each path name component,
-\fInamei\fP first looks in its cache of recent translations
-for the needed name.
-If it exists, the directory search can be completely eliminated.
-If the name is not recognized,
-then the per-process cache may still be useful in
-reducing the directory search time.
-The two caching schemes complement each other well.
-.PP
-The cost of the name cache is about 200 lines of code
-(about 1.2 kilobytes)
-and 44 bytes per cache entry.
-Depending on the size of the system,
-about 200 to 1000 entries will normally be configured,
-using 10-44 kilobytes of physical memory.
-The name cache is resident in memory at all times.
-.PP
-After adding the system wide name cache we reran ``ls \-l''
-on the same directory.
-The user time remained the same,
-however the system time rose slightly to 3.7 seconds.
-This was not surprising as \fInamei\fP
-now had to maintain the cache,
-but was never able to make any use of it.
-.PP
-Another profiled system was created and measurements
-were collected over a one day period. These measurements
-showed a 6 ms/call decrease in \fInamei\fP, with
-\fInamei\fP accounting for only 31% of the system call time,
-16% of the time in the kernel,
-or about 7% of all the machine cycles.
-System time dropped from 55% to about 49%.
-The results are shown in Figure 5.
-.KF
-.DS L
-.TS
-center box;
-l r r.
-part time % of kernel
-_
-self 9.5 ms/call 9.6%
-child 6.1 ms/call 6.1%
-_
-total 15.6 ms/call 15.7%
-.TE
-.ce
-Figure 5. Call times for \fInamei\fP with both caches.
-.DE
-.KE
-.PP
-Statistics on the performance of both caches show
-the large performance improvement is
-caused by the high hit ratio.
-On the profiled system a 60% hit rate was observed in
-the system wide cache. This, coupled with the 25%
-hit rate in the per-process offset cache yielded an
-effective cache hit rate of 85%.
-While the system wide cache reduces both the amount of time in
-the routines that \fInamei\fP calls as well as \fInamei\fP itself
-(since fewer directories need to be accessed or searched),
-it is interesting to note that the actual percentage of system
-time spent in \fInamei\fP itself increases even though the
-actual time per call decreases.
-This is because less total time is being spent in the kernel,
-hence a smaller absolute time becomes a larger total percentage.
diff --git a/share/doc/papers/kerntune/4.t b/share/doc/papers/kerntune/4.t
deleted file mode 100644
index d4b9c30b175..00000000000
--- a/share/doc/papers/kerntune/4.t
+++ /dev/null
@@ -1,97 +0,0 @@
-.\" $OpenBSD: 4.t,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1984 M. K. McKusick
-.\" Copyright (c) 1984 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)4.t 1.2 (Berkeley) 11/8/90
-.\"
-.ds RH Conclusions
-.NH 1
-Conclusions
-.PP
-We have created a profiler that aids in the evaluation
-of the kernel.
-For each routine in the kernel,
-the profile shows the extent to which that routine
-helps support various abstractions,
-and how that routine uses other abstractions.
-The profile assesses the cost of routines
-at all levels of the kernel decomposition.
-The profiler is easily used,
-and can be compiled into the kernel.
-It adds only five to thirty percent execution overhead to the kernel
-being profiled,
-produces no additional output while the kernel is running
-and allows the kernel to be measured in its real environment.
-Kernel profiles can be used to identify bottlenecks in performance.
-We have shown how to improve performance
-by caching recently calculated name translations.
-The combined caches added to the name translation process
-reduce the average cost of translating a pathname to an inode by 35%.
-These changes reduce the percentage of time spent running
-in the system by nearly 9%.
-.nr H2 1
-.ds RH Acknowledgements
-.SH
-\s+2Acknowledgements\s0
-.PP
-I would like to thank Robert Elz for sharing his ideas and
-his code for caching system wide names.
-Thanks also to all the users at Berkeley who provided all the
-input to generate the kernel profiles.
-This work was supported by
-the Defense Advanced Research Projects Agency (DoD) under
-Arpa Order No. 4031 monitored by Naval Electronic Systems Command under
-Contract No. N00039-82-C-0235.
-.ds RH References
-.nr H2 1
-.sp 2
-.SH
-\s+2References\s-2
-.LP
-.IP [Bentley81] 20
-Bentley, J. L.,
-``Writing Efficient Code'',
-Department of Computer Science,
-Carnegie-Mellon University,
-Pittsburgh, Pennsylvania,
-CMU-CS-81-116, 1981.
-.IP [Graham82] 20
-Graham, S., Kessler, P., McKusick, M.,
-``gprof: A Call Graph Execution Profiler'',
-Proceedings of the SIGPLAN '82 Symposium on Compiler Construction,
-Volume 17, Number 6, June 1982. pp 120-126
-.IP [Graham83] 20
-Graham, S., Kessler, P., McKusick, M.,
-``An Execution Profiler for Modular Programs'',
-Software - Practice and Experience,
-Volume 13, 1983. pp 671-685
-.IP [Ritchie74] 20
-Ritchie, D. M. and Thompson, K.,
-``The UNIX Time-Sharing System'',
-CACM 17, 7. July 1974. pp 365-375
diff --git a/share/doc/papers/kerntune/Makefile b/share/doc/papers/kerntune/Makefile
deleted file mode 100644
index a2c10c2d9fa..00000000000
--- a/share/doc/papers/kerntune/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-# $OpenBSD: Makefile,v 1.3 2004/02/01 14:22:44 jmc Exp $
-
-
-DIR= papers/kerntune
-SRCS= 0.t 1.t 2.t 3.t 4.t
-MACROS= -ms
-
-paper.ps: ${SRCS}
- ${SOELIM} ${SRCS} | ${PIC} | ${TBL} | ${EQN} | ${ROFF} > ${.TARGET}
-
-paper.txt: ${SRCS}
- ${SOELIM} ${SRCS} | ${PIC} | ${TBL} | ${EQN} | \
- ${ROFF} -Tascii > ${.TARGET}
-
-.include <bsd.doc.mk>
diff --git a/share/doc/papers/kerntune/fig2.pic b/share/doc/papers/kerntune/fig2.pic
deleted file mode 100644
index d71b73cb98d..00000000000
--- a/share/doc/papers/kerntune/fig2.pic
+++ /dev/null
@@ -1,55 +0,0 @@
-.\" $OpenBSD: fig2.pic,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1987 M. K. McKusick
-.\" Copyright (c) 1987 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)fig2.pic 1.2 (Berkeley) 11/8/90
-.\"
-.PS
-ellipse ht .3i wid .75i "\s-1CALLER1\s+1"
-ellipse ht .3i wid .75i "\s-1CALLER2\s+1" at 1st ellipse + (2i,0i)
-ellipse ht .3i wid .8i "\s-1EXAMPLE\s+1" at 1st ellipse + (1i,-.5i)
-ellipse ht .3i wid .5i "\s-1SUB1\s+1" at 1st ellipse - (0i,1i)
-ellipse ht .3i wid .5i "\s-1SUB2\s+1" at 3rd ellipse - (0i,.5i)
-ellipse ht .3i wid .5i "\s-1SUB3\s+1" at 2nd ellipse - (0i,1i)
-line <- from 1st ellipse up .5i left .5i chop .1875i
-line <- from 1st ellipse up .5i right .5i chop .1875i
-line <- from 2nd ellipse up .5i left .5i chop .1875i
-line <- from 2nd ellipse up .5i right .5i chop .1875i
-arrow from 1st ellipse to 3rd ellipse chop
-arrow from 2nd ellipse to 3rd ellipse chop
-arrow from 3rd ellipse to 4th ellipse chop
-arrow from 3rd ellipse to 5th ellipse chop .15i chop .15i
-arrow from 3rd ellipse to 6th ellipse chop
-arrow from 4th ellipse down .5i left .5i chop .1875i
-arrow from 4th ellipse down .5i right .5i chop .1875i
-arrow from 5th ellipse down .5i left .5i chop .1875i
-arrow from 5th ellipse down .5i right .5i chop .1875i
-arrow from 6th ellipse down .5i left .5i chop .1875i
-arrow from 6th ellipse down .5i right .5i chop .1875i
-.PE
diff --git a/share/doc/papers/malloc/Makefile b/share/doc/papers/malloc/Makefile
deleted file mode 100644
index a9553dfc98f..00000000000
--- a/share/doc/papers/malloc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-# From: @(#)Makefile 6.3 (Berkeley) 6/8/93
-# $OpenBSD: Makefile,v 1.3 2004/02/01 14:22:44 jmc Exp $
-
-VOLUME= papers
-DOC= malloc
-SRCS= abs.ms intro.ms kernel.ms malloc.ms problems.ms alternatives.ms
-SRCS+= performance.ms implementation.ms conclusion.ms
-MACROS= -ms
-
-paper.txt: ${SRCS}
- ${ROFF} -Tascii ${SRCS} > ${.TARGET}
-
-edit:
- vi ${SRCS}
-
-.include <bsd.doc.mk>
diff --git a/share/doc/papers/malloc/abs.ms b/share/doc/papers/malloc/abs.ms
deleted file mode 100644
index f7b74056046..00000000000
--- a/share/doc/papers/malloc/abs.ms
+++ /dev/null
@@ -1,35 +0,0 @@
-.\"
-.\" ----------------------------------------------------------------------------
-.\" "THE BEER-WARE LICENSE" (Revision 42):
-.\" <phk@login.dknet.dk> wrote this file. As long as you retain this notice you
-.\" can do whatever you want with this stuff. If we meet some day, and you think
-.\" this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
-.\" ----------------------------------------------------------------------------
-.\"
-.\" $OpenBSD: abs.ms,v 1.2 1997/01/05 22:18:06 tholo Exp $
-.\"
-.if n .ND
-.TL
-Malloc(3) in modern Virtual Memory environments.
-.sp
-Revised
-Fri Apr 5 12:50:07 1996
-.AU
-Poul-Henning Kamp
-.AI
-<phk@FreeBSD.org>
-Den Andensidste Viking
-Valbygaardsvej 8
-DK-4200 Slagelse
-Denmark
-.AB
-Malloc/free is one of the oldest parts of the C language environment
-and obviously the world has changed a bit since it was first made.
-The fact that most UNIX kernels have changed from a swap/segment to
-a virtual memory/page based memory management has not been sufficiently
-reflected in the implementations of the malloc/free API.
-.PP
-A new implementation was designed, written, tested and bench-marked
-with an eye on the workings and performance characteristics of modern
-Virtual Memory systems.
-.AE
diff --git a/share/doc/papers/malloc/alternatives.ms b/share/doc/papers/malloc/alternatives.ms
deleted file mode 100644
index 3d152e385ad..00000000000
--- a/share/doc/papers/malloc/alternatives.ms
+++ /dev/null
@@ -1,45 +0,0 @@
-.\"
-.\" ----------------------------------------------------------------------------
-.\" "THE BEER-WARE LICENSE" (Revision 42):
-.\" <phk@login.dknet.dk> wrote this file. As long as you retain this notice you
-.\" can do whatever you want with this stuff. If we meet some day, and you think
-.\" this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
-.\" ----------------------------------------------------------------------------
-.\"
-.\" $OpenBSD: alternatives.ms,v 1.2 1997/01/05 22:18:06 tholo Exp $
-.\"
-.ds RH Alternative implementations
-.NH
-Alternative implementations
-.PP
-These problems were actually the inspiration for the first alternative
-malloc implementations.
-Since their main aim was debugging, they would often use techniques
-like allocating a guard zone before and after the chunk,
-and possibly fill these guard zones
-with some pattern, so accesses outside the allocated chunk can be detected
-with some decent probability.
-Another widely used technique is to use tables to keep track of what
-chunks were actually in what state and so on.
-.PP
-This class of debugging has been taken to its practical extreme by
-the product "Purify" which does the entire memory-colouring exercise
-and not only keeps track of what is in use and what isn't, but also
-detects if the first reference is a read (which would return undefined
-values) and other such violations.
-.PP
-Later actual complete implementations of malloc arrived, but many of
-these still based their workings on the basic schema mentioned previously,
-disregarding that in the meantime virtual memory and paging have
-become the standard environment.
-.PP
-The most widely used "alternative" malloc is undoubtedly ``gnumalloc'',
-which has received wide acclaim and certainly runs faster than
-most stock mallocs. It does, however, tend to fare badly in
-cases where paging is the norm rather than the exception.
-.PP
-The particular malloc that prompted this work basically didn't bother
-reusing storage until the kernel forced it to do so by refusing
-further allocations with sbrk(2).
-That may make sense if you work alone on your own personal mainframe,
-but as a general policy it is less than optimal.
diff --git a/share/doc/papers/malloc/conclusion.ms b/share/doc/papers/malloc/conclusion.ms
deleted file mode 100644
index 9cf91cfe85e..00000000000
--- a/share/doc/papers/malloc/conclusion.ms
+++ /dev/null
@@ -1,48 +0,0 @@
-.\"
-.\" ----------------------------------------------------------------------------
-.\" "THE BEER-WARE LICENSE" (Revision 42):
-.\" <phk@login.dknet.dk> wrote this file. As long as you retain this notice you
-.\" can do whatever you want with this stuff. If we meet some day, and you think
-.\" this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
-.\" ----------------------------------------------------------------------------
-.\"
-.\" $OpenBSD: conclusion.ms,v 1.2 1997/01/05 22:18:07 tholo Exp $
-.\"
-.ds RH Conclusion and experience.
-.NH
-Conclusion and experience.
-.PP
-In general the performance differences between gnumalloc and this
-malloc are not that big.
-The major difference comes when primary storage is seriously
-over-committed, in which case gnumalloc
-wastes time paging in pages it's not going to use.
-In such cases a difference of as much as a factor of five in
-wall-clock time has been seen.
-Apart from that gnumalloc and this implementation are pretty
-much head-on performance wise.
-.PP
-Several legacy programs in the BSD 4.4 Lite distribution had
-code that depended on the memory returned from malloc to
-be zeroed, in a couple of cases free(3) was called more than
-once for the same allocation and a few cases even called free(3)
-with pointers to objects in the data section or on the stack.
-.PP
-A couple of users have reported that using this malloc on other
-platforms yielded "pretty impressive results", but no hard benchmarks
-have been made.
-.ds RH Acknowledgements & references.
-.NH
-Acknowledgements & references.
-.PP
-The first implementation of this algorithm was actually a file system,
-done in assembler using 5-hole ``Baudot'' paper tape for a drum storage
-device attached to a 20 bit germanium transistor computer with 2000 words
-of memory, but that was many years ago.
-.PP
-Peter Wemm <peter@FreeBSD.org> came up with the idea to store the
-page-directory in mmap(2)'ed memory instead of in the heap.
-This has proven to be a good move.
-.PP
-Lars Fredriksen <fredriks@mcs.com> found and identified a
-fence-post bug in the code.
diff --git a/share/doc/papers/malloc/implementation.ms b/share/doc/papers/malloc/implementation.ms
deleted file mode 100644
index 7813f8fdffc..00000000000
--- a/share/doc/papers/malloc/implementation.ms
+++ /dev/null
@@ -1,223 +0,0 @@
-.\"
-.\" ----------------------------------------------------------------------------
-.\" "THE BEER-WARE LICENSE" (Revision 42):
-.\" <phk@login.dknet.dk> wrote this file. As long as you retain this notice you
-.\" can do whatever you want with this stuff. If we meet some day, and you think
-.\" this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
-.\" ----------------------------------------------------------------------------
-.\"
-.\" $OpenBSD: implementation.ms,v 1.2 1997/01/05 22:18:07 tholo Exp $
-.\"
-.ds RH Implementation
-.NH
-Implementation
-.PP
-A new malloc(3) implementation was written to meet the goals,
-and to the extent possible to address the shortcomings listed previously.
-.PP
-The source is 1218 lines of C code, and can be found in FreeBSD 2.2
-(and probably later versions as well) as src/lib/libc/stdlib/malloc.c.
-.PP
-The main data structure is the
-.I page-directory
-which contains a
-.B void*
-for each page we have control over.
-The value can be one of:
-.IP
-.B MALLOC_NOT_MINE
-Another part of the code may call brk(2) to get a piece of the cake.
-Consequently we cannot rely on the memory we get from the kernel to
-be one consecutive piece of memory and therefore we need a way to
-mark such pages as "untouchable".
-.IP
-.B MALLOC_FREE
-This is a free page.
-.IP
-.B MALLOC_FIRST
-This is the first page in a (multi-)page allocation.
-.IP
-.B MALLOC_FOLLOW
-This is a subsequent page in a multi-page allocation.
-.IP
-.B
-struct pginfo*
-.R
-A pointer to a structure describing a partitioned page.
-.PP
-In addition there exists a linked list of small data structures that
-describe the free space as runs of free pages.
-.PP
-Notice that these structures are not part of the free pages themselves,
-but rather allocated with malloc so that the free pages themselves
-are never referenced while they are free.
-.PP
-When a request for storage comes in, it will be treated as a ``page''
-allocation if it is bigger than half a page.
-The freelist will be searched and the first run of free pages that
-can satisfy the request is used. The first page gets set to
-.B MALLOC_FIRST
-status; if more than that one page is needed, the rest of them get
-.B MALLOC_FOLLOW
-status in the page-directory.
-.PP
-If there were no pages on the free-list, brk(2) will be called, and
-the pages will get added to the page-directory with status
-.B MALLOC_FREE
-and the search restarts.
-.PP
-Freeing a number of pages is done by changing their state in the
-page directory to MALLOC_FREE, and then traversing the free-pages list to
-find the right place for this run of pages, possibly collapsing
-with the two neighbouring runs into one run and, if it is possible,
-releasing some memory back to the kernel by calling brk(2).
-.PP
-If the request is less than or equal to half of a page, its size will be
-rounded up to the nearest power of two before being processed
-and if the request is less than some minimum size, it is rounded up to
-that size.
-.PP
-These sub-page allocations are served from pages which are split up
-into some number of equal size chunks.
-For each of these pages a
-.B
-struct pginfo
-.R
-describes the size of the chunks on this page, how many there are,
-how many are free and so on.
-The description consists of a bitmap of used chunks, and various counters
-and numbers used to keep track of the stuff in the page.
-.PP
-For each size of sub-page allocation, the pginfo structures for the
-pages that have free chunks in them form a list.
-The heads of these lists are stored in predetermined slots at
-the beginning of the page directory to make access fast.
-.PP
-To allocate a chunk of some size, the head of the list for the
-corresponding size is examined, and a free chunk found, the number
-of free chunks on that page is decreased by one and if zero the
-pginfo structure is unlinked from the list.
-.PP
-To free a chunk, the page is derived from the pointer, the page table
-for that page contains a pointer to the pginfo structure, where the
-free bit is set for the chunk, the number of free chunks increased by
-one, and if equal to one, the pginfo structure is linked into the
-proper place on the list for this size of chunks.
-If the count increases to match the number of chunks on the page, the
-pginfo structure is unlinked from the list and free(3)'ed and the
-actual page itself is free(3)'ed too.
-.PP
-To be 100% correct performance-wise these lists should be ordered
-according to the recent number of accesses to that page. This
-information is not available and it would essentially mean a reordering
-of the list on every memory reference to keep it up-to-date.
-Instead they are ordered according to the address of the pages.
-Interestingly enough, in practice this comes out to almost the same
-thing performance wise.
-.PP
-It's not that surprising after all, it's the difference between
-following the crowd or actively directing where it can go, in both
-ways you can end up in the middle of it all.
-.PP
-The side effect of this compromise is that it also uses less storage,
-and the list never has to be reordered, all the ordering happens when
-pages are added or deleted.
-.PP
-It is an interesting twist to the implementation that the
-.B
-struct pginfo
-.R
-is allocated with malloc.
-That is, "as with malloc" to be painfully correct.
-The code knows the special case where the first (couple) of allocations on
-the page is actually the pginfo structure and deals with it accordingly.
-This avoids some silly "chicken and egg" issues.
-.ds RH Bells and whistles.
-.NH
-Bells and whistles.
-.PP
-brk(2) is actually not a very fast system call when you ask for storage.
-This is mainly because of the need by the kernel to zero the pages before
-handing them over, so therefore this implementation does not release
-back heap-pages, until there is a large chunk to release back to the kernel.
-Chances are pretty good that we will need it again pretty soon anyway.
-Since these pages are not accessed at all, they will soon be paged out
-and don't affect anything but swap-space usage.
-.PP
-The page directory is actually kept in a mmap(2)'ed piece of
-anonymous memory. This avoids some rather silly cases that
-would otherwise have to be handled when the page directory
-has to be extended.
-.PP
-One particularly nice feature is that all pointers passed to free(3)
-and realloc(3) can be checked conclusively for validity:
-First the pointer is masked to find the page. The page directory
-is then examined, it must contain either MALLOC_FIRST, in which
-case the pointer must point exactly at the page, or it can contain
-a struct pginfo*, in which case the pointer must point to one of
-the chunks described by that structure.
-Warnings will be printed on stderr and nothing will be done with
-the pointer in case it is found to be invalid.
-.PP
-An environment variable
-.B MALLOC_OPTIONS
-allows the user some control over the behaviour of malloc.
-Some of the more interesting options are:
-.IP
-.B Abort
-If malloc fails to allocate storage, core-dump the process with
-a message rather than expect it to handle this correctly.
-It's amazing how few programs actually handle this condition correctly,
-and consequently the havoc they can create is the more creative or
-destructive.
-.IP
-.B Dump
-Writes malloc statistics to a file called ``malloc.out'' prior
-to process termination.
-.IP
-.B Hint
-Pass a hint to the kernel about pages we no longer need through the
-madvise(2) system call. This can help performance on machines that
-page heavily by eliminating unnecessary page-ins and page-outs of
-unused data.
-.IP
-.B Realloc
-Always do a free and malloc when realloc(3) is called. The default
-is to leave things alone if the size of the allocation is still in
-the same size-class.
-For programs doing garbage collection using realloc(3) this makes the
-heap collapse faster, since malloc will reallocate from the
-lowest available address.
-.IP
-.B Junk
-will explicitly fill the allocated area with a particular value
-to try to detect if programs rely on it being zero.
-.IP
-.B Zero
-will explicitly zero out the allocated chunk of memory, while any
-space after the allocation in the chunk will be filled with the
-junk value to try to catch out of the chunk references.
-.ds RH The road not taken.
-.NH
-The road not yet taken.
-.PP
-A couple of avenues were explored that could be interesting in some
-set of circumstances.
-.PP
-Using mmap(2) instead of brk(2) was actually slower, since brk(2)
-knows a lot of the things that mmap has to find out first.
-.PP
-In general there is little room for further improvement of the
-time-overhead of the malloc; further improvements will have to
-be in the area of improving paging behaviour.
-.PP
-It is still under consideration to add a feature such that
-if realloc is called with two zero arguments, the internal
-allocations will be reallocated to perform a garbage collect.
-This could be used in certain types of programs to collapse
-the memory use, but so far it doesn't seem to be worth the effort.
-.PP
-Malloc/Free can be a significant point of contention in multi-threaded
-programs. Low-grain locking of the data-structures inside the
-implementation should be implemented to avoid excessive spin-waiting.
-
diff --git a/share/doc/papers/malloc/intro.ms b/share/doc/papers/malloc/intro.ms
deleted file mode 100644
index 99beeca7501..00000000000
--- a/share/doc/papers/malloc/intro.ms
+++ /dev/null
@@ -1,74 +0,0 @@
-.\"
-.\" ----------------------------------------------------------------------------
-.\" "THE BEER-WARE LICENSE" (Revision 42):
-.\" <phk@login.dknet.dk> wrote this file. As long as you retain this notice you
-.\" can do whatever you want with this stuff. If we meet some day, and you think
-.\" this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
-.\" ----------------------------------------------------------------------------
-.\"
-.\" $OpenBSD: intro.ms,v 1.3 2003/08/10 23:43:28 mickey Exp $
-.\"
-.ds RH Introduction
-.NH
-Introduction
-.PP
-Most programs need to allocate storage dynamically in addition
-to whatever static storage the compiler reserved at compile-time.
-To C programmers this fact is rather obvious, but for many years
-this was not an accepted and recognized fact, and many languages
-still used today don't support this notion adequately.
-.PP
-The classic UNIX kernel provides two very simple and powerful
-mechanisms for obtaining dynamic storage, the execution stack
-and the heap.
-The stack is usually put at the far upper end of the address-space,
-from where it grows down as far as needed, though this may depend on
-the CPU design.
-The heap starts at the end of the
-.B bss
-segment and grows upwards as needed.
-.PP
-There isn't really a kernel-interface to the stack as such.
-The kernel will allocate some amount of memory for it,
-not even telling the process the exact size.
-If the process needs more space than that, it will simply try to access
-it, hoping that the kernel will detect that an access has been
-attempted outside the allocated memory, and try to extend it.
-If the kernel fails to extend the stack, this could be because of lack
-of resources or permissions or because it may just be impossible
-to do in the first place, the process will usually be shot down by the
-kernel.
-.PP
-In the C language, there exists a little used interface to the stack,
-.B alloca(3) ,
-which will explicitly allocate space on the stack.
-This is not an interface to the kernel, but merely an adjustment
-done to the stack-pointer such that space will be available and
-unharmed by any subroutine calls yet to be made while the context
-of the current subroutine is intact.
-.PP
-Due to the nature of normal use of the stack, there is no corresponding
-"free" operator, but instead the space is returned when the current
-function returns to its caller and the stack frame is dismantled.
-This is the cause of much grief, and probably the single most important
-reason that alloca(3) is not, and should not be, used widely.
-.PP
-The heap on the other hand has an explicit kernel-interface in the
-system call
-.B brk(2) .
-The argument to brk(2) is a pointer to where the process wants the
-heap to end.
-There is also an interface called
-.B sbrk(2)
-taking an increment to the current end of the heap, but this is merely a
-.B libc
-front for brk(2).
-.PP
-In addition to these two memory resources, modern virtual memory kernels
-provide the mmap(2)/munmap(2) interface which allows almost complete
-control over any bit of virtual memory in the process address room.
-.PP
-Because of the generality of the mmap(2) interface and the way the
-data structures representing the regions are laid out, sbrk(2) is actually
-faster in use than the equivalent mmap(2) call, simply because the
-mmap(2) has to search for information that is implicit in the sbrk(2) call.
diff --git a/share/doc/papers/malloc/kernel.ms b/share/doc/papers/malloc/kernel.ms
deleted file mode 100644
index b84ebd0380a..00000000000
--- a/share/doc/papers/malloc/kernel.ms
+++ /dev/null
@@ -1,56 +0,0 @@
-.\"
-.\" ----------------------------------------------------------------------------
-.\" "THE BEER-WARE LICENSE" (Revision 42):
-.\" <phk@login.dknet.dk> wrote this file. As long as you retain this notice you
-.\" can do whatever you want with this stuff. If we meet some day, and you think
-.\" this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
-.\" ----------------------------------------------------------------------------
-.\"
-.\" $OpenBSD: kernel.ms,v 1.2 1997/01/05 22:18:09 tholo Exp $
-.\"
-.ds RH The kernel and memory
-.NH
-The kernel and memory
-.PP
-Brk(2) isn't a particularly convenient interface,
-it was probably made more to fit the memory model of the
-hardware being used, than to fill the needs of the programmers.
-.PP
-Before paged and/or virtual memory systems became
-common, the most popular memory management facility used for
-UNIX was segments.
-This was also very often the only vehicle for imposing protection on
-various parts of memory.
-Depending on the hardware, segments can be anything, and consequently
-how the kernels exploited them varied a lot from UNIX to UNIX and from
-machine to machine.
-.PP
-Typically a process would have one segment for the text section, one
-for the data and bss section combined and one for the stack.
-On some systems the text shared a segment with the data and bss, and was
-consequently just as writable as them.
-.PP
-In this setup all the brk(2) system call has to do is to find the
-right amount of free storage, possibly moving things around in physical
-memory, maybe even swapping out a segment or two to make space,
-and change the upper limit on the data segment according to the address given.
-.PP
-In a more modern page based virtual memory implementation this is still
-pretty much the situation, except that the granularity is now pages:
-The kernel finds the right number of free pages, possibly paging some
-pages out to free them up, and then plugs them into the page-table of
-the process.
-.PP
-As such the difference is very small, the real difference is that in
-the old world of swapping, the entire process was in primary
-storage (or it wouldn't be selected to be run), whereas in a modern VM kernel,
-a process might only have a subset of its pages in primary memory;
-the rest will be paged in, if and when the process tries to access them.
-.PP
-Only very few programs deal with the brk(2) interface directly, the
-few that do usually have their own memory management facilities.
-LISP or FORTH interpreters are good examples.
-Most other programs use the
-.B malloc(3)
-interface instead, and leave it to the malloc implementation to
-use brk(2) to get storage allocated from the kernel.
diff --git a/share/doc/papers/malloc/malloc.ms b/share/doc/papers/malloc/malloc.ms
deleted file mode 100644
index 78e52088efe..00000000000
--- a/share/doc/papers/malloc/malloc.ms
+++ /dev/null
@@ -1,72 +0,0 @@
-.\"
-.\" ----------------------------------------------------------------------------
-.\" "THE BEER-WARE LICENSE" (Revision 42):
-.\" <phk@login.dknet.dk> wrote this file. As long as you retain this notice you
-.\" can do whatever you want with this stuff. If we meet some day, and you think
-.\" this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
-.\" ----------------------------------------------------------------------------
-.\"
-.\" $OpenBSD: malloc.ms,v 1.2 1997/01/05 22:18:10 tholo Exp $
-.\"
-.ds RH Malloc and free
-.NH
-Malloc and free
-.PP
-The job of malloc(3) is to turn the rather simple
-brk(2) facility into a service programs can
-actually use without getting hurt.
-.PP
-The archetypical malloc(3) implementation keeps track of the memory between
-the end of the bss section, as defined by the
-.B _end
-symbol, and the current brk(2) point using a linked list of chunks of memory.
-Each item on the list has a status as either free or used, a pointer
-to the next entry and in most cases to the previous as well, to speed
-up inserts and deletes in the list.
-.PP
-When a malloc(3) request comes in, the list is traversed from the
-front and if a free chunk big enough to hold the request is found,
-it is returned, if the free chunk is bigger than the size requested,
-a new free chunk is made from the excess and put back on the list.
-.PP
-When a chunk is
-.B free(3) 'ed,
-the chunk is found in the list, its status
-is changed to free and if one or both of the surrounding chunks
-are free, they are collapsed to one.
-.PP
-A third kind of request,
-.B realloc(3)
-exists, it will resize
-a chunk, trying to avoid copying the contents if possible.
-It is seldom used, and has only had a significant impact on performance
-in a few special situations.
-The typical pattern of use is to malloc(3) a chunk of the maximum size
-needed, read in the data and adjust the size of the chunk to match the
-size of the data read using realloc(3).
-.PP
-For reasons of efficiency, the original implementation of malloc(3)
-put the small structure used to contain the next and previous pointers
-plus the state of the chunk right before the chunk itself.
-.PP
-As a matter of fact, the canonical malloc(3) implementation can be
-studied in the ``Old testament'', chapter 8 verse 7 [Kernighan & Ritchie]
-.PP
-Various optimisations can be applied to the above basic algorithm:
-.IP
-If in freeing a chunk, we end up with the last chunk on the list being
-free, we can return that to the kernel by calling brk(2) with the first
-address of that chunk and then make the previous chunk the last on the
-chain by terminating its ``next'' pointer.
-.IP
-A best-fit algorithm can be used instead of first-fit at an expense
-of memory, because statistically fewer chances to brk(2) backwards will
-present themselves.
-.IP
-Splitting the list in two, one for used and one for free chunks to
-speed the searching.
-.IP
-Putting free chunks on one of several free-lists depending on the size
-to speed allocation.
-.IP
-\&...
diff --git a/share/doc/papers/malloc/performance.ms b/share/doc/papers/malloc/performance.ms
deleted file mode 100644
index 3f939f2c63e..00000000000
--- a/share/doc/papers/malloc/performance.ms
+++ /dev/null
@@ -1,113 +0,0 @@
-.\"
-.\" ----------------------------------------------------------------------------
-.\" "THE BEER-WARE LICENSE" (Revision 42):
-.\" <phk@login.dknet.dk> wrote this file. As long as you retain this notice you
-.\" can do whatever you want with this stuff. If we meet some day, and you think
-.\" this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
-.\" ----------------------------------------------------------------------------
-.\"
-.\" $OpenBSD: performance.ms,v 1.2 1997/01/05 22:18:10 tholo Exp $
-.\"
-.ds RH Performance
-.NH
-Performance
-.PP
-Performance for a malloc(3) implementation comes as two variables:
-.IP
-A: How much time does it use for searching and manipulating data structures.
-We will refer to this as ``overhead time''.
-.IP
-B: How well does it manage the storage.
-This rather vague metric we call ``quality of allocation''.
-.PP
-The overhead time is easy to measure, just do a lot of malloc/free calls
-of various kinds and combinations, and compare the results.
-.PP
-The quality of allocation is not quite as simple as that.
-One measure of quality is the size of the process, that should obviously
-be minimized.
-Another measure is the execution time of the process.
-This is not an obvious indicator of quality, but people will generally
-agree that it should be minimized as well, and if malloc(3) can do
-anything to do so, it should.
-Explanation why it is still a good metric follows:
-.PP
-In a traditional segment/swap kernel, the desirable behaviour of a process
-is to keep the brk(2) as low as possible, thus minimizing the size of the
-data/bss/heap segment, which in turn translates to a smaller process and
-a smaller probability of the process being swapped out, qed: faster
-execution time as an average.
-.PP
-In a paging environment this is not a bad choice for a default, but
-a couple of details need to be looked at much more carefully.
-.PP
-First of all, the size of a process becomes a more vague concept since
-only the pages that are actually used need to be in primary storage
-for execution to progress, and they only need to be there when used.
-That implies that many more processes can fit in the same amount of
-primary storage, since most processes have a high degree of locality
-of reference and thus only need some fraction of their pages to actually
-do their job.
-.PP
-From this it follows that the interesting size of the process, is some
-subset of the total amount of virtual memory occupied by the process.
-This number isn't a constant, it varies depending on the whereabouts
-of the process, and it may indeed fluctuate wildly over the lifetime
-of the process.
-.PP
-One of the names for this vague concept is ``current working set''.
-It has been defined many different ways over the years, mostly to
-satisfy and support claims in marketing or benchmark contexts.
-.PP
-For now we can simply say that it is the number of pages the process
-needs in order to run at a sufficiently low paging rate in a congested
-primary storage.
-(If primary storage isn't congested, this is not really important
-of course, but most systems would be better off using the pages for
-disk-cache or similar functions, so from that perspective it will
-always be congested.)
-If the number of pages is too small, the process will wait for its
-pages to be read from secondary storage much of the time, if it's too
-big, the space could be used better for something else.
-.PP
-From the view of any single process, this number of pages is
-"all of my pages", but from the point of view of the OS it should
-be tuned to maximise the total throughput of all the processes on
-the machine at the time.
-This is usually done using various kinds of least-recently-used
-replacement algorithms to select page candidates for replacement.
-.PP
-With this knowledge, can we decide what the performance goal is for
-a modern malloc(3) ?
-Well, it's almost as simple as it used to be:
-.B
-Minimize the number of pages accessed.
-.R
-.PP
-This really is the core of it all.
-If the number of accessed pages is small, then locality of reference is
-higher, and all kinds of caches (which essentially is what the
-primary storage is in a VM system) work better.
-.PP
-It's interesting to notice that the classical malloc fails on this one
-because the information about free chunks is kept with the free
-chunks themselves. In some of the benchmarks this came out as all the
-pages were paged in every time a malloc was made, because malloc
-had to traverse the free-list to find a suitable chunk for the allocation.
-If memory is not in use, then you shouldn't access it.
-.PP
-The secondary goal is more evident:
-.B
-Try to work in pages.
-.R
-.PP
-That makes it easier for the kernel, and wastes less virtual memory.
-Most modern implementations do this when they interact with the
-kernel, but few try to avoid objects spanning pages.
-.PP
-If an object's size
-is less than or equal to a page, there is no reason for it to span two pages.
-Having objects span pages means that two pages must be
-paged in, if that object is accessed.
-.PP
-With this analysis in the luggage, we can start coding.
diff --git a/share/doc/papers/malloc/problems.ms b/share/doc/papers/malloc/problems.ms
deleted file mode 100644
index 9dbf7ee2c07..00000000000
--- a/share/doc/papers/malloc/problems.ms
+++ /dev/null
@@ -1,54 +0,0 @@
-.\"
-.\" ----------------------------------------------------------------------------
-.\" "THE BEER-WARE LICENSE" (Revision 42):
-.\" <phk@login.dknet.dk> wrote this file. As long as you retain this notice you
-.\" can do whatever you want with this stuff. If we meet some day, and you think
-.\" this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
-.\" ----------------------------------------------------------------------------
-.\"
-.\" $OpenBSD: problems.ms,v 1.2 1997/01/05 22:18:11 tholo Exp $
-.\"
-.ds RH The problems
-.NH
-The problems
-.PP
-Even though malloc(3) is a lot simpler to use
-than the raw brk(2)/sbrk(2) interface
-or maybe exactly because
-of that,
-a lot of problems arise from its use.
-.IP
-Writing to memory outside the allocated chunk.
-The most likely result being that the data structure used to hold
-the links and flags about this chunk or the next one gets trashed.
-.IP
-Freeing a pointer to memory not allocated by malloc.
-This is often a pointer that points to an object on the stack or in the
-data-section, in newer implementations of C it may even be in the text-
-section where it is likely to be readonly.
-Some malloc implementations detect this, some don't.
-.IP
-Freeing a modified pointer. This is a very common mistake, freeing
-not the pointer malloc(3) returned, but rather some offset from it.
-Some mallocs will handle this correctly if the offset is positive.
-.IP
-Freeing the same pointer more than once.
-.IP
-Accessing memory in a chunk after it has been free(3)'ed.
-.PP
-The handling of these problems has traditionally been weak.
-A core-dump was the most common form of "handling", but in rare
-cases one could experience the famous "malloc: corrupt arena."
-message before the core-dump.
-Even worse though, very often the program will just continue,
-possibly giving wrong results.
-.PP
-An entirely different form of problem is that
-the memory returned by malloc(3) can contain any value.
-Unfortunately most kernels, correctly, zero out the storage they
-provide with brk(2), and thus the storage malloc returns will be zeroed
-in many cases as well, so programmers are not particularly apt to notice
-that their code depends on malloc'ed storage being zeroed.
-.PP
-With problems this big and error handling this weak, it is not
-surprising that problems are hard and time consuming to find and fix.
diff --git a/share/doc/papers/memfs/0.t b/share/doc/papers/memfs/0.t
deleted file mode 100644
index 2de89693aec..00000000000
--- a/share/doc/papers/memfs/0.t
+++ /dev/null
@@ -1,84 +0,0 @@
-.\" $OpenBSD: 0.t,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1990 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)0.t 5.1 (Berkeley) 4/16/91
-.\"
-.rm CM
-.nr PO 1.25i
-.ds CH "
-.ds CF "%
-.nr Fn 0 1
-.ds b3 4.3\s-1BSD\s+1
-.de KI
-.ds Lb "Fig. \\n+(Fn
-.KF
-.ce 1
-Figure \\n(Fn - \\$1.
-..
-.de SM
-\\s-1\\$1\\s+1\\$2
-..
-.de NM
-\&\fI\\$1\fP\\$2
-..
-.de RN
-\&\fI\\$1\fP\^(\^)\\$2
-..
-.de PN
-\&\fB\\$1\fP\\$2
-..
-.TL
-A Pageable Memory Based Filesystem
-.AU
-Marshall Kirk McKusick
-.AU
-Michael J. Karels
-.AU
-Keith Bostic
-.AI
-Computer Systems Research Group
-Computer Science Division
-Department of Electrical Engineering and Computer Science
-University of California, Berkeley
-Berkeley, California 94720
-.sp
-email: mckusick@cs.Berkeley.EDU
-telephone: 415-642-4948
-.AB
-This paper describes the motivations for memory-based filesystems.
-It compares techniques used to implement them and
-describes the drawbacks of using dedicated memory to
-support such filesystems.
-To avoid the drawbacks of using dedicated memory,
-it discusses building a simple memory-based
-filesystem in pageable memory.
-It details the performance characteristics of this filesystem
-and concludes with areas for future work.
-.AE
-.LP
diff --git a/share/doc/papers/memfs/1.t b/share/doc/papers/memfs/1.t
deleted file mode 100644
index 83077f65c2d..00000000000
--- a/share/doc/papers/memfs/1.t
+++ /dev/null
@@ -1,390 +0,0 @@
-.\" $OpenBSD: 1.t,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1990 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)1.t 5.1 (Berkeley) 4/16/91
-.\"
-.nr PS 11
-.nr VS 13
-.SH
-Introduction
-.PP
-This paper describes the motivation for and implementation of
-a memory-based filesystem.
-Memory-based filesystems have existed for a long time;
-they have generally been marketed as RAM disks or sometimes
-as software packages that use the machine's general purpose memory.
-.[
-white
-.]
-.PP
-A RAM disk is designed to appear like any other disk peripheral
-connected to a machine.
-It is normally interfaced to the processor through the I/O bus
-and is accessed through a device driver similar or sometimes identical
-to the device driver used for a normal magnetic disk.
-The device driver sends requests for blocks of data to the device
-and the requested data is then DMA'ed to or from the requested block.
-Instead of storing its data on a rotating magnetic disk,
-the RAM disk stores its data in a large array of random access memory
-or bubble memory.
-Thus, the latency of accessing the RAM disk is nearly zero
-compared to the 15-50 milliseconds of latency incurred when
-accessing rotating magnetic media.
-RAM disks also have the benefit of being able to transfer data at
-the maximum DMA rate of the system,
-while disks are typically limited by the rate that the data passes
-under the disk head.
-.PP
-Software packages simulating RAM disks operate by allocating
-a fixed partition of the system memory.
-The software then provides a device driver interface similar
-to the one described for hardware RAM disks,
-except that it uses memory-to-memory copy instead of DMA to move
-the data between the RAM disk and the system buffers,
-or it maps the contents of the RAM disk into the system buffers.
-Because the memory used by the RAM disk is not available for
-other purposes, software RAM-disk solutions are used primarily
-for machines with limited addressing capabilities such as PC's
-that do not have an effective way of using the extra memory anyway.
-.PP
-Most software RAM disks lose their contents when the system is powered
-down or rebooted.
-The contents can be saved by using battery backed-up memory,
-by storing critical filesystem data structures in the filesystem,
-and by running a consistency check program after each reboot.
-These conditions increase the hardware cost
-and potentially slow down the speed of the disk.
-Thus, RAM-disk filesystems are not typically
-designed to survive power failures;
-because of their volatility, their usefulness is limited to transient
-or easily recreated information such as might be found in
-.PN /tmp .
-Their primary benefit is that they have higher throughput
-than disk based filesystems.
-.[
-smith
-.]
-This improved throughput is particularly useful for utilities that
-make heavy use of temporary files, such as compilers.
-On fast processors, nearly half of the elapsed time for a compilation
-is spent waiting for synchronous operations required for file
-creation and deletion.
-The use of the memory-based filesystem nearly eliminates this waiting time.
-.PP
-Using dedicated memory to exclusively support a RAM disk
-is a poor use of resources.
-The overall throughput of the system can be improved
-by using the memory where it is getting the highest access rate.
-These needs may shift between supporting process virtual address spaces
-and caching frequently used disk blocks.
-If the memory is dedicated to the filesystem,
-it is better used in a buffer cache.
-The buffer cache permits faster access to the data
-because it requires only a single memory-to-memory copy
-from the kernel to the user process.
-The use of memory in a RAM-disk configuration may require two
-memory-to-memory copies, one from the RAM disk
-to the buffer cache,
-then another copy from the buffer cache to the user process.
-.PP
-The new work being presented in this paper is building a prototype
-RAM-disk filesystem in pageable memory instead of dedicated memory.
-The goal is to provide the speed benefits of a RAM disk
-without paying the performance penalty inherent in dedicating
-part of the physical memory on the machine to the RAM disk.
-By building the filesystem in pageable memory,
-it competes with other processes for the available memory.
-When memory runs short, the paging system pushes its
-least-recently-used pages to backing store.
-Being pageable also allows the filesystem to be much larger than
-would be practical if it were limited by the amount of physical
-memory that could be dedicated to that purpose.
-We typically operate our
-.PN /tmp
-with 30 to 60 megabytes of space
-which is larger than the amount of memory on the machine.
-This configuration allows small files to be accessed quickly,
-while still allowing
-.PN /tmp
-to be used for big files,
-although at a speed more typical of normal, disk-based filesystems.
-.PP
-An alternative to building a memory-based filesystem would be to have
-a filesystem that never did operations synchronously and never
-flushed its dirty buffers to disk.
-However, we believe that such a filesystem would either
-use a disproportionately large percentage of the buffer
-cache space, to the detriment of other filesystems,
-or would require the paging system to flush its dirty pages.
-Waiting for other filesystems to push dirty pages
-subjects them to delays while waiting for the pages to be written.
-We await the results of others trying this approach.
-.[
-Ohta
-.]
-.SH
-Implementation
-.PP
-The current implementation took less time to write than did this paper.
-It consists of 560 lines of kernel code (1.7K text + data)
-and some minor modifications to the program that builds
-disk based filesystems, \fInewfs\fP.
-A condensed version of the kernel code for the
-memory-based filesystem is reproduced in Appendix 1.
-.PP
-A filesystem is created by invoking the modified \fInewfs\fP, with
-an option telling it to create a memory-based filesystem.
-It allocates a section of virtual address space of the requested
-size and builds a filesystem in the memory
-instead of on a disk partition.
-When built, it does a \fImount\fP system call specifying a filesystem type of
-.SM MFS
-(Memory File System).
-The auxiliary data parameter to the mount call specifies a pointer
-to the base of the memory in which it has built the filesystem.
-(The auxiliary data parameter used by the local filesystem, \fIufs\fP,
-specifies the block device containing the filesystem.)
-.PP
-The mount system call allocates and initializes a mount table
-entry and then calls the filesystem-specific mount routine.
-The filesystem-specific routine is responsible for doing
-the mount and initializing the filesystem-specific
-portion of the mount table entry.
-The memory-based filesystem-specific mount routine,
-.RN mfs_mount ,
-is shown in Appendix 1.
-It allocates a block-device vnode to represent the memory disk device.
-In the private area of this vnode it stores the base address of
-the filesystem and the process identifier of the \fInewfs\fP process
-for later reference when doing I/O.
-It also initializes an I/O list that it
-uses to record outstanding I/O requests.
-It can then call the \fIufs\fP filesystem mount routine,
-passing the special block-device vnode that it has created
-instead of the usual disk block-device vnode.
-The mount proceeds just as any other local mount, except that
-requests to read from the block device are vectored through
-.RN mfs_strategy
-(described below) instead of the usual
-.RN spec_strategy
-block device I/O function.
-When the mount is completed,
-.RN mfs_mount
-does not return as most other filesystem mount functions do;
-instead it sleeps in the kernel awaiting I/O requests.
-Each time an I/O request is posted for the filesystem,
-a wakeup is issued for the corresponding \fInewfs\fP process.
-When awakened, the process checks for requests on its buffer list.
-A read request is serviced by copying data from the section of the
-\fInewfs\fP address space corresponding to the requested disk block
-to the kernel buffer.
-Similarly a write request is serviced by copying data to the section of the
-\fInewfs\fP address space corresponding to the requested disk block
-from the kernel buffer.
-When all the requests have been serviced, the \fInewfs\fP
-process returns to sleep to await more requests.
-.PP
-Once mounted,
-all operations on files in the memory-based filesystem are handled
-by the \fIufs\fP filesystem code until they get to the point where the
-filesystem needs to do I/O on the device.
-Here, the filesystem encounters the second piece of the
-memory-based filesystem.
-Instead of calling the special-device strategy routine,
-it calls the memory-based strategy routine,
-.RN mfs_strategy .
-Usually,
-the request is serviced by linking the buffer onto the
-I/O list for the memory-based filesystem
-vnode and sending a wakeup to the \fInewfs\fP process.
-This wakeup results in a context-switch to the \fInewfs\fP
-process, which does a copyin or copyout as described above.
-The strategy routine must be careful to check whether
-the I/O request is coming from the \fInewfs\fP process itself, however.
-Such requests happen during mount and unmount operations,
-when the kernel is reading and writing the superblock.
-Here,
-.RN mfs_strategy
-must do the I/O itself to avoid deadlock.
-.PP
-The final piece of kernel code to support the
-memory-based filesystem is the close routine.
-After the filesystem has been successfully unmounted,
-the device close routine is called.
-For a memory-based filesystem, the device close routine is
-.RN mfs_close .
-This routine flushes any pending I/O requests,
-then sets the I/O list head to a special value
-that is recognized by the I/O servicing loop in
-.RN mfs_mount
-as an indication that the filesystem is unmounted.
-The
-.RN mfs_mount
-routine exits, in turn causing the \fInewfs\fP process
-to exit, resulting in the filesystem vanishing in a cloud of dirty pages.
-.PP
-The paging of the filesystem does not require any additional
-code beyond that already in the kernel to support virtual memory.
-The \fInewfs\fP process competes with other processes on an equal basis
-for the machine's available memory.
-Data pages of the filesystem that have not yet been used
-are zero-fill-on-demand pages that do not occupy memory,
-although they currently allocate space in backing store.
-As long as memory is plentiful, the entire contents of the filesystem
-remain memory resident.
-When memory runs short, the oldest pages of \fInewfs\fP will be
-pushed to backing store as part of the normal paging activity.
-The pages that are pushed usually hold the contents of
-files that have been created in the memory-based filesystem
-but have not been recently accessed (or have been deleted).
-.[
-leffler
-.]
-.SH
-Performance
-.PP
-The performance of the current memory-based filesystem is determined by
-the memory-to-memory copy speed of the processor.
-Empirically we find that the throughput is about 45% of this
-memory-to-memory copy speed.
-The basic set of steps for each block written is:
-.IP 1)
-memory-to-memory copy from the user process doing the write to a kernel buffer
-.IP 2)
-context-switch to the \fInewfs\fP process
-.IP 3)
-memory-to-memory copy from the kernel buffer to the \fInewfs\fP address space
-.IP 4)
-context switch back to the writing process
-.LP
-Thus each write requires at least two memory-to-memory copies
-accounting for about 90% of the
-.SM CPU
-time.
-The remaining 10% is consumed in the context switches and
-the filesystem allocation and block location code.
-The actual context switch count is really only about half
-of the worst case outlined above because
-read-ahead and write-behind allow multiple blocks
-to be handled with each context switch.
-.PP
-On the six-\c
-.SM "MIPS CCI"
-Power 6/32 machine,
-the raw reading and writing speed is only about twice that of
-a regular disk-based filesystem.
-However, for processes that create and delete many files,
-the speedup is considerably greater.
-The reason for the speedup is that the filesystem
-must do two synchronous operations to create a file,
-first writing the allocated inode to disk, then creating the
-directory entry.
-Deleting a file similarly requires at least two synchronous
-operations.
-Here, the low latency of the memory-based filesystem is
-noticeable compared to the disk-based filesystem,
-as a synchronous operation can be done with
-just two context switches instead of incurring the disk latency.
-.SH
-Future Work
-.PP
-The most obvious shortcoming of the current implementation
-is that filesystem blocks are copied twice, once between the \fInewfs\fP
-process' address space and the kernel buffer cache,
-and once between the kernel buffer and the requesting process.
-These copies are done in different process contexts, necessitating
-two context switches per group of I/O requests.
-These problems arise because of the current inability of the kernel
-to do page-in operations
-for an address space other than that of the currently-running process,
-and the current inconvenience of mapping process-owned pages into the kernel
-buffer cache.
-Both of these problems are expected to be solved in the next version
-of the virtual memory system,
-and thus we chose not to address them in the current implementation.
-With the new version of the virtual memory system, we expect to use
-any part of physical memory as part of the buffer cache,
-even though it will not be entirely addressable at once within the kernel.
-In that system, the implementation of a memory-based filesystem
-that avoids the double copy and context switches will be much easier.
-.PP
-Ideally part of the kernel's address space would reside in pageable memory.
-Once such a facility is available it would be most efficient to
-build a memory-based filesystem within the kernel.
-One potential problem with such a scheme is that many kernels
-are limited to a small address space (usually a few megabytes).
-This restriction limits the size of memory-based
-filesystem that such a machine can support.
-On such a machine, the kernel can describe a memory-based filesystem
-that is larger than its address space and use a ``window''
-to map the larger filesystem address space into its limited address space.
-The window would maintain a cache of recently accessed pages.
-The problem with this scheme is that if the working set of
-active pages is greater than the size of the window, then
-much time is spent remapping pages and invalidating
-translation buffers.
-Alternatively, a separate address space could be constructed for each
-memory-based filesystem as in the current implementation,
-and the memory-resident pages of that address space could be mapped
-exactly as other cached pages are accessed.
-.PP
-The current system uses the existing local filesystem structures
-and code to implement the memory-based filesystem.
-The major advantages of this approach are the sharing of code
-and the simplicity of the approach.
-There are several disadvantages, however.
-One is that the size of the filesystem is fixed at mount time.
-This means that a fixed number of inodes (files) and data blocks
-can be supported.
-Currently, this approach requires enough swap space for the entire
-filesystem, and prevents expansion and contraction of the filesystem on demand.
-The current design also prevents the filesystem from taking advantage
-of the memory-resident character of the filesystem.
-It would be interesting to explore other filesystem implementations
-that would be less expensive to execute and that would make better
-use of the space.
-For example, the current filesystem structure is optimized for magnetic
-disks.
-It includes replicated control structures, ``cylinder groups''
-with separate allocation maps and control structures,
-and data structures that optimize rotational layout of files.
-None of this is useful in a memory-based filesystem (at least when the
-backing store for the filesystem is dynamically allocated and not
-contiguous on a single disk type).
-On the other hand,
-directories could be implemented using dynamically-allocated
-memory organized as linked lists or trees rather than as files stored
-in ``disk'' blocks.
-Allocation and location of pages for file data might use virtual memory
-primitives and data structures rather than direct and indirect blocks.
-A reimplementation along these lines will be considered when the virtual
-memory system in the current system has been replaced.
-.[
-$LIST$
-.]
diff --git a/share/doc/papers/memfs/A.t b/share/doc/papers/memfs/A.t
deleted file mode 100644
index 418a2e8627f..00000000000
--- a/share/doc/papers/memfs/A.t
+++ /dev/null
@@ -1,171 +0,0 @@
-.\" $OpenBSD: A.t,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1990 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)A.t 5.1 (Berkeley) 4/16/91
-.\"
-.bp
-.nr PS 10
-.nr VS 12
-.SH
-Appendix A - Implementation Details
-.LP
-.nf
-.vS
-/*
- * Control data for one memory based file system; it is reached
- * from the file system's device vnode through VTOMFS().
- */
-struct mfsnode {
- struct vnode *mfs_vnode; /* vnode associated with this mfsnode */
- caddr_t mfs_baseoff; /* base of file system in memory (mfs_args.base) */
- long mfs_size; /* size of memory file system (mfs_args.size) */
- pid_t mfs_pid; /* supporting process pid, recorded by mfs_mount */
- struct buf *mfs_buflist; /* pending I/O requests linked by av_forw; MFS_EXIT after close */
-};
-
-/*
- * Convert between mfsnode pointers and vnode pointers.
- */
-#define VTOMFS(vp) ((struct mfsnode *)(vp)->v_data)
-#define MFSTOV(mfsp) ((mfsp)->mfs_vnode)
-#define MFS_EXIT ((struct buf *)-1) /* mfs_buflist value that makes mfs_mount leave its I/O loop */
-
-/*
- * Arguments to mount MFS; mfs_mount fetches them with copyin().
- */
-struct mfs_args {
- char *name; /* name to export for statfs */
- caddr_t base; /* base address of file system in memory; saved as mfs_baseoff */
- u_long size; /* size of file system; saved as mfs_size */
-};
-.bp
-/*
- * Mount an MFS filesystem, then service its I/O requests.
- *
- *	mp	mount structure passed on to mountfs()
- *	path	unused in this listing
- *	data	address, in the calling process, of a struct mfs_args
- *
- * The calling process is recorded as the supporting process.  The
- * function returns only after mfs_buflist has been set to MFS_EXIT.
- * Return values of copyin() and copyout() are not checked here.
- */
-mfs_mount(mp, path, data)
-	struct mount *mp;
-	char *path;
-	caddr_t data;
-{
-	struct vnode *devvp;
-	struct mfsnode *mfsp;
-	struct buf *bp;
-	struct mfs_args args;
-	caddr_t offset;		/* address of the requested block in the process */
-
-	/*
-	 * Create a block device to represent the disk.
-	 */
-	devvp = getnewvnode(VT_MFS, VBLK, &mfs_vnodeops);
-	mfsp = VTOMFS(devvp);
-	/*
-	 * Save base address of the filesystem from the supporting process.
-	 */
-	copyin(data, &args, sizeof(args));
-	mfsp->mfs_baseoff = args.base;
-	mfsp->mfs_size = args.size;
-	/*
-	 * Record the process identifier of the supporting process.
-	 */
-	mfsp->mfs_pid = u.u_procp->p_pid;
-	/*
-	 * Mount the filesystem.
-	 */
-	mfsp->mfs_buflist = NULL;
-	mountfs(devvp, mp);
-	/*
-	 * Loop processing I/O requests.  Each pass drains the request
-	 * list and then sleeps on the device vnode until mfs_strategy
-	 * or mfs_close issues a wakeup on it.
-	 */
-	while (mfsp->mfs_buflist != MFS_EXIT) {
-		while (mfsp->mfs_buflist != NULL) {
-			bp = mfsp->mfs_buflist;
-			mfsp->mfs_buflist = bp->av_forw;
-			offset = mfsp->mfs_baseoff + (bp->b_blkno * DEV_BSIZE);
-			if (bp->b_flags & B_READ)	/* process memory -> kernel buffer */
-				copyin(offset, bp->b_un.b_addr, bp->b_bcount);
-			else /* write_request: kernel buffer -> process memory */
-				copyout(bp->b_un.b_addr, offset, bp->b_bcount);
-			biodone(bp);
-		}
-		sleep((caddr_t)devvp, PWAIT);
-	}
-}
-.bp
-/*
- * If the MFS process requests the I/O then we must do it directly.
- * Otherwise put the request on the list and request the MFS process
- * to be run.
- *
- *	bp	buffer describing the request; bp->b_vp is the MFS
- *		device vnode
- */
-mfs_strategy(bp)
-	struct buf *bp;
-{
-	struct vnode *devvp;
-	struct mfsnode *mfsp;
-	caddr_t offset;		/* an address in the supporting process, not a file offset */
-
-	devvp = bp->b_vp;
-	mfsp = VTOMFS(devvp);
-	if (mfsp->mfs_pid == u.u_procp->p_pid) {
-		/*
-		 * The caller is the supporting process itself, so do
-		 * the copy here instead of queueing the request.
-		 */
-		offset = mfsp->mfs_baseoff + (bp->b_blkno * DEV_BSIZE);
-		if (bp->b_flags & B_READ)
-			copyin(offset, bp->b_un.b_addr, bp->b_bcount);
-		else /* write_request */
-			copyout(bp->b_un.b_addr, offset, bp->b_bcount);
-		biodone(bp);
-	} else {
-		/*
-		 * Link the request at the head of the list and wake
-		 * the sleeper on the device vnode.
-		 */
-		bp->av_forw = mfsp->mfs_buflist;
-		mfsp->mfs_buflist = bp;
-		wakeup((caddr_t)devvp);
-	}
-}
-
-/*
- * The close routine is called by unmount after the filesystem
- * has been successfully unmounted.
- *
- *	devvp	device vnode of the memory based filesystem
- *
- * Remaining requests are handed to mfs_doio(), which is not part of
- * this listing (presumably the same copyin/copyout step used by
- * mfs_strategy; confirm against the kernel source).
- */
-mfs_close(devvp)
-	struct vnode *devvp;
-{
-	struct mfsnode *mfsp = VTOMFS(devvp);
-	struct buf *bp;
-
-	/*
-	 * Finish any pending I/O requests.
-	 */
-	while ((bp = mfsp->mfs_buflist) != NULL) {
-		mfsp->mfs_buflist = bp->av_forw;
-		mfs_doio(bp, mfsp->mfs_baseoff);
-		wakeup((caddr_t)bp);
-	}
-	/*
-	 * Send a request to the filesystem server to exit.  The wakeup
-	 * channel is the device vnode, the address mfs_mount sleeps on.
-	 */
-	mfsp->mfs_buflist = MFS_EXIT;
-	wakeup((caddr_t)devvp);
-}
-.vE
diff --git a/share/doc/papers/memfs/Makefile b/share/doc/papers/memfs/Makefile
deleted file mode 100644
index d6ecf22189f..00000000000
--- a/share/doc/papers/memfs/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-# $OpenBSD: Makefile,v 1.3 2004/02/01 14:22:44 jmc Exp $
-# Builds paper.ps and paper.txt: ${REFER} merges ${SRCS} and A.gt into paper.t,
-# which ${ROFF} then formats together with tmac.srefs.
-DIR= papers/memfs
-SRCS= 0.t 1.t
-MACROS= -ms
-REFER= refer -n -e -l -s -p ref.bib
-EXTRA= ref.bib A.t tmac.srefs
-CLEANFILES=ref.bib.i A.gt paper.t
-
-paper.ps: paper.t
- ${ROFF} tmac.srefs paper.t > ${.TARGET}
-paper.txt: paper.t
- ${ROFF} -Tascii tmac.srefs paper.t > ${.TARGET}
-
-paper.t: ${SRCS} ref.bib.i A.gt
- ${REFER} ${SRCS} A.gt > ${.TARGET}
-
-ref.bib.i: ref.bib
- ${INDXBIB} ref.bib
-
-A.gt: A.t
- ${GRIND} < A.t > A.gt
-
-.include <bsd.doc.mk>
diff --git a/share/doc/papers/memfs/ref.bib b/share/doc/papers/memfs/ref.bib
deleted file mode 100644
index 89ae5070dd7..00000000000
--- a/share/doc/papers/memfs/ref.bib
+++ /dev/null
@@ -1,49 +0,0 @@
-%A M. K. McKusick
-%A J. M. Bloom
-%A M. J. Karels
-%T Bug Fixes and Changes in 4.3BSD
-%B \s-1UNIX\s0 System Manager's Manual, 4.3 Berkeley Software Distribution, Virtual VAX-11 Version
-%I \s-1USENIX\s0 Association
-%C Berkeley, CA
-%P 12:1\-22
-%D 1986
-
-%A M. J. Karels
-%T Changes to the Kernel in 4.3BSD
-%B \s-1UNIX\s0 System Manager's Manual, 4.3 Berkeley Software Distribution, Virtual VAX-11 Version
-%I \s-1USENIX\s0 Association
-%C Berkeley, CA
-%P 13:1\-32
-%D 1986
-
-%A S. J. Leffler
-%A M. K. McKusick
-%A M. J. Karels
-%A J. S. Quarterman
-%T The Design and Implementation of the 4.3BSD UNIX Operating System
-%I Addison-Wesley
-%C Reading, MA
-%D 1989
-
-%A R. M. White
-%T Disk Storage Technology
-%J Scientific American
-%V 243
-%N 2
-%P 138\-148
-%D August 1980
-
-%A A. J. Smith
-%T Bibliography on file and I/O system optimizations and related topics
-%J Operating Systems Review
-%V 14
-%N 4
-%P 39\-54
-%D October 1981
-
-%A Masataka Ohta
-%A Hiroshi Tezuka
-%T A Fast /tmp File System by Async Mount Option
-%J \s-1USENIX\s0 Association Conference Proceedings
-%P ???\-???
-%D June 1990
diff --git a/share/doc/papers/memfs/spell.ok b/share/doc/papers/memfs/spell.ok
deleted file mode 100644
index 7aa465fb693..00000000000
--- a/share/doc/papers/memfs/spell.ok
+++ /dev/null
@@ -1,18 +0,0 @@
-Berkeley.EDU
-Bostic
-CH
-CM
-Fn
-Karels
-Lb
-MFS
-McKusick
-Pageable
-copyin
-copyout
-email
-filesystem
-filesystems
-mckusick
-pageable
-tmp
diff --git a/share/doc/papers/memfs/tmac.srefs b/share/doc/papers/memfs/tmac.srefs
deleted file mode 100644
index 42f16b645f6..00000000000
--- a/share/doc/papers/memfs/tmac.srefs
+++ /dev/null
@@ -1,179 +0,0 @@
-.\" $OpenBSD: tmac.srefs,v 1.2 2001/02/03 08:14:58 niklas Exp $
-.\"
-.\" @(#)tmac.srefs 1.14 11/2/88
-.\" REFER macros .... citations
-.de []
-.][ \\$1
-..
-.de ][
-.if \\$1>5 .tm Bad arg to []
-.[\\$1
-..
-.if n .ds [. [
-.\".if t .ds [. \s-2\v'-.4m'\f1
-.if t .ds [. [
-.if n .ds .] ]
-.\".if t .ds .] \v'.4m'\s+2\fP
-.if t .ds .] ]
-.ds (. \& [
-.ds .) ]
-.if n .ds [o ""
-.if n .ds [c ""
-.if t .ds [o ``
-.if t .ds [c ''
-.ds [e \\fIet al.\\fP
-.\" for author list in reference:
-.ds &1 &
-.\" for -m signal (auth1 and auth2, year):
-.ds &2 &
-.\" the next lines deal with the problem of .[1] or [1].
-.\" refer will write "linexxx\*(<.[1]\*(>.
-.\" and either "<." or ">." should produce the .;
-.\" similarly for , and ;
-.rm <. <, <;
-.if n .ds >. .
-.if t .ds >. .
-.if n .ds >, ,
-.if t .ds >, ,
-.if n .ds >; ;
-.if t .ds >; ;
-.de [5 \" tm style
-.FS
-.IP "\\*([F.\0"
-\\*([A, \\f2\\*([T\\f1,
-.ie \\n(TN \\*([M.
-.el Bell Laboratories internal memorandum (\\*([D).
-.RT
-.FE
-..
-.de [0 \" other
-.FS
-.nr [: 0
-.if !"\\*([F"" .IP "\\*([F.\0"
-.if !"\\*([A"" \{.nr [: 1
-\\*([A\c\}
-.if !"\\*([T"" \{.if \\n([:>0 ,
-.nr [: 1
-\\f2\\*([T\\f1\c\}
-.if !"\\*([O""\{.if \\n([:>0 ,
-.nr [: 1
-.if \\n([O>0 .nr [: 0
-\\*([O\c
-.if \\n([O>0 \& \c\}
-.if !"\\*([D"" \{.if \\n([:>0 ,
-.nr [: 1
-\\*([D\c\}
-.if \\n([:>0 \&.
-.RT
-.FE
-..
-.de [1 \" journal article
-.FS
-.if !"\\*([F"" .IP "\\*([F.\0"
-.if !"\\*([A"" \\*([A,
-.if !"\\*([T"" \\*([o\\*([T,\\*([c
-\\f2\\*([J\\f1\c
-.if !"\\*([V"" .if n \& Vol.\&\c
-.if !"\\*([V"" \& \\f3\\*([V\\f1\c
-.if !"\\*([N"" (\\*([N)\c
-.if !"\\*([P"" \{\
-.ie \\n([P>0 , pp. \c
-.el , p. \c
-\\*([P\c\}
-.if !"\\*([I"" .if "\\*([R"" , \\*([I\c
-.if !"\\*([O"" .if \\n([O=0 , \\*([O\c
-.if !"\\*([D"" \& (\\*([D)\c
-\&.
-.if !"\\*([O"" .if \\n([O>0 \\*([O
-.RT
-.FE
-..
-.de [2 \" book
-.FS
-.if !"\\*([F"" .IP "\\*([F.\0"
-.if !"\\*([A"" \\*([A,
-.if !"\\*([T"" \\f2\\*([T,\\f1
-\\*([I\c
-.if !"\\*([C"" , \\*([C\c
-.if !"\\*([D"" \& (\\*([D)\c
-\&.
-.if !"\\*([G"" Gov't. ordering no. \\*([G.
-.if !"\\*([O"" \\*([O
-.RT
-.FE
-..
-.de [4 \" report
-.FS
-.if !"\\*([F"" .IP "\\*([F.\0"
-\\*([A, \\*([o\\*([T,\\*([c
-\\*([R\c
-.if !"\\*([G"" \& (\\*([G)\c
-.if !"\\*([I"" , \\*([I\c
-.if !"\\*([C"" , \\*([C\c
-.if !"\\*([D"" \& (\\*([D)\c
-\&.
-.if !"\\*([O"" \\*([O
-.RT
-.FE
-..
-.de [3 \" article in book
-.FS
-.if !"\\*([F"" .IP "\\*([F.\0"
-.if !"\\*([A"" \\*([A,
-.if !"\\*([T"" \\*([o\\*([T,\\*([c
-.if !"\\*([P"" pp. \\*([P
-in \\f2\\*([B\\f1\c
-.if !"\\*([E"" , ed. \\*([E\c
-.if !"\\*([I"" , \\*([I\c
-.if !"\\*([C"" , \\*([C\c
-.if !"\\*([D"" \& (\\*([D)\c
-\&.
-.if !"\\*([O"" \\*([O
-.RT
-.FE
-..
-.de ]<
-.[<
-..
-.de [<
-.RT
-.ne 62p
-.ie \\n(rS \{\
-. rs
-. sp 4p
-.\}
-.el .sp 27p
-.Li 2 30.5P
-\fBReferences\fP
-.br
-.if \\n(Ns<2 \{\
-. nr Ns 1
-. ds ST References
-.\}
-.\"nr Tt 7
-.sp 8p
-.rm FS FE
-.\"sy echo '.T3 "\\\\t\\\\tReferences" \\n%' >>Toc
-.ns
-..
-.de [>
-.]>
-..
-.de ]>
-.sp
-..
-.de ]-
-.[-
-..
-.de [-
-.rm [V [P [A [T
-.rm [N [C [B [O
-.rm [R [I [E [D
-..
-.de ]]
-this is never
-executed
-and just
-uses up an end-of-file
-bug.
-..
diff --git a/share/doc/papers/newvm/0.t b/share/doc/papers/newvm/0.t
deleted file mode 100644
index 58bf23441f4..00000000000
--- a/share/doc/papers/newvm/0.t
+++ /dev/null
@@ -1,84 +0,0 @@
-.\" $OpenBSD: 0.t,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1986 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)0.t 5.1 (Berkeley) 4/16/91
-.\"
-.rm CM
-.TL
-A New Virtual Memory Implementation for Berkeley
-.UX
-.AU
-Marshall Kirk McKusick
-Michael J. Karels
-.AI
-Computer Systems Research Group
-Computer Science Division
-Department of Electrical Engineering and Computer Science
-University of California, Berkeley
-Berkeley, California 94720
-.AB
-With the cost per byte of memory approaching that of the cost per byte
-for disks, and with file systems increasingly distant from the host
-machines, a new approach to the implementation of virtual memory is
-necessary. Rather than preallocating swap space which limits the
-maximum virtual memory that can be supported to the size of the swap
-area, the system should support virtual memory up to the sum of the
-sizes of physical memory plus swap space. For systems with a local swap
-disk, but remote file systems, it may be useful to use some of the memory
-to keep track of the contents of the swap space to avoid multiple fetches
-of the same data from the file system.
-.PP
-The new implementation should also add new functionality. Processes
-should be allowed to have large sparse address spaces, to map files
-into their address spaces, to map device memory into their address
-spaces, and to share memory with other processes. The shared address
-space may either be obtained by mapping a file into (possibly
-different) parts of their address space, or by arranging to share
-``anonymous memory'' (that is, memory that is zero fill on demand, and
-whose contents are lost when the last process unmaps the memory) with
-another process as is done in System V.
-.PP
-One use of shared memory is to provide a high-speed
-Inter-Process Communication (IPC) mechanism between two or more
-cooperating processes. To insure the integrity of data structures
-in a shared region, processes must be able to use semaphores to
-coordinate their access to these shared structures. In System V,
-these semaphores are provided as a set of system calls. Unfortunately,
-the use of system calls reduces the throughput of the shared memory
-IPC to that of existing IPC mechanisms. We are proposing a scheme
-that places the semaphores in the shared memory segment, so that
-machines that have a test-and-set instruction can handle the usual
-uncontested lock and unlock without doing a system call. Only in
-the unusual case of trying to lock an already-locked lock or in
-releasing a wanted lock will a system call be required. The
-interface will allow a user-level implementation of the System V
-semaphore interface on most machines with a much lower runtime cost.
-.AE
-.LP
-.bp
diff --git a/share/doc/papers/newvm/1.t b/share/doc/papers/newvm/1.t
deleted file mode 100644
index 3a736076450..00000000000
--- a/share/doc/papers/newvm/1.t
+++ /dev/null
@@ -1,375 +0,0 @@
-.\" $OpenBSD: 1.t,v 1.3 2003/06/02 23:30:09 millert Exp $
-.\"
-.\" Copyright (c) 1986 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)1.t 5.1 (Berkeley) 4/16/91
-.\"
-.NH
-Motivations for a New Virtual Memory System
-.PP
-The virtual memory system distributed with Berkeley UNIX has served
-its design goals admirably well over the ten years of its existence.
-However the relentless advance of technology has begun to render it
-obsolete.
-This section of the paper describes the current design,
-points out the current technological trends,
-and attempts to define the new design considerations that should
-be taken into account in a new virtual memory design.
-.SH
-Implementation of 4.3BSD virtual memory
-.PP
-All Berkeley Software Distributions through 4.3BSD
-have used the same virtual memory design.
-All processes, whether active or sleeping, have some amount of
-virtual address space associated with them.
-This virtual address space
-is the combination of the amount of address space with which they initially
-started plus any stack or heap expansions that they have made.
-All requests for address space are allocated from available swap space
-at the time that they are first made;
-if there is insufficient swap space left to honor the allocation,
-the system call requesting the address space fails synchronously.
-Thus, the limit to available virtual memory is established by the
-amount of swap space allocated to the system.
-.PP
-Memory pages are used in a sort of shell game to contain the
-contents of recently accessed locations.
-As a process first references a location
-a new page is allocated and filled either with initialized data or
-zeros (for new stack and break pages).
-As the supply of free pages begins to run out, dirty pages are
-pushed to the previously allocated swap space so that they can be reused
-to contain newly faulted pages.
-If a previously accessed page that has been pushed to swap is once
-again used, a free page is reallocated and filled from the swap area
-[Babaoglu79], [Someren84].
-.SH
-Design assumptions for 4.3BSD virtual memory
-.PP
-The design criteria for the current virtual memory implementation
-were made in 1979.
-At that time the cost of memory was about a thousand times greater per
-byte than magnetic disks.
-Most machines were used as centralized time sharing machines.
-These machines had far more disk storage than they had memory
-and, given the cost tradeoff between memory and disk storage,
-the design sought to make maximal use of the memory even at the cost of
-wasting some of the disk space or generating extra disk I/O.
-.PP
-The primary motivation for virtual memory was to allow the
-system to run individual programs whose address space exceeded
-the memory capacity of the machine.
-Thus the virtual memory capability allowed programs to be run that
-could not have been run on a swap based system.
-Equally important in the large central timesharing environment
-was the ability to allow the sum of the memory requirements of
-all active processes to exceed the amount of physical memory on
-the machine.
-The expected mode of operation for which the system was tuned
-was to have the sum of active virtual memory be one and a half
-to two times the physical memory on the machine.
-.PP
-At the time that the virtual memory system was designed,
-most machines ran with little or no networking.
-All the file systems were contained on disks that were
-directly connected to the machine.
-Similarly all the disk space devoted to swap space was also
-directly connected.
-Thus the speed and latency with which file systems could be accessed
-were roughly equivalent to the speed and latency with which swap
-space could be accessed.
-Given the high cost of memory there was little incentive to have
-the kernel keep track of the contents of the swap area once a process
-exited since it could almost as easily and quickly be reread from the
-file system.
-.SH
-New influences
-.PP
-In the ten years since the current virtual memory system was designed,
-many technological advances have occurred.
-One effect of the technological revolution is that the
-micro-processor has become powerful enough to allow users to have their
-own personal workstations.
-Thus the computing environment is moving away from a purely centralized
-time sharing model to an environment in which users have a
-computer on their desk.
-This workstation is linked through a network to a centralized
-pool of machines that provide filing, computing, and spooling services.
-The workstations tend to have a large quantity of memory,
-but little or no disk space.
-Because users do not want to be bothered with backing up their disks,
-and because of the difficulty of having a centralized administration
-backing up hundreds of small disks, these local disks are typically
-used only for temporary storage and as swap space.
-Long term storage is managed by the central file server.
-.PP
-Another major technical advance has been in all levels of storage capacity.
-In the last ten years we have experienced a factor of four decrease in the
-cost per byte of disk storage.
-In this same period of time the cost per byte of memory has dropped
-by a factor of a hundred!
-Thus the cost per byte of memory compared to the cost per byte of disk is
-approaching a difference of only about a factor of ten.
-The effect of this change is that the way in which a machine is used
-is beginning to change dramatically.
-As the amount of physical memory on machines increases and the number of
-users per machine decreases, the expected
-mode of operation is changing from that of supporting more active virtual
-memory than physical memory to that of having a surplus of memory that can
-be used for other purposes.
-.PP
-Because many machines will have more physical memory than they do swap
-space (with diskless workstations as an extreme example!),
-it is no longer reasonable to limit the maximum virtual memory
-to the amount of swap space as is done in the current design.
-Consequently, the new design will allow the maximum virtual memory
-to be the sum of physical memory plus swap space.
-For machines with no swap space, the maximum virtual memory will
-be governed by the amount of physical memory.
-.PP
-Another effect of the current technology is that the latency and overhead
-associated with accessing the file system is considerably higher
-since the access must be over the network
-rather than to a locally-attached disk.
-One use of the surplus memory would be to
-maintain a cache of recently used files;
-repeated uses of these files would require at most a verification from
-the file server that the data was up to date.
-Under the current design, file caching is done by the buffer pool,
-while the free memory is maintained in a separate pool.
-The new design should have only a single memory pool so that any
-free memory can be used to cache recently accessed files.
-.PP
-Another portion of the memory will be used to keep track of the contents
-of the blocks on any locally-attached swap space analogously
-to the way that memory pages are handled.
-Thus inactive swap blocks can also be used to cache less-recently-used
-file data.
-Since the swap disk is locally attached, it can be much more quickly
-accessed than a remotely located file system.
-This design allows the user to simply allocate their entire local disk
-to swap space, thus allowing the system to decide what files should
-be cached to maximize its usefulness.
-This design has two major benefits.
-It relieves the user of deciding what files
-should be kept in a small local file system.
-It also insures that all modified files are migrated back to the
-file server in a timely fashion, thus eliminating the need to dump
-the local disk or push the files manually.
-.NH
-User Interface
-.PP
-This section outlines our new virtual memory interface as it is
-currently envisioned.
-The details of the system call interface are contained in Appendix A.
-.SH
-Regions
-.PP
-The virtual memory interface is designed to support both large,
-sparse address spaces as well as small, densely-used address spaces.
-In this context, ``small'' is an address space roughly the
-size of the physical memory on the machine,
-while ``large'' may extend up to the maximum addressability of the machine.
-A process may divide its address space up into a number of regions.
-Initially a process begins with four regions;
-a shared read-only fill-on-demand region with its text,
-a private fill-on-demand region for its initialized data,
-a private zero-fill-on-demand region for its uninitialized data and heap,
-and a private zero-fill-on-demand region for its stack.
-In addition to these regions, a process may allocate new ones.
-The regions may not overlap and the system may impose an alignment
-constraint, but the size of the region should not be limited
-beyond the constraints of the size of the virtual address space.
-.PP
-Each new region may be mapped either as private or shared.
-When it is privately mapped, changes to the contents of the region
-are not reflected to any other process that maps the same region.
-Regions may be mapped read-only or read-write.
-As an example, a shared library would be implemented as two regions;
-a shared read-only region for the text, and a private read-write
-region for the global variables associated with the library.
-.PP
-A region may be allocated with one of several allocation strategies.
-It may map some memory hardware on the machine such as a frame buffer.
-Since the hardware is responsible for storing the data,
-such regions must be exclusive use if they are privately mapped.
-.PP
-A region can map all or part of a file.
-As the pages are first accessed, the region is filled in with the
-appropriate part of the file.
-If the region is mapped read-write and shared, changes to the
-contents of the region are reflected back into the contents of the file.
-If the region is read-write but private,
-changes to the region are copied to a private page that is not
-visible to other processes mapping the file,
-and these modified pages are not reflected back to the file.
-.PP
-The final type of region is ``anonymous memory''.
-Uninitialized data uses such a region, privately mapped;
-it is zero-fill-on-demand and its contents are abandoned
-when the last reference is dropped.
-Unlike a region that is mapped from a file,
-the contents of an anonymous region will never be read from or
-written to a disk unless memory is short and part of the region
-must be paged to a swap area.
-If one of these regions is mapped shared,
-then all processes see the changes in the region.
-This difference has important performance considerations;
-the overhead of reading, flushing, and possibly allocating a file
-is much higher than simply zeroing memory.
-.PP
-If several processes wish to share a region,
-then they must have some way of rendezvousing.
-For a mapped file this is easy;
-the name of the file is used as the rendezvous point.
-However, processes may not need the semantics of mapped files
-nor be willing to pay the overhead associated with them.
-For anonymous memory they must use some other rendezvous point.
-Our current interface allows processes to associate a
-descriptor with a region, which they may then pass to other
-processes that wish to attach to the region.
-Such a descriptor may be bound into the UNIX file system
-name space so that other processes can find it just as
-they would with a mapped file.
-.SH
-Shared memory as high speed interprocess communication
-.PP
-The primary use envisioned for shared memory is to
-provide a high speed interprocess communication (IPC) mechanism
-between cooperating processes.
-Existing IPC mechanisms (\fIi.e.\fP pipes, sockets, or streams)
-require a system call to hand off a set
-of data destined for another process, and another system call
-by the recipient process to receive the data.
-Even if the data can be transferred by remapping the data pages
-to avoid a memory to memory copy, the overhead of doing the system
-calls limits the throughput of all but the largest transfers.
-Shared memory, by contrast, allows processes to share data at any
-level of granularity without system intervention.
-.PP
-However, to maintain all but the simplest of data structures,
-the processes must serialize their modifications to shared
-data structures if they are to avoid corrupting them.
-This serialization is typically done with semaphores.
-Unfortunately, most implementations of semaphores are
-done with system calls.
-Thus processes are once again limited by the need to do two
-system calls per transaction, one to lock the semaphore, the
-second to release it.
-The net effect is that the shared memory model provides little if
-any improvement in interprocess bandwidth.
-.PP
-To achieve a significant improvement in interprocess bandwidth
-requires a large decrease in the number of system calls needed to
-achieve the interaction.
-In profiling applications that use
-serialization locks such as the UNIX kernel,
-one typically finds that most locks are not contested.
-Thus if one can find a way to avoid doing a system call in the case
-in which a lock is not contested,
-one would expect to be able to dramatically reduce the number
-of system calls needed to achieve serialization.
-.PP
-In our design, cooperating processes manage their semaphores
-in their own address space.
-In the typical case, a process executes an atomic test-and-set instruction
-to acquire a lock, finds it free, and thus is able to get it.
-Only in the (rare) case where the lock is already set does the process
-need to do a system call to wait for the lock to clear.
-When a process is finished with a lock,
-it can clear the lock itself.
-Only if the ``WANT'' flag for the lock has been set is
-it necessary for the process to do a system call to cause the other
-process(es) to be awakened.
-.PP
-Another issue that must be considered is portability.
-Some computers require access to special hardware to implement
-atomic interprocessor test-and-set.
-For such machines the setting and clearing of locks would
-all have to be done with system calls;
-applications could still use the same interface without change,
-though they would tend to run slowly.
-.PP
-The other issue of compatibility is with System V's semaphore
-implementation.
-Since the System V interface has been in existence for several years,
-and applications have been built that depend on this interface,
-it is important that this interface also be available.
-Although the interface is based on system calls for both setting and
-clearing locks,
-the same interface can be obtained using our interface without
-system calls in most cases.
-.PP
-This implementation can be achieved as follows.
-System V allows entire sets of semaphores to be set concurrently.
-If any of the locks are unavailable, the process is put to sleep
-until they all become available.
-Under our paradigm, a single additional semaphore is defined
-that serializes access to the set of semaphores being simulated.
-Once obtained in the usual way, the set of semaphores can be
-inspected to see if the desired ones are available.
-If they are available, they are set, the guardian semaphore
-is released and the process proceeds.
-If one or more of the requested set is not available,
-the guardian semaphore is released and the process selects an
-unavailable semaphore for which to wait.
-On being reawakened, the whole selection process must be repeated.
-.PP
-In all the above examples, there appears to be a race condition.
-Between the time that the process finds that a semaphore is locked,
-and the time that it manages to call the system to sleep on the
-semaphore, another process may unlock the semaphore and issue a wakeup call.
-Luckily the race can be avoided.
-The insight that is critical is that the process and the kernel agree
-on the physical byte of memory that is being used for the semaphore.
-The system call to put a process to sleep takes a pointer
-to the desired semaphore as its argument so that once inside
-the kernel, the kernel can repeat the test-and-set.
-If the lock has cleared
-(and possibly the wakeup issued) between the time that the process
-did the test-and-set and eventually got into the sleep request system call,
-then the kernel immediately resumes the process rather than putting
-it to sleep.
-Thus the only problem to solve is how the kernel interlocks between testing
-a semaphore and going to sleep;
-this problem has already been solved on existing systems.
-.NH
-References
-.sp
-.IP [Babaoglu79] 20
-Babaoglu, O., and Joy, W.,
-``Data Structures Added in the Berkeley Virtual Memory Extensions
-to the UNIX Operating System''
-Computer Systems Research Group, Dept of EECS, University of California,
-Berkeley, CA 94720, USA, November 1979.
-.IP [Someren84] 20
-Someren, J. van,
-``Paging in Berkeley UNIX'',
-Laboratorium voor schakeltechniek en techneik v.d.
-informatieverwerkende machines,
-Codenummer 051560-44(1984)01, February 1984.
diff --git a/share/doc/papers/newvm/Makefile b/share/doc/papers/newvm/Makefile
deleted file mode 100644
index bec913a4d04..00000000000
--- a/share/doc/papers/newvm/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-# $OpenBSD: Makefile,v 1.3 2004/02/01 14:22:44 jmc Exp $
-
-
-DIR= papers/newvm
-SRCS= 0.t 1.t a.t
-MACROS= -ms
-
-paper.ps: ${SRCS}
- ${TBL} ${SRCS} | ${ROFF} > ${.TARGET}
-
-paper.txt: ${SRCS}
- ${TBL} ${SRCS} | ${ROFF} -Tascii > ${.TARGET}
-
-.include <bsd.doc.mk>
diff --git a/share/doc/papers/newvm/a.t b/share/doc/papers/newvm/a.t
deleted file mode 100644
index 0be41f29901..00000000000
--- a/share/doc/papers/newvm/a.t
+++ /dev/null
@@ -1,237 +0,0 @@
-.\" $OpenBSD: a.t,v 1.4 2003/10/30 14:52:24 jmc Exp $
-.\"
-.\" Copyright (c) 1986 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)a.t 5.1 (Berkeley) 4/16/91
-.\"
-.sp 2
-.ne 2i
-.NH
-Appendix A \- Virtual Memory Interface
-.SH
-Mapping pages
-.PP
-The system supports sharing of data between processes
-by allowing pages to be mapped into memory. These mapped
-pages may be \fIshared\fP with other processes or \fIprivate\fP
-to the process.
-Protection and sharing options are defined in \fI<sys/mman.h>\fP as:
-.DS
-.ta \w'#define\ \ 'u +\w'MAP_HASSEMAPHORE\ \ 'u +\w'0x0080\ \ 'u
-/* protections are chosen from these bits, or-ed together */
-#define PROT_READ 0x04 /* pages can be read */
-#define PROT_WRITE 0x02 /* pages can be written */
-#define PROT_EXEC 0x01 /* pages can be executed */
-.DE
-.DS
-.ta \w'#define\ \ 'u +\w'MAP_HASSEMAPHORE\ \ 'u +\w'0x0080\ \ 'u
-/* flags contain mapping type, sharing type and options */
-/* mapping type; choose one */
-#define MAP_FILE 0x0001 /* mapped from a file or device */
-#define MAP_ANON 0x0002 /* allocated from memory, swap space */
-#define MAP_TYPE 0x000f /* mask for type field */
-.DE
-.DS
-.ta \w'#define\ \ 'u +\w'MAP_HASSEMAPHORE\ \ 'u +\w'0x0080\ \ 'u
-/* sharing types; choose one */
-#define MAP_SHARED 0x0010 /* share changes */
-#define MAP_PRIVATE 0x0000 /* changes are private */
-.DE
-.DS
-.ta \w'#define\ \ 'u +\w'MAP_HASSEMAPHORE\ \ 'u +\w'0x0080\ \ 'u
-/* other flags */
-#define MAP_FIXED 0x0020 /* map addr must be exactly as requested */
-#define MAP_INHERIT 0x0040 /* region is retained after exec */
-#define MAP_HASSEMAPHORE 0x0080 /* region may contain semaphores */
-.DE
-The cpu-dependent size of a page is returned by the
-\fIgetpagesize\fP system call:
-.DS
-pagesize = getpagesize();
-result int pagesize;
-.DE
-.LP
-The call:
-.DS
-maddr = mmap(addr, len, prot, flags, fd, pos);
-result caddr_t maddr; caddr_t addr; int *len, prot, flags, fd; off_t pos;
-.DE
-causes the pages starting at \fIaddr\fP and continuing
-for at most \fIlen\fP bytes to be mapped from the object represented by
-descriptor \fIfd\fP, starting at byte offset \fIpos\fP.
-The starting address of the region is returned;
-for the convenience of the system,
-it may differ from that supplied
-unless the MAP_FIXED flag is given,
-in which case the exact address will be used or the call will fail.
-The actual amount mapped is returned in \fIlen\fP.
-The \fIaddr\fP, \fIlen\fP, and \fIpos\fP parameters
-must all be multiples of the pagesize.
-A successful \fImmap\fP will delete any previous mapping
-in the allocated address range.
-The parameter \fIprot\fP specifies the accessibility
-of the mapped pages.
-The parameter \fIflags\fP specifies
-the type of object to be mapped,
-mapping options, and
-whether modifications made to
-this mapped copy of the page
-are to be kept \fIprivate\fP, or are to be \fIshared\fP with
-other references.
-Possible types include MAP_FILE,
-mapping a regular file or character-special device memory,
-and MAP_ANON, which maps memory not associated with any specific file.
-The file descriptor used for creating MAP_ANON regions is used only
-for naming, and may be given as \-1 if no name
-is associated with the region.\(dg
-.FS
-\(dg The current design does not allow a process
-to specify the location of swap space.
-In the future we may define an additional mapping type, MAP_SWAP,
-in which the file descriptor argument specifies a file
-or device to which swapping should be done.
-.FE
-The MAP_INHERIT flag allows a region to be inherited after an \fIexec\fP.
-The MAP_HASSEMAPHORE flag allows special handling for
-regions that may contain semaphores.
-.PP
-A facility is provided to synchronize a mapped region with the file
-it maps; the call
-.DS
-msync(addr, len);
-caddr_t addr; int len;
-.DE
-writes any modified pages back to the filesystem and updates
-the file modification time.
-If \fIlen\fP is 0, all modified pages within the region containing \fIaddr\fP
-will be flushed;
-if \fIlen\fP is non-zero, only the pages containing \fIaddr\fP and \fIlen\fP
-succeeding locations will be examined.
-Any required synchronization of memory caches
-will also take place at this time.
-Filesystem operations on a file that is mapped for shared modifications
-are unpredictable except after an \fImsync\fP.
-.PP
-A mapping can be removed by the call
-.DS
-munmap(addr, len);
-caddr_t addr; int len;
-.DE
-This call deletes the mappings for the specified address range,
-and causes further references to addresses within the range
-to generate invalid memory references.
-.SH
-Page protection control
-.PP
-A process can control the protection of pages using the call
-.DS
-mprotect(addr, len, prot);
-caddr_t addr; int len, prot;
-.DE
-This call changes the specified pages to have protection \fIprot\fP\|.
-Not all implementations will guarantee protection on a page basis;
-the granularity of protection changes may be as large as an entire region.
-.SH
-Giving and getting advice
-.PP
-A process that has knowledge of its memory behavior may
-use the \fImadvise\fP call:
-.DS
-madvise(addr, len, behav);
-caddr_t addr; int len, behav;
-.DE
-\fIBehav\fP describes expected behavior, as given
-in \fI<sys/mman.h>\fP:
-.DS
-.ta \w'#define\ \ 'u +\w'MADV_SEQUENTIAL\ \ 'u +\w'00\ \ \ \ 'u
-#define MADV_NORMAL 0 /* no further special treatment */
-#define MADV_RANDOM 1 /* expect random page references */
-#define MADV_SEQUENTIAL 2 /* expect sequential references */
-#define MADV_WILLNEED 3 /* will need these pages */
-#define MADV_DONTNEED 4 /* don't need these pages */
-#define MADV_SPACEAVAIL 5 /* insure that resources are reserved */
-.DE
-Finally, a process may obtain information about whether pages are
-core resident by using the call
-.DS
-mincore(addr, len, vec)
-caddr_t addr; int len; result char *vec;
-.DE
-Here the current core residency of the pages is returned
-in the character array \fIvec\fP, with a value of 1 meaning
-that the page is in-core.
-.SH
-Synchronization primitives
-.PP
-Primitives are provided for synchronization using semaphores in shared memory.
-Semaphores must lie within a MAP_SHARED region with at least modes
-PROT_READ and PROT_WRITE.
-The MAP_HASSEMAPHORE flag must have been specified when the region was created.
-To acquire a lock a process calls:
-.DS
-value = mset(sem, wait)
-result int value; semaphore *sem; int wait;
-.DE
-\fIMset\fP indivisibly tests and sets the semaphore \fIsem\fP.
-If the previous value is zero, the process has acquired the lock
-and \fImset\fP returns true immediately.
-Otherwise, if the \fIwait\fP flag is zero,
-failure is returned.
-If \fIwait\fP is true and the previous value is non-zero,
-\fImset\fP relinquishes the processor until notified that it should retry.
-.LP
-To release a lock a process calls:
-.DS
-mclear(sem)
-semaphore *sem;
-.DE
-\fIMclear\fP indivisibly tests and clears the semaphore \fIsem\fP.
-If the ``WANT'' flag is zero in the previous value,
-\fImclear\fP returns immediately.
-If the ``WANT'' flag is non-zero in the previous value,
-\fImclear\fP arranges for waiting processes to retry before returning.
-.PP
-Two routines provide services analogous to the kernel
-\fIsleep\fP and \fIwakeup\fP functions interpreted in the domain of
-shared memory.
-A process may relinquish the processor by calling \fImsleep\fP
-with a set semaphore:
-.DS
-msleep(sem)
-semaphore *sem;
-.DE
-If the semaphore is still set when it is checked by the kernel,
-the process will be put in a sleeping state
-until some other process issues an \fImwakeup\fP for the same semaphore
-within the region using the call:
-.DS
-mwakeup(sem)
-semaphore *sem;
-.DE
-An \fImwakeup\fP may awaken all sleepers on the semaphore,
-or may awaken only the next sleeper on a queue.
diff --git a/share/doc/papers/newvm/spell.ok b/share/doc/papers/newvm/spell.ok
deleted file mode 100644
index 543dc7e16a8..00000000000
--- a/share/doc/papers/newvm/spell.ok
+++ /dev/null
@@ -1,56 +0,0 @@
-ANON
-Babaoglu
-Babaoglu79
-Behav
-CM
-Codenummer
-DONTNEED
-Dept
-EECS
-Filesystem
-HASSEMAPHORE
-IPC
-Karels
-Laboratorium
-MADV
-McKusick
-Mclear
-Mset
-NOEXTEND
-PROT
-SPACEAVAIL
-Someren
-Someren84
-WILLNEED
-addr
-behav
-caching
-caddr
-es
-fd
-filesystem
-getpagesize
-informatieverwerkende
-len
-maddr
-madvise
-mclear
-mincore
-mman.h
-mmap
-mprotect
-mset
-msleep
-msync
-munmap
-mwakeup
-pagesize
-pos
-prot
-runtime
-schakeltechniek
-sem
-techneik
-v.d
-vec
-voor
diff --git a/share/doc/papers/nqnfs/Makefile b/share/doc/papers/nqnfs/Makefile
deleted file mode 100644
index 7a8e6c46364..00000000000
--- a/share/doc/papers/nqnfs/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-# $OpenBSD: Makefile,v 1.3 2004/02/01 14:22:45 jmc Exp $
-
-
-DIR= papers/nqnfs
-SRCS= nqnfs.me
-MACROS= -me
-
-paper.ps: ${SRCS}
- ${PIC} ${SRCS} | ${TBL} | ${ROFF} > ${.TARGET}
-
-paper.txt: ${SRCS}
- ${PIC} ${SRCS} | ${TBL} | ${ROFF} -Tascii > ${.TARGET}
-
-.include <bsd.doc.mk>
diff --git a/share/doc/papers/nqnfs/nqnfs.me b/share/doc/papers/nqnfs/nqnfs.me
deleted file mode 100644
index d0c29e199fd..00000000000
--- a/share/doc/papers/nqnfs/nqnfs.me
+++ /dev/null
@@ -1,2009 +0,0 @@
-.\" $OpenBSD: nqnfs.me,v 1.2 2001/02/03 08:14:59 niklas Exp $
-.\"
-.\" Copyright (c) 1993 The Usenix Association. All rights reserved.
-.\"
-.\" This document is derived from software contributed to Berkeley by
-.\" Rick Macklem at The University of Guelph with the permission of
-.\" the Usenix Association.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. All advertising materials mentioning features or use of this software
-.\" must display the following acknowledgement:
-.\" This product includes software developed by the University of
-.\" California, Berkeley and its contributors.
-.\" 4. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)nqnfs.me 8.1 (Berkeley) 4/20/94
-.\"
-.lp
-.nr PS 12
-.ps 12
-Reprinted with permission from the "Proceedings of the Winter 1994 Usenix
-Conference", January 1994, San Francisco, CA, Copyright The Usenix
-Association.
-.nr PS 14
-.ps 14
-.sp
-.ce
-\fBNot Quite NFS, Soft Cache Consistency for NFS\fR
-.nr PS 12
-.ps 12
-.sp
-.ce
-\fIRick Macklem\fR
-.ce
-\fIUniversity of Guelph\fR
-.sp
-.nr PS 12
-.ps 12
-.ce
-\fBAbstract\fR
-.nr PS 10
-.ps 10
-.pp
-There are some constraints inherent in the NFS\(tm\(mo protocol
-that result in performance limitations
-for high performance
-workstation environments.
-This paper discusses an NFS-like protocol named Not Quite NFS (NQNFS),
-designed to address some of these limitations.
-This protocol provides full cache consistency during normal
-operation, while permitting more effective client-side caching in an
-effort to improve performance.
-There are also a variety of minor protocol changes, in order to resolve
-various NFS issues.
-The emphasis is on observed performance of a
-preliminary implementation of the protocol, in order to show
-how well this design works
-and to suggest possible areas for further improvement.
-.sh 1 "Introduction"
-.pp
-It has been observed that
-overall workstation performance has not been scaling with
-processor speed and that file system I/O is a limiting factor [Ousterhout90].
-Ousterhout
-notes
-that a principal challenge for operating system developers is the
-decoupling of system calls from their underlying I/O operations, in order
-to improve average system call response times.
-For distributed file systems, every synchronous Remote Procedure Call (RPC)
-takes a minimum of a few milliseconds and, as such, is analogous to an
-underlying I/O operation.
-This suggests that client caching with a very good
-hit ratio for read type operations, along with asynchronous writing, is required in order to avoid delays waiting for RPC replies.
-However, the NFS protocol requires that the server be stateless\**
-.(f
-\**The server must not require any state that may be lost due to a crash, to
-function correctly.
-.)f
-and does not provide any explicit mechanism for client cache
-consistency, putting
-constraints on how the client may cache data.
-This paper describes an NFS-like protocol that includes a cache consistency
-component designed to enhance client caching performance. It does provide
-full consistency under normal operation, but without requiring that hard
-state information be maintained on the server.
-Design tradeoffs were made towards simplicity and
-high performance over cache consistency under abnormal conditions.
-The protocol design uses a variation of Leases [Gray89]
-to provide state on the server that does not need to be recovered after a
-crash.
-.pp
-The protocol also includes changes designed to address other limitations
-of NFS in a modern workstation environment.
-The use of TCP transport is optionally available to avoid
-the pitfalls of Sun RPC over UDP transport when running across an internetwork [Nowicki89].
-Kerberos [Steiner88] support is available
-to do proper user authentication, in order to provide improved security and
-arbitrary client to server user ID mappings.
-There are also a variety of other changes to accommodate large file systems,
-such as 64bit file sizes and offsets, as well as lifting the 8Kbyte I/O size
-limit.
-The remainder of this paper gives an overview of the protocol, highlighting
-performance related components, followed by an evaluation of resultant performance
-for the 4.4BSD implementation.
-.sh 1 "Distributed File Systems and Caching"
-.pp
-Clients using distributed file systems cache recently-used data in order
-to reduce the number of synchronous server operations, and therefore improve
-average response times for system calls.
-Unfortunately, maintaining consistency between these caches is a problem
-whenever write sharing occurs; that is, when a process on a client writes
-to a file and one or more processes on other client(s) read the file.
-If the writer closes the file before any reader(s) open the file for reading,
-this is called sequential write sharing. Both the Andrew ITC file system
-[Howard88] and NFS [Sandberg85] maintain consistency for sequential write
-sharing by requiring the writer to push all the writes through to the
-server on close and having readers check to see if the file has been
-modified upon open. If the file has been modified, the client throws away
-all cached data for that file, as it is now stale.
-NFS implementations typically detect file modification by checking a cached
-copy of the file's modification time; since this cached value is often
-several seconds out of date and only has a resolution of one second, an NFS
-client often uses stale cached data for some time after the file has
-been updated on the server.
-.pp
-A more difficult case is concurrent write sharing, where write operations are intermixed
-with read operations.
-Consistency for this case, often referred to as "full cache consistency,"
-requires that a reader always receives the most recently written data.
-Neither NFS nor the Andrew ITC file system maintains consistency for this
-case.
-The simplest mechanism for maintaining full cache consistency is the one
-used by Sprite [Nelson88], which disables all client caching of the
-file whenever concurrent write sharing might occur.
-There are other mechanisms described in the literature [Kent87a,
-Burrows88], but they appeared to be too elaborate for incorporation
-into NQNFS (for example, Kent's requires specialized hardware).
-NQNFS differs from Sprite in the way it
-detects write sharing. The Sprite server maintains a list of files currently open
-by the various clients and detects write sharing when a file open request
-for writing is received and the file is already open for reading
-(or vice versa).
-This list of open files is hard state information that must be recovered
-after a server crash, which is a significant problem in its own
-right [Mogul93, Welch90].
-.pp
-The approach used by NQNFS is a variant of the Leases mechanism [Gray89].
-In this model, the server issues to a client a promise, referred to as a
-"lease," that the client may cache a specific object without fear of
-conflict.
-A lease has a limited duration and must be renewed by the client if it
-wishes to continue to cache the object.
-In NQNFS, clients hold short-term (up to one minute) leases on files
-for reading or writing.
-The leases are analogous to entries in the open file list, except that
-they expire after the lease term unless renewed by the client.
-As such, one minute after issuing the last lease there are no current
-leases and therefore no lease records to be recovered after a crash, hence
-the term "soft server state."
-.pp
-A related design consideration is the way client writing is done.
-Synchronous writing requires that all writes be pushed through to the server
-during the write system call.
-This is the simplest variant, from a consistency point of view, since the
-server always has the most recently written data. It also permits any write
-errors, such as "file system out of space" to be propagated back to the
-client's process via the write system call return.
-Unfortunately this approach limits the client write rate, based on server write
-performance and client/server RPC round trip time (RTT).
-.pp
-An alternative to this is delayed writing, where the write system call returns
-as soon as the data is cached on the client and the data is written to the
-server sometime later.
-This permits client writing to occur at the rate of local storage access
-up to the size of the local cache.
-Also, for cases where file truncation/deletion occurs shortly after writing,
-the write to the server may be avoided since the data has already been
-deleted, reducing server write load.
-There are some obvious drawbacks to this approach.
-For any Sprite-like system to maintain
-full consistency, the server must "callback" to the client to cause the
-delayed writes to be written back to the server when write sharing is about to
-occur.
-There are also problems with the propagation of errors
-back to the client process that issued the write system call.
-The reason for this is that
-the system call has already returned without reporting an error and the
-process may also have already terminated.
-As well, there is a risk of the loss of recently written data if the client
-crashes before the data is written back to the server.
-.pp
-A compromise between these two alternatives is asynchronous writing, where
-the write to the server is initiated during the write system call but the write system
-call returns before the write completes.
-This approach minimizes the risk of data loss due to a client crash, but negates
-the possibility of reducing server write load by throwing writes away when
-a file is truncated or deleted.
-.pp
-NFS implementations usually do a mix of asynchronous and delayed writing
-but push all writes to the server upon close, in order to maintain open/close
-consistency.
-Pushing the delayed writes on close
-negates much of the performance advantage of delayed writing, since the
-delays that were avoided in the write system calls are observed in the close
-system call.
-Akin to Sprite, the NQNFS protocol does delayed writing in an effort to achieve
-good client performance and uses a callback mechanism to maintain full cache
-consistency.
-.sh 1 "Related Work"
-.pp
-There has been a great deal of effort put into improving the performance and
-consistency of the NFS protocol. This work can be put in two categories.
-The first category consists of implementation enhancements for the NFS protocol and
-the second involves modifications to the protocol.
-.pp
-The work done on implementation enhancements has attacked two problem areas,
-NFS server write performance and RPC transport problems.
-Server write performance is a major problem for NFS, in part due to the
-requirement to push all writes to the server upon close and in part due
-to the fact that, for writes, all data and meta-data must be committed to
-non-volatile storage before the server replies to the write RPC.
-The Prestoserve\(tm\(dg
-[Moran90]
-system uses non-volatile RAM as a buffer for recently written data on the server,
-so that the write RPC replies can be returned to the client before the data is written to the
-disk surface.
-Write gathering [Juszczak94] is a software technique used on the server where a write
-RPC request is delayed for a short time in the hope that another contiguous
-write request will arrive, so that they can be merged into one write operation.
-Since the replies to all of the merged writes are not returned to the client until the write
-operation is completed, this delay does not violate the protocol.
-When write operations are merged, the number of disk writes can be reduced,
-improving server write performance.
-Although either of the above reduces write RPC response time for the server,
-it cannot be reduced to zero, and so, any client side caching mechanism
-that reduces write RPC load or client dependence on server RPC response time
-should still improve overall performance.
-Good client side caching should be complementary to these server techniques,
-although client performance improvements as a result of caching may be less
-dramatic when these techniques are used.
-.pp
-In NFS, each Sun RPC request is packaged in a UDP datagram for transmission
-to the server. A timer is started, and if a timeout occurs before the corresponding
-RPC reply is received, the RPC request is retransmitted.
-There are two problems with this model.
-First, when a retransmit timeout occurs, the RPC may be redone, instead of
-simply retransmitting the RPC request message to the server. A recent-request
-cache can be used on the server to minimize the negative impact of redoing
-RPCs [Juszczak89].
-The second problem is that a large UDP datagram, such as a read reply or
-write request, must be fragmented by IP and if any one IP fragment is lost in
-transit, the entire UDP datagram is lost [Kent87]. Since entire requests and replies
-are packaged in a single UDP datagram, this puts an upper bound on the read/write
-data size (8 kbytes).
-.pp
-Adjusting the retransmit timeout (RTT) interval dynamically and applying a
-congestion window on outstanding requests has been shown to be of some help
-[Nowicki89] with the retransmission problem.
-An alternative to this is to use TCP transport to deliver the RPC messages
-reliably [Macklem90] and one of the performance results in this paper
-shows the effects of this further.
-.pp
-Srinivasan and Mogul [Srinivasan89] enhanced the NFS protocol to use the Sprite cache
-consistency algorithm in an effort to improve performance and to provide
-full client cache consistency.
-This experimental implementation demonstrated significantly better
-performance than NFS, but suffered from a lack of crash recovery support.
-The NQNFS protocol design borrowed heavily from this work, but differed
-from the Sprite algorithm by using Leases instead of file open state
-to detect write sharing.
-The decision to use Leases was made primarily to avoid the crash recovery
-problem.
-More recent work by the Sprite group [Baker91] and Mogul [Mogul93] has
-addressed the crash recovery problem, making this design tradeoff more
-questionable now.
-.pp
-Sun has recently updated the NFS protocol to Version 3 [SUN93], using some
-changes similar to NQNFS to address various issues. The Version 3 protocol
-uses 64-bit file sizes and offsets, provides a Readdir_and_Lookup RPC and
-an access RPC.
-It also provides cache hints, to permit a client to be able to determine
-whether a file modification is the result of that client's write or some
-other client's write.
-It would be possible to add either Spritely NFS or NQNFS support for cache
-consistency to the NFS Version 3 protocol.
-.sh 1 "NQNFS Consistency Protocol and Recovery"
-.pp
-The NQNFS cache consistency protocol uses a somewhat Sprite-like [Nelson88]
-mechanism, but is based on Leases [Gray89] instead of hard server state information
-about open files.
-The basic principle is that the server disables client caching of files whenever
-concurrent write sharing could occur, by performing a server-to-client
-callback,
-forcing the client to flush its caches and to do all subsequent I/O on the file with
-synchronous RPCs.
-A Sprite server maintains a record of the open state of files for
-all clients and uses this to determine when concurrent write sharing might
-occur.
-This \fIopen state\fR information might also be referred to as an infinite-term
-lease for the file, with explicit lease cancellation.
-NQNFS, on the other hand, uses a short-term lease that expires due to timeout
-after a maximum of one minute, unless explicitly renewed by the client.
-The fundamental difference is that an NQNFS client must keep renewing
-a lease to use cached data whereas a Sprite client assumes the data is valid until canceled
-by the server
-or the file is closed.
-Using leases permits the server to remain "stateless," since the soft
-state information, which consists of the set of current leases, is
-moot after one minute, when all the leases expire.
-.pp
-Whenever a client wishes to access a file's data it must hold one of
-three types of lease: read-caching, write-caching or non-caching.
-The latter type requires that all file operations be done synchronously with
-the server via the appropriate RPCs.
-.pp
-A read-caching lease allows for client data caching but no modifications
-may be done.
-It may, however, be shared between multiple clients. Diagram 1 shows a typical
-read-caching scenario. The vertical solid black lines depict the lease records.
-Note that the time lines are nowhere near to scale, since a client/server
-interaction will normally take less than one hundred milliseconds, whereas the
-normal lease duration is thirty seconds.
-Every lease includes a \fImodrev\fR value, which changes upon every modification
-of the file. It may be used to check to see if data cached on the client is
-still current.
-.pp
-A write-caching lease permits delayed write caching,
-but requires that all data be pushed to the server when the lease expires
-or is terminated by an eviction callback.
-When a write-caching lease has almost expired, the client will attempt to
-extend the lease if the file is still open, but is required to push the delayed writes to the server
-if renewal fails (as depicted by diagram 2).
-The writes may not arrive at the server until after the write lease has
-expired on the client, but this does not result in a consistency problem,
-so long as the write lease is still valid on the server.
-Note that, in diagram 2, the lease record on the server remains current after
-the expiry time, due to the conditions mentioned in section 5.
-If a write RPC is done on the server after the write lease has expired on
-the server, this could be considered an error since consistency could be
-lost, but it is not handled as such by NQNFS.
-.pp
-Diagram 3 depicts how read and write leases are replaced by a non-caching
-lease when there is the potential for write sharing.
-.(z
-.sp
-.PS
-.ps
-.ps 50
-line from 0.738,5.388 to 1.238,5.388
-.ps
-.ps 10
-dashwid = 0.050i
-line dashed from 1.488,10.075 to 1.488,5.450
-line dashed from 2.987,10.075 to 2.987,5.450
-line dashed from 4.487,10.075 to 4.487,5.450
-.ps
-.ps 50
-line from 4.487,7.013 to 4.487,5.950
-line from 2.987,7.700 to 2.987,5.950 to 2.987,6.075
-line from 1.488,7.513 to 1.488,5.950
-line from 2.987,9.700 to 2.987,8.325
-line from 1.488,9.450 to 1.488,8.325
-.ps
-.ps 10
-line from 2.987,6.450 to 4.487,6.200
-line from 4.385,6.192 to 4.487,6.200 to 4.393,6.241
-line from 4.487,6.888 to 2.987,6.575
-line from 3.080,6.620 to 2.987,6.575 to 3.090,6.571
-line from 2.987,7.263 to 4.487,7.013
-line from 4.385,7.004 to 4.487,7.013 to 4.393,7.054
-line from 4.487,7.638 to 2.987,7.388
-line from 3.082,7.429 to 2.987,7.388 to 3.090,7.379
-line from 2.987,6.888 to 1.488,6.575
-line from 1.580,6.620 to 1.488,6.575 to 1.590,6.571
-line from 1.488,7.200 to 2.987,6.950
-line from 2.885,6.942 to 2.987,6.950 to 2.893,6.991
-line from 2.987,7.700 to 1.488,7.513
-line from 1.584,7.550 to 1.488,7.513 to 1.590,7.500
-line from 1.488,8.012 to 2.987,7.763
-line from 2.885,7.754 to 2.987,7.763 to 2.893,7.804
-line from 2.987,9.012 to 1.488,8.825
-line from 1.584,8.862 to 1.488,8.825 to 1.590,8.813
-line from 1.488,9.325 to 2.987,9.137
-line from 2.885,9.125 to 2.987,9.137 to 2.891,9.175
-line from 2.987,9.637 to 1.488,9.450
-line from 1.584,9.487 to 1.488,9.450 to 1.590,9.438
-line from 1.488,9.887 to 2.987,9.700
-line from 2.885,9.688 to 2.987,9.700 to 2.891,9.737
-.ps
-.ps 12
-.ft
-.ft R
-"Lease valid on machine" at 1.363,5.296 ljust
-"with same modrev" at 1.675,7.421 ljust
-"miss)" at 2.612,9.233 ljust
-"(cache" at 2.300,9.358 ljust
-.ps
-.ps 14
-"Diagram #1: Read Caching Leases" at 0.738,5.114 ljust
-"Client B" at 4.112,10.176 ljust
-"Server" at 2.612,10.176 ljust
-"Client A" at 0.925,10.176 ljust
-.ps
-.ps 12
-"from cache" at 4.675,6.546 ljust
-"Read syscalls" at 4.675,6.796 ljust
-"Reply" at 3.737,6.108 ljust
-"(cache miss)" at 3.675,6.421 ljust
-"Read req" at 3.737,6.608 ljust
-"to lease" at 3.112,6.796 ljust
-"Client B added" at 3.112,6.983 ljust
-"Reply" at 3.237,7.296 ljust
-"Read + lease req" at 3.175,7.671 ljust
-"Read syscall" at 4.675,7.608 ljust
-"Reply" at 1.675,6.796 ljust
-"miss)" at 2.487,7.108 ljust
-"Read req (cache" at 1.675,7.233 ljust
-"from cache" at 0.425,6.296 ljust
-"Read syscalls" at 0.425,6.546 ljust
-"cache" at 0.425,6.858 ljust
-"so can still" at 0.425,7.108 ljust
-"Modrev same" at 0.425,7.358 ljust
-"Reply" at 1.675,7.671 ljust
-"Get lease req" at 1.675,8.108 ljust
-"Read syscall" at 0.425,7.983 ljust
-"Lease times out" at 0.425,8.296 ljust
-"from cache" at 0.425,9.046 ljust
-"Read syscalls" at 0.425,9.296 ljust
-"for Client A" at 3.112,9.296 ljust
-"Read caching lease" at 3.112,9.483 ljust
-"Reply" at 1.675,8.983 ljust
-"Read req" at 1.675,9.358 ljust
-"Reply" at 1.675,9.608 ljust
-"Read + lease req" at 1.675,9.921 ljust
-"Read syscall" at 0.425,9.921 ljust
-.ps
-.ft
-.PE
-.sp
-.)z
-.(z
-.sp
-.PS
-.ps
-.ps 50
-line from 1.175,5.700 to 1.300,5.700
-line from 0.738,5.700 to 1.175,5.700
-line from 2.987,6.638 to 2.987,6.075
-.ps
-.ps 10
-dashwid = 0.050i
-line dashed from 2.987,6.575 to 2.987,5.950
-line dashed from 1.488,6.575 to 1.488,5.888
-.ps
-.ps 50
-line from 2.987,9.762 to 2.987,6.638
-line from 1.488,9.450 to 1.488,7.700
-.ps
-.ps 10
-line from 2.987,6.763 to 1.488,6.575
-line from 1.584,6.612 to 1.488,6.575 to 1.590,6.563
-line from 1.488,7.013 to 2.987,6.825
-line from 2.885,6.813 to 2.987,6.825 to 2.891,6.862
-line from 2.987,7.325 to 1.488,7.075
-line from 1.582,7.116 to 1.488,7.075 to 1.590,7.067
-line from 1.488,7.700 to 2.987,7.388
-line from 2.885,7.383 to 2.987,7.388 to 2.895,7.432
-line from 2.987,8.575 to 1.488,8.325
-line from 1.582,8.366 to 1.488,8.325 to 1.590,8.317
-line from 1.488,8.887 to 2.987,8.637
-line from 2.885,8.629 to 2.987,8.637 to 2.893,8.679
-line from 2.987,9.637 to 1.488,9.450
-line from 1.584,9.487 to 1.488,9.450 to 1.590,9.438
-line from 1.488,9.887 to 2.987,9.762
-line from 2.886,9.746 to 2.987,9.762 to 2.890,9.796
-line dashed from 2.987,10.012 to 2.987,6.513
-line dashed from 1.488,10.012 to 1.488,6.513
-.ps
-.ps 12
-.ft
-.ft R
-"write" at 4.237,5.921 ljust
-"Lease valid on machine" at 1.425,5.733 ljust
-.ps
-.ps 14
-"Diagram #2: Write Caching Lease" at 0.738,5.551 ljust
-"Server" at 2.675,10.114 ljust
-"Client A" at 1.113,10.114 ljust
-.ps
-.ps 12
-"seconds after last" at 3.112,5.921 ljust
-"Expires write_slack" at 3.112,6.108 ljust
-"due to write activity" at 3.112,6.608 ljust
-"Expiry delayed" at 3.112,6.796 ljust
-"Lease times out" at 3.112,7.233 ljust
-"Lease renewed" at 3.175,8.546 ljust
-"Lease for client A" at 3.175,9.358 ljust
-"Write caching" at 3.175,9.608 ljust
-"Reply" at 1.675,6.733 ljust
-"Write req" at 1.988,7.046 ljust
-"Reply" at 1.675,7.233 ljust
-"Write req" at 1.675,7.796 ljust
-"Lease expires" at 0.487,7.733 ljust
-"Close syscall" at 0.487,8.108 ljust
-"lease granted" at 1.675,8.546 ljust
-"Get write lease" at 1.675,8.921 ljust
-"before expiry" at 0.487,8.608 ljust
-"Lease renewal" at 0.487,8.796 ljust
-"syscalls" at 0.487,9.046 ljust
-"Delayed write" at 0.487,9.233 ljust
-"lease granted" at 1.675,9.608 ljust
-"Get write lease req" at 1.675,9.921 ljust
-"Write syscall" at 0.487,9.858 ljust
-.ps
-.ft
-.PE
-.sp
-.)z
-.(z
-.sp
-.PS
-.ps
-.ps 50
-line from 0.613,2.638 to 1.238,2.638
-line from 1.488,4.075 to 1.488,3.638
-line from 2.987,4.013 to 2.987,3.575
-line from 4.487,4.013 to 4.487,3.575
-.ps
-.ps 10
-line from 2.987,3.888 to 4.487,3.700
-line from 4.385,3.688 to 4.487,3.700 to 4.391,3.737
-line from 4.487,4.138 to 2.987,3.950
-line from 3.084,3.987 to 2.987,3.950 to 3.090,3.938
-line from 2.987,4.763 to 4.487,4.450
-line from 4.385,4.446 to 4.487,4.450 to 4.395,4.495
-.ps
-.ps 50
-line from 4.487,4.438 to 4.487,4.013
-.ps
-.ps 10
-line from 4.487,5.138 to 2.987,4.888
-line from 3.082,4.929 to 2.987,4.888 to 3.090,4.879
-.ps
-.ps 50
-line from 4.487,6.513 to 4.487,5.513
-line from 4.487,6.513 to 4.487,6.513 to 4.487,5.513
-line from 2.987,5.450 to 2.987,5.200
-line from 1.488,5.075 to 1.488,4.075
-line from 2.987,5.263 to 2.987,4.013
-line from 2.987,7.700 to 2.987,5.325
-line from 4.487,7.575 to 4.487,6.513
-line from 1.488,8.512 to 1.488,8.075
-line from 2.987,8.637 to 2.987,8.075
-line from 2.987,9.637 to 2.987,8.825
-line from 1.488,9.450 to 1.488,8.950
-.ps
-.ps 10
-line from 2.987,4.450 to 1.488,4.263
-line from 1.584,4.300 to 1.488,4.263 to 1.590,4.250
-line from 1.488,4.888 to 2.987,4.575
-line from 2.885,4.571 to 2.987,4.575 to 2.895,4.620
-line from 2.987,5.263 to 1.488,5.075
-line from 1.584,5.112 to 1.488,5.075 to 1.590,5.063
-line from 4.487,5.513 to 2.987,5.325
-line from 3.084,5.362 to 2.987,5.325 to 3.090,5.313
-line from 2.987,5.700 to 4.487,5.575
-line from 4.386,5.558 to 4.487,5.575 to 4.390,5.608
-line from 4.487,6.013 to 2.987,5.825
-line from 3.084,5.862 to 2.987,5.825 to 3.090,5.813
-line from 2.987,6.200 to 4.487,6.075
-line from 4.386,6.058 to 4.487,6.075 to 4.390,6.108
-line from 4.487,6.450 to 2.987,6.263
-line from 3.084,6.300 to 2.987,6.263 to 3.090,6.250
-line from 2.987,6.700 to 4.487,6.513
-line from 4.385,6.500 to 4.487,6.513 to 4.391,6.550
-line from 1.488,6.950 to 2.987,6.763
-line from 2.885,6.750 to 2.987,6.763 to 2.891,6.800
-line from 2.987,7.700 to 4.487,7.575
-line from 4.386,7.558 to 4.487,7.575 to 4.390,7.608
-line from 4.487,7.950 to 2.987,7.763
-line from 3.084,7.800 to 2.987,7.763 to 3.090,7.750
-line from 2.987,8.637 to 1.488,8.512
-line from 1.585,8.546 to 1.488,8.512 to 1.589,8.496
-line from 1.488,8.887 to 2.987,8.700
-line from 2.885,8.688 to 2.987,8.700 to 2.891,8.737
-line from 2.987,9.637 to 1.488,9.450
-line from 1.584,9.487 to 1.488,9.450 to 1.590,9.438
-line from 1.488,9.950 to 2.987,9.762
-line from 2.885,9.750 to 2.987,9.762 to 2.891,9.800
-dashwid = 0.050i
-line dashed from 4.487,10.137 to 4.487,2.825
-line dashed from 2.987,10.137 to 2.987,2.825
-line dashed from 1.488,10.137 to 1.488,2.825
-.ps
-.ps 12
-.ft
-.ft R
-"(not cached)" at 4.612,3.858 ljust
-.ps
-.ps 14
-"Diagram #3: Write sharing case" at 0.613,2.239 ljust
-.ps
-.ps 12
-"Write syscall" at 4.675,7.546 ljust
-"Read syscall" at 0.550,9.921 ljust
-.ps
-.ps 14
-"Lease valid on machine" at 1.363,2.551 ljust
-.ps
-.ps 12
-"(can still cache)" at 1.675,8.171 ljust
-"Reply" at 3.800,3.858 ljust
-"Write" at 3.175,4.046 ljust
-"writes" at 4.612,4.046 ljust
-"synchronous" at 4.612,4.233 ljust
-"write syscall" at 4.675,5.108 ljust
-"non-caching lease" at 3.175,4.296 ljust
-"Reply " at 3.175,4.483 ljust
-"req" at 3.175,4.983 ljust
-"Get write lease" at 3.175,5.108 ljust
-"Vacated msg" at 3.175,5.483 ljust
-"to the server" at 4.675,5.858 ljust
-"being flushed to" at 4.675,6.046 ljust
-"Delayed writes" at 4.675,6.233 ljust
-.ps
-.ps 16
-"Server" at 2.675,10.182 ljust
-"Client B" at 3.925,10.182 ljust
-"Client A" at 0.863,10.182 ljust
-.ps
-.ps 12
-"(not cached)" at 0.550,4.733 ljust
-"Read data" at 0.550,4.921 ljust
-"Reply data" at 1.675,4.421 ljust
-"Read request" at 1.675,4.921 ljust
-"lease" at 1.675,5.233 ljust
-"Reply non-caching" at 1.675,5.421 ljust
-"Reply" at 3.737,5.733 ljust
-"Write" at 3.175,5.983 ljust
-"Reply" at 3.737,6.171 ljust
-"Write" at 3.175,6.421 ljust
-"Eviction Notice" at 3.175,6.796 ljust
-"Get read lease" at 1.675,7.046 ljust
-"Read syscall" at 0.550,6.983 ljust
-"being cached" at 4.675,7.171 ljust
-"Delayed writes" at 4.675,7.358 ljust
-"lease" at 3.175,7.233 ljust
-"Reply write caching" at 3.175,7.421 ljust
-"Get write lease" at 3.175,7.983 ljust
-"Write syscall" at 4.675,7.983 ljust
-"with same modrev" at 1.675,8.358 ljust
-"Lease" at 0.550,8.171 ljust
-"Renewed" at 0.550,8.358 ljust
-"Reply" at 1.675,8.608 ljust
-"Get Lease Request" at 1.675,8.983 ljust
-"Read syscall" at 0.550,8.733 ljust
-"from cache" at 0.550,9.108 ljust
-"Read syscall" at 0.550,9.296 ljust
-"Reply " at 1.675,9.671 ljust
-"plus lease" at 2.050,9.983 ljust
-"Read Request" at 1.675,10.108 ljust
-.ps
-.ft
-.PE
-.sp
-.)z
-A write-caching lease is not used in the Stanford V Distributed System [Gray89],
-since synchronous writing is always used. A side effect of this change
-is that the five to ten second lease duration recommended by Gray was found
-to be insufficient to achieve good performance for the write-caching lease.
-Experimentation showed that thirty seconds was about optimal for cases where
-the client and server are connected to the same local area network, so
-thirty seconds is the default lease duration for NQNFS.
-A maximum of twice that value is permitted, since Gray showed that for some
-network topologies, a larger lease duration functions better.
-Although there is an explicit get_lease RPC defined for the protocol,
-most lease requests are piggybacked onto the other RPCs to minimize the
-additional overhead introduced by leasing.
-.sh 2 "Rationale"
-.pp
-Leasing was chosen over hard server state information for the following
-reasons:
-.ip 1.
-The server must maintain state information about all current
-client leases.
-Since at most one lease is allocated for each RPC and the leases expire
-after their lease term,
-the upper bound on the number of current leases is the product of the
-lease term and the server RPC rate.
-In practice, it has been observed that less than 10% of RPCs request new leases
-and since most leases have a term of thirty seconds, the following rule of
-thumb should estimate the number of server lease records:
-.sp
-.nf
- Number of Server Lease Records \(eq 0.1 * 30 * RPC rate
-.fi
-.sp
-Since each lease record occupies 64 bytes of server memory, storing the lease
-records should not be a serious problem.
-If a server has exhausted lease storage, it can simply wait a few seconds
-for a lease to expire and free up a record.
-On the other hand, a Sprite-like server must store records for all files
-currently open by all clients, which can require significant storage for
-a large, heavily loaded server.
-In [Mogul93], it is proposed that a mechanism vaguely similar to paging could be
-used to deal with this for Spritely NFS, but this
-appears to introduce a fair amount of complexity and may limit the
-usefulness of open records for storing other state information, such
-as file locks.
-.ip 2.
-After a server crashes it must recover lease records for
-the current outstanding leases, which actually implies that if it waits
-until all leases have expired, there is no state to recover.
-The server must wait for the maximum lease duration of one minute, and it must serve
-all outstanding write requests resulting from terminated write-caching
-leases before issuing new leases. The one minute delay can be overlapped with
-file system consistency checking (e.g., fsck).
-Because no state must be recovered, a lease-based server, like an NFS server,
-avoids the problem of state recovery after a crash.
-.sp
-There can, however, be problems during crash recovery
-because of a potentially large number of write backs due to terminated
-write-caching leases.
-One of these problems is a "recovery storm" [Baker91], which could occur when
-the server is overloaded by the number of write RPC requests.
-The NQNFS protocol deals with this by replying
-with a return status code called
-try_again_later to all
-RPC requests (except write) until the write requests subside.
-At this time, there has not been sufficient testing of server crash
-recovery while under heavy server load to determine if the try_again_later
-reply is a sufficient solution to the problem.
-The other problem is that consistency will be lost if other RPCs are performed
-before all of the write backs for terminated write-caching leases have completed.
-This is handled by only performing write RPCs until
-no write RPC requests arrive
-for write_slack seconds, where write_slack is set to several times
-the client timeout retransmit interval,
-at which time it is assumed all clients have had an opportunity to send their writes
-to the server.
-.ip 3.
-Another advantage of leasing is that, since leases are required at times when other I/O operations occur,
-lease requests can almost always be piggybacked on other RPCs, avoiding some of the
-overhead associated with the explicit open and close RPCs required by a Sprite-like system.
-Compared with Sprite cache consistency,
-this can result in a significantly lower RPC load (see table #1).
-.sh 1 "Limitations of the NQNFS Protocol"
-.pp
-There is a serious risk when leasing is used for delayed write
-caching.
-If the server is simply too busy to service a lease renewal before a write-caching
-lease terminates, the client will not be able to push the write
-data to the server before the lease has terminated, resulting in
-inconsistency.
-Note that the danger of inconsistency occurs when the server assumes that
-a write-caching lease has terminated before the client has
-had the opportunity to write the data back to the server.
-In an effort to avoid this problem, the NQNFS server does not assume that
-a write-caching lease has terminated until three conditions are met:
-.sp
-.(l
-1 - clock time > (expiry time + clock_skew)
-2 - there is at least one server daemon (nfsd) waiting for an RPC request
-3 - no write RPCs received for leased file within write_slack after the corrected expiry time
-.)l
-.lp
-The first condition ensures that the lease has expired on the client.
-The clock_skew, by default three seconds, must be
-set to a value larger than the maximum time-of-day clock error that is likely to occur
-during the maximum lease duration.
-The second condition attempts to ensure that the client
-is not waiting for replies to any writes that are still queued for service by
-an nfsd. The third condition tries to guarantee that the client has
-transmitted all write requests to the server, since write_slack is set to
-several times the client's timeout retransmit interval.
-.pp
-There are also certain file system semantics that are problematic for both NFS and NQNFS,
-due to the
-lack of state information maintained by the
-server. If a file is unlinked on one client while open on another it will
-be removed from the file server, resulting in failed file accesses on the
-client that has the file open.
-If the file system on the server is out of space or the client user's disk
-quota has been exceeded, a delayed write can fail long after the write system
-call was successfully completed.
-With NFS this error will be detected by the close system call, since
-the delayed writes are pushed upon close. With NQNFS however, the delayed write
-RPC may not occur until after the close system call, possibly even after the process
-has exited.
-Therefore,
-if a process must check for write errors,
-a system call such as \fIfsync\fR must be used.
-.pp
-Another problem occurs when a process on one client is
-running an executable file
-and a process on another client starts to write to the file. The read lease on
-the first client is terminated by the server, but the client has no recourse but
-to terminate the process, since the process is already in progress on the old
-executable.
-.pp
-The NQNFS protocol does not support file locking, since a file lock would have
-to involve hard state information that is recovered after a crash.
-.sh 1 "Other NQNFS Protocol Features"
-.pp
-NQNFS also includes a variety of minor modifications to the NFS protocol, in an
-attempt to address various limitations.
-The protocol uses 64-bit file sizes and offsets in order to handle large files.
-TCP transport may be used as an alternative to UDP
-for cases where UDP does not perform well.
-Transport mechanisms
-such as TCP also permit the use of much larger read/write data sizes,
-which might improve performance in certain environments.
-.pp
-The NQNFS protocol replaces the Readdir RPC with a Readdir_and_Lookup
-RPC that returns the file handle and attributes for each file in the
-directory as well as name and file id number.
-This additional information may then be loaded into the lookup and file-attribute
-caches on the client.
-Thus, for cases such as "ls -l", the \fIstat\fR system calls can be performed
-locally without doing any lookup or getattr RPCs.
-Another additional RPC is the Access RPC that checks for file
-accessibility against the server. This is necessary since in some cases the
-client user ID is mapped to a different user on the server and doing the
-access check locally on the client using file attributes and client credentials is
-not correct.
-One case where this becomes necessary is when the NQNFS mount point is using
-Kerberos authentication, where the Kerberos authentication ticket is translated
-to credentials on the server that are mapped to the client side user id.
-For further details on the protocol, see [Macklem93].
-.sh 1 "Performance"
-.pp
-In order to evaluate the effectiveness of the NQNFS protocol,
-a benchmark was used that was
-designed to typify
-real work on the client workstation.
-Benchmarks, such as Laddis [Wittle93], that perform server load characterization
-are not appropriate for this work, since it is primarily client caching
-efficiency that needs to be evaluated.
-Since these tests are measuring overall client system performance and
-not just the performance of the file system,
-each sequence of runs was performed on identical hardware and operating system in order to factor out the system
-components affecting performance other than the file system protocol.
-.pp
-The equipment used for all the benchmarks consisted of members of the DECstation\(tm\(dg
-family of workstations using the MIPS\(tm\(sc RISC architecture.
-The operating system running on these systems was a pre-release version of
-4.4BSD Unix\(tm\(dd.
-For all benchmarks, the file server was a DECstation 2100 (10 MIPS) with 8Mbytes of
-memory and a local RZ23 SCSI disk (27msec average access time).
-The clients range in speed from DECstation 2100s
-to a DECstation 5000/25, and always run with six block I/O daemons
-and a 4Mbyte buffer cache, except for the test runs where the
-buffer cache size was the independent variable.
-In all cases /tmp is mounted on the local SCSI disk\**, all machines were
-attached to the same uncongested Ethernet, and ran in single user mode during the benchmarks.
-.(f
-\**Testing using the 4.4BSD MFS [McKusick90] resulted in slightly degraded performance,
-probably because the machines only had 16Mbytes of memory, and so paging
-increased.
-.)f
-Unless noted otherwise, test runs used UDP RPC transport
-and the results given are the average values of four runs.
-.pp
-The benchmark used is the Modified Andrew Benchmark (MAB)
-[Ousterhout90],
-which is a slightly modified version of the benchmark used to characterize
-performance of the Andrew ITC file system [Howard88].
-The MAB was set up with the executable binaries in the remote mounted file
-system and the final load step was commented out, due to a linkage problem
-during testing under 4.4BSD.
-Therefore, these results are not directly comparable to other reported MAB
-results.
-The MAB is made up of five distinct phases:
-.sp
-.ip "1." 10
-Make five directories (no significant cost)
-.ip "2." 10
-Copy a file system subtree to a working directory
-.ip "3." 10
-Get file attributes (stat) of all the working files
-.ip "4." 10
-Search for strings (grep) in the files
-.ip "5." 10
-Compile a library of C sources and archive them
-.lp
-Of the five phases, the fifth is by far the largest and is the one affected most
-by client caching mechanisms.
-The results for phase #1 are invariant over all
-the caching mechanisms.
-.sh 2 "Buffer Cache Size Tests"
-.pp
-The first experiment was done to see what effect changing the size of the
-buffer cache would have on client performance. A single DECstation 5000/25
-was used to do a series of runs of MAB with different buffer cache sizes
-for four variations of the file system protocol. The four variations are
-as follows:
-.ip "Case 1:" 10
-NFS - The NFS protocol as implemented in 4.4BSD
-.ip "Case 2:" 10
-Leases - The NQNFS protocol using leases for cache consistency
-.ip "Case 3:" 10
-Leases, Rdirlookup - The NQNFS protocol using leases for cache consistency
-and with the readdir RPC replaced by Readdir_and_Lookup
-.ip "Case 4:" 10
-Leases, Attrib leases, Rdirlookup - The NQNFS protocol using leases for
-cache consistency, with the readdir
-RPC replaced by the Readdir_and_Lookup,
-and requiring a valid lease not only for file-data access, but also for file-attribute access.
-.lp
-As can be seen in figure 1, the buffer cache achieves about optimal
-performance for the range of two to ten megabytes in size. At eleven
-megabytes in size, the system pages heavily and the runs did not
-complete in a reasonable time. Even at 64Kbytes, the buffer cache improves
-performance over no buffer cache by a significant margin of 136-148 seconds
-versus 239 seconds.
-This may be due, in part, to the fact that the Compile Phase of the MAB
-uses a rather small working set of file data.
-All variants of NQNFS achieve about
-the same performance, running around 30% faster than NFS, with a slightly
-larger difference for large buffer cache sizes.
-Based on these results, all remaining tests were run with the buffer cache
-size set to 4Mbytes.
-Although I do not know what causes the local peak in the curves between 0.5 and 2 megabytes,
-there is some indication that contention for buffer cache blocks, between the update process
-(which pushes delayed writes to the server every thirty seconds) and the I/O
-system calls, may be involved.
-.(z
-.PS
-.ps
-.ps 10
-dashwid = 0.050i
-line dashed from 0.900,7.888 to 4.787,7.888
-line dashed from 0.900,7.888 to 0.900,10.262
-line from 0.900,7.888 to 0.963,7.888
-line from 4.787,7.888 to 4.725,7.888
-line from 0.900,8.188 to 0.963,8.188
-line from 4.787,8.188 to 4.725,8.188
-line from 0.900,8.488 to 0.963,8.488
-line from 4.787,8.488 to 4.725,8.488
-line from 0.900,8.775 to 0.963,8.775
-line from 4.787,8.775 to 4.725,8.775
-line from 0.900,9.075 to 0.963,9.075
-line from 4.787,9.075 to 4.725,9.075
-line from 0.900,9.375 to 0.963,9.375
-line from 4.787,9.375 to 4.725,9.375
-line from 0.900,9.675 to 0.963,9.675
-line from 4.787,9.675 to 4.725,9.675
-line from 0.900,9.963 to 0.963,9.963
-line from 4.787,9.963 to 4.725,9.963
-line from 0.900,10.262 to 0.963,10.262
-line from 4.787,10.262 to 4.725,10.262
-line from 0.900,7.888 to 0.900,7.950
-line from 0.900,10.262 to 0.900,10.200
-line from 1.613,7.888 to 1.613,7.950
-line from 1.613,10.262 to 1.613,10.200
-line from 2.312,7.888 to 2.312,7.950
-line from 2.312,10.262 to 2.312,10.200
-line from 3.025,7.888 to 3.025,7.950
-line from 3.025,10.262 to 3.025,10.200
-line from 3.725,7.888 to 3.725,7.950
-line from 3.725,10.262 to 3.725,10.200
-line from 4.438,7.888 to 4.438,7.950
-line from 4.438,10.262 to 4.438,10.200
-line from 0.900,7.888 to 4.787,7.888
-line from 4.787,7.888 to 4.787,10.262
-line from 4.787,10.262 to 0.900,10.262
-line from 0.900,10.262 to 0.900,7.888
-line from 3.800,8.775 to 4.025,8.775
-line from 0.925,10.088 to 0.925,10.088
-line from 0.925,10.088 to 0.938,9.812
-line from 0.938,9.812 to 0.988,9.825
-line from 0.988,9.825 to 1.075,9.838
-line from 1.075,9.838 to 1.163,9.938
-line from 1.163,9.938 to 1.250,9.838
-line from 1.250,9.838 to 1.613,9.825
-line from 1.613,9.825 to 2.312,9.750
-line from 2.312,9.750 to 3.025,9.713
-line from 3.025,9.713 to 3.725,9.850
-line from 3.725,9.850 to 4.438,9.875
-dashwid = 0.037i
-line dotted from 3.800,8.625 to 4.025,8.625
-line dotted from 0.925,9.912 to 0.925,9.912
-line dotted from 0.925,9.912 to 0.938,9.887
-line dotted from 0.938,9.887 to 0.988,9.713
-line dotted from 0.988,9.713 to 1.075,9.562
-line dotted from 1.075,9.562 to 1.163,9.562
-line dotted from 1.163,9.562 to 1.250,9.562
-line dotted from 1.250,9.562 to 1.613,9.675
-line dotted from 1.613,9.675 to 2.312,9.363
-line dotted from 2.312,9.363 to 3.025,9.375
-line dotted from 3.025,9.375 to 3.725,9.387
-line dotted from 3.725,9.387 to 4.438,9.450
-line dashed from 3.800,8.475 to 4.025,8.475
-line dashed from 0.925,10.000 to 0.925,10.000
-line dashed from 0.925,10.000 to 0.938,9.787
-line dashed from 0.938,9.787 to 0.988,9.650
-line dashed from 0.988,9.650 to 1.075,9.537
-line dashed from 1.075,9.537 to 1.163,9.613
-line dashed from 1.163,9.613 to 1.250,9.800
-line dashed from 1.250,9.800 to 1.613,9.488
-line dashed from 1.613,9.488 to 2.312,9.375
-line dashed from 2.312,9.375 to 3.025,9.363
-line dashed from 3.025,9.363 to 3.725,9.325
-line dashed from 3.725,9.325 to 4.438,9.438
-dashwid = 0.075i
-line dotted from 3.800,8.325 to 4.025,8.325
-line dotted from 0.925,9.963 to 0.925,9.963
-line dotted from 0.925,9.963 to 0.938,9.750
-line dotted from 0.938,9.750 to 0.988,9.662
-line dotted from 0.988,9.662 to 1.075,9.613
-line dotted from 1.075,9.613 to 1.163,9.613
-line dotted from 1.163,9.613 to 1.250,9.700
-line dotted from 1.250,9.700 to 1.613,9.438
-line dotted from 1.613,9.438 to 2.312,9.463
-line dotted from 2.312,9.463 to 3.025,9.312
-line dotted from 3.025,9.312 to 3.725,9.387
-line dotted from 3.725,9.387 to 4.438,9.425
-.ps
-.ps -1
-.ft
-.ft I
-"0" at 0.825,7.810 rjust
-"20" at 0.825,8.110 rjust
-"40" at 0.825,8.410 rjust
-"60" at 0.825,8.697 rjust
-"80" at 0.825,8.997 rjust
-"100" at 0.825,9.297 rjust
-"120" at 0.825,9.597 rjust
-"140" at 0.825,9.885 rjust
-"160" at 0.825,10.185 rjust
-"0" at 0.900,7.660
-"2" at 1.613,7.660
-"4" at 2.312,7.660
-"6" at 3.025,7.660
-"8" at 3.725,7.660
-"10" at 4.438,7.660
-"Time (sec)" at 0.150,8.997
-"Buffer Cache Size (MBytes)" at 2.837,7.510
-"Figure #1: MAB Phase 5 (compile)" at 2.837,10.335
-"NFS" at 3.725,8.697 rjust
-"Leases" at 3.725,8.547 rjust
-"Leases, Rdirlookup" at 3.725,8.397 rjust
-"Leases, Attrib leases, Rdirlookup" at 3.725,8.247 rjust
-.ps
-.ft
-.PE
-.)z
-.sh 2 "Multiple Client Load Tests"
-.pp
-During preliminary runs of the MAB, it was observed that the server RPC
-counts were reduced significantly by NQNFS as compared to NFS (table 1).
-(Spritely NFS and Ultrix\(tm4.3/NFS numbers were taken from [Mogul93]
-and are not directly comparable, due to numerous differences in the
-experimental setup including deletion of the load step from phase 5.)
-This suggests
-that the NQNFS protocol might scale better with
-respect to the number of clients accessing the server.
-The experiment described in this section
-ran the MAB on one to ten clients concurrently, to observe the
-effects of heavier server load.
-The clients were started at roughly the same time by pressing all the
-<return> keys together and, although not synchronized beyond that point,
-all clients would finish the test run within about two seconds of each
-other.
-This was not a realistic load of N active clients, but it did
-result in a reproducible increasing client load on the server.
-The results for the four variants
-are plotted in figures 2-5.
-.(z
-.ps -1
-.R
-.TS
-box, center;
-c s s s s s s s
-c c c c c c c c
-l | n n n n n n n.
-Table #1: MAB RPC Counts
-RPC Getattr Read Write Lookup Other GetLease/Open-Close Total
-_
-BSD/NQNFS 277 139 306 575 294 127 1718
-BSD/NFS 1210 506 451 489 238 0 2894
-Spritely NFS 259 836 192 535 306 1467 3595
-Ultrix4.3/NFS 1225 1186 476 810 305 0 4002
-.TE
-.ps
-.)z
-.pp
-For the MAB benchmark, the NQNFS protocol reduces the RPC counts significantly,
-but with a minimum of extra overhead (the GetLease/Open-Close count).
-.(z
-.PS
-.ps
-.ps 10
-dashwid = 0.050i
-line dashed from 0.900,7.888 to 4.787,7.888
-line dashed from 0.900,7.888 to 0.900,10.262
-line from 0.900,7.888 to 0.963,7.888
-line from 4.787,7.888 to 4.725,7.888
-line from 0.900,8.225 to 0.963,8.225
-line from 4.787,8.225 to 4.725,8.225
-line from 0.900,8.562 to 0.963,8.562
-line from 4.787,8.562 to 4.725,8.562
-line from 0.900,8.900 to 0.963,8.900
-line from 4.787,8.900 to 4.725,8.900
-line from 0.900,9.250 to 0.963,9.250
-line from 4.787,9.250 to 4.725,9.250
-line from 0.900,9.588 to 0.963,9.588
-line from 4.787,9.588 to 4.725,9.588
-line from 0.900,9.925 to 0.963,9.925
-line from 4.787,9.925 to 4.725,9.925
-line from 0.900,10.262 to 0.963,10.262
-line from 4.787,10.262 to 4.725,10.262
-line from 0.900,7.888 to 0.900,7.950
-line from 0.900,10.262 to 0.900,10.200
-line from 1.613,7.888 to 1.613,7.950
-line from 1.613,10.262 to 1.613,10.200
-line from 2.312,7.888 to 2.312,7.950
-line from 2.312,10.262 to 2.312,10.200
-line from 3.025,7.888 to 3.025,7.950
-line from 3.025,10.262 to 3.025,10.200
-line from 3.725,7.888 to 3.725,7.950
-line from 3.725,10.262 to 3.725,10.200
-line from 4.438,7.888 to 4.438,7.950
-line from 4.438,10.262 to 4.438,10.200
-line from 0.900,7.888 to 4.787,7.888
-line from 4.787,7.888 to 4.787,10.262
-line from 4.787,10.262 to 0.900,10.262
-line from 0.900,10.262 to 0.900,7.888
-line from 3.800,8.900 to 4.025,8.900
-line from 1.250,8.325 to 1.250,8.325
-line from 1.250,8.325 to 1.613,8.500
-line from 1.613,8.500 to 2.312,8.825
-line from 2.312,8.825 to 3.025,9.175
-line from 3.025,9.175 to 3.725,9.613
-line from 3.725,9.613 to 4.438,10.012
-dashwid = 0.037i
-line dotted from 3.800,8.750 to 4.025,8.750
-line dotted from 1.250,8.275 to 1.250,8.275
-line dotted from 1.250,8.275 to 1.613,8.412
-line dotted from 1.613,8.412 to 2.312,8.562
-line dotted from 2.312,8.562 to 3.025,9.088
-line dotted from 3.025,9.088 to 3.725,9.375
-line dotted from 3.725,9.375 to 4.438,10.000
-line dashed from 3.800,8.600 to 4.025,8.600
-line dashed from 1.250,8.250 to 1.250,8.250
-line dashed from 1.250,8.250 to 1.613,8.438
-line dashed from 1.613,8.438 to 2.312,8.637
-line dashed from 2.312,8.637 to 3.025,9.088
-line dashed from 3.025,9.088 to 3.725,9.525
-line dashed from 3.725,9.525 to 4.438,10.075
-dashwid = 0.075i
-line dotted from 3.800,8.450 to 4.025,8.450
-line dotted from 1.250,8.262 to 1.250,8.262
-line dotted from 1.250,8.262 to 1.613,8.425
-line dotted from 1.613,8.425 to 2.312,8.613
-line dotted from 2.312,8.613 to 3.025,9.137
-line dotted from 3.025,9.137 to 3.725,9.512
-line dotted from 3.725,9.512 to 4.438,9.988
-.ps
-.ps -1
-.ft
-.ft I
-"0" at 0.825,7.810 rjust
-"20" at 0.825,8.147 rjust
-"40" at 0.825,8.485 rjust
-"60" at 0.825,8.822 rjust
-"80" at 0.825,9.172 rjust
-"100" at 0.825,9.510 rjust
-"120" at 0.825,9.847 rjust
-"140" at 0.825,10.185 rjust
-"0" at 0.900,7.660
-"2" at 1.613,7.660
-"4" at 2.312,7.660
-"6" at 3.025,7.660
-"8" at 3.725,7.660
-"10" at 4.438,7.660
-"Time (sec)" at 0.150,8.997
-"Number of Clients" at 2.837,7.510
-"Figure #2: MAB Phase 2 (copying)" at 2.837,10.335
-"NFS" at 3.725,8.822 rjust
-"Leases" at 3.725,8.672 rjust
-"Leases, Rdirlookup" at 3.725,8.522 rjust
-"Leases, Attrib leases, Rdirlookup" at 3.725,8.372 rjust
-.ps
-.ft
-.PE
-.)z
-.(z
-.PS
-.ps
-.ps 10
-dashwid = 0.050i
-line dashed from 0.900,7.888 to 4.787,7.888
-line dashed from 0.900,7.888 to 0.900,10.262
-line from 0.900,7.888 to 0.963,7.888
-line from 4.787,7.888 to 4.725,7.888
-line from 0.900,8.188 to 0.963,8.188
-line from 4.787,8.188 to 4.725,8.188
-line from 0.900,8.488 to 0.963,8.488
-line from 4.787,8.488 to 4.725,8.488
-line from 0.900,8.775 to 0.963,8.775
-line from 4.787,8.775 to 4.725,8.775
-line from 0.900,9.075 to 0.963,9.075
-line from 4.787,9.075 to 4.725,9.075
-line from 0.900,9.375 to 0.963,9.375
-line from 4.787,9.375 to 4.725,9.375
-line from 0.900,9.675 to 0.963,9.675
-line from 4.787,9.675 to 4.725,9.675
-line from 0.900,9.963 to 0.963,9.963
-line from 4.787,9.963 to 4.725,9.963
-line from 0.900,10.262 to 0.963,10.262
-line from 4.787,10.262 to 4.725,10.262
-line from 0.900,7.888 to 0.900,7.950
-line from 0.900,10.262 to 0.900,10.200
-line from 1.613,7.888 to 1.613,7.950
-line from 1.613,10.262 to 1.613,10.200
-line from 2.312,7.888 to 2.312,7.950
-line from 2.312,10.262 to 2.312,10.200
-line from 3.025,7.888 to 3.025,7.950
-line from 3.025,10.262 to 3.025,10.200
-line from 3.725,7.888 to 3.725,7.950
-line from 3.725,10.262 to 3.725,10.200
-line from 4.438,7.888 to 4.438,7.950
-line from 4.438,10.262 to 4.438,10.200
-line from 0.900,7.888 to 4.787,7.888
-line from 4.787,7.888 to 4.787,10.262
-line from 4.787,10.262 to 0.900,10.262
-line from 0.900,10.262 to 0.900,7.888
-line from 3.800,8.775 to 4.025,8.775
-line from 1.250,8.975 to 1.250,8.975
-line from 1.250,8.975 to 1.613,8.963
-line from 1.613,8.963 to 2.312,8.988
-line from 2.312,8.988 to 3.025,9.037
-line from 3.025,9.037 to 3.725,9.062
-line from 3.725,9.062 to 4.438,9.100
-dashwid = 0.037i
-line dotted from 3.800,8.625 to 4.025,8.625
-line dotted from 1.250,9.312 to 1.250,9.312
-line dotted from 1.250,9.312 to 1.613,9.287
-line dotted from 1.613,9.287 to 2.312,9.675
-line dotted from 2.312,9.675 to 3.025,9.262
-line dotted from 3.025,9.262 to 3.725,9.738
-line dotted from 3.725,9.738 to 4.438,9.512
-line dashed from 3.800,8.475 to 4.025,8.475
-line dashed from 1.250,9.400 to 1.250,9.400
-line dashed from 1.250,9.400 to 1.613,9.287
-line dashed from 1.613,9.287 to 2.312,9.575
-line dashed from 2.312,9.575 to 3.025,9.300
-line dashed from 3.025,9.300 to 3.725,9.613
-line dashed from 3.725,9.613 to 4.438,9.512
-dashwid = 0.075i
-line dotted from 3.800,8.325 to 4.025,8.325
-line dotted from 1.250,9.400 to 1.250,9.400
-line dotted from 1.250,9.400 to 1.613,9.412
-line dotted from 1.613,9.412 to 2.312,9.700
-line dotted from 2.312,9.700 to 3.025,9.537
-line dotted from 3.025,9.537 to 3.725,9.938
-line dotted from 3.725,9.938 to 4.438,9.812
-.ps
-.ps -1
-.ft
-.ft I
-"0" at 0.825,7.810 rjust
-"5" at 0.825,8.110 rjust
-"10" at 0.825,8.410 rjust
-"15" at 0.825,8.697 rjust
-"20" at 0.825,8.997 rjust
-"25" at 0.825,9.297 rjust
-"30" at 0.825,9.597 rjust
-"35" at 0.825,9.885 rjust
-"40" at 0.825,10.185 rjust
-"0" at 0.900,7.660
-"2" at 1.613,7.660
-"4" at 2.312,7.660
-"6" at 3.025,7.660
-"8" at 3.725,7.660
-"10" at 4.438,7.660
-"Time (sec)" at 0.150,8.997
-"Number of Clients" at 2.837,7.510
-"Figure #3: MAB Phase 3 (stat/find)" at 2.837,10.335
-"NFS" at 3.725,8.697 rjust
-"Leases" at 3.725,8.547 rjust
-"Leases, Rdirlookup" at 3.725,8.397 rjust
-"Leases, Attrib leases, Rdirlookup" at 3.725,8.247 rjust
-.ps
-.ft
-.PE
-.)z
-.(z
-.PS
-.ps
-.ps 10
-dashwid = 0.050i
-line dashed from 0.900,7.888 to 4.787,7.888
-line dashed from 0.900,7.888 to 0.900,10.262
-line from 0.900,7.888 to 0.963,7.888
-line from 4.787,7.888 to 4.725,7.888
-line from 0.900,8.188 to 0.963,8.188
-line from 4.787,8.188 to 4.725,8.188
-line from 0.900,8.488 to 0.963,8.488
-line from 4.787,8.488 to 4.725,8.488
-line from 0.900,8.775 to 0.963,8.775
-line from 4.787,8.775 to 4.725,8.775
-line from 0.900,9.075 to 0.963,9.075
-line from 4.787,9.075 to 4.725,9.075
-line from 0.900,9.375 to 0.963,9.375
-line from 4.787,9.375 to 4.725,9.375
-line from 0.900,9.675 to 0.963,9.675
-line from 4.787,9.675 to 4.725,9.675
-line from 0.900,9.963 to 0.963,9.963
-line from 4.787,9.963 to 4.725,9.963
-line from 0.900,10.262 to 0.963,10.262
-line from 4.787,10.262 to 4.725,10.262
-line from 0.900,7.888 to 0.900,7.950
-line from 0.900,10.262 to 0.900,10.200
-line from 1.613,7.888 to 1.613,7.950
-line from 1.613,10.262 to 1.613,10.200
-line from 2.312,7.888 to 2.312,7.950
-line from 2.312,10.262 to 2.312,10.200
-line from 3.025,7.888 to 3.025,7.950
-line from 3.025,10.262 to 3.025,10.200
-line from 3.725,7.888 to 3.725,7.950
-line from 3.725,10.262 to 3.725,10.200
-line from 4.438,7.888 to 4.438,7.950
-line from 4.438,10.262 to 4.438,10.200
-line from 0.900,7.888 to 4.787,7.888
-line from 4.787,7.888 to 4.787,10.262
-line from 4.787,10.262 to 0.900,10.262
-line from 0.900,10.262 to 0.900,7.888
-line from 3.800,8.775 to 4.025,8.775
-line from 1.250,9.412 to 1.250,9.412
-line from 1.250,9.412 to 1.613,9.425
-line from 1.613,9.425 to 2.312,9.463
-line from 2.312,9.463 to 3.025,9.600
-line from 3.025,9.600 to 3.725,9.875
-line from 3.725,9.875 to 4.438,10.075
-dashwid = 0.037i
-line dotted from 3.800,8.625 to 4.025,8.625
-line dotted from 1.250,9.450 to 1.250,9.450
-line dotted from 1.250,9.450 to 1.613,9.438
-line dotted from 1.613,9.438 to 2.312,9.438
-line dotted from 2.312,9.438 to 3.025,9.525
-line dotted from 3.025,9.525 to 3.725,9.550
-line dotted from 3.725,9.550 to 4.438,9.662
-line dashed from 3.800,8.475 to 4.025,8.475
-line dashed from 1.250,9.438 to 1.250,9.438
-line dashed from 1.250,9.438 to 1.613,9.412
-line dashed from 1.613,9.412 to 2.312,9.450
-line dashed from 2.312,9.450 to 3.025,9.500
-line dashed from 3.025,9.500 to 3.725,9.613
-line dashed from 3.725,9.613 to 4.438,9.675
-dashwid = 0.075i
-line dotted from 3.800,8.325 to 4.025,8.325
-line dotted from 1.250,9.387 to 1.250,9.387
-line dotted from 1.250,9.387 to 1.613,9.600
-line dotted from 1.613,9.600 to 2.312,9.625
-line dotted from 2.312,9.625 to 3.025,9.738
-line dotted from 3.025,9.738 to 3.725,9.850
-line dotted from 3.725,9.850 to 4.438,9.800
-.ps
-.ps -1
-.ft
-.ft I
-"0" at 0.825,7.810 rjust
-"5" at 0.825,8.110 rjust
-"10" at 0.825,8.410 rjust
-"15" at 0.825,8.697 rjust
-"20" at 0.825,8.997 rjust
-"25" at 0.825,9.297 rjust
-"30" at 0.825,9.597 rjust
-"35" at 0.825,9.885 rjust
-"40" at 0.825,10.185 rjust
-"0" at 0.900,7.660
-"2" at 1.613,7.660
-"4" at 2.312,7.660
-"6" at 3.025,7.660
-"8" at 3.725,7.660
-"10" at 4.438,7.660
-"Time (sec)" at 0.150,8.997
-"Number of Clients" at 2.837,7.510
-"Figure #4: MAB Phase 4 (grep/wc/find)" at 2.837,10.335
-"NFS" at 3.725,8.697 rjust
-"Leases" at 3.725,8.547 rjust
-"Leases, Rdirlookup" at 3.725,8.397 rjust
-"Leases, Attrib leases, Rdirlookup" at 3.725,8.247 rjust
-.ps
-.ft
-.PE
-.)z
-.(z
-.PS
-.ps
-.ps 10
-dashwid = 0.050i
-line dashed from 0.900,7.888 to 4.787,7.888
-line dashed from 0.900,7.888 to 0.900,10.262
-line from 0.900,7.888 to 0.963,7.888
-line from 4.787,7.888 to 4.725,7.888
-line from 0.900,8.150 to 0.963,8.150
-line from 4.787,8.150 to 4.725,8.150
-line from 0.900,8.412 to 0.963,8.412
-line from 4.787,8.412 to 4.725,8.412
-line from 0.900,8.675 to 0.963,8.675
-line from 4.787,8.675 to 4.725,8.675
-line from 0.900,8.938 to 0.963,8.938
-line from 4.787,8.938 to 4.725,8.938
-line from 0.900,9.213 to 0.963,9.213
-line from 4.787,9.213 to 4.725,9.213
-line from 0.900,9.475 to 0.963,9.475
-line from 4.787,9.475 to 4.725,9.475
-line from 0.900,9.738 to 0.963,9.738
-line from 4.787,9.738 to 4.725,9.738
-line from 0.900,10.000 to 0.963,10.000
-line from 4.787,10.000 to 4.725,10.000
-line from 0.900,10.262 to 0.963,10.262
-line from 4.787,10.262 to 4.725,10.262
-line from 0.900,7.888 to 0.900,7.950
-line from 0.900,10.262 to 0.900,10.200
-line from 1.613,7.888 to 1.613,7.950
-line from 1.613,10.262 to 1.613,10.200
-line from 2.312,7.888 to 2.312,7.950
-line from 2.312,10.262 to 2.312,10.200
-line from 3.025,7.888 to 3.025,7.950
-line from 3.025,10.262 to 3.025,10.200
-line from 3.725,7.888 to 3.725,7.950
-line from 3.725,10.262 to 3.725,10.200
-line from 4.438,7.888 to 4.438,7.950
-line from 4.438,10.262 to 4.438,10.200
-line from 0.900,7.888 to 4.787,7.888
-line from 4.787,7.888 to 4.787,10.262
-line from 4.787,10.262 to 0.900,10.262
-line from 0.900,10.262 to 0.900,7.888
-line from 3.800,8.675 to 4.025,8.675
-line from 1.250,8.800 to 1.250,8.800
-line from 1.250,8.800 to 1.613,8.912
-line from 1.613,8.912 to 2.312,9.113
-line from 2.312,9.113 to 3.025,9.438
-line from 3.025,9.438 to 3.725,9.750
-line from 3.725,9.750 to 4.438,10.088
-dashwid = 0.037i
-line dotted from 3.800,8.525 to 4.025,8.525
-line dotted from 1.250,8.637 to 1.250,8.637
-line dotted from 1.250,8.637 to 1.613,8.700
-line dotted from 1.613,8.700 to 2.312,8.713
-line dotted from 2.312,8.713 to 3.025,8.775
-line dotted from 3.025,8.775 to 3.725,8.887
-line dotted from 3.725,8.887 to 4.438,9.037
-line dashed from 3.800,8.375 to 4.025,8.375
-line dashed from 1.250,8.675 to 1.250,8.675
-line dashed from 1.250,8.675 to 1.613,8.688
-line dashed from 1.613,8.688 to 2.312,8.713
-line dashed from 2.312,8.713 to 3.025,8.825
-line dashed from 3.025,8.825 to 3.725,8.887
-line dashed from 3.725,8.887 to 4.438,9.062
-dashwid = 0.075i
-line dotted from 3.800,8.225 to 4.025,8.225
-line dotted from 1.250,8.700 to 1.250,8.700
-line dotted from 1.250,8.700 to 1.613,8.688
-line dotted from 1.613,8.688 to 2.312,8.762
-line dotted from 2.312,8.762 to 3.025,8.812
-line dotted from 3.025,8.812 to 3.725,8.925
-line dotted from 3.725,8.925 to 4.438,9.025
-.ps
-.ps -1
-.ft
-.ft I
-"0" at 0.825,7.810 rjust
-"50" at 0.825,8.072 rjust
-"100" at 0.825,8.335 rjust
-"150" at 0.825,8.597 rjust
-"200" at 0.825,8.860 rjust
-"250" at 0.825,9.135 rjust
-"300" at 0.825,9.397 rjust
-"350" at 0.825,9.660 rjust
-"400" at 0.825,9.922 rjust
-"450" at 0.825,10.185 rjust
-"0" at 0.900,7.660
-"2" at 1.613,7.660
-"4" at 2.312,7.660
-"6" at 3.025,7.660
-"8" at 3.725,7.660
-"10" at 4.438,7.660
-"Time (sec)" at 0.150,8.997
-"Number of Clients" at 2.837,7.510
-"Figure #5: MAB Phase 5 (compile)" at 2.837,10.335
-"NFS" at 3.725,8.597 rjust
-"Leases" at 3.725,8.447 rjust
-"Leases, Rdirlookup" at 3.725,8.297 rjust
-"Leases, Attrib leases, Rdirlookup" at 3.725,8.147 rjust
-.ps
-.ft
-.PE
-.)z
-.pp
-In figure 2, where a subtree of seventy small files is copied, the difference between the protocol variants is minimal,
-with the NQNFS variants performing slightly better.
-For this case, the Readdir_and_Lookup RPC is a slight hindrance under heavy
-load, possibly because it results in larger directory blocks in the buffer
-cache.
-.pp
-In figure 3, for the phase that gets file attributes for a large number
-of files, the leasing variants take about 50% longer, indicating that
-there are performance problems in this area. For the case where valid
-current leases are required for every file when attributes are returned,
-the performance is significantly worse than when the attributes are allowed
-to be stale by a few seconds on the client.
-I have not been able to explain the oscillation in the curves for the
-Lease cases.
-.pp
-For the string searching phase depicted in figure 4, the leasing variants
-that do not require valid leases for files when attributes are returned
-appear to scale better with server load than NFS.
-However, the effect appears to be
-negligible until the server load is fairly heavy.
-.pp
-Most of the time in the MAB benchmark is spent in the compilation phase
-and this is where the differences between caching methods are most
-pronounced.
-In figure 5 it can be seen that any protocol variant using Leases performs
-about a factor of two better than NFS
-at a load of ten clients. This indicates that the use of NQNFS may
-allow servers to handle significantly more clients for this type of
-workload.
-.pp
-Table 2 summarizes the MAB run times for all phases for the single client
-DECstation 5000/25. The \fILeases\fR case refers to using leases, whereas
-the \fILeases, Rdirl\fR case uses the Readdir_and_Lookup RPC as well and
-the \fIBCache Only\fR case uses leases, but only the buffer cache and not
-the attribute or name caches.
-The \fINo Caching\fR case does not do any client side caching, performing
-all system calls via synchronous RPCs to the server.
-.(z
-.ps -1
-.R
-.TS
-box, center;
-c s s s s s s s
-c c c c c c c c
-l | n n n n n n n.
-Table #2: Single DECstation 5000/25 Client Elapsed Times (sec)
-Phase 1 2 3 4 5 Total % Improvement
-_
-No Caching 6 35 41 40 258 380 -93
-NFS 5 24 15 20 133 197 0
-BCache Only 5 20 24 23 116 188 5
-Leases, Rdirl 5 20 21 20 105 171 13
-Leases 5 19 21 21 99 165 16
-.TE
-.ps
-.)z
-.sh 2 "Processor Speed Tests"
-.pp
-An important goal of client-side file system caching is to decouple the
-I/O system calls from the underlying distributed file system, so that the
-client's system performance might scale with processor speed. In order
-to test this, a series of MAB runs were performed on three
-DECstations that are similar except for processor speed.
-In addition to the four protocol variants used for the above tests, runs
-were done with the client caches turned off, for
-worst case performance numbers for caching mechanisms with a 100% miss rate. The CPU utilization
-was measured, as an indicator of how much the processor was blocking for
-I/O system calls. Note that since the systems were running in single user mode
-and otherwise quiescent, almost all CPU activity was directly related
-to the MAB run.
-The results are presented in
-table 3.
-The CPU time is simply the product of the CPU utilization and
-elapsed running time and, as such, is the optimistic bound on performance
-achievable with an ideal client caching scheme that never blocks for I/O.
-.(z
-.ps -1
-.R
-.TS
-box, center;
-c s s s s s s s s s
-c c s s c s s c s s
-c c c c c c c c c c
-c c c c c c c c c c
-l | n n n n n n n n n.
-Table #3: MAB Phase 5 (compile)
- DS2100 (10.5 MIPS) DS3100 (14.0 MIPS) DS5000/25 (26.7 MIPS)
- Elapsed CPU CPU Elapsed CPU CPU Elapsed CPU CPU
- time Util(%) time time Util(%) time time Util(%) time
-_
-Leases 143 89 127 113 87 98 99 89 88
-Leases, Rdirl 150 89 134 110 91 100 105 88 92
-BCache Only 169 85 144 129 78 101 116 75 87
-NFS 172 77 132 135 74 100 133 71 94
-No Caching 330 47 155 256 41 105 258 39 101
-.TE
-.ps
-.)z
-As can be seen in the table, any caching mechanism achieves significantly
-better performance than when caching is disabled, roughly doubling the CPU
-utilization with a corresponding reduction in run time. For NFS, the CPU
-utilization is dropping with increase in CPU speed, which would suggest that
-it is not scaling with CPU speed. For the NQNFS variants, the CPU utilization
-remains at just below 90%, which suggests that the caching mechanism is working
-well and scaling within this CPU range.
-Note that for this benchmark, the ratio of CPU times for
-the DECstation 3100 and DECstation 5000/25 is quite different from what the
-Dhrystone MIPS ratings would suggest.
-.pp
-Overall, the results seem encouraging, although it remains to be seen whether
-or not the caching provided by NQNFS can continue to scale with CPU
-performance.
-There is a good indication that NQNFS permits a server to scale
-to more clients than does NFS, at least for workloads akin to the MAB compile phase.
-A more difficult question is "What if the server is much faster doing
-write RPCs?" as a result of some technology such as Prestoserve
-or write gathering.
-Since a significant part of the difference between NFS and NQNFS is
-the synchronous writing, it is difficult to predict how much a server
-capable of fast write RPCs will negate the performance improvements of NQNFS.
-At the very least, table 1 indicates that the write RPC load on the server
-has decreased by approximately 30%, and this reduced write load should still
-result in some improvement.
-.pp
-Indications are that the Readdir_and_Lookup RPC has not improved performance
-for these tests and may in fact be degrading performance slightly.
-The results in figure 3 indicate some problems, possibly with handling
-of the attribute cache. It seems logical that the Readdir_and_Lookup RPC
-should permit priming of the attribute cache, improving hit rate, but the
-results are counter to that.
-.sh 2 "Internetwork Delay Tests"
-.pp
-This experimental setup was used to explore how the different protocol
-variants might perform over internetworks with larger RPC RTTs. The
-server was moved to a separate Ethernet, using a MicroVAXII\(tm as an
-IP router to the other Ethernet. The 4.3Reno BSD Unix system running on the
-MicroVAXII was modified to delay IP packets being forwarded by a tunable N
-millisecond delay. The implementation was rather crude and did not try to
-simulate a distribution of delay times nor was it programmed to drop packets
-at a given rate, but it served as a simple emulation of a long,
-fat network\** [Jacobson88].
-.(f
-\**Long fat networks refer to network interconnections with
-a Bandwidth X RTT product > 10\u5\d bits.
-.)f
-The MAB was run using both UDP and TCP RPC transports
-for a variety of RTT delays from five to two hundred milliseconds,
-to observe the effects of RTT delay on RPC transport.
-It was found that, due to a high variability between runs, four runs did not
-suffice, so eight runs at each value were done.
-The results in figure 6 and table 4 are the average for the eight runs.
-.(z
-.PS
-.ps
-.ps 10
-dashwid = 0.050i
-line dashed from 0.900,7.888 to 4.787,7.888
-line dashed from 0.900,7.888 to 0.900,10.262
-line from 0.900,7.888 to 0.963,7.888
-line from 4.787,7.888 to 4.725,7.888
-line from 0.900,8.350 to 0.963,8.350
-line from 4.787,8.350 to 4.725,8.350
-line from 0.900,8.800 to 0.963,8.800
-line from 4.787,8.800 to 4.725,8.800
-line from 0.900,9.262 to 0.963,9.262
-line from 4.787,9.262 to 4.725,9.262
-line from 0.900,9.713 to 0.963,9.713
-line from 4.787,9.713 to 4.725,9.713
-line from 0.900,10.175 to 0.963,10.175
-line from 4.787,10.175 to 4.725,10.175
-line from 0.900,7.888 to 0.900,7.950
-line from 0.900,10.262 to 0.900,10.200
-line from 1.825,7.888 to 1.825,7.950
-line from 1.825,10.262 to 1.825,10.200
-line from 2.750,7.888 to 2.750,7.950
-line from 2.750,10.262 to 2.750,10.200
-line from 3.675,7.888 to 3.675,7.950
-line from 3.675,10.262 to 3.675,10.200
-line from 4.600,7.888 to 4.600,7.950
-line from 4.600,10.262 to 4.600,10.200
-line from 0.900,7.888 to 4.787,7.888
-line from 4.787,7.888 to 4.787,10.262
-line from 4.787,10.262 to 0.900,10.262
-line from 0.900,10.262 to 0.900,7.888
-line from 4.125,8.613 to 4.350,8.613
-line from 0.988,8.400 to 0.988,8.400
-line from 0.988,8.400 to 1.637,8.575
-line from 1.637,8.575 to 2.375,8.713
-line from 2.375,8.713 to 3.125,8.900
-line from 3.125,8.900 to 3.862,9.137
-line from 3.862,9.137 to 4.600,9.425
-dashwid = 0.037i
-line dotted from 4.125,8.463 to 4.350,8.463
-line dotted from 0.988,8.375 to 0.988,8.375
-line dotted from 0.988,8.375 to 1.637,8.525
-line dotted from 1.637,8.525 to 2.375,8.850
-line dotted from 2.375,8.850 to 3.125,8.975
-line dotted from 3.125,8.975 to 3.862,9.137
-line dotted from 3.862,9.137 to 4.600,9.625
-line dashed from 4.125,8.312 to 4.350,8.312
-line dashed from 0.988,8.525 to 0.988,8.525
-line dashed from 0.988,8.525 to 1.637,8.688
-line dashed from 1.637,8.688 to 2.375,8.838
-line dashed from 2.375,8.838 to 3.125,9.150
-line dashed from 3.125,9.150 to 3.862,9.275
-line dashed from 3.862,9.275 to 4.600,9.588
-dashwid = 0.075i
-line dotted from 4.125,8.162 to 4.350,8.162
-line dotted from 0.988,8.525 to 0.988,8.525
-line dotted from 0.988,8.525 to 1.637,8.838
-line dotted from 1.637,8.838 to 2.375,8.863
-line dotted from 2.375,8.863 to 3.125,9.137
-line dotted from 3.125,9.137 to 3.862,9.387
-line dotted from 3.862,9.387 to 4.600,10.200
-.ps
-.ps -1
-.ft
-.ft I
-"0" at 0.825,7.810 rjust
-"100" at 0.825,8.272 rjust
-"200" at 0.825,8.722 rjust
-"300" at 0.825,9.185 rjust
-"400" at 0.825,9.635 rjust
-"500" at 0.825,10.097 rjust
-"0" at 0.900,7.660
-"50" at 1.825,7.660
-"100" at 2.750,7.660
-"150" at 3.675,7.660
-"200" at 4.600,7.660
-"Time (sec)" at 0.150,8.997
-"Round Trip Delay (msec)" at 2.837,7.510
-"Figure #6: MAB Phase 5 (compile)" at 2.837,10.335
-"Leases,UDP" at 4.050,8.535 rjust
-"Leases,TCP" at 4.050,8.385 rjust
-"NFS,UDP" at 4.050,8.235 rjust
-"NFS,TCP" at 4.050,8.085 rjust
-.ps
-.ft
-.PE
-.)z
-.(z
-.ps -1
-.R
-.TS
-box, center;
-c s s s s s s s s
-c c s c s c s c s
-c c c c c c c c c
-c c c c c c c c c
-l | n n n n n n n n.
-Table #4: MAB Phase 5 (compile) for Internetwork Delays
- NFS,UDP NFS,TCP Leases,UDP Leases,TCP
-Delay Elapsed Standard Elapsed Standard Elapsed Standard Elapsed Standard
-(msec) time (sec) Deviation time (sec) Deviation time (sec) Deviation time (sec) Deviation
-_
-5 139 2.9 139 2.4 112 7.0 108 6.0
-40 175 5.1 208 44.5 150 23.8 139 4.3
-80 207 3.9 213 4.7 180 7.7 210 52.9
-120 276 29.3 273 17.1 221 7.7 238 5.8
-160 304 7.2 328 77.1 275 21.5 274 10.1
-200 372 35.0 506 235.1 338 25.2 379 69.2
-.TE
-.ps
-.)z
-.pp
-I found these results somewhat surprising, since I had assumed that stability
-across an internetwork connection would be a function of RPC transport
-protocol.
-Looking at the standard deviations observed between the eight runs, there is an indication
-that the NQNFS protocol plays a larger role in
-maintaining stability than the underlying RPC transport protocol.
-It appears that NFS over TCP transport
-is the least stable variant tested.
-It should be noted that the TCP implementation used was roughly at 4.3BSD Tahoe
-release and that the 4.4BSD TCP implementation was far less stable and would
-fail intermittently, due to a bug I was not able to isolate.
-It would appear that some of the recent enhancements to the 4.4BSD TCP
-implementation have a detrimental effect on the performance of
-RPC-type traffic loads, which intermix small and large
-data transfers in both directions.
-It is obvious that more exploration of this area is needed before any
-conclusions can be made
-beyond the fact that over a local area network, TCP transport provides
-performance comparable to UDP.
-.sh 1 "Lessons Learned"
-.pp
-Evaluating the performance of a distributed file system is fraught with
-difficulties, due to the many software and hardware factors involved.
-The limited benchmarking presented here took a considerable amount of time
-and the results gained by the exercise only give indications of what the
-performance might be for a few scenarios.
-.pp
-The IP router with delay introduction proved to be a valuable tool for protocol debugging\**,
-.(f
-\**It exposed two bugs in the 4.4BSD networking, one a problem in the Lance chip
-driver for the DECstation and the other a TCP window sizing problem that I was
-not able to isolate.
-.)f
-and may be useful for a more extensive study of performance over internetworks
-if enhanced to do a better job of simulating internetwork delay and packet loss.
-.pp
-The Leases mechanism provided a simple model for the provision of cache
-consistency and did seem to improve performance for various scenarios.
-Unfortunately, it does not provide the server state information that is required
-for file system semantics, such as locking, that many software systems demand.
-In production environments on my campus, the need for file locking and the correct
-generation of the ETXTBSY error code
-are far more important than full cache consistency, and leasing
-does not satisfy these needs.
-Another file system semantic that requires hard server state is the delay
-of file removal until the last close system call. Although Spritely NFS
-did not support this semantic either, it is logical that the open file
-state maintained by that system would facilitate the implementation of
-this semantic more easily than would the Leases mechanism.
-.sh 1 "Further Work"
-.pp
-The current implementation uses a fixed, moderate sized buffer cache designed
-for the local UFS [McKusick84] file system.
-The results in figure 1 suggest that this is adequate so long as the cache
-is of an appropriate size.
-However, a mechanism permitting the cache to vary in size
-has been shown to outperform fixed sized buffer caches [Nelson90], and could
-be beneficial. It could also be useful to allow the buffer cache to grow very
-large by making use of local backing store for cases where server performance
-is limited.
-A very large buffer cache size would in turn permit experimentation with
-much larger read/write data sizes, facilitating bulk data transfers
-across long fat networks, such as will characterize the Internet of the
-near future.
-A careful redesign of the buffer cache mechanism to provide
-support for these features would probably be the next implementation step.
-.pp
-The results in figure 3 indicate that the mechanics of caching file
-attributes and maintaining the attribute cache's consistency needs to
-be looked at further.
-There also needs to be more work done on the interaction between a
-Readdir_and_Lookup RPC and the name and attribute caches, in an effort
-to reduce Getattr and Lookup RPC loads.
-.pp
-The NQNFS protocol has never been used in a production environment and doing
-so would provide needed insight into how well the protocol satisfies the
-needs of real workstation environments.
-It is hoped that the distribution of the implementation in 4.4BSD will
-facilitate use of the protocol in production environments elsewhere.
-.pp
-The big question that needs to be resolved is whether Leases are an adequate
-mechanism for cache consistency or whether hard server state is required.
-Given the work presented here and in the papers related to Sprite and Spritely
-NFS, there are clear indications that a cache consistency algorithm can
-improve both performance and file system semantics.
-As yet, however, it is unclear what the best approach to maintain consistency is.
-It would appear that hard state information is required for file locking and
-other mechanisms and, if so, it seems appropriate to use it for cache
-consistency as well.
-.sh 1 "Acknowledgements"
-.pp
-I would like to thank the members of the CSRG at the University of California,
-Berkeley for their continued support over the years. Without their encouragement and assistance this
-software would never have been implemented.
-Prof. Jim Linders and Prof. Tom Wilson here at the University of Guelph helped
-proofread this paper and Jeffrey Mogul provided a great deal of
-assistance, helping to turn my gibberish into something at least moderately
-readable.
-.sh 1 "References"
-.ip [Baker91] 15
-Mary Baker and John Ousterhout, Availability in the Sprite Distributed
-File System, In \fIOperating Systems Review\fR, (25)2, pg. 95-98,
-April 1991.
-.ip [Baker91a] 15
-Mary Baker, private communication, May 1991.
-.ip [Burrows88] 15
-Michael Burrows, Efficient Data Sharing, Technical Report #153,
-Computer Laboratory, University of Cambridge, Dec. 1988.
-.ip [Gray89] 15
-Cary G. Gray and David R. Cheriton, Leases: An Efficient Fault-Tolerant
-Mechanism for Distributed File Cache Consistency, In \fIProc. of the
-Twelfth ACM Symposium on Operating Systems Principles\fR, Litchfield Park,
-AZ, Dec. 1989.
-.ip [Howard88] 15
-John H. Howard, Michael L. Kazar, Sherri G. Menees, David A. Nichols,
-M. Satyanarayanan, Robert N. Sidebotham and Michael J. West,
-Scale and Performance in a Distributed File System, \fIACM Trans. on
-Computer Systems\fR, (6)1, pg 51-81, Feb. 1988.
-.ip [Jacobson88] 15
-Van Jacobson and R. Braden, \fITCP Extensions for Long-Delay Paths\fR,
-ARPANET Working Group Requests for Comment, DDN Network Information Center,
-SRI International, Menlo Park, CA, October 1988, RFC-1072.
-.ip [Jacobson89] 15
-Van Jacobson, Sun NFS Performance Problems, \fIPrivate Communication,\fR
-November, 1989.
-.ip [Juszczak89] 15
-Chet Juszczak, Improving the Performance and Correctness of an NFS Server,
-In \fIProc. Winter 1989 USENIX Conference,\fR pg. 53-63, San Diego, CA, January 1989.
-.ip [Juszczak94] 15
-Chet Juszczak, Improving the Write Performance of an NFS Server,
-to appear in \fIProc. Winter 1994 USENIX Conference,\fR San Francisco, CA, January 1994.
-.ip [Kazar88] 15
-Michael L. Kazar, Synchronization and Caching Issues in the Andrew File System,
-In \fIProc. Winter 1988 USENIX Conference,\fR pg. 27-36, Dallas, TX, February
-1988.
-.ip [Kent87] 15
-Christopher A. Kent and Jeffrey C. Mogul, \fIFragmentation Considered Harmful\fR, Research Report 87/3,
-Digital Equipment Corporation Western Research Laboratory, Dec. 1987.
-.ip [Kent87a] 15
-Christopher A. Kent, \fICache Coherence in Distributed Systems\fR, Research Report 87/4,
-Digital Equipment Corporation Western Research Laboratory, April 1987.
-.ip [Macklem90] 15
-Rick Macklem, Lessons Learned Tuning the 4.3BSD Reno Implementation of the
-NFS Protocol,
-In \fIProc. Winter 1991 USENIX Conference,\fR pg. 53-64, Dallas, TX,
-January 1991.
-.ip [Macklem93] 15
-Rick Macklem, The 4.4BSD NFS Implementation,
-In \fIThe System Manager's Manual\fR, 4.4 Berkeley Software Distribution,
-University of California, Berkeley, June 1993.
-.ip [McKusick84] 15
-Marshall K. McKusick, William N. Joy, Samuel J. Leffler and Robert S. Fabry,
-A Fast File System for UNIX, \fIACM Transactions on Computer Systems\fR,
-Vol. 2, Number 3, pg. 181-197, August 1984.
-.ip [McKusick90] 15
-Marshall K. McKusick, Michael J. Karels and Keith Bostic, A Pageable Memory
-Based Filesystem,
-In \fIProc. Summer 1990 USENIX Conference,\fR pg. 137-143, Anaheim, CA, June
-1990.
-.ip [Mogul93] 15
-Jeffrey C. Mogul, Recovery in Spritely NFS,
-Research Report 93/2, Digital Equipment Corporation Western Research
-Laboratory, June 1993.
-.ip [Moran90] 15
-Joseph Moran, Russel Sandberg, Don Coleman, Jonathan Kepecs and Bob Lyon,
-Breaking Through the NFS Performance Barrier,
-In \fIProc. Spring 1990 EUUG Conference,\fR pg. 199-206, Munich, FRG,
-April 1990.
-.ip [Nelson88] 15
-Michael N. Nelson, Brent B. Welch, and John K. Ousterhout, Caching in the
-Sprite Network File System, \fIACM Transactions on Computer Systems\fR (6)1
-pg. 134-154, February 1988.
-.ip [Nelson90] 15
-Michael N. Nelson, \fIVirtual Memory vs. The File System\fR, Research Report
-90/4, Digital Equipment Corporation Western Research Laboratory, March 1990.
-.ip [Nowicki89] 15
-Bill Nowicki, Transport Issues in the Network File System, In \fIComputer
-Communication Review\fR, pg. 16-20, March 1989.
-.ip [Ousterhout90] 15
-John K. Ousterhout, Why Aren't Operating Systems Getting Faster As Fast as
-Hardware? In \fIProc. Summer 1990 USENIX Conference\fR, pg. 247-256, Anaheim,
-CA, June 1990.
-.ip [Sandberg85] 15
-Russel Sandberg, David Goldberg, Steve Kleiman, Dan Walsh, and Bob Lyon,
-Design and Implementation of the Sun Network Filesystem, In \fIProc. Summer
-1985 USENIX Conference\fR, pages 119-130, Portland, OR, June 1985.
-.ip [Srinivasan89] 15
-V. Srinivasan and Jeffrey C. Mogul, Spritely NFS: Experiments with
-Cache-Consistency Protocols,
-In \fIProc. of the
-Twelfth ACM Symposium on Operating Systems Principles\fR, Litchfield Park,
-AZ, Dec. 1989.
-.ip [Steiner88] 15
-J. G. Steiner, B. C. Neuman and J. I. Schiller, Kerberos: An Authentication
-Service for Open Network Systems,
-In \fIProc. Winter 1988 USENIX Conference,\fR pg. 191-202, Dallas, TX, February
-1988.
-.ip [SUN89] 15
-Sun Microsystems Inc., \fINFS: Network File System Protocol Specification\fR,
-ARPANET Working Group Requests for Comment, DDN Network Information Center,
-SRI International, Menlo Park, CA, March 1989, RFC-1094.
-.ip [SUN93] 15
-Sun Microsystems Inc., \fINFS: Network File System Version 3 Protocol Specification\fR,
-Sun Microsystems Inc., Mountain View, CA, June 1993.
-.ip [Wittle93] 15
-Mark Wittle and Bruce E. Keith, LADDIS: The Next Generation in NFS File
-Server Benchmarking,
-In \fIProc. Summer 1993 USENIX Conference,\fR pg. 111-128, Cincinnati, OH, June
-1993.
-.(f
-\(mo
-NFS is believed to be a trademark of Sun Microsystems, Inc.
-.)f
-.(f
-\(dg
-Prestoserve is a trademark of Legato Systems, Inc.
-.)f
-.(f
-\(sc
-MIPS is a trademark of Silicon Graphics, Inc.
-.)f
-.(f
-\(dg
-DECstation, MicroVAXII and Ultrix are trademarks of Digital Equipment Corp.
-.)f
-.(f
-\(dd
-Unix is a trademark of Novell, Inc.
-.)f
diff --git a/share/doc/papers/px/Makefile b/share/doc/papers/px/Makefile
deleted file mode 100644
index 2fef10c3acf..00000000000
--- a/share/doc/papers/px/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-# $OpenBSD: Makefile,v 1.3 2004/02/01 14:22:45 jmc Exp $
-
-
-DIR= papers/px
-SRCS= pxin0.n pxin1.n pxin2.n pxin3.n pxin4.n
-MACROS= -ms
-EXTRA= fig1.1.n fig1.2.n fig1.3.n fig2.3.raw fig2.4.n fig3.2.n \
- fig3.3.n table2.1.n table2.2.n table2.3.n table3.1.n tmac.p
-CLEANFILES+=fig2.3.n
-
-paper.ps: ${SRCS} fig2.3.n
- ${SOELIM} ${SRCS} | ${TBL} | ${ROFF} > ${.TARGET}
-
-paper.txt: ${SRCS} fig2.3.n
- ${SOELIM} ${SRCS} | ${TBL} | ${ROFF} -Tascii > ${.TARGET}
-
-fig2.3.n: fig2.3.raw
- sort fig2.3.raw >fig2.3.n
-
-.include <bsd.doc.mk>
diff --git a/share/doc/papers/px/fig1.1.n b/share/doc/papers/px/fig1.1.n
deleted file mode 100644
index 63d32ea59d4..00000000000
--- a/share/doc/papers/px/fig1.1.n
+++ /dev/null
@@ -1,69 +0,0 @@
-.\" $OpenBSD: fig1.1.n,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1979 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)fig1.1.n 5.2 (Berkeley) 4/17/91
-.\"
-.KF
-.TS
-center;
-c l
-l l
-_ l
-| l |
-| cw(18) | aw(28)
-| _ | l
-| c | a.
-Base of stack frame
-
-
-
-Block mark Positive offsets
-.sp
- \(<- Display entry points here
-.sp
-Local
-variables
-.sp
-_ Negative offsets
-Temporary
-expression
-space
-.sp
-.T&
-| _ | l
-c l.
-
-.sp
-Top of stack frame
-.TE
-.sp
-.ce
-Figure 1.1 \- Structure of stack frame
-.sp
-.KE
diff --git a/share/doc/papers/px/fig1.2.n b/share/doc/papers/px/fig1.2.n
deleted file mode 100644
index 3e49cd90428..00000000000
--- a/share/doc/papers/px/fig1.2.n
+++ /dev/null
@@ -1,66 +0,0 @@
-.\" $OpenBSD: fig1.2.n,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1979 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)fig1.2.n 5.2 (Berkeley) 4/17/91
-.\"
-.KF
-.TS
-center;
-l l
-| cw(22n) | aw(20n).
-_ \&
- Created by \s-2CALL\s0
-Saved lino
-.sp
-Saved lc
-.sp
-Saved dp
-.sp
-_ \&
- Created by \s-2BEG\s0
-Saved dp contents
-.sp
-Pointer to current
-entry line and
-section name
-.sp
-Current file name
-and buffer
-.sp
-Top of stack reference
-.sp
-.T&
-| _ | l.
-
-.TE
-.sp
-.ce
-Figure 1.2 \- Block mark structure
-.sp
-.KE
diff --git a/share/doc/papers/px/fig1.3.n b/share/doc/papers/px/fig1.3.n
deleted file mode 100644
index dcb51fd023a..00000000000
--- a/share/doc/papers/px/fig1.3.n
+++ /dev/null
@@ -1,58 +0,0 @@
-.\" $OpenBSD: fig1.3.n,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1979 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)fig1.3.n 5.2 (Berkeley) 4/17/91
-.\"
-.TS
-center, allbox;
-lw(20).
-T{
-.nf
-.ce 1000
-Space for
-value returned
-from f
-.ce 0
-.fi
-T}
-T{
-.ce
-Value of a
-T}
-T{
-.sp
-.ce
-Block Mark
-.sp
-T}
-.TE
-.sp .1i
-.ce
-Figure 1.3 \- Stack structure on function call `f(a)'
-.sp .1i
diff --git a/share/doc/papers/px/fig2.3.raw b/share/doc/papers/px/fig2.3.raw
deleted file mode 100644
index 07feddf8331..00000000000
--- a/share/doc/papers/px/fig2.3.raw
+++ /dev/null
@@ -1,103 +0,0 @@
-HALT 2.2 Produce control flow backtrace
-BEG s,W,w," 2.2,1.8 Write second part of block mark, enter block
-END 2.2,1.8 End block execution
-CALL l,A 2.2,1.8 Procedure or function call
-NODUMP s,W,w," 2.2 \s-2BEG\s0 main program, suppress dump
-PUSH s 2.2,1.9 Clear space (for function result)
-POP s 2.2,1.9 Pop (arguments) off stack
-LINO s 2.2 Set line number, count statements
-TRA a 2.2 Short control transfer (local branching)
-TRA4 A 2.2 Long control transfer
-GOTO l,A 2.2,1.8 Non-local goto statement
-IF a 2.3 Conditional transfer
-REL* r 2.3 Relational test yielding Boolean result
-AND 2.4 Boolean and
-OR 2.4 Boolean or
-NOT 2.4 Boolean not
-LRV* l,A 2.5 Right value (load) operators
-RV* l,a 2.5 Right value (load) operators
-CON* v 2.5 Load constant operators
-AS* 2.5 Assignment operators
-OFF s 2.5 Offset address, typically used for field reference
-INX* s,w,w 2.6 Subscripting (indexing) operator
-NIL 2.6 Assert non-nil pointer
-LLV l,W 2.6 Address of operator
-LV l,w 2.6 Address of operator
-IND* 2.6 Indirection operators
-ADD* 2.7 Addition
-SUB* 2.7 Subtraction
-MUL* 2.7 Multiplication
-SQR* 2.7 Squaring
-DIV* 2.7 Fixed division
-MOD* 2.7 Modulus
-ABS* 2.7 Absolute value
-NEG* 2.7 Negation
-DVD* 2.7 Floating division
-RANG* v 2.8 Subrange checking
-CASEOP* 2.9 Case statements
-FOR* a 2.12 For statements
-PXPBUF w 2.10 Initialize \fIpxp\fP count buffer
-TRACNT w,A 2.10 Count a procedure entry
-COUNT w 2.10 Count a statement count point
-CTTOT s,w,w 2.11 Construct set
-CARD s 2.11 Cardinality of set
-STOI 2.12 Convert short to long integer
-STOD 2.12 Convert short integer to real
-ITOD 2.12 Convert integer to real
-ITOS 2.12 Convert integer to short integer
-GET 3.7 Get next record from a file
-PUT 3.8 Output a record to a file
-MESSAGE 3.6 Write to terminal
-FNIL 3.7 Check file initialized, not eof, synced
-FLUSH 3.11 Flush a file
-BUFF 3.11 Specify buffering for file "output"
-EOF 3.10 Returns \fItrue\fR if end of file
-EOLN 3.10 Returns \fItrue\fR if end of line on input text file
-RESET 3.11 Open file for input
-REWRITE 3.11 Open file for output
-REMOVE 3.11 Remove a file
-UNIT* 3.10 Set active file
-READ* 3.7 Read a record from a file
-WRITEC 3.8 Character unformatted write
-WRITEF l 3.8 General formatted write
-WRITES l 3.8 String unformatted write
-WRITLN 3.8 Output a newline to a text file
-PAGE 3.8 Output a formfeed to a text file
-MIN s 3.8 Minimum of top of stack and \fIs\fR
-MAX s,w 3.8 Maximum of top of stack and \fIw\fR
-NAM A 3.8 Convert enumerated type value to print format
-FILE 3.9 Push descriptor for active file
-DEFNAME 3.11 Attach file name for \fBprogram\fR statement files
-PACK s,w,w,w 2.15 Convert and copy from unpacked to packed
-UNPACK s,w,w,w 2.15 Convert and copy from packed to unpacked
-LLIMIT 2.14 Set linelimit for output text file
-ARGC 2.14 Returns number of arguments to current process
-ARGV 2.14 Copy specified process argument into char array
-CLCK 2.14 Returns user time of program
-SCLCK 2.14 Returns system time of program
-WCLCK 2.14 Returns current time stamp
-DATE 2.14 Copy date into char array
-TIME 2.14 Copy time into char array
-SEED 2.13 Set random seed, return old seed
-RANDOM 2.13 Returns random number
-DISPOSE 2.15 Dispose of a heap allocation
-NEW s 2.15 Allocate a record on heap, set pointer to it
-EXPO 2.13 Returns machine representation of real exponent
-ATAN 2.13 Returns arctangent of argument
-EXP 2.13 Returns exponential of argument
-LN 2.13 Returns natural log of argument
-COS 2.13 Returns cos of argument
-SIN 2.13 Returns sin of argument
-SQRT 2.13 Returns square root of argument
-CHR* 2.15 Returns integer to ascii mapping of argument
-ODD* 2.15 Returns \fItrue\fR if argument is odd, \fIfalse\fR if even
-PRED* 2.7 Returns predecessor of argument
-STLIM 2.14 Set program statement limit
-SUCC* 2.7 Returns successor of argument
-ROUND 2.13 Returns \s-2TRUNC\s0(argument + 0.5)
-TRUNC 2.13 Returns integer part of argument
-UNDEF 2.15 Returns \fIfalse\fR
-SDUP 2.2 Duplicate top stack word
-ASRT 2.12 Assert \fItrue\fR to continue
-IN s,w,w 2.11 Set membership
-INCT 2.11 Membership in a constructed set
diff --git a/share/doc/papers/px/fig2.4.n b/share/doc/papers/px/fig2.4.n
deleted file mode 100644
index 54dde982fc5..00000000000
--- a/share/doc/papers/px/fig2.4.n
+++ /dev/null
@@ -1,55 +0,0 @@
-.\" $OpenBSD: fig2.4.n,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1979 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)fig2.4.n 5.2 (Berkeley) 4/17/91
-.\"
-.KF
-.TS
-center, box;
-cw(15).
-\s-2CASEOP\s0
-_
-No. of cases
-_
-.sp
-Case
-transfer
-table
-.sp
-_
-.sp
-Array of case
-label values
-.sp
-.TE
-.sp
-.ce
-Figure 2.4 \- Case data structure
-.sp
-.KE
diff --git a/share/doc/papers/px/fig3.2.n b/share/doc/papers/px/fig3.2.n
deleted file mode 100644
index f64817205bd..00000000000
--- a/share/doc/papers/px/fig3.2.n
+++ /dev/null
@@ -1,54 +0,0 @@
-.\" $OpenBSD: fig3.2.n,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1979 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)fig3.2.n 5.2 (Berkeley) 4/17/91
-.\"
-.KF
-.TS
-center, box;
-cw(15).
-No. of cases
-_
-.sp
-offsets
-of element
-names
-.sp
-_
-.sp
-Array of
-null terminated
-element names
-.sp
-.TE
-.sp
-.ce
-Figure 3.2 \- Enumerated type conversion structure
-.sp
-.KE
diff --git a/share/doc/papers/px/fig3.3.n b/share/doc/papers/px/fig3.3.n
deleted file mode 100644
index 722b206b453..00000000000
--- a/share/doc/papers/px/fig3.3.n
+++ /dev/null
@@ -1,55 +0,0 @@
-.\" $OpenBSD: fig3.3.n,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1979 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)fig3.3.n 5.2 (Berkeley) 4/17/91
-.\"
-.KF
-.TS
-center;
-l l
-l | cw(15) |.
- _
-\fIbool\fP: 2
- _
- 6
- _
- 12
- _
- 17
- _
- "false"
- _
- "true"
- _
-.TE
-.sp
-.ce
-Figure 3.3 \- Boolean type conversion structure
-.sp
-.KE
diff --git a/share/doc/papers/px/pxin0.n b/share/doc/papers/px/pxin0.n
deleted file mode 100644
index 0d5b693748d..00000000000
--- a/share/doc/papers/px/pxin0.n
+++ /dev/null
@@ -1,138 +0,0 @@
-.\" $OpenBSD: pxin0.n,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1979 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)pxin0.n 5.2 (Berkeley) 4/17/91
-.\"
-.if !\n(xx .so tmac.p
-.RP
-.TL
-Berkeley Pascal
-PX Implementation Notes
-.br
-Version 2.0 \- January, 1979
-.AU
-William N. Joy\*(Dg
-.AU
-M. Kirk McKusick\*(Dd
-.AI
-Computer Science Division
-Department of Electrical Engineering and Computer Science
-University of California, Berkeley
-Berkeley, California 94720
-.AB
-.PP
-Berkeley Pascal
-is designed for interactive instructional use and runs on the
-.SM "VAX 11/780" .
-The interpreter
-.I px
-executes the Pascal binaries generated by the Pascal translator
-.I pi .
-.PP
-The
-.I
-PX Implementation Notes
-.R
-describe the general organization of
-.I px ,
-detail the various operations of the interpreter,
-and describe the file input/output structure.
-Conclusions are given on the viability of an interpreter
-based approach to language implementation for an instructional environment.
-.AE
-.if n 'ND
-.SH
-Introduction
-.PP
-These
-.I
-PX Implementation Notes
-.R
-have been updated from the original
-.SM "PDP 11/70"
-implementation notes to reflect the interpreter that runs on the
-.SM "VAX 11/780" .
-These notes consist of four major parts.
-The first part outlines the general organization of
-.I px .
-Section 2 describes the operations (instructions) of the interpreter
-while section 3 focuses on input/output related activity.
-A final section gives conclusions about the viability of an interpreter
-based approach to language implementation for instruction.
-.SH
-Related Berkeley Pascal documents
-.PP
-The
-.I "PXP Implementation Notes"
-give details of the internals of the execution profiler
-.I pxp ;
-parts of the interpreter related to
-.I pxp
-are discussed in section 2.10.
-A paper describing the syntactic error recovery mechanism used in
-.I pi
-was presented at the ACM Conference on Compiler Construction
-in Boulder, Colorado in August, 1979.
-.SH
-Acknowledgements
-.PP
-This version of
-.I px
-is a
-.SM "PDP 11/70"
-to
-.SM "VAX 11/780"
-opcode mapping of the original
-.I px
-that was designed and implemented by Ken Thompson,
-with extensive modifications and additions
-by William Joy
-and Charles Haley.
-Without their work, this
-.UP
-system would never have existed.
-These notes were first written by William Joy for the
-.SM "PDP 11/70"
-implementation.
-We would also like to thank our faculty advisor Susan L. Graham
-for her encouragement,
-her helpful comments and suggestions
-relating to
-.UP
-and her excellent editorial assistance.
-.FS
-\*(dg\ The financial support of the National Science Foundation under grants
-MCS74-07644-A03 and MCS78-07291
-and of an \s-2IBM\s0 Graduate Fellowship is gratefully acknowledged.
-.FE
-.FS
-\*(dd\ The financial support of a Howard Hughes Graduate
-Fellowship is gratefully acknowledged.
-.FE
-.bp
diff --git a/share/doc/papers/px/pxin1.n b/share/doc/papers/px/pxin1.n
deleted file mode 100644
index f4c29b5f2fc..00000000000
--- a/share/doc/papers/px/pxin1.n
+++ /dev/null
@@ -1,536 +0,0 @@
-.\" $OpenBSD: pxin1.n,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1979 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)pxin1.n 5.2 (Berkeley) 4/17/91
-.\"
-.if !\n(xx .so tmac.p
-.tr _\(ru
-.nr H1 0
-.NH
-Organization
-.PP
-Most of
-.I px
-is written in the
-.SM "VAX 11/780"
-assembly language, using the
-.UX
-assembler
-.I as.
-Portions of
-.I px
-are also written in the
-.UX
-systems programming language C.
-.I Px
-consists of a main procedure that reads in the interpreter code,
-a main interpreter loop that transfers successively to various
-code segments implementing the abstract machine operations,
-built-in procedures and functions,
-and several routines that support the implementation of the
-Pascal input-output environment.
-.PP
-The interpreter runs at a fraction of the speed of equivalent
-compiled C code, with this fraction varying from 1/5 to 1/15.
-The interpreter occupies 18.5K bytes of instruction space, shared among
-all processes executing Pascal, and has 4.6K bytes of data space (constants,
-error messages, etc.) a copy of which is allocated to each executing process.
-.NH 2
-Format of the object file
-.PP
-.I Px
-normally interprets the code left in an object file by a run of the
-Pascal translator
-.I pi.
-The file where the translator puts the object originally, and the most
-commonly interpreted file, is called
-.I obj.
-In order that all persons using
-.I px
-share a common text image, this executable file is
-a small process that coordinates with the interpreter to start
-execution.
-The interpreter code is placed
-at the end of a special ``header'' file and the size of the initialized
-data area of this header file is expanded to include this code,
-so that during execution it is located at an
-easily determined address in its data space.
-When executed, the object process creates a
-.I pipe ,
-creates another process by doing a
-.I fork ,
-and arranges that the resulting parent process becomes an instance of
-.I px .
-The child process then writes the interpreter code through
-the pipe that it has to the
-interpreter process parent.
-When this process is complete, the child exits.
-.PP
-The real advantage of this approach is that it does not require modifications
-to the shell, and that the resultant objects are ``true objects'' not
-requiring special treatment.
-A simpler mechanism would be to determine the name of the file that was
-executed and pass this to the interpreter.
-However it is not possible to determine this name
-in all cases.\*(Dd
-.FS
-\*(dd\ For instance, if the
-.I pxref
-program is placed in the directory
-`/usr/bin'
-then when the user types
-``pxref program.p''
-the first argument to the program, nominally the program's name, is
-``pxref.''
-While it would be possible to search in the standard places,
-i.e. the current directory, and the system directories
-`/bin'
-and
-`/usr/bin'
-for a corresponding object file,
-this would be expensive and not guaranteed to succeed.
-Several shells exist that allow other directories to be searched
-for commands, and there is,
-in general,
-no way to determine what these directories are.
-.FE
-.NH 2
-General features of object code
-.PP
-Pascal object code is relocatable as all addressing references for
-control transfers within the code are relative.
-The code consists of instructions interspersed with inline data.
-All instructions have a length that is an even number of bytes.
-No variables are kept in the object code area.
-.PP
-The first byte of a Pascal interpreter instruction contains an operation
-code.
-This allows a total of 256 major operation codes, and 232 of these are
-in use in the current
-.I px.
-The second byte of each interpreter instruction is called the
-``sub-operation code'',
-or more commonly the
-.I sub-opcode.
-It contains a small integer that may, for example, be used as a
-block-structure level for the associated operation.
-If the instruction can take a longword constant,
-this constant is often packed into the sub-opcode
-if it fits into 8 bits and is not zero.
-A sub-opcode value of zero specifies that the constant would not
-fit and therefore follows in the next word.
-This is a space optimization, the value of zero for flagging
-the longer case being convenient because it is easy to test.
-.PP
-Other instruction formats are used.
-The branching
-instructions take an offset in the following word,
-operators that load constants onto the stack
-take arbitrarily long inline constant values,
-and many operations deal exclusively with data on the
-interpreter stack, requiring no inline data.
-.NH 2
-Stack structure of the interpreter
-.PP
-The interpreter emulates a stack-structured Pascal machine.
-The ``load'' instructions put values onto the stack, where all
-arithmetic operations take place.
-The ``store'' instructions take values off the stack
-and place them in an address that is also contained on the stack.
-The only way to move data or to compute in the machine is with the stack.
-.PP
-To make the interpreter operations more powerful
-and to thereby increase the interpreter speed,
-the arithmetic operations in the interpreter are ``typed''.
-That is, length conversion of arithmetic values occurs when they are
-used in an operation.
-This eliminates interpreter cycles for length conversion
-and the associated overhead.
-For example, when adding an integer that fits in one byte to one that
-requires four bytes to store, no ``conversion'' operators are required.
-The one byte integer is loaded onto the stack, followed by the four
-byte integer, and then an adding operator is used that has, implicit
-in its definition, the sizes of the arguments.
-.NH 2
-Data types in the interpreter
-.PP
-The interpreter deals with several different fundamental data types.
-In the memory of the machine, 1, 2, and 4 byte integers are supported,
-with only 2 and 4 byte integers being present on the stack.
-The interpreter always converts to 4 byte integers when there is a possibility
-of overflowing the shorter formats.
-This corresponds to the Pascal language definition of overflow in
-arithmetic operations that requires that the result be correct
-if all partial values lie within the bounds of the base integer type:
-4 byte integer values.
-.PP
-Character constants are treated similarly to 1 byte integers for
-most purposes, as are Boolean values.
-All enumerated types are treated as integer values of
-an appropriate length, usually 1 byte.
-The interpreter also has real numbers, occupying 8 bytes of storage,
-and sets and strings of varying length.
-The appropriate operations are included for each data type, such as
-set union and intersection and an operation to write a string.
-.PP
-No special
-.B packed
-data formats are supported by the interpreter.
-The smallest unit of storage occupied by any variable is one byte.
-The built-ins
-.I pack
-and
-.I unpack
-thus degenerate to simple memory to memory transfers with
-no special processing.
-.NH 2
-Runtime environment
-.PP
-The interpreter runtime environment uses a stack data area and a heap
-data area, that are kept at opposite ends of memory
-and grow towards each other.
-All global variables and variables local to procedures and functions
-are kept in the stack area.
-Dynamically allocated variables and buffers for input/output are
-allocated in the heap.
-.PP
-The addressing of block structured variables is done by using
-a fixed display
-that contains the address of its stack frame
-for each statically active block.\*(Dg
-.FS
-\*(dg\ Here ``block'' is being used to mean any
-.I procedure ,
-.I function
-or the main program.
-.FE
-This display is referenced by instructions that load and store
-variables and maintained by the operations for
-block entry and exit, and for non-local
-.B goto
-statements.
-.NH 2
-Dp, lc, loop
-.PP
-Three ``global'' variables in the interpreter, in addition to the
-``display'', are the
-.I dp,
-.I lc,
-and the
-.I loop.
-The
-.I dp
-is a pointer to the display entry for the current block;
-the
-.I lc
-is the abstract machine location counter;
-and the
-.I loop
-is a register that holds the address of the main interpreter
-loop so that returning to the loop to fetch the next instruction is
-a fast operation.
-.NH 2
-The stack frame structure
-.PP
-Each active block
-has a stack frame consisting of three parts:
-a block mark, local variables, and temporary storage for partially
-evaluated expressions.
-The stack in the interpreter grows from the high addresses in memory
-to the low addresses,
-so that those parts of the stack frame that are ``on the top''
-of the stack have the most negative offsets from the display
-entry for the block.
-The major parts of the stack frame are represented in Figure 1.1.
-.so fig1.1.n
-Note that the local variables of each block
-have negative offsets from the corresponding display entry,
-the ``first'' local variable having offset `\-2'.
-.NH 2
-The block mark
-.PP
-The block mark contains the saved information necessary
-to restore the environment when the current block exits.
-It consists of two parts.
-The first and top-most part is saved by the
-.SM CALL
-instruction in the interpreter.
-This information is not present for the main program
-as it is never ``called''.
-The second part of the block mark is created by the
-.SM BEG
-begin block operator that also allocates and clears the
-local variable storage.
-The format of these blocks is represented in Figure 1.2.
-.sp
-.so fig1.2.n
-.PP
-The data saved by the
-.SM CALL
-operator includes the line number
-.I lino
-of the point of call,
-that is printed if the program execution ends abnormally;
-the location counter
-.I lc
-giving the return address;
-and the current display entry address
-.I dp
-at the time of call.
-.PP
-The
-.SM BEG
-begin operator saves the previous display contents at the level
-of this block, so that the display can be restored on block exit.
-A pointer to the beginning line number and the
-name of this block is also saved.
-This information is stored in the interpreter object code in-line after the
-.SM BEG
-operator.
-It is used in printing a post-mortem backtrace.
-The saved file name and buffer reference are necessary because of
-the input/output structure
-(this is discussed in detail in
-sections 3.3 and 3.4).
-The top of stack reference gives the value the stack pointer should
-have when there are no expression temporaries on the stack.
-It is used for a consistency check in the
-.SM LINO
-line number operators in the interpreter, that occurs before
-each statement executed.
-This helps to catch bugs in the interpreter, that often manifest
-themselves by leaving the stack non-empty between statements.
-.PP
-Note that there is no explicit static link here.
-Thus to set up the display correctly after a non-local
-.B goto
-statement one must ``unwind''
-through all the block marks on the stack to rebuild the display.
-.NH 2
-Arguments and return values
-.PP
-A function returns its value into a space reserved by the calling
-block.
-Arguments to a
-.B function
-are placed on top of this return area.
-For both
-.B procedure
-and
-.B function
-calls, arguments are placed at the end of the expression evaluation area
-of the caller.
-When a
-.B function
-completes, expression evaluation can continue
-after popping the arguments to the
-.B function
-off the stack,
-exactly as if the function value had been ``loaded''.
-The arguments to a
-.B procedure
-are also popped off the stack by the caller
-after its execution ends.
-.KS
-.PP
-As a simple example consider the following stack structure
-for a call to a function
-.I f,
-of the form ``f(a)''.
-.so fig1.3.n
-.KE
-.PP
-If we suppose that
-.I f
-returns a
-.I real
-and that
-.I a
-is an integer,
-the calling sequence for this function would be:
-.DS
-.TS
-lp-2w(8) l.
-PUSH \-8
-RV4:\fIl a\fR
-CALL:\fIl f\fR
-POP 4
-.TE
-.DE
-.ZP
-Here we use the operator
-.SM PUSH
-to clear space for the return value,
-load
-.I a
-on the stack with a ``right value'' operator,
-call the function,
-pop off the argument
-.I a ,
-and can then complete evaluation of the containing expression.
-The operations used here will be explained in section 2.
-.PP
-If the function
-.I f
-were given by
-.LS
- 10 \*bfunction\fR f(i: integer): real;
- 11 \*bbegin\fR
- 12 f := i
- 13 \*bend\fR;
-.LE
-then
-.I f
-would have code sequence:
-.DS
-.TS
-lp-2w(8) l.
-BEG:2 0
- 11
- "f"
-LV:\fIl\fR 40
-RV4:\fIl\fR 32
-AS48
-END
-.TE
-.DE
-.ZP
-Here the
-.SM BEG
-operator takes 9 bytes of inline data.
-The first byte specifies the
-length of the function name.
-The second longword specifies the
-amount of local variable storage, here none.
-The succeeding two lines give the line number of the
-.B begin
-and the name of the block
-for error traceback.
-The
-.SM BEG
-operator places a name pointer in the block mark.
-The body of the
-.B function
-first takes an address of the
-.B function
-result variable
-.I f
-using the address of operator
-.SM LV
-.I a .
-The next operation in the interpretation of this function is the loading
-of the value of
-.I i .
-.I I
-is at the level of the
-.B function
-.I f ,
-here symbolically
-.I l,
-and the first variable in the local variable area.
-The
-.B function
-completes by assigning the 4 byte integer on the stack to the 8 byte
-return location, hence the
-.SM AS48
-assignment operator, and then uses the
-.SM END
-operator to exit the current block.
-.NH 2
-The main interpreter loop
-.PP
-The main interpreter loop is simply:
-.DS
-.mD
-iloop:
- \fBcaseb\fR (lc)+,$0,$255
- <table of opcode interpreter addresses>
-.DE
-.ZP
-The main opcode is extracted from the first byte of the instruction
-and used to index into the table of opcode interpreter addresses.
-Control is then transferred to the specified location.
-The sub-opcode may be used to index the display,
-as a small constant,
-or to specify one of several relational operators.
-In the cases where a constant is needed, but it
-is not small enough to fit in the byte sub-operator,
-a zero is placed there and the constant follows in the next word.
-Zero is easily tested for,
-as the instruction that fetches the
-sub-opcode sets the condition code flags.
-A construction like:
-.DS
-.mD
-_OPER:
- \fBcvtbl\fR (lc)+,r0
- \fBbneq\fR L1
- \fBcvtwl\fR (lc)+,r0
-L1: ...
-.DE
-is all that is needed to effect this packing of data.
-This technique saves space in the Pascal
-.I obj
-object code.
-.PP
-The address of the instruction at
-.I iloop
-is always contained in the register variable
-.I loop .
-Thus a return to the main interpreter is simply:
-.DS
- \fBjmp\fR (loop)
-.DE
-that is both quick and occupies little space.
-.NH 2
-Errors
-.PP
-Errors during interpretation fall into three classes:
-.DS
-1) Interpreter detected errors.
-2) Hardware detected errors.
-3) External events.
-.DE
-.PP
-Interpreter detected errors include I/O errors and
-built-in function errors.
-These errors cause a subroutine call to an error routine
-with a single parameter indicating the cause of the error.
-Hardware errors such as range errors and overflows are
-fielded by a special routine that determines the opcode
-that caused the error.
-It then calls the error routine with an appropriate error
-parameter.
-External events include interrupts and system limits such
-as available memory.
-They generate a call to the error routine with an
-appropriate error code.
-The error routine processes the error condition,
-printing an appropriate error message and usually
-a backtrace from the point of the error.
diff --git a/share/doc/papers/px/pxin2.n b/share/doc/papers/px/pxin2.n
deleted file mode 100644
index c036c71c797..00000000000
--- a/share/doc/papers/px/pxin2.n
+++ /dev/null
@@ -1,921 +0,0 @@
-.\" $OpenBSD: pxin2.n,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1979 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)pxin2.n 5.2 (Berkeley) 4/17/91
-.\"
-.if !\n(xx .so tmac.p
-.nr H1 1
-.if n .ND
-.NH
-Operations
-.NH 2
-Naming conventions and operation summary
-.PP
-Table 2.1 outlines the opcode typing convention.
-The expression ``a above b'' means that `a' is on top
-of the stack with `b' below it.
-Table 2.3 describes each of the opcodes.
-The character `*' at the end of a name specifies that
-all operations with the root prefix
-before the `*'
-are summarized by one entry.
-Table 2.2 gives the codes used
-to describe the type of inline data expected by each instruction.
-.sp 2
-.so table2.1.n
-.sp 2
-.so table2.2.n
-.bp
-.so table2.3.n
-.bp
-.NH 2
-Basic control operations
-.LP
-.SH
-HALT
-.IP
-Corresponds to the Pascal procedure
-.I halt ;
-causes execution to end with a post-mortem backtrace as if a run-time
-error had occurred.
-.SH
-BEG s,W,w,"
-.IP
-Causes the second part of the block mark to be created, and
-.I W
-bytes of local variable space to be allocated and cleared to zero.
-Stack overflow is detected here.
-.I w
-is the first line of the body of this section for error traceback,
-and the inline string (length s) the character representation of its name.
-.SH
-NODUMP s,W,w,"
-.IP
-Equivalent to
-.SM BEG ,
-and used to begin the main program when the ``p''
-option is disabled so that the post-mortem backtrace will be inhibited.
-.SH
-END
-.IP
-Complementary to the operators
-.SM CALL
-and
-.SM BEG ,
-exits the current block, calling the procedure
-.I pclose
-to flush buffers for and release any local files.
-Restores the environment of the caller from the block mark.
-If this is the end for the main program, all files are
-.I flushed,
-and the interpreter is exited.
-.SH
-CALL l,A
-.IP
-Saves the current line number, return address, and active display entry pointer
-.I dp
-in the first part of the block mark, then transfers to the entry point
-given by the relative address
-.I A ,
-that is the beginning of a
-.B procedure
-or
-.B function
-at level
-.I l.
-.SH
-PUSH s
-.IP
-Clears
-.I s
-bytes on the stack.
-Used to make space for the return value of a
-.B function
-just before calling it.
-.SH
-POP s
-.IP
-Pop
-.I s
-bytes off the stack.
-Used after a
-.B function
-or
-.B procedure
-returns to remove the arguments from the stack.
-.SH
-TRA a
-.IP
-Transfer control to relative address
-.I a
-as a local
-.B goto
-or part of a structured statement.
-.SH
-TRA4 A
-.IP
-Transfer control to an absolute address as part of a non-local
-.B goto
-or to branch over procedure bodies.
-.SH
-LINO s
-.IP
-Set current line number to
-.I s.
-For consistency, check that the expression stack is empty
-as it should be (as this is the start of a statement).
-This consistency check will fail only if there is a bug in the
-interpreter or the interpreter code has somehow been damaged.
-Increment the statement count and if it exceeds the statement limit,
-generate a fault.
-.SH
-GOTO l,A
-.IP
-Transfer control to address
-.I A
-that is in the block at level
-.I l
-of the display.
-This is a non-local
-.B goto.
-Causes each block to be exited as if with
-.SM END ,
-flushing and freeing files with
-.I pclose,
-until the current display entry is at level
-.I l.
-.SH
-SDUP*
-.IP
-Duplicate the word or long on the top of
-the stack.
-This is used mostly for constructing sets.
-See section 2.11.
-.NH 2
-If and relational operators
-.SH
-IF a
-.IP
-The interpreter conditional transfers all take place using this operator
-that examines the Boolean value on the top of the stack.
-If the value is
-.I true ,
-the next code is executed,
-otherwise control transfers to the specified address.
-.SH
-REL* r
-.IP
-These take two arguments on the stack,
-and the sub-operation code specifies the relational operation to
-be done, coded as follows with `a' above `b' on the stack:
-.DS
-.mD
-.TS
-lb lb
-c a.
-Code Operation
-_
-0 a = b
-2 a <> b
-4 a < b
-6 a > b
-8 a <= b
-10 a >= b
-.TE
-.DE
-.IP
-Each operation does a test to set the condition code
-appropriately and then does an indexed branch based on the
-sub-operation code to a test of the condition here specified,
-pushing a Boolean value on the stack.
-.IP
-Consider the statement fragment:
-.DS
-.mD
-\*bif\fR a = b \*bthen\fR
-.DE
-.IP
-If
-.I a
-and
-.I b
-are integers this generates the following code:
-.DS
-.TS
-lp-2w(8) l.
-RV4:\fIl a\fR
-RV4:\fIl b\fR
-REL4 \&=
-IF \fIElse part offset\fR
-.sp
-.T&
-c s.
-\fI\&... Then part code ...\fR
-.TE
-.DE
-.NH 2
-Boolean operators
-.PP
-The Boolean operators
-.SM AND ,
-.SM OR ,
-and
-.SM NOT
-manipulate values on the top of the stack.
-All Boolean values are kept in single bytes in memory,
-or in single words on the stack.
-Zero represents a Boolean \fIfalse\fP, and one a Boolean \fItrue\fP.
-.NH 2
-Right value, constant, and assignment operators
-.SH
-LRV* l,A
-.br
-RV* l,a
-.IP
-The right value operators load values on the stack.
-They take a block number as a sub-opcode and load the appropriate
-number of bytes from that block at the offset specified
-in the following word onto the stack. As an example, consider
-.SM LRV4 :
-.DS
-.mD
-_LRV4:
- \fBcvtbl\fR (lc)+,r0 #r0 has display index
- \fBaddl3\fR _display(r0),(lc)+,r1 #r1 has variable address
- \fBpushl\fR (r1) #put value on the stack
- \fBjmp\fR (loop)
-.DE
-.IP
-Here the interpreter places the display level in r0.
-It then adds the appropriate display value to the inline offset and
-pushes the value at this location onto the stack.
-Control then returns to the main
-interpreter loop.
-The
-.SM RV*
-operators have short inline data that
-reduces the space required to address the first 32K of
-stack space in each stack frame.
-The operators
-.SM RV14
-and
-.SM RV24
-provide explicit conversion to long as the data
-is pushed.
-This saves the generation of
-.SM STOI
-to align arguments to
-.SM C
-subroutines.
-.SH
-CON* r
-.IP
-The constant operators load a value onto the stack from inline code.
-Small integer values are condensed and loaded by the
-.SM CON1
-operator, that is given by
-.DS
-.mD
-_CON1:
- \fBcvtbw\fR (lc)+,\-(sp)
- \fBjmp\fR (loop)
-.DE
-.IP
-Here note that little work was required as the required constant
-was available at (lc)+.
-For longer constants,
-.I lc
-must be incremented before moving the constant.
-The operator
-.SM CON
-takes a length specification in the sub-opcode and can be used to load
-strings and other variable length data onto the stack.
-The operators
-.SM CON14
-and
-.SM CON24
-provide explicit conversion to long as the constant is pushed.
-.SH
-AS*
-.IP
-The assignment operators are similar to arithmetic and relational operators
-in that they take two operands, both in the stack,
-but the lengths given for them specify
-first the length of the value on the stack and then the length
-of the target in memory.
-The target address in memory is under the value to be stored.
-Thus the statement
-.DS
-i := 1
-.DE
-.IP
-where
-.I i
-is a full-length, 4 byte, integer,
-will generate the code sequence
-.DS
-.TS
-lp-2w(8) l.
-LV:\fIl i\fP
-CON1:1
-AS24
-.TE
-.DE
-.IP
-Here
-.SM LV
-will load the address of
-.I i,
-that is really given as a block number in the sub-opcode and an
-offset in the following word,
-onto the stack, occupying a single word.
-.SM CON1 ,
-that is a single word instruction,
-then loads the constant 1,
-that is in its sub-opcode,
-onto the stack.
-Since there are no one byte constants on the stack,
-this becomes a 2 byte, single word integer.
-The interpreter then assigns a length 2 integer to a length 4 integer using
-.SM AS24 \&.
-The code sequence for
-.SM AS24
-is given by:
-.DS
-.mD
-_AS24:
- \fBincl\fR lc
- \fBcvtwl\fR (sp)+,*(sp)+
- \fBjmp\fR (loop)
-.DE
-.IP
-Thus the interpreter gets the single word off the stack,
-extends it to be a 4 byte integer,
-gets the target address off the stack,
-and finally stores the value in the target.
-This is a typical use of the constant and assignment operators.
-.NH 2
-Addressing operations
-.SH
-LLV l,W
-.br
-LV l,w
-.IP
-The most common operation done by the interpreter
-is the ``left value'' or ``address of'' operation.
-It is given by:
-.DS
-.mD
-_LLV:
- \fBcvtbl\fR (lc)+,r0 #r0 has display index
- \fBaddl3\fR _display(r0),(lc)+,\-(sp) #push address onto the stack
- \fBjmp\fR (loop)
-.DE
-.IP
-It calculates an address in the block specified in the sub-opcode
-by adding the associated display entry to the
-offset that appears in the following word.
-The
-.SM LV
-operator has short inline data that reduces the space
-required to address the first 32K of stack space in each call frame.
-.SH
-OFF s
-.IP
-The offset operator is used in field names.
-Thus to get the address of
-.LS
-p^.f1
-.LE
-.IP
-.I pi
-would generate the sequence
-.DS
-.mD
-.TS
-lp-2w(8) l.
-RV:\fIl p\fP
-OFF \fIf1\fP
-.TE
-.DE
-.IP
-where the
-.SM RV
-loads the value of
-.I p,
-given its block in the sub-opcode and offset in the following word,
-and the interpreter then adds the offset of the field
-.I f1
-in its record to get the correct address.
-.SM OFF
-takes its argument in the sub-opcode if it is small enough.
-.SH
-NIL
-.IP
-The example above is incomplete, lacking a check for a
-.B nil
-pointer.
-The code generated would be
-.DS
-.TS
-lp-2w(8) l.
-RV:\fIl p\fP
-NIL
-OFF \fIf1\fP
-.TE
-.DE
-.IP
-where the
-.SM NIL
-operation checks for a
-.I nil
-pointer and generates the appropriate runtime error if one is found.
-.SH
-LVCON s,"
-.IP
-A pointer to the specified length inline data is pushed
-onto the stack.
-This is primarily used for
-.I printf
-type strings used by
-.SM WRITEF .
-(see sections 3.6 and 3.8)
-.SH
-INX* s,w,w
-.IP
-The operators
-.SM INX2
-and
-.SM INX4
-are used for subscripting.
-For example, the statement
-.DS
-a[i] := 2.0
-.DE
-.IP
-with
-.I i
-an integer and
-.I a
-an
-``array [1..1000] of real''
-would generate
-.DS
-.TS
-lp-2w(8) l.
-LV:\fIl a\fP
-RV4:\fIl i\fP
-INX4:8 1,999
-CON8 2.0
-AS8
-.TE
-.DE
-.IP
-Here the
-.SM LV
-operation takes the address of
-.I a
-and places it on the stack.
-The value of
-.I i
-is then placed on top of this on the stack.
-The array address is indexed by the
-length 4 index (a length 2 index would use
-.SM INX2 )
-where the individual elements have a size of 8 bytes.
-The code for
-.SM INX4
-is:
-.DS
-.mD
-_INX4:
- \fBcvtbl\fR (lc)+,r0
- \fBbneq\fR L1
- \fBcvtwl\fR (lc)+,r0 #r0 has size of records
-L1:
- \fBcvtwl\fR (lc)+,r1 #r1 has lower bound
- \fBmovzwl\fR (lc)+,r2 #r2 has upper-lower bound
- \fBsubl3\fR r1,(sp)+,r3 #r3 has base subscript
- \fBcmpl\fR r3,r2 #check for out of bounds
- \fBbgtru\fR esubscr
- \fBmull2\fR r0,r3 #calculate byte offset
- \fBaddl2\fR r3,(sp) #calculate actual address
- \fBjmp\fR (loop)
-esubscr:
- \fBmovw\fR $ESUBSCR,_perrno
- \fBjbr\fR error
-.DE
-.IP
-Here the lower bound is subtracted, and range checked against the
-upper minus lower bound.
-The offset is then scaled to a byte offset into the array
-and added to the base address on the stack.
-Multi-dimension subscripts are translated as a sequence of single subscriptings.
-.SH
-IND*
-.IP
-For indirect references through
-.B var
-parameters and pointers,
-the interpreter has a set of indirection operators that convert a pointer
-on the stack into a value on the stack from that address.
-Different
-.SM IND
-operators are necessary because of the possibility of different
-length operands.
-The
-.SM IND14
-and
-.SM IND24
-operators do conversions to long
-as they push their data.
-.NH 2
-Arithmetic operators
-.PP
-The interpreter has many arithmetic operators.
-All operators produce results long enough to prevent overflow
-unless the bounds of the base type are exceeded.
-The basic operators available are
-.DS
-Addition: ADD*, SUCC*
-Subtraction: SUB*, PRED*
-Multiplication: MUL*, SQR*
-Division: DIV*, DVD*, MOD*
-Unary: NEG*, ABS*
-.DE
-.NH 2
-Range checking
-.PP
-The interpreter has several range checking operators.
-The important distinction among these operators is between values whose
-legal range begins at zero and those that do not begin at zero,
-for example
-a subrange variable whose values range from 45 to 70.
-For those that begin at zero, a simpler ``logical'' comparison against
-the upper bound suffices.
-For others, both the lower and upper bounds must be checked independently,
-requiring two comparisons.
-On the
-.SM "VAX 11/780"
-both checks are done using a single index instruction
-so the only gain is in reducing the inline data.
-.NH 2
-Case operators
-.PP
-The interpreter includes three operators for
-.B case
-statements that are used depending on the width of the
-.B case
-label type.
-For each width, the structure of the case data is the same, and
-is represented in figure 2.4.
-.sp 1
-.so fig2.4.n
-.PP
-The
-.SM CASEOP
-case statement operators do a sequential search through the
-case label values.
-If they find the label value, they take the corresponding entry
-from the transfer table and cause the interpreter to branch to the
-specified statement.
-If the specified label is not found, an error results.
-.PP
-The
-.SM CASE
-operators take the number of cases as a sub-opcode
-if possible.
-Three different operators are needed to handle single byte,
-word, and long case transfer table values.
-For example, the
-.SM CASEOP1
-operator has the following code sequence:
-.DS
-.mD
-_CASEOP1:
- \fBcvtbl\fR (lc)+,r0
- \fBbneq\fR L1
- \fBcvtwl\fR (lc)+,r0 #r0 has length of case table
-L1:
- \fBmovaw\fR (lc)[r0],r2 #r2 has pointer to case labels
- \fBmovzwl\fR (sp)+,r3 #r3 has the element to find
- \fBlocc\fR r3,r0,(r2) #r0 has index of located element
- \fBbeql\fR caserr #element not found
- \fBmnegl\fR r0,r0 #calculate new lc
- \fBcvtwl\fR (r2)[r0],r1 #r1 has lc offset
- \fBaddl2\fR r1,lc
- \fBjmp\fR (loop)
-caserr:
- \fBmovw\fR $ECASE,_perrno
- \fBjbr\fR error
-.DE
-.PP
-Here the interpreter first computes the address of the beginning
-of the case label value area by adding twice the number of case label
-values to the address of the transfer table, since the transfer
-table entries are 2 byte address offsets.
-It then searches through the label values, and generates an ECASE
-error if the label is not found.
-If the label is found, the index of the corresponding entry
-in the transfer table is extracted and that offset is added
-to the interpreter location counter.
-.NH 2
-Operations supporting pxp
-.PP
-The following operations are defined to do execution profiling.
-.SH
-PXPBUF w
-.IP
-Causes the interpreter to allocate a count buffer
-with
-.I w
-four byte counters
-and to clear them to zero.
-The count buffer is placed within an image of the
-.I pmon.out
-file as described in the
-.I "PXP Implementation Notes."
-The contents of this buffer are written to the file
-.I pmon.out
-when the program ends.
-.SH
-COUNT w
-.IP
-Increments the counter specified by
-.I w .
-.SH
-TRACNT w,A
-.IP
-Used at the entry point to procedures and functions,
-combining a transfer to the entry point of the block with
-an incrementing of its entry count.
-.NH 2
-Set operations
-.PP
-The set operations:
-union
-.SM ADDT,
-intersection
-.SM MULT,
-element removal
-.SM SUBT,
-and the set relationals
-.SM RELT
-are straightforward.
-The following operations are more interesting.
-.SH
-CARD s
-.IP
-Takes the cardinality of a set of size
-.I s
-bytes on top of the stack, leaving a 2 byte integer count.
-.SM CARD
-uses the
-.B ffs
-opcode to successively count the number of set bits in the set.
-.SH
-CTTOT s,w,w
-.IP
-Constructs a set.
-This operation requires a non-trivial amount of work,
-checking bounds and setting individual bits or ranges of bits.
-This operation sequence is slow,
-and motivates the presence of the operator
-.SM INCT
-below.
-The arguments to
-.SM CTTOT
-include the number of elements
-.I s
-in the constructed set,
-the lower and upper bounds of the set,
-the two
-.I w
-values,
-and a pair of values on the stack for each range in the set, single
-elements in constructed sets being duplicated with
-.SM SDUP
-to form degenerate ranges.
-.SH
-IN s,w,w
-.IP
-The operator
-.B in
-for sets.
-The value
-.I s
-specifies the size of the set,
-the two
-.I w
-values the lower and upper bounds of the set.
-The value on the stack is checked to be in the set on the stack,
-and a Boolean value of
-.I true
-or
-.I false
-replaces the operands.
-.SH
-INCT
-.IP
-The operator
-.B in
-on a constructed set without constructing it.
-The left operand of
-.B in
-is on top of the stack followed by the number of pairs in the
-constructed set,
-and then the pairs themselves, all as single word integers.
-Pairs designate runs of values and single values are represented by
-a degenerate pair with both values equal.
-This operator is generated in grammatical constructs such as
-.LS
-\fBif\fR character \fBin\fR [`+', `\-', `*', `/']
-.LE
-.IP
-or
-.LS
-\fBif\fR character \fBin\fR [`a'..`z', `$', `_']
-.LE
-.IP
-These constructs are common in Pascal, and
-.SM INCT
-makes them run much faster in the interpreter,
-as if they were written as an efficient series of
-.B if
-statements.
-.NH 2
-Miscellaneous
-.PP
-Other miscellaneous operators that are present in the interpreter
-are
-.SM ASRT
-that causes the program to end if the Boolean value on the stack is not
-.I true,
-and
-.SM STOI ,
-.SM STOD ,
-.SM ITOD ,
-and
-.SM ITOS
-that convert between different length arithmetic operands for
-use in aligning the arguments in
-.B procedure
-and
-.B function
-calls, and with some untyped built-ins, such as
-.SM SIN
-and
-.SM COS \&.
-.PP
-Finally, if the program is run with the run-time testing disabled, there
-are special operators for
-.B for
-statements
-and special indexing operators for arrays
-that have individual element size that is a power of 2.
-The code can run significantly faster using these operators.
-.NH 2
-Mathematical Functions
-.PP
-The transcendental functions
-.SM SIN ,
-.SM COS ,
-.SM ATAN ,
-.SM EXP ,
-.SM LN ,
-.SM SQRT ,
-.SM SEED ,
-and
-.SM RANDOM
-are taken from the standard UNIX
-mathematical package.
-These functions take double precision floating point
-values and return the same.
-.PP
-The functions
-.SM EXPO ,
-.SM TRUNC ,
-and
-.SM ROUND
-take a double precision floating point number.
-.SM EXPO
-returns an integer representing the machine
-representation of its argument's exponent,
-.SM TRUNC
-returns the integer part of its argument, and
-.SM ROUND
-returns the rounded integer part of its argument.
-.NH 2
-System functions and procedures
-.SH
-LLIMIT
-.IP
-A line limit and a file pointer are passed on the stack.
-If the limit is non-negative the line limit is set to the
-specified value, otherwise it is set to unlimited.
-The default is unlimited.
-.SH
-STLIM
-.IP
-A statement limit is passed on the stack. The statement limit
-is set as specified.
-The default is 500,000.
-No limit is enforced when the ``p'' option is disabled.
-.SH
-CLCK
-.br
-SCLCK
-.IP
-.SM CLCK
-returns the number of milliseconds of user time used by the program;
-.SM SCLCK
-returns the number of milliseconds of system time used by the program.
-.SH
-WCLCK
-.IP
-The number of seconds since some predefined time is
-returned. Its primary usefulness is in determining
-elapsed time and in providing a unique time stamp.
-.sp
-.LP
-The other system time procedures are
-.SM DATE
-and
-.SM TIME
-that copy an appropriate text string into a Pascal string array.
-The function
-.SM ARGC
-returns the number of command line arguments passed to the program.
-The procedure
-.SM ARGV
-takes an index on the stack and copies the specified
-command line argument into a Pascal string array.
-.NH 2
-Pascal procedures and functions
-.SH
-PACK s,w,w,w
-.br
-UNPACK s,w,w,w
-.IP
-They function as a memory to memory move with several
-semantic checks.
-They do no ``unpacking'' or ``packing'' in the true sense as the
-interpreter supports no packed data types.
-.SH
-NEW s
-.br
-DISPOSE s
-.IP
-An
-.SM LV
-of a pointer is passed.
-.SM NEW
-allocates a record of a specified size and puts a pointer
-to it into the pointer variable.
-.SM DISPOSE
-deallocates the record pointed to by the pointer
-and sets the pointer to
-.SM NIL .
-.sp
-.LP
-The function
-.SM CHR*
-converts a suitably small integer into an ASCII character.
-Its primary purpose is to do a range check.
-The function
-.SM ODD*
-returns
-.I true
-if its argument is odd and returns
-.I false
-if its argument is even.
-The function
-.SM UNDEF
-always returns the value
-.I false .
diff --git a/share/doc/papers/px/pxin3.n b/share/doc/papers/px/pxin3.n
deleted file mode 100644
index 24be98673a0..00000000000
--- a/share/doc/papers/px/pxin3.n
+++ /dev/null
@@ -1,595 +0,0 @@
-.\" $OpenBSD: pxin3.n,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1979 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)pxin3.n 5.2 (Berkeley) 4/17/91
-.\"
-.if !\n(xx .so tmac.p
-.ta 8n 16n 24n
-.nr H1 2
-.if n .ND
-.NH
-Input/output
-.NH 2
-The files structure
-.PP
-Each file in the Pascal environment is represented by a pointer
-to a
-.I files
-structure in the heap.
-At the location addressed by the pointer is the element
-in the file's window variable.
-Behind this window variable is information about the file,
-at the following offsets:
-.so table3.1.n
-.PP
-Here
-.SM FBUF
-is a pointer to the system FILE block for the file.
-The standard system I/O library is
-used that provides block buffered input/output,
-with 1024 characters normally transferred at each read or write.
-.PP
-The files in the
-Pascal environment
-are all linked together on a single file chain through the
-.SM FCHAIN
-links.
-For each file the
-.SM FLEV
-pointer gives its associated file variable.
-These are used to free files at block exit as described in section 3.3
-below.
-.PP
-The
-FNAME
-and
-PFNAME
-give the associated
-file name for the file and the name to be used when printing
-error diagnostics respectively.
-Although these names are usually the same,
-.I input
-and
-.I output
-usually have no associated
-file name so the distinction is necessary.
-.PP
-The
-FUNIT
-word contains
-a set of flags
-whose representations are:
-.TS
-center;
-l l l.
-EOF 0x0100 At end-of-file
-EOLN 0x0200 At end-of-line (text files only)
-SYNC 0x0400 File window is out of sync
-TEMP 0x0800 File is temporary
-FREAD 0x1000 File is open for reading
-FWRITE 0x2000 File is open for writing
-FTEXT 0x4000 File is a text file; process EOLN
-FDEF 0x8000 File structure created, but file not opened
-.TE
-.PP
-The
-EOF
-and
-EOLN
-bits here reflect the associated built-in function values.
-TEMP
-specifies that the file has a generated temporary name and that
-it should therefore be removed when its block exits.
-FREAD
-and
-FWRITE
-specify that
-.I reset
-and
-.I rewrite
-respectively have been done on the file so that
-input or output operations can be done.
-FTEXT
-specifies the file is a text file so that
-EOLN
-processing should be done,
-with newline characters turned into blanks, etc.
-.PP
-The
-SYNC
-bit,
-when true,
-specifies that there is no usable image in the file buffer window.
-As discussed in the
-.I "Berkeley Pascal User's Manual,"
-the interactive environment necessitates having
-``input^'' undefined at the beginning
-of execution so that a program may print a prompt
-before the user is required to type input.
-The
-SYNC
-bit implements this.
-When it is set,
-it specifies that the element in the window
-must be updated before it can be used.
-This is never done until necessary.
-.NH 2
-Initialization of files
-.PP
-All the variables in the Pascal runtime environment are cleared to zero on
-block entry.
-This is necessary for simple processing of files.
-If a file is unused, its pointer will be
-.B nil.
-All references to an inactive file are thus references through a
-.B nil
-pointer.
-If the Pascal system did not clear storage to zero before execution
-it would not be possible to detect inactive files in this simple way;
-it would probably be necessary to generate (possibly complicated)
-code to initialize
-each file on block entry.
-.PP
-When a file is first mentioned in a
-.I reset
-or
-.I rewrite
-call,
-a buffer of the form described above is associated with it,
-and the necessary information about the file is placed in this
-buffer.
-The file is also linked into the active file chain.
-This chain is kept sorted by block mark address, the
-FLEV
-entries.
-.NH 2
-Block exit
-.PP
-When block exit occurs the interpreter must free the files that are in
-use in the block
-and their associated buffers.
-This is simple and efficient because the files in the active file chain are
-sorted by increasing block mark address.
-This means that the files for the current block will be at the front
-of the chain.
-For each file that is no longer accessible
-the interpreter first flushes the file's buffer
-if it is an output file.
-The interpreter then returns the file buffer and the files structure and window
-to the free space in the heap and removes the file from the active file chain.
-.NH 2
-Flushing
-.PP
-Flushing all the file buffers at abnormal termination,
-or on a call to the procedure
-.I flush
-or
-.I message
-is done by flushing
-each file on the file chain that has the
-FWRITE
-bit set in its flags word.
-.NH 2
-The active file
-.PP
-For input-output,
-.I px
-maintains a notion of an active file.
-Each operation that references a file makes the file
-it will be using the active file and then does its operation.
-A subtle point here is that one may do a procedure call to
-.I write
-that involves a call to a function that references another file,
-thereby destroying the active file set up before the
-.I write.
-Thus the active file is saved at block entry
-in the block mark and restored at block exit.\*(Dg
-.FS
-\*(dg\ It would probably be better to dispense with the notion of
-active file and use another mechanism that did not involve extra
-overhead on each procedure and function call.
-.FE
-.NH 2
-File operations
-.PP
-Files in Pascal can be used in two distinct ways:
-as the object of
-.I read,
-.I write,
-.I get,
-and
-.I put
-calls, or indirectly as though they were pointers.
-The second use as pointers must be careful
-not to destroy the active file in a reference such as
-.LS
-write(output, input\(ua)
-.LE
-or the system would incorrectly write on the input device.
-.PP
-The fundamental operator related to the use of a file is
-.SM FNIL.
-This takes the file variable, as a pointer,
-ensures that the pointer is not
-.B nil,
-and also that a usable image is in the file window,
-by forcing the
-.SM SYNC
-bit to be cleared.
-.PP
-A simple example that demonstrates the use of the file operators
-is given by
-.LS
-writeln(f)
-.LE
-that produces
-.DS
-.mD
-.TS
-lp-2w(8) l.
-RV:\fIl f\fP
-UNIT
-WRITLN
-.TE
-.DE
-.NH 2
-Read operations
-.SH
-GET
-.IP
-Advance the active file to the next input element.
-.SH
-FNIL
-.IP
-A file pointer is on the stack. Ensure that the associated file is active
-and that the file is synced so that there is input available in the window.
-.SH
-READ*
-.IP
-If the file is a text file, read a block of text
-and convert it to the internal type of the specified
-operand. If the file is not a text file then
-do an unformatted read of the next record.
-The procedure
-.SM READLN
-reads up to and including the next end of line character.
-.SH
-READE A
-.IP
-The operator
-.SM READE
-reads a string name of an enumerated type and converts it
-to its internal value.
-.SM READE
-takes a pointer to a data structure as shown in figure 3.2.
-.so fig3.2.n
-See the description of
-.SM NAM
-in the next section for an example.
-.NH 2
-Write operations
-.SH
-PUT
-.IP
-Output the element in the active file window.
-.SH
-WRITEF s
-.IP
-The argument(s) on the stack are output
-by the
-.I fprintf
-standard
-.SM I/O
-library routine.
-The sub-opcode
-.I s
-specifies the number
-of longword arguments on the stack.
-.SH
-WRITEC
-.IP
-The character on the top of the stack is output
-without formatting. Formatted characters must be output with
-.SM WRITEF .
-.SH
-WRITES
-.IP
-The string specified by the pointer on the top of the stack is output
-by the
-.I fwrite
-standard
-.SM I/O
-library routine.
-All characters including nulls are printed.
-.SH
-WRITLN
-.IP
-A linefeed is output to the active file.
-The line-count for the file is
-incremented and checked against the line limit.
-.SH
-PAGE
-.IP
-A formfeed is output to the active file.
-.SH
-NAM A
-.IP
-The value on the top of the stack is converted to a pointer
-to an enumerated type string name.
-The address
-.SM A
-points to an enumerated type structure identical
-to that used by
-.SM READE .
-An error is raised if the value is out of range.
-The form of this structure for the predefined type
-.B boolean
-is shown in figure 3.3.
-.so fig3.3.n
-The code for
-.SM NAM
-is
-.DS
-.mD
-_NAM:
- \fBincl\fR lc
- \fBaddl3\fR (lc)+,ap,r6 #r6 points to scalar name list
- \fBmovl\fR (sp)+,r3 #r3 has data value
- \fBcmpw\fR r3,(r6)+ #check value out of bounds
- \fBbgequ\fR enamrng
- \fBmovzwl\fR (r6)[r3],r4 #r4 has string index
- \fBpushab\fR (r6)[r4] #push string pointer
- \fBjmp\fR (loop)
-enamrng:
- \fBmovw\fR $ENAMRNG,_perrno
- \fBjbr\fR error
-.DE
-The address of the table is calculated by adding the base address
-of the interpreter code,
-.I ap
-to the offset pointed to by
-.I lc .
-The first word of the table gives the number of records and
-provides a range check of the data to be output.
-The pointer is then calculated as
-.DS
-.mD
-tblbase = ap + A;
-size = *tblbase++;
-return(tblbase + tblbase[value]);
-.DE
-.SH
-MAX s,w
-.IP
-The sub-opcode
-.I s
-is subtracted from the integer on the top of the stack.
-The maximum of the result and the second argument,
-.I w ,
-replaces the value on the top of the stack.
-This function verifies that variable specified
-width arguments are non-negative, and meet certain minimum width
-requirements.
-.SH
-MIN s
-.IP
-The minimum of the value on the top of the stack
-and the sub-opcode replaces the value on the top
-of the stack.
-.sp 1
-.LP
-The uses of files and the file operations are summarized
-in an example which outputs a real variable (r) with a variable
-width field (i).
-.LS
-writeln('r =',r:i,' ',true);
-.LE
-that generates the code
-.DS
-.mD
-.TS
-lp-2w(8) l.
-UNITOUT
-FILE
-CON14:1
-CON14:3
-LVCON:4 "r ="
-WRITES
-RV8\fI:l r\fP
-RV4\fI:l i\fP
-MAX:8 1
-RV4\fI:l i\fP
-MAX:1 1
-LVCON:8 " %*.*E"
-FILE
-WRITEF:6
-CONC4 \' \'
-WRITEC
-CON14:1
-NAM \fIbool\fP
-LVCON:4 "%s"
-FILE
-WRITEF:3
-WRITLN
-.TE
-.DE
-.PP
-Here the operator
-.SM UNITOUT
-is an abbreviated form of the operator
-.SM UNIT
-that is used when the file to be made active is
-.I output .
-A file descriptor, record count, string size, and a pointer
-to the constant string ``r ='' are pushed
-and then output by
-.SM WRITES .
-Next the value of
-.I r
-is pushed on the stack
-and the precision size is calculated by taking
-eight less than the width, but not less than one.
-This is followed by the width that is reduced by
-one to leave space for the required leading blank.
-If the width is too narrow, it
-is expanded by
-.I fprintf .
-A pointer to the format string is pushed followed
-by a file descriptor and the operator
-.SM WRITEF
-that prints out
-.I r .
-The value of six on
-.SM WRITEF
-comes from two longs for
-.I r
-and a long each for the precision, width, format string pointer,
-and file descriptor.
-The operator
-.SM CONC4
-pushes the
-.I blank
-character onto a long on the stack that is then printed out by
-.SM WRITEC .
-The internal representation for
-.I true
-is pushed as a long onto the stack and is
-then replaced by a pointer to the string ``true''
-by the operator
-.SM NAM
-using the table
-.I bool
-for conversion.
-This string is output by the operator
-.SM WRITEF
-using the format string ``%s''.
-Finally the operator
-.SM WRITLN
-appends a newline to the file.
-.NH 2
-File activation and status operations
-.SH
-UNIT*
-.IP
-The file pointed to by the file pointer on the top
-of the stack is converted to be the active file.
-The opcodes
-.SM UNITINP
-and
-.SM UNITOUT
-imply standard input and output respectively
-instead of explicitly pushing their file pointers.
-.SH
-FILE
-.IP
-The standard
-.SM I/O
-library file descriptor associated with the active file
-is pushed onto the stack.
-.SH
-EOF
-.IP
-The file pointed to by the file pointer on the top
-of the stack is checked for end of file. A boolean
-is returned with
-.I true
-indicating the end of file condition.
-.SH
-EOLN
-.IP
-The file pointed to by the file pointer on the top
-of the stack is checked for end of line. A boolean
-is returned with
-.I true
-indicating the end of line condition.
-Note that only text files can check for end of line.
-.NH 2
-File housekeeping operations
-.SH
-DEFNAME
-.IP
-Four data items are passed on the stack;
-the size of the data type associated with the file,
-the maximum size of the file name,
-a pointer to the file name,
-and a pointer to the file variable.
-A file record is created with the specified window size
-and the file variable set to point to it.
-The file is marked as defined but not opened.
-This allows
-.B program
-statement association of file names with file variables
-before their use by a
-.SM RESET
-or a
-.SM REWRITE .
-.SH
-BUFF s
-.IP
-The sub-opcode is placed in the external variable
-.I _bufopt
-to specify the amount of I/O buffering that is desired.
-The current options are:
-.DS
-0 \- character at a time buffering
-1 \- line at a time buffering
-2 \- block buffering
-.DE
-The default value is 1.
-.SH
-RESET
-.br
-REWRITE
-.IP
-Four data items are passed on the stack;
-the size of the data type associated with the file,
-the maximum size of the name (possibly zero),
-a pointer to the file name (possibly null),
-and a pointer to the file variable.
-If the file has never existed it is created as in
-.SM DEFNAME .
-If no file name is specified and no previous name exists
-(for example one created by
-.SM DEFNAME
-) then a system temporary name is created.
-.SM RESET
-then opens the file for input, while
-.SM REWRITE
-opens the file for output.
-.sp 1
-.PP
-The three remaining file operations are
-.SM FLUSH
-that flushes the active file,
-.SM REMOVE
-that takes the pointer to a file name and removes the
-specified file, and
-.SM MESSAGE
-that flushes all the output files and sets the
-standard error file to be the active file.
diff --git a/share/doc/papers/px/pxin4.n b/share/doc/papers/px/pxin4.n
deleted file mode 100644
index c2827c4f718..00000000000
--- a/share/doc/papers/px/pxin4.n
+++ /dev/null
@@ -1,65 +0,0 @@
-.\" $OpenBSD: pxin4.n,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1979 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)pxin4.n 5.2 (Berkeley) 4/17/91
-.\"
-.if !\n(xx .so tmac.p
-.nr H1 3
-.if n .ND
-.NH
-Conclusions
-.PP
-It is appropriate to consider,
-given the amount of time invested in rewriting the interpreter,
-whether the time was well spent, or whether a code-generator
-could have been written with an equivalent amount of effort.
-The Berkeley Pascal system is being modified to interface
-to the code generator of the portable C compiler with
-not much more work than was involved in rewriting
-.I px .
-However this compiler will probably not supersede the interpreter
-in an instructional environment as the
-necessary loading and assembly processes will slow the
-compilation process to a noticeable degree.
-This effect will be further exaggerated because
-student users spend more time in compilation than in execution.
-Measurements over the course of a quarter at Berkeley with a mixture
-of students from beginning programming to upper division compiler
-construction show that the amount of time in compilation exceeds the amount
-of time spent in the interpreter, the ratio being approximately 60/40.
-.PP
-A more promising approach might have been a throw-away code generator
-such as was done for the
-.SM
-WATFIV
-.NL
-system.
-However, high-quality post-mortem and interactive
-debugging facilities become much more difficult to provide than
-in the interpreter environment.
diff --git a/share/doc/papers/px/table2.1.n b/share/doc/papers/px/table2.1.n
deleted file mode 100644
index 7396ed88e1c..00000000000
--- a/share/doc/papers/px/table2.1.n
+++ /dev/null
@@ -1,81 +0,0 @@
-.\" $OpenBSD: table2.1.n,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1979 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)table2.1.n 5.2 (Berkeley) 4/17/91
-.\"
-.DS L
-.TS
-box center;
-c s s
-c s s
-c c c
-n ap-2 a.
-Table 2.1 \- Operator Suffixes
-=
-.sp
-Unary operator suffixes
-.sp .1i
-Suffix Example Argument type
-2 NEG2 Short integer (2 bytes)
-4 SQR4 Long integer (4 bytes)
-8 ABS8 Real (8 bytes)
-.sp
-_
-.sp
-.T&
-c s s
-c c c
-n ap-2 a.
-Binary operator suffixes
-.sp .1i
-Suffix Example Argument type
-2 ADD2 Two short integers
-24 MUL24 Short above long integer
-42 REL42 Long above short integer
-4 DIV4 Two long integers
-28 DVD28 Short integer above real
-48 REL48 Long integer above real
-82 SUB82 Real above short integer
-84 MUL84 Real above long integer
-8 ADD8 Two reals
-.sp
-_
-.sp
-.T&
-c s s
-c c c
-n ap-2 a.
-Other Suffixes
-.sp .1i
-Suffix Example Argument types
-T ADDT Sets
-G RELG Strings
-.sp
-.TE
-.DE
diff --git a/share/doc/papers/px/table2.2.n b/share/doc/papers/px/table2.2.n
deleted file mode 100644
index 2fd55d02010..00000000000
--- a/share/doc/papers/px/table2.2.n
+++ /dev/null
@@ -1,83 +0,0 @@
-.\" $OpenBSD: table2.2.n,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1979 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)table2.2.n 5.2 (Berkeley) 4/17/91
-.\"
-.DS L
-.TS
-box center;
-c s
-c | c
-ci | aw(3.25i).
-Table 2.2 \- Inline data type codes
-_
-Code Description
-=
-a T{
-.fi
-An address offset is given in the word
-following the instruction.
-T}
-_
-A T{
-An address offset is given in the four bytes following the instruction.
-T}
-_
-l T{
-An index into the display
-is given in the sub-opcode.
-T}
-_
-r T{
-A relational operator is encoded in the sub-opcode. (see section 2.3)
-T}
-_
-s T{
-A small integer is
-placed in the sub-opcode, or in the next word
-if it is zero or too large.
-T}
-_
-v T{
-Variable length inline data.
-T}
-_
-w T{
-A word value in the following word.
-T}
-_
-W T{
-A long value in the following four bytes.
-T}
-_
-" T{
-An inline constant string.
-T}
-.TE
-.DE
diff --git a/share/doc/papers/px/table2.3.n b/share/doc/papers/px/table2.3.n
deleted file mode 100644
index 37da10da649..00000000000
--- a/share/doc/papers/px/table2.3.n
+++ /dev/null
@@ -1,43 +0,0 @@
-.\" $OpenBSD: table2.3.n,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1979 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)table2.3.n 5.2 (Berkeley) 4/17/91
-.\"
-.TS H
-box center;
-c s s
-lw(14) | lw(12) | lw(40)
-lp-2 | a | l.
-Table 2.3 \- Machine operations
-_
-Mnemonic Reference Description
-=
-.TH
-.so fig2.3.n
-.TE
diff --git a/share/doc/papers/px/table3.1.n b/share/doc/papers/px/table3.1.n
deleted file mode 100644
index af7a98e6ace..00000000000
--- a/share/doc/papers/px/table3.1.n
+++ /dev/null
@@ -1,45 +0,0 @@
-.\" $OpenBSD: table3.1.n,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1979 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)table3.1.n 5.2 (Berkeley) 4/17/91
-.\"
-.TS
-center;
-n l l.
-\-108 FNAME Text name of associated UNIX file
-\-30 LCOUNT Current count of lines output
-\-26 LLIMIT Maximum number of lines permitted
-\-22 FBUF UNIX FILE pointer
-\-18 FCHAIN Chain to next file
-\-14 FLEV Pointer to associated file variable
-\-10 PFNAME Pointer to name of file for error messages
-\-6 FUNIT File status flags
-\-4 FSIZE Size of elements in the file
-0 File window element
-.TE
diff --git a/share/doc/papers/px/tmac.p b/share/doc/papers/px/tmac.p
deleted file mode 100644
index 678a8773c07..00000000000
--- a/share/doc/papers/px/tmac.p
+++ /dev/null
@@ -1,110 +0,0 @@
-.\" $OpenBSD: tmac.p,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1979 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)tmac.p 5.2 (Berkeley) 4/17/91
-.\"
-.if n .nr FM 1.2i
-.if t .tr *\(**=\(eq/\(sl+\(pl
-.bd S B 3
-.de mD
-.ta 8n 17n 42n
-..
-.de SM
-.if "\\$1"" .ps -2
-.if !"\\$1"" \s-2\\$1\s0\\$2
-..
-.de LG
-.if "\\$1"" .ps +2
-.if !"\\$1"" \s+2\\$1\s0\\$2
-..
-.de HP
-.nr pd \\n(PD
-.nr PD 0
-.if \\n(.$=0 .IP
-.if \\n(.$=1 .IP "\\$1"
-.if \\n(.$>=2 .IP "\\$1" "\\$2"
-.nr PD \\n(pd
-..
-.de ZP
-.nr pd \\n(PD
-.nr PD 0
-.PP
-.nr PD \\n(pd
-..
-.de LS \"LS - Literal display; ASCII DS
-.if \\n(.$=0 .DS
-.if \\n(.$=1 \\$1
-.if \\n(.$>1 \\$1 "\\$2"
-.if t .tr '\'`\`^\(ua-\(mi
-.if t .tr _\(ul
-..
-.de LE \"LE - End literal display
-.DE
-.tr ''``__--^^
-..
-.de UP
-Berkeley Pascal\\$1
-..
-.de PD
-\s-2PDP\s0
-.if \\n(.$=0 11/70
-.if \\n(.$>0 11/\\$1
-..
-.de DK
-Digital Equipment Corporation\\$1
-..
-.de PI
-.I pi \\$1
-..
-.de Xp
-.I Pxp \\$1
-..
-.de XP
-.I pxp \\$1
-..
-.de IX
-.I pix \\$1
-..
-.de X
-.I px \\$1
-..
-.de PX
-.I px \\$1
-..
-.if n .ds dg +
-.if t .ds dg \(dg
-.if n .ds Dg \*(dg
-.if t .ds Dg \v'-0.3m'\s-2\*(dg\s0\v'0.3m'
-.if n .ds dd *
-.if t .ds dd \(dd
-.if n .ds Dd \*(dd
-.if t .ds Dd \v'-0.3m'\s-2\*(dd\s0\v'0.3m'
-.if n .ds b \\fI
-.if t .ds b \\fB
-.nr xx 1
diff --git a/share/doc/papers/relengr/0.t b/share/doc/papers/relengr/0.t
deleted file mode 100644
index c07dbb43418..00000000000
--- a/share/doc/papers/relengr/0.t
+++ /dev/null
@@ -1,89 +0,0 @@
-.\" $OpenBSD: 0.t,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1989 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)0.t 5.1 (Berkeley) 4/17/91
-.\"
-.rm CM
-.nr PO 1.25i
-.ds CH "
-.ds CF "%
-.nr Fn 0 1
-.ds b3 4.3\s-1BSD\s+1
-.de KI
-.ds Lb "Fig. \\n+(Fn
-.KF
-.ce 1
-Figure \\n(Fn - \\$1.
-..
-.de SM
-\\s-1\\$1\\s+1\\$2
-..
-.de NM
-\&\fI\\$1\fP\\$2
-..
-.de RN
-\&\fI\\$1\fP\^(\^)\\$2
-..
-.de PN
-\&\fB\\$1\fP\\$2
-..
-.TL
-The Release Engineering of 4.3\s-1BSD\s0
-.AU
-Marshall Kirk McKusick
-.AU
-Michael J. Karels
-.AU
-Keith Bostic
-.AI
-Computer Systems Research Group
-Computer Science Division
-Department of Electrical Engineering and Computer Science
-University of California, Berkeley
-Berkeley, California 94720
-.AB
-This paper describes an approach used by a small group of people
-to develop and integrate a large software system.
-It details the development and release engineering strategy
-used during the preparation of the \*(b3 version of the UNIX\(dg
-.FS
-\(dgUNIX is a registered trademark of AT&T in the US and other countries.
-.FE
-operating system.
-Each release cycle is divided into an initial development phase
-followed by a release engineering phase.
-The release engineering of the distribution is done in three steps.
-The first step has an informal control policy for tracking modifications;
-it results in an alpha distribution.
-The second step has more rigid change mechanisms in place;
-it results in a beta release.
-During the final step changes are tracked very closely;
-the result is the final distribution.
-.AE
-.LP
diff --git a/share/doc/papers/relengr/1.t b/share/doc/papers/relengr/1.t
deleted file mode 100644
index 373a3c751b0..00000000000
--- a/share/doc/papers/relengr/1.t
+++ /dev/null
@@ -1,67 +0,0 @@
-.\" $OpenBSD: 1.t,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1989 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)1.t 5.1 (Berkeley) 4/17/91
-.\"
-.NH
-Introduction
-.PP
-The Computer Systems Research Group (\c
-.SM CSRG )
-has always been a small group of software developers.
-This resource limitation requires careful software-engineering management
-as well as careful coordination of both
-.SM CSRG
-personnel and the members of the general community who
-contribute to the development of the system.
-.PP
-Releases from Berkeley alternate between those that introduce
-major new facilities and those that provide bug fixes and efficiency
-improvements.
-This alternation allows timely releases, while providing for refinement,
-tuning, and correction of the new facilities.
-The timely followup of ``cleanup'' releases reflects the importance
-.SM CSRG
-places on providing a reliable and robust system on which its
-user community can depend.
-.PP
-The development of the Berkeley Software Distribution (\c
-.SM BSD )
-illustrates an \fIadvantage\fP of having a few
-principal developers:
-the developers all understand the entire system thoroughly enough
-to be able to coordinate their own work with
-that of other people to produce a coherent final system.
-Companies with large development organizations find
-this result difficult to duplicate.
-This paper describes the process by which
-the development effort for \*(b3 was managed.
-.[
-design and implementation
-.]
diff --git a/share/doc/papers/relengr/2.t b/share/doc/papers/relengr/2.t
deleted file mode 100644
index 34da66ecc9e..00000000000
--- a/share/doc/papers/relengr/2.t
+++ /dev/null
@@ -1,144 +0,0 @@
-.\" $OpenBSD: 2.t,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1989 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)2.t 5.1 (Berkeley) 4/17/91
-.\"
-.NH
-System Development
-.PP
-The first phase of each Berkeley system is its development.
-.SM CSRG
-maintains a continuously evolving list of projects that are candidates
-for integration into the system.
-Some of these are prompted by emerging ideas from the research world,
-such as the availability of a new technology, while other additions
-are suggested by the commercial world, such as the introduction of
-new standards like
-.SM POSIX ,
-and still other projects are emergency responses to situations like
-the Internet Worm.
-.PP
-These projects are ordered based on the perceived benefit of the
-project as opposed to its difficulty;
-the most important are selected for inclusion in each new release.
-Often there is a prototype available from a group outside
-.SM CSRG .
-Because of the limited staff at
-.SM CSRG ,
-this prototype is obtained to use as a starting base
-for integration into the
-.SM BSD
-system.
-Only if no prototype is available is the project begun in-house.
-In either case, the design of the facility is forced to conform to the
-.SM CSRG
-style.
-.PP
-Unlike other development groups, the staff of
-.SM CSRG
-specializes by projects rather than by particular parts
-of the system;
-a staff person will be responsible for all aspects of a project.
-This responsibility starts at the associated kernel device drivers;
-it proceeds up through the rest of the kernel,
-through the C library and system utility programs,
-ending at the user application layer.
-This staff person is also responsible for related documentation,
-including manual pages.
-Many projects proceed in parallel,
-interacting with other projects as their paths cross.
-.PP
-All source code, documentation, and auxiliary files are kept
-under a source code control system.
-During development,
-this control system is critical for notifying people
-when they are colliding with other ongoing projects.
-Even more important, however,
-is the audit trail maintained by the control system that
-is critical to the release engineering phase of the project
-described in the next section.
-.PP
-Much of the development of
-.SM BSD
-is done by personnel that are located at other institutions.
-Many of these people not only have interim copies of the release
-running on their own machines,
-but also have user accounts on the main development
-machine at Berkeley.
-Such users are commonly found logged in at Berkeley over the
-Internet, or sometimes via telephone dialup, from places as far away
-as Massachusetts or Maryland, as well as from closer places, such as
-Stanford.
-For the \*(b3 release,
-certain users had permission to modify the master copy of the
-system source directly.
-People given access to the master sources
-are carefully screened beforehand,
-but are not closely supervised.
-Their work is checked at the end of the beta-test period by
-.SM CSRG
-personnel who back out inappropriate changes.
-Several facilities, including the
-Fortran and C compilers,
-as well as important system programs, for example,
-.PN telnet
-and
-.PN ftp ,
-include significant contributions from people who did not work
-directly for
-.SM CSRG .
-One important exception to this approach is that changes to the kernel
-are made only by
-.SM CSRG
-personnel, although the changes are often suggested by the larger community.
-.PP
-The development phase continues until
-.SM CSRG
-decides that it is appropriate to make a release.
-The decision to halt development and transition to release mode
-is driven by several factors.
-The most important is that enough projects have been completed
-to make the system significantly superior to the previously released
-version of the system.
-For example,
-\*(b3 was released primarily because of the need for
-the improved networking capabilities and the markedly
-improved system performance.
-Of secondary importance is the issue of timing.
-If the releases are too infrequent, then
-.SM CSRG
-will be inundated with requests for interim releases.
-Conversely,
-if systems are released too frequently,
-the integration cost for many vendors will be too high,
-causing them to ignore the releases.
-Finally,
-the process of release engineering is long and tedious.
-Frequent releases slow the rate of development and
-cause undue tedium to the staff.
diff --git a/share/doc/papers/relengr/3.t b/share/doc/papers/relengr/3.t
deleted file mode 100644
index 6bf417904a3..00000000000
--- a/share/doc/papers/relengr/3.t
+++ /dev/null
@@ -1,388 +0,0 @@
-.\" $OpenBSD: 3.t,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1989 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)3.t 5.1 (Berkeley) 4/17/91
-.\"
-.NH
-System Release
-.PP
-Once the decision has been made to halt development
-and begin release engineering,
-all currently unfinished projects are evaluated.
-This evaluation involves computing the time required to complete
-the project as opposed to how important the project is to the
-upcoming release.
-Projects that are not selected for completion are
-removed from the distribution branch of the source code control system
-and saved on branch deltas so they can be retrieved,
-completed, and merged into a future release;
-the remaining unfinished projects are brought to orderly completion.
-.PP
-Developments from
-.SM CSRG
-are released in three steps: alpha, beta, and final.
-Alpha and beta releases are not true distributions\(emthey
-are test systems.
-Alpha releases are normally available to only a few sites,
-usually those working closely with
-.SM CSRG .
-More sites are given beta releases,
-as the system is closer to completion,
-and needs wider testing to find more obscure problems.
-For example, \*(b3 alpha was distributed to about fifteen
-sites, while \*(b3 beta ran at more than a hundred.
-.NH 2
-Alpha Distribution Development
-.PP
-The first step in creating an alpha distribution is to evaluate the
-existing state of the system and to decide what software should be
-included in the release.
-This decision process includes not only deciding what software should
-be added, but also what obsolete software ought to be retired from the
-distribution.
-The new software includes the successful projects that have been
-completed at
-.SM CSRG
-and elsewhere, as well as some portion of the vast quantity of
-contributed software that has been offered during the development
-period.
-.PP
-Once an initial list has been created,
-a prototype filesystem corresponding to the distribution
-is constructed, typically named
-.PN /nbsd .
-This prototype will eventually turn into the master source tree for the
-final distribution.
-During the period that the alpha distribution is being created,
-.PN /nbsd
-is mounted read-write, and is highly fluid.
-Programs are created and deleted,
-old versions of programs are completely replaced,
-and the correspondence between the sources and binaries
-is only loosely tracked.
-People outside
-.SM CSRG
-who are helping with the distribution are free to
-change their parts of the distribution at will.
-.PP
-During this period the newly forming distribution is
-checked for interoperability.
-For example,
-in \*(b3 the output of context differences from
-.PN diff
-was changed to merge overlapping sections.
-Unfortunately, this change broke the
-.PN patch
-program which could no longer interpret the output of
-.PN diff .
-Since the change to
-.PN diff
-and the
-.PN patch
-program had originated outside Berkeley,
-.SM CSRG
-had to coordinate the efforts of the respective authors
-to make the programs work together harmoniously.
-.PP
-Once the sources have stabilized,
-an attempt is made to compile the entire source tree.
-Often this exposes errors caused by changed header files,
-or use of obsoleted C library interfaces.
-If the incompatibilities affect too many programs,
-or require excessive amounts of change in the programs
-that are affected,
-the incompatibility is backed out or some backward-compatible
-interface is provided.
-The incompatibilities that are found and left in are noted
-in a list that is later incorporated into the release notes.
-Thus, users upgrading to the new system can anticipate problems
-in their own software that will require change.
-.PP
-Once the source tree compiles completely,
-it is installed and becomes the running system that
-.SM CSRG
-uses on its main development machine.
-Once in day-to-day use,
-other interoperability problems become apparent
-and are resolved.
-When all known problems have been resolved, and the system has been
-stable for some period of time, an alpha distribution tape is made
-from the contents of
-.PN /nbsd .
-.PP
-The alpha distribution is sent out to a small set of test sites.
-These test sites are selected as having a
-sophisticated user population, not only capable of finding bugs,
-but also of determining their cause and developing a fix for the problem.
-These sites are usually composed of groups that are contributing
-software to the distribution or groups that have a particular expertise
-with some portion of the system.
-.NH 2
-Beta Distribution Development
-.PP
-After the alpha tape is created,
-the distribution filesystem is mounted read-only.
-Further changes are requested in a change log rather than
-being made directly to the distribution.
-The change requests are inspected and implemented by a
-.SM CSRG
-staff person, followed by a compilation of the affected
-programs to ensure that they still build correctly.
-Once the alpha tape has been cut,
-changes to the distribution are no longer made by people outside
-.SM CSRG .
-.PP
-As the alpha sites install and begin running the alpha distribution,
-they monitor the problems that they encounter.
-For minor bugs, they typically report back the bug along with
-a suggested fix.
-Since many of the alpha sites are selected from among the people
-working closely with
-.SM CSRG ,
-they often have accounts on, and access to, the primary
-.SM CSRG
-development machine.
-Thus, they are able to directly install the fix themselves,
-and simply notify
-.SM CSRG
-when they have fixed the problem.
-After the fix has been verified, the affected files are added to
-the list to be updated on
-.PN /nbsd .
-.PP
-The more important task of the alpha sites is to test out the
-new facilities that have been added to the system.
-The alpha sites often find major design flaws
-or operational shortcomings of the facilities.
-When such problems are found,
-the person in charge of that facility is responsible
-for resolving the problem.
-Occasionally this requires redesigning and reimplementing
-parts of the affected facility.
-For example,
-in 4.2\s-1BSD\s+1,
-the alpha release of the networking system did not have connection queueing.
-This shortcoming prevented the network from handling many
-connections to a single server.
-The result was that the networking interface had to be
-redesigned to provide this functionality.
-.PP
-The alpha sites are also responsible for ferreting out interoperability
-problems between different utilities.
-The user populations of the test sites differ from the user population at
-.SM CSRG ,
-and, as a result, the utilities are exercised in ways that differ
-from the ways that they are used at
-.SM CSRG .
-These differences in usage patterns turn up problems that
-do not occur in our initial test environment.
-.PP
-The alpha sites frequently redistribute the alpha tape to several
-of their own alpha sites that are particularly interested
-in parts of the new system.
-These additional sites are responsible for reporting
-problems back to the site from which they received the distribution,
-not to
-.SM CSRG .
-Often these redistribution sites are less sophisticated than the
-direct alpha sites, so their reports need to be filtered
-to avoid spurious, or site-dependent, bug reports.
-The direct alpha sites sift through the reports to find those that
-are relevant, and usually verify the suggested fix if one is given,
-or develop a fix if none is provided.
-This hierarchical testing process forces
-bug reports, fixes, and new software
-to be collected, evaluated, and checked for inaccuracies
-by first-level sites before being forwarded to
-.SM CSRG ,
-allowing the developers at
-.SM CSRG
-to concentrate on tracking the changes being made to the system
-rather than sifting through information (often voluminous) from every
-alpha-test site.
-.PP
-Once the major problems have been attended to,
-the focus turns to getting the documentation synchronized
-with the code that is being shipped.
-The manual pages need to be checked to be sure that
-they accurately reflect any changes to the programs that
-they describe.
-Usually the manual pages are kept up to date as
-the program they describe evolves.
-However, the supporting documents frequently do not get changed,
-and must be edited to bring them up to date.
-During this review, the need for other documents becomes evident.
-For example, it was
-during this phase of \*(b3 that it was decided
-to add a tutorial document on how to use the socket
-interprocess communication primitives.
-.PP
-Another task during this period is to contact the people that
-have contributed complete software packages
-(such as
-.PN RCS
-or
-.PN MH )
-in previous releases to see if they wish to
-make any revisions to their software.
-For those who do,
-the new software has to be obtained,
-and tested to verify that it compiles and runs
-correctly on the system to be released.
-Again, this integration and testing can often be done by the
-contributors themselves by logging directly into the master machine.
-.PP
-After the stream of bug reports has slowed down
-to a reasonable level,
-.SM CSRG
-begins a careful review of all the changes to the
-system since the previous release.
-The review is done by running a recursive
-.PN diff
-of the entire source tree\(emhere, of
-.PN /nbsd
-with 4.2\s-1BSD\s+1.
-All the changes are checked to ensure that they are reasonable,
-and have been properly documented.
-The process often turns up questionable changes.
-When such a questionable change is found,
-the source code control system log is examined to find
-out who made the change and what their explanation was
-for the change.
-If the log does not resolve the problem,
-the person responsible for the change is asked for an explanation
-of what they were trying to accomplish.
-If the reason is not compelling,
-the change is backed out.
-Facilities deemed inappropriate in \*(b3 included new options to
-the directory-listing command and a changed return value for the
-.RN fseek
-library routine;
-the changes were removed from the source before final distribution.
-Although this process is long and tedious,
-it forces the developers to obtain a coherent picture of the entire set of
-changes to the system.
-This exercise often turns up inconsistencies that would
-otherwise never be found.
-.PP
-The comparison results in
-a pair of documents detailing
-changes to every user-level command
-.[
-Bug Fixes and Changes
-.]
-and to every kernel source file.
-.[
-Changes to the Kernel
-.]
-These documents are delivered with the final distribution.
-A user can look up any command by name and see immediately
-what has changed,
-and a developer can similarly look up any kernel
-file by name and get a summary of the changes to that file.
-.PP
-Having completed the review of the entire system,
-the preparation of the beta distribution is started.
-Unlike the alpha distribution, where pieces of the system
-may be unfinished and the documentation incomplete,
-the beta distribution is put together as if it were
-going to be the final distribution.
-All known problems are fixed, and any remaining development
-is completed.
-Once the beta tape has been prepared,
-no further changes are permitted to
-.PN /nbsd
-without careful review,
-as spurious changes made after the system has been
-.PN diff ed
-are unlikely to be caught.
-.NH 2
-Final Distribution Development
-.PP
-The beta distribution goes to more sites than the
-alpha distribution for three main reasons.
-First, as it is closer to the final release, more sites are willing
-to run it in a production environment without fear of catastrophic failures.
-Second, more commercial sites delivering
-.SM BSD -\c
-derived systems are interested in getting a preview of the
-upcoming changes in preparation for merging them into their
-own systems.
-Finally, because the beta tape has fewer problems,
-it is beneficial to offer it to more sites in hopes of
-finding as many of the remaining problems as possible.
-Also, by handing the system out to less sophisticated sites,
-issues that would be ignored by the users of the alpha sites
-become apparent.
-.PP
-The anticipation is that the beta tape will not require
-extensive changes to either the programs or the documentation.
-Most of the work involves sifting through the reported bugs
-to find those that are relevant and devising the minimal
-reasonable set of changes to fix them.
-After thoroughly testing the fix, it is listed in the update log for
-.PN /nbsd .
-One person at
-.SM CSRG
-is responsible for doing the update of
-.PN /nbsd
-and ensuring that everything affected by the change is rebuilt and tested.
-Thus, a change to a C library routine requires that the entire
-system be rebuilt.
-.PP
-During this period, the documentation is all printed and proofread.
-As minor changes are made to the manual pages and documentation,
-the affected pages must be reprinted.
-.PP
-The final step in the release process is to check the distribution tree
-to ensure that it is in a consistent state.
-This step includes verification that every file and directory
-on the distribution has the proper owner, group, and modes.
-All source files must be checked to be sure that they have
-appropriate copyright notices and source code control system headers.
-Any extraneous files must be removed.
-Finally, the installed binaries must be checked to ensure that they correspond
-exactly to the sources and libraries that are on the distribution.
-.PP
-This checking is a formidable task given that there are over 20,000 files on
-a typical distribution.
-Much of the checking can be done by a set of programs set to scan
-over the distribution tree.
-Unfortunately, the exception list is long, and requires
-hours of tedious hand checking; this has caused
-.SM CSRG
-to develop even
-more comprehensive validation programs for use in our next release.
-.PP
-Once the final set of checks has been run,
-the master tape can be made, and the official distribution started.
-As for the staff of
-.SM CSRG ,
-we usually take a brief vacation before plunging back into
-a new development phase.
diff --git a/share/doc/papers/relengr/Makefile b/share/doc/papers/relengr/Makefile
deleted file mode 100644
index 656d9c38c6b..00000000000
--- a/share/doc/papers/relengr/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-# $OpenBSD: Makefile,v 1.3 2004/02/01 14:22:45 jmc Exp $
-
-
-DIR= papers/relengr
-SRCS= 0.t 1.t 2.t 3.t
-MACROS= -ms
-EXTRA= ref.bib tmac.srefs
-REFER= refer -n -e -l -s -p ref.bib
-
-paper.ps: ${SRCS}
- ${REFER} ${SRCS} | ${ROFF} > ${.TARGET}
-
-paper.txt: ${SRCS}
- ${REFER} ${SRCS} | ${ROFF} -Tascii > ${.TARGET}
-
-.include <bsd.doc.mk>
diff --git a/share/doc/papers/relengr/ref.bib b/share/doc/papers/relengr/ref.bib
deleted file mode 100644
index 6f33cd7e9dd..00000000000
--- a/share/doc/papers/relengr/ref.bib
+++ /dev/null
@@ -1,26 +0,0 @@
-%A M. K. McKusick
-%A J. M. Bloom
-%A M. J. Karels
-%T Bug Fixes and Changes in 4.3BSD
-%B \s-1UNIX\s0 System Manager's Manual, 4.3 Berkeley Software Distribution, Virtual VAX-11 Version
-%I \s-1USENIX\s0 Association
-%C Berkeley, CA
-%P 12:1\-22
-%D 1986
-
-%A M. J. Karels
-%T Changes to the Kernel in 4.3BSD
-%B \s-1UNIX\s0 System Manager's Manual, 4.3 Berkeley Software Distribution, Virtual VAX-11 Version
-%I \s-1USENIX\s0 Association
-%C Berkeley, CA
-%P 13:1\-32
-%D 1986
-
-%A S. J. Leffler
-%A M. K. McKusick
-%A M. J. Karels
-%A J. S. Quarterman
-%T The Design and Implementation of the 4.3BSD UNIX Operating System
-%I Addison-Wesley
-%C Reading, MA
-%D 1989
diff --git a/share/doc/papers/relengr/ref.bib.ig b/share/doc/papers/relengr/ref.bib.ig
deleted file mode 100644
index fb24c6ea0c9..00000000000
--- a/share/doc/papers/relengr/ref.bib.ig
+++ /dev/null
@@ -1,3 +0,0 @@
-ref.bib:0,249 mckusi bloom karels bug fixes change system manage manual berkel softwa distri virtua vax versio associ berkel 1986
-ref.bib:249,216 karels change kernel system manage manual berkel softwa distri virtua vax versio associ berkel 1986
-ref.bib:465,181 leffle mckusi karels quarte design implem unix operat system addiso wesley readin 1989
diff --git a/share/doc/papers/relengr/spell.ok b/share/doc/papers/relengr/spell.ok
deleted file mode 100644
index 8faadcf5997..00000000000
--- a/share/doc/papers/relengr/spell.ok
+++ /dev/null
@@ -1,15 +0,0 @@
-BSD
-Bostic
-CH
-CM
-CSRG
-Fn
-Karels
-Lb
-McKusick
-POSIX
-edited
-filesystem
-followup
-mothballed
-nbsd
diff --git a/share/doc/papers/relengr/tmac.srefs b/share/doc/papers/relengr/tmac.srefs
deleted file mode 100644
index 42f21a2c9da..00000000000
--- a/share/doc/papers/relengr/tmac.srefs
+++ /dev/null
@@ -1,181 +0,0 @@
-.\" $OpenBSD: tmac.srefs,v 1.2 2001/02/03 08:15:01 niklas Exp $
-.\"
-.\" @(#)tmac.srefs 1.14 11/2/88
-.\" REFER macros .... citations
-.de []
-.][ \\$1
-..
-.de ][
-.if \\$1>5 .tm Bad arg to []
-.[\\$1
-..
-.if n .ds [. [
-.\".if t .ds [. \s-2\v'-.4m'\f1
-.if t .ds [. [
-.if n .ds .] ]
-.\".if t .ds .] \v'.4m'\s+2\fP
-.if t .ds .] ]
-.ds (. \& [
-.ds .) ]
-.if n .ds [o ""
-.if n .ds [c ""
-.if t .ds [o ``
-.if t .ds [c ''
-.ds [e \\fIet al.\\fP
-.\" for author list in reference:
-.ds &1 &
-.\" for -m signal (auth1 and auth2, year):
-.ds &2 &
-.\" the next lines deal with the problem of .[1] or [1].
-.\" refer will write "linexxx\*(<.[1]\*(>.
-.\" and either "<." or ">." should produce the .;
-.\" similarly for , and ;
-.rm <. <, <;
-.if n .ds >. .
-.if t .ds >. .
-.if n .ds >, ,
-.if t .ds >, ,
-.if n .ds >; ;
-.if t .ds >; ;
-.de [5 \" tm style
-.FS
-.IP "\\*([F.\0"
-\\*([A, \\f2\\*([T\\f1,
-.ie \\n(TN \\*([M.
-.el Bell Laboratories internal memorandum (\\*([D).
-.RT
-.FE
-..
-.de [0 \" other
-.FS
-.nr [: 0
-.if !"\\*([F"" .IP "\\*([F.\0"
-.if !"\\*([A"" \{.nr [: 1
-\\*([A\c\}
-.if !"\\*([T"" \{.if \\n([:>0 ,
-.nr [: 1
-\\f2\\*([T\\f1\c\}
-.if !"\\*([O""\{.if \\n([:>0 ,
-.nr [: 1
-.if \\n([O>0 .nr [: 0
-\\*([O\c
-.if \\n([O>0 \& \c\}
-.ie !"\\*([D"" \{.if \\n([:>0 ,
-.nr [: 1
-\\*([D\c\}
-.if \\n([:>0 \&.
-.RT
-.FE
-..
-.de [1 \" journal article
-.FS
-.if !"\\*([F"" .IP "\\*([F.\0"
-.if !"\\*([A"" \\*([A,
-.if !"\\*([T"" \\*([o\\*([T,\\*([c
-\\f2\\*([J\\f1\c
-.if !"\\*([V"" .if n \& Vol.\&\c
-.if !"\\*([V"" \& \\f3\\*([V\\f1\c
-.if !"\\*([N"" (\\*([N)\c
-.if !"\\*([P"" \{\
-.ie \\n([P>0 , pp. \c
-.el , p. \c
-\\*([P\c\}
-.if !"\\*([I"" .if "\\*([R"" , \\*([I\c
-.if !"\\*([O"" .if \\n([O=0 , \\*([O\c
-.if !"\\*([D"" \& (\\*([D)\c
-\&.
-.if !"\\*([O"" .if \\n([O>0 \\*([O
-.RT
-.FE
-..
-.de [2 \" book
-.FS
-.if !"\\*([F"" .IP "\\*([F.\0"
-.if !"\\*([A"" \\*([A,
-.if !"\\*([T"" \\f2\\*([T,\\f1
-\\*([I\c
-.if !"\\*([C"" , \\*([C\c
-.if !"\\*([D"" \& (\\*([D)\c
-\&.
-.if !"\\*([G"" Gov't. ordering no. \\*([G.
-.if !"\\*([O"" \\*([O
-.RT
-.FE
-..
-.de [4 \" report
-.FS
-.if !"\\*([F"" .IP "\\*([F.\0"
-\\*([A, \\*([o\\*([T,\\*([c
-\\*([R\c
-.if !"\\*([G"" \& (\\*([G)\c
-.if !"\\*([I"" , \\*([I\c
-.if !"\\*([C"" , \\*([C\c
-.if !"\\*([D"" \& (\\*([D)\c
-\&.
-.if !"\\*([O"" \\*([O
-.RT
-.FE
-..
-.de [3 \" article in book
-.FS
-.if !"\\*([F"" .IP "\\*([F.\0"
-.if !"\\*([A"" \\*([A,
-.if !"\\*([T"" \\*([o\\*([T,\\*([c
-.if !"\\*([P"" pp. \\*([P
-in \\f2\\*([B\\f1\c
-.if !"\\*([E"" , ed. \\*([E\c
-.if !"\\*([I"" , \\*([I\c
-.if !"\\*([C"" , \\*([C\c
-.if !"\\*([D"" \& (\\*([D)\c
-\&.
-.if !"\\*([O"" \\*([O
-.RT
-.FE
-..
-.de ]<
-.[<
-..
-.de [<
-.RT
-.ne 62p
-.ie \\n(rS \{\
-. rs
-. sp 4p
-.\}
-.el .sp 27p
-.po -2.5P
-.Li 2 30.5P
-\\s11\fBReferences\fP\s10
-.br
-.if \\n(Ns<2 \{\
-. nr Ns 1
-. ds ST References
-.\}
-.\"nr Tt 7
-.po
-.sp 8p
-.rm FS FE
-.\"sy echo '.T3 "\\\\t\\\\tReferences" \\n%' >>Toc
-.ns
-..
-.de [>
-.]>
-..
-.de ]>
-.sp
-..
-.de ]-
-.[-
-..
-.de [-
-.rm [V [P [A [T
-.rm [N [C [B [O
-.rm [R [I [E [D
-..
-.de ]]
-this is never
-executed
-and just
-uses up an end-of-file
-bug.
-..
diff --git a/share/doc/papers/sysperf/0.t b/share/doc/papers/sysperf/0.t
deleted file mode 100644
index d3d304b4275..00000000000
--- a/share/doc/papers/sysperf/0.t
+++ /dev/null
@@ -1,245 +0,0 @@
-.\" $OpenBSD: 0.t,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1985 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)0.t 5.1 (Berkeley) 4/17/91
-.\"
-.if n .ND
-.TL
-Measuring and Improving the Performance of Berkeley UNIX*
-.sp
-April 17, 1991
-.AU
-Marshall Kirk McKusick,
-Samuel J. Leffler\(dg,
-Michael J. Karels
-.AI
-Computer Systems Research Group
-Computer Science Division
-Department of Electrical Engineering and Computer Science
-University of California, Berkeley
-Berkeley, CA 94720
-.AB
-.FS
-* UNIX is a trademark of AT&T Bell Laboratories.
-.FE
-.FS
-\(dg Samuel J. Leffler is currently employed by:
-Silicon Graphics, Inc.
-.FE
-.FS
-This work was done under grants from
-the National Science Foundation under grant MCS80-05144,
-and the Defense Advanced Research Projects Agency (DoD) under
-ARPA Order No. 4031 monitored by Naval Electronic System Command under
-Contract No. N00039-82-C-0235.
-.FE
-The 4.2 Berkeley Software Distribution of
-.UX
-for the VAX\(dd
-.FS
-\(dd VAX, MASSBUS, UNIBUS, and DEC are trademarks of
-Digital Equipment Corporation.
-.FE
-had several problems that could severely affect the overall
-performance of the system.
-These problems were identified with
-kernel profiling and system tracing during day to day use.
-Once potential problem areas had been identified,
-benchmark programs were devised to highlight the bottlenecks.
-These benchmarks verified that the problems existed and provided
-a metric against which to validate proposed solutions.
-This paper examines
-the performance problems encountered and describes
-modifications that have been made
-to the system since the initial distribution.
-.PP
-The changes to the system have consisted of improvements to the
-performance of the existing facilities,
-as well as enhancements to the current facilities.
-Performance improvements in the kernel include cacheing of path name
-translations, reductions in clock handling and scheduling overhead,
-and improved throughput of the network subsystem.
-Performance improvements in the libraries and utilities include replacement of
-linear searches of system databases with indexed lookup,
-merging of most network services into a single daemon,
-and conversion of system utilities to use the more efficient
-facilities available in 4.2BSD.
-Enhancements in the kernel include the addition of subnets and gateways,
-increases in many kernel limits,
-cleanup of the signal and autoconfiguration implementations,
-and support for windows and system logging.
-Functional extensions in the libraries and utilities include
-the addition of an Internet name server,
-new system management tools,
-and extensions to \fIdbx\fP to work with Pascal.
-The paper concludes with a brief discussion of changes made to
-the system to enhance security.
-All of these enhancements are present in Berkeley UNIX 4.3BSD.
-.AE
-.LP
-.sp 2
-CR Categories and Subject Descriptors:
-D.4.3
-.B "[Operating Systems]":
-File Systems Management \-
-.I "file organization, directory structures, access methods";
-D.4.8
-.B "[Operating Systems]":
-Performance \-
-.I "measurements, operational analysis";
-.sp
-Additional Keywords and Phrases:
-Berkeley UNIX,
-system performance,
-application program interface.
-.sp
-General Terms:
-UNIX operating system,
-measurement,
-performance.
-.de PT
-.lt \\n(LLu
-.pc %
-.nr PN \\n%
-.tl '\\*(LH'\\*(CH'\\*(RH'
-.lt \\n(.lu
-..
-.af PN i
-.ds LH Performance
-.ds RH Contents
-.bp 1
-.if t .ds CF April 17, 1991
-.if t .ds LF DRAFT
-.if t .ds RF McKusick, et al.
-.ce
-.B "TABLE OF CONTENTS"
-.LP
-.sp 1
-.nf
-.B "1. Introduction"
-.LP
-.sp .5v
-.nf
-.B "2. Observation techniques
-\0.1. System maintenance tools
-\0.2. Kernel profiling
-\0.3. Kernel tracing
-\0.4. Benchmark programs
-.LP
-.sp .5v
-.nf
-.B "3. Results of our observations
-\0.1. User programs
-\0.1.1. Mail system
-\0.1.2. Network servers
-\0.2. System overhead
-\0.2.1. Micro-operation benchmarks
-\0.2.2. Path name translation
-\0.2.3. Clock processing
-\0.2.4. Terminal multiplexors
-\0.2.5. Process table management
-\0.2.6. File system buffer cache
-\0.2.7. Network subsystem
-\0.2.8. Virtual memory subsystem
-.LP
-.sp .5v
-.nf
-.B "4. Performance Improvements
-\0.1. Performance Improvements in the Kernel
-\0.1.1. Name Cacheing
-\0.1.2. Intelligent Auto Siloing
-\0.1.3. Process Table Management
-\0.1.4. Scheduling
-\0.1.5. Clock Handling
-\0.1.6. File System
-\0.1.7. Network
-\0.1.8. Exec
-\0.1.9. Context Switching
-\0.1.10. Setjmp and Longjmp
-\0.1.11. Compensating for Lack of Compiler Technology
-\0.2. Improvements to Libraries and Utilities
-\0.2.1. Hashed Databases
-\0.2.2. Buffered I/O
-\0.2.3. Mail System
-\0.2.4. Network Servers
-\0.2.5. The C Run-time Library
-\0.2.6. Csh
-.LP
-.sp .5v
-.nf
-.B "5. Functional Extensions
-\0.1. Kernel Extensions
-\0.1.1. Subnets, Broadcasts, and Gateways
-\0.1.2. Interface Addressing
-\0.1.3. User Control of Network Buffering
-\0.1.4. Number of File Descriptors
-\0.1.5. Kernel Limits
-\0.1.6. Memory Management
-\0.1.7. Signals
-\0.1.8. System Logging
-\0.1.9. Windows
-\0.1.10. Configuration of UNIBUS Devices
-\0.1.11. Disk Recovery from Errors
-\0.2. Functional Extensions to Libraries and Utilities
-\0.2.1. Name Server
-\0.2.2. System Management
-\0.2.3. Routing
-\0.2.4. Compilers
-.LP
-.sp .5v
-.nf
-.B "6. Security Tightening
-\0.1. Generic Kernel
-\0.2. Security Problems in Utilities
-.LP
-.sp .5v
-.nf
-.B "7. Conclusions
-.LP
-.sp .5v
-.nf
-.B Acknowledgements
-.LP
-.sp .5v
-.nf
-.B References
-.LP
-.sp .5v
-.nf
-.B "Appendix \- Benchmark Programs"
-.de _d
-.if t .ta .6i 2.1i 2.6i
-.\" 2.94 went to 2.6, 3.64 to 3.30
-.if n .ta .84i 2.6i 3.30i
-..
-.de _f
-.if t .ta .5i 1.25i 2.5i
-.\" 3.5i went to 3.8i
-.if n .ta .7i 1.75i 3.8i
-..
diff --git a/share/doc/papers/sysperf/1.t b/share/doc/papers/sysperf/1.t
deleted file mode 100644
index 4838b81d9ce..00000000000
--- a/share/doc/papers/sysperf/1.t
+++ /dev/null
@@ -1,79 +0,0 @@
-.\" $OpenBSD: 1.t,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1985 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)1.t 5.1 (Berkeley) 4/17/91
-.\"
-.ds RH Introduction
-.af PN 1
-.bp 1
-.NH
-Introduction
-.PP
-The Berkeley Software Distributions of
-.UX
-for the VAX have added many new capabilities that were
-previously unavailable under
-.UX .
-The development effort for 4.2BSD concentrated on providing new
-facilities, and in getting them to work correctly.
-Many new data structures were added to the system to support
-these new capabilities.
-In addition,
-many of the existing data structures and algorithms
-were put to new uses or their old functions placed under increased demand.
-The effect of these changes was that
-mechanisms that were well tuned under 4.1BSD
-no longer provided adequate performance for 4.2BSD.
-The increased user feedback that came with the release of
-4.2BSD and a growing body of experience with the system
-highlighted the performance shortcomings of 4.2BSD.
-.PP
-This paper details the work that we have done since
-the release of 4.2BSD to measure the performance of the system,
-detect the bottlenecks,
-and find solutions to remedy them.
-Most of our tuning has been in the context of the real
-timesharing systems in our environment.
-Rather than using simulated workloads,
-we have sought to analyze our tuning efforts under
-realistic conditions.
-Much of the work has been done in the machine independent parts
-of the system, hence these improvements could be applied to
-other variants of UNIX with equal success.
-All of the changes made have been included in 4.3BSD.
-.PP
-Section 2 of the paper describes the tools and techniques
-available to us for measuring system performance.
-In Section 3 we present the results of using these tools, while Section 4
-has the performance improvements
-that have been made to the system based on our measurements.
-Section 5 highlights the functional enhancements that have
-been made to Berkeley UNIX 4.2BSD.
-Section 6 discusses some of the security problems that
-have been addressed.
diff --git a/share/doc/papers/sysperf/2.t b/share/doc/papers/sysperf/2.t
deleted file mode 100644
index 2dd667b5d7d..00000000000
--- a/share/doc/papers/sysperf/2.t
+++ /dev/null
@@ -1,256 +0,0 @@
-.\" $OpenBSD: 2.t,v 1.4 2005/09/19 06:40:01 krw Exp $
-.\"
-.\" Copyright (c) 1985 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)2.t 5.1 (Berkeley) 4/17/91
-.\"
-.ds RH Observation techniques
-.NH
-Observation techniques
-.PP
-There are many tools available for monitoring the performance
-of the system.
-Those that we found most useful are described below.
-.NH 2
-System maintenance tools
-.PP
-Several standard maintenance programs are invaluable in
-observing the basic actions of the system.
-The \fIvmstat\fP(1)
-program is designed to be an aid to monitoring
-systemwide activity. Together with the
-\fIps\fP\|(1)
-command (as in ``ps av''), it can be used to investigate systemwide
-virtual memory activity.
-By running \fIvmstat\fP
-when the system is active you can judge the system activity in several
-dimensions: job distribution, virtual memory load, paging and swapping
-activity, disk and cpu utilization.
-Ideally, to have a balanced system in activity,
-there should be few blocked (b) jobs,
-there should be little paging or swapping activity, there should
-be available bandwidth on the disk devices (most single arms peak
-out at 25-35 tps in practice), and the user cpu utilization (us) should
-be high (above 50%).
-.PP
-If the system is busy, then the count of active jobs may be large,
-and several of these jobs may often be blocked (b). If the virtual
-memory is active, then the paging daemon will be running (sr will
-be non-zero). It is healthy for the paging daemon to free pages when
-the virtual memory gets active; it is triggered by the amount of free
-memory dropping below a threshold and increases its pace as free memory
-goes to zero.
-.PP
-If you run \fIvmstat\fP
-when the system is busy (a ``vmstat 5'' gives all the
-numbers computed by the system), you can find
-imbalances by noting abnormal job distributions. If many
-processes are blocked (b), then the disk subsystem
-is overloaded or imbalanced. If you have several non-dma
-devices or open teletype lines that are ``ringing'', or user programs
-that are doing high-speed non-buffered input/output, then the system
-time may go high (60-80% or higher).
-It is often possible to pin down the cause of high system time by
-looking to see if there is excessive context switching (cs), interrupt
-activity (in) or system call activity (sy). Long term measurements
-on one of
-our large machines show
-an average of 60 context switches and interrupts
-per second and an average of 90 system calls per second.
-.PP
-If the system is heavily loaded, or if you have little memory
-for your load (1 megabyte is little in our environment), then the system
-may be forced to swap. This is likely to be accompanied by a noticeable
-reduction in the system responsiveness and long pauses when interactive
-jobs such as editors swap out.
-.PP
-A second important program is \fIiostat\fP\|(1).
-\fIIostat\fP
-iteratively reports the number of characters read and written to terminals,
-and, for each disk, the number of transfers per second, kilobytes
-transferred per second,
-and the milliseconds per average seek.
-It also gives the percentage of time the system has
-spent in user mode, in user mode running low priority (niced) processes,
-in system mode, and idling.
-.PP
-To compute this information, for each disk, seeks and data transfer completions
-and the number of words transferred are counted;
-for terminals collectively, the number
-of input and output characters are counted.
-Also, every 100 ms,
-the state of each disk is examined
-and a tally is made if the disk is active.
-From these numbers and the transfer rates
-of the devices it is possible to determine
-average seek times for each device.
-.PP
-When filesystems are poorly placed on the available
-disks, figures reported by \fIiostat\fP can be used
-to pinpoint bottlenecks. Under heavy system load, disk
-traffic should be spread out among the drives with
-higher traffic expected to the devices where the root, swap, and
-/tmp filesystems are located. When multiple disk drives are
-attached to the same controller, the system will
-attempt to overlap seek operations with I/O transfers. When
-seeks are performed, \fIiostat\fP will show
-non-zero average seek times. Most modern disk drives should
-exhibit an average seek time of 25-35 ms.
-.PP
-Terminal traffic reported by \fIiostat\fP should be heavily
-output oriented unless terminal lines are being used for
-data transfer by programs such as \fIuucp\fP. Input and
-output rates are system specific. Screen editors
-such as \fIvi\fP and \fIemacs\fP tend to exhibit output/input
-ratios of anywhere from 5/1 to 8/1. On one of our largest
-systems, 88 terminal lines plus 32 pseudo terminals, we observed
-an average of 180 characters/second input and 450 characters/second
-output over 4 days of operation.
-.NH 2
-Kernel profiling
-.PP
-It is simple to build a 4.2BSD kernel that will automatically
-collect profiling information as it operates simply by specifying the
-.B \-p
-option to \fIconfig\fP\|(8) when configuring a kernel.
-The program counter sampling can be driven by the system clock,
-or by an alternate real time clock.
-The latter is highly recommended as use of the system clock results
-in statistical anomalies in accounting for
-the time spent in the kernel clock routine.
-.PP
-Once a profiling system has been booted, statistics gathering is
-handled by \fIkgmon\fP\|(8).
-\fIKgmon\fP allows profiling to be started and stopped
-and the internal state of the profiling buffers to be dumped.
-\fIKgmon\fP can also be used to reset the state of the internal
-buffers to allow multiple experiments to be run without
-rebooting the machine.
-.PP
-The profiling data is processed with \fIgprof\fP\|(1)
-to obtain information regarding the system's operation.
-Profiled systems maintain histograms of the kernel program counter,
-the number of invocations of each routine,
-and a dynamic call graph of the executing system.
-The postprocessing propagates the time spent in each
-routine along the arcs of the call graph.
-\fIGprof\fP then generates a listing for each routine in the kernel,
-sorted according to the time it uses
-including the time of its call graph descendents.
-Below each routine entry is shown its (direct) call graph children,
-and how their times are propagated to this routine.
-A similar display above the routine shows how this routine's time and the
-time of its descendents is propagated to its (direct) call graph parents.
-.PP
-A profiled system is about 5-10% larger in its text space because of
-the calls to count the subroutine invocations.
-When the system executes,
-the profiling data is stored in a buffer that is 1.2
-times the size of the text space.
-All the information is summarized in memory;
-it is not necessary to have a trace file
-being continuously dumped to disk.
-The overhead for running a profiled system varies;
-under normal load we see anywhere from 5-25%
-of the system time spent in the profiling code.
-Thus the system is noticeably slower than an unprofiled system,
-yet is not so bad that it cannot be used in a production environment.
-This is important since it allows us to gather data
-in a real environment rather than trying to
-devise synthetic work loads.
-.NH 2
-Kernel tracing
-.PP
-The kernel can be configured to trace certain operations by
-specifying ``options TRACE'' in the configuration file. This
-forces the inclusion of code that records the occurrence of
-events in \fItrace records\fP in a circular buffer in kernel
-memory. Events may be enabled/disabled selectively while the
-system is operating. Each trace record contains a time stamp
-(taken from the VAX hardware time of day clock register), an
-event identifier, and additional information that is interpreted
-according to the event type. Buffer cache operations, such as
-initiating a read, include
-the disk drive, block number, and transfer size in the trace record.
-Virtual memory operations, such as a pagein completing, include
-the virtual address and process id in the trace record. The circular
-buffer is normally configured to hold 256 16-byte trace records.\**
-.FS
-\** The standard trace facilities distributed with 4.2
-differ slightly from those described here. The time stamp in the
-distributed system is calculated from the kernel's time of day
-variable instead of the VAX hardware register, and the buffer cache
-trace points do not record the transfer size.
-.FE
-.PP
-Several user programs were written to sample and interpret the
-tracing information. One program runs in the background and
-periodically reads the circular buffer of trace records. The
-trace information is compressed, in some instances interpreted
-to generate additional information, and a summary is written to a
-file. In addition, the sampling program can also record
-information from other kernel data structures, such as those
-interpreted by the \fIvmstat\fP program. Data written out to
-a file is further buffered to minimize I/O load.
-.PP
-Once a trace log has been created, programs that compress
-and interpret the data may be run to generate graphs showing the
-data and relationships between traced events and
-system load.
-.PP
-The trace package was used mainly to investigate the operation of
-the file system buffer cache. The sampling program maintained a
-history of read-ahead blocks and used the trace information to
-calculate, for example, percentage of read-ahead blocks used.
-.NH 2
-Benchmark programs
-.PP
-Benchmark programs were used in two ways. First, a suite of
-programs was constructed to calculate the cost of certain basic
-system operations. Operations such as system call overhead and
-context switching time are critically important in evaluating the
-overall performance of a system. Because of the drastic changes in
-the system between 4.1BSD and 4.2BSD, it was important to verify
-the overhead of these low level operations had not changed appreciably.
-.PP
-The second use of benchmarks was in exercising
-suspected bottlenecks.
-When we suspected a specific problem with the system,
-a small benchmark program was written to repeatedly use
-the facility.
-While these benchmarks are not useful as a general tool
-they can give quick feedback on whether a hypothesized
-improvement is really having an effect.
-It is important to realize that the only real assurance
-that a change has a beneficial effect is through
-long term measurements of general timesharing.
-We have numerous examples where a benchmark program
-suggests vast improvements while the change
-in the long term system performance is negligible,
-and conversely examples in which the benchmark program runs more slowly,
-but the long term system performance improves significantly.
diff --git a/share/doc/papers/sysperf/3.t b/share/doc/papers/sysperf/3.t
deleted file mode 100644
index 7c7b18728b1..00000000000
--- a/share/doc/papers/sysperf/3.t
+++ /dev/null
@@ -1,692 +0,0 @@
-.\" $OpenBSD: 3.t,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1985 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)3.t 5.1 (Berkeley) 4/17/91
-.\"
-.ds RH Results of our observations
-.NH
-Results of our observations
-.PP
-When 4.2BSD was first installed on several large timesharing systems
-the degradation in performance was significant.
-Informal measurements showed 4.2BSD providing 80% of the throughput
-of 4.1BSD (based on load averages observed under a normal timesharing load).
-Many of the initial problems found were because of programs that were
-not part of 4.1BSD. Using the techniques described in the previous
-section and standard process profiling several problems were identified.
-Later work concentrated on the operation of the kernel itself.
-In this section we discuss the problems uncovered; in the next
-section we describe the changes made to the system.
-.NH 2
-User programs
-.PP
-.NH 3
-Mail system
-.PP
-The mail system was the first culprit identified as a major
-contributor to the degradation in system performance.
-At Lucasfilm the mail system is heavily used
-on one machine, a VAX-11/780 with eight megabytes of memory.\**
-.FS
-\** During part of these observations the machine had only four
-megabytes of memory.
-.FE
-Message
-traffic is usually between users on the same machine and ranges from
-person-to-person telephone messages to per-organization distribution
-lists. After conversion to 4.2BSD, it was
-immediately noticed that mail to distribution lists of 20 or more people
-caused the system load to jump by anywhere from 3 to 6 points.
-The number of processes spawned by the \fIsendmail\fP program and
-the messages sent from \fIsendmail\fP to the system logging
-process, \fIsyslog\fP, generated significant load both from their
-execution and their interference with basic system operation. The
-number of context switches and disk transfers often doubled while
-\fIsendmail\fP operated; the system call rate jumped dramatically.
-System accounting information consistently
-showed \fIsendmail\fP as the top cpu user on the system.
-.NH 3
-Network servers
-.PP
-The network services provided in 4.2BSD add new capabilities to the system,
-but are not without cost. The system uses one daemon process to accept
-requests for each network service provided. The presence of many
-such daemons increases the numbers of active processes and files,
-and requires a larger configuration to support the same number of users.
-The overhead of the routing and status updates can consume
-several percent of the cpu.
-Remote logins and shells incur more overhead
-than their local equivalents.
-For example, a remote login uses three processes and a
-pseudo-terminal handler in addition to the local hardware terminal
-handler. When using a screen editor, sending and echoing a single
-character involves four processes on two machines.
-The additional processes, context switching, network traffic, and
-terminal handler overhead can roughly triple the load presented by one
-local terminal user.
-.NH 2
-System overhead
-.PP
-To measure the costs of various functions in the kernel,
-a profiling system was run for a 17 hour
-period on one of our general timesharing machines.
-While this is not as reproducible as a synthetic workload,
-it certainly represents a realistic test.
-This test was run on several occasions over a three month period.
-Despite the long period of time that elapsed
-between the test runs the shapes of the profiles,
-as measured by the number of times each system call
-entry point was called, were remarkably similar.
-.PP
-These profiles turned up several bottlenecks that are
-discussed in the next section.
-Several of these were new to 4.2BSD,
-but most were caused by overloading of mechanisms
-which worked acceptably well in previous BSD systems.
-The general conclusion from our measurements was that
-the ratio of system to user time had increased from
-45% system / 55% user in 4.1BSD to 57% system / 43% user
-in 4.2BSD.
-.NH 3
-Micro-operation benchmarks
-.PP
-To compare certain basic system operations
-between 4.1BSD and 4.2BSD a suite of benchmark
-programs was constructed and run on a VAX-11/750 with 4.5 megabytes
-of physical memory and two disks on a MASSBUS controller.
-Tests were run with the machine operating in single user mode
-under both 4.1BSD and 4.2BSD. Paging was localized to the drive
-where the root file system was located.
-.PP
-The benchmark programs were modeled after the Kashtan benchmarks,
-[Kashtan80], with identical sources compiled under each system.
-The programs and their intended purpose are described briefly
-before the presentation of the results. The benchmark scripts
-were run twice with the results shown as the average of
-the two runs.
-The source code for each program and the shell scripts used during
-the benchmarks are included in the Appendix.
-.PP
-The set of tests shown in Table 1 was concerned with
-system operations other than paging. The intent of most
-benchmarks is clear. The result of running \fIsignocsw\fP is
-deducted from the \fIcsw\fP benchmark to calculate the context
-switch overhead. The \fIexec\fP tests use two different jobs to gauge
-the cost of overlaying a larger program with a smaller one
-and vice versa. The
-``null job'' and ``big job'' differ solely in the size of their data
-segments, 1 kilobyte versus 256 kilobytes. In both cases the
-text segment of the parent is larger than that of the child.\**
-.FS
-\** These tests should also have measured the cost of expanding the
-text segment; unfortunately time did not permit running additional tests.
-.FE
-All programs were compiled into the default load format that causes
-the text segment to be demand paged out of the file system and shared
-between processes.
-.KF
-.DS L
-.TS
-center box;
-l | l.
-Test Description
-_
-syscall perform 100,000 \fIgetpid\fP system calls
-csw perform 10,000 context switches using signals
-signocsw send 10,000 signals to yourself
-pipeself4 send 10,000 4-byte messages to yourself
-pipeself512 send 10,000 512-byte messages to yourself
-pipediscard4 send 10,000 4-byte messages to child who discards
-pipediscard512 send 10,000 512-byte messages to child who discards
-pipeback4 exchange 10,000 4-byte messages with child
-pipeback512 exchange 10,000 512-byte messages with child
-forks0 fork-exit-wait 1,000 times
-forks1k sbrk(1024), fault page, fork-exit-wait 1,000 times
-forks100k sbrk(102400), fault pages, fork-exit-wait 1,000 times
-vforks0 vfork-exit-wait 1,000 times
-vforks1k sbrk(1024), fault page, vfork-exit-wait 1,000 times
-vforks100k sbrk(102400), fault pages, vfork-exit-wait 1,000 times
-execs0null fork-exec ``null job''-exit-wait 1,000 times
-execs0null (1K env) execs0null above, with 1K environment added
-execs1knull sbrk(1024), fault page, fork-exec ``null job''-exit-wait 1,000 times
-execs1knull (1K env) execs1knull above, with 1K environment added
-execs100knull sbrk(102400), fault pages, fork-exec ``null job''-exit-wait 1,000 times
-vexecs0null vfork-exec ``null job''-exit-wait 1,000 times
-vexecs1knull sbrk(1024), fault page, vfork-exec ``null job''-exit-wait 1,000 times
-vexecs100knull sbrk(102400), fault pages, vfork-exec ``null job''-exit-wait 1,000 times
-execs0big fork-exec ``big job''-exit-wait 1,000 times
-execs1kbig sbrk(1024), fault page, fork-exec ``big job''-exit-wait 1,000 times
-execs100kbig sbrk(102400), fault pages, fork-exec ``big job''-exit-wait 1,000 times
-vexecs0big vfork-exec ``big job''-exit-wait 1,000 times
-vexecs1kbig sbrk(1024), fault pages, vfork-exec ``big job''-exit-wait 1,000 times
-vexecs100kbig sbrk(102400), fault pages, vfork-exec ``big job''-exit-wait 1,000 times
-.TE
-.ce
-Table 1. Kernel Benchmark programs.
-.DE
-.KE
-.PP
-The results of these tests are shown in Table 2. If the 4.1BSD results
-are scaled to reflect their being run on a VAX-11/750, they
-correspond closely to those found in [Joy80].\**
-.FS
-\** We assume that a VAX-11/750 runs at 60% of the speed of a VAX-11/780
-(not considering floating point operations).
-.FE
-.KF
-.DS L
-.TS
-center box;
-c s s s s s s s s s
-c || c s s || c s s || c s s
-c || c s s || c s s || c s s
-c || c | c | c || c | c | c || c | c | c
-l || n | n | n || n | n | n || n | n | n.
-Berkeley Software Distribution UNIX Systems
-_
-Test Elapsed Time User Time System Time
-\^ _ _ _
-\^ 4.1 4.2 4.3 4.1 4.2 4.3 4.1 4.2 4.3
-=
-syscall 28.0 29.0 23.0 4.5 5.3 3.5 23.9 23.7 20.4
-csw 45.0 60.0 45.0 3.5 4.3 3.3 19.5 25.4 19.0
-signocsw 16.5 23.0 16.0 1.9 3.0 1.1 14.6 20.1 15.2
-pipeself4 21.5 29.0 26.0 1.1 1.1 0.8 20.1 28.0 25.6
-pipeself512 47.5 59.0 55.0 1.2 1.2 1.0 46.1 58.3 54.2
-pipediscard4 32.0 42.0 36.0 3.2 3.7 3.0 15.5 18.8 15.6
-pipediscard512 61.0 76.0 69.0 3.1 2.1 2.0 29.7 36.4 33.2
-pipeback4 57.0 75.0 66.0 2.9 3.2 3.3 25.1 34.2 29.7
-pipeback512 110.0 138.0 125.0 3.1 3.4 2.2 52.2 65.7 57.7
-forks0 37.5 41.0 22.0 0.5 0.3 0.3 34.5 37.6 21.5
-forks1k 40.0 43.0 22.0 0.4 0.3 0.3 36.0 38.8 21.6
-forks100k 217.5 223.0 176.0 0.7 0.6 0.4 214.3 218.4 175.2
-vforks0 34.5 37.0 22.0 0.5 0.6 0.5 27.3 28.5 17.9
-vforks1k 35.0 37.0 22.0 0.6 0.8 0.5 27.2 28.6 17.9
-vforks100k 35.0 37.0 22.0 0.6 0.8 0.6 27.6 28.9 17.9
-execs0null 97.5 92.0 66.0 3.8 2.4 0.6 68.7 82.5 48.6
-execs0null (1K env) 197.0 229.0 75.0 4.1 2.6 0.9 167.8 212.3 62.6
-execs1knull 99.0 100.0 66.0 4.1 1.9 0.6 70.5 86.8 48.7
-execs1knull (1K env) 199.0 230.0 75.0 4.2 2.6 0.7 170.4 214.9 62.7
-execs100knull 283.5 278.0 216.0 4.8 2.8 1.1 251.9 269.3 202.0
-vexecs0null 100.0 92.0 66.0 5.1 2.7 1.1 63.7 76.8 45.1
-vexecs1knull 100.0 91.0 66.0 5.2 2.8 1.1 63.2 77.1 45.1
-vexecs100knull 100.0 92.0 66.0 5.1 3.0 1.1 64.0 77.7 45.6
-execs0big 129.0 201.0 101.0 4.0 3.0 1.0 102.6 153.5 92.7
-execs1kbig 130.0 202.0 101.0 3.7 3.0 1.0 104.7 155.5 93.0
-execs100kbig 318.0 385.0 263.0 4.8 3.1 1.1 286.6 339.1 247.9
-vexecs0big 128.0 200.0 101.0 4.6 3.5 1.6 98.5 149.6 90.4
-vexecs1kbig 125.0 200.0 101.0 4.7 3.5 1.3 98.9 149.3 88.6
-vexecs100kbig 126.0 200.0 101.0 4.2 3.4 1.3 99.5 151.0 89.0
-.TE
-.ce
-Table 2. Kernel Benchmark results (all times in seconds).
-.DE
-.KE
-.PP
-In studying the measurements we found that the basic system call
-and context switch overhead did not change significantly
-between 4.1BSD and 4.2BSD. The \fIsignocsw\fP results were caused by
-the changes to the \fIsignal\fP interface, resulting
-in an additional subroutine invocation for each call, not
-to mention additional complexity in the system's implementation.
-.PP
-The times for the use of pipes are significantly higher under
-4.2BSD because of their implementation on top of the interprocess
-communication facilities. Under 4.1BSD pipes were implemented
-without the complexity of the socket data structures and with
-simpler code. Further, while not obviously a factor here,
-4.2BSD pipes have less system buffer space provided them than
-4.1BSD pipes.
-.PP
-The \fIexec\fP tests shown in Table 2 were performed with 34 bytes of
-environment information under 4.1BSD and 40 bytes under 4.2BSD.
-To figure the cost of passing data through the environment,
-the execs0null and execs1knull tests were rerun with
-1065 additional bytes of data. The results are shown in Table 3.
-.KF
-.DS L
-.TS
-center box;
-c || c s || c s || c s
-c || c s || c s || c s
-c || c | c || c | c || c | c
-l || n | n || n | n || n | n.
-Test Real User System
-\^ _ _ _
-\^ 4.1 4.2 4.1 4.2 4.1 4.2
-=
-execs0null 197.0 229.0 4.1 2.6 167.8 212.3
-execs1knull 199.0 230.0 4.2 2.6 170.4 214.9
-.TE
-.ce
-Table 3. Benchmark results with ``large'' environment (all times in seconds).
-.DE
-.KE
-These results show that passing argument data is significantly
-slower than under 4.1BSD: 121 ms/byte versus 93 ms/byte. Even using
-this factor to adjust the basic overhead of an \fIexec\fP system
-call, this facility is more costly under 4.2BSD than under 4.1BSD.
-.NH 3
-Path name translation
-.PP
-The single most expensive function performed by the kernel
-is path name translation.
-This has been true in almost every UNIX kernel [Mosher80];
-we find that our general time sharing systems do about
-500,000 name translations per day.
-.PP
-Name translations became more expensive in 4.2BSD for several reasons.
-The single most expensive addition was the symbolic link.
-Symbolic links
-have the effect of increasing the average number of components
-in path names to be translated.
-As an insidious example,
-consider the system manager that decides to change /tmp
-to be a symbolic link to /usr/tmp.
-A name such as /tmp/tmp1234 that previously required two component
-translations,
-now requires four component translations plus the cost of reading
-the contents of the symbolic link.
-.PP
-The new directory format also changes the characteristics of
-name translation.
-The more complex format requires more computation to determine
-where to place new entries in a directory.
-Conversely the additional information allows the system to only
-look at active entries when searching,
-hence searches of directories that had once grown large
-but currently have few active entries are checked quickly.
-The new format also stores the length of each name so that
-costly string comparisons are only done on names that are the
-same length as the name being sought.
-.PP
-The net effect of the changes is that the average time to
-translate a path name in 4.2BSD is 24.2 milliseconds,
-representing 40% of the time processing system calls,
-that is 19% of the total cycles in the kernel,
-or 11% of all cycles executed on the machine.
-The times are shown in Table 4. We have no comparable times
-for \fInamei\fP under 4.1 though they are certain to
-be significantly less.
-.KF
-.DS L
-.TS
-center box;
-l r r.
-part time % of kernel
-_
-self 14.3 ms/call 11.3%
-child 9.9 ms/call 7.9%
-_
-total 24.2 ms/call 19.2%
-.TE
-.ce
-Table 4. Call times for \fInamei\fP in 4.2BSD.
-.DE
-.KE
-.NH 3
-Clock processing
-.PP
-Nearly 25% of the time spent in the kernel is spent in the clock
-processing routines.
-(This is a clear indication that to avoid sampling bias when profiling the
-kernel with our tools
-we need to drive them from an independent clock.)
-These routines are responsible for implementing timeouts,
-scheduling the processor,
-maintaining kernel statistics,
-and tending various hardware operations such as
-draining the terminal input silos.
-Only minimal work is done in the hardware clock interrupt
-routine (at high priority), the rest is performed (at a lower priority)
-in a software interrupt handler scheduled by the hardware interrupt
-handler.
-In the worst case, with a clock rate of 100 Hz
-and with every hardware interrupt scheduling a software
-interrupt, the processor must field 200 interrupts per second.
-The overhead of simply trapping and returning
-is 3% of the machine cycles,
-figuring out that there is nothing to do
-requires an additional 2%.
-.NH 3
-Terminal multiplexors
-.PP
-The terminal multiplexors supported by 4.2BSD have programmable receiver
-silos that may be used in two ways.
-With the silo disabled, each character received causes an interrupt
-to the processor.
-Enabling the receiver silo allows the silo to fill before
-generating an interrupt, allowing multiple characters to be read
-for each interrupt.
-At low rates of input, received characters will not be processed
-for some time unless the silo is emptied periodically.
-The 4.2BSD kernel uses the input silos of each terminal multiplexor,
-and empties each silo on each clock interrupt.
-This allows high input rates without the cost of per-character interrupts
-while assuring low latency.
-However, as character input rates on most machines are usually
-low (about 25 characters per second),
-this can result in excessive overhead.
-At the current clock rate of 100 Hz, a machine with 5 terminal multiplexors
-configured makes 500 calls to the receiver interrupt routines per second.
-In addition, to achieve acceptable input latency
-for flow control, each clock interrupt must schedule
-a software interrupt to run the silo draining routines.\**
-.FS
-\** It is not possible to check the input silos at
-the time of the actual clock interrupt without modifying the terminal
-line disciplines, as the input queues may not be in a consistent state \**.
-.FE
-\** This implies that the worst case estimate for clock processing
-is the basic overhead for clock processing.
-.NH 3
-Process table management
-.PP
-In 4.2BSD there are numerous places in the kernel where a linear search
-of the process table is performed:
-.IP \(bu 3
-in \fIexit\fP to locate and wake up a process's parent;
-.IP \(bu 3
-in \fIwait\fP when searching for \fB\s-2ZOMBIE\s+2\fP and
-\fB\s-2STOPPED\s+2\fP processes;
-.IP \(bu 3
-in \fIfork\fP when allocating a new process table slot and
-counting the number of processes already created by a user;
-.IP \(bu 3
-in \fInewproc\fP, to verify
-that a process id assigned to a new process is not currently
-in use;
-.IP \(bu 3
-in \fIkill\fP and \fIgsignal\fP to locate all processes to
-which a signal should be delivered;
-.IP \(bu 3
-in \fIschedcpu\fP when adjusting the process priorities every
-second; and
-.IP \(bu 3
-in \fIsched\fP when locating a process to swap out and/or swap
-in.
-.LP
-These linear searches can incur significant overhead. The rule
-for calculating the size of the process table is:
-.ce
-nproc = 20 + 8 * maxusers
-.sp
-that means a 48 user system will have a 404 slot process table.
-With the addition of network services in 4.2BSD, as many as a dozen
-server processes may be maintained simply to await incoming requests.
-These servers are normally created at boot time which causes them
-to be allocated slots near the beginning of the process table. This
-means that process table searches under 4.2BSD are likely to take
-significantly longer than under 4.1BSD. System profiling shows
-that as much as 20% of the time spent in the kernel on a loaded
-system (a VAX-11/780) can be spent in \fIschedcpu\fP and, on average,
-5-10% of the kernel time is spent in \fIschedcpu\fP.
-The other searches of the proc table are similarly affected.
-This shows the system can no longer tolerate using linear searches of
-the process table.
-.NH 3
-File system buffer cache
-.PP
-The trace facilities described in section 2.3 were used
-to gather statistics on the performance of the buffer cache.
-We were interested in measuring the effectiveness of the
-cache and the read-ahead policies.
-With the file system block size in 4.2BSD four to
-eight times that of a 4.1BSD file system, we were concerned
-that large amounts of read-ahead might be performed without
-being used. Also, we were interested in seeing if the
-rules used to size the buffer cache at boot time were severely
-affecting the overall cache operation.
-.PP
-The tracing package was run over a three hour period during
-a peak mid-afternoon period on a VAX 11/780 with four megabytes
-of physical memory.
-This resulted in a buffer cache containing 400 kilobytes of memory
-spread among 50 to 200 buffers
-(the actual number of buffers depends on the size mix of
-disk blocks being read at any given time).
-The pertinent configuration information is shown in Table 5.
-.KF
-.DS L
-.TS
-center box;
-l l l l.
-Controller Drive Device File System
-_
-DEC MASSBUS DEC RP06 hp0d /usr
- hp0b swap
-Emulex SC780 Fujitsu Eagle hp1a /usr/spool/news
- hp1b swap
- hp1e /usr/src
- hp1d /u0 (users)
- Fujitsu Eagle hp2a /tmp
- hp2b swap
- hp2d /u1 (users)
- Fujitsu Eagle hp3a /
-.TE
-.ce
-Table 5. Active file systems during buffer cache tests.
-.DE
-.KE
-.PP
-During the test period the load average ranged from 2 to 13
-with an average of 5.
-The system had no idle time, 43% user time, and 57% system time.
-The system averaged 90 interrupts per second
-(excluding the system clock interrupts),
-220 system calls per second,
-and 50 context switches per second (40 voluntary, 10 involuntary).
-.PP
-The active virtual memory (the sum of the address space sizes of
-all jobs that have run in the previous twenty seconds)
-over the period ranged from 2 to 6 megabytes with an average
-of 3.5 megabytes.
-There was no swapping, though the page daemon was inspecting
-about 25 pages per second.
-.PP
-On average 250 requests to read disk blocks were initiated
-per second.
-These include read requests for file blocks made by user
-programs as well as requests initiated by the system.
-System reads include requests for indexing information to determine
-where a file's next data block resides,
-file system layout maps to allocate new data blocks,
-and requests for directory contents needed to do path name translations.
-.PP
-On average, an 85% cache hit rate was observed for read requests.
-Thus only 37 disk reads were initiated per second.
-In addition, 5 read-ahead requests were made each second
-filling about 20% of the buffer pool.
-Despite the policies to rapidly reuse read-ahead buffers
-that remain unclaimed, more than 90% of the read-ahead
-buffers were used.
-.PP
-These measurements showed that the buffer cache was working
-effectively. Independent tests have also shown that the size
-of the buffer cache may be reduced significantly on memory-poor
-systems without severe effects;
-we have not yet tested this hypothesis [Shannon83].
-.NH 3
-Network subsystem
-.PP
-The overhead associated with the
-network facilities found in 4.2BSD is often
-difficult to gauge without profiling the system.
-This is because most input processing is performed
-in modules scheduled with software interrupts.
-As a result, the system time spent performing protocol
-processing is rarely attributed to the processes that
-really receive the data. Since the protocols supported
-by 4.2BSD can involve significant overhead this was a serious
-concern. Results from a profiled kernel show an average
-of 5% of the system time is spent
-performing network input and timer processing in our environment
-(a 3Mb/s Ethernet with most traffic using TCP).
-This figure can vary significantly depending on
-the network hardware used, the average message
-size, and whether packet reassembly is required at the network
-layer. On one machine we profiled over a 17 hour
-period (our gateway to the ARPANET)
-206,000 input messages accounted for 2.4% of the system time,
-while another 0.6% of the system time was spent performing
-protocol timer processing.
-This machine was configured with an ACC LH/DH IMP interface
-and a DMA 3Mb/s Ethernet controller.
-.PP
-The performance of TCP over slower long-haul networks
-was degraded substantially by two problems.
-The first problem was a bug that prevented round-trip timing measurements
-from being made, thus increasing retransmissions unnecessarily.
-The second was a problem with the maximum segment size chosen by TCP,
-that was well-tuned for Ethernet, but was poorly chosen for
-the ARPANET, where it causes packet fragmentation. (The maximum
-segment size was actually negotiated upwards to a value that
-resulted in excessive fragmentation.)
-.PP
-When benchmarked in Ethernet environments the main memory buffer management
-of the network subsystem presented some performance anomalies.
-The overhead of processing small ``mbufs'' severely affected throughput for a
-substantial range of message sizes.
-In spite of the fact that most system utilities made use of the throughput
-optimal 1024 byte size, user processes faced large degradations for some
-arbitrary sizes. This was especially true for TCP/IP transmissions [Cabrera84,
-Cabrera85].
-.NH 3
-Virtual memory subsystem
-.PP
-We ran a set of tests intended to exercise the virtual
-memory system under both 4.1BSD and 4.2BSD.
-The tests are described in Table 6.
-The test programs dynamically allocated
-a 7.3 Megabyte array (using \fIsbrk\fP\|(2)) then referenced
-pages in the array either: sequentially, in a purely random
-fashion, or such that the distance between
-successive pages accessed was randomly selected from a Gaussian
-distribution. In the last case, successive runs were made with
-increasing standard deviations.
-.KF
-.DS L
-.TS
-center box;
-l | l.
-Test Description
-_
-seqpage sequentially touch pages, 10 iterations
-seqpage-v as above, but first make \fIvadvise\fP\|(2) call
-randpage touch random page 30,000 times
-randpage-v as above, but first make \fIvadvise\fP call
-gausspage.1 30,000 Gaussian accesses, standard deviation of 1
-gausspage.10 as above, standard deviation of 10
-gausspage.30 as above, standard deviation of 30
-gausspage.40 as above, standard deviation of 40
-gausspage.50 as above, standard deviation of 50
-gausspage.60 as above, standard deviation of 60
-gausspage.80 as above, standard deviation of 80
-gausspage.inf as above, standard deviation of 10,000
-.TE
-.ce
-Table 6. Paging benchmark programs.
-.DE
-.KE
-.PP
-The results in Table 7 show how the additional
-memory requirements
-of 4.2BSD can generate more work for the paging system.
-Under 4.1BSD,
-the system used 0.5 of the 4.5 megabytes of physical memory
-on the test machine;
-under 4.2BSD it used nearly 1 megabyte of physical memory.\**
-.FS
-\** The 4.1BSD system used for testing was really a 4.1a
-system configured
-with networking facilities and code to support
-remote file access. The
-4.2BSD system also included the remote file access code.
-Since both
-systems would be larger than similarly configured ``vanilla''
-4.1BSD or 4.2BSD systems, we consider our conclusions to still be valid.
-.FE
-This resulted in more page faults and, hence, more system time.
-To establish a common ground on which to compare the paging
-routines of each system, we check instead the average page fault
-service times for those test runs that had a statistically significant
-number of random page faults. These figures, shown in Table 8, show
-no significant difference between the two systems in
-the area of page fault servicing. We currently have
-no explanation for the results of the sequential
-paging tests.
-.KF
-.DS L
-.TS
-center box;
-l || c s || c s || c s || c s
-l || c s || c s || c s || c s
-l || c | c || c | c || c | c || c | c
-l || n | n || n | n || n | n || n | n.
-Test Real User System Page Faults
-\^ _ _ _ _
-\^ 4.1 4.2 4.1 4.2 4.1 4.2 4.1 4.2
-=
-seqpage 959 1126 16.7 12.8 197.0 213.0 17132 17113
-seqpage-v 579 812 3.8 5.3 216.0 237.7 8394 8351
-randpage 571 569 6.7 7.6 64.0 77.2 8085 9776
-randpage-v 572 562 6.1 7.3 62.2 77.5 8126 9852
-gausspage.1 25 24 23.6 23.8 0.8 0.8 8 8
-gausspage.10 26 26 22.7 23.0 3.2 3.6 2 2
-gausspage.30 34 33 25.0 24.8 8.6 8.9 2 2
-gausspage.40 42 81 23.9 25.0 11.5 13.6 3 260
-gausspage.50 113 175 24.2 26.2 19.6 26.3 784 1851
-gausspage.60 191 234 27.6 26.7 27.4 36.0 2067 3177
-gausspage.80 312 329 28.0 27.9 41.5 52.0 3933 5105
-gausspage.inf 619 621 82.9 85.6 68.3 81.5 8046 9650
-.TE
-.ce
-Table 7. Paging benchmark results (all times in seconds).
-.DE
-.KE
-.KF
-.DS L
-.TS
-center box;
-c || c s || c s
-c || c s || c s
-c || c | c || c | c
-l || n | n || n | n.
-Test Page Faults PFST
-\^ _ _
-\^ 4.1 4.2 4.1 4.2
-=
-randpage 8085 9776 791 789
-randpage-v 8126 9852 765 786
-gausspage.inf 8046 9650 848 844
-.TE
-.ce
-Table 8. Page fault service times (all times in microseconds).
-.DE
-.KE
diff --git a/share/doc/papers/sysperf/4.t b/share/doc/papers/sysperf/4.t
deleted file mode 100644
index 0cb458b75b8..00000000000
--- a/share/doc/papers/sysperf/4.t
+++ /dev/null
@@ -1,772 +0,0 @@
-.\" $OpenBSD: 4.t,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1985 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)4.t 5.1 (Berkeley) 4/17/91
-.\"
-.ds RH Performance Improvements
-.NH
-Performance Improvements
-.PP
-This section outlines the changes made to the system
-since the 4.2BSD distribution.
-The changes reported here were made in response
-to the problems described in Section 3.
-The improvements fall into two major classes;
-changes to the kernel that are described in this section,
-and changes to the system libraries and utilities that are
-described in the following section.
-.NH 2
-Performance Improvements in the Kernel
-.PP
-Our goal has been to optimize system performance
-for our general timesharing environment.
-Since most sites running 4.2BSD have been forced to take
-advantage of declining
-memory costs rather than replace their existing machines with
-ones that are more powerful, we have
-chosen to optimize running time at the expense of memory.
-This tradeoff may need to be reconsidered for personal workstations
-that have smaller memories and higher latency disks.
-Decreases in the running time of the system may be unnoticeable
-because of higher paging rates incurred by a larger kernel.
-Where possible, we have allowed the size of caches to be controlled
-so that systems with limited memory may reduce them as appropriate.
-.NH 3
-Name Cacheing
-.PP
-Our initial profiling studies showed that more than one quarter
-of the time in the system was spent in the
-pathname translation routine, \fInamei\fP,
-translating path names to inodes\u\s-21\s0\d\**.
-.FS
-\** \u\s-21\s0\d Inode is an abbreviation for ``Index node''.
-Each file on the system is described by an inode;
-the inode maintains access permissions, and an array of pointers to
-the disk blocks that hold the data associated with the file.
-.FE
-An inspection of \fInamei\fP shows that
-it consists of two nested loops.
-The outer loop is traversed once per pathname component.
-The inner loop performs a linear search through a directory looking
-for a particular pathname component.
-.PP
-Our first idea was to reduce the number of iterations
-around the inner loop of \fInamei\fP by observing that many programs
-step through a directory performing an operation on each entry in turn.
-To improve performance for processes doing directory scans,
-the system keeps track of the directory offset of the last component of the
-most recently translated path name for each process.
-If the next name the process requests is in the same directory,
-the search is started from the offset at which the previous name was found
-(instead of from the beginning of the directory).
-Changing directories invalidates the cache, as
-does modifying the directory.
-For programs that step sequentially through a directory with
-.EQ
-delim $$
-.EN
-$N$ files, search time decreases from $O ( N sup 2 )$ to $O(N)$.
-.EQ
-delim off
-.EN
-.PP
-The cost of the cache is about 20 lines of code
-(about 0.2 kilobytes)
-and 16 bytes per process, with the cached data
-stored in a process's \fIuser\fP vector.
-.PP
-As a quick benchmark to verify the maximum effectiveness of the
-cache we ran ``ls \-l''
-on a directory containing 600 files.
-Before the per-process cache this command
-used 22.3 seconds of system time.
-After adding the cache the program used the same amount
-of user time, but the system time dropped to 3.3 seconds.
-.PP
-This change prompted our rerunning a profiled system
-on a machine containing the new \fInamei\fP.
-The results showed that the time in \fInamei\fP
-dropped by only 2.6 ms/call and
-still accounted for 36% of the system call time,
-18% of the kernel, or about 10% of all the machine cycles.
-This amounted to a drop in system time from 57% to about 55%.
-The results are shown in Table 9.
-.KF
-.DS L
-.TS
-center box;
-l r r.
-part time % of kernel
-_
-self 11.0 ms/call 9.2%
-child 10.6 ms/call 8.9%
-_
-total 21.6 ms/call 18.1%
-.TE
-.ce
-Table 9. Call times for \fInamei\fP with per-process cache.
-.DE
-.KE
-.PP
-The small performance improvement
-was caused by a low cache hit ratio.
-Although the cache was 90% effective when hit,
-it was only usable on about 25% of the names being translated.
-An additional reason for the small improvement was that
-although the amount of time spent in \fInamei\fP itself
-decreased substantially,
-more time was spent in the routines that it called
-since each directory had to be accessed twice;
-once to search from the middle to the end,
-and once to search from the beginning to the middle.
-.PP
-Frequent requests for a small set of names are best handled
-with a cache of recent name translations\**.
-.FS
-\** The cache is keyed on a name and the
-inode and device number of the directory that contains it.
-Associated with each entry is a pointer to the corresponding
-entry in the inode table.
-.FE
-This has the effect of eliminating the inner loop of \fInamei\fP.
-For each path name component,
-\fInamei\fP first looks in its cache of recent translations
-for the needed name.
-If it exists, the directory search can be completely eliminated.
-.PP
-The system already maintained a cache of recently accessed inodes,
-so the initial name cache
-maintained a simple name-inode association that was used to
-check each component of a path name during name translations.
-We considered implementing the cache by tagging each inode
-with its most recently translated name,
-but eventually decided to have a separate data structure that
-kept names with pointers to the inode table.
-Tagging inodes has two drawbacks;
-many inodes such as those associated with login ports remain in
-the inode table for a long period of time, but are never looked
-up by name.
-Other inodes, such as those describing directories are looked up
-frequently by many different names (\fIe.g.\fP ``..'').
-By keeping a separate table of names, the cache can
-truly reflect the most recently used names.
-An added benefit is that the table can be sized independently
-of the inode table, so that machines with small amounts of memory
-can reduce the size of the cache (or even eliminate it)
-without modifying the inode table structure.
-.PP
-Another issue to be considered is how the name cache should
-hold references to the inode table.
-Normally processes hold ``hard references'' by incrementing the
-reference count in the inode they reference.
-Since the system reuses only inodes with zero reference counts,
-a hard reference insures that the inode pointer will remain valid.
-However, if the name cache holds hard references,
-it is limited to some fraction of the size of the inode table,
-since some inodes must be left free for new files.
-It also makes it impossible for other parts of the kernel
-to verify sole use of a device or file.
-These reasons made it impractical to use hard references
-without affecting the behavior of the inode cacheing scheme.
-Thus, we chose instead to keep ``soft references'' protected
-by a \fIcapability\fP \- a 32-bit number
-guaranteed to be unique\u\s-22\s0\d \**.
-.FS
-\** \u\s-22\s0\d When all the numbers have been exhausted, all outstanding
-capabilities are purged and numbering starts over from scratch.
-Purging is possible as all capabilities are easily found in kernel memory.
-.FE
-When an entry is made in the name cache,
-the capability of its inode is copied to the name cache entry.
-When an inode is reused it is issued a new capability.
-When a name cache hit occurs,
-the capability of the name cache entry is compared
-with the capability of the inode that it references.
-If the capabilities do not match, the name cache entry is invalid.
-Since the name cache holds only soft references,
-it may be sized independent of the size of the inode table.
-A final benefit of using capabilities is that all
-cached names for an inode may be invalidated without
-searching through the entire cache;
-instead all you need to do is assign a new capability to the inode.
-.PP
-The cost of the name cache is about 200 lines of code
-(about 1.2 kilobytes)
-and 48 bytes per cache entry.
-Depending on the size of the system,
-about 200 to 1000 entries will normally be configured,
-using 10-50 kilobytes of physical memory.
-The name cache is resident in memory at all times.
-.PP
-After adding the system wide name cache we reran ``ls \-l''
-on the same directory.
-The user time remained the same,
-however the system time rose slightly to 3.7 seconds.
-This was not surprising as \fInamei\fP
-now had to maintain the cache,
-but was never able to make any use of it.
-.PP
-Another profiled system was created and measurements
-were collected over a 17 hour period. These measurements
-showed a 13 ms/call decrease in \fInamei\fP, with
-\fInamei\fP accounting for only 26% of the system call time,
-13% of the time in the kernel,
-or about 7% of all the machine cycles.
-System time dropped from 55% to about 49%.
-The results are shown in Table 10.
-.KF
-.DS L
-.TS
-center box;
-l r r.
-part time % of kernel
-_
-self 4.2 ms/call 6.2%
-child 4.4 ms/call 6.6%
-_
-total 8.6 ms/call 12.8%
-.TE
-.ce
-Table 10. Call times for \fInamei\fP with both caches.
-.DE
-.KE
-.PP
-On our general time sharing systems we find that during the twelve
-hour period from 8AM to 8PM the system does 500,000 to 1,000,000
-name translations.
-Statistics on the performance of both caches show that
-the large performance improvement is
-caused by the high hit ratio.
-The name cache has a hit rate of 70%-80%;
-the directory offset cache gets a hit rate of 5%-15%.
-The combined hit rate of the two caches almost always adds up to 85%.
-With the addition of the two caches,
-the percentage of system time devoted to name translation has
-dropped from 25% to less than 13%.
-While the system wide cache reduces both the amount of time in
-the routines that \fInamei\fP calls as well as \fInamei\fP itself
-(since fewer directories need to be accessed or searched),
-it is interesting to note that the actual percentage of system
-time spent in \fInamei\fP itself increases even though the
-actual time per call decreases.
-This is because less total time is being spent in the kernel,
-hence a smaller absolute time becomes a larger total percentage.
-.NH 3
-Intelligent Auto Siloing
-.PP
-Most terminal input hardware can run in two modes:
-it can either generate an interrupt each time a character is received,
-or collect characters in a silo that the system then periodically drains.
-To provide quick response for interactive input and flow control,
-a silo must be checked 30 to 50 times per second.
-Ascii terminals normally exhibit
-an input rate of less than 30 characters per second.
-At this input rate
-they are most efficiently handled with interrupt per character mode,
-since this generates fewer interrupts than draining the input silos
-of the terminal multiplexors at each clock interrupt.
-When input is being generated by another machine
-or a malfunctioning terminal connection, however,
-the input rate is usually more than 50 characters per second.
-It is more efficient to use a device's silo input mode,
-since this generates fewer interrupts than handling each character
-as a separate interrupt.
-Since a given dialup port may switch between uucp logins and user logins,
-it is impossible to statically select the most efficient input mode to use.
-.PP
-We therefore changed the terminal multiplexor handlers
-to dynamically choose between the use of the silo and the use of
-per-character interrupts.
-At low input rates the handler processes characters on an
-interrupt basis, avoiding the overhead
-of checking each interface on each clock interrupt.
-During periods of sustained input, the handler enables the silo
-and starts a timer to drain input.
-This timer runs less frequently than the clock interrupts,
-and is used only when there is a substantial amount of input.
-The transition from using silos to an interrupt per character is
-damped to minimize the number of transitions with bursty traffic
-(such as in network communication).
-Input characters serve to flush the silo, preventing long latency.
-By switching between these two modes of operation dynamically,
-the overhead of checking the silos is incurred only
-when necessary.
-.PP
-In addition to the savings in the terminal handlers,
-the clock interrupt routine is no longer required to schedule
-a software interrupt after each hardware interrupt to drain the silos.
-The software-interrupt level portion of the clock routine is only
-needed when timers expire or the current user process is collecting
-an execution profile.
-Thus, the number of interrupts attributable to clock processing
-is substantially reduced.
-.NH 3
-Process Table Management
-.PP
-As systems have grown larger, the size of the process table
-has grown far past 200 entries.
-With large tables, linear searches must be eliminated
-from any frequently used facility.
-The kernel process table is now multi-threaded to allow selective searching
-of active and zombie processes.
-A third list threads unused process table slots.
-Free slots can be obtained in constant time by taking one
-from the front of the free list.
-The number of processes used by a given user may be computed by scanning
-only the active list.
-Since the 4.2BSD release,
-the kernel maintained linked lists of the descendents of each process.
-This linkage is now exploited when dealing with process exit;
-parents seeking the exit status of children now avoid linear search
-of the process table, but examine only their direct descendents.
-In addition, the previous algorithm for finding all descendents of an exiting
-process used multiple linear scans of the process table.
-This has been changed to follow the links between child process and siblings.
-.PP
-When forking a new process,
-the system must assign it a unique process identifier.
-The system previously scanned the entire process table each time it created
-a new process to locate an identifier that was not already in use.
-Now, to avoid scanning the process table for each new process,
-the system computes a range of unused identifiers
-that can be directly assigned.
-Only when the set of identifiers is exhausted is another process table
-scan required.
-.NH 3
-Scheduling
-.PP
-Previously the scheduler scanned the entire process table
-once per second to recompute process priorities.
-Processes that had run for their entire time slice had their
-priority lowered.
-Processes that had not used their time slice, or that had
-been sleeping for the past second had their priority raised.
-On systems running many processes,
-the scheduler represented nearly 20% of the system time.
-To reduce this overhead,
-the scheduler has been changed to consider only
-runnable processes when recomputing priorities.
-To insure that processes sleeping for more than a second
-still get their appropriate priority boost,
-their priority is recomputed when they are placed back on the run queue.
-Since the set of runnable processes is typically only a small fraction
-of the total number of processes on the system,
-the cost of invoking the scheduler drops proportionally.
-.NH 3
-Clock Handling
-.PP
-The hardware clock interrupts the processor 100 times per second
-at high priority.
-As most of the clock-based events need not be done at high priority,
-the system schedules a lower priority software interrupt to do the less
-time-critical events such as cpu scheduling and timeout processing.
-Often there are no such events, and the software interrupt handler
-finds nothing to do and returns.
-The high priority event now checks to see if there are low priority
-events to process;
-if there is nothing to do, the software interrupt is not requested.
-Often, the high priority interrupt occurs during a period when the
-machine had been running at low priority.
-Rather than posting a software interrupt that would occur as
-soon as it returns,
-the hardware clock interrupt handler simply lowers the processor priority
-and calls the software clock routines directly.
-Between these two optimizations, nearly 80 of the 100 software
-interrupts per second can be eliminated.
-.NH 3
-File System
-.PP
-The file system uses a large block size, typically 4096 or 8192 bytes.
-To allow small files to be stored efficiently, the large blocks can
-be broken into smaller fragments, typically multiples of 1024 bytes.
-To minimize the number of full-sized blocks that must be broken
-into fragments, the file system uses a best fit strategy.
-Programs that slowly grow files using writes of 1024 bytes or less
-can force the file system to copy the data to
-successively larger and larger fragments until it finally
-grows to a full sized block.
-The file system still uses a best fit strategy the first time
-a fragment is written.
-However, the first time that the file system is forced to copy a growing
-fragment it places it at the beginning of a full sized block.
-Continued growth can be accommodated without further copying
-by using up the rest of the block.
-If the file ceases to grow, the rest of the block is still
-available for holding other fragments.
-.PP
-When creating a new file name,
-the entire directory in which it will reside must be scanned
-to insure that the name does not already exist.
-For large directories, this scan is time consuming.
-Because there was no provision for shortening directories,
-a directory that is once over-filled will increase the cost
-of file creation even after the over-filling is corrected.
-Thus, for example, a congested uucp connection can leave a legacy long
-after it is cleared up.
-To alleviate the problem, the system now deletes empty blocks
-that it finds at the end of a directory while doing a complete
-scan to create a new name.
-.NH 3
-Network
-.PP
-The default amount of buffer space allocated for stream sockets (including
-pipes) has been increased to 4096 bytes.
-Stream sockets and pipes now return their buffer sizes in the block size field
-of the stat structure.
-This information allows the standard I/O library to use more optimal buffering.
-Unix domain stream sockets also return a dummy device and inode number
-in the stat structure to increase compatibility
-with other pipe implementations.
-The TCP maximum segment size is calculated according to the destination
-and interface in use; non-local connections use a more conservative size
-for long-haul networks.
-.PP
-On multiply-homed hosts, the local address bound by TCP now always corresponds
-to the interface that will be used in transmitting data packets for the
-connection.
-Several bugs in the calculation of round trip timing have been corrected.
-TCP now switches to an alternate gateway when an existing route fails,
-or when an ICMP redirect message is received.
-ICMP source quench messages are used to throttle the transmission
-rate of TCP streams by temporarily creating an artificially small
-send window, and retransmissions send only a single packet
-rather than resending all queued data.
-A send policy has been implemented
-that decreases the number of small packets outstanding
-for network terminal traffic [Nagle84],
-providing additional reduction of network congestion.
-The overhead of packet routing has been decreased by changes in the routing
-code and by cacheing the most recently used route for each datagram socket.
-.PP
-The buffer management strategy implemented by \fIsosend\fP has been
-changed to make better use of the increased size of the socket buffers
-and a better tuned delayed acknowledgement algorithm.
-Routing has been modified to include a one element cache of the last
-route computed.
-Multiple messages sent with the same destination now require less processing.
-Performance deteriorates because of load in
-either the sender host, receiver host, or ether.
-Also, any CPU contention degrades substantially
-the throughput achievable by user processes [Cabrera85].
-We have observed empty VAX 11/750s using up to 90% of their cycles
-transmitting network messages.
-.NH 3
-Exec
-.PP
-When \fIexec\fP-ing a new process, the kernel creates the new
-program's argument list by copying the arguments and environment
-from the parent process's address space into the system, then back out
-again onto the stack of the newly created process.
-These two copy operations were done one byte at a time, but
-are now done a string at a time.
-This optimization reduced the time to process
-an argument list by a factor of ten;
-the average time to do an \fIexec\fP call decreased by 25%.
-.NH 3
-Context Switching
-.PP
-The kernel used to post a software event when it wanted to force
-a process to be rescheduled.
-Often the process would be rescheduled for other reasons before
-exiting the kernel, delaying the event trap.
-At some later time the process would again
-be selected to run and would complete its pending system call,
-finally causing the event to take place.
-The event would cause the scheduler to be invoked a second time
-selecting the same process to run.
-The fix to this problem is to cancel any software reschedule
-events when saving a process context.
-This change doubles the speed with which processes
-can synchronize using pipes or signals.
-.NH 3
-Setjmp/Longjmp
-.PP
-The kernel routine \fIsetjmp\fP, that saves the current system
-context in preparation for a non-local goto, used to save many more
-registers than necessary under most circumstances.
-By trimming its operation to save only the minimum state required,
-the overhead for system calls decreased by an average of 13%.
-.NH 3
-Compensating for Lack of Compiler Technology
-.PP
-The current compilers available for C do not
-do any significant optimization.
-Good optimizing compilers are unlikely to be built;
-the C language is not well suited to optimization
-because of its rampant use of unbound pointers.
-Thus, many classical optimizations such as common subexpression
-analysis and selection of register variables must be done
-by hand using ``exterior'' knowledge of when such optimizations are safe.
-.PP
-Another optimization usually done by optimizing compilers
-is inline expansion of small or frequently used routines.
-In past Berkeley systems this has been done by using \fIsed\fP to
-run over the assembly language and replace calls to small
-routines with the code for the body of the routine, often
-a single VAX instruction.
-While this optimization eliminated the cost of the subroutine
-call and return,
-it did not eliminate the pushing and popping of several arguments
-to the routine.
-The \fIsed\fP script has been replaced by a more intelligent expander,
-\fIinline\fP, that merges the pushes and pops into moves to registers.
-For example, if the C code
-.DS
-if (scanc(map[i], 1, 47, i - 63))
-.DE
-is compiled into assembly language it generates the code shown
-in the left hand column of Table 11.
-The \fIsed\fP inline expander changes this code to that
-shown in the middle column.
-The newer optimizer eliminates most of the stack
-operations to generate the code shown in the right hand column.
-.KF
-.TS
-center, box;
-c s s s s s
-c s | c s | c s
-l l | l l | l l.
-Alternative C Language Code Optimizations
-_
-cc sed inline
-_
-subl3 $64,_i,\-(sp) subl3 $64,_i,\-(sp) subl3 $64,_i,r5
-pushl $47 pushl $47 movl $47,r4
-pushl $1 pushl $1 pushl $1
-mull2 $16,_i,r3 mull2 $16,_i,r3 mull2 $16,_i,r3
-pushl \-56(fp)[r3] pushl \-56(fp)[r3] movl \-56(fp)[r3],r2
-calls $4,_scanc movl (sp)+,r5 movl (sp)+,r3
-tstl r0 movl (sp)+,r4 scanc r2,(r3),(r4),r5
-jeql L7 movl (sp)+,r3 tstl r0
- movl (sp)+,r2 jeql L7
- scanc r2,(r3),(r4),r5
- tstl r0
- jeql L7
-.TE
-.ce
-Table 11. Alternative inline code expansions.
-.KE
-.PP
-Another optimization involved reevaluating
-existing data structures in the context of the current system.
-For example, disk buffer hashing was implemented when the system
-typically had thirty to fifty buffers.
-Most systems today have 200 to 1000 buffers.
-Consequently, most of the hash chains contained
-ten to a hundred buffers each!
-The running time of the low level buffer management primitives was
-dramatically improved simply by enlarging the size of the hash table.
-.NH 2
-Improvements to Libraries and Utilities
-.PP
-Intuitively, changes to the kernel would seem to have the greatest
-payoff since they affect all programs that run on the system.
-However, the kernel has been tuned many times before, so the
-opportunity for significant improvement was small.
-By contrast, many of the libraries and utilities had never been tuned.
-For example, we found utilities that spent 90% of their
-running time doing single character read system calls.
-Changing the utility to use the standard I/O library cut the
-running time by a factor of five!
-Thus, while most of our time has been spent tuning the kernel,
-more than half of the speedups are because of improvements in
-other parts of the system.
-Some of the more dramatic changes are described in the following
-subsections.
-.NH 3
-Hashed Databases
-.PP
-UNIX provides a set of database management routines, \fIdbm\fP,
-that can be used to speed lookups in large data files
-with an external hashed index file.
-The original version of dbm was designed to work with only one
-database at a time. These routines were generalized to handle
-multiple database files, enabling them to be used in rewrites
-of the password and host file lookup routines. The new routines
-used to access the password file significantly improve the running
-time of many important programs such as the mail subsystem,
-the C-shell (in doing tilde expansion), \fIls \-l\fP, etc.
-.NH 3
-Buffered I/O
-.PP
-The new filesystem with its larger block sizes allows better
-performance, but it is possible to degrade system performance
-by performing numerous small transfers rather than using
-appropriately-sized buffers.
-The standard I/O library
-automatically determines the optimal buffer size for each file.
-Some C library routines and commonly-used programs use low-level
-I/O or their own buffering, however.
-Several important utilities did not use the standard I/O library
-and were buffering I/O using the old optimal buffer size,
-1Kbytes; the programs were changed to buffer I/O according to the
-optimal file system blocksize.
-These include the editor, the assembler, loader, remote file copy,
-the text formatting programs, and the C compiler.
-.PP
-The standard error output has traditionally been unbuffered
-to prevent delay in presenting the output to the user,
-and to prevent it from being lost if buffers are not flushed.
-The inordinate expense of sending single-byte packets through
-the network led us to impose a buffering scheme on the standard
-error stream.
-Within a single call to \fIfprintf\fP, all output is buffered temporarily.
-Before the call returns, all output is flushed and the stream is again
-marked unbuffered.
-As before, the normal block or line buffering mechanisms can be used
-instead of the default behavior.
-.PP
-It is possible for programs with good intentions to unintentionally
-defeat the standard I/O library's choice of I/O buffer size by using
-the \fIsetbuf\fP call to assign an output buffer.
-Because of portability requirements, the default buffer size provided
-by \fIsetbuf\fP is 1024 bytes; this can lead, once again, to added
-overhead.
-One such program with this problem was \fIcat\fP;
-there are undoubtedly other standard system utilities with similar problems
-as the system has changed much since they were originally written.
-.NH 3
-Mail System
-.PP
-The problems discussed in section 3.1.1 prompted significant work
-on the entire mail system. The first problem identified was a bug
-in the \fIsyslog\fP program. The mail delivery program, \fIsendmail\fP,
-logs all mail transactions through this process with the 4.2BSD interprocess
-communication facilities. \fISyslog\fP then records the information in
-a log file. Unfortunately, \fIsyslog\fP was performing a \fIsync\fP
-operation after each message it received, whether it was logged to a file
-or not. This wreaked havoc on the effectiveness of the
-buffer cache and explained, to a large
-extent, why sending mail to large distribution lists generated such a
-heavy load on the system (one syslog message was generated for each
-message recipient causing almost a continuous sequence of sync operations).
-.PP
-The hashed data base files were
-installed in all mail programs, resulting in an order of magnitude
-speedup on large distribution lists. The code in \fI/bin/mail\fP
-that notifies the \fIcomsat\fP program when mail has been delivered to
-a user was changed to cache host table lookups, resulting in a similar
-speedup on large distribution lists.
-.PP
-Next, the file locking facilities
-provided in 4.2BSD, \fIflock\fP\|(2), were used in place of the old
-locking mechanism.
-The mail system previously used \fIlink\fP and \fIunlink\fP in
-implementing file locking primitives.
-Because these operations usually modify the contents of directories
-they require synchronous disk operations and cannot take
-advantage of the name cache maintained by the system.
-Unlink requires that the entry be found in the directory so that
-it can be removed;
-link requires that the directory be scanned to insure that the name
-does not already exist.
-By contrast the advisory locking facility in 4.2BSD is
-efficient because it is all done with in-memory tables.
-Thus, the mail system was modified to use the file locking primitives.
-This yielded another 10% cut in the basic overhead of delivering mail.
-Extensive profiling and tuning of \fIsendmail\fP and
-compiling it without debugging code reduced the overhead by another 20%.
-.NH 3
-Network Servers
-.PP
-With the introduction of the network facilities in 4.2BSD,
-a myriad of services became available, each of which
-required its own daemon process.
-Many of these daemons were rarely if ever used,
-yet they lay asleep in the process table consuming
-system resources and generally slowing down response.
-Rather than having many servers started at boot time, a single server,
-\fIinetd\fP, was substituted.
-This process reads a simple configuration file
-that specifies the services the system is willing to support
-and listens for service requests on each service's Internet port.
-When a client requests service the appropriate server is created
-and passed a service connection as its standard input. Servers
-that require the identity of their client may use the \fIgetpeername\fP
-system call; likewise \fIgetsockname\fP may be used to find out
-a server's local address without consulting data base files.
-This scheme is attractive for several reasons:
-.IP \(bu 3
-it eliminates
-as many as a dozen processes, easing system overhead and
-allowing the file and text tables to be made smaller,
-.IP \(bu 3
-servers need not contain the code required to handle connection
-queueing, simplifying the programs, and
-.IP \(bu 3
-installing and replacing servers becomes simpler.
-.PP
-With an increased number of networks, both local and external to Berkeley,
-we found that the overhead of the routing process was becoming
-inordinately high.
-Several changes were made in the routing daemon to reduce this load.
-Routes to external networks are no longer exchanged by routers
-on the internal machines, only a route to a default gateway.
-This reduces the amount of network traffic and the time required
-to process routing messages.
-In addition, the routing daemon was profiled
-and functions responsible for large amounts
-of time were optimized.
-The major changes were a faster hashing scheme,
-and inline expansions of the ubiquitous byte-swapping functions.
-.PP
-Under certain circumstances, when output was blocked,
-attempts by the remote login process
-to send output to the user were rejected by the system,
-although a prior \fIselect\fP call had indicated that data could be sent.
-This resulted in continuous attempts to write the data until the remote
-user restarted output.
-This problem was initially avoided in the remote login handler,
-and the original problem in the kernel has since been corrected.
-.NH 3
-The C Run-time Library
-.PP
-Several people have found poorly tuned code
-in frequently used routines in the C library [Lankford84].
-In particular the running time of the string routines can be
-cut in half by rewriting them using the VAX string instructions.
-The memory allocation routines have been tuned to waste less
-memory for memory allocations with sizes that are a power of two.
-Certain library routines that did file input in one-character reads
-have been corrected.
-Other library routines including \fIfread\fP and \fIfwrite\fP
-have been rewritten for efficiency.
-.NH 3
-Csh
-.PP
-The C-shell was converted to run on 4.2BSD by
-writing a set of routines to simulate the old jobs library.
-While this provided a functioning C-shell,
-it was grossly inefficient, generating up
-to twenty system calls per prompt.
-The C-shell has been modified to use the new signal
-facilities directly,
-cutting the number of system calls per prompt in half.
-Additional tuning was done with the help of profiling
-to cut the cost of frequently used facilities.
diff --git a/share/doc/papers/sysperf/5.t b/share/doc/papers/sysperf/5.t
deleted file mode 100644
index 91abc119af8..00000000000
--- a/share/doc/papers/sysperf/5.t
+++ /dev/null
@@ -1,283 +0,0 @@
-.\" $OpenBSD: 5.t,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1985 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)5.t 5.1 (Berkeley) 4/17/91
-.\"
-.ds RH Functional Extensions
-.NH
-Functional Extensions
-.PP
-Some of the facilities introduced in 4.2BSD were not completely
-implemented. An important part of the effort that went into
-4.3BSD was to clean up and unify both new and old facilities.
-.NH 2
-Kernel Extensions
-.PP
-A significant effort went into improving
-the networking part of the kernel.
-The work consisted of fixing bugs,
-tuning the algorithms,
-and revamping the lowest levels of the system
-to better handle heterogeneous network topologies.
-.NH 3
-Subnets, Broadcasts and Gateways
-.PP
-To allow sites to expand their network in an autonomous
-and orderly fashion, subnetworks have been introduced in 4.3BSD [GADS85].
-This facility allows sites to subdivide their local Internet address
-space into multiple subnetwork address spaces that are visible
-only by hosts at that site. To off-site hosts, machines on a site's
-subnetworks appear to reside on a single network. The routing daemon
-has been reworked to provide routing support in this type of
-environment.
-.PP
-The default Internet broadcast address is now specified with a host part
-of all ones, rather than all zeros.
-The broadcast address may be set at boot time on a per-interface basis.
-.NH 3
-Interface Addressing
-.PP
-The organization of network interfaces has been
-reworked to more cleanly support multiple
-network protocols. Network interfaces no longer
-contain a host's address on that network; instead
-each interface contains a pointer to a list of addresses
-assigned to that interface. This permits a single
-interface to support, for example, Internet protocols
-at the same time as XNS protocols.
-.PP
-The Address Resolution Protocol (ARP) support
-for 10 megabit/second Ethernet\(dg
-.FS
-\(dg Ethernet is a trademark of Xerox.
-.FE
-has been made more flexible by allowing hosts to
-act as a ``clearing house'' for hosts that do
-not support ARP. In addition, system managers have
-more control over the contents of the ARP translation
-cache and may interactively interrogate and modify
-the cache's contents.
-.NH 3
-User Control of Network Buffering
-.PP
-Although the system allocates reasonable default amounts of buffering
-for most connections, certain operations such as file system dumps
-to remote machines benefit from significant increases in buffering [Walsh84].
-The \fIsetsockopt\fP system call has been extended to allow such requests.
-In addition, \fIgetsockopt\fP and \fIsetsockopt\fP
-are now interfaced to the protocol level allowing protocol-specific
-options to be manipulated by the user.
-.NH 3
-Number of File Descriptors
-.PP
-To allow full use of the many descriptor based services available,
-the previous hard limit of 30 open files per process has been relaxed.
-The changes entailed generalizing \fIselect\fP to handle arrays of
-32-bit words, removing the dependency on file descriptors from
-the page table entries,
-and limiting most of the linear scans of a process's file table.
-The default per-process descriptor limit was raised from 20 to 64,
-though there are no longer any hard upper limits on the number
-of file descriptors.
-.NH 3
-Kernel Limits
-.PP
-Many internal kernel configuration limits have been increased by suitable
-modifications to data structures.
-The limit on physical memory has been changed from 8 megabytes to 64 megabytes,
-and the limit of 15 mounted file systems has been changed to 255.
-The maximum file system size has been increased to 8 gigabytes,
-the number of processes to 65536,
-and the per-process size to 64 megabytes of data and 64 megabytes of stack.
-Note that these are upper bounds;
-the default limits for these quantities are tuned for systems
-with 4-8 megabytes of physical memory.
-.NH 3
-Memory Management
-.PP
-The global clock page replacement algorithm used to have a single
-hand that was used both to mark and to reclaim memory.
-The first time that it encountered a page it would clear its reference bit.
-If the reference bit was still clear on its next pass across the page,
-it would reclaim the page.
-The use of a single hand does not work well with large physical
-memories as the time to complete a single revolution of the hand
-can take a minute or more.
-By the time the hand gets around to the marked pages,
-the information is usually no longer pertinent.
-During periods of sudden shortages,
-the page daemon will not be able to find any reclaimable pages until
-it has completed a full revolution.
-To alleviate this problem,
-the clock hand has been split into two separate hands.
-The front hand clears the reference bits;
-the back hand follows a constant number of pages behind,
-reclaiming pages that still have cleared reference bits.
-While the code has been written to allow the distance between
-the hands to be varied, we have not found any algorithms
-suitable for determining how to dynamically adjust this distance.
-.PP
-The configuration of the virtual memory system used to require
-a significant understanding of its operation to do such
-simple tasks as increasing the maximum process size.
-This process has been significantly improved so that the most
-common configuration parameters, such as the virtual memory sizes,
-can be specified using a single option in the configuration file.
-Standard configurations support data and stack segments
-of 17, 33 and 64 megabytes.
-.NH 3
-Signals
-.PP
-The 4.2BSD signal implementation would push several words
-onto the normal run-time stack before switching to an
-alternate signal stack.
-The 4.3BSD implementation has been corrected so that
-the entire signal handler's state is now pushed onto the signal stack.
-Another limitation in the original signal implementation was
-that it used an undocumented system call to return from signals.
-Users could not write their own return from exceptions;
-4.3BSD formally specifies the \fIsigreturn\fP system call.
-.PP
-Many existing programs depend on interrupted system calls.
-The restartable system call semantics of 4.2BSD signals caused
-many of these programs to break.
-To simplify porting of programs from inferior versions of
-.UX
-the \fIsigvec\fP system call has been extended so that
-programmers may specify that system calls are not to be
-restarted after particular signals.
-.NH 3
-System Logging
-.PP
-A system logging facility has been added
-that sends kernel messages to the
-syslog daemon for logging in /usr/adm/messages and possibly for
-printing on the system console.
-The revised scheme for logging messages
-eliminates the time lag in updating the messages file,
-unifies the format of kernel messages,
-provides a finer granularity of control over the messages
-that get printed on the console,
-and eliminates the degradation in response during the printing of
-low-priority kernel messages.
-Recoverable system errors and common resource limitations are logged
-using this facility.
-Most system utilities, such as init and login,
-have been modified to log errors to syslog
-rather than writing directly on the console.
-.NH 3
-Windows
-.PP
-The tty structure has been augmented to hold
-information about the size
-of an associated window or terminal.
-These sizes can be obtained by programs such as editors that want
-to know the size of the screen they are manipulating.
-When these sizes are changed,
-a new signal, SIGWINCH, is sent to the current process group.
-The editors have been modified to catch this signal and reshape
-their view of the world, and the remote login program and server
-now cooperate to propagate window sizes and window size changes
-across a network.
-Other programs and libraries such as curses that need the width
-or height of the screen have been modified to use this facility as well.
-.NH 3
-Configuration of UNIBUS Devices
-.PP
-The UNIBUS configuration routines have been extended to allow auto-configuration
-of dedicated UNIBUS memory held by devices.
-The new routines simplify the configuration of memory-mapped devices
-and correct problems occurring on reset of the UNIBUS.
-.NH 3
-Disk Recovery from Errors
-.PP
-The MASSBUS disk driver's error recovery routines have been fixed to
-retry before correcting ECC errors, support ECC on bad-sector replacements,
-and correctly attempt retries after earlier
-corrective actions in the same transfer.
-The error messages are more accurate.
-.NH 2
-Functional Extensions to Libraries and Utilities
-.PP
-Most of the changes to the utilities and libraries have been to
-allow them to handle a more general set of problems,
-or to handle the same set of problems more quickly.
-.NH 3
-Name Server
-.PP
-In 4.2BSD the name resolution routines (\fIgethostbyname\fP,
-\fIgetservbyname\fP,
-etc.) were implemented by a set of database files maintained on the
-local machine.
-Inconsistencies or obsolescence in these files resulted in inaccessibility of
-hosts or services.
-In 4.3BSD these files may be replaced by a network name server that can
-ensure a consistent view of the name space in a multimachine environment.
-This name server operates in accordance with Internet standards
-for service on the ARPANET [Mockapetris83].
-.NH 3
-System Management
-.PP
-A new utility, \fIrdist\fP,
-has been provided to assist system managers in keeping
-all their machines up to date with a consistent set of sources and binaries.
-A master set of sources may reside on a single central machine,
-or be distributed at (known) locations throughout the environment.
-New versions of \fIgetty\fP, \fIinit\fP, and \fIlogin\fP
-merge the functions of several
-files into a single place, and allow more flexibility in the
-startup of processes such as window managers.
-.PP
-The new utility \fItimed\fP keeps the time on a group of cooperating machines
-(within a single LAN) synchronized to within 30 milliseconds.
-It does its corrections using a new system call that changes
-the rate of time advance without stopping or reversing the system clock.
-It normally selects one machine to act as a master.
-If the master dies or is partitioned, a new master is elected.
-Other machines may participate in a purely slave role.
-.NH 3
-Routing
-.PP
-Many bugs in the routing daemon have been fixed;
-it is considerably more robust,
-and now understands how to properly deal with
-subnets and point-to-point networks.
-Its operation has been made more efficient by tuning with the use
-of execution profiles, along with inline expansion of common operations
-using the kernel's \fIinline\fP optimizer.
-.NH 3
-Compilers
-.PP
-The symbolic debugger \fIdbx\fP has had many new features added,
-and all the known bugs fixed. In addition \fIdbx\fP
-has been extended to work with the Pascal compiler.
-The Fortran compiler \fIf77\fP has had numerous bugs fixed.
-The C compiler has been modified so that it can, optionally,
-generate single precision floating point instructions when operating
-on single precision variables.
diff --git a/share/doc/papers/sysperf/6.t b/share/doc/papers/sysperf/6.t
deleted file mode 100644
index bc32c01d9d8..00000000000
--- a/share/doc/papers/sysperf/6.t
+++ /dev/null
@@ -1,68 +0,0 @@
-.\" $OpenBSD: 6.t,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1985 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)6.t 5.1 (Berkeley) 4/17/91
-.\"
-.ds RH Security Tightening
-.NH
-Security Tightening
-.PP
-Since we do not wish to encourage rampant system cracking,
-we describe only briefly the changes made to enhance security.
-.NH 2
-Generic Kernel
-.PP
-Several loopholes in the process tracing facility have been corrected.
-Programs being traced may not be executed;
-executing programs may not be traced.
-Programs may not provide input to terminals to which they do not
-have read permission.
-The handling of process groups has been tightened to eliminate
-some problems.
-When a program attempts to change its process group,
-the system checks to see if the process with the pid of the process
-group was started by the same user.
-If it exists and was started by a different user the process group
-number change is denied.
-.NH 2
-Security Problems in Utilities
-.PP
-Setuid utilities no longer use the \fIpopen\fP or \fIsystem\fP library routines.
-Access to the kernel's data structures through the kmem device
-is now restricted to programs that are set group id ``kmem''.
-Thus many programs that used to run with root privileges
-no longer need to do so.
-Access to disk devices is now controlled by an ``operator'' group id;
-this permission allows operators to function without being the super-user.
-Only users in group wheel can do ``su root''; this restriction
-allows administrators to define a super-user access list.
-Numerous holes have been closed in the shell to prevent
-users from gaining privileges from set user id shell scripts,
-although use of such scripts is still highly discouraged on systems
-that are concerned about security.
diff --git a/share/doc/papers/sysperf/7.t b/share/doc/papers/sysperf/7.t
deleted file mode 100644
index d53c258ebf5..00000000000
--- a/share/doc/papers/sysperf/7.t
+++ /dev/null
@@ -1,162 +0,0 @@
-.\" $OpenBSD: 7.t,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1985 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)7.t 5.1 (Berkeley) 4/17/91
-.\"
-.ds RH Conclusions
-.NH
-Conclusions
-.PP
-4.2BSD, while functionally superior to 4.1BSD, lacked much of the
-performance tuning required of a good system. We found that
-the distributed system spent 10-20% more time in the kernel than
-4.1BSD. This added overhead combined with problems with several
-user programs severely limited the overall performance of the
-system in a general timesharing environment.
-.PP
-Changes made to the system since the 4.2BSD distribution have
-eliminated most of the
-added system overhead by replacing old algorithms
-or introducing additional caching schemes.
-The combined caches added to the name translation process
-reduce the average cost of translating a pathname to an inode by more than 50%.
-These changes reduce the percentage of time spent running
-in the system by nearly 9%.
-.PP
-The use of silo input on terminal ports only when necessary
-has allowed the system to avoid a large amount of software interrupt
-processing. Observations show that the system is forced to
-field about 25% fewer interrupts than before.
-.PP
-The kernel
-changes, combined with many bug fixes, make the system much more
-responsive in a general timesharing environment.
-The 4.3BSD Berkeley UNIX system now appears
-capable of supporting loads at least as large as those supported under
-4.1BSD while providing all the new interprocess communication, networking,
-and file system facilities.
-.nr H2 1
-.ds RH Acknowledgements
-.SH
-\s+2Acknowledgements\s0
-.PP
-We would like to thank Robert Elz for sharing his ideas and
-his code for caching system wide names and searching the process table.
-We thank Alan Smith for initially suggesting the use of a
-capability based cache.
-We also acknowledge
-George Goble who dropped many of our changes
-into his production system and reported back fixes to the
-disasters that they caused.
-The buffer cache read-ahead trace package was based
-on a program written by Jim Lawson. Ralph Campbell
-implemented several of the C library changes. The original
-version of the Internet daemon was written by Bill Joy.
-In addition,
-we would like to thank the many other people that contributed
-ideas, information, and work while the system was undergoing change.
-.ds RH References
-.nr H2 1
-.sp 2
-.SH
-\s+2References\s-2
-.LP
-.IP [Cabrera84] 20
-Luis Felipe Cabrera, Eduard Hunter, Michael J. Karels, and David Mosher,
-``A User-Process Oriented Performance Study of Ethernet Networking Under
-Berkeley UNIX 4.2BSD,''
-Research Report No. UCB/CSD 84/217, University of California,
-Berkeley, December 1984.
-.IP [Cabrera85] 20
-Luis Felipe Cabrera, Michael J. Karels, and David Mosher,
-``The Impact of Buffer Management on Networking Software Performance
-in Berkeley UNIX 4.2BSD: A Case Study,''
-Proceedings of the Summer Usenix Conference, Portland, Oregon,
-June 1985, pp. 507-517.
-.IP [GADS85] 20
-GADS (Gateway Algorithms and Data Structures Task Force),
-``Toward an Internet Standard for Subnetting,'' RFC-940,
-Network Information Center, SRI International,
-April 1985.
-.IP [Joy80] 20
-Joy, William,
-``Comments on the performance of UNIX on the VAX'',
-Computer System Research Group, U.C. Berkeley.
-April 1980.
-.IP [Kashtan80] 20
-Kashtan, David L.,
-``UNIX and VMS, Some Performance Comparisons'',
-SRI International. February 1980.
-.IP [Lankford84] 20
-Jeffrey Lankford,
-``UNIX System V and 4BSD Performance,''
-\fIProceedings of the Salt Lake City Usenix Conference\fP,
-pp 228-236, June 1984.
-.IP [Leffler84] 20
-Sam Leffler, Mike Karels, and M. Kirk McKusick,
-``Measuring and Improving the Performance of 4.2BSD,''
-\fIProceedings of the Salt Lake City Usenix Conference\fP,
-pp 237-252, June 1984.
-.IP [McKusick85] 20
-M. Kirk McKusick, Mike Karels, and Samuel Leffler,
-``Performance Improvements and Functional Enhancements in 4.3BSD,''
-\fIProceedings of the Portland Usenix Conference\fP,
-pp 519-531, June 1985.
-.IP [Mockapetris83] 20
-Paul Mockapetris, ``Domain Names \- Implementation and Schedule,''
-Network Information Center, SRI International,
-RFC-883,
-November 1983.
-.IP [Mogul84] 20
-Jeffrey Mogul, ``Broadcasting Internet Datagrams,'' RFC-919,
-Network Information Center, SRI International,
-October 1984.
-.IP [Mosher80] 20
-Mosher, David,
-``UNIX Performance, an Introspection'',
-Presented at the Boulder, Colorado Usenix Conference, January 1980.
-Copies of the paper are available from
-Computer System Research Group, U.C. Berkeley.
-.IP [Nagle84] 20
-John Nagle, ``Congestion Control in IP/TCP Internetworks,'' RFC-896,
-Network Information Center, SRI International,
-January 1984.
-.IP [Ritchie74] 20
-Ritchie, D. M. and Thompson, K.,
-``The UNIX Time-Sharing System'',
-CACM 17, 7. July 1974. pp 365-375.
-.IP [Shannon83] 20
-Shannon, W.,
-private communication,
-July 1983.
-.IP [Walsh84] 20
-Robert Walsh and Robert Gurwitz,
-``Converting BBN TCP/IP to 4.2BSD,''
-\fIProceedings of the Salt Lake City Usenix Conference\fP,
-pp 52-61, June 1984.
diff --git a/share/doc/papers/sysperf/Makefile b/share/doc/papers/sysperf/Makefile
deleted file mode 100644
index 79f75770a79..00000000000
--- a/share/doc/papers/sysperf/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-# $OpenBSD: Makefile,v 1.3 2004/02/01 14:22:45 jmc Exp $
-
-
-DIR= papers/sysperf
-MACROS= -ms
-SRCS= 0.t 1.t 2.t 3.t 4.t 5.t 6.t 7.t
-EXTRA= a1.t a2.t
-OBJS= paper.tmp appendix.tmp
-CLEANFILES+=${OBJS}
-
-paper.ps: ${OBJS}
- ${ROFF} ${OBJS} > ${.TARGET}
-paper.txt: ${OBJS}
- ${ROFF} -Tascii ${OBJS} > ${.TARGET}
-
-paper.tmp: ${SRCS}
- ${TBL} ${SRCS} | ${EQN} > paper.tmp
-
-appendix.tmp: a1.t a2.t
- ${GRIND} -f a1.t | awk '/\.\(\)/{ cnt = 2 } \
- { if (cnt) cnt -= 1; else print $$0; } ' > appendix.tmp
- ${GRIND} -f -lcsh a2.t | awk '/\.\(\)/{ cnt = 2 } \
- { if (cnt) cnt -= 1; else print $$0; } ' >> appendix.tmp
-
-.include <bsd.doc.mk>
diff --git a/share/doc/papers/sysperf/a1.t b/share/doc/papers/sysperf/a1.t
deleted file mode 100644
index 0a3d582afbd..00000000000
--- a/share/doc/papers/sysperf/a1.t
+++ /dev/null
@@ -1,666 +0,0 @@
-.\" $OpenBSD: a1.t,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1985 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)a1.t 5.1 (Berkeley) 4/17/91
-.\"
-.ds RH Appendix A \- Benchmark sources
-.nr H2 1
-.sp 2
-.de vS
-.nf
-..
-.de vE
-.fi
-..
-.bp
-.SH
-\s+2Appendix A \- Benchmark sources\s-2
-.LP
-The programs shown here run under 4.2 with only routines
-from the standard libraries. When run under 4.1 they were augmented
-with a \fIgetpagesize\fP routine and a copy of the \fIrandom\fP
-function from the C library. The \fIvforks\fP and \fIvexecs\fP
-programs are constructed from the \fIforks\fP and \fIexecs\fP programs,
-respectively, by substituting calls to \fIfork\fP with calls to
-\fIvfork\fP.
-.SH
-syscall
-.LP
-.vS
-/*
- * System call overhead benchmark.
- */
-main(argc, argv)
- char *argv[];
-{
- register int ncalls;
-
- if (argc < 2) {
- printf("usage: %s #syscalls\n", argv[0]);
- exit(1);
- }
- ncalls = atoi(argv[1]);
- while (ncalls-- > 0)
- (void) getpid();
-}
-.vE
-.SH
-csw
-.LP
-.vS
-/*
- * Context switching benchmark.
- *
- * Force system to context switch 2*nsigs
- * times by forking and exchanging signals.
- * To calculate system overhead for a context
- * switch, the signocsw program must be run
- * with nsigs. Overhead is then estimated by
- * t1 = time csw <n>
- * t2 = time signocsw <n>
- * overhead = t1 - 2 * t2;
- */
-#include <signal.h>
-
-int sigsub();
-int otherpid;
-int nsigs;
-
-main(argc, argv)
- char *argv[];
-{
- int pid;
-
- if (argc < 2) {
- printf("usage: %s nsignals\n", argv[0]);
- exit(1);
- }
- nsigs = atoi(argv[1]);
- signal(SIGALRM, sigsub);
- otherpid = getpid();
- pid = fork();
- if (pid != 0) {
- otherpid = pid;
- kill(otherpid, SIGALRM);
- }
- for (;;)
- sigpause(0);
-}
-
-sigsub()
-{
-
- signal(SIGALRM, sigsub);
- kill(otherpid, SIGALRM);
- if (--nsigs <= 0)
- exit(0);
-}
-.vE
-.SH
-signocsw
-.LP
-.vS
-/*
- * Signal without context switch benchmark.
- */
-#include <signal.h>
-
-int pid;
-int nsigs;
-int sigsub();
-
-main(argc, argv)
- char *argv[];
-{
- register int i;
-
- if (argc < 2) {
- printf("usage: %s nsignals\n", argv[0]);
- exit(1);
- }
- nsigs = atoi(argv[1]);
- signal(SIGALRM, sigsub);
- pid = getpid();
- for (i = 0; i < nsigs; i++)
- kill(pid, SIGALRM);
-}
-
-sigsub()
-{
-
- signal(SIGALRM, sigsub);
-}
-.vE
-.SH
-pipeself
-.LP
-.vS
-/*
- * IPC benchmark,
- * write to self using pipes.
- */
-
-main(argc, argv)
- char *argv[];
-{
- char buf[512];
- int fd[2], msgsize;
- register int i, iter;
-
- if (argc < 3) {
- printf("usage: %s iterations message-size\n", argv[0]);
- exit(1);
- }
- argc--, argv++;
- iter = atoi(*argv);
- argc--, argv++;
- msgsize = atoi(*argv);
- if (msgsize > sizeof (buf) || msgsize <= 0) {
- printf("%s: Bad message size.\n", *argv);
- exit(2);
- }
- if (pipe(fd) < 0) {
- perror("pipe");
- exit(3);
- }
- for (i = 0; i < iter; i++) {
- write(fd[1], buf, msgsize);
- read(fd[0], buf, msgsize);
- }
-}
-.vE
-.SH
-pipediscard
-.LP
-.vS
-/*
- * IPC benchmark,
- * write and discard using pipes.
- */
-
-main(argc, argv)
- char *argv[];
-{
- char buf[512];
- int fd[2], msgsize;
- register int i, iter;
-
- if (argc < 3) {
- printf("usage: %s iterations message-size\n", argv[0]);
- exit(1);
- }
- argc--, argv++;
- iter = atoi(*argv);
- argc--, argv++;
- msgsize = atoi(*argv);
- if (msgsize > sizeof (buf) || msgsize <= 0) {
- printf("%s: Bad message size.\n", *argv);
- exit(2);
- }
- if (pipe(fd) < 0) {
- perror("pipe");
- exit(3);
- }
- if (fork() == 0)
- for (i = 0; i < iter; i++)
- read(fd[0], buf, msgsize);
- else
- for (i = 0; i < iter; i++)
- write(fd[1], buf, msgsize);
-}
-.vE
-.SH
-pipeback
-.LP
-.vS
-/*
- * IPC benchmark,
- * read and reply using pipes.
- *
- * Process forks and exchanges messages
- * over a pipe in a request-response fashion.
- */
-
-main(argc, argv)
- char *argv[];
-{
- char buf[512];
- int fd[2], fd2[2], msgsize;
- register int i, iter;
-
- if (argc < 3) {
- printf("usage: %s iterations message-size\n", argv[0]);
- exit(1);
- }
- argc--, argv++;
- iter = atoi(*argv);
- argc--, argv++;
- msgsize = atoi(*argv);
- if (msgsize > sizeof (buf) || msgsize <= 0) {
- printf("%s: Bad message size.\n", *argv);
- exit(2);
- }
- if (pipe(fd) < 0) {
- perror("pipe");
- exit(3);
- }
- if (pipe(fd2) < 0) {
- perror("pipe");
- exit(3);
- }
- if (fork() == 0)
- for (i = 0; i < iter; i++) {
- read(fd[0], buf, msgsize);
- write(fd2[1], buf, msgsize);
- }
- else
- for (i = 0; i < iter; i++) {
- write(fd[1], buf, msgsize);
- read(fd2[0], buf, msgsize);
- }
-}
-.vE
-.SH
-forks
-.LP
-.vS
-/*
- * Benchmark program to calculate fork+wait
- * overhead (approximately). Process
- * forks and exits while parent waits.
- * The time to run this program is used
- * in calculating exec overhead.
- */
-
-main(argc, argv)
- char *argv[];
-{
- register int nforks, i;
- char *cp;
- int pid, child, status, brksize;
-
- if (argc < 2) {
- printf("usage: %s number-of-forks sbrk-size\n", argv[0]);
- exit(1);
- }
- nforks = atoi(argv[1]);
- if (nforks < 0) {
- printf("%s: bad number of forks\n", argv[1]);
- exit(2);
- }
- brksize = atoi(argv[2]);
- if (brksize < 0) {
- printf("%s: bad size to sbrk\n", argv[2]);
- exit(3);
- }
- cp = (char *)sbrk(brksize);
- if ((int)cp == -1) {
- perror("sbrk");
- exit(4);
- }
- for (i = 0; i < brksize; i += 1024)
- cp[i] = i;
- while (nforks-- > 0) {
- child = fork();
- if (child == -1) {
- perror("fork");
- exit(-1);
- }
- if (child == 0)
- _exit(-1);
- while ((pid = wait(&status)) != -1 && pid != child)
- ;
- }
- exit(0);
-}
-.vE
-.SH
-execs
-.LP
-.vS
-/*
- * Benchmark program to calculate exec
- * overhead (approximately). Process
- * forks and execs "null" test program.
- * The time to run the fork program should
- * then be deducted from this one to
- * estimate the overhead for the exec.
- */
-
-main(argc, argv)	/* execs: time nexecs fork+execv+wait triples */
-	char *argv[];
-{
-	register int nexecs, i;
-	char *cp, *sbrk();
-	int pid, child, status, brksize;
-
-	if (argc < 4) {		/* argv[1], argv[2] and argv[3] are read below */
-		printf("usage: %s number-of-execs sbrk-size job-name\n",
-		    argv[0]);
-		exit(1);
-	}
-	nexecs = atoi(argv[1]);
-	if (nexecs < 0) {
-		printf("%s: bad number of execs\n", argv[1]);
-		exit(2);
-	}
-	brksize = atoi(argv[2]);
-	if (brksize < 0) {
-		printf("%s: bad size to sbrk\n", argv[2]);
-		exit(3);
-	}
-	cp = sbrk(brksize);	/* grow the data segment by brksize bytes */
-	if ((int)cp == -1) {
-		perror("sbrk");
-		exit(4);
-	}
-	for (i = 0; i < brksize; i += 1024)
-		cp[i] = i;	/* store into every 1024th byte of the new area */
-	while (nexecs-- > 0) {
-		child = fork();
-		if (child == -1) {
-			perror("fork");
-			exit(-1);
-		}
-		if (child == 0) {
-			execv(argv[3], argv);	/* job inherits our argument vector */
-			perror("execv");	/* reached only if execv returned */
-			_exit(-1);
-		}
-		while ((pid = wait(&status)) != -1 && pid != child)
-			;	/* parent: wait for this child (or a wait error) */
-	}
-	exit(0);
-}
-.vE
-.SH
-nulljob
-.LP
-.vS
-/*
- * Benchmark "null job" program.
- */
-
-main(argc, argv)
-	int argc;
-	char *argv[];
-{
-	exit(0);	/* no work: terminate with status 0 at once */
-}
-.vE
-.SH
-bigjob
-.LP
-.vS
-/*
- * Benchmark "null big job" program.
- */
-/* sized at 250K to approximate vi's text+data size */
-char space[250 * 1024] = "force into data segment";
-
-main(argc, argv)
-	int argc;
-	char *argv[];
-{
-	exit(0);	/* no work: terminate with status 0 at once */
-}
-.vE
-.bp
-.SH
-seqpage
-.LP
-.vS
-/*
- * Sequential page access benchmark.
- */
-#include <sys/vadvise.h>
-
-char *valloc();
-
-main(argc, argv) /* seqpage: store one byte per pagesize stride, in address order, niter times */
- char *argv[];
-{
- register i, niter;
- register char *pf, *lastpage;
- int npages = 4096, pagesize, vflag = 0; /* defaults: 4096 pages, no vadvise() call */
- char *pages, *name;
-
- name = argv[0];
- argc--, argv++; /* step past the program name */
-again: /* re-entered after each option has been consumed */
- if (argc < 1) {
-usage: /* also reached by goto when -p has no value */
- printf("usage: %s [ -v ] [ -p #pages ] niter\n", name);
- exit(1);
- }
- if (strcmp(*argv, "-p") == 0) {
- argc--, argv++;
- if (argc < 1)
- goto usage;
- npages = atoi(*argv);
- if (npages <= 0) {
- printf("%s: Bad page count.\n", *argv);
- exit(2);
- }
- argc--, argv++;
- goto again;
- }
- if (strcmp(*argv, "-v") == 0) {
- argc--, argv++;
- vflag++;
- goto again;
- }
- niter = atoi(*argv); /* first non-option argument is the pass count */
- pagesize = getpagesize();
- pages = valloc(npages * pagesize);
- if (pages == (char *)0) {
- printf("Can't allocate %d pages (%2.1f megabytes).\n",
- npages, (npages * pagesize) / (1024. * 1024.));
- exit(3);
- }
- lastpage = pages + (npages * pagesize); /* one byte past the allocated region */
- if (vflag)
- vadvise(VA_SEQL); /* -v given; presumably the sequential-access hint -- confirm */
- for (i = 0; i < niter; i++)
- for (pf = pages; pf < lastpage; pf += pagesize)
- *pf = 1; /* write the first byte of each pagesize stride */
-}
-.vE
-.SH
-randpage
-.LP
-.vS
-/*
- * Random page access benchmark.
- */
-#include <sys/vadvise.h>
-
-char *valloc();
-long random();		/* main() picks pages with random(), not rand() */
-
-main(argc, argv) /* randpage: store one byte into niter pages chosen with random() */
- char *argv[];
-{
- register int npages = 4096, pagesize, pn, i, niter; /* default: 4096 pages */
- int vflag = 0, debug = 0;
- char *pages, *name;
-
- name = argv[0];
- argc--, argv++; /* step past the program name */
-again: /* re-entered after each option has been consumed */
- if (argc < 1) {
-usage: /* also reached by goto when -p has no value */
- printf("usage: %s [ -d ] [ -v ] [ -p #pages ] niter\n", name);
- exit(1);
- }
- if (strcmp(*argv, "-p") == 0) {
- argc--, argv++;
- if (argc < 1)
- goto usage;
- npages = atoi(*argv);
- if (npages <= 0) {
- printf("%s: Bad page count.\n", *argv);
- exit(2);
- }
- argc--, argv++;
- goto again;
- }
- if (strcmp(*argv, "-v") == 0) {
- argc--, argv++;
- vflag++;
- goto again;
- }
- if (strcmp(*argv, "-d") == 0) {
- argc--, argv++;
- debug++;
- goto again;
- }
- niter = atoi(*argv); /* first non-option argument is the touch count */
- pagesize = getpagesize();
- pages = valloc(npages * pagesize);
- if (pages == (char *)0) {
- printf("Can't allocate %d pages (%2.1f megabytes).\n",
- npages, (npages * pagesize) / (1024. * 1024.));
- exit(3);
- }
- if (vflag)
- vadvise(VA_ANOM); /* -v given; presumably the no-pattern access hint -- confirm */
- for (i = 0; i < niter; i++) {
- pn = random() % npages; /* page index in 0 .. npages-1 for non-negative random() */
- if (debug)
- printf("touch page %d\n", pn);
- pages[pagesize * pn] = 1; /* the page is written whether or not -d was given */
- }
-}
-.vE
-.SH
-gausspage
-.LP
-.vS
-/*
- * Random page access with a gaussian distribution.
- *
- * Allocate a large (zero fill on demand) address
- * space and fault the pages in a random gaussian
- * order.
- */
-
-double sqrt(), log(), cos();	/* the math library routines return double */
-float rnd(), gauss();		/* defined after main() */
-char *valloc();
-int rand();
-
-main(argc, argv)	/* gausspage: random walk over pages, gaussian step size */
-	char *argv[];
-{
-	register int pn, i, niter, delta;
-	register char *pages;
-	float sd = 10.0;	/* default standard deviation, in pages */
-	int npages = 4096, pagesize, debug = 0;
-	char *name;
-
-	name = argv[0];
-	argc--, argv++;		/* step past the program name */
-again:				/* re-entered after each option is consumed */
-	if (argc < 1) {
-usage:				/* also reached when -s or -p has no value */
-		printf(
-"usage: %s [ -d ] [ -p #pages ] [ -s standard-deviation ] iterations\n", name);
-		exit(1);
-	}
-	if (strcmp(*argv, "-s") == 0) {
-		argc--, argv++;
-		if (argc < 1)
-			goto usage;
-		sscanf(*argv, "%f", &sd);
-		if (sd <= 0) {
-			printf("%s: Bad standard deviation.\n", *argv);
-			exit(2);
-		}
-		argc--, argv++;
-		goto again;
-	}
-	if (strcmp(*argv, "-p") == 0) {
-		argc--, argv++;
-		if (argc < 1)
-			goto usage;
-		npages = atoi(*argv);
-		if (npages <= 0) {
-			printf("%s: Bad page count.\n", *argv);
-			exit(2);
-		}
-		argc--, argv++;
-		goto again;
-	}
-	if (strcmp(*argv, "-d") == 0) {
-		argc--, argv++;
-		debug++;
-		goto again;
-	}
-	niter = atoi(*argv);	/* first non-option argument */
-	pagesize = getpagesize();
-	pages = valloc(npages*pagesize);
-	if (pages == (char *)0) {
-		printf("Can't allocate %d pages (%2.1f megabytes).\n",
-		    npages, (npages*pagesize) / (1024. * 1024.));
-		exit(3);
-	}
-	pn = 0;
-	for (i = 0; i < niter; i++) {
-		delta = gauss(sd, 0.0);
-		while (pn + delta < 0 || pn + delta >= npages)	/* valid: 0..npages-1 */
-			delta = gauss(sd, 0.0);	/* redraw until the step stays inside */
-		pn += delta;
-		if (debug)
-			printf("touch page %d\n", pn);	/* -d reports only, no store */
-		else
-			pages[pn * pagesize] = 1;
-	}
-}
-
-float
-gauss(sd, mean)
-	float sd, mean;
-{
-	register float radius, angle;
-
-	radius = sqrt(-2.0 * log(rnd()));	/* first rnd() draw */
-	angle = rnd() * 3.14159;		/* second rnd() draw */
-	return (radius * cos(angle) * sd + mean);	/* scale by sd, shift by mean */
-}
-
-float
-rnd()
-{
-	static int biggest = 0x7fffffff;	/* assumed largest rand() value */
-
-	/* rand() takes no argument; +1.0 keeps the result > 0 for log() */
-	return (((float)rand() + 1.0) / ((float)biggest + 1.0));
-}
-.vE
diff --git a/share/doc/papers/sysperf/a2.t b/share/doc/papers/sysperf/a2.t
deleted file mode 100644
index be8678ec4aa..00000000000
--- a/share/doc/papers/sysperf/a2.t
+++ /dev/null
@@ -1,115 +0,0 @@
-.\" $OpenBSD: a2.t,v 1.3 2003/06/02 23:30:10 millert Exp $
-.\"
-.\" Copyright (c) 1985 The Regents of the University of California.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)a2.t 5.1 (Berkeley) 4/17/91
-.\"
-.SH
-run (shell script)
-.LP
-.vS
-#! /bin/csh -fx
-# Script to run benchmark programs.
-#
-date
-make clean; time make	# rebuild everything, timing the build itself
-time syscall 100000
-time seqpage -p 7500 10	# 7500 pages, 10 passes; next line adds -v
-time seqpage -v -p 7500 10
-time randpage -p 7500 30000
-time randpage -v -p 7500 30000
-time gausspage -p 7500 -s 1 30000	# -s steps the standard deviation from 1 to 10000
-time gausspage -p 7500 -s 10 30000
-time gausspage -p 7500 -s 30 30000
-time gausspage -p 7500 -s 40 30000
-time gausspage -p 7500 -s 50 30000
-time gausspage -p 7500 -s 60 30000
-time gausspage -p 7500 -s 80 30000
-time gausspage -p 7500 -s 10000 30000
-time csw 10000
-time signocsw 10000
-time pipeself 10000 512	# each pipe/udg test runs with 512 and then 4 as its second argument
-time pipeself 10000 4
-time udgself 10000 512
-time udgself 10000 4
-time pipediscard 10000 512
-time pipediscard 10000 4
-time udgdiscard 10000 512
-time udgdiscard 10000 4
-time pipeback 10000 512
-time pipeback 10000 4
-time udgback 10000 512
-time udgback 10000 4
-size forks
-time forks 1000 0	# second argument is the sbrk size: 0, 1024, 102400
-time forks 1000 1024
-time forks 1000 102400
-size vforks
-time vforks 1000 0
-time vforks 1000 1024
-time vforks 1000 102400
-countenv	# presumably reports the current environment size -- confirm
-size nulljob
-time execs 1000 0 nulljob
-time execs 1000 1024 nulljob
-time execs 1000 102400 nulljob
-time vexecs 1000 0 nulljob
-time vexecs 1000 1024 nulljob
-time vexecs 1000 102400 nulljob
-size bigjob
-time execs 1000 0 bigjob
-time execs 1000 1024 bigjob
-time execs 1000 102400 bigjob
-time vexecs 1000 0 bigjob
-time vexecs 1000 1024 bigjob
-time vexecs 1000 102400 bigjob
-# fill environment with ~1024 bytes
-setenv a 012345678901234567890123456789012345678901234567890123456780123456789
-setenv b 012345678901234567890123456789012345678901234567890123456780123456789
-setenv c 012345678901234567890123456789012345678901234567890123456780123456789
-setenv d 012345678901234567890123456789012345678901234567890123456780123456789
-setenv e 012345678901234567890123456789012345678901234567890123456780123456789
-setenv f 012345678901234567890123456789012345678901234567890123456780123456789
-setenv g 012345678901234567890123456789012345678901234567890123456780123456789
-setenv h 012345678901234567890123456789012345678901234567890123456780123456789
-setenv i 012345678901234567890123456789012345678901234567890123456780123456789
-setenv j 012345678901234567890123456789012345678901234567890123456780123456789
-setenv k 012345678901234567890123456789012345678901234567890123456780123456789
-setenv l 012345678901234567890123456789012345678901234567890123456780123456789
-setenv m 012345678901234567890123456789012345678901234567890123456780123456789
-setenv n 012345678901234567890123456789012345678901234567890123456780123456789
-setenv o 012345678901234567890123456789012345678901234567890123456780123456789
-countenv
-time execs 1000 0 nulljob	# execs runs repeated with the 15 extra variables set
-time execs 1000 1024 nulljob
-time execs 1000 102400 nulljob
-time execs 1000 0 bigjob
-time execs 1000 1024 bigjob
-time execs 1000 102400 bigjob
-.vE
-.bp