-rw-r--r--  sys/vm/kern_lock.c  |   3
-rw-r--r--  sys/vm/swap_pager.c | 102
-rw-r--r--  sys/vm/vm_extern.h  |  11
-rw-r--r--  sys/vm/vm_fault.c   |  35
-rw-r--r--  sys/vm/vm_glue.c    |  10
-rw-r--r--  sys/vm/vm_kern.c    |   3
-rw-r--r--  sys/vm/vm_map.c     |  21
-rw-r--r--  sys/vm/vm_meter.c   |   4
-rw-r--r--  sys/vm/vm_object.c  | 937
-rw-r--r--  sys/vm/vm_object.h  |  52
-rw-r--r--  sys/vm/vm_page.c    | 358
-rw-r--r--  sys/vm/vm_pageout.c |  45
12 files changed, 817 insertions, 764 deletions
diff --git a/sys/vm/kern_lock.c b/sys/vm/kern_lock.c index 2d03a11c25a..d5c87beee1e 100644 --- a/sys/vm/kern_lock.c +++ b/sys/vm/kern_lock.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_lock.c,v 1.4 1996/08/02 00:05:56 niklas Exp $ */ +/* $OpenBSD: kern_lock.c,v 1.5 1997/04/17 01:25:16 niklas Exp $ */ /* $NetBSD: kern_lock.c,v 1.10 1994/10/30 19:11:09 cgd Exp $ */ /* @@ -73,6 +73,7 @@ #include <sys/systm.h> #include <vm/vm.h> +#include <vm/vm_extern.h> /* XXX */ #include <sys/proc.h> diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 7ace4838f1b..4774428adcb 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -1,4 +1,4 @@ -/* $OpenBSD: swap_pager.c,v 1.9 1997/03/05 12:49:38 niklas Exp $ */ +/* $OpenBSD: swap_pager.c,v 1.10 1997/04/17 01:25:16 niklas Exp $ */ /* $NetBSD: swap_pager.c,v 1.27 1996/03/16 23:15:20 christos Exp $ */ /* @@ -272,7 +272,7 @@ swap_pager_alloc(handle, size, prot, foff) */ if (vm_object_lookup(pager) == NULL) panic("swap_pager_alloc: bad object"); - return(pager); + return (pager); } } /* @@ -282,7 +282,7 @@ swap_pager_alloc(handle, size, prot, foff) waitok = handle ? M_WAITOK : M_NOWAIT; pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok); if (pager == NULL) - return(NULL); + return (NULL); swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok); if (swp == NULL) { #ifdef DEBUG @@ -290,7 +290,7 @@ swap_pager_alloc(handle, size, prot, foff) printf("swpg_alloc: swpager malloc failed\n"); #endif free((caddr_t)pager, M_VMPAGER); - return(NULL); + return (NULL); } size = round_page(size); for (swt = swtab; swt->st_osize; swt++) @@ -303,9 +303,8 @@ swap_pager_alloc(handle, size, prot, foff) swp->sw_osize = size; swp->sw_bsize = swt->st_bsize; swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize; - swp->sw_blocks = (sw_blk_t) - malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks), - M_VMPGDATA, M_NOWAIT); + swp->sw_blocks = (sw_blk_t)malloc(swp->sw_nblocks * + sizeof(*swp->sw_blocks), M_VMPGDATA, M_NOWAIT); if (swp->sw_blocks == NULL) { free((caddr_t)swp, M_VMPGDATA); free((caddr_t)pager, M_VMPAGER); @@ -315,10 +314,10 @@ swap_pager_alloc(handle, size, prot, foff) swt->st_inuse--; swt->st_usecnt--; #endif - return(FALSE); + return (FALSE); } bzero((caddr_t)swp->sw_blocks, - swp->sw_nblocks * sizeof(*swp->sw_blocks)); + swp->sw_nblocks * sizeof(*swp->sw_blocks)); swp->sw_poip = swp->sw_cnt = 0; if (handle) { vm_object_t object; @@ -349,7 +348,7 @@ swap_pager_alloc(handle, size, prot, foff) printf("swpg_alloc: pg_data %p, %x of %x at %p\n", swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks); #endif - return(pager); + return (pager); } static void @@ -451,7 +450,7 @@ swap_pager_getpage(pager, mlist, npages, sync) m->flags &= ~PG_FAULTING; } #endif - return(rv); + return (rv); } static int @@ -475,8 +474,8 @@ swap_pager_putpage(pager, mlist, npages, sync) flags = B_WRITE; if (!sync) flags |= B_ASYNC; - return(swap_pager_io((sw_pager_t)pager->pg_data, - mlist, npages, flags)); + return (swap_pager_io((sw_pager_t)pager->pg_data, mlist, npages, + flags)); } static boolean_t @@ -500,7 +499,7 @@ swap_pager_haspage(pager, offset) printf("swpg_haspage: %p bad offset %lx, ix %x\n", swp->sw_blocks, offset, ix); #endif - return(FALSE); + return (FALSE); } swb = &swp->sw_blocks[ix]; if (swb->swb_block) @@ -513,8 +512,8 @@ swap_pager_haspage(pager, offset) "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]); #endif if (swb->swb_block && (swb->swb_mask & (1 << ix))) - return(TRUE); - return(FALSE); + return (TRUE); + return (FALSE); } static void @@ -603,16 
+602,16 @@ swap_pager_io(swp, mlist, npages, flags) #ifdef DEBUG if ((flags & B_READ) == 0 && (swpagerdebug & SDB_ANOM)) { printf("swap_pager_io: no swap block on write\n"); - return(VM_PAGER_BAD); + return (VM_PAGER_BAD); } #endif - return(VM_PAGER_FAIL); + return (VM_PAGER_FAIL); } swb = &swp->sw_blocks[ix]; off = off % dbtob(swp->sw_bsize); if ((flags & B_READ) && (swb->swb_block == 0 || (swb->swb_mask & (1 << atop(off))) == 0)) - return(VM_PAGER_FAIL); + return (VM_PAGER_FAIL); /* * For reads (pageins) and synchronous writes, we clean up @@ -645,7 +644,7 @@ swap_pager_io(swp, mlist, npages, flags) printf("%s: no available io headers\n", "swap_pager_io"); #endif - return(VM_PAGER_AGAIN); + return (VM_PAGER_AGAIN); } } @@ -667,7 +666,7 @@ swap_pager_io(swp, mlist, npages, flags) * trying again (the pageout daemon's current response * to AGAIN) so we just return FAIL. */ - return(VM_PAGER_FAIL); + return (VM_PAGER_FAIL); } #ifdef DEBUG if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK)) @@ -687,7 +686,7 @@ swap_pager_io(swp, mlist, npages, flags) printf("%s: no KVA space to map pages\n", "swap_pager_io"); #endif - return(VM_PAGER_AGAIN); + return (VM_PAGER_AGAIN); } /* @@ -795,7 +794,7 @@ swap_pager_io(swp, mlist, npages, flags) if (swpagerdebug & SDB_IO) printf("swpg_io: IO started: bp %p\n", bp); #endif - return(VM_PAGER_PEND); + return (VM_PAGER_PEND); } s = splbio(); #ifdef DEBUG @@ -836,7 +835,7 @@ swap_pager_io(swp, mlist, npages, flags) printf("swpg_io: IO error\n"); #endif vm_pager_unmap_pages(kva, npages); - return(rv); + return (rv); } static void @@ -926,8 +925,7 @@ swap_pager_clean(rw) * Done with the object, decrement the paging count * and unlock it. */ - if (--object->paging_in_progress == 0) - wakeup(object); + vm_object_paging_end(object); vm_object_unlock(object); /* @@ -1066,7 +1064,7 @@ swap_pager_remove(pager, from, to) /* Special case stupid ranges. */ if (to > 0 && from >= to) - return(0); + return (0); swp = (sw_pager_t)pager->pg_data; @@ -1079,14 +1077,14 @@ swap_pager_remove(pager, from, to) * be created without any pages put into it? */ if (swp->sw_cnt == 0) - return(0); + return (0); bsize = dbtob(swp->sw_bsize); blk = from / bsize; /* Another fast one.. no blocks in range. */ if (blk >= swp->sw_nblocks) - return(0); + return (0); bit = atop(from % bsize); /* @@ -1149,18 +1147,18 @@ swap_pager_remove(pager, from, to) if (swp->sw_cnt < 0) panic("swap_pager_remove: sw_cnt < 0"); #endif - return(cnt); + return (cnt); } /* - * swap_pager_next: + * swap_pager_next: * - * This is called via the vm_pager_next path and - * will return the offset of the next page (addresswise) - * which this pager is backing. If there are no more - * pages we will return the size of the pager's managed - * space (which by definition is larger than any page's - * offset). + * This is called via the vm_pager_next path and + * will return the offset of the next page (addresswise) + * which this pager is backing. If there are no more + * pages we will return the size of the pager's managed + * space (which by definition is larger than any page's + * offset). */ static vm_offset_t swap_pager_next(pager, offset) @@ -1179,22 +1177,22 @@ swap_pager_next(pager, offset) swp = (sw_pager_t)pager->pg_data; /* - * If we back no pages, just return our size. XXX Can - * this ever be the case? At least all remove calls - * should be through vm_object_remove_from_pager which - * also deallocates the pager when it no longer backs any - * pages. 
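The swap pager code above repeatedly derives, from a byte offset, an index into sw_blocks[] plus a per-page bit inside that chunk's swb_mask. Below is a minimal userland model of that arithmetic; the sector size, page size, and sample values are illustrative assumptions, not anything fixed by the diff.

#include <stdio.h>

#define PAGE_SHIFT      12                      /* assume 4k pages */
#define DEV_BSHIFT      9                       /* assume 512b sectors */
#define dbtob(x)        ((x) << DEV_BSHIFT)
#define atop(x)         ((x) >> PAGE_SHIFT)

int
main(void)
{
        unsigned long sw_bsize = 32;            /* disk blocks per chunk */
        unsigned long offset = 0x23000;         /* byte offset into object */
        unsigned long bsize = dbtob(sw_bsize);  /* bytes per chunk */
        unsigned long ix = offset / bsize;      /* index into sw_blocks[] */
        unsigned long bit = atop(offset % bsize); /* page bit in swb_mask */

        printf("chunk %lu, mask bit %lu\n", ix, bit);
        return (0);
}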
Left is the initial case: can a swap-pager - * be created without any pages put into it? + * If we back no pages, just return our size. XXX Can + * this ever be the case? At least all remove calls + * should be through vm_object_remove_from_pager which + * also deallocates the pager when it no longer backs any + * pages. Left is the initial case: can a swap-pager + * be created without any pages put into it? */ if (swp->sw_cnt == 0) - return(swp->sw_osize); + return (swp->sw_osize); bsize = dbtob(swp->sw_bsize); blk = offset / bsize; - /* Another fast one.. no blocks in range. */ + /* Another fast one.. no blocks in range. */ if (blk >= swp->sw_nblocks) - return(swp->sw_osize); + return (swp->sw_osize); bit = atop(offset % bsize); to_blk = swp->sw_osize / bsize; to_bit = atop(swp->sw_osize % bsize); @@ -1219,7 +1217,7 @@ swap_pager_next(pager, offset) */ mask &= swb->swb_mask; if (mask) - return(blk * bsize + (ffs (mask) - 1) * PAGE_SIZE); + return (blk * bsize + (ffs (mask) - 1) * PAGE_SIZE); /* * If we handled the end of range now, this @@ -1233,7 +1231,7 @@ swap_pager_next(pager, offset) swb++; mask = ~0; } - return swp->sw_osize; + return (swp->sw_osize); } /* @@ -1246,7 +1244,7 @@ swap_pager_count(pager) vm_pager_t pager; { #ifndef notyet - return((sw_pager_t)pager->pg_data)->sw_cnt; + return ((sw_pager_t)pager->pg_data)->sw_cnt; #else sw_pager_t swp; sw_blk_t swb; @@ -1254,10 +1252,10 @@ swap_pager_count(pager) swp = (sw_pager_t)pager->pg_data; if (swp->sw_blocks == NULL) - return 0; + return (0); for (i = 0; i < swp->sw_nblocks; i++) cnt += count_bits(swp->sw_blocks[i].swb_mask); - return cnt; + return (cnt); #endif } @@ -1276,5 +1274,5 @@ count_bits(x) cnt += x & 1; x >>= 1; } - return(cnt); + return (cnt); } diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h index 14346cb8a05..964adb26dec 100644 --- a/sys/vm/vm_extern.h +++ b/sys/vm/vm_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_extern.h,v 1.9 1997/01/07 05:37:35 tholo Exp $ */ +/* $OpenBSD: vm_extern.h,v 1.10 1997/04/17 01:25:17 niklas Exp $ */ /* $NetBSD: vm_extern.h,v 1.20 1996/04/23 12:25:23 christos Exp $ */ /*- @@ -98,7 +98,13 @@ void swapout_threads __P((void)); int swfree __P((struct proc *, int)); void swstrategy __P((struct buf *)); void thread_block __P((void)); -void thread_sleep __P((void *, simple_lock_t, boolean_t)); +void thread_sleep_msg __P((void *, simple_lock_t, + boolean_t, char *)); + +/* backwards compatibility */ +#define thread_sleep(event, lock, ruptible) \ + thread_sleep_msg((event), (lock), (ruptible), "thrd_sleep") + /* * This define replaces a thread_wakeup prototype, as thread_wakeup * was solely a wrapper around wakeup. 
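The vm_extern.h hunk above turns thread_sleep into a macro over the new thread_sleep_msg, which carries a wait-message string through to tsleep. A small standalone sketch of the shim, with the actual sleep mocked by a printf and the simple_lock_t/boolean_t types reduced to plain pointers and ints; only the macro body is taken from the diff.

#include <stdio.h>

/* Mock of the new interface; the kernel version ends up in tsleep(). */
static void
thread_sleep_msg(void *event, void *lock, int ruptible, char *msg)
{
        (void)lock; (void)ruptible;
        printf("sleeping on %p, wmesg \"%s\"\n", event, msg);
}

/* backwards compatibility, exactly as the hunk above defines it */
#define thread_sleep(event, lock, ruptible) \
        thread_sleep_msg((event), (lock), (ruptible), "thrd_sleep")

int
main(void)
{
        int event, lock;

        thread_sleep(&event, &lock, 0);                 /* old callers */
        thread_sleep_msg(&event, &lock, 0, "swpgio");   /* new callers */
        return (0);
}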
@@ -145,7 +151,6 @@ int vsunlock __P((caddr_t, u_int)); /* Machine dependent portion */ void vmapbuf __P((struct buf *, vm_size_t)); void vunmapbuf __P((struct buf *, vm_size_t)); -void remrq __P((struct proc *)); void pagemove __P((caddr_t, caddr_t, size_t)); #ifdef __FORK_BRAINDAMAGE int cpu_fork __P((struct proc *, struct proc *)); diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index e591f555d16..5a28eb128ab 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_fault.c,v 1.5 1996/11/23 21:47:14 kstailey Exp $ */ +/* $OpenBSD: vm_fault.c,v 1.6 1997/04/17 01:25:17 niklas Exp $ */ /* $NetBSD: vm_fault.c,v 1.18 1996/05/20 17:40:02 mrg Exp $ */ /* @@ -145,12 +145,12 @@ vm_fault(map, vaddr, fault_type, change_wiring) } #define UNLOCK_THINGS { \ - object->paging_in_progress--; \ + vm_object_paging_end(object); \ vm_object_unlock(object); \ if (object != first_object) { \ vm_object_lock(first_object); \ FREE_PAGE(first_m); \ - first_object->paging_in_progress--; \ + vm_object_paging_end(first_object); \ vm_object_unlock(first_object); \ } \ UNLOCK_MAP; \ @@ -191,11 +191,7 @@ vm_fault(map, vaddr, fault_type, change_wiring) vm_object_lock(first_object); first_object->ref_count++; -#ifdef DIAGNOSTIC - if (first_object->paging_in_progress == 0xdead) - panic("vm_fault: first_object deallocated"); -#endif - first_object->paging_in_progress++; + vm_object_paging_begin(first_object); /* * INVARIANTS (through entire routine): @@ -407,7 +403,7 @@ vm_fault(map, vaddr, fault_type, change_wiring) * in the top object with zeros. */ if (object != first_object) { - object->paging_in_progress--; + vm_object_paging_end(object); vm_object_unlock(object); object = first_object; @@ -425,14 +421,10 @@ vm_fault(map, vaddr, fault_type, change_wiring) else { vm_object_lock(next_object); if (object != first_object) - object->paging_in_progress--; + vm_object_paging_end(object); vm_object_unlock(object); object = next_object; -#ifdef DIAGNOSTIC - if (object->paging_in_progress == 0xdead) - panic("vm_fault: object deallocated (1)"); -#endif - object->paging_in_progress++; + vm_object_paging_begin(object); } } @@ -508,7 +500,7 @@ vm_fault(map, vaddr, fault_type, change_wiring) * We no longer need the old page or object. */ PAGE_WAKEUP(m); - object->paging_in_progress--; + vm_object_paging_end(object); vm_object_unlock(object); /* @@ -529,15 +521,10 @@ vm_fault(map, vaddr, fault_type, change_wiring) * But we have to play ugly games with * paging_in_progress to do that... 
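Throughout vm_fault.c, and the rest of the diff, open-coded paging_in_progress updates give way to vm_object_paging_begin/end. A self-contained, single-threaded model of the counter discipline those macros enforce; locking is omitted and the wakeup side is a printf standing in for vm_object_wakeup.

#include <assert.h>
#include <stdio.h>

struct vm_object { int paging_in_progress; };

static void
vm_object_paging_begin(struct vm_object *o)
{
        o->paging_in_progress++;
}

static void
vm_object_paging_end(struct vm_object *o)
{
        assert(o->paging_in_progress > 0);      /* ends must pair begins */
        if (--o->paging_in_progress == 0)
                printf("wakeup(object)\n");     /* models vm_object_wakeup */
}

int
main(void)
{
        struct vm_object obj = { 0 };

        vm_object_paging_begin(&obj);   /* bracket a pager operation */
        vm_object_paging_begin(&obj);   /* nesting is fine */
        vm_object_paging_end(&obj);
        vm_object_paging_end(&obj);     /* only this one prints wakeup */
        return (0);
}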
*/ - object->paging_in_progress--; + vm_object_paging_end(object); vm_object_collapse(object); -#ifdef DIAGNOSTIC - if (object->paging_in_progress == 0xdead) - panic("vm_fault: object deallocated (2)"); -#endif - object->paging_in_progress++; - } - else { + vm_object_paging_begin(object); + } else { prot &= ~VM_PROT_WRITE; m->flags |= PG_COPYONWRITE; } diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 2851697956f..10ef84b937a 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_glue.c,v 1.22 1997/01/07 05:37:36 tholo Exp $ */ +/* $OpenBSD: vm_glue.c,v 1.23 1997/04/17 01:25:18 niklas Exp $ */ /* $NetBSD: vm_glue.c,v 1.55.4.1 1996/06/13 17:25:45 cgd Exp $ */ /* @@ -74,6 +74,7 @@ #endif #include <vm/vm.h> +#include <vm/vm_extern.h> #include <vm/vm_page.h> #include <vm/vm_kern.h> @@ -231,7 +232,7 @@ vm_fork(p1, p2) shmfork(p1, p2); #endif -#if !defined(pc532) && !defined(vax) +#if !defined(vax) /* * Allocate a wired-down (for now) pcb and kernel stack for the process */ @@ -578,10 +579,11 @@ thread_block() } void -thread_sleep(event, lock, ruptible) +thread_sleep_msg(event, lock, ruptible, msg) void *event; simple_lock_t lock; boolean_t ruptible; + char *msg; { int s = splhigh(); @@ -591,7 +593,7 @@ thread_sleep(event, lock, ruptible) curproc->p_thread = event; simple_unlock(lock); if (curproc->p_thread) - tsleep(event, PVM, "thrd_sleep", 0); + tsleep(event, PVM, msg, 0); splx(s); } diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 4ebf2bbbd3b..b15b91e71e2 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_kern.c,v 1.3 1996/08/02 00:06:01 niklas Exp $ */ +/* $OpenBSD: vm_kern.c,v 1.4 1997/04/17 01:25:18 niklas Exp $ */ /* $NetBSD: vm_kern.c,v 1.17.6.1 1996/06/13 17:21:28 cgd Exp $ */ /* @@ -74,6 +74,7 @@ #include <sys/proc.h> #include <vm/vm.h> +#include <vm/vm_extern.h> #include <vm/vm_page.h> #include <vm/vm_pageout.h> #include <vm/vm_kern.h> diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index f8089e268ba..af5713ea852 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_map.c,v 1.3 1996/07/23 23:54:23 deraadt Exp $ */ +/* $OpenBSD: vm_map.c,v 1.4 1997/04/17 01:25:19 niklas Exp $ */ /* $NetBSD: vm_map.c,v 1.23 1996/02/10 00:08:08 christos Exp $ */ /* @@ -1753,13 +1753,11 @@ vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) * Make a copy of the object. */ temp_object = dst_entry->object.vm_object; - vm_object_copy(src_entry->object.vm_object, - src_entry->offset, - (vm_size_t)(src_entry->end - - src_entry->start), - &dst_entry->object.vm_object, - &dst_entry->offset, - &src_needs_copy); + vm_object_copy(src_entry->object.vm_object, src_entry->offset, + (vm_size_t)(src_entry->end - src_entry->start), + &dst_entry->object.vm_object, &dst_entry->offset, + &src_needs_copy); + /* * If we didn't get a copy-object now, mark the * source map entry so that a shadow will be created @@ -1770,9 +1768,12 @@ vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) /* * The destination always needs to have a shadow - * created. + * created, unless it's a zero-fill entry. 
*/ - dst_entry->needs_copy = TRUE; + if (dst_entry->object.vm_object != NULL) + dst_entry->needs_copy = TRUE; + else + dst_entry->needs_copy = FALSE; /* * Mark the entries copy-on-write, so that write-enabling diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c index 217455c559b..3d96b889c5a 100644 --- a/sys/vm/vm_meter.c +++ b/sys/vm/vm_meter.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_meter.c,v 1.3 1996/10/23 15:38:36 deraadt Exp $ */ +/* $OpenBSD: vm_meter.c,v 1.4 1997/04/17 01:25:20 niklas Exp $ */ /* $NetBSD: vm_meter.c,v 1.18 1996/02/05 01:53:59 christos Exp $ */ /* @@ -203,7 +203,7 @@ vmtotal(totalp) entry->object.vm_object == NULL) continue; entry->object.vm_object->flags |= OBJ_ACTIVE; - paging |= entry->object.vm_object->paging_in_progress; + paging |= vm_object_paging(entry->object.vm_object); } if (paging) totalp->t_pw++; diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 183d9bb0780..951a84e0939 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -1,7 +1,37 @@ -/* $OpenBSD: vm_object.c,v 1.14 1997/03/26 18:45:31 niklas Exp $ */ -/* $NetBSD: vm_object.c,v 1.34 1996/02/28 22:35:35 gwr Exp $ */ +/* $OpenBSD: vm_object.c,v 1.15 1997/04/17 01:25:20 niklas Exp $ */ +/* $NetBSD: vm_object.c,v 1.46 1997/03/30 20:56:12 mycroft Exp $ */ -/* +/*- + * Copyright (c) 1997 Charles M. Hannum. All rights reserved. + * Copyright (c) 1997 Niklas Hallqvist. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles M. Hannum. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. 
* @@ -129,11 +159,12 @@ int vmdebug = VMDEBUG; #endif void _vm_object_allocate __P((vm_size_t, vm_object_t)); -int vm_object_collapse_aux __P((vm_object_t)); int vm_object_bypass __P((vm_object_t)); -void vm_object_set_shadow __P((vm_object_t, vm_object_t)); +void vm_object_collapse_internal __P((vm_object_t, vm_object_t *)); +int vm_object_overlay __P((vm_object_t)); int vm_object_remove_from_pager __P((vm_object_t, vm_offset_t, vm_offset_t)); +void vm_object_set_shadow __P((vm_object_t, vm_object_t)); /* * vm_object_init: @@ -173,8 +204,8 @@ vm_object_allocate(size) { register vm_object_t result; - result = (vm_object_t)malloc((u_long)sizeof *result, - M_VMOBJ, M_WAITOK); + result = (vm_object_t)malloc((u_long)sizeof *result, M_VMOBJ, + M_WAITOK); _vm_object_allocate(size, result); @@ -242,7 +273,7 @@ vm_object_reference(object) */ void vm_object_deallocate(object) - register vm_object_t object; + vm_object_t object; { /* * While "temp" is used for other things as well, we @@ -254,9 +285,8 @@ vm_object_deallocate(object) while (object != NULL) { /* - * The cache holds a reference (uncounted) to - * the object; we must lock it before removing - * the object. + * The cache holds a reference (uncounted) to the object; we + * must lock it before removing the object. */ vm_object_cache_lock(); @@ -266,34 +296,47 @@ vm_object_deallocate(object) */ vm_object_lock(object); if (--(object->ref_count) != 0) { + vm_object_unlock(object); + vm_object_cache_unlock(); + /* - * If this is a deallocation of a shadow - * reference (which it is unless it's the - * first time round) and this operation made - * us singly-shadowed, try to collapse us - * with our shadower. + * If this is a deallocation of a shadow reference + * (which it is unless it's the first time round) and + * this operation made us singly-shadowed, try to + * collapse us with our shadower. Otherwise we're + * ready. */ - vm_object_unlock(object); if (temp != NULL && (temp = object->shadowers.lh_first) != NULL && temp->shadowers_list.le_next == NULL) { vm_object_lock(temp); - vm_object_collapse(temp); - vm_object_unlock(temp); - } - /* - * If there are still references, then - * we are done. - */ - vm_object_cache_unlock(); - return; + /* + * This is a bit tricky: the temp object can + * go away while collapsing, check the + * vm_object_collapse_internal comments for + * details. In this case we get an object + * back to deallocate (it's done like this + * to prevent potential recursion and hence + * kernel stack overflow). In the normal case + * we won't get an object back, if so, we are + * ready and may return. + */ + vm_object_collapse_internal(temp, &object); + if (object != NULL) { + vm_object_lock(object); + vm_object_cache_lock(); + } else { + vm_object_unlock(temp); + return; + } + } else + return; } /* - * See if this object can persist. If so, enter - * it in the cache, then deactivate all of its - * pages. + * See if this object can persist. If so, enter it in the + * cache, then deactivate all of its pages. */ if (object->flags & OBJ_CANPERSIST) { @@ -315,9 +358,12 @@ vm_object_deallocate(object) vm_object_remove(object->pager); vm_object_cache_unlock(); + /* + * Deallocate the object, and move on to the backing object. 
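The reworked vm_object_deallocate above avoids recursion: a collapse can orphan the backing object, and rather than deallocating it recursively, vm_object_collapse_internal feeds it back through a pointer so the caller's loop picks it up. A mocked model of just that control flow; the structures, counts, and the trivial collapse() are illustrative, not the kernel's code.

#include <stdlib.h>

struct obj { struct obj *shadow; int ref_count; };

/*
 * Models vm_object_collapse_internal: instead of deallocating an
 * orphaned backing object itself (recursion), it would feed it back.
 */
static void
collapse(struct obj *o, struct obj **retry)
{
        (void)o;
        *retry = NULL;          /* normal case: no extra work handed back */
}

/* Models vm_object_deallocate: strictly iterative, never recursive. */
static void
deallocate(struct obj *o)
{
        struct obj *next;

        while (o != NULL) {
                if (--o->ref_count > 0) {
                        collapse(o, &next);     /* may return more work */
                } else {
                        next = o->shadow;       /* walk down the chain */
                        free(o);
                }
                o = next;
        }
}

int
main(void)
{
        struct obj *a = calloc(1, sizeof *a);
        struct obj *b = calloc(1, sizeof *b);

        a->shadow = b;
        a->ref_count = b->ref_count = 1;
        deallocate(a);          /* frees a, then iterates to b */
        return (0);
}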
+ */ temp = object->shadow; + vm_object_reference(temp); vm_object_terminate(object); - /* unlocks and deallocates object */ object = temp; } } @@ -337,8 +383,7 @@ vm_object_terminate(object) vm_object_t shadow_object; /* - * Setters of paging_in_progress might be interested that this object - * is going away as soon as we get a grip on it. + * Protect against simultaneous collapses. */ object->flags |= OBJ_FADING; @@ -346,10 +391,7 @@ vm_object_terminate(object) * Wait until the pageout daemon is through with the object or a * potential collapse operation is finished. */ - while (object->paging_in_progress) { - vm_object_sleep(object, object, FALSE); - vm_object_lock(object); - } + vm_object_paging_wait(object); /* * Detach the object from its shadow if we are the shadow's @@ -362,7 +404,8 @@ vm_object_terminate(object) shadow_object->copy = NULL; #if 0 else if (shadow_object->copy != NULL) - panic("vm_object_terminate: copy/shadow inconsistency"); + panic("vm_object_terminate: " + "copy/shadow inconsistency"); #endif vm_object_unlock(shadow_object); } @@ -466,10 +509,8 @@ again: /* * Wait until the pageout daemon is through with the object. */ - while (object->paging_in_progress) { - vm_object_sleep(object, object, FALSE); - vm_object_lock(object); - } + vm_object_paging_wait(object); + /* * Loop through the object page list cleaning as necessary. */ @@ -515,12 +556,7 @@ again: pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_READ); if (!(p->flags & PG_CLEAN)) { p->flags |= PG_BUSY; -#ifdef DIAGNOSTIC - if (object->paging_in_progress == 0xdead) - panic("vm_object_page_clean: " - "object deallocated"); -#endif - object->paging_in_progress++; + vm_object_paging_begin(object); vm_object_unlock(object); /* * XXX if put fails we mark the page as @@ -529,12 +565,12 @@ again: */ if (vm_pager_put(object->pager, p, syncio)) { printf("%s: pager_put error\n", - "vm_object_page_clean"); + "vm_object_page_clean"); p->flags |= PG_CLEAN; noerror = FALSE; } vm_object_lock(object); - object->paging_in_progress--; + vm_object_paging_end(object); if (!de_queue && onqueue) { vm_page_lock_queues(); if (onqueue > 0) @@ -703,7 +739,7 @@ vm_object_copy(src_object, src_offset, size, (src_object->flags & OBJ_INTERNAL)) { /* - * Make another reference to the object + * Make another reference to the object. */ src_object->ref_count++; @@ -751,7 +787,7 @@ Retry1: if (!vm_object_lock_try(old_copy)) { vm_object_unlock(src_object); - /* should spin a bit here... */ + /* XXX should spin a bit here... */ vm_object_lock(src_object); goto Retry1; } @@ -815,15 +851,13 @@ Retry2: * object. Locking of new_copy not needed. We * have the only pointer. */ - src_object->ref_count--; /* remove ref. from old_copy */ vm_object_set_shadow(old_copy, new_copy); - new_copy->ref_count++; /* locking not needed - we - have the only pointer */ - vm_object_unlock(old_copy); /* done with old_copy */ + vm_object_unlock(old_copy); } - new_start = (vm_offset_t)0; /* always shadow original at 0 */ - new_end = (vm_offset_t)new_copy->size; /* for the whole object */ + /* Always shadow original at 0 for the whole object */ + new_start = (vm_offset_t)0; + new_end = (vm_offset_t)new_copy->size; /* * Point the new copy at the existing object. 
@@ -831,7 +865,6 @@ Retry2: vm_object_set_shadow(new_copy, src_object); new_copy->shadow_offset = new_start; - src_object->ref_count++; src_object->copy = new_copy; /* @@ -872,6 +905,11 @@ vm_object_shadow(object, offset, length) source = *object; +#ifdef DIAGNOSTIC + if (source == NULL) + panic("vm_object_shadow: attempt to shadow null object"); +#endif + /* * Allocate a new object with the given length */ @@ -879,14 +917,13 @@ vm_object_shadow(object, offset, length) panic("vm_object_shadow: no object for shadowing"); /* - * The new object shadows the source object, adding - * a reference to it. Our caller changes his reference - * to point to the new object, removing a reference to - * the source object. Net result: no change of reference - * count. + * The new object shadows the source object. Our caller changes his + * reference to point to the new object, removing a reference to the + * source object. */ vm_object_lock(source); vm_object_set_shadow(result, source); + source->ref_count--; vm_object_unlock(source); /* @@ -1030,7 +1067,6 @@ vm_object_remove(pager) /* * vm_object_cache_clear removes all objects from the cache. - * */ void vm_object_cache_clear() @@ -1079,7 +1115,7 @@ vm_object_remove_from_pager(object, from, to) cnt = vm_pager_remove(pager, from, to); - /* If pager became empty, remove it. */ + /* If pager became empty, remove it. */ if (cnt > 0 && vm_pager_count(pager) == 0) { vm_pager_deallocate(pager); object->pager = NULL; @@ -1087,8 +1123,15 @@ vm_object_remove_from_pager(object, from, to) return(cnt); } +#define FREE_PAGE(m) do { \ + PAGE_WAKEUP(m); \ + vm_page_lock_queues(); \ + vm_page_free(m); \ + vm_page_unlock_queues(); \ +} while(0) + /* - * vm_object_collapse_aux: + * vm_object_overlay: * * Internal function to vm_object_collapse called when * it has been shown that a collapse operation is likely @@ -1096,7 +1139,7 @@ vm_object_remove_from_pager(object, from, to) * referenced by me and that paging is not in progress. */ int -vm_object_collapse_aux(object) +vm_object_overlay(object) vm_object_t object; { vm_object_t backing_object = object->shadow; @@ -1104,35 +1147,36 @@ vm_object_collapse_aux(object) vm_size_t size = object->size; vm_offset_t offset, paged_offset; vm_page_t backing_page, page = NULL; + int rv; #ifdef DEBUG if (vmdebug & VMDEBUG_COLLAPSE) - printf("vm_object_collapse_aux(0x%x)\n", object); + printf("vm_object_overlay(0x%p)\n", object); #endif /* + * Protect against multiple collapses. + */ + backing_object->flags |= OBJ_FADING; + + /* * The algorithm used is roughly like this: - * (1) Trim a potential pager in the backing - * object so it'll only hold pages in reach. - * (2) Loop over all the resident pages in the - * shadow object and either remove them if - * they are shadowed or move them into the + * (1) Trim a potential pager in the backing object so it'll only hold + * pages in reach. + * (2) Loop over all the resident pages in the shadow object and + * either remove them if they are shadowed or move them into the * shadowing object. - * (3) Loop over the paged out pages in the - * shadow object. Start pageins on those - * that aren't shadowed, and just deallocate - * the others. In each iteration check if - * other users of these objects have caused - * pageins resulting in new resident pages. - * This can happen while we are waiting for - * a pagein of ours. If such resident pages - * turn up, restart from (2). + * (3) Loop over the paged out pages in the shadow object. 
Start + * pageins on those that aren't shadowed, and just deallocate + * the others. In each iteration check if other users of these + * objects have caused pageins resulting in new resident pages. + * This can happen while we are waiting for a page or a pagein of + * ours. If such resident pages turn up, restart from (2). */ /* - * As a first measure we know we can discard - * everything that the shadowing object doesn't - * shadow. + * As a first measure we know we can discard everything that the + * shadowing object doesn't shadow. */ if (backing_object->pager != NULL) { if (backing_offset > 0) @@ -1144,257 +1188,217 @@ vm_object_collapse_aux(object) } /* - * This is the outer loop, iterating until all resident and - * paged out pages in the shadow object are drained. + * At this point, there may still be asynchronous paging in the parent + * object. Any pages being paged in will be represented by fake pages. + * There are three cases: + * 1) The page is being paged in from the parent object's own pager. + * In this case, we just delete our copy, since it's not needed. + * 2) The page is being paged in from the backing object. We prevent + * this case by waiting for paging to complete on the backing object + * before continuing. + * 3) The page is being paged in from a backing object behind the one + * we're deleting. We'll never notice this case, because the + * backing object we're deleting won't have the page. */ - paged_offset = 0; - while (backing_object->memq.tqh_first != NULL || - backing_object->pager != NULL) { - /* - * First of all get rid of resident pages in the - * backing object. We can guarantee to remove - * every page thus we can write the while-test - * like this. - */ - while ((backing_page = backing_object->memq.tqh_first) != - NULL) { - /* - * If the page is outside the shadowing object's - * range or if the page is shadowed (either by a - * resident "non-fake" page or a paged out one) we - * can discard it right away. Otherwise we need - * to move the page to the shadowing object, - * perhaps waking up waiters for "fake" pages - * first. - */ - if (backing_page->offset < backing_offset || - (offset = backing_page->offset - backing_offset) >= - size || - ((page = vm_page_lookup(object, offset)) != NULL && - !(page->flags & PG_FAKE)) || - (object->pager != NULL && - vm_pager_has_page(object->pager, offset))) { - /* - * Just discard the page, noone needs it. - */ - vm_page_lock_queues(); - vm_page_free(backing_page); - vm_page_unlock_queues(); - } else { - /* - * If a "fake" page was found, someone may - * be waiting for it. Wake her up and - * then remove the page. - */ - if (page) { - PAGE_WAKEUP(page); - vm_page_lock_queues(); - vm_page_free(page); - vm_page_unlock_queues(); - } + vm_object_unlock(object); +retry: + vm_object_paging_wait(backing_object); - /* - * If the backing page was ever paged out, - * it was due to it being dirty at one - * point. Unless we have no pager - * allocated to the front object (thus - * will move forward the shadow's one), - * mark it dirty again so it won't be - * thrown away without being paged out to - * the front pager. - */ - if (object->pager != NULL && - vm_object_remove_from_pager(backing_object, - backing_page->offset, - backing_page->offset + PAGE_SIZE)) - backing_page->flags &= ~PG_CLEAN; + /* + * While we were asleep, the parent object might have been deleted. If + * so, the backing object will now have only one reference (the one we + * hold). 
If this happened, just deallocate the backing object and + * return failure status so vm_object_collapse() will stop. This will + * continue vm_object_deallocate() where it stopped due to our + * reference. + */ + if (backing_object->ref_count == 1) + goto fail; + vm_object_lock(object); - /* Move the page up front. */ - vm_page_rename(backing_page, object, offset); - } - } + /* + * Next, get rid of resident pages in the backing object. We can + * guarantee to remove every page thus we can write the while-test like + * this. + */ + while ((backing_page = backing_object->memq.tqh_first) != NULL) { + offset = backing_page->offset - backing_offset; - /* - * If there isn't a pager in the shadow object, we're - * ready. Take the easy way out. - */ - if (backing_object->pager == NULL) - break; +#ifdef DIAGNOSTIC + if (backing_page->flags & (PG_BUSY | PG_FAKE)) + panic("vm_object_overlay: " + "busy or fake page in backing_object"); +#endif /* - * If the shadowing object doesn't have a pager - * the easiest thing to do now is to just move the - * backing pager up front and everything is done. + * If the page is outside the shadowing object's range or if + * the page is shadowed (either by a resident page or a paged + * out one) we can discard it right away. Otherwise we need to + * move the page to the shadowing object. */ - if (object->pager == NULL) { - object->pager = backing_object->pager; - object->paging_offset = backing_object->paging_offset + - backing_offset; - backing_object->pager = NULL; - break; + if (backing_page->offset < backing_offset || offset >= size || + ((page = vm_page_lookup(object, offset)) != NULL) || + (object->pager != NULL && + vm_pager_has_page(object->pager, offset))) { + /* + * Just discard the page, noone needs it. This + * includes removing the possible backing store too. + */ + if (backing_object->pager != NULL) + vm_object_remove_from_pager(backing_object, + backing_page->offset, + backing_page->offset + PAGE_SIZE); + vm_page_lock_queues(); + vm_page_free(backing_page); + vm_page_unlock_queues(); + } else { + /* + * If the backing page was ever paged out, it was due + * to it being dirty at one point. Unless we have no + * pager allocated to the front object (thus will move + * forward the shadow's one), mark it dirty again so it + * won't be thrown away without being paged out to the + * front pager. + * + * XXX + * Should be able to move a page from one pager to + * another. + */ + if (object->pager != NULL && + vm_object_remove_from_pager(backing_object, + backing_page->offset, + backing_page->offset + PAGE_SIZE)) + backing_page->flags &= ~PG_CLEAN; + + /* Move the page up front. */ + vm_page_rename(backing_page, object, offset); } + } + + /* + * If the shadowing object doesn't have a pager the easiest + * thing to do now is to just move the backing pager up front + * and everything is done. + */ + if (object->pager == NULL && backing_object->pager != NULL) { + object->pager = backing_object->pager; + object->paging_offset = backing_object->paging_offset + + backing_offset; + backing_object->pager = NULL; + goto done; + } + + /* + * What's left to do is to find all paged out pages in the + * backing pager and either discard or move it to the front + * object. We need to recheck the resident page set as a + * pagein might have given other threads the chance to, via + * readfaults, page in another page into the resident set. In + * this case we need to retry getting rid of pages from core. 
+ */ + paged_offset = 0; + while (backing_object->pager != NULL && + (paged_offset = vm_pager_next(backing_object->pager, + paged_offset)) < backing_object->size) { + offset = paged_offset - backing_offset; /* - * What's left to do is to find all paged out - * pages in the backing pager and either discard - * or move it to the front object. We need to - * recheck the resident page set as a pagein might - * have given other threads the chance to, via - * readfaults, page in another page into the - * resident set. In this case the outer loop must - * get reentered. That is also the case if some other - * thread removes the front pager, a case that has - * been seen... + * If the parent object already has this page, delete it. + * Otherwise, start a pagein. */ - while (backing_object->memq.tqh_first == NULL && - backing_object->pager != NULL && object->pager != NULL && - (paged_offset = vm_pager_next(backing_object->pager, - paged_offset)) < backing_object->size) { + if (((page = vm_page_lookup(object, offset)) == NULL) && + (object->pager == NULL || + !vm_pager_has_page(object->pager, offset))) { + vm_object_unlock(object); + /* - * If the shadowing object has this page, get - * rid of it from the backing pager. Trust - * the loop condition to get us out of here - * quickly if we remove the last paged out page. - * - * XXX Would clustering several pages at a time - * be a win in this situation? + * First allocate a page and mark it busy so another + * thread won't try to start another pagein. */ - if (((page = vm_page_lookup(object, - paged_offset - backing_offset)) == NULL || - (page->flags & PG_FAKE)) && - !vm_pager_has_page(object->pager, - paged_offset - backing_offset)) { - /* - * If a "fake" page was found, someone - * may be waiting for it. Wake her up - * and then remove the page. - */ - if (page) { - PAGE_WAKEUP(page); - vm_page_lock_queues(); - vm_page_free(page); - vm_page_unlock_queues(); - } - /* - * Suck the page from the pager and give - * it to the shadowing object. - */ + backing_page = vm_page_alloc(backing_object, + paged_offset); + if (backing_page == NULL) { + vm_object_unlock(backing_object); + VM_WAIT; + vm_object_lock(backing_object); + goto retry; + } + backing_page->flags |= PG_BUSY; + #ifdef DEBUG - if (vmdebug & VMDEBUG_COLLAPSE_PAGEIN) - printf("vm_object_collapse_aux: " - "pagein needed\n"); + if (vmdebug & VMDEBUG_COLLAPSE_PAGEIN) + printf("vm_object_overlay: pagein needed\n"); #endif - /* - * First allocate a page and mark it - * busy so another thread won't try - * to start another pagein. - */ - for (;;) { - backing_page = - vm_page_alloc(backing_object, - paged_offset); - if (backing_page) - break; - VM_WAIT; - } - backing_page->flags |= PG_BUSY; + /* + * Second, start paging it in. If this fails, + * what can we do but punt? + */ + vm_object_paging_begin(backing_object); + vm_object_unlock(backing_object); + cnt.v_pageins++; + rv = vm_pager_get_pages(backing_object->pager, + &backing_page, 1, TRUE); + vm_object_lock(backing_object); + vm_object_paging_end(backing_object); - /* - * Second, start paging it in. If this - * fails, what can we do but punt? - * Even though the shadowing object - * isn't exactly paging we say so in - * order to not get simultaneous - * cascaded collapses. - */ - object->paging_in_progress++; - backing_object->paging_in_progress++; - if (vm_pager_get_pages(backing_object->pager, - &backing_page, 1, TRUE) != VM_PAGER_OK) { + /* + * IO error or page outside the range of the pager: + * cleanup and return an error. 
+ */ + if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) { + FREE_PAGE(backing_page); + goto fail; + } + + /* Handle the remaining failures. */ + if (rv != VM_PAGER_OK) { #ifdef DIAGNOSTIC - panic("vm_object_collapse_aux: " - "could not get paged out page"); + panic("vm_object_overlay: pager returned %d", + rv); +#else + FREE_PAGE(backing_page); + goto fail; #endif - return KERN_FAILURE; - } - cnt.v_pgpgin++; - - /* - * A fault might have issued other - * pagein operations. We must wait for - * them to complete, then we get to - * wakeup potential other waiters as - * well. - */ - while (backing_object->paging_in_progress != 1 - || object->paging_in_progress != 1) { - if (object->paging_in_progress != 1) { - vm_object_sleep(object, object, - FALSE); - vm_object_lock(object); - continue; - } - vm_object_sleep(backing_object, - backing_object, FALSE); - vm_object_lock(backing_object); - } - backing_object->paging_in_progress--; - object->paging_in_progress--; - thread_wakeup(backing_object); - thread_wakeup(object); - - /* - * During the pagein vm_object_terminate - * might have slept on our front object in - * order to remove it. If this is the - * case, we might as well stop all the - * collapse work right here. - */ - if (object->flags & OBJ_FADING) { - PAGE_WAKEUP(backing_page); - return KERN_FAILURE; - } - - /* - * Third, relookup in case pager changed - * page. Pager is responsible for - * disposition of old page if moved. - */ - backing_page = vm_page_lookup(backing_object, - paged_offset); + } + cnt.v_pgpgin++; - /* - * This page was once dirty, otherwise - * it hadn't been paged out in this - * shadow object. As we now remove the - * persistant store of the page, make - * sure it will be paged out in the - * front pager by dirtying it. - */ - backing_page->flags &= ~(PG_FAKE|PG_CLEAN); + /* + * Third, relookup in case pager changed page. Pager + * is responsible for disposition of old page if moved. + */ + backing_page = vm_page_lookup(backing_object, + paged_offset); - /* - * Fourth, move it up front, and wake up - * potential waiters. - */ - vm_page_rename(backing_page, object, - paged_offset - backing_offset); - PAGE_WAKEUP(backing_page); + /* + * This page was once dirty, otherwise it + * hadn't been paged out in this shadow object. + * As we now remove the persistant store of the + * page, make sure it will be paged out in the + * front pager by dirtying it. + */ + backing_page->flags &= ~(PG_FAKE | PG_CLEAN); - } - vm_object_remove_from_pager(backing_object, - paged_offset, paged_offset + PAGE_SIZE); - paged_offset += PAGE_SIZE; + /* + * Fourth, restart the process as we have slept, + * thereby letting other threads change object's + * internal structure. Don't be tempted to move it up + * front here, the parent may be gone already. + */ + PAGE_WAKEUP(backing_page); + goto retry; } + vm_object_remove_from_pager(backing_object, paged_offset, + paged_offset + PAGE_SIZE); + paged_offset += PAGE_SIZE; } +done: /* - * I've seen this condition once in an out of VM situation. - * For the moment I don't know why it occurred, although I suspect - * vm_object_page_clean can create a pager even if it won't use - * it. + * I've seen this condition once in an out of VM situation. For the + * moment I don't know why it occurred, although I suspect + * vm_object_page_clean can create a pager even if it won't use it. 
*/ if (backing_object->pager != NULL && vm_pager_count(backing_object->pager) == 0) { @@ -1404,37 +1408,32 @@ vm_object_collapse_aux(object) #ifdef DIAGNOSTIC if (backing_object->pager) - panic("vm_object_collapse_aux: backing_object->pager remains"); + panic("vm_object_overlay: backing_object->pager remains"); #endif /* * Object now shadows whatever backing_object did. - * Note that the reference to backing_object->shadow - * moves from within backing_object to within object. */ - if(backing_object->shadow) + if (backing_object->shadow) vm_object_lock(backing_object->shadow); vm_object_set_shadow(object, backing_object->shadow); - if(backing_object->shadow) { - vm_object_set_shadow(backing_object, NULL); + if (backing_object->shadow) vm_object_unlock(backing_object->shadow); - } object->shadow_offset += backing_object->shadow_offset; if (object->shadow != NULL && object->shadow->copy != NULL) - panic("vm_object_collapse_aux: we collapsed a copy-object!"); - - /* Fast cleanup is the only thing left now. */ - vm_object_unlock(backing_object); + panic("vm_object_overlay: we collapsed a copy-object!"); - simple_lock(&vm_object_list_lock); - TAILQ_REMOVE(&vm_object_list, backing_object, object_list); - vm_object_count--; - simple_unlock(&vm_object_list_lock); - - free((caddr_t)backing_object, M_VMOBJ); +#ifdef DIAGNOSTIC + if (backing_object->ref_count != 1) + panic("vm_object_overlay: backing_object still referenced"); +#endif object_collapses++; return KERN_SUCCESS; + +fail: + backing_object->flags &= ~OBJ_FADING; + return KERN_FAILURE; } /* @@ -1444,91 +1443,100 @@ vm_object_collapse_aux(object) * the object with its backing one is not allowed but there may * be an opportunity to bypass the backing object and shadow the * next object in the chain instead. + * + * If all of the pages in the backing object are shadowed by the parent + * object, the parent object no longer has to shadow the backing + * object; it can shadow the next one in the chain. */ int vm_object_bypass(object) vm_object_t object; { - register vm_object_t backing_object = object->shadow; - register vm_offset_t backing_offset = object->shadow_offset; - register vm_offset_t new_offset; - register vm_page_t p, pp; + vm_object_t backing_object = object->shadow; + vm_offset_t backing_offset = object->shadow_offset; + vm_offset_t offset, new_offset; + vm_page_t p, pp; /* - * If all of the pages in the backing object are - * shadowed by the parent object, the parent - * object no longer has to shadow the backing - * object; it can shadow the next one in the - * chain. - * - * The backing object must not be paged out - we'd - * have to check all of the paged-out pages, as - * well. + * XXX Punt if paging is going on. The issues in this case need to be + * looked into more closely. For now play it safe and return. There's + * no need to wait for it to end, as the expense will be much higher + * than the gain. */ - - if (backing_object->pager != NULL) + if (vm_object_paging(backing_object)) return KERN_FAILURE; /* - * Should have a check for a 'small' number - * of pages here. + * Should have a check for a 'small' number of pages here. */ - for (p = backing_object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) { new_offset = p->offset - backing_offset; /* - * If the parent has a page here, or if - * this page falls outside the parent, - * keep going. + * If the parent has a page here, or if this page falls outside + * the parent, keep going. * - * Otherwise, the backing_object must be - * left in the chain. 
+ * Otherwise, the backing_object must be left in the chain. */ - if (p->offset >= backing_offset && new_offset < object->size && ((pp = vm_page_lookup(object, new_offset)) == NULL || - (pp->flags & PG_FAKE))) { + (pp->flags & PG_FAKE)) && + (object->pager == NULL || + !vm_pager_has_page(object->pager, new_offset))) /* * Page still needed. Can't go any further. */ return KERN_FAILURE; + } + + if (backing_object->pager) { + /* + * Should have a check for a 'small' number of pages here. + */ + for (offset = vm_pager_next(backing_object->pager, 0); + offset < backing_object->size; + offset = vm_pager_next(backing_object->pager, + offset + PAGE_SIZE)) { + new_offset = offset - backing_offset; + + /* + * If the parent has a page here, or if this page falls + * outside the parent, keep going. + * + * Otherwise, the backing_object must be left in the + * chain. + */ + if (offset >= backing_offset && + new_offset < object->size && + ((pp = vm_page_lookup(object, new_offset)) == + NULL || (pp->flags & PG_FAKE)) && + (object->pager == NULL || + !vm_pager_has_page(object->pager, new_offset))) + /* + * Page still needed. Can't go any further. + */ + return KERN_FAILURE; } } /* - * Make the parent shadow the next object - * in the chain. Deallocating backing_object - * will not remove it, since its reference - * count is at least 2. + * Object now shadows whatever backing_object did. */ - - vm_object_lock(object->shadow); if (backing_object->shadow) vm_object_lock(backing_object->shadow); vm_object_set_shadow(object, backing_object->shadow); if (backing_object->shadow) vm_object_unlock(backing_object->shadow); - vm_object_reference(object->shadow); - vm_object_unlock(object->shadow); object->shadow_offset += backing_object->shadow_offset; /* - * Backing object might have had a copy pointer - * to us. If it did, clear it. + * Backing object might have had a copy pointer to us. If it did, + * clear it. */ - if (backing_object->copy == object) backing_object->copy = NULL; - /* Drop the reference count on backing_object. - * Since its ref_count was at least 2, it - * will not vanish; so we don't need to call - * vm_object_deallocate. - */ - backing_object->ref_count--; - vm_object_unlock(backing_object); object_bypasses++; return KERN_SUCCESS; } @@ -1536,65 +1544,70 @@ vm_object_bypass(object) /* * vm_object_collapse: * - * Collapse an object with the object backing it. - * Pages in the backing object are moved into the - * parent, and the backing object is deallocated. - * - * Requires that the object be locked and the page - * queues be unlocked. + * Collapse an object with the object backing it. Pages in the backing object + * are moved into the parent, and the backing object is deallocated. * + * Requires that the object be locked and the page queues be unlocked. */ void vm_object_collapse(object) - register vm_object_t object; + vm_object_t object; { + vm_object_collapse_internal(object, NULL); +} + +/* + * An internal to vm_object.c entry point to the collapsing logic, used by + * vm_object_deallocate to get rid of a potential recursion case. In that case + * an object to be deallocated is fed back via the retry_object pointer. + * External users will have that parameter wired to NULL, and then we are + * allowed to do vm_object_deallocate calls that may mutually recursive call us + * again. In that case it will only get one level deep and thus not be a real + * recursion. 
+ */ +void +vm_object_collapse_internal(object, retry_object) + vm_object_t object, *retry_object; +{ register vm_object_t backing_object; + int rv; + + /* We'd better initialize this one if the pointer is given. */ + if (retry_object) + *retry_object = NULL; - if (!vm_object_collapse_allowed) + if (!vm_object_collapse_allowed || object == NULL) return; - while (TRUE) { + do { /* * Verify that the conditions are right for collapse: * - * The object exists and no pages in it are currently - * being paged out. + * There is a backing object, and */ - if (object == NULL || object->paging_in_progress) - return; - - /* - * There is a backing object, and - */ - if ((backing_object = object->shadow) == NULL) return; - + vm_object_lock(backing_object); + /* - * ... - * The backing object is not read_only, - * and no pages in the backing object are - * currently being paged out. - * The backing object is internal. + * ... the backing object is not read_only, is internal and is + * not already being collapsed, ... */ - - if ((backing_object->flags & OBJ_INTERNAL) == 0 || - backing_object->paging_in_progress != 0) { + if ((backing_object->flags & (OBJ_INTERNAL | OBJ_FADING)) != + OBJ_INTERNAL) { vm_object_unlock(backing_object); return; } /* - * The backing object can't be a copy-object: - * the shadow_offset for the copy-object must stay - * as 0. Furthermore (for the 'we have all the - * pages' case), if we bypass backing_object and - * just shadow the next object in the chain, old - * pages from that object would then have to be copied - * BOTH into the (former) backing_object and into the - * parent object. + * The backing object can't be a copy-object: the shadow_offset + * for the copy-object must stay as 0. Furthermore (for the + * we have all the pages' case), if we bypass backing_object + * and just shadow the next object in the chain, old pages from + * that object would then have to be copied BOTH into the + *(former) backing_object and into the parent object. */ if (backing_object->shadow != NULL && backing_object->shadow->copy != NULL) { @@ -1603,26 +1616,50 @@ vm_object_collapse(object) } /* - * If there is exactly one reference to the backing - * object, we can collapse it into the parent, - * otherwise we might be able to bypass it completely. + * Grab a reference to the backing object so that it + * can't be deallocated behind our back. */ - - if (backing_object->ref_count == 1) { - if (vm_object_collapse_aux(object) != KERN_SUCCESS) { - vm_object_unlock(backing_object); - return; - } - } else - if (vm_object_bypass(object) != KERN_SUCCESS) { - vm_object_unlock(backing_object); - return; - } + backing_object->ref_count++; + +#ifdef DIAGNOSTIC + if (backing_object->ref_count == 1) + panic("vm_object_collapse: " + "collapsing unreferenced object"); +#endif + + /* + * If there is exactly one reference to the backing object, we + * can collapse it into the parent, otherwise we might be able + * to bypass it completely. + */ + rv = backing_object->ref_count == 2 ? + vm_object_overlay(object) : vm_object_bypass(object); + + /* + * Unlock and note we're ready with the backing object. If + * we are now the last referrer this will also deallocate the + * object itself. If the backing object has been orphaned + * and still have a shadow (it is possible in case of + * KERN_FAILURE from vm_object_overlay) this might lead to a + * recursion. 
However, if we are called from + * vm_object_deallocate, retry_object is not NULL and we are + * allowed to feedback the current backing object via that + * pointer. That way the recursion case turns into an + * iteration in vm_object_deallcate instead. + */ + if (retry_object != NULL && backing_object->ref_count == 1 && + backing_object->shadow != NULL) { + *retry_object = backing_object; + vm_object_unlock(backing_object); + return; + } + vm_object_unlock(backing_object); + vm_object_deallocate(backing_object); /* * Try again with this object's new backing object. */ - } + } while (rv == KERN_SUCCESS); } /* @@ -1658,30 +1695,28 @@ vm_object_page_remove(object, start, end) /* * Routine: vm_object_coalesce * Function: Coalesces two objects backing up adjoining - * regions of memory into a single object. + * regions of memory into a single object. * * returns TRUE if objects were combined. * - * NOTE: Only works at the moment if the second object is NULL - - * if it's not, which object do we lock first? + * NOTE: Only works at the moment if the second object is NULL - + * if it's not, which object do we lock first? * * Parameters: - * prev_object First object to coalesce - * prev_offset Offset into prev_object - * next_object Second object into coalesce - * next_offset Offset into next_object + * prev_object First object to coalesce + * prev_offset Offset into prev_object + * next_object Second object into coalesce + * next_offset Offset into next_object * - * prev_size Size of reference to prev_object - * next_size Size of reference to next_object + * prev_size Size of reference to prev_object + * next_size Size of reference to next_object * * Conditions: * The object must *not* be locked. */ boolean_t -vm_object_coalesce(prev_object, next_object, - prev_offset, next_offset, - prev_size, next_size) - +vm_object_coalesce(prev_object, next_object, prev_offset, next_offset, + prev_size, next_size) register vm_object_t prev_object; vm_object_t next_object; vm_offset_t prev_offset, next_offset; @@ -1718,7 +1753,7 @@ vm_object_coalesce(prev_object, next_object, * prev_entry may be in use anyway) */ - if (prev_object->ref_count > 1 || prev_object->pager != NULL || + if (prev_object->ref_count > 1 || prev_object->pager != NULL || prev_object->shadow != NULL || prev_object->copy != NULL) { vm_object_unlock(prev_object); return(FALSE); @@ -1728,7 +1763,6 @@ vm_object_coalesce(prev_object, next_object, * Remove any pages that may still be in the object from * a previous deallocation. 
*/ - vm_object_page_remove(prev_object, prev_offset + prev_size, prev_offset + prev_size + next_size); @@ -1769,23 +1803,22 @@ _vm_object_print(object, full, pr) if (object == NULL) return; - iprintf(pr, "Object 0x%lx: size=0x%lx, res=%d, ref=%d, ", - (long)object, (long)object->size, - object->resident_page_count, object->ref_count); - (*pr)("pager=0x%lx+0x%lx, shadow=(0x%lx)+0x%lx\n", - (long)object->pager, (long)object->paging_offset, - (long)object->shadow, (long)object->shadow_offset); + iprintf(pr, "Object 0x%p: size=0x%lx, res=%d, ref=%d, ", object, + (long)object->size, object->resident_page_count, + object->ref_count); + (*pr)("pager=0x%p+0x%lx, shadow=(0x%p)+0x%lx\n", object->pager, + (long)object->paging_offset, object->shadow, + (long)object->shadow_offset); (*pr)("shadowers=("); delim = ""; for (o = object->shadowers.lh_first; o; o = o->shadowers_list.le_next) { - (*pr)("%s0x%x", delim, o); + (*pr)("%s0x%p", delim, o); delim = ", "; }; (*pr)(")\n"); - (*pr)("cache: next=0x%lx, prev=0x%lx\n", - (long)object->cached_list.tqe_next, - (long)object->cached_list.tqe_prev); + (*pr)("cache: next=0x%p, prev=0x%p\n", object->cached_list.tqe_next, + object->cached_list.tqe_prev); if (!full) return; @@ -1803,7 +1836,8 @@ _vm_object_print(object, full, pr) (*pr)(","); count++; - (*pr)("(off=0x%x,page=0x%x)", p->offset, VM_PAGE_TO_PHYS(p)); + (*pr)("(off=0x%lx,page=0x%lx)", (long)p->offset, + (long)VM_PAGE_TO_PHYS(p)); } if (count != 0) (*pr)("\n"); @@ -1813,11 +1847,10 @@ _vm_object_print(object, full, pr) /* * vm_object_set_shadow: * - * Maintain the shadow graph so that back-link consistency is - * always kept. + * Maintain the shadow graph so that back-link consistency is always kept. * - * Assumes both objects as well as the old shadow to be locked - * (unless NULL of course). + * Assumes both objects as well as the old shadow to be locked (unless NULL + * of course). 
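With this change the shadow back-link owns a reference: vm_object_set_shadow (just below) does the ref_count moves itself, and vm_object_shadow/vm_object_copy drop their hand-tuned compensating updates. A compact userland model of that invariant, with the shadowers list omitted; only the refcounting mirrors the diff.

#include <stdio.h>

struct obj { struct obj *shadow; int ref_count; };

/*
 * Models vm_object_set_shadow: the back-link is the one place that
 * moves reference counts, so callers can no longer get them wrong.
 */
static void
set_shadow(struct obj *o, struct obj *shadow)
{
        struct obj *old = o->shadow;

        if (old == shadow)
                return;
        if (old != NULL)
                old->ref_count--;       /* drop the back-link's reference */
        if (shadow != NULL)
                shadow->ref_count++;    /* take a reference for the link */
        o->shadow = shadow;
}

int
main(void)
{
        struct obj front = { NULL, 1 }, back = { NULL, 0 };

        set_shadow(&front, &back);
        printf("back.ref_count = %d\n", back.ref_count);        /* 1 */
        set_shadow(&front, NULL);
        printf("back.ref_count = %d\n", back.ref_count);        /* 0 */
        return (0);
}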
*/ void vm_object_set_shadow(object, shadow) @@ -1827,8 +1860,8 @@ vm_object_set_shadow(object, shadow) #ifdef DEBUG if (vmdebug & VMDEBUG_SHADOW) - printf("vm_object_set_shadow(object=0x%x, shadow=0x%x) " - "old_shadow=0x%x\n", object, shadow, old_shadow); + printf("vm_object_set_shadow(object=0x%p, shadow=0x%p) " + "old_shadow=0x%p\n", object, shadow, old_shadow); if (vmdebug & VMDEBUG_SHADOW_VERBOSE) { vm_object_print(object, 0); vm_object_print(old_shadow, 0); @@ -1838,9 +1871,11 @@ vm_object_set_shadow(object, shadow) if (old_shadow == shadow) return; if (old_shadow) { + old_shadow->ref_count--; LIST_REMOVE(object, shadowers_list); } if (shadow) { + shadow->ref_count++; LIST_INSERT_HEAD(&shadow->shadowers, object, shadowers_list); } object->shadow = shadow; diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index be739e1fb11..370bab3920b 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_object.h,v 1.4 1996/12/24 20:14:32 niklas Exp $ */ +/* $OpenBSD: vm_object.h,v 1.5 1997/04/17 01:25:21 niklas Exp $ */ /* $NetBSD: vm_object.h,v 1.16 1995/03/29 22:10:28 briggs Exp $ */ /* @@ -111,6 +111,7 @@ struct vm_object { #define OBJ_INTERNAL 0x0002 /* internally created object */ #define OBJ_ACTIVE 0x0004 /* used to mark active objects */ #define OBJ_FADING 0x0008 /* tell others that the object is going away */ +#define OBJ_WAITING 0x8000 /* someone is waiting for paging to finish */ TAILQ_HEAD(vm_object_hash_head, vm_object_hash_entry); @@ -144,8 +145,53 @@ vm_object_t kmem_object; #define vm_object_lock(object) simple_lock(&(object)->Lock) #define vm_object_unlock(object) simple_unlock(&(object)->Lock) #define vm_object_lock_try(object) simple_lock_try(&(object)->Lock) -#define vm_object_sleep(event, object, interruptible) \ - thread_sleep((event), &(object)->Lock, (interruptible)) + +#define vm_object_sleep(event, object, interruptible, where) \ + do { \ + (object)->flags |= OBJ_WAITING; \ + thread_sleep_msg((event), &(object)->Lock, \ + (interruptible), (where)); \ + } while (0) + +#define vm_object_wakeup(object) \ + do { \ + if ((object)->flags & OBJ_WAITING) { \ + (object)->flags &= ~OBJ_WAITING; \ + thread_wakeup((object)); \ + } \ + } while (0) + +#define vm_object_paging(object) \ + ((object)->paging_in_progress != 0) + +#ifndef DIAGNOSTIC +#define vm_object_paging_begin(object) \ + do { \ + (object)->paging_in_progress++; \ + } while (0) +#else +#define vm_object_paging_begin(object) \ + do { \ + if ((object)->paging_in_progress == 0xdead) \ + panic("vm_object_paging_begin"); \ + (object)->paging_in_progress++; \ + } while (0) +#endif + +#define vm_object_paging_end(object) \ + do { \ + if (--((object)->paging_in_progress) == 0) \ + vm_object_wakeup((object)); \ + } while (0) + +#define vm_object_paging_wait(object) \ + do { \ + while (vm_object_paging((object))) { \ + vm_object_sleep((object), (object), FALSE, \ + "vospgw"); \ + vm_object_lock((object)); \ + } \ + } while (0) #ifdef _KERNEL vm_object_t vm_object_allocate __P((vm_size_t)); diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 37131d3c021..c3c0bf6e460 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_page.c,v 1.4 1997/01/04 14:17:30 niklas Exp $ */ +/* $OpenBSD: vm_page.c,v 1.5 1997/04/17 01:25:21 niklas Exp $ */ /* $NetBSD: vm_page.c,v 1.28 1996/02/05 01:54:05 christos Exp $ */ /* @@ -36,7 +36,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * @(#)vm_page.c 8.3 (Berkeley) 3/21/94 + * @(#)vm_page.c 8.3 (Berkeley) 3/21/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. @@ -66,7 +66,7 @@ */ /* - * Resident memory management module. + * Resident memory management module. */ #include <sys/param.h> @@ -82,17 +82,17 @@ #ifdef MACHINE_NONCONTIG /* - * These variables record the values returned by vm_page_bootstrap, - * for debugging purposes. The implementation of pmap_steal_memory - * and pmap_startup here also uses them internally. + * These variables record the values returned by vm_page_bootstrap, + * for debugging purposes. The implementation of pmap_steal_memory + * and pmap_startup here also uses them internally. */ vm_offset_t virtual_space_start; vm_offset_t virtual_space_end; #endif /* MACHINE_NONCONTIG */ /* - * Associated with page of user-allocatable memory is a - * page structure. + * Associated with page of user-allocatable memory is a + * page structure. */ struct pglist *vm_page_buckets; /* Array of buckets */ @@ -123,13 +123,13 @@ vm_size_t page_mask; int page_shift; /* - * vm_set_page_size: + * vm_set_page_size: * - * Sets the page size, perhaps based upon the memory - * size. Must be called before any use of page-size - * dependent functions. + * Sets the page size, perhaps based upon the memory + * size. Must be called before any use of page-size + * dependent functions. * - * Sets page_shift and page_mask from cnt.v_page_size. + * Sets page_shift and page_mask from cnt.v_page_size. */ void vm_set_page_size() @@ -148,14 +148,14 @@ vm_set_page_size() #ifdef MACHINE_NONCONTIG /* - * vm_page_bootstrap: + * vm_page_bootstrap: * - * Initializes the resident memory module. + * Initializes the resident memory module. * - * Allocates memory for the page cells, and - * for the object/offset-to-page hash table headers. - * Each page cell is initialized and placed on the free list. - * Returns the range of available kernel virtual memory. + * Allocates memory for the page cells, and + * for the object/offset-to-page hash table headers. + * Each page cell is initialized and placed on the free list. + * Returns the range of available kernel virtual memory. */ void vm_page_bootstrap(startp, endp) @@ -170,55 +170,50 @@ vm_page_bootstrap(startp, endp) /* - * Initialize the locks + * Initialize the locks */ - simple_lock_init(&vm_page_queue_free_lock); simple_lock_init(&vm_page_queue_lock); /* - * Initialize the queue headers for the free queue, - * the active queue and the inactive queue. + * Initialize the queue headers for the free queue, + * the active queue and the inactive queue. */ - TAILQ_INIT(&vm_page_queue_free); TAILQ_INIT(&vm_page_queue_active); TAILQ_INIT(&vm_page_queue_inactive); /* - * Pre-allocate maps and map entries that cannot be dynamically - * allocated via malloc(). The maps include the kernel_map and - * kmem_map which must be initialized before malloc() will - * work (obviously). Also could include pager maps which would - * be allocated before kmeminit. + * Pre-allocate maps and map entries that cannot be dynamically + * allocated via malloc(). The maps include the kernel_map and + * kmem_map which must be initialized before malloc() will + * work (obviously). Also could include pager maps which would + * be allocated before kmeminit. * - * Allow some kernel map entries... this should be plenty - * since people shouldn't be cluttering up the kernel - * map (they should use their own maps). + * Allow some kernel map entries... 
this should be plenty + * since people shouldn't be cluttering up the kernel + * map (they should use their own maps). */ - kentry_data_size = round_page(MAX_KMAP*sizeof(struct vm_map) + MAX_KMAPENT*sizeof(struct vm_map_entry)); kentry_data = (vm_offset_t) pmap_steal_memory(kentry_data_size); /* - * Validate these zone addresses. + * Validate these zone addresses. */ - bzero((caddr_t) kentry_data, kentry_data_size); /* - * Allocate (and initialize) the virtual-to-physical - * table hash buckets. + * Allocate (and initialize) the virtual-to-physical + * table hash buckets. * - * The number of buckets MUST BE a power of 2, and - * the actual value is the next power of 2 greater - * than the number of physical pages in the system. + * The number of buckets MUST BE a power of 2, and + * the actual value is the next power of 2 greater + * than the number of physical pages in the system. * - * Note: - * This computation can be tweaked if desired. + * Note: + * This computation can be tweaked if desired. */ - if (vm_page_bucket_count == 0) { unsigned int npages = pmap_free_pages(); @@ -241,13 +236,12 @@ vm_page_bootstrap(startp, endp) simple_lock_init(&bucket_lock); /* - * Machine-dependent code allocates the resident page table. - * It uses VM_PAGE_INIT to initialize the page frames. - * The code also returns to us the virtual space available - * to the kernel. We don't trust the pmap module - * to get the alignment right. + * Machine-dependent code allocates the resident page table. + * It uses VM_PAGE_INIT to initialize the page frames. + * The code also returns to us the virtual space available + * to the kernel. We don't trust the pmap module + * to get the alignment right. */ - pmap_startup(&virtual_space_start, &virtual_space_end); virtual_space_start = round_page(virtual_space_start); virtual_space_end = trunc_page(virtual_space_end); @@ -261,13 +255,13 @@ vm_page_bootstrap(startp, endp) #else /* MACHINE_NONCONTIG */ /* - * vm_page_startup: + * vm_page_startup: * - * Initializes the resident memory module. + * Initializes the resident memory module. * - * Allocates memory for the page cells, and - * for the object/offset-to-page hash table headers. - * Each page cell is initialized and placed on the free list. + * Allocates memory for the page cells, and + * for the object/offset-to-page hash table headers. + * Each page cell is initialized and placed on the free list. */ void vm_page_startup(start, end) @@ -284,32 +278,29 @@ vm_page_startup(start, end) /* - * Initialize the locks + * Initialize the locks */ - simple_lock_init(&vm_page_queue_free_lock); simple_lock_init(&vm_page_queue_lock); /* - * Initialize the queue headers for the free queue, - * the active queue and the inactive queue. + * Initialize the queue headers for the free queue, + * the active queue and the inactive queue. */ - TAILQ_INIT(&vm_page_queue_free); TAILQ_INIT(&vm_page_queue_active); TAILQ_INIT(&vm_page_queue_inactive); /* - * Calculate the number of hash table buckets. + * Calculate the number of hash table buckets. * - * The number of buckets MUST BE a power of 2, and - * the actual value is the next power of 2 greater - * than the number of physical pages in the system. + * The number of buckets MUST BE a power of 2, and + * the actual value is the next power of 2 greater + * than the number of physical pages in the system. * - * Note: - * This computation can be tweaked if desired. + * Note: + * This computation can be tweaked if desired. 
*/ - if (vm_page_bucket_count == 0) { vm_page_bucket_count = 1; while (vm_page_bucket_count < atop(*end - *start)) @@ -319,7 +310,7 @@ vm_page_startup(start, end) vm_page_hash_mask = vm_page_bucket_count - 1; /* - * Allocate (and initialize) the hash table buckets. + * Allocate (and initialize) the hash table buckets. */ vm_page_buckets = (struct pglist *) pmap_bootstrap_alloc(vm_page_bucket_count * sizeof(struct pglist)); @@ -333,41 +324,37 @@ vm_page_startup(start, end) simple_lock_init(&bucket_lock); /* - * Truncate the remainder of physical memory to our page size. + * Truncate the remainder of physical memory to our page size. */ - *end = trunc_page(*end); /* - * Pre-allocate maps and map entries that cannot be dynamically - * allocated via malloc(). The maps include the kernel_map and - * kmem_map which must be initialized before malloc() will - * work (obviously). Also could include pager maps which would - * be allocated before kmeminit. + * Pre-allocate maps and map entries that cannot be dynamically + * allocated via malloc(). The maps include the kernel_map and + * kmem_map which must be initialized before malloc() will + * work (obviously). Also could include pager maps which would + * be allocated before kmeminit. * - * Allow some kernel map entries... this should be plenty - * since people shouldn't be cluttering up the kernel - * map (they should use their own maps). + * Allow some kernel map entries... this should be plenty + * since people shouldn't be cluttering up the kernel + * map (they should use their own maps). */ - kentry_data_size = round_page(MAX_KMAP*sizeof(struct vm_map) + MAX_KMAPENT*sizeof(struct vm_map_entry)); kentry_data = (vm_offset_t) pmap_bootstrap_alloc(kentry_data_size); /* - * Compute the number of pages of memory that will be - * available for use (taking into account the overhead - * of a page structure per page). + * Compute the number of pages of memory that will be + * available for use (taking into account the overhead + * of a page structure per page). */ - cnt.v_free_count = npages = (*end - *start + sizeof(struct vm_page)) / (PAGE_SIZE + sizeof(struct vm_page)); /* - * Record the extent of physical memory that the - * virtual memory system manages. + * Record the extent of physical memory that the + * virtual memory system manages. */ - first_page = *start; first_page += npages*sizeof(struct vm_page); first_page = atop(round_page(first_page)); @@ -378,17 +365,15 @@ vm_page_startup(start, end) /* - * Allocate and clear the mem entry structures. + * Allocate and clear the mem entry structures. */ - m = vm_page_array = (vm_page_t) pmap_bootstrap_alloc(npages * sizeof(struct vm_page)); /* - * Initialize the mem entry structures now, and - * put them in the free queue. + * Initialize the mem entry structures now, and + * put them in the free queue. */ - pa = first_phys_addr; while (npages--) { m->flags = 0; @@ -400,8 +385,8 @@ vm_page_startup(start, end) } /* - * Initialize vm_pages_needed lock here - don't wait for pageout - * daemon XXX + * Initialize vm_pages_needed lock here - don't wait for pageout + * daemon XXX */ simple_lock_init(&vm_pages_needed_lock); @@ -412,8 +397,8 @@ vm_page_startup(start, end) #if defined(MACHINE_NONCONTIG) && !defined(MACHINE_PAGES) /* - * We implement pmap_steal_memory and pmap_startup with the help - * of two simpler functions, pmap_virtual_space and pmap_next_page. + * We implement pmap_steal_memory and pmap_startup with the help + * of two simpler functions, pmap_virtual_space and pmap_next_page. 
*/ vm_offset_t pmap_steal_memory(size) @@ -427,22 +412,22 @@ pmap_steal_memory(size) #endif /* - * We round the size to an integer multiple. + * We round the size to an integer multiple. */ size = (size + 3) &~ 3; /* XXX */ /* - * If this is the first call to pmap_steal_memory, - * we have to initialize ourself. + * If this is the first call to pmap_steal_memory, + * we have to initialize ourself. */ if (virtual_space_start == virtual_space_end) { pmap_virtual_space(&virtual_space_start, &virtual_space_end); /* - * The initial values must be aligned properly, and - * we don't trust the pmap module to do it right. + * The initial values must be aligned properly, and + * we don't trust the pmap module to do it right. */ virtual_space_start = round_page(virtual_space_start); @@ -450,14 +435,14 @@ pmap_steal_memory(size) } /* - * Allocate virtual memory for this request. + * Allocate virtual memory for this request. */ addr = virtual_space_start; virtual_space_start += size; /* - * Allocate and map physical pages to back new virtual pages. + * Allocate and map physical pages to back new virtual pages. */ for (vaddr = round_page(addr); @@ -467,8 +452,8 @@ pmap_steal_memory(size) panic("pmap_steal_memory"); /* - * XXX Logically, these mappings should be wired, - * but some pmap modules barf if they are. + * XXX Logically, these mappings should be wired, + * but some pmap modules barf if they are. */ pmap_enter(pmap_kernel(), vaddr, paddr, @@ -491,25 +476,24 @@ pmap_startup(startp, endp) * and then allocate the page structures in one chunk. * The calculation is non-trivial. We want: * - * vmpages > (freepages - (vmpages / sizeof(vm_page_t))) + * vmpages > (freepages - (vmpages / sizeof(vm_page_t))) * * which, with some algebra, becomes: * - * vmpages > (freepages * sizeof(...) / (1 + sizeof(...))) + * vmpages > (freepages * sizeof(...) / (1 + sizeof(...))) * * The value of vm_page_count need not be exact, but must be * large enough so vm_page_array handles the index range. */ - freepages = pmap_free_pages(); /* Fudge slightly to deal with truncation error. */ freepages += 1; /* fudge */ vm_page_count = (PAGE_SIZE * freepages) / - (PAGE_SIZE + sizeof(*vm_page_array)); + (PAGE_SIZE + sizeof(*vm_page_array)); vm_page_array = (vm_page_t) - pmap_steal_memory(vm_page_count * sizeof(*vm_page_array)); + pmap_steal_memory(vm_page_count * sizeof(*vm_page_array)); #ifdef DIAGNOSTIC /* @@ -523,9 +507,9 @@ pmap_startup(startp, endp) #endif /* - * Initialize the page frames. - * Note that some page indices may not be usable - * when pmap_free_pages() counts pages in a hole. + * Initialize the page frames. + * Note that some page indices may not be usable + * when pmap_free_pages() counts pages in a hole. */ if (!pmap_next_page(&paddr)) panic("pmap_startup: can't get first page"); @@ -548,7 +532,7 @@ pmap_startup(startp, endp) /* Cannot happen; i is unsigned */ i < 0 || #endif - i >= vm_page_count) + i >= vm_page_count) panic("pmap_startup: bad i=0x%x", i); } @@ -558,22 +542,22 @@ pmap_startup(startp, endp) #endif /* MACHINE_NONCONTIG && !MACHINE_PAGES */ /* - * vm_page_hash: + * vm_page_hash: * - * Distributes the object/offset key pair among hash buckets. + * Distributes the object/offset key pair among hash buckets. * - * NOTE: This macro depends on vm_page_bucket_count being a power of 2. + * NOTE: This macro depends on vm_page_bucket_count being a power of 2. 
*/ #define vm_page_hash(object, offset) \ (((unsigned long)object+(unsigned long)atop(offset))&vm_page_hash_mask) /* - * vm_page_insert: [ internal use only ] + * vm_page_insert: [ internal use only ] * - * Inserts the given mem entry into the object/object-page - * table and object list. + * Inserts the given mem entry into the object/object-page + * table and object list. * - * The object and page must be locked. + * The object and page must be locked. */ void vm_page_insert(mem, object, offset) @@ -590,16 +574,14 @@ vm_page_insert(mem, object, offset) panic("vm_page_insert: already inserted"); /* - * Record the object/offset pair in this page + * Record the object/offset pair in this page */ - mem->object = object; mem->offset = offset; /* - * Insert it into the object_object/offset hash table + * Insert it into the object_object/offset hash table */ - bucket = &vm_page_buckets[vm_page_hash(object, offset)]; spl = splimp(); simple_lock(&bucket_lock); @@ -608,28 +590,26 @@ vm_page_insert(mem, object, offset) (void) splx(spl); /* - * Now link into the object's list of backed pages. + * Now link into the object's list of backed pages. */ - TAILQ_INSERT_TAIL(&object->memq, mem, listq); mem->flags |= PG_TABLED; /* - * And show that the object has one more resident - * page. + * And show that the object has one more resident + * page. */ - object->resident_page_count++; } /* - * vm_page_remove: [ internal use only ] - * NOTE: used by device pager as well -wfj + * vm_page_remove: [ internal use only ] + * NOTE: used by device pager as well -wfj * - * Removes the given mem entry from the object/offset-page - * table and the object page list. + * Removes the given mem entry from the object/offset-page + * table and the object page list. * - * The object and page must be locked. + * The object and page must be locked. */ void vm_page_remove(mem) @@ -649,9 +629,8 @@ vm_page_remove(mem) return; /* - * Remove from the object_object/offset hash table + * Remove from the object_object/offset hash table */ - bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)]; spl = splimp(); simple_lock(&bucket_lock); @@ -660,28 +639,26 @@ vm_page_remove(mem) (void) splx(spl); /* - * Now remove from the object's list of backed pages. + * Now remove from the object's list of backed pages. */ - TAILQ_REMOVE(&mem->object->memq, mem, listq); /* - * And show that the object has one fewer resident - * page. + * And show that the object has one fewer resident + * page. */ - mem->object->resident_page_count--; mem->flags &= ~PG_TABLED; } /* - * vm_page_lookup: + * vm_page_lookup: * - * Returns the page associated with the object/offset - * pair specified; if none is found, NULL is returned. + * Returns the page associated with the object/offset + * pair specified; if none is found, NULL is returned. * - * The object must be locked. No side effects. + * The object must be locked. No side effects. */ vm_page_t vm_page_lookup(object, offset) @@ -693,9 +670,8 @@ vm_page_lookup(object, offset) int spl; /* - * Search the hash table for this object/offset pair + * Search the hash table for this object/offset pair */ - bucket = &vm_page_buckets[vm_page_hash(object, offset)]; spl = splimp(); @@ -715,12 +691,12 @@ vm_page_lookup(object, offset) } /* - * vm_page_rename: + * vm_page_rename: * - * Move the given memory entry from its - * current object to the specified target object/offset. + * Move the given memory entry from its + * current object to the specified target object/offset. * - * The object must be locked. 
+ * The object must be locked. */ void vm_page_rename(mem, new_object, new_offset) @@ -739,12 +715,12 @@ vm_page_rename(mem, new_object, new_offset) } /* - * vm_page_alloc: + * vm_page_alloc: * - * Allocate and return a memory cell associated - * with this VM object/offset pair. + * Allocate and return a memory cell associated + * with this VM object/offset pair. * - * Object must be locked. + * Object must be locked. */ vm_page_t vm_page_alloc(object, offset) @@ -772,16 +748,15 @@ vm_page_alloc(object, offset) VM_PAGE_INIT(mem, object, offset); /* - * Decide if we should poke the pageout daemon. - * We do this if the free count is less than the low - * water mark, or if the free count is less than the high - * water mark (but above the low water mark) and the inactive - * count is less than its target. + * Decide if we should poke the pageout daemon. + * We do this if the free count is less than the low + * water mark, or if the free count is less than the high + * water mark (but above the low water mark) and the inactive + * count is less than its target. * - * We don't have the counts locked ... if they change a little, - * it doesn't really matter. + * We don't have the counts locked ... if they change a little, + * it doesn't really matter. */ - if (cnt.v_free_count < cnt.v_free_min || (cnt.v_free_count < cnt.v_free_target && cnt.v_inactive_count < cnt.v_inactive_target)) @@ -790,12 +765,12 @@ vm_page_alloc(object, offset) } /* - * vm_page_free: + * vm_page_free: * - * Returns the given page to the free list, - * disassociating it with any VM object. + * Returns the given page to the free list, + * disassociating it with any VM object. * - * Object and page must be locked prior to entry. + * Object and page must be locked prior to entry. */ void vm_page_free(mem) @@ -828,13 +803,13 @@ vm_page_free(mem) } /* - * vm_page_wire: + * vm_page_wire: * - * Mark this page as wired down by yet - * another map, removing it from paging queues - * as necessary. + * Mark this page as wired down by yet + * another map, removing it from paging queues + * as necessary. * - * The page queues must be locked. + * The page queues must be locked. */ void vm_page_wire(mem) @@ -859,12 +834,12 @@ vm_page_wire(mem) } /* - * vm_page_unwire: + * vm_page_unwire: * - * Release one wiring of this page, potentially - * enabling it to be paged again. + * Release one wiring of this page, potentially + * enabling it to be paged again. * - * The page queues must be locked. + * The page queues must be locked. */ void vm_page_unwire(mem) @@ -882,13 +857,13 @@ vm_page_unwire(mem) } /* - * vm_page_deactivate: + * vm_page_deactivate: * - * Returns the given page to the inactive list, - * indicating that no physical maps have access - * to this page. [Used by the physical mapping system.] + * Returns the given page to the inactive list, + * indicating that no physical maps have access + * to this page. [Used by the physical mapping system.] * - * The page queues must be locked. + * The page queues must be locked. */ void vm_page_deactivate(m) @@ -897,10 +872,9 @@ vm_page_deactivate(m) VM_PAGE_CHECK(m); /* - * Only move active pages -- ignore locked or already - * inactive ones. + * Only move active pages -- ignore locked or already + * inactive ones. */ - if (m->flags & PG_ACTIVE) { TAILQ_REMOVE(&vm_page_queue_active, m, pageq); m->flags &= ~PG_ACTIVE; @@ -923,11 +897,11 @@ vm_page_deactivate(m) } /* - * vm_page_activate: + * vm_page_activate: * - * Put the specified page on the active list (if appropriate). 
+ *	Put the specified page on the active list (if appropriate).
 *
- *	The page queues must be locked.
+ *	The page queues must be locked.
 */
 void
 vm_page_activate(m)
@@ -951,11 +925,11 @@ vm_page_activate(m)
 }

 /*
- *	vm_page_zero_fill:
+ *	vm_page_zero_fill:
 *
- *	Zero-fill the specified page.
- *	Written as a standard pagein routine, to
- *	be used by the zero-fill object.
+ *	Zero-fill the specified page.
+ *	Written as a standard pagein routine, to
+ *	be used by the zero-fill object.
 */
 boolean_t
 vm_page_zero_fill(m)
@@ -969,9 +943,9 @@ vm_page_zero_fill(m)
 }

 /*
- *	vm_page_copy:
+ *	vm_page_copy:
 *
- *	Copy one page to another
+ *	Copy one page to another
 */
 void
 vm_page_copy(src_m, dest_m)
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index f2c9cad7509..23585120dd2 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: vm_pageout.c,v 1.4 1996/09/18 11:57:38 deraadt Exp $	*/
+/*	$OpenBSD: vm_pageout.c,v 1.5 1997/04/17 01:25:22 niklas Exp $	*/
 /*	$NetBSD: vm_pageout.c,v 1.23 1996/02/05 01:54:07 christos Exp $	*/

 /*
@@ -72,6 +72,7 @@
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/systm.h>
+#include <sys/kernel.h>

 #include <vm/vm.h>
 #include <vm/vm_page.h>
@@ -274,14 +275,22 @@ vm_pageout_page(m, object)
 	 * We must unlock the page queues first.
 	 */
 	vm_page_unlock_queues();
+
+#if 0
+	/*
+	 * vm_object_collapse might want to sleep waiting for pages, which
+	 * this thread is not allowed to do.  Anyway, we now aggressively
+	 * collapse object chains as early as possible, so this call ought
+	 * not to be very useful anyhow.  This is just an educated guess.
+	 * Not doing a collapse operation is never fatal though, so we skip
+	 * it for the time being.  Later we might add some NOWAIT option for
+	 * the collapse code to look at, if it's deemed necessary.
+	 */
 	if (object->pager == NULL)
 		vm_object_collapse(object);
-
-#ifdef DIAGNOSTIC
-	if (object->paging_in_progress == 0xdead)
-		panic("vm_pageout_page: object deallocated");
 #endif
-	object->paging_in_progress++;
+
+	vm_object_paging_begin(object);
 	vm_object_unlock(object);

 	/*
@@ -297,7 +306,7 @@ vm_pageout_page(m, object)
 	 */
 	if ((pager = object->pager) == NULL) {
 		pager = vm_pager_allocate(PG_DFLT, (caddr_t)0, object->size,
-		    VM_PROT_ALL, (vm_offset_t)0);
+		    VM_PROT_ALL, (vm_offset_t)0);
 		if (pager != NULL)
 			vm_object_setpager(object, pager, 0, FALSE);
 	}
@@ -330,8 +339,8 @@ vm_pageout_page(m, object)
 		 * shortage, so we pause for awhile and try again.
 		 * XXX could get stuck here.
 		 */
-		(void) tsleep((caddr_t)&vm_pages_needed, PZERO|PCATCH,
-		    "pageout", 100);
+		(void)tsleep((caddr_t)&vm_pages_needed, PZERO|PCATCH,
+		    "pageout", hz);
 		break;
 	}
 	case VM_PAGER_FAIL:
@@ -357,7 +366,7 @@ vm_pageout_page(m, object)
 	if (pageout_status != VM_PAGER_PEND) {
 		m->flags &= ~PG_BUSY;
 		PAGE_WAKEUP(m);
-		object->paging_in_progress--;
+		vm_object_paging_end(object);
 	}
 }

@@ -381,7 +390,6 @@ vm_pageout_cluster(m, object)
 	vm_offset_t offset, loff, hoff;
 	vm_page_t plist[MAXPOCLUSTER], *plistp, p;
 	int postatus, ix, count;
-	extern int lbolt;

 	/*
 	 * Determine the range of pages that can be part of a cluster
@@ -448,11 +456,7 @@ vm_pageout_cluster(m, object)
 	 * in case it blocks.
 	 */
 	vm_page_unlock_queues();
-#ifdef DIAGNOSTIC
-	if (object->paging_in_progress == 0xdead)
-		panic("vm_pageout_cluster: object deallocated");
-#endif
-	object->paging_in_progress++;
+	vm_object_paging_begin(object);
 	vm_object_unlock(object);
again:
 	thread_wakeup(&cnt.v_free_count);
@@ -461,7 +465,8 @@ again:
 	 * XXX rethink this
 	 */
 	if (postatus == VM_PAGER_AGAIN) {
-		(void) tsleep((caddr_t)&lbolt, PZERO|PCATCH, "pageout", 0);
+		(void)tsleep((caddr_t)&vm_pages_needed, PZERO|PCATCH,
+		    "pageout", 0);
 		goto again;
 	} else if (postatus == VM_PAGER_BAD)
 		panic("vm_pageout_cluster: VM_PAGER_BAD");
@@ -501,7 +506,6 @@ again:
 		if (postatus != VM_PAGER_PEND) {
 			p->flags &= ~PG_BUSY;
 			PAGE_WAKEUP(p);
-
 		}
 	}

 /*
@@ -509,8 +513,7 @@ again:
 	 * indicator set so that we don't attempt an object collapse.
 	 */
 	if (postatus != VM_PAGER_PEND)
-		object->paging_in_progress--;
-
+		vm_object_paging_end(object);
 }
 #endif

@@ -521,7 +524,7 @@ again:
 void
 vm_pageout()
 {
-	(void) spl0();
+	(void)spl0();

 	/*
 	 * Initialize some paging parameters.
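
On the retry_object hand-off in the vm_object_collapse hunk: the added comment
promises that the recursive case in vm_object_deallocate becomes an iteration.
A minimal sketch of the intended calling pattern follows; the name
vm_object_collapse_internal and the loop body are illustrative assumptions,
not the commit's actual code.

/*
 * Illustrative only: a deallocation loop that consumes the backing
 * object fed back through retry_object instead of recursing.  Cache
 * handling and pager teardown are omitted.
 */
void
vm_object_deallocate(object)
	register vm_object_t object;
{
	vm_object_t retry;

	while (object != NULL) {
		vm_object_lock(object);
		if (--(object->ref_count) > 0) {
			vm_object_unlock(object);
			return;
		}
		/*
		 * Hypothetical internal collapse entry point: when the
		 * backing object's only reference is our shadow link,
		 * it is handed back in retry rather than deallocated
		 * recursively.
		 */
		retry = NULL;
		vm_object_collapse_internal(object, &retry);
		vm_object_terminate(object);
		object = retry;		/* iterate instead of recurse */
	}
}

Feeding the backing object back through a pointer bounds stack depth on long
shadow chains, which matters on a kernel stack.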
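
The vm_object_set_shadow hunk makes the routine own the shadow reference
counts as well as the back-links.  A brief sketch of a caller (the caller
code is invented; the routine is the one in the diff):

	/*
	 * Reparenting object from its old shadow to new_shadow; all
	 * involved objects are assumed locked, per the comment above.
	 * Before this commit a caller also had to juggle ref_count by
	 * hand; now the single call drops the old shadow's reference,
	 * takes one on the new, and keeps the shadowers list consistent.
	 */
	vm_object_set_shadow(object, new_shadow);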
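
The macros added to vm_object.h replace open-coded paging_in_progress
arithmetic throughout the VM system.  A hedged sketch of how they pair up;
the enclosing functions are invented, the macros are the ones in the diff:

/*
 * Every vm_object_paging_begin must be matched by a
 * vm_object_paging_end, both under the object lock.
 */
static void
example_pageout_op(object)
	vm_object_t object;
{
	vm_object_lock(object);
	vm_object_paging_begin(object);		/* one more operation in flight */
	vm_object_unlock(object);

	/* ... start the actual pageout I/O here ... */

	vm_object_lock(object);
	vm_object_paging_end(object);		/* wakeup happens at count 0 */
	vm_object_unlock(object);
}

static void
example_wait_quiescent(object)
	vm_object_t object;
{
	vm_object_lock(object);
	/*
	 * Sleeps on the object (setting OBJ_WAITING so that
	 * vm_object_wakeup knows somebody cares) until every pending
	 * operation has called vm_object_paging_end; returns with the
	 * object lock reacquired.
	 */
	vm_object_paging_wait(object);
	vm_object_unlock(object);
}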
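
The "(size + 3) &~ 3" line in pmap_steal_memory rounds a request up to a
four-byte multiple.  A tiny helper restating the idiom (the function is not
in the tree, just an illustration):

/* Round v up to a multiple of four, as pmap_steal_memory does. */
static vm_offset_t
round4(v)
	vm_offset_t v;
{
	return ((v + 3) & ~3);	/* e.g. 10 -> 12, 13 -> 16, 16 -> 16 */
}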
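
The sizing comment in pmap_startup compresses a fixed-point argument: every
managed page costs PAGE_SIZE bytes plus one struct vm_page of bookkeeping,
and both must be paid for out of the same free memory.  Plugging hypothetical
numbers into the code's expression (the sizes are assumed, not measured):

/*
 * Assume PAGE_SIZE = 4096, sizeof(struct vm_page) = 64, and
 * freepages = 16384.  Then:
 *
 *	vm_page_count = (4096 * 16384) / (4096 + 64)
 *	              = 67108864 / 4160
 *	              = 16131
 *
 * The 16131 structures occupy 16131 * 64 = 1032384 bytes, just
 * under 253 pages; 16384 - 253 = 16131 pages remain to be managed,
 * so the bookkeeping pays for itself exactly.
 */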
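
vm_page_hash depends on vm_page_bucket_count being a power of two so that
masking with vm_page_hash_mask is an exact modulo.  For example, with 1024
buckets the mask is 1023 (0x3ff) and the AND below selects every bucket; a
count of 1000 would leave some buckets unreachable.  This is the computation
the lookup and insert paths perform:

	bucket = &vm_page_buckets[((unsigned long)object +
	    (unsigned long)atop(offset)) & vm_page_hash_mask];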
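
The comment in vm_page_alloc spells out the pageout-daemon wakeup policy:
poke it below the low water mark, or below the high water mark when the
inactive queue is also under target.  Restated as a predicate (the function
is invented; the fields are the existing vmmeter counters):

/*
 * Stale reads are tolerated, as the original comment says: the
 * worst case is a missed or an extra poke.
 */
static int
should_poke_pageout()
{
	return (cnt.v_free_count < cnt.v_free_min ||
	    (cnt.v_free_count < cnt.v_free_target &&
	    cnt.v_inactive_count < cnt.v_inactive_target));
}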
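
Finally, on the vm_pageout.c backoff changes: sleeping for hz ticks is one
second regardless of the clock rate a kernel is built with, whereas the old
literal 100 meant different real times on different HZ values; likewise
vm_pageout_cluster now waits on &vm_pages_needed (woken by the paging
machinery) instead of the lbolt once-a-second channel.  A sketch of the
general tick-conversion idiom behind the change (the helper is invented):

/* Sleep roughly msec milliseconds on channel chan. */
static void
pager_backoff(chan, msec)
	void *chan;
	int msec;
{
	int timo = (msec * hz) / 1000;

	if (timo < 1)
		timo = 1;	/* a zero timeout would mean "no timeout" */
	(void)tsleep((caddr_t)chan, PZERO | PCATCH, "pgbackoff", timo);
}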