summaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
authorMike Larkin <mlarkin@cvs.openbsd.org>2015-03-13 23:23:14 +0000
committerMike Larkin <mlarkin@cvs.openbsd.org>2015-03-13 23:23:14 +0000
commit135c8122311da1b7ed10aa8e4ce010732352c022 (patch)
treefac0e1660b17aac57de1f40a11cf9d6b54db8ecd /sys
parentae2d30616685ecf58b9e5bc857c230f32dd955cc (diff)
move some deck chairs around in preparation for i386 PAE. no functional
change, just moving a few hundred lines of comments from one place to another. Note that some of these comments are giant lies that will get rewritten later. ok deraadt@
Diffstat (limited to 'sys')
-rw-r--r--sys/arch/i386/i386/pmap.c205
-rw-r--r--sys/arch/i386/include/pmap.h98
-rw-r--r--sys/arch/i386/include/pte.h86
3 files changed, 179 insertions, 210 deletions
diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c
index 9b21031eb5f..c8697933418 100644
--- a/sys/arch/i386/i386/pmap.c
+++ b/sys/arch/i386/i386/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.170 2015/03/09 07:46:03 kettenis Exp $ */
+/* $OpenBSD: pmap.c,v 1.171 2015/03/13 23:23:13 mlarkin Exp $ */
/* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */
/*
@@ -75,15 +75,6 @@
#include <stand/boot/bootarg.h>
/*
- * general info:
- *
- * - for an explanation of how the i386 MMU hardware works see
- * the comments in <machine/pte.h>.
- *
- * - for an explanation of the general memory structure used by
- * this pmap (including the recursive mapping), see the comments
- * in <machine/pmap.h>.
- *
* this file contains the code for the "pmap module." the module's
* job is to manage the hardware's virtual to physical address mappings.
* note that there are two levels of mapping in the VM system:
@@ -118,6 +109,182 @@
* of a page]
*/
/*
+ * i386 MMU hardware structure:
+ *
+ * the i386 MMU is a two-level MMU which maps 4GB of virtual memory.
+ * the pagesize is 4K (4096 [0x1000] bytes), although newer pentium
+ * processors can support a 4MB pagesize as well.
+ *
+ * the first level table (segment table?) is called a "page directory"
+ * and it contains 1024 page directory entries (PDEs). each PDE is
+ * 4 bytes (an int), so a PD fits in a single 4K page. this page is
+ * the page directory page (PDP). each PDE in a PDP maps 4MB of space
+ * (1024 * 4MB = 4GB). a PDE contains the physical address of the
+ * second level table: the page table. or, if 4MB pages are being used,
+ * then the PDE contains the PA of the 4MB page being mapped.
+ *
+ * a page table consists of 1024 page table entries (PTEs). each PTE is
+ * 4 bytes (an int), so a page table also fits in a single 4K page. a
+ * 4K page being used as a page table is called a page table page (PTP).
+ * each PTE in a PTP maps one 4K page (1024 * 4K = 4MB). a PTE contains
+ * the physical address of the page it maps and some flag bits (described
+ * below).
+ *
+ * the processor has a special register, "cr3", which points to the
+ * the PDP which is currently controlling the mappings of the virtual
+ * address space.
+ *
+ * the following picture shows the translation process for a 4K page:
+ *
+ * %cr3 register [PA of PDP]
+ * |
+ * |
+ * | bits <31-22> of VA bits <21-12> of VA bits <11-0>
+ * | index the PDP (0 - 1023) index the PTP are the page offset
+ * | | | |
+ * | v | |
+ * +--->+----------+ | |
+ * | PD Page | PA of v |
+ * | |---PTP-------->+------------+ |
+ * | 1024 PDE | | page table |--PTE--+ |
+ * | entries | | (aka PTP) | | |
+ * +----------+ | 1024 PTE | | |
+ * | entries | | |
+ * +------------+ | |
+ * | |
+ * bits <31-12> bits <11-0>
+ * p h y s i c a l a d d r
+ *
+ * the i386 caches PTEs in a TLB. it is important to flush out old
+ * TLB mappings when making a change to a mappings. writing to the
+ * %cr3 will flush the entire TLB. newer processors also have an
+ * instruction that will invalidate the mapping of a single page (which
+ * is useful if you are changing a single mappings because it preserves
+ * all the cached TLB entries).
+ *
+ * as shows, bits 31-12 of the PTE contain PA of the page being mapped.
+ * the rest of the PTE is defined as follows:
+ * bit# name use
+ * 11 n/a available for OS use, hardware ignores it
+ * 10 n/a available for OS use, hardware ignores it
+ * 9 n/a available for OS use, hardware ignores it
+ * 8 G global bit (see discussion below)
+ * 7 PS page size [for PDEs] (0=4k, 1=4M <if supported>)
+ * 6 D dirty (modified) page
+ * 5 A accessed (referenced) page
+ * 4 PCD cache disable
+ * 3 PWT prevent write through (cache)
+ * 2 U/S user/supervisor bit (0=supervisor only, 1=both u&s)
+ * 1 R/W read/write bit (0=read only, 1=read-write)
+ * 0 P present (valid)
+ *
+ * notes:
+ * - on the i386 the R/W bit is ignored if processor is in supervisor
+ * state (bug!)
+ * - PS is only supported on newer processors
+ * - PTEs with the G bit are global in the sense that they are not
+ * flushed from the TLB when %cr3 is written (to flush, use the
+ * "flush single page" instruction). this is only supported on
+ * newer processors. this bit can be used to keep the kernel's
+ * TLB entries around while context switching. since the kernel
+ * is mapped into all processes at the same place it does not make
+ * sense to flush these entries when switching from one process'
+ * pmap to another.
+ */
+/*
+ * A pmap describes a process' 4GB virtual address space. This
+ * virtual address space can be broken up into 1024 4MB regions which
+ * are described by PDEs in the PDP. The PDEs are defined as follows:
+ *
+ * Ranges are inclusive -> exclusive, just like vm_map_entry start/end.
+ * The following assumes that KERNBASE is 0xd0000000.
+ *
+ * PDE#s VA range Usage
+ * 0->831 0x0 -> 0xcfc00000 user address space, note that the
+ * max user address is 0xcfbfe000
+ * the final two pages in the last 4MB
+ * used to be reserved for the UAREA
+ * but now are no longer used.
+ * 831 0xcfc00000-> recursive mapping of PDP (used for
+ * 0xd0000000 linear mapping of PTPs).
+ * 832->1023 0xd0000000-> kernel address space (constant
+ * 0xffc00000 across all pmaps/processes).
+ * 1023 0xffc00000-> "alternate" recursive PDP mapping
+ * <end> (for other pmaps).
+ *
+ *
+ * Note: A recursive PDP mapping provides a way to map all the PTEs for
+ * a 4GB address space into a linear chunk of virtual memory. In other
+ * words, the PTE for page 0 is the first int mapped into the 4MB recursive
+ * area. The PTE for page 1 is the second int. The very last int in the
+ * 4MB range is the PTE that maps VA 0xffffe000 (the last page in a 4GB
+ * address).
+ *
+ * All pmaps' PDs must have the same values in slots 832->1023 so that
+ * the kernel is always mapped in every process. These values are loaded
+ * into the PD at pmap creation time.
+ *
+ * At any one time only one pmap can be active on a processor. This is
+ * the pmap whose PDP is pointed to by processor register %cr3. This pmap
+ * will have all its PTEs mapped into memory at the recursive mapping
+ * point (slot #831 as show above). When the pmap code wants to find the
+ * PTE for a virtual address, all it has to do is the following:
+ *
+ * Address of PTE = (831 * 4MB) + (VA / PAGE_SIZE) * sizeof(pt_entry_t)
+ * = 0xcfc00000 + (VA / 4096) * 4
+ *
+ * What happens if the pmap layer is asked to perform an operation
+ * on a pmap that is not the one which is currently active? In that
+ * case we take the PA of the PDP of non-active pmap and put it in
+ * slot 1023 of the active pmap. This causes the non-active pmap's
+ * PTEs to get mapped in the final 4MB of the 4GB address space
+ * (e.g. starting at 0xffc00000).
+ *
+ * The following figure shows the effects of the recursive PDP mapping:
+ *
+ * PDP (%cr3)
+ * +----+
+ * | 0| -> PTP#0 that maps VA 0x0 -> 0x400000
+ * | |
+ * | |
+ * | 831| -> points back to PDP (%cr3) mapping VA 0xcfc00000 -> 0xd0000000
+ * | 832| -> first kernel PTP (maps 0xd0000000 -> 0xe0400000)
+ * | |
+ * |1023| -> points to alternate pmap's PDP (maps 0xffc00000 -> end)
+ * +----+
+ *
+ * Note that the PDE#831 VA (0xcfc00000) is defined as "PTE_BASE".
+ * Note that the PDE#1023 VA (0xffc00000) is defined as "APTE_BASE".
+ *
+ * Starting at VA 0xcfc00000 the current active PDP (%cr3) acts as a
+ * PTP:
+ *
+ * PTP#831 == PDP(%cr3) => maps VA 0xcfc00000 -> 0xd0000000
+ * +----+
+ * | 0| -> maps the contents of PTP#0 at VA 0xcfc00000->0xcfc01000
+ * | |
+ * | |
+ * | 831| -> maps the contents of PTP#831 (the PDP) at VA 0xcff3f000
+ * | 832| -> maps the contents of first kernel PTP
+ * | |
+ * |1023|
+ * +----+
+ *
+ * Note that mapping of the PDP at PTP#831's VA (0xcff3f000) is
+ * defined as "PDP_BASE".... within that mapping there are two
+ * defines:
+ * "PDP_PDE" (0xcff3fcfc) is the VA of the PDE in the PDP
+ * which points back to itself.
+ * "APDP_PDE" (0xcff3fffc) is the VA of the PDE in the PDP which
+ * establishes the recursive mapping of the alternate pmap.
+ * To set the alternate PDP, one just has to put the correct
+ * PA info in *APDP_PDE.
+ *
+ * Note that in the APTE_BASE space, the APDP appears at VA
+ * "APDP_BASE" (0xfffff000).
+ */
+
+/*
* memory allocation
*
* - there are three data structures that we must dynamically allocate:
@@ -153,24 +320,6 @@
* call pool_get()
* If we fail, we simply let pmap_enter() tell UVM about it.
*/
-/*
- * locking
- *
- * we have the following locks that we must contend with:
- *
- * "simple" locks:
- *
- * - pmap lock (per pmap, part of uvm_object)
- * this lock protects the fields in the pmap structure including
- * the non-kernel PDEs in the PDP, and the PTEs. it also locks
- * in the alternate PTE space (since that is determined by the
- * entry in the PDP).
- *
- * - pmaps_lock
- * this lock protects the list of active pmaps (headed by "pmaps").
- * we lock it when adding or removing pmaps from this list.
- *
- */
#define PD_MASK 0xffc00000 /* page directory address bits */
#define PT_MASK 0x003ff000 /* page table address bits */
diff --git a/sys/arch/i386/include/pmap.h b/sys/arch/i386/include/pmap.h
index 5f0422529e9..4d3e0bfbc06 100644
--- a/sys/arch/i386/include/pmap.h
+++ b/sys/arch/i386/include/pmap.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.h,v 1.71 2015/02/15 21:34:33 miod Exp $ */
+/* $OpenBSD: pmap.h,v 1.72 2015/03/13 23:23:13 mlarkin Exp $ */
/* $NetBSD: pmap.h,v 1.44 2000/04/24 17:18:18 thorpej Exp $ */
/*
@@ -42,102 +42,6 @@
#include <uvm/uvm_object.h>
/*
- * See pte.h for a description of i386 MMU terminology and hardware
- * interface.
- *
- * A pmap describes a process' 4GB virtual address space. This
- * virtual address space can be broken up into 1024 4MB regions which
- * are described by PDEs in the PDP. The PDEs are defined as follows:
- *
- * Ranges are inclusive -> exclusive, just like vm_map_entry start/end.
- * The following assumes that KERNBASE is 0xd0000000.
- *
- * PDE#s VA range Usage
- * 0->831 0x0 -> 0xcfc00000 user address space, note that the
- * max user address is 0xcfbfe000
- * the final two pages in the last 4MB
- * used to be reserved for the UAREA
- * but now are no longer used.
- * 831 0xcfc00000-> recursive mapping of PDP (used for
- * 0xd0000000 linear mapping of PTPs).
- * 832->1023 0xd0000000-> kernel address space (constant
- * 0xffc00000 across all pmaps/processes).
- * 1023 0xffc00000-> "alternate" recursive PDP mapping
- * <end> (for other pmaps).
- *
- *
- * Note: A recursive PDP mapping provides a way to map all the PTEs for
- * a 4GB address space into a linear chunk of virtual memory. In other
- * words, the PTE for page 0 is the first int mapped into the 4MB recursive
- * area. The PTE for page 1 is the second int. The very last int in the
- * 4MB range is the PTE that maps VA 0xffffe000 (the last page in a 4GB
- * address).
- *
- * All pmaps' PDs must have the same values in slots 832->1023 so that
- * the kernel is always mapped in every process. These values are loaded
- * into the PD at pmap creation time.
- *
- * At any one time only one pmap can be active on a processor. This is
- * the pmap whose PDP is pointed to by processor register %cr3. This pmap
- * will have all its PTEs mapped into memory at the recursive mapping
- * point (slot #831 as show above). When the pmap code wants to find the
- * PTE for a virtual address, all it has to do is the following:
- *
- * Address of PTE = (831 * 4MB) + (VA / PAGE_SIZE) * sizeof(pt_entry_t)
- * = 0xcfc00000 + (VA / 4096) * 4
- *
- * What happens if the pmap layer is asked to perform an operation
- * on a pmap that is not the one which is currently active? In that
- * case we take the PA of the PDP of non-active pmap and put it in
- * slot 1023 of the active pmap. This causes the non-active pmap's
- * PTEs to get mapped in the final 4MB of the 4GB address space
- * (e.g. starting at 0xffc00000).
- *
- * The following figure shows the effects of the recursive PDP mapping:
- *
- * PDP (%cr3)
- * +----+
- * | 0| -> PTP#0 that maps VA 0x0 -> 0x400000
- * | |
- * | |
- * | 831| -> points back to PDP (%cr3) mapping VA 0xcfc00000 -> 0xd0000000
- * | 832| -> first kernel PTP (maps 0xd0000000 -> 0xe0400000)
- * | |
- * |1023| -> points to alternate pmap's PDP (maps 0xffc00000 -> end)
- * +----+
- *
- * Note that the PDE#831 VA (0xcfc00000) is defined as "PTE_BASE".
- * Note that the PDE#1023 VA (0xffc00000) is defined as "APTE_BASE".
- *
- * Starting at VA 0xcfc00000 the current active PDP (%cr3) acts as a
- * PTP:
- *
- * PTP#831 == PDP(%cr3) => maps VA 0xcfc00000 -> 0xd0000000
- * +----+
- * | 0| -> maps the contents of PTP#0 at VA 0xcfc00000->0xcfc01000
- * | |
- * | |
- * | 831| -> maps the contents of PTP#831 (the PDP) at VA 0xcff3f000
- * | 832| -> maps the contents of first kernel PTP
- * | |
- * |1023|
- * +----+
- *
- * Note that mapping of the PDP at PTP#831's VA (0xcff3f000) is
- * defined as "PDP_BASE".... within that mapping there are two
- * defines:
- * "PDP_PDE" (0xcff3fcfc) is the VA of the PDE in the PDP
- * which points back to itself.
- * "APDP_PDE" (0xcff3fffc) is the VA of the PDE in the PDP which
- * establishes the recursive mapping of the alternate pmap.
- * To set the alternate PDP, one just has to put the correct
- * PA info in *APDP_PDE.
- *
- * Note that in the APTE_BASE space, the APDP appears at VA
- * "APDP_BASE" (0xfffff000).
- */
-
-/*
* The following defines identify the slots used as described above.
*/
diff --git a/sys/arch/i386/include/pte.h b/sys/arch/i386/include/pte.h
index 805ed0f4868..efaa89caaec 100644
--- a/sys/arch/i386/include/pte.h
+++ b/sys/arch/i386/include/pte.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: pte.h,v 1.19 2015/01/09 03:43:52 mlarkin Exp $ */
+/* $OpenBSD: pte.h,v 1.20 2015/03/13 23:23:13 mlarkin Exp $ */
/* $NetBSD: pte.h,v 1.11 1998/02/06 21:58:05 thorpej Exp $ */
/*
@@ -37,90 +37,6 @@
#ifndef _MACHINE_PTE_H_
#define _MACHINE_PTE_H_
-/*
- * i386 MMU hardware structure:
- *
- * the i386 MMU is a two-level MMU which maps 4GB of virtual memory.
- * the pagesize is 4K (4096 [0x1000] bytes), although newer pentium
- * processors can support a 4MB pagesize as well.
- *
- * the first level table (segment table?) is called a "page directory"
- * and it contains 1024 page directory entries (PDEs). each PDE is
- * 4 bytes (an int), so a PD fits in a single 4K page. this page is
- * the page directory page (PDP). each PDE in a PDP maps 4MB of space
- * (1024 * 4MB = 4GB). a PDE contains the physical address of the
- * second level table: the page table. or, if 4MB pages are being used,
- * then the PDE contains the PA of the 4MB page being mapped.
- *
- * a page table consists of 1024 page table entries (PTEs). each PTE is
- * 4 bytes (an int), so a page table also fits in a single 4K page. a
- * 4K page being used as a page table is called a page table page (PTP).
- * each PTE in a PTP maps one 4K page (1024 * 4K = 4MB). a PTE contains
- * the physical address of the page it maps and some flag bits (described
- * below).
- *
- * the processor has a special register, "cr3", which points to the
- * the PDP which is currently controlling the mappings of the virtual
- * address space.
- *
- * the following picture shows the translation process for a 4K page:
- *
- * %cr3 register [PA of PDP]
- * |
- * |
- * | bits <31-22> of VA bits <21-12> of VA bits <11-0>
- * | index the PDP (0 - 1023) index the PTP are the page offset
- * | | | |
- * | v | |
- * +--->+----------+ | |
- * | PD Page | PA of v |
- * | |---PTP-------->+------------+ |
- * | 1024 PDE | | page table |--PTE--+ |
- * | entries | | (aka PTP) | | |
- * +----------+ | 1024 PTE | | |
- * | entries | | |
- * +------------+ | |
- * | |
- * bits <31-12> bits <11-0>
- * p h y s i c a l a d d r
- *
- * the i386 caches PTEs in a TLB. it is important to flush out old
- * TLB mappings when making a change to a mappings. writing to the
- * %cr3 will flush the entire TLB. newer processors also have an
- * instruction that will invalidate the mapping of a single page (which
- * is useful if you are changing a single mappings because it preserves
- * all the cached TLB entries).
- *
- * as shows, bits 31-12 of the PTE contain PA of the page being mapped.
- * the rest of the PTE is defined as follows:
- * bit# name use
- * 11 n/a available for OS use, hardware ignores it
- * 10 n/a available for OS use, hardware ignores it
- * 9 n/a available for OS use, hardware ignores it
- * 8 G global bit (see discussion below)
- * 7 PS page size [for PDEs] (0=4k, 1=4M <if supported>)
- * 6 D dirty (modified) page
- * 5 A accessed (referenced) page
- * 4 PCD cache disable
- * 3 PWT prevent write through (cache)
- * 2 U/S user/supervisor bit (0=supervisor only, 1=both u&s)
- * 1 R/W read/write bit (0=read only, 1=read-write)
- * 0 P present (valid)
- *
- * notes:
- * - on the i386 the R/W bit is ignored if processor is in supervisor
- * state (bug!)
- * - PS is only supported on newer processors
- * - PTEs with the G bit are global in the sense that they are not
- * flushed from the TLB when %cr3 is written (to flush, use the
- * "flush single page" instruction). this is only supported on
- * newer processors. this bit can be used to keep the kernel's
- * TLB entries around while context switching. since the kernel
- * is mapped into all processes at the same place it does not make
- * sense to flush these entries when switching from one process'
- * pmap to another.
- */
-
#if !defined(_LOCORE)
/*